call .\venv\Scripts\activate python deactivate
import cv2
import glob
import os
import os.path
import re
import numpy as np

# Directory holding the source icon images; results are written to "<dir>-2".
ocvIcoDirPath = "data/ico-96"


def binary_threshold(img, band_pass=False):
    """Binarize a grayscale image with fixed thresholds.

    Args:
        img: Single-channel image array, passed straight to cv2.threshold.
        band_pass: When False (the default, and the original behavior) pixels
            above 105 become 255 and the rest 0. When True, only pixels that
            are above 105 and not above 145 become 255.

    Returns:
        The binarized image array.
    """
    under_thresh = 105
    upper_thresh = 145
    maxValue = 255
    _, drop_back = cv2.threshold(grayed := img, under_thresh, maxValue, cv2.THRESH_BINARY) \
        if False else cv2.threshold(img, under_thresh, maxValue, cv2.THRESH_BINARY)
    if not band_pass:
        # The original computed the band result and then discarded it in
        # favor of the plain lower threshold, so that stays the default.
        return drop_back
    _, clarify_born = cv2.threshold(img, upper_thresh, maxValue, cv2.THRESH_BINARY_INV)
    # Both inputs are 0/255 masks, so the element-wise minimum is their AND.
    return np.minimum(drop_back, clarify_born)


def mask_blue(path):
    """Load the image at *path* and paint its blue-ish pixels white.

    Args:
        path: Path of the image file, read with cv2.imread.

    Returns:
        A copy of the image in which every pixel inside the HSV range below
        is replaced by white and all other pixels are unchanged.
    """
    img = cv2.imread(path)
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # NOTE(review): the bounds look like values picked on a 0-240 scale and
    # rescaled to 0-180 (hue) and 0-100 (saturation, value) - confirm the
    # intended ranges against what cv2.COLOR_BGR2HSV actually produces.
    blue_min = np.array([210*180/240, 170*100/240, 200*100/240], np.uint8)
    blue_max = np.array([230*180/240, 190*100/240, 230*100/240], np.uint8)
    blue_region = cv2.inRange(hsv, blue_min, blue_max)
    white = np.full(img.shape, 255, dtype=img.dtype)
    # detected blue area becomes white
    background = cv2.bitwise_and(white, white, mask=blue_region)
    # make mask for not-blue area
    inv_mask = cv2.bitwise_not(blue_region)
    extracted = cv2.bitwise_and(img, img, mask=inv_mask)
    return cv2.add(extracted, background)


def morph(img):
    """Apply a morphological opening (3x3 kernel, 2 iterations) to *img*."""
    kernel = np.ones((3, 3), np.uint8)
    return cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel, iterations=2)


def morph_and_blur(img):
    """Blur (3x3), open (3x3 kernel, 2 iterations), then blur again (5x5)."""
    kernel = np.ones((3, 3), np.uint8)
    m = cv2.GaussianBlur(img, (3, 3), 0)
    m = cv2.morphologyEx(m, cv2.MORPH_OPEN, kernel, iterations=2)
    m = cv2.GaussianBlur(m, (5, 5), 0)
    return m


def icoFile(icoFilePath):
    """Load one icon file and return its opened grayscale version.

    The original body never read *icoFilePath*; it used a name ``img`` that
    only existed as a leftover global of the driver loop below.

    Args:
        icoFilePath: Path of the image file to load.

    Returns:
        The grayscale image after morph().

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    img = cv2.imread(icoFilePath)
    if img is None:
        raise ValueError("cannot read image: " + str(icoFilePath))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return morph(gray)


def resize_image(img, size):
    """Center *img* on a larger canvas and inpaint the surrounding border.

    Args:
        img: Image array, either 2-D (grayscale) or 3-D (with channels).
        size: Target size as (width, height).

    Returns:
        An image of the requested size with *img* in the middle and the
        border filled in by cv2.inpaint.

    Raises:
        ValueError: If *img* does not fit inside *size*.
    """
    img_h, img_w = img.shape[:2]
    width, height = size
    if img_h > height or img_w > width:
        raise ValueError("img is larger than size")

    # centering
    row = (height - img_h) // 2
    col = (width - img_w) // 2
    # Arrays are indexed rows-first, so the canvas is (height, width); the
    # original used (width, height) here, which only worked for squares.
    # img.shape[2:] is empty for grayscale input, so both layouts work.
    resized = np.zeros((height, width) + tuple(img.shape[2:]), dtype=np.uint8)
    resized[row:row + img_h, col:col + img_w] = img

    # filling: everything outside the pasted image is marked for inpainting
    mask = np.full((height, width), 255, dtype=np.uint8)
    mask[row:row + img_h, col:col + img_w] = 0
    return cv2.inpaint(resized, mask, 3, cv2.INPAINT_TELEA)


# Binarize every file in the icon directory into the sibling "-2" directory.
icoFilePaths = glob.glob(str(ocvIcoDirPath) + "/*")
pathname = ocvIcoDirPath + "-2"
os.makedirs(pathname, exist_ok=True)
for icoFilePath in icoFilePaths:
    basename = os.path.basename(icoFilePath)
    # glob may return backslash separators on Windows; normalize to "/".
    icoFilePath = re.sub('[/\\\\]', "/", icoFilePath)
    img = cv2.imread(icoFilePath)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        print("# skipped (not a readable image): " + icoFilePath)
        continue
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = binary_threshold(gray)
    # Alternative post-processing steps that were tried here:
    # morph(gray), morph_and_blur(gray), resize_image(gray, (210, 210))
    cv2.imwrite(pathname + "/" + basename, gray)
import os
import re
import requests
import time
from bs4 import BeautifulSoup
# Per-site scraping configuration, keyed by site name.
#
# Keys used by the functions in this file:
#   itemEnable          - the site is crawled only when this is true
#   staLstUrl           - URL of the page that links to the station pages
#   reImgLstUrl         - regex selecting the <a href> values of station pages
#   reImgLstId          - replacement applied to a matching href
#   fmtImgLstUrl        - format string turning that result into a page URL
#   reStaImgUrl         - regex selecting the <img src> values to download
#   reStaImgId          - replacement applied to a matching src
#   fmtStaImgUrl        - format string turning that result into an image URL
#   reStaImgUrlFilBas / regStaImgUrlFilBas
#                       - regex + replacement giving the local file base name
#   reStaImgUrlFilExt / regStaImgUrlFilExt
#                       - regex + replacement giving the local file extension
# The two file-name regexes are applied to "<page URL>,<image URL>".
# ocvSitUrl, imgUrl and z are not read by any code visible in this file.
staSitLst = {
    # jreast - sample markup the patterns are written for
    #   (presumably the station list page; 東京 = Tokyo):
    #     </td><td style="width:99px;"><A href="" target = "_blank" onclick="openMap('1039');return false;">東京</A>
    #   (presumably the station page; alt text = "Tokyo station 1F floor map"):
    #     <img src="img/floormap/1039_1f.png" alt="東京駅 1F構内図">
    "jreast": {
        "itemEnable": False,
        "ocvSitUrl": "",
        "staLstUrl": "",
        "reImgLstUrl": r"^(http://www\.jreast\.co\.jp/estation/stations/[0-9]+\..+)$",
        "reImgLstId": "\\1",
        "fmtImgLstUrl": "{}",
        "reStaImgUrl": r"^(img/floormap/.+\..+)$",
        "reStaImgId": "\\1",
        "fmtStaImgUrl": "{}",
        "reStaImgUrlFilBas": r"^.*img/floormap/([^.]+)\.[^.]+.*$",
        "reStaImgUrlFilExt": r"^.*img/floormap/[^.]+\.([^.]+)$",
        "regStaImgUrlFilBas": "\\1",
        "regStaImgUrlFilExt": "\\1",
        "imgUrl": "{}/{}",
        "z": "z",
    },
    # keikyu - sample markup the patterns are written for
    #   (presumably the station list page; 品川 = Shinagawa):
    #     <area coords="206,48,426,88" href="/train-info/kakueki/KK01.html" alt="品川" data-imagemap-rollover-url="/assets/image/train-info/kakueki/index_img_route_KK01.png">
    #   (presumably the station page; alt text = "facility map of Shinagawa station"):
    #     <img src="/assets/image/train-info/kakueki/KK01/img_03.png" alt="品川駅の設備図です">
    "keikyu": {
        "itemEnable": False,
        "ocvSitUrl": "",
        "staLstUrl": "",
        "reImgLstUrl": r"^(/train-info/kakueki/KK[0-9][0-9]\.htm.*)",
        "reImgLstId": "\\1",
        "fmtImgLstUrl": "{}",
        "reStaImgUrl": r"^(/assets/image/train-info/kakueki/KK[0-9][0-9]/img_03\..+)$",
        "reStaImgId": "\\1",
        "fmtStaImgUrl": "{}",
        "reStaImgUrlFilBas": r"^.*/assets/image/train-info/kakueki/(KK[0-9][0-9])/img_03.[^.]+.*$",
        "reStaImgUrlFilExt": r"^.*/assets/image/train-info/kakueki/KK[0-9][0-9]/img_03.([^.]+).*$",
        "regStaImgUrlFilBas": "\\1",
        "regStaImgUrlFilExt": "\\1",
        "imgUrl": "{}/{}",
        "z": "z",
    },
    # tokyometro - sample markup the patterns are written for
    #   (presumably the station list page):
    #     <a href="./akihabara/index.html">
    #   (presumably the yardmap page):
    #     <p class="v2_yardmapImg"><img src="../../yardmap_img/_station_%E7%A7%8B%E8%91%89%E5%8E%9F_yardmap_images_yardmap.jpg" alt="" class="v2_js-yardmapImg"></p>
    #     <p class="v2_yardmapImg"><img src="../../yardmap_img/figure_yardmap_ayase.gif" alt="" class="v2_js-yardmapImg"></p>
    "tokyometro": {
        "itemEnable": False,
        "ocvSitUrl": "",
        "staLstUrl": "",
        "reImgLstUrl": r"^\./(.+)/index\.html$",
        "reImgLstId": "\\1",
        "fmtImgLstUrl": "{}/yardmap/index.html",
        "reStaImgUrl": r"^\.\./\.\./(yardmap_img/.+\..+)$",
        "reStaImgId": "\\1",
        "fmtStaImgUrl": "{}",
        # The original pattern had a ")" without its "(" and could not be
        # compiled. The group captures the path segment directly in front of
        # "/yardmap/index.html", with or without a leading URL prefix.
        "reStaImgUrlFilBas": r"^(?:.*/)?([^/]+)/yardmap/index\.html.*$",
        "reStaImgUrlFilExt": r"^.*/yardmap_img/.+\.([^.]+).*$",
        "regStaImgUrlFilBas": "\\1",
        "regStaImgUrlFilExt": "\\1",
        "imgUrl": "{}/{}",
        "z": "z",
    },
}
# NOTE(review): presumably a wait interval in seconds between requests (the
# file imports time), but no code visible here reads this value - confirm.
ocvItvlt = 10
def getStaImg(ocvSit, imgLstUrl, staImgUrl):
    """Download one station image into data/_img/<site>/floormap/.

    The local file name is derived by applying the site's file-name regexes
    to the string "<imgLstUrl>,<staImgUrl>".

    Args:
        ocvSit: Site key into staSitLst.
        imgLstUrl: URL of the page the image was found on.
        staImgUrl: URL of the image to download.

    Returns:
        None. The image is written to disk; a response with an HTTP error
        status is reported on stdout and nothing is written.
    """
    siteCfg = staSitLst[ocvSit]
    reStaImgUrlFilBas = siteCfg["reStaImgUrlFilBas"]
    regStaImgUrlFilBas = siteCfg["regStaImgUrlFilBas"]
    reStaImgUrlFilExt = siteCfg["reStaImgUrlFilExt"]
    regStaImgUrlFilExt = siteCfg["regStaImgUrlFilExt"]

    # Some sites carry the station name only in the page URL, others only in
    # the image URL, so the patterns get to see both.
    urlPair = imgLstUrl + "," + staImgUrl
    imgFilBas = re.sub(reStaImgUrlFilBas, regStaImgUrlFilBas, urlPair)
    imgFilExt = re.sub(reStaImgUrlFilExt, regStaImgUrlFilExt, urlPair)

    fmtImgDirPth = "/".join(["data", "_img", "{}", "floormap"])
    fmtImgFilPth = "/".join([fmtImgDirPth, "{}.{}"])
    imgDirPth = fmtImgDirPth.format(ocvSit)
    imgFilPth = fmtImgFilPth.format(ocvSit, imgFilBas, imgFilExt)
    print("# staImgUrl=" + staImgUrl)
    print("# imgFilPth=" + imgFilPth)

    os.makedirs(imgDirPth, exist_ok=True)
    staImg = requests.get(staImgUrl)
    if not staImg.ok:
        # Do not store an HTML error page under an image file name.
        print("# download failed: status=" + str(staImg.status_code))
        return
    with open(imgFilPth, "wb") as file:
        # The body of this with-statement was missing in the original, so
        # the downloaded bytes were never written.
        file.write(staImg.content)
    # NOTE(review): ocvItvlt and the time import are unused in the visible
    # code; a pause between downloads may have been lost here - confirm.
def getImgLst(ocvSit, imgLstUrl):
    """Fetch one image-list page and download every matching image on it.

    Args:
        ocvSit: Site key into staSitLst.
        imgLstUrl: URL of the page to fetch and scan for <img> tags.

    Returns:
        None. Each <img> whose src matches the site's "reStaImgUrl" pattern
        is handed to getStaImg().
    """
    siteCfg = staSitLst[ocvSit]
    srcPattern = siteCfg["reStaImgUrl"]
    idTemplate = siteCfg["reStaImgId"]
    urlTemplate = siteCfg["fmtStaImgUrl"]

    page = requests.get(imgLstUrl)
    document = BeautifulSoup(page.text, "lxml")
    for imgTag in document.find_all("img", src=re.compile(srcPattern)):
        # Rewrite the src through the site's replacement, then expand it
        # into the URL that is actually requested.
        staImgId = re.sub(srcPattern, idTemplate, imgTag["src"])
        staImgUrl = urlTemplate.format(staImgId)
        print("# staImgId=" + staImgId + ", staImgUrl=" + staImgUrl)
        getStaImg(ocvSit, imgLstUrl, staImgUrl)
def getStaLst(ocvSit, staLstUrl):
    """Fetch a site's station list page and process every station link.

    Args:
        ocvSit: Site key into staSitLst.
        staLstUrl: URL of the page to fetch and scan for <a> tags.

    Returns:
        None. Each <a> whose href matches the site's "reImgLstUrl" pattern
        is turned into a page URL and handed to getImgLst().
    """
    siteCfg = staSitLst[ocvSit]
    hrefPattern = siteCfg["reImgLstUrl"]
    idTemplate = siteCfg["reImgLstId"]
    urlTemplate = siteCfg["fmtImgLstUrl"]

    page = requests.get(staLstUrl)
    document = BeautifulSoup(page.text, "lxml")
    for anchor in document.find_all("a", href=re.compile(hrefPattern)):
        # Rewrite the href through the site's replacement, then expand it
        # into the URL of the page that lists the station's images.
        staPagId = re.sub(hrefPattern, idTemplate, anchor["href"])
        imgLstUrl = urlTemplate.format(staPagId)
        print("# staPagId=" + staPagId + ", imgLstUrl=" + imgLstUrl)
        getImgLst(ocvSit, imgLstUrl)
def getSitLst(staSitLst):
    """Crawl every enabled site in the given configuration.

    Args:
        staSitLst: Mapping of site name to its configuration dict. Each
            entry must have "itemEnable" and "staLstUrl".

    Returns:
        None. For each entry whose "itemEnable" is true, the site's
        "staLstUrl" is printed and passed to getStaLst().
    """
    for staSitNam, siteCfg in staSitLst.items():
        if not siteCfg["itemEnable"]:
            continue
        staLstUrl = siteCfg["staLstUrl"]
        print("# staSitNam=" + staSitNam + ", staLstUrl=" + staLstUrl)
        getStaLst(staSitNam, staLstUrl)
def getSit():
投稿 (Atom)