2019年8月12日月曜日

mkCascadeDetect.bat

rem Run mkCascadeDetect.py inside the project's virtual environment.
rem "call" is used so that control returns to this script after the
rem activation script finishes.
call .\venv\Scripts\activate
python mkCascadeDetect.py
rem Leave the virtual environment again.
deactivate

cvtIcon.py

import cv2
import glob
import os
import os.path
import re
import numpy as np

# Directory whose files are read as the source icon images; the converted
# images are written to a sibling directory named "<this path>-2".
ocvIcoDirPath="data/ico-96"
def binary_threshold(img, under_thresh=105, max_value=255):
    """Binarize an image with a fixed lower threshold.

    Pixels strictly brighter than ``under_thresh`` become ``max_value`` and
    all other pixels become 0 (``cv2.THRESH_BINARY``).

    An earlier revision also computed an inverted upper-threshold mask
    (threshold 145) and merged both masks with ``np.minimum``, but then
    overwrote the merged result with the lower-threshold mask.  That dead
    computation is dropped here; the returned image is the same.

    Args:
        img: Image array accepted by ``cv2.threshold`` (the script passes a
            single-channel grayscale image).
        under_thresh: Lower threshold; defaults to the original value 105.
        max_value: Value assigned to pixels above the threshold; defaults
            to the original value 255.

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    _, binarized = cv2.threshold(img, under_thresh, max_value, cv2.THRESH_BINARY)
    return binarized

def mask_blue(path):
    """Load an image and replace the pixels inside an HSV range with white.

    Args:
        path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A BGR image of the same shape in which every pixel that falls inside
        the HSV range below is white and every other pixel is unchanged.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(path))
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Bounds are written as values on a 0-240 scale and rescaled; the float
    # results are truncated when stored as uint8.
    # NOTE(review): OpenCV's 8-bit HSV uses H in 0-179 and S/V in 0-255, but
    # S and V are rescaled to 0-100 here -- confirm the intended range.
    blue_min = np.array([210*180/240, 170*100/240, 200*100/240], np.uint8)
    blue_max = np.array([230*180/240, 190*100/240, 230*100/240], np.uint8)

    blue_region = cv2.inRange(hsv, blue_min, blue_max)

    # White wherever the mask is set, black elsewhere.
    white = np.full(img.shape, 255, dtype=img.dtype)
    background = cv2.bitwise_and(white, white, mask=blue_region)

    # Original pixels wherever the mask is NOT set, black elsewhere.
    inv_mask = cv2.bitwise_not(blue_region)
    extracted = cv2.bitwise_and(img, img, mask=inv_mask)

    # The two images are non-zero on disjoint regions, so adding merges them.
    return cv2.add(extracted, background)

def morph(img):
    """Apply a morphological opening to ``img`` and return the result.

    The opening uses a 3x3 all-ones kernel and is run for two iterations.

    Args:
        img: Image array accepted by ``cv2.morphologyEx``.

    Returns:
        The opened image.
    """
    kernel = np.ones((3, 3), np.uint8)
    return cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel, iterations=2)

def morph_and_blur(img):
    """Blur, open, then blur ``img`` again and return the result.

    Steps, in order: a 3x3 Gaussian blur, a two-iteration morphological
    opening with a 3x3 all-ones kernel, and a final 5x5 Gaussian blur.
    """
    structuring_element = np.ones((3, 3), np.uint8)
    pre_blurred = cv2.GaussianBlur(img, (3, 3), 0)
    opened = cv2.morphologyEx(
        pre_blurred, cv2.MORPH_OPEN, structuring_element, iterations=2
    )
    return cv2.GaussianBlur(opened, (5, 5), 0)

def icoFile(icoFilePath):
    """Load one icon file, convert it to grayscale and open it with morph().

    The previous version ignored ``icoFilePath`` and read a module-level
    ``img`` instead; the image is now loaded from the given path.

    Args:
        icoFilePath: Path of the icon image to read with ``cv2.imread``.

    Returns:
        The grayscale image after ``morph`` has been applied.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``icoFilePath``.
    """
    img = cv2.imread(icoFilePath)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError("cannot read image: {}".format(icoFilePath))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return morph(gray)

def resize_image(img, size):
    """Center ``img`` on a larger canvas and inpaint the surrounding border.

    Args:
        img: 8-bit image, either 2-D (grayscale) or 3-D (rows, cols,
            channels).
        size: ``(width, height)`` of the output canvas.

    Returns:
        An image of ``height`` rows and ``width`` columns with ``img`` in
        the middle and the border filled by ``cv2.inpaint`` (Telea method,
        radius 3).

    Raises:
        ValueError: If ``img`` does not fit inside ``size``.
    """
    img_rows, img_cols = img.shape[:2]
    width, height = size
    if img_rows > height or img_cols > width:
        raise ValueError("img is larger than size")

    # Top-left corner that centers the image on the canvas.
    row = (height - img_rows) // 2
    col = (width - img_cols) // 2

    # The canvas is (rows, cols[, channels]); img.shape[2:] is empty for a
    # grayscale image, so both 2-D and 3-D inputs are handled.
    resized = np.zeros((height, width) + tuple(img.shape[2:]), dtype=np.uint8)
    resized[row:row + img_rows, col:col + img_cols] = img

    # Mask is 255 where pixels must be synthesized (the border) and 0 over
    # the pasted image, which inpainting leaves alone.
    mask = np.full((height, width), 255, dtype=np.uint8)
    mask[row:row + img_rows, col:col + img_cols] = 0

    return cv2.inpaint(resized, mask, 3, cv2.INPAINT_TELEA)

# Binarize every file found in ocvIcoDirPath and write the result, under the
# same file name, into the sibling directory "<ocvIcoDirPath>-2".
icoFilePaths = glob.glob(str(ocvIcoDirPath) + "/*")
pathname = ocvIcoDirPath + "-2"
os.makedirs(pathname, exist_ok=True)
for icoFilePath in icoFilePaths:
    basename = os.path.basename(icoFilePath)
    # Use forward slashes throughout, whatever separator glob returned.
    icoFilePath = re.sub('[/\\\\]', "/", icoFilePath)
    img = cv2.imread(icoFilePath)
    if img is None:
        # cv2.imread returns None for anything it cannot decode; skip it
        # instead of crashing in cvtColor.
        print("# skip (not readable as image): " + icoFilePath)
        continue
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = binary_threshold(gray)
    # Alternative post-processing steps that were tried here and are
    # currently disabled: morph(gray), morph_and_blur(gray),
    # resize_image(gray, (210, 210)).
    cv2.imwrite(pathname + "/" + basename, gray)

2019年8月5日月曜日

getSitImg.py

import os
import re
import requests
import time
from bs4 import BeautifulSoup

# Per-site crawl configuration, keyed by site name.
#
# Keys read by the functions in this file:
#   itemEnable          only sites whose value equals True are crawled
#   staLstUrl           URL of the page listing all stations
#   reImgLstUrl         regex selecting station-page links (href) on that page
#   reImgLstId          replacement applied with reImgLstUrl to get a page id
#   fmtImgLstUrl        format string turning the page id into a station URL
#   reStaImgUrl         regex selecting floor-map <img src> values
#   reStaImgId          replacement applied with reStaImgUrl to get an image id
#   fmtStaImgUrl        format string turning the image id into an image URL
#   reStaImgUrlFilBas   regex applied to "<station URL>,<image URL>" ...
#   regStaImgUrlFilBas  ... and its replacement, giving the file base name
#   reStaImgUrlFilExt   regex applied to "<station URL>,<image URL>" ...
#   regStaImgUrlFilExt  ... and its replacement, giving the file extension
# "ocvSitUrl", "imgUrl" and "z" are not read anywhere in this file.
#
# Regex patterns are raw strings so that "\." is not treated as a string
# escape; the resulting pattern text is unchanged.
staSitLst = {
    # jreast
    #
    # https://www.jreast.co.jp/estation/stations/
    #     <A href="http://www.jreast.co.jp/estation/stations/1039.html" target = "_blank" onclick="openMap('1039');return false;">東京</A>   (Tokyo)
    #
    # https://www.jreast.co.jp/estation/stations/1039.html
    #     <img src="img/floormap/1039_1f.png" alt="東京駅 1F構内図">   (Tokyo station 1F floor map)
    #
    # https://www.jreast.co.jp/estation/stations/img/floormap/1039_1f.png
    "jreast": {
        "itemEnable": False,
        "ocvSitUrl": "https://www.jreast.co.jp/",
        "staLstUrl": "https://www.jreast.co.jp/estation/stations",
        "reImgLstUrl": r"^(http://www\.jreast\.co\.jp/estation/stations/[0-9]+\..+)$",
        "reImgLstId": r"\1",
        "fmtImgLstUrl": "{}",
        "reStaImgUrl": r"^(img/floormap/.+\..+)$",
        "reStaImgId": r"\1",
        "fmtStaImgUrl": "https://www.jreast.co.jp/estation/stations/{}",
        "reStaImgUrlFilBas": r"^.*img/floormap/([^.]+)\.[^.]+.*$",
        "reStaImgUrlFilExt": r"^.*img/floormap/[^.]+\.([^.]+)$",
        "regStaImgUrlFilBas": r"\1",
        "regStaImgUrlFilExt": r"\1",
        "imgUrl": "{}/{}",
        "z": "z"
    },
    # keikyu
    #
    # https://www.keikyu.co.jp/train-info/kakueki/index.html
    #     <area coords="206,48,426,88" href="/train-info/kakueki/KK01.html" alt="品川" data-imagemap-rollover-url="/assets/image/train-info/kakueki/index_img_route_KK01.png">   (Shinagawa)
    #
    # https://www.keikyu.co.jp/train-info/kakueki/KK01.html
    #     <img src="/assets/image/train-info/kakueki/KK01/img_03.png" alt="品川駅の設備図です">   (Shinagawa station facility map)
    #
    # https://www.keikyu.co.jp/assets/image/train-info/kakueki/KK01/img_03.png
    "keikyu": {
        "itemEnable": False,
        "ocvSitUrl": "https://www.keikyu.co.jp/",
        "staLstUrl": "https://www.keikyu.co.jp/train-info/kakueki/index.html",
        "reImgLstUrl": r"^(/train-info/kakueki/KK[0-9][0-9]\.htm.*)",
        "reImgLstId": r"\1",
        "fmtImgLstUrl": "https://www.keikyu.co.jp{}",
        "reStaImgUrl": r"^(/assets/image/train-info/kakueki/KK[0-9][0-9]/img_03\..+)$",
        "reStaImgId": r"\1",
        "fmtStaImgUrl": "https://www.keikyu.co.jp{}",
        "reStaImgUrlFilBas": r"^.*/assets/image/train-info/kakueki/(KK[0-9][0-9])/img_03.[^.]+.*$",
        "reStaImgUrlFilExt": r"^.*/assets/image/train-info/kakueki/KK[0-9][0-9]/img_03.([^.]+).*$",
        "regStaImgUrlFilBas": r"\1",
        "regStaImgUrlFilExt": r"\1",
        "imgUrl": "{}/{}",
        "z": "z"
    },
    # tokyometro
    #
    # https://www.tokyometro.jp/station/index03.html
    #     <a href="./akihabara/index.html">
    #
    # https://www.tokyometro.jp/station/akihabara/yardmap/index.html#adjacent
    #     <p class="v2_yardmapImg"><img src="../../yardmap_img/_station_%E7%A7%8B%E8%91%89%E5%8E%9F_yardmap_images_yardmap.jpg" alt="" class="v2_js-yardmapImg"></p>
    #     <p class="v2_yardmapImg"><img src="../../yardmap_img/figure_yardmap_ayase.gif" alt="" class="v2_js-yardmapImg"></p>
    #
    # https://www.tokyometro.jp/station/yardmap_img/_station_%E7%A7%8B%E8%91%89%E5%8E%9F_yardmap_images_yardmap.jpg
    # https://www.tokyometro.jp/station/yardmap_img/figure_yardmap_ayase.gif
    "tokyometro": {
        "itemEnable": False,
        "ocvSitUrl": "https://www.tokyometro.jp/",
        "staLstUrl": "https://www.tokyometro.jp/station/index03.html",
        "reImgLstUrl": r"^\./(.+)/index\.html$",
        "reImgLstId": r"\1",
        "fmtImgLstUrl": "https://www.tokyometro.jp/station/{}/yardmap/index.html",
        "reStaImgUrl": r"^\.\./\.\./(yardmap_img/.+\..+)$",
        "reStaImgId": r"\1",
        "fmtStaImgUrl": "https://www.tokyometro.jp/station/{}",
        "reStaImgUrlFilBas": r"^.*https://www.tokyometro.jp/station/([^/]+)/yardmap/index.html.*$",
        "reStaImgUrlFilExt": r"^.*/yardmap_img/.+\.([^.]+).*$",
        "regStaImgUrlFilBas": r"\1",
        "regStaImgUrlFilExt": r"\1",
        "imgUrl": "{}/{}",
        "z": "z"
    }
}

# Number of seconds passed to time.sleep() in getStaImg() after each image.
ocvItvlt = 10
def getStaImg(ocvSit, imgLstUrl, staImgUrl, timeout=30):
    """Download one floor-map image into data/_img/<ocvSit>/floormap/.

    The file base name and extension are obtained by applying the site's
    ``reStaImgUrlFilBas`` / ``reStaImgUrlFilExt`` patterns (from
    ``staSitLst``) to the string ``"<imgLstUrl>,<staImgUrl>"``.

    A failed request (connection error, timeout or HTTP error status) is
    reported with ``print`` and no file is written, so an error page is
    never saved as an image.  The function always sleeps ``ocvItvlt``
    seconds before returning.

    Args:
        ocvSit: Site key in ``staSitLst``.
        imgLstUrl: URL of the station page the image was found on.
        staImgUrl: Absolute URL of the image to download.
        timeout: Seconds passed to ``requests.get`` as its timeout.
    """
    siteCfg = staSitLst[ocvSit]
    # Both URLs are offered to the patterns so a site can take the file name
    # from either the station page URL or the image URL.
    nameSource = imgLstUrl + "," + staImgUrl
    imgFilBas = re.sub(siteCfg["reStaImgUrlFilBas"], siteCfg["regStaImgUrlFilBas"], nameSource)
    imgFilExt = re.sub(siteCfg["reStaImgUrlFilExt"], siteCfg["regStaImgUrlFilExt"], nameSource)
    fmtImgDirPth = "/".join(["data", "_img", "{}", "floormap"])
    fmtImgFilPth = "/".join([fmtImgDirPth, "{}.{}"])
    imgDirPth = fmtImgDirPth.format(ocvSit)
    imgFilPth = fmtImgFilPth.format(ocvSit, imgFilBas, imgFilExt)
    print("# staImgUrl=" + staImgUrl)
    print("# imgFilPth=" + imgFilPth)
    os.makedirs(imgDirPth, exist_ok=True)
    try:
        staImg = requests.get(staImgUrl, timeout=timeout)
        staImg.raise_for_status()
    except requests.RequestException as err:
        print("# download failed: " + staImgUrl + " (" + str(err) + ")")
    else:
        with open(imgFilPth, "wb") as file:
            file.write(staImg.content)
    # Pause between downloads, whether or not this one succeeded.
    time.sleep(ocvItvlt)

def getImgLst(ocvSit, imgLstUrl, timeout=30):
    """Fetch one station page and download every floor-map image on it.

    ``<img>`` tags whose ``src`` matches the site's ``reStaImgUrl`` pattern
    are selected; each ``src`` is rewritten with ``reStaImgId`` and
    ``fmtStaImgUrl`` into an absolute URL and handed to ``getStaImg``.

    A failed page request (connection error, timeout or HTTP error status)
    is reported with ``print`` and the page is skipped.

    Args:
        ocvSit: Site key in ``staSitLst``.
        imgLstUrl: URL of the station page to scan.
        timeout: Seconds passed to ``requests.get`` as its timeout.
    """
    siteCfg = staSitLst[ocvSit]
    # Compiled once: the same pattern both selects the tags and rewrites src.
    reStaImgUrl = re.compile(siteCfg["reStaImgUrl"])
    reStaImgId = siteCfg["reStaImgId"]
    fmtStaImgUrl = siteCfg["fmtStaImgUrl"]
    try:
        staImgPag = requests.get(imgLstUrl, timeout=timeout)
        staImgPag.raise_for_status()
    except requests.RequestException as err:
        print("# page fetch failed: " + imgLstUrl + " (" + str(err) + ")")
        return
    soup = BeautifulSoup(staImgPag.text, "lxml")
    for staImgImgTag in soup.find_all("img", src=reStaImgUrl):
        staImgId = reStaImgUrl.sub(reStaImgId, staImgImgTag["src"])
        staImgUrl = fmtStaImgUrl.format(staImgId)
        print("# staImgId=" + staImgId + ", staImgUrl=" + staImgUrl)
        getStaImg(ocvSit, imgLstUrl, staImgUrl)

def getStaLst(ocvSit, staLstUrl, timeout=30):
    """Fetch a site's station list and process every station page it links.

    ``<a>`` tags whose ``href`` matches the site's ``reImgLstUrl`` pattern
    are selected; each ``href`` is rewritten with ``reImgLstId`` and
    ``fmtImgLstUrl`` into a station page URL and handed to ``getImgLst``.

    A failed list request (connection error, timeout or HTTP error status)
    is reported with ``print`` and the site is skipped.

    Args:
        ocvSit: Site key in ``staSitLst``.
        staLstUrl: URL of the station list page.
        timeout: Seconds passed to ``requests.get`` as its timeout.
    """
    siteCfg = staSitLst[ocvSit]
    # Compiled once: the same pattern both selects the links and rewrites href.
    reImgLstUrl = re.compile(siteCfg["reImgLstUrl"])
    reImgLstId = siteCfg["reImgLstId"]
    fmtImgLstUrl = siteCfg["fmtImgLstUrl"]
    try:
        staLstPag = requests.get(staLstUrl, timeout=timeout)
        staLstPag.raise_for_status()
    except requests.RequestException as err:
        print("# list fetch failed: " + staLstUrl + " (" + str(err) + ")")
        return
    soup = BeautifulSoup(staLstPag.text, "lxml")
    for staPagATag in soup.find_all("a", href=reImgLstUrl):
        staPagId = reImgLstUrl.sub(reImgLstId, staPagATag["href"])
        imgLstUrl = fmtImgLstUrl.format(staPagId)
        print("# staPagId=" + staPagId + ", imgLstUrl=" + imgLstUrl)
        getImgLst(ocvSit, imgLstUrl)

def getSitLst(staSitLst):
    """Run getStaLst for every enabled site in ``staSitLst``.

    A site is enabled when its "itemEnable" value compares equal to True.
    For each enabled site the site name and its "staLstUrl" are printed and
    passed to ``getStaLst``.
    """
    for site_name, site_cfg in staSitLst.items():
        # Explicit comparison kept on purpose: only values equal to True
        # enable a site, not every truthy value.
        if not (site_cfg["itemEnable"] == True):
            continue
        list_url = site_cfg["staLstUrl"]
        print("# staSitNam=" + site_name + ", staLstUrl=" + list_url)
        getStaLst(site_name, list_url)

def getSit():
    """Process the module-level ``staSitLst`` by passing it to getSitLst()."""
    getSitLst(staSitLst)

# Entry point: runs as soon as this file is executed (or imported).
getSit()