Commit 6df07c1a by wudiao

Initial commit

parents
.idea
.DS_Store
MIT License
Copyright (c) 2020 chineseocr
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
### Test table detection (表格检测)
```
python table_detect.py --jpgPath img/table-detect.jpg
```
### Test table cell detection with U-Net and export to Excel (表格识别输出到excel)
```
python table_ceil.py --isToExcel True --jpgPath img/table-detect.jpg
```
## Train the table-line model (训练表格)
### Label the tables with labelme (https://github.com/wkentaro/labelme), then run
```
python train/train.py
```
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 9 23:11:51 2020
@author: chineseocr
"""
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 9 23:11:51 2020
@author: chineseocr
"""
# Darknet weights of the table detector; the loader derives the matching
# network definition by swapping the ".weights" suffix for ".cfg".
tableModelDetectPath = 'models/table-detect.weights'
# Weights file loaded into the table-line segmentation model.
tableModeLinePath = "models/table-line.h5"
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 9 23:11:51 2020
image
@author: chineseocr
"""
import base64
import json
import tesserocr
import cv2
import numpy as np
import six
from PIL import Image
from cnocr import CnOcr
def plot_lines(img, lines, linetype=2):
    """Draw every line segment in black on a copy of ``img``.

    Args:
        img: Array-like image; it is copied with ``np.copy`` and not modified.
        lines: Iterable of ``(p1, p2)`` endpoint pairs, each point indexable
            as ``(x, y)``.
        linetype: Passed to ``cv2.line`` as the stroke thickness; the
            anti-aliasing mode is always ``cv2.LINE_AA``.

    Returns:
        A ``PIL.Image`` created from the annotated copy.
    """
    canvas = np.copy(img)
    black = (0, 0, 0)
    for start, end in lines:
        start_px = (int(start[0]), int(start[1]))
        end_px = (int(end[0]), int(end[1]))
        cv2.line(canvas, start_px, end_px, black, linetype, lineType=cv2.LINE_AA)
    return Image.fromarray(canvas)
def base64_to_PIL(string):
    """Decode a base64-encoded image into an RGB ``PIL.Image``.

    Args:
        string: Base64 text (or bytes) containing an encoded image file.

    Returns:
        The decoded image converted to RGB, or ``None`` when the input cannot
        be base64-decoded or opened as an image.
    """
    import io  # local import: the file's import header is outside this block

    try:
        raw = base64.b64decode(string)
        return Image.open(io.BytesIO(raw)).convert('RGB')
    except Exception:
        # Best-effort decode: callers treat None as "no usable image".
        # Unlike a bare ``except:``, this still lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
def read_json(p):
    """Load a labelme-style annotation file.

    Args:
        p: Path of the JSON annotation file.

    Returns:
        ``(img, lines, labels)``: ``img`` is the embedded ``imageData``
        decoded by ``base64_to_PIL`` (``None`` if it cannot be decoded),
        ``lines`` holds each shape's two-point ``points`` entry, and
        ``labels`` the matching label strings after the correction below.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(p, encoding='utf-8') as f:
        jsonData = json.load(f)
    # A file without a "shapes" entry yields no lines instead of a TypeError.
    shapes = jsonData.get('shapes') or []
    imageData = jsonData.get('imageData')
    lines = []
    labels = []
    for shape in shapes:
        points = shape['points']
        (x0, y0), (x1, y1) = points
        label = shape['label']
        # Flip labels that contradict the segment's extent: a '0' segment
        # spanning more than 500 px in y becomes '1', and a '1' segment
        # spanning more than 500 px in x becomes '0'.
        if label == '0' and abs(y1 - y0) > 500:
            label = '1'
        elif label == '1' and abs(x1 - x0) > 500:
            label = '0'
        lines.append(points)
        labels.append(label)
    img = base64_to_PIL(imageData)
    return img, lines, labels
from numpy import cos, sin, pi
def rotate(x, y, angle, cx, cy):
    """Rotate the point ``(x, y)`` about ``(cx, cy)`` by ``angle`` degrees.

    Returns:
        The rotated coordinates as ``(x_new, y_new)``.
    """
    theta = angle * pi / 180
    cos_t, sin_t = cos(theta), sin(theta)
    dx, dy = x - cx, y - cy
    return dx * cos_t - dy * sin_t + cx, dx * sin_t + dy * cos_t + cy
def box_rotate(box, angle=0, imgH=0, imgW=0):
    """Re-express a four-corner box after rotating the image.

    The original note describes the rotation as counter-clockwise by 0, 90,
    180 or 270 degrees. Any other ``angle`` returns the corners unchanged.

    Args:
        box: Sequence whose first eight values are
            ``x1, y1, x2, y2, x3, y3, x4, y4``.
        angle: Rotation in degrees.
        imgH: Image height used by the 180 and 270 degree cases.
        imgW: Image width used by the 90 and 180 degree cases.

    Returns:
        Tuple of eight values with the transformed corners.
    """
    x1, y1, x2, y2, x3, y3, x4, y4 = box[:8]
    if angle == 90:
        return (y2, imgW - x2, y3, imgW - x3, y4, imgW - x4, y1, imgW - x1)
    if angle == 180:
        return (imgW - x3, imgH - y3, imgW - x4, imgH - y4,
                imgW - x1, imgH - y1, imgW - x2, imgH - y2)
    if angle == 270:
        return (imgH - y4, x4, imgH - y1, x1, imgH - y2, x2, imgH - y3, x3)
    return (x1, y1, x2, y2, x3, y3, x4, y4)
def angle_transpose(p, angle, w, h):
    """Map a point into the frame of an image rotated by a right angle.

    Args:
        p: ``(x, y)`` point in the unrotated image.
        angle: 90, 180 or 270; any other value leaves the point unchanged.
        w: Width of the unrotated image.
        h: Height of the unrotated image.

    Returns:
        The transformed ``(x, y)`` pair.
    """
    x, y = p
    if angle == 90:
        return y, w - x
    if angle == 180:
        return w - x, h - y
    if angle == 270:
        return h - y, x
    return x, y
def img_argument(img, lines, labels, size=(512, 512)):
    """Randomly tilt and quarter-turn an image together with its lines.

    Args:
        img: PIL image.
        lines: Iterable of ``(p1, p2)`` endpoint pairs in image coordinates.
        labels: Per-line labels, indexed in step with ``lines``.
        size: Accepted for call compatibility; not read by this function.

    Returns:
        ``(img, newlines, newlables)``: the transformed image, the transformed
        endpoint pairs, and the labels. After a 90 or 270 degree turn the
        label ``'0'`` becomes ``'1'`` and every other label becomes ``'0'``.
    """
    w, h = img.size
    cx, cy = w / 2, h / 2

    # Tilt by a small random angle when the draw from 0..99 exceeds 80.
    degree = np.random.uniform(-5, 5) if np.random.randint(0, 100) > 80 else 0
    tilted = [
        [rotate(a[0], a[1], degree, cx, cy), rotate(b[0], b[1], degree, cx, cy)]
        for a, b in lines
    ]
    img = img.rotate(-degree, center=(cx, cy), resample=Image.BILINEAR)

    # Then apply a random multiple of 90 degrees to image, points and labels.
    angle = np.random.choice([0, 90, 180, 270], 1)[0]
    swap_labels = angle in [90, 270]
    newlines = []
    newlables = []
    for idx, (a, b) in enumerate(tilted):
        newlines.append([angle_transpose(a, angle, w, h),
                         angle_transpose(b, angle, w, h)])
        current = labels[idx]
        if swap_labels:
            newlables.append('1' if current == '0' else '0')
        else:
            newlables.append(current)

    if angle == 90:
        img = img.transpose(Image.ROTATE_90)
    elif angle == 180:
        img = img.transpose(Image.ROTATE_180)
    elif angle == 270:
        img = img.transpose(Image.ROTATE_270)
    return img, newlines, newlables
def fill_lines(img, lines, linetype=2):
    """Rasterise line segments with value 255 onto a copy of ``img``.

    Args:
        img: Array to copy and draw on; the input is not modified.
        lines: Iterable of ``(p1, p2)`` endpoint pairs.
        linetype: Passed to ``cv2.line`` as the stroke thickness.

    Returns:
        The copied array with the segments drawn (anti-aliased).
    """
    canvas = np.copy(img)
    for start, end in lines:
        start_px = (int(start[0]), int(start[1]))
        end_px = (int(end[0]), int(end[1]))
        cv2.line(canvas, start_px, end_px, 255, linetype, lineType=cv2.LINE_AA)
    return canvas
def get_img_label(p, size, linetype=1):
    """Build one augmented training sample from an annotation file.

    Args:
        p: Path of the annotation JSON passed to ``read_json``.
        size: ``(width, height)`` of the generated sample.
        linetype: Stroke thickness used when rasterising the label lines.

    Returns:
        ``(image, lines, labelY)``: the augmented image as an array, the
        transformed lines as an array, and a boolean mask of shape
        ``(height, width, 2)`` whose channel 0 holds the ``'0'`` lines and
        channel 1 the ``'1'`` lines.
    """
    img, lines, labels = read_json(p)
    img, lines = img_resize(img, lines, target_size=512, max_size=1024)
    img, lines, labels = img_argument(img, lines, labels, size)
    img, lines, labels = get_random_data(img, lines, labels, size=size)
    lines = np.array(lines)
    labels = np.array(labels)

    mask_shape = size[::-1]
    labelY = np.zeros((size[1], size[0], 2), dtype='uint8')
    for channel, tag in enumerate(('0', '1')):
        selected = np.where(labels == tag)[0]
        blank = np.zeros(mask_shape, dtype='uint8')
        labelY[:, :, channel] = fill_lines(blank, lines[selected], linetype=linetype)
    labelY = labelY > 0
    return np.array(img), lines, labelY
from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
def rand(a=0, b=1):
    """Return one ``np.random.rand()`` sample rescaled to start at ``a`` with span ``b - a``."""
    span = b - a
    return a + span * np.random.rand()
def get_random_data(image, lines, labels, size=(1024, 1024), jitter=.3, hue=.1, sat=1.5, val=1.5):
    """Random preprocessing for real-time data augmentation.

    The image is resized with a random aspect ratio and scale, pasted at a
    random offset on a grey ``size`` canvas, and jittered in HSV space. The
    line endpoints are moved with the same resize and offset.

    Args:
        image: PIL image.
        lines: List of ``(p1, p2)`` endpoint pairs; updated in place.
        labels: Returned unchanged.
        size: ``(width, height)`` of the output canvas.
        jitter: Half-width of the random aspect-ratio factors around 1.
        hue: Maximum absolute hue shift.
        sat: Upper bound of the random saturation gain (or its reciprocal).
        val: Upper bound of the random value gain (or its reciprocal).

    Returns:
        ``(image_data, lines, labels)`` where ``image_data`` is the result of
        ``hsv_to_rgb`` on the jittered canvas (values clipped to 0..1 in HSV).
    """
    iw, ih = image.size
    w, h = size

    # Random aspect ratio and scale of the resized image.
    new_ar = w / h * rand(1 - jitter, 1 + jitter) / rand(1 - jitter, 1 + jitter)
    scale = rand(0.2, 3)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    resized = image.resize((nw, nh), Image.BICUBIC)

    # Paste at a random offset on a mid-grey canvas.
    dx = int(rand(0, w - nw))
    dy = int(rand(0, h - nh))
    canvas = Image.new('RGB', (w, h), (128, 128, 128))
    canvas.paste(resized, (dx, dy))

    # Colour jitter in HSV space.
    hue_shift = rand(-hue, hue)
    sat_gain = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
    val_gain = rand(1, val) if rand() < .5 else 1 / rand(1, val)
    hsv = rgb_to_hsv(np.array(canvas) / 255.)
    hue_channel = hsv[..., 0]  # view into hsv, so edits below land in hsv
    hue_channel += hue_shift
    hue_channel[hue_channel > 1] -= 1
    hue_channel[hue_channel < 0] += 1
    hsv[..., 1] *= sat_gain
    hsv[..., 2] *= val_gain
    np.clip(hsv, 0, 1, out=hsv)
    image_data = hsv_to_rgb(hsv)  # numpy array, 0 to 1

    def shift(pt):
        # Same resize-then-offset mapping that was applied to the image.
        return pt[0] * nw / iw + dx, pt[1] * nh / ih + dy

    for idx, (start, end) in enumerate(lines):
        lines[idx] = [shift(start), shift(end)]
    return image_data, lines, labels
def gen(paths, batchsize=2, linetype=2):
    """Yield training batches forever.

    Args:
        paths: List of annotation file paths; shuffled in place each time
            the list has been consumed.
        batchsize: Number of samples per batch.
        linetype: Stroke thickness forwarded to ``get_img_label``.

    Yields:
        ``(X, Y)`` arrays of shape ``(batchsize, 640, 640, 3)`` and
        ``(batchsize, 640, 640, 2)``.
    """
    total = len(paths)
    cursor = 0
    size = 640  # fixed training resolution (multi-scale training is disabled)
    while True:
        X = np.zeros((batchsize, size, size, 3))
        Y = np.zeros((batchsize, size, size, 2))
        for slot in range(batchsize):
            if cursor >= total:
                # Start a new pass over the data in a fresh order.
                cursor = 0
                np.random.shuffle(paths)
            path = paths[cursor]
            cursor += 1
            sample, _, mask = get_img_label(path, size=(size, size), linetype=linetype)
            X[slot] = sample
            Y[slot] = mask
        yield X, Y
def img_resize(im, lines, target_size=600, max_size=1500):
    """Resize an image and scale its line endpoints by the same factor.

    The factor brings the shorter side to ``target_size``; if the longer
    side would then exceed ``max_size`` (and ``max_size`` is not ``None``),
    the factor brings the longer side to ``max_size`` instead.

    Args:
        im: PIL image.
        lines: List of ``(p1, p2)`` endpoint pairs; updated in place.
        target_size: Desired length of the shorter side.
        max_size: Cap for the longer side, or ``None`` for no cap.

    Returns:
        ``(im, lines)``: the resized image and the rescaled lines.
    """
    w, h = im.size
    short_side = np.min(im.size)
    long_side = np.max(im.size)
    im_scale = float(target_size) / float(short_side)
    if max_size is not None and np.round(im_scale * long_side) > max_size:
        im_scale = float(max_size) / float(long_side)
    im = im.resize((int(w * im_scale), int(h * im_scale)), Image.BICUBIC)
    for idx, (start, end) in enumerate(lines):
        lines[idx] = [
            (start[0] * im_scale, start[1] * im_scale),
            (end[0] * im_scale, end[1] * im_scale),
        ]
    return im, lines
from cnocr.utils import read_img
from cnocr import CnOcr
ocr = CnOcr()
if __name__ == '__main__':
    # Manual smoke test: run tesserocr on a sample image and print the text.
    from PIL import Image

    # Context manager so the image file handle is released after OCR.
    with Image.open('img/gou.png') as img:
        result = tesserocr.image_to_text(img)
    print(result)

    # Earlier CnOcr experiment (padding small images to at least 32 px),
    # kept commented out for reference:
    # ocr = CnOcr()
    # img_fp = 'img/gou.png'
    # res = ocr.ocr(img_fp)
    # print("Predicted Chars:", res)
    # im = read_img(img_fp)
    # if(min(im.shape[0],im.shape[1])<32):
    #     w, h = im.shape[0],im.shape[1]
    #     im_size_min = min(im.shape[0],im.shape[1])
    #     im_size_max = max(im.shape[0],im.shape[1])
    #
    #     im_scale = float(32) / float(im_size_min)
    #     max_scale = max((int(w * im_scale), int(h * im_scale)))
    #     zeros = np.ones((max_scale,max_scale,3),dtype = np.uint8)
    #     zeros = zeros*255
    #     zeros[:w,:h] = im
    #
    #     x = Image.fromarray(zeros)
    #     x.save("test.jpg")
    #     print(zeros.shape)
    #     res = ocr.ocr(zeros)
    #
    #     print("Predicted Chars:", res)
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 10 01:33:19 2020
@author: chineseocr
"""
img/gou.png

14.6 KB

[net]
# Testing
# batch=1
# subdivisions=1
# Training
batch=32
subdivisions=16
width=608
height=608
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.0001
burn_in=1000
max_batches = 50200
policy=steps
steps=40000,45000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=21
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=2
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=21
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=2
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=21
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=2
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Jan 13 17:31:37 2021
@author: lywen
"""
class tableBuid:
    """Rebuild a table grid from detected cell boxes.

    Nearby box edges are snapped together, and every cell is described by
    the grid indices of its edges. The result is stored in ``self.cor`` as
    one ``{'row': [r0, r1], 'col': [c0, c1]}`` dict per input box, in input
    order.
    """

    def __init__(self, ceilbox, interval=10):
        """
        Args:
            ceilbox: Iterable of cell boxes. Only elements 0, 1, 4 and 5 of
                each box are read, as the diagonal corners ``(x0, y0)`` and
                ``(x2, y2)``; they are truncated to ``int``.
            interval: Edges closer than this to the current cluster's first
                edge are merged into that cluster.
        """
        self.diagBoxes = [[int(x[0]), int(x[1]), int(x[4]), int(x[5])] for x in ceilbox]
        self.interval = interval
        self.batch()

    def batch(self):
        """Compute ``self.cor`` from ``self.diagBoxes``."""
        self.cor = []
        # axis='row' clusters the x edges, axis='col' clusters the y edges,
        # so the x-derived spans are column indices and the y-derived spans
        # are row indices.
        x_spans = self.table_line_cor(self.diagBoxes, axis='row', interval=self.interval)
        y_spans = self.table_line_cor(self.diagBoxes, axis='col', interval=self.interval)
        self.cor = [{'row': y_span, 'col': x_span} for x_span, y_span in zip(x_spans, y_spans)]

    def table_line_cor(self, lines, axis='col', interval=10):
        """Map each box's two edges on one axis to clustered grid indices.

        Args:
            lines: Boxes as ``[x0, y0, x1, y1]``.
            axis: ``'col'`` reads the y values (indices 1 and 3); any other
                value reads the x values (indices 0 and 2).
            interval: Merge distance, measured from the first edge of the
                current cluster.

        Returns:
            One ``[start_index, end_index]`` pair per box, in input order.
        """
        lo, hi = (1, 3) if axis == 'col' else (0, 2)
        # Flatten in one pass instead of the quadratic ``sum(list_of_lists, [])``.
        edges = sorted(value for line in lines for value in (line[lo], line[hi]))

        # Walk the sorted edges; each edge joins the running cluster while it
        # stays within ``interval`` of that cluster's representative.
        edge_to_rep = {}
        rep = None
        for edge in edges:
            if rep is None or not (edge - rep < interval):
                rep = edge
            edge_to_rep[edge] = rep

        rep_index = {value: ind for ind, value in enumerate(sorted(set(edge_to_rep.values())))}
        return [
            [rep_index[edge_to_rep[line[lo]]], rep_index[edge_to_rep[line[hi]]]]
            for line in lines
        ]
import xlwt
def to_excel(res, workbook=None):
    """Write recognised table cells into a workbook.

    Args:
        res: List of cell dicts such as
            ``{'text': '...', 'row': [0, 1], 'col': [0, 1]}``. ``row`` and
            ``col`` are ``[start, end)`` grid indices; ``text`` is optional.
        workbook: Workbook to add the sheet to; a new ``xlwt.Workbook`` is
            created when omitted.

    Returns:
        The workbook. It gains a sheet named ``'page'`` with one merged
        range per cell, or a sheet named ``'table'`` holding a single
        "no data" marker when ``res`` is empty.
    """
    import logging  # local import: the file's import header is outside this block

    if workbook is None:
        workbook = xlwt.Workbook()
    if len(res) == 0:
        worksheet = workbook.add_sheet('table')
        worksheet.write_merge(0, 0, 0, 0, "无数据")  # the literal means "no data"
        return workbook

    worksheet = workbook.add_sheet('page')
    for line in res:
        row0, row1 = line['row']
        col0, col1 = line['col']
        text = line.get('text', '')
        try:
            worksheet.write_merge(row0, row1 - 1, col0, col1 - 1, text)
        except Exception:
            # Export stays best-effort, but a skipped cell is now reported
            # instead of being swallowed silently.
            logging.getLogger(__name__).warning(
                "to_excel: skipped cell rows=%s cols=%s", line['row'], line['col'],
                exc_info=True)
    return workbook
if __name__=='__main__':
pass
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import cv2
import numpy as np
from table_detect import table_detect
from table_line import table_line
from table_build import tableBuid,to_excel
from utils import minAreaRectbox, measure, eval_angle, draw_lines
from cnocr import CnOcr
from scipy.ndimage import filters
class table:
    """Table-recognition pipeline that runs entirely from ``__init__``.

    Stages: estimate skew, locate tables, find cells, build the grid, OCR
    each cell. Results are exposed as attributes:

    * ``img`` / ``degree`` - image and angle returned by ``eval_angle``
    * ``boxes`` / ``adBoxes`` / ``scores`` - table regions
    * ``tableCeilBoxes`` - cell boxes of all regions
    * ``childImgs`` - one crop per table region
    * ``res`` - one ``{'row', 'col', 'text'}`` dict per cell
    * ``workbook`` - result of ``to_excel`` when ``isToExcel`` is true, else ``None``
    """

    def __init__(self, img, tableSize=(416, 416), tableLineSize=(1024, 1024), isTableDetect=False, isToExcel=False):
        """
        Args:
            img: Image array; indexed as ``[row, col, channel]``.
            tableSize: Input size for the table detector.
            tableLineSize: Input size for the table-line model.
            isTableDetect: Run the table detector first; otherwise the whole
                image is treated as a single table.
            isToExcel: Build a workbook from the OCR results.
        """
        self.img = img
        self.tableSize = tableSize
        self.tableLineSize = tableLineSize
        self.isTableDetect = isTableDetect
        self.isToExcel = isToExcel
        # Always defined so callers can read it even when no export happens.
        self.workbook = None
        self.img_degree()
        self.table_boxes_detect()  # locate tables
        self.table_ceil()  # locate table cells
        self.table_build()
        self.cnocr = CnOcr()
        self.table_ocr()

    def img_degree(self):
        """Replace ``self.img`` with the image returned by ``eval_angle`` and keep the angle."""
        img, degree = eval_angle(self.img, angleRange=[-15, 15])
        self.img = img
        self.degree = degree

    def table_boxes_detect(self):
        """Fill ``boxes``/``adBoxes``/``scores``, falling back to the full image."""
        h, w = self.img.shape[:2]
        boxes = []
        if self.isTableDetect:
            boxes, adBoxes, scores = table_detect(self.img, sc=self.tableSize, thresh=0.2, NMSthresh=0.3)
        if len(boxes) == 0:
            # Detection disabled or nothing found: use the whole image.
            boxes = [[0, 0, w, h]]
            adBoxes = [[0, 0, w, h]]
            scores = [0]
        self.boxes = boxes
        self.adBoxes = adBoxes
        self.scores = scores

    def table_ceil(self):
        """Find the cell boxes inside every table region."""
        self.tableCeilBoxes = []
        self.childImgs = []
        for adBox in self.adBoxes:
            xmin, ymin, xmax, ymax = [int(x) for x in adBox]
            childImg = self.img[ymin:ymax, xmin:xmax]
            rowboxes, colboxes = table_line(childImg[..., ::-1], size=self.tableLineSize, hprob=0.5, vprob=0.5)
            # Draw the lines on a blank canvas; the connected regions between
            # them are the cells.
            tmp = np.zeros(self.img.shape[:2], dtype='uint8')
            tmp = draw_lines(tmp, rowboxes + colboxes, color=255, lineW=2)
            labels = measure.label(tmp < 255, connectivity=2)  # 8-connected labelling
            regions = measure.regionprops(labels)
            ceilboxes = minAreaRectbox(regions, False, tmp.shape[1], tmp.shape[0], True, True)
            ceilboxes = np.array(ceilboxes)
            if ceilboxes.size > 0:
                # Shift from crop coordinates back to full-image coordinates.
                # An empty result is skipped because it cannot be indexed
                # column-wise.
                ceilboxes[:, [0, 2, 4, 6]] += xmin
                ceilboxes[:, [1, 3, 5, 7]] += ymin
                self.tableCeilBoxes.extend(ceilboxes)
            self.childImgs.append(childImg)

    def table_build(self):
        """Compute grid indices per cell; ``text`` is a placeholder until OCR runs."""
        tablebuild = tableBuid(self.tableCeilBoxes)
        cor = tablebuild.cor
        for line in cor:
            line['text'] = 'table-test'  # placeholder, replaced by table_ocr
        self.res = cor

    def table_ocr(self):
        """OCR every cell, store the texts in ``self.res`` and set ``self.workbook``."""
        margin = 4  # trims the cell border so ruling lines are not read as text
        res = []
        for i, (box, cell) in enumerate(zip(self.tableCeilBoxes, self.res), start=1):
            box = list(box)
            left, right, top, bottom = box[0], box[2], box[1], box[5]
            # Crop from the image held by this instance, not from a
            # module-level ``img`` that only exists when run as a script.
            tmpImg = self.img[int(top) + margin:int(bottom) - margin,
                              int(left) + margin:int(right) - margin, :]
            s_content = ""
            if tmpImg.size > 0:
                # NOTE(review): per-cell debug dump into img/ - consider removing.
                cv2.imwrite("img/{0}.png".format(str(i)), tmpImg)
                content = self.cnocr.ocr(tmpImg)
                # Each recognised line is prefixed with a newline, as before.
                s_content = "".join("\n" + "".join(x[0]) for x in content)
            print(s_content)
            res.append({'row': cell['row'], 'col': cell['col'], 'text': s_content})
        self.res = res
        self.workbook = to_excel(res, workbook=None) if self.isToExcel else None
if __name__ == '__main__':
    import argparse
    import os
    import time
    from utils import draw_boxes

    def _str2bool(value):
        """Parse a command-line boolean; ``type=bool`` would treat "False" as True."""
        if isinstance(value, bool):
            return value
        text = str(value).strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))

    parser = argparse.ArgumentParser(description='tabel to excel demo')
    parser.add_argument('--isTableDetect', default=False, type=_str2bool, help="是否先进行表格检测")
    parser.add_argument('--tableSize', default='416,416', type=str, help="表格检测输入size")
    parser.add_argument('--tableLineSize', default='1024,1024', type=str, help="表格直线输入size")
    parser.add_argument('--isToExcel', default=False, type=_str2bool, help="是否输出到excel")
    parser.add_argument('--jpgPath', default='img/table-detect.jpg', type=str, help="测试图像地址")
    args = parser.parse_args()
    args.tableSize = [int(x) for x in args.tableSize.split(',')]
    args.tableLineSize = [int(x) for x in args.tableLineSize.split(',')]
    print(args)

    img = cv2.imread(args.jpgPath)
    if img is None:
        # cv2.imread returns None instead of raising on an unreadable path.
        raise SystemExit('cannot read image: %s' % args.jpgPath)

    t = time.time()
    tableDetect = table(img, tableSize=args.tableSize,
                        tableLineSize=args.tableLineSize,
                        isTableDetect=args.isTableDetect,
                        isToExcel=args.isToExcel
                        )
    tableCeilBoxes = tableDetect.tableCeilBoxes
    tableJson = tableDetect.res
    # The pipeline may not have produced a workbook; treat that as "no export".
    workbook = getattr(tableDetect, 'workbook', None)
    img = tableDetect.img
    tmp = np.zeros_like(img)
    img = draw_boxes(tmp, tableCeilBoxes, color=(255, 255, 255))
    print(time.time() - t)

    stem = os.path.splitext(args.jpgPath)[0]
    cv2.imwrite(stem + 'ceil.png', img)
    if workbook is not None:
        # NOTE(review): the workbook comes from xlwt, which presumably writes
        # the legacy .xls format - confirm whether this suffix should be '.xls'.
        workbook.save(stem + '.xlsx')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import cv2
import numpy as np
from config import tableModelDetectPath
from utils import nms_box, letterbox_image, rectangle
# Load the Darknet detector once at import time; the network definition is
# read from the path obtained by replacing ".weights" with ".cfg".
tableDetectNet = cv2.dnn.readNetFromDarknet(tableModelDetectPath.replace('.weights', '.cfg'), tableModelDetectPath)
def table_detect(img, sc=(416, 416), thresh=0.5, NMSthresh=0.3):
    """Detect tables in an image with the module-level Darknet network.

    Args:
        img: Image array (the original note says "GBR", presumably BGR
            channel order - confirm).
        sc: Network input size; only ``sc[0]`` is used, for both sides.
        thresh: Minimum class score for a detection to be considered.
        NMSthresh: Threshold forwarded to ``nms_box``.

    Returns:
        ``(boxes, adBoxes, confidences)``: ``[xmin, ymin, xmax, ymax]`` boxes
        of class id 1 clamped to the image, the padded boxes from
        ``fix_table_box_for_table_line``, and the matching scores.
    """
    scale = sc[0]
    img_height, img_width = img.shape[:2]
    letterboxed, fx, fy = letterbox_image(img[..., ::-1], (scale, scale))
    blob = cv2.dnn.blobFromImage(letterboxed, scalefactor=1.0, size=(scale, scale), swapRB=True, crop=False)
    tableDetectNet.setInput(blob / 255.0)
    outputs = tableDetectNet.forward(tableDetectNet.getUnconnectedOutLayersNames())

    boxes = []
    confidences = []
    for output in outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if not confidence > thresh:
                continue
            # Undo the letterbox scaling to get pixel coordinates.
            center_x = int(detection[0] * scale / fx)
            center_y = int(detection[1] * scale / fy)
            width = int(detection[2] * scale / fx)
            height = int(detection[3] * scale / fy)
            left = int(center_x - width / 2)
            top = int(center_y - height / 2)
            if class_id != 1:
                # Only class id 1 is kept.
                continue
            confidences.append(float(confidence))
            boxes.append([
                max(left, 1),
                max(top, 1),
                min(left + width, img_width - 1),
                min(top + height, img_height - 1),
            ])

    boxes = np.array(boxes)
    confidences = np.array(confidences)
    if len(boxes) > 0:
        boxes, confidences = nms_box(boxes, confidences, score_threshold=thresh, nms_threshold=NMSthresh)
    boxes, adBoxes = fix_table_box_for_table_line(boxes, confidences, img)
    return boxes, adBoxes, confidences
def point_in_box(p, box):
    """Return ``True`` when point ``p`` lies inside ``box`` (edges included).

    Args:
        p: ``(x, y)`` point.
        box: ``(xmin, ymin, xmax, ymax)``.
    """
    x, y = p
    xmin, ymin, xmax, ymax = box
    # The upper x bound is xmax; the old ``xmin <= x <= xmin`` check only
    # matched points lying exactly on the left edge.
    return bool(xmin <= x <= xmax and ymin <= y <= ymax)
def fix_table_box_for_table_line(boxes, confidences, img):
    """Pad detected table boxes for the table-line stage.

    Each box grows on every side by its width (or height) times
    ``1 - confidence`` and is then clamped to ``[1, w]`` horizontally and
    ``[1, h]`` vertically.

    Args:
        boxes: Sequence of ``(xmin, ymin, xmax, ymax)`` boxes.
        confidences: Scores indexed in step with ``boxes``.
        img: Image array; only ``img.shape[:2]`` is read.

    Returns:
        ``(boxes, adBoxes)``: the input boxes unchanged and the padded boxes.
    """
    h, w = img.shape[:2]
    adBoxes = []
    for idx, (xmin, ymin, xmax, ymax) in enumerate(boxes):
        slack = 1 - confidences[idx]
        padx = (xmax - xmin) * slack
        pady = (ymax - ymin) * slack
        adBoxes.append([
            max(xmin - padx, 1),
            max(ymin - pady, 1),
            min(xmax + padx, w),
            min(ymax + pady, h),
        ])
    return boxes, adBoxes
if __name__ == '__main__':
    # Demo: detect tables in one image and save a copy with the padded boxes drawn.
    import time
    import argparse

    parser = argparse.ArgumentParser(description='tabel to excel demo')
    parser.add_argument('--tableSize', default='416,416', type=str, help="表格检测输入size")
    parser.add_argument('--jpgPath', default='img/table-detect.jpg', type=str, help="测试图像地址")
    args = parser.parse_args()
    args.tableSize = [int(x) for x in args.tableSize.split(',')]

    img = cv2.imread(args.jpgPath)
    if img is None:
        # cv2.imread returns None instead of raising on an unreadable path.
        raise SystemExit('cannot read image: %s' % args.jpgPath)

    t = time.time()
    # Honour --tableSize; the default equals the previously hard-coded (416, 416).
    boxes, adBoxes, scores = table_detect(img, sc=args.tableSize, thresh=0.5, NMSthresh=0.3)
    print(time.time() - t, boxes, adBoxes, scores)
    img = rectangle(img, adBoxes)
    img.save('img/table-detect.png')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from tensorflow.keras.layers import Input, concatenate, Conv2D, MaxPooling2D, BatchNormalization, UpSampling2D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.models import Model
def table_net(input_shape=(512, 512, 3), num_classes=1):
    """Build the U-Net style segmentation model used for table lines.

    The encoder has six stages (16 to 512 filters), each made of two
    conv/batch-norm/LeakyReLU units followed by 2x2 max pooling. A
    1024-filter centre follows. The decoder mirrors the encoder: upsample,
    concatenate with the matching encoder output, then three conv units.
    A 1x1 sigmoid convolution produces ``num_classes`` channels.

    Args:
        input_shape: Shape of one input image, ``(height, width, channels)``.
        num_classes: Number of output channels.

    Returns:
        The Keras ``Model``.
    """
    def conv_unit(tensor, filters):
        # 3x3 convolution without bias, batch norm, then LeakyReLU(0.1).
        tensor = Conv2D(filters, (3, 3), padding='same', use_bias=False)(tensor)
        tensor = BatchNormalization()(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    def conv_stack(tensor, filters, repeats):
        for _ in range(repeats):
            tensor = conv_unit(tensor, filters)
        return tensor

    inputs = Input(shape=input_shape)

    # Encoder: remember each stage's pre-pooling output for the skip links.
    skips = []
    x = inputs
    for filters in (16, 32, 64, 128, 256, 512):
        x = conv_stack(x, filters, 2)
        skips.append((x, filters))
        x = MaxPooling2D((2, 2), strides=(2, 2))(x)

    # Centre
    x = conv_stack(x, 1024, 2)

    # Decoder: walk the skip links from deepest to shallowest.
    for skip, filters in reversed(skips):
        x = UpSampling2D((2, 2))(x)
        x = concatenate([skip, x], axis=3)
        x = conv_stack(x, filters, 3)

    classify = Conv2D(num_classes, (1, 1), activation='sigmoid')(x)
    print(classify.shape)
    print(inputs.shape)
    return Model(inputs=inputs, outputs=classify)
from config import tableModeLinePath
from utils import letterbox_image, get_table_line, adjust_lines, line_to_line
import numpy as np
import cv2
# Build the table-line segmentation network once, at import time, and load its
# trained weights from the path configured in config.tableModeLinePath.
# The arguments are passed positionally to table_net: (None, None, 3) and 2.
# NOTE(review): presumably these are the input shape (variable height/width,
# 3 channels) and the number of output classes - confirm against table_net.
# table_line() reads output channel 0 as horizontal lines and channel 1 as
# vertical lines.
model = table_net((None, None, 3), 2)
model.load_weights(tableModeLinePath)
def _rescale_with_extent(boxes, fx, fy):
    """Divide box coordinates by the letterbox factors and measure their extent.

    Columns 0 and 2 of every box are divided by ``fx`` and columns 1 and 3 by
    ``fy``. The result is written back into the array created from ``boxes``,
    so that array's dtype is preserved.

    Returns:
        A pair ``(scaled_boxes, (xmin, xmax, ymin, ymax))`` where
        ``scaled_boxes`` is a plain list and the extent is taken over the
        rescaled coordinates.
    """
    scaled = np.array(boxes)
    scaled[:, [0, 2]] = scaled[:, [0, 2]] / fx
    scaled[:, [1, 3]] = scaled[:, [1, 3]] / fy
    xs = scaled[:, [0, 2]]
    ys = scaled[:, [1, 3]]
    extent = (xs.min(), xs.max(), ys.min(), ys.max())
    return scaled.tolist(), extent


def table_line(img, size=(512, 512), hprob=0.5, vprob=0.5, row=50, col=30, alph=15):
    """Detect the horizontal and vertical ruling lines of a table image.

    The image (with its last axis reversed) is letterboxed to ``size``, scaled
    to [0, 1] and run through the module-level ``model``. Output channel 0 is
    thresholded with ``hprob`` (horizontal lines) and channel 1 with ``vprob``
    (vertical lines). Line boxes are extracted with ``get_table_line``, mapped
    back by dividing by the letterbox factors, and then completed and joined
    with ``adjust_lines`` / ``line_to_line``.

    Args:
        img: image array; its last axis is reversed before letterboxing.
        size: ``(width, height)`` handed to ``letterbox_image``.
        hprob: threshold applied to output channel 0.
        vprob: threshold applied to output channel 1.
        row: forwarded to ``get_table_line`` as ``lineW`` for horizontal lines.
        col: forwarded to ``get_table_line`` as ``lineW`` for vertical lines.
        alph: forwarded to ``adjust_lines`` as ``alph``.

    Returns:
        ``(rowboxes, colboxes)``: two lists of line boxes, indexed as
        ``[x1, y1, x2, y2]`` by the rescaling code.
    """
    target_w, target_h = size
    blob, fx, fy = letterbox_image(img[..., ::-1], (target_w, target_h))
    batch = np.array([np.array(blob) / 255.0])
    pred = model.predict(batch)[0]

    # Binarise the two probability maps: channel 1 = vertical, channel 0 = horizontal.
    vmask = (pred[..., 1] > vprob).astype(int)
    hmask = (pred[..., 0] > hprob).astype(int)

    colboxes = get_table_line(vmask, axis=1, lineW=col)
    rowboxes = get_table_line(hmask, axis=0, lineW=row)

    # Synthetic border lines: the extent of the detected rows yields a left and
    # a right column, the extent of the detected columns yields a top and a
    # bottom row.
    border_cols = []
    border_rows = []
    if len(rowboxes) > 0:
        rowboxes, (x0, x1, y0, y1) = _rescale_with_extent(rowboxes, fx, fy)
        border_cols = [[x0, y0, x0, y1], [x1, y0, x1, y1]]
    if len(colboxes) > 0:
        colboxes, (x0, x1, y0, y1) = _rescale_with_extent(colboxes, fx, fy)
        border_rows = [[x0, y0, x1, y0], [x0, y1, x1, y1]]

    rowboxes += border_rows
    colboxes += border_cols

    # adjust_lines returns additional row/column segments that are appended.
    extra_rows, extra_cols = adjust_lines(rowboxes, colboxes, alph=alph)
    rowboxes += extra_rows
    colboxes += extra_cols

    # Pairwise pass over every row/column combination; each call may replace
    # the stored line, so the updated value feeds the following call.
    for r in range(len(rowboxes)):
        for c in range(len(colboxes)):
            rowboxes[r] = line_to_line(rowboxes[r], colboxes[c], 10)
            colboxes[c] = line_to_line(colboxes[c], rowboxes[r], 10)
    return rowboxes, colboxes
if __name__ == '__main__':
    # Demo: detect the table lines of a sample image, draw them onto it and
    # save the result, printing the elapsed time and the number of lines found.
    import time

    from utils import draw_lines

    p = 'img/table-detect.jpg'
    img = cv2.imread(p)
    if img is None:
        # cv2.imread does not raise on a missing/unreadable file; it returns
        # None, which would otherwise fail later with an opaque TypeError.
        raise FileNotFoundError('cannot read image: {}'.format(p))
    t = time.time()
    rowboxes, colboxes = table_line(img[..., ::-1], size=(512, 512), hprob=0.5, vprob=0.5)
    img = draw_lines(img, rowboxes + colboxes, color=(255, 0, 0), lineW=2)
    print(time.time() - t, len(rowboxes), len(colboxes))
    cv2.imwrite('img/table-line.png', img)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 10 02:52:45 2020
@author: chineseocr
"""
import sys
sys.path.append('.')
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 9 23:11:51 2020
@author: chineseocr
"""
import sys
sys.path.append('.')
from table_line import model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from glob import glob
from image import gen
if __name__ == '__main__':
    # Fine-tune the table-line model on labelme-annotated samples.
    filepath = './models/table-line-fine.h5'  # where the model weights are stored
    # Keep only the weights of the epoch with the lowest training loss.
    checkpointer = ModelCheckpoint(filepath=filepath, monitor='loss', verbose=0, save_weights_only=True,
                                   save_best_only=True)
    # Cut the learning rate by 10x when the training loss stalls for 5 epochs.
    rlu = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, verbose=0, mode='auto', cooldown=0, min_lr=0)
    # NOTE(review): Adam(lr=...) and Model.fit_generator are deprecated in newer
    # TensorFlow releases; kept as-is to match the version this project targets.
    model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['acc'])

    paths = glob('./train/dataset-line/*/*.json')  # table line dataset labelled with labelme
    trainP, testP = train_test_split(paths, test_size=0.1)
    print('total:', len(paths), 'train:', len(trainP), 'test:', len(testP))

    batchsize = 4
    trainloader = gen(trainP, batchsize=batchsize, linetype=1)
    testloader = gen(testP, batchsize=batchsize, linetype=1)
    model.fit_generator(trainloader,
                        steps_per_epoch=max(1, len(trainP) // batchsize),
                        # rlu was previously constructed but never registered,
                        # so the learning-rate schedule had no effect.
                        callbacks=[checkpointer, rlu],
                        validation_data=testloader,
                        validation_steps=max(1, len(testP) // batchsize),
                        epochs=30)
This diff is collapsed. Click to expand it.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment