我有一个多页的
.tif
我需要从中提取文本。我想申请
Gaussian blur
提高质量,然后应用
Tesseract OCR
提取文本。在应用高斯模糊时,我的误差小于
代码
from PIL import Image, ImageSequence
from tesserocr import PyTessBaseAPI
import numpy as np
import pycountry
import cv2
with PyTessBaseAPI() as api:
img = Image.open('sample.tif')
for i, page in enumerate(ImageSequence.Iterator(img)):
page2 = np.asarray(page)
# Gaussian Blur
imgG = cv2.GaussianBlur(page2, (5,5), 0) # <---- ERROR
# Tesseract OCR
api.SetImage(imgG)
text = api.GetUTF8Text()