I tried using Tesseract OCR to recognize text in images and output the results, but it seems to identify only the rough positions of the text and cannot accurately recognize the individual characters. The same happens when I try to recognize Chinese characters. I also tried EasyOCR and CnOCR, but they don't seem to run in TouchDesigner. Is there any way to make Tesseract OCR more accurate, so that it recognizes each character correctly, or, even better, recognizes Chinese characters as well?
Here is my script:
import cv2
import pytesseract
import numpy as np
from PIL import Image
# Tell pytesseract which Tesseract executable to launch. The path is
# hard-coded; NOTE(review): confirm it matches this machine's install
# (e.g. compare against the output of `which tesseract`).
pytesseract.pytesseract.tesseract_cmd = '/usr/local/bin/tesseract'
def get_image_from_top(top):
    """Return the current frame of a TouchDesigner TOP as an OpenCV BGR image.

    Args:
        top: TOP operator; its ``numpyArray()`` result is treated as an
            RGBA array indexed as [row, column, channel].

    Returns:
        A BGR array with the same dtype as the TOP data, oriented so that
        row 0 is the top of the picture (the layout OpenCV and Tesseract
        expect).
    """
    # Get the numpy array from the TOP
    img_array = top.numpyArray()
    # Drop alpha and reorder the channels to OpenCV's BGR layout
    img = cv2.cvtColor(img_array, cv2.COLOR_RGBA2BGR)
    # TouchDesigner stores TOP rows bottom-up, so without this flip the
    # text reaches Tesseract upside-down and is recognised as garbage.
    # flipCode 0 mirrors around the x-axis (vertical flip).
    img = cv2.flip(img, 0)
    return img
def preprocess_image(img):
    """Turn a BGR image into a cleaned-up binary image for OCR.

    Steps: grayscale -> 8-bit -> histogram equalization -> Otsu
    binarization -> 2x2 erosion followed by dilation.

    Args:
        img: BGR image, either floating point (treated as normalized to
            0..1) or already 8-bit unsigned.

    Returns:
        Single-channel uint8 image containing only the values 0 and 255.
    """
    # Convert the image to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Convert to 8-bit. Only float data needs rescaling; multiplying data
    # that is already uint8 by 255 would overflow. Clip first so that
    # out-of-range values saturate instead of wrapping around.
    if np.issubdtype(gray.dtype, np.floating):
        gray = np.clip(gray * 255.0, 0, 255).astype(np.uint8)
    # Spread the intensity histogram to boost contrast before thresholding
    gray = cv2.equalizeHist(gray)
    # Binarize; with THRESH_OTSU the threshold argument (0) is ignored and
    # the threshold is chosen automatically from the histogram
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    # Erode then dilate with the same small kernel to remove speckle noise
    kernel = np.ones((2, 2), np.uint8)
    binary = cv2.erode(binary, kernel, iterations=1)
    binary = cv2.dilate(binary, kernel, iterations=1)
    return binary
def perform_ocr(img, lang='eng', psm=6, oem=3):
    """Run Tesseract on an image and return the recognized text.

    Args:
        img: Image accepted by ``pytesseract.image_to_string`` (here the
            binary array produced by the preprocessing step).
        lang: Tesseract language code(s). Use e.g. ``'chi_sim'`` or
            ``'chi_tra'`` for Chinese, and join several with ``+``
            (``'chi_sim+eng'``). The matching ``.traineddata`` file must
            be installed for Tesseract, otherwise the call fails.
        psm: Page segmentation mode passed as ``--psm`` (6 = assume a
            single uniform block of text).
        oem: OCR engine mode passed as ``--oem`` (3 = default engine).

    Returns:
        The recognized text as a string, exactly as pytesseract returns it.
    """
    # With the default arguments this builds the same '--oem 3 --psm 6'
    # string the function previously hard-coded.
    custom_config = '--oem {} --psm {}'.format(oem, psm)
    txt = pytesseract.image_to_string(img, lang=lang, config=custom_config)
    return txt
# Driver: runs once when this script executes. It grabs one frame from a
# TOP, runs it through preprocessing and OCR, then publishes the result.
# `op` is not defined in this file; it is expected to be supplied by the
# TouchDesigner scripting environment.
# Look up the source TOP by path
top = op('null1') # Replace with TOP node path
# Pull the current frame from the TOP as an OpenCV image
image = get_image_from_top(top)
# Clean up and binarize the frame for OCR
preprocessed_image = preprocess_image(image)
# Run Tesseract on the preprocessed frame
text = perform_ocr(preprocessed_image)
# Echo the recognized text via print() for debugging
print("Recognized text:", text)
# Write the recognized text into the 'text3' operator so it is visible
# in the network
op('text3').text = text