本仓库是 eSearch 的 OCR 服务依赖。
支持本地 OCR(基于 PaddleOCR)
基于 onnxruntime 的 web runtime,使用 wasm 运行,未来可能使用 webgl 甚至是 webgpu。
模型需要转换为 onnx 才能使用:Paddle2ONNX 或在线转换
在 js 文件下使用 electron 进行调试(主要是 require 几个模块和 fs 读取字典,若想纯网页实现,可以自行修改)
npm i esearch-ocr
web
import * as ocr from "esearch-ocr";
nodejs
const ocr = require("esearch-ocr");
await ocr.init({
detPath: "ocr/det.onnx",
recPath: "ocr/rec.onnx",
dic: "",
});
let img = document.createElement("img");
img.src = "data:image/png;base64,...";
img.onload = async () => {
let canvas = document.createElement("canvas");
canvas.width = img.width;
canvas.height = img.height;
canvas.getContext("2d").drawImage(img, 0, 0);
ocr.ocr(canvas.getContext("2d").getImageData(0, 0, img.width, img.height))
.then((l) => {})
.catch((e) => {});
};
init type
{
detPath: string;
recPath: string;
dic: string; // raw dictionary text content — NOT a string[] and NOT a file path
node?: boolean;
dev?: boolean;
maxSide?: number;
imgh?: number;
imgw?: number;
ort?: typeof import("onnxruntime-web");
detShape?: [number, number];
}
ocr type
type PointType = [number, number]
ocr(img: ImageData): Promise<{
text: string;
mean: number;
box: [PointType, PointType, PointType, PointType]; // corner order: left-top, right-top, right-bottom, left-bottom
}[]>