Spring Boot 集成 Tess4J 实现图片OCR文字识别-北京尧图网络科技有限公司

目录一、环境准备二、Maven项目依赖引入pom.xml三、配置Tess4J1.配置文件application.yml2.Tesseract配置类TesseractConfig.java四、核心代码1.服务层(OcrService.java)2.接口层OcrController五、扩展识别精度优化1. 基础预处理类2. 完整OCR处理流程一、环境准备1.安装Tesseract OCR 引擎Windows: 下载并安装 Tesseract 安装包。安装后需要将 Tesseract 的安装路径添加到系统环境变量中。Linux: 使用包管理器安装例如sudo apt-get install tesseract-ocr。macOS: 使用 Homebrew 安装命令为 brew install tesseract。2.下载语言数据包(tessdata)Tesseract 需要语言数据包才能识别特定语言。你需要下载chi_sim.traineddata(简体中文)和eng.traineddata(英文) 等文件。重要路径中不要包含中文或空格。将这些文件放在一个固定的目录中例如D:/tessdata (Windows) 或 /usr/local/share/tessdata(Linux/macOS)。二、Maven项目依赖引入pom.xml在你的pom.xml文件中添加 Tess4J 的依赖。如果你使用的是 Spring Boot 3 (JDK 17)建议使用 4.5.6 或更高版本以获得更好的兼容性。dependencies!--Tess4J 依赖--dependency groupIdnet.sourceforge.tess4j/groupId artifactIdtess4j/artifactId version5.3.0/version !-- 推荐使用较新版本 -- /dependency/dependencies三、配置Tess4J创建一个配置类将Tesseract 实例定义为 Spring Bean方便在项目中注入使用。通过配置文件来管理数据路径和识别语言使配置更灵活。1.配置文件application.ymltess4j:data-path:D:/tessdata# 替换为你存放语言数据包的绝对路径language:chi_simeng# 设置识别语言chi_sim为中文eng为英文2.Tesseract配置类TesseractConfig.javapackagecom.example.ocr.config;importnet.sourceforge.tess4j.Tesseract;importorg.springframework.beans.factory.annotation.Value;importorg.springframework.context.annotation.Bean;importorg.springframework.context.annotation.Configuration;ConfigurationpublicclassTesseractConfig{Value(${tess4j.data-path})privateStringdataPath;Value(${tess4j.language})privateStringlanguage;BeanpublicTesseracttesseract(){TesseracttesseractnewTesseract();tesseract.setDatapath(dataPath);// 设置tessdata目录路径tesseract.setLanguage(language);// 
设置识别语言returntesseract;}}四、核心代码接下来创建Service和Controller来提供图片识别功能。1.服务层(OcrService.java)该服务负责接收图片文件并将其转换为BufferedImage后交给Tesseract处理。packagecom.example.ocr.service;importnet.sourceforge.tess4j.Tesseract;importnet.sourceforge.tess4j.TesseractException;importorg.springframework.stereotype.Service;importorg.springframework.web.multipart.MultipartFile;importjavax.imageio.ImageIO;importjava.awt.image.BufferedImage;importjava.io.ByteArrayInputStream;importjava.io.IOException;ServicepublicclassOcrService{privatefinalTesseracttesseract;publicOcrService(Tesseracttesseract){this.tesseracttesseract;}/** * 识别图片中的文字 * param imageFile 上传的图片文件 * return 识别出的文本 */publicStringrecognizeText(MultipartFileimageFile)throwsIOException,TesseractException{// 将 MultipartFile 转换为 BufferedImageBufferedImageimageImageIO.read(newByteArrayInputStream(imageFile.getBytes()));if(imagenull){thrownewIOException(无法读取图片请检查图片格式是否支持 (如PNG, JPG));}// 调用 Tesseract 进行识别returntesseract.doOCR(image);}}2.接口层OcrController提供一个 REST 接口用于接收前端上传的图片并返回识别结果。packagecom.example.ocr.controller;importcom.example.ocr.service.OcrService;importorg.springframework.web.bind.annotation.*;importorg.springframework.web.multipart.MultipartFile;RestControllerRequestMapping(/ocr)publicclassOcrController{privatefinalOcrServiceocrService;publicOcrController(OcrServiceocrService){this.ocrServiceocrService;}PostMapping(/recognize)publicStringrecognizeImage(RequestParam(file)MultipartFilefile)throwsException{returnocrService.recognizeText(file);}}五、扩展识别精度优化Tesseract 的识别效果与图片质量密切相关。通过预处理图片可以显著提升识别准确率。图像预处理: 在调用doOCR之前可以对BufferedImage进行以下处理灰度化: 将彩色图片转换为灰度图减少颜色干扰。二值化: 将灰度图转换为只有黑白两色的图像能有效去除背景噪音。降噪: 使用算法去除图片上的噪点。调整DPI: 对于文档扫描件300 DPI 是理想的识别分辨率。设置PSM (Page Segmentation Mode): 根据图片内容类型调整 PSM 模式。例如对于单行文本如验证码可以设置为 SINGLE_LINE对于普通文档使用默认的 AUTO 即可。1. 
基础预处理类importjava.awt.Color;importjava.awt.image.BufferedImage;importjava.io.File;importjava.io.IOException;importjavax.imageio.ImageIO;publicclassImagePreprocessor{/** * 灰度化处理 */publicstaticBufferedImagegrayScale(BufferedImageimage){intwidthimage.getWidth();intheightimage.getHeight();BufferedImagegrayImagenewBufferedImage(width,height,BufferedImage.TYPE_BYTE_GRAY);for(inty0;yheight;y){for(intx0;xwidth;x){intrgbimage.getRGB(x,y);intr(rgb16)0xff;intg(rgb8)0xff;intbrgb0xff;// 使用加权平均法人眼对绿色最敏感intgray(int)(0.299*r0.587*g0.114*b);intnewPixel(gray16)|(gray8)|gray;grayImage.setRGB(x,y,newPixel);}}returngrayImage;}/** * 二值化处理Otsu算法 */publicstaticBufferedImagebinary(BufferedImageimage){BufferedImagegrayImagegrayScale(image);intwidthgrayImage.getWidth();intheightgrayImage.getHeight();BufferedImagebinaryImagenewBufferedImage(width,height,BufferedImage.TYPE_BYTE_BINARY);// 计算灰度直方图int[]histogramnewint[256];for(inty0;yheight;y){for(intx0;xwidth;x){intgraygrayImage.getRGB(x,y)0xff;histogram[gray];}}// Otsu算法计算最佳阈值floatsum0;for(inti0;i256;i){sumi*histogram[i];}floatsumB0;intwB0;intwF0;floatvarMax0;intthreshold0;inttotalwidth*height;for(intt0;t256;t){wBhistogram[t];if(wB0)continue;wFtotal-wB;if(wF0)break;sumBt*histogram[t];floatmBsumB/wB;floatmF(sum-sumB)/wF;floatvarBetween(float)wB*(float)wF*(mB-mF)*(mB-mF);if(varBetweenvarMax){varMaxvarBetween;thresholdt;}}// 应用阈值for(inty0;yheight;y){for(intx0;xwidth;x){intgraygrayImage.getRGB(x,y)0xff;intnewPixel(graythreshold)?Color.WHITE.getRGB():Color.BLACK.getRGB();binaryImage.setRGB(x,y,newPixel);}}returnbinaryImage;}/** * 降噪处理领域检测法 */publicstaticBufferedImagedenoise(BufferedImageimage){intwidthimage.getWidth();intheightimage.getHeight();BufferedImagedenoisedImagenewBufferedImage(width,height,image.getType());// 边界处理for(intx0;xwidth;x){for(inty0;yheight;y){denoisedImage.setRGB(x,y,image.getRGB(x,y));}}// 降噪核心算法for(inty1;yheight-1;y){for(intx1;xwidth-1;x){if(isBlack(image.getRGB(x,y))){intblackCount0;// 
检查周围8个像素if(isBlack(image.getRGB(x-1,y-1)))blackCount;if(isBlack(image.getRGB(x,y-1)))blackCount;if(isBlack(image.getRGB(x1,y-1)))blackCount;if(isBlack(image.getRGB(x-1,y)))blackCount;if(isBlack(image.getRGB(x1,y)))blackCount;if(isBlack(image.getRGB(x-1,y1)))blackCount;if(isBlack(image.getRGB(x,y1)))blackCount;if(isBlack(image.getRGB(x1,y1)))blackCount;// 如果周围黑色像素少于5个则认为是噪点if(blackCount5){denoisedImage.setRGB(x,y,Color.WHITE.getRGB());}else{denoisedImage.setRGB(x,y,image.getRGB(x,y));}}else{denoisedImage.setRGB(x,y,image.getRGB(x,y));}}}returndenoisedImage;}/** * 调整DPI通过缩放实现 */publicstaticBufferedImageadjustDPI(BufferedImageimage,inttargetDPI){intwidthimage.getWidth();intheightimage.getHeight();intcurrentDPI72;// 默认DPI// 计算缩放比例doublescale(double)targetDPI/currentDPI;intnewWidth(int)(width*scale);intnewHeight(int)(height*scale);BufferedImagescaledImagenewBufferedImage(newWidth,newHeight,image.getType());for(inty0;ynewHeight;y){for(intx0;xnewWidth;x){intsrcX(int)(x/scale);intsrcY(int)(y/scale);if(srcXwidthsrcYheight){scaledImage.setRGB(x,y,image.getRGB(srcX,srcY));}}}returnscaledImage;}/** * 设置PSM模式 */publicstaticvoidsetPSM(ITesseracttesseract,Stringmode){tesseract.setTessVariable(tessedit_pageseg_mode,mode);}// 辅助方法privatestaticbooleanisBlack(intcolor){ColorcnewColor(color);return(c.getRed()c.getGreen()c.getBlue())300;}privatestaticbooleanisWhite(intcolor){ColorcnewColor(color);return(c.getRed()c.getGreen()c.getBlue())300;}}2. 完整OCR处理流程importnet.sourceforge.tess4j.*;importjavax.imageio.ImageIO;importjava.awt.image.BufferedImage;importjava.io.File;publicclassOCRProcessor{publicstaticStringrecognizeText(FileimageFile){try{// 读取图像BufferedImageimageImageIO.read(imageFile);// 1. 灰度化BufferedImagegrayImageImagePreprocessor.grayScale(image);// 2. 二值化BufferedImagebinaryImageImagePreprocessor.binary(grayImage);// 3. 降噪BufferedImagedenoisedImageImagePreprocessor.denoise(binaryImage);// 4. 调整DPI到300BufferedImagehighDPIImageImagePreprocessor.adjustDPI(denoisedImage,300);// 5. 
初始化TesseractITesseracttesseractnewTesseract();tesseract.setDatapath(C:/Program Files/Tesseract-OCR/tessdata);tesseract.setLanguage(eng);// 或 chi_sim 用于中文// 6. 设置PSM模式// PSM 6: 假设文本在单个 uniform 块中// PSM 7: 假设单行文本// PSM 3: 完全自动页面分割但适合具有明显文本区域的页面ImagePreprocessor.setPSM(tesseract,6);// 7. 执行OCRStringresulttesseract.doOCR(highDPIImage);returnresult.trim();}catch(Exceptione){e.printStackTrace();return识别失败: e.getMessage();}}publicstaticvoidmain(String[]args){StringimagePathyour_image.jpg;StringresultrecognizeText(newFile(imagePath));System.out.println(识别结果: result);}}

Spring Boot 集成 Tess4J 实现图片OCR文字识别

相关资讯