// -*- C++ -*- // Copyright 2006-2007 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz // or its licensors, as applicable. // // You may not use this file except under the terms of the accompanying license. // // Licensed under the Apache License, Version 2.0 (the "License"); you // may not use this file except in compliance with the License. You may // obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Project: roughocr -- mock OCR system exercising the interfaces and useful for testing // File: ocr-rough.cc // Purpose: perform rough OCR (really just a mockup) // Responsible: tmb // Reviewer: // Primary Repository: // Web Sites: www.iupr.org, www.dfki.de #include "colib.h" #include "imgio.h" #include "imglib.h" #include "ocrcomponents.h" using namespace ocropus; using namespace colib; param_string debug_lines("debug_lines",0,"output each line, as found by the segmenter"); param_string debug_segm("debug_segm",0,"output segmentation file"); void extract_lines(intarray &pixels,narray &lines,intarray &image,narray &rboxes) { int minwidth = 10, minheight = 10; int padding = 3; inthash bboxes; for(int i=0;i &components,intarray &image) { narray bboxes; bounding_boxes(bboxes,image); for(int i=1;i files; files.reserve(1000); if(argc>1) { for(int i=1;i\n"); printf("\n"); printf(" \n"); printf(" \n"); printf("\n"); printf("\n"); try { autodel binarizer(make_BinarizeByRange()); autodel pageseg(make_SegmentPageBy1CP()); autodel lineseg(make_SegmentLineByProjection()); autodel lineocr(make_LineOCRTrivial()); autodel langmod(make_LanguageModelTrivial()); langmod->start_context(); for(int pageno=0;pageno\n",+files[pageno]); bytearray image; read_png(image,stdio(argv[1],"r"),true); bytearray bimage; floatarray temp; copy(temp,image); binarizer->binarize(bimage,temp); autoinvert(bimage); temp.dealloc(); intarray page; pageseg->segment(page,bimage); if(debug_segm) { intarray temp; copy(temp,page); simple_recolor(temp); write_png_rgb(stdio(debug_segm,"w"),temp); } intarray rgb; narray lines; narray bboxes; extract_lines(rgb,lines,page,bboxes); objlist components; for(int line=0;linecharseg(segline,lines[line]); if(debug_lines) { char buf[1000]; intarray temp; copy(temp,segline); simple_recolor(temp); sprintf(buf,debug_lines,line); write_png_rgb(stdio(buf,"w"),temp); } idmap im; lineocr->recognize_binary(*langmod,im,segline); langmod->compute(1); if(langmod->nresults()<1) { printf(" \n"); continue; } intarray ocr; floatarray costs; intarray ids; intarray states; langmod->nbest(ocr,costs,ids,states,0); char bbox[1000]; rectangle r = bboxes[line]; sprintf(bbox,"%d %d %d %d",r.x0,bimage.dim(1)-r.y1-1,r.x1,bimage.dim(1)-r.y0-1); char buf[1000]; for(int i=0;i%s\n",bbox,buf); } printf("\n"); } printf("\n"); printf("\n"); } catch(const char *oops) { fprintf(stderr,"oops: %s\n",oops); exit(1); } }