// -*- C++ -*- // Copyright 2006-2008 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz // or its licensors, as applicable. // // You may not use this file except under the terms of the accompanying license. // // Licensed under the Apache License, Version 2.0 (the "License"); you // may not use this file except in compliance with the License. You may // obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Project: OCRopus // File: ocr-layout-rast.cc // Purpose: perform layout analysis by RAST // Responsible: Faisal Shafait (faisal.shafait@dfki.de) // Reviewer: // Primary Repository: // Web Sites: www.iupr.org, www.dfki.de #include #include "ocr-layout-rast.h" using namespace iulib; using namespace colib; namespace ocropus { const int LEFT_ALIGNED = 1; const int RIGHT_ALIGNED = 2; const int CENTER_ALIGNED = 3; const int JUSTIFIED = 4; const int NOT_ALIGNED = 0; param_string debug_segm("debug_segm",0,"output segmentation file"); param_int debug_layout("debug_layout",0,"print the intermediate results to stdout"); SegmentPageByRAST::SegmentPageByRAST(){ column_threshold = 0.6; id = 0; max_results = 1000; } line::line(TextLine &tl): c(tl.c), m(tl.m), d(tl.d), start(tl.bbox.x0), end(tl.bbox.x1), top(tl.bbox.y0), bottom(tl.bbox.y1), istart(tl.bbox.x0), iend(tl.bbox.x1), xheight(tl.xheight){ } TextLine line::getTextLine(){ TextLine tl; tl.c = c; tl.m = m; tl.d = d; tl.xheight = (int)xheight; //rectangle r((int)start, (int)top, (int)end, (int)bottom); rectangle r((int)istart, (int)top, (int)iend, (int)bottom); tl.bbox = r; return tl; } //Assuming horizontal lines with slope in the interval [-0.05, 0.05] static int wbox_intersection(line l, rectangle wbox){ float y = l.m * wbox.xcenter() + l.c; return ( (y > wbox.y0) && (y < wbox.y1) ); } // FIXME comment --tmb static void extend_lines(narray &lines, rectarray &wboxes, int image_width){ int num_lines = lines.length(); int num_wboxes = wboxes.length(); // FIXME: extend to image start/width if not intersecting with column separator for(int i = 0; i wboxes[j].x1) ? new_start : wboxes[j].x1; else new_end = (new_end < wboxes[j].x0) ? new_end : wboxes[j].x0; } } lines[i].start = (lines[i].start > new_start) ? new_start : lines[i].start; lines[i].end = (lines[i].end < new_end ) ? new_end : lines[i].end ; //printf("%.0f %.0f %.0f %.0f \n",lines[i].start,lines[i].top,lines[i].end,lines[i].bottom); } } // FIXME put the graphics stuff into a separate source file --tmb static void paint_line(intarray &image, line l){ int width=image.dim(0); int height=image.dim(1); float y; float slope = l.m; float y_intercept = l.c; float descender = l.d; int start = (l.start>0) ? (int) l.start : 0; int end = (l.end = 0) && (yl < height) ) image(x,yl) &= 0xff0000ff; if( (yh >= 0) && (yh < height) ) image(x,yh) &= 0xff0000ff; if(!baseline_only){ if( (dl >= 0) && (dl < height) ) image(x,dl) &= 0xff00ffff; if( (dh >= 0) && (dh < height) ) image(x,dh) &= 0xff00ffff; } } } // FIXME explain what this does --tmb static void connect_line_centers(intarray &image, line a, line b){ int width=image.dim(0); float x1 = (a.start + a.end)/2.0; float x2 = (b.start + b.end)/2.0; float y1 = a.m * x1 + a.c ; float y2 = b.m * x2 + b.c ; if (y2==y1) return; float slope_inverse = ((y2 - y1) != 0)? (x2 - x1)/(y2 - y1) : HUGE_VAL; //if (x1 > x2) swap(x1, x2); //if (y1 > y2) swap(y1, y2); int linewidth = 1; //actual line width = 2*linewidth +1 int thickness = 2 * linewidth + 1; int yoffset = 10; //Height of arrow head float x,y; if(y1 < y2){ for (y = y1; y<= y2; y++){ x = slope_inverse * (y - y1) + x1; x = (x < 2*linewidth) ? 2*linewidth : x; x = (x >= width-2*linewidth) ? width-2*linewidth-1 : x; for(int i = 0; i < thickness; i++) image((int)x-linewidth+i ,(int)y) &= 0xffff00ff; if (y >= y2 - yoffset){ for(int i = 0; i < 2*thickness; i++) image((int)x-2*linewidth+i ,(int)y) &= 0xffff00ff; } } }else{ for (y = y1; y>= y2; y--){ x = slope_inverse * (y - y1) + x1; x = (x < 2*linewidth) ? 2*linewidth : x; x = (x >= width-2*linewidth) ? width-2*linewidth-1 : x; for(int i = 0; i < thickness; i++) image((int)x-linewidth+i ,(int)y) &= 0xffff00ff; if (y <= y2 + yoffset){ for(int i = 0; i < 4*thickness; i++) image((int)x-4*linewidth+i ,(int)y) &= 0xffff0000; } } } } static void paint_reading_order(intarray &image, narray &lines_ordered){ int size = lines_ordered.length(); for(int i=0; i charstats(make_CharStats()); charstats->getCharBoxes(bboxes); charstats->calcCharStats(); if(debug_layout>=2){ charstats->print(); } //fprintf(stderr,"Time elapsed (charstats): %.3f \n",(clock()/float(CLOCKS_PER_SEC)) - startTime); // Compute Whitespace Cover autodel whitespaces(make_WhitespaceCover(0,0,in.dim(0),in.dim(1))); rectarray whitespaceboxes; whitespaces->compute(whitespaceboxes,charstats->char_boxes); //fprintf(stderr,"Time elapsed (whitespaces): %.3f \n",(clock()/float(CLOCKS_PER_SEC)) - startTime); // Find column separators autodel gutters(make_ColSeparators()); rectarray columns,colcandidates; // FIXME // why does findGutters return a list of "colcandidates"? // are colcandidates gutters? or what? // --tmb gutters->findGutters(colcandidates,whitespaceboxes,*charstats); gutters->filterOverlaps(columns,colcandidates); // it appears that this isn't a list of columns, but a list of // obstacles, so add the user-supplied obstacles as well for(int i=0;i ctextline(make_CTextlineRAST()); narray textlines; ctextline->min_q = 2.0; // Minimum acceptable quality of a textline ctextline->min_count = 2; // ---- number of characters in a textline ctextline->min_length= 30; // ---- length in pixels of a textline ctextline->max_results= max_results; ctextline->extract(textlines,columns,charstats); roSort(textlines,columns,*charstats); //fprintf(stderr,"Time elapsed (ctextline): %.3f \n",(clock()/float(CLOCKS_PER_SEC)) - startTime); // FIXME paragraph extraction should work generically on segmentation images. // It also doesn't belong inside the main RAST loop. --tmb //rectarray paragraphs; rectarray textcolumns; //ctextline->groupPara(paragraphs,textlines,charstats); getCol(textcolumns,textlines,columns); color(image,in,textlines,textcolumns); //fprintf(stderr,"Time elapsed (find-columns): %.3f \n",(clock()/float(CLOCKS_PER_SEC)) - startTime); if(debug_layout){ for(int i=0; i &textlines, rectarray &columns, CharStats &charstats) { makelike(debug_image,in_not_inverted); int v0 = min(in_not_inverted); int v1 = max(in_not_inverted); int threshold = (v1+v0)/2; for(int i=0; i lines; for(int i = 0; i 1){ for(int i=0; i= b.start) && (b.end >= a.start) ); } static bool separator_segment_found(line a, line b, narray &lines){ int lines_length = lines.length(); float y_min = (a.c < b.c) ? a.c : b.c; float y_max = (a.c > b.c) ? a.c : b.c; for(int i = 0; i y_min) && (lines[i].c < y_max) ) return true; return false; } static void construct_graph(narray &lines, narray &lines_dag){ //lines_dag(i,j) = 1 iff there is a directed edge from i to j int graph_length = lines.length(); for(int i = 0; i lines[j].top) { lines_dag(i,j) = 1; } else { lines_dag(j,i) = 1; } } else{ if( separator_segment_found(lines[i],lines[j],lines) ) continue; else if(lines[i].end <= lines[j].start) { lines_dag(i,j) = 1; } else { lines_dag(j,i) = 1; } } } } } void SegmentPageByRAST::visit(int k, narray &lines_dag){ int size = lines_dag.dim(0); val(k) = ++id; for (int i = 0; i< size; i++){ if(lines_dag(k,i) != 0) if(val(i) == 0) visit(i, lines_dag); } //cout << k << "\t"; ro_index.push(k); } void SegmentPageByRAST::depthFirstSearch(narray &lines_dag){ //void visit (int k); int size = lines_dag.dim(0); val.resize(size); fill(val,false); for (int k = 0; k< size; k++) if (val(k) == 0) visit(k, lines_dag); } // FIXME use descriptive name; "ro"??? --tmb void SegmentPageByRAST::roSort(narray &textlines, rectarray &columns, CharStats &charstats){ id = 0; val.clear(); ro_index.clear(); narray lines; for(int i = 0; i lines_dag; // Directed acyclic graph of lines lines_dag.resize( lines.length(), lines.length() ); fill(lines_dag,false); construct_graph(lines, lines_dag); depthFirstSearch(lines_dag); int size = ro_index.length(); //"\nNumber of connected lines = " << size <<"\n"; textlines.clear(); for(int i = 1; i <= size; i++){ textlines.push(lines[ro_index[size-i]].getTextLine()); } } // FIXME newColumn --tmb static bool new_column(rectangle current, rectangle previous){ if(current.y0 > previous.y1){return true;} return false; } // FIXME name getBoundingBox --tmb static void getbbox(rectangle &bbox,rectarray &bboxes){ //first copying into arrays for x0,x1,y0,y1 bbox = rectangle(); for(int i = 0; i(previous.x0-align_range)) {left = true;} if(current.x1<(previous.x1+align_range) && current.x1>(previous.x1-align_range)) {right = true;} if(current.xcenter() < (previous.xcenter() + align_range) && current.xcenter() >(previous.xcenter() - align_range)) {center = true;} return alignment(left, right, center); } // FIXME name getAlignment (?) or something else (?) --tmb static void getalign(objlist< narray > &tempalign, objlist< narray > &temppara){ narray talign; rectangle current,previous; for(int i = 0;i &amcolumns, objlist &finalpara, objlist > &finalalign){ objlist< narray > amalignment; rectarray current_tpara; narray current_talign; for(int i = 0;i= 0){ amcolumns[amcolumns.length()-1].push(finalpara[i][0]); amalignment[amcolumns.length()-1].push(finalalign[i][0]); } }else if(finalpara[i].length() == 2 && finalalign[i][0] == 0 && finalalign[i][1] == 2 && (i+1) &textlines, CharStats &charstats){ if(textlines.length() == 0) return ; rectangle current,previous; //initializing previous objlist< narray > temppara; objlist< narray > tempalign; objlist< narray > finalpara; objlist< narray > finalalign; rectarray current_tpara; narray current_talign; //since the textlines are sorted we group them on the basis of y //coordinate and gaps between them. previous = textlines[0].bbox; current_tpara.push(previous); for(int i = 1;i > amcolumns; merge_single_line_paras(amcolumns,finalpara,finalalign); // get the final bounding boxes of the paragraphs rectangle temp; for(int i = 0; i tempcol; objlist< narray > floatcol; rectarray temp; narray temp1; rectarray probablecol; rectangle previous; rectangle current; rectangle tempt; if(paragraphs.length() == 0) return ; //first separating on the basis of y coordinate previous = paragraphs[0]; temp.push(previous); temp1.push(1.0); for(int i = 1;i previous.y1){ move(tempcol.push(),temp); move(floatcol.push(),temp1); temp.push(current); temp1.push(getoverlap(current,previous)); previous = current; }else{ temp1.push(getoverlap(current,previous)); temp.push(current); previous = current; } } move(tempcol.push(),temp); move(floatcol.push(),temp1); // now grouping on the basis of overlap and getting bounding boxes of the columns FILE *colfile = fopen("columns.dat","w"); for(int i=0; icolumn_threshold){ probablecol.push(tempcol[i][j]); }else{ getbbox(tempt,probablecol); columns.push(tempt); probablecol.dealloc(); probablecol.push(tempcol[i][j]); } } getbbox(tempt,probablecol); probablecol.dealloc(); columns.push(tempt); tempt.println(colfile); } fclose(colfile); } // FIXME // method too long // name: getColumn or more descriptive // functionality overlaps with other overloaded method? // --tmb void SegmentPageByRAST::getCol(rectarray &textcolumns, narray &textlines, rectarray &gutters){ if(!textlines.length()) return; if(!gutters.length()){ rectangle column = rectangle(); for(int i=0; i &textlines, rectarray &textcolumns){ int color; int column_num = 0x00010000; makelike(image,in); //Comment out this loop when the input image is not inverted for(int i = 0, l = image.length1d(); i