// -*- C++ -*- // Copyright 2006-2008 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz // or its licensors, as applicable. // // You may not use this file except under the terms of the accompanying license. // // Licensed under the Apache License, Version 2.0 (the "License"); you // may not use this file except in compliance with the License. You may // obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Project: OCRopus // File: ocr-ctextline-rast.h // Purpose: Header file declaring data structures for constrained textline // extraction using RAST // Responsible: Faisal Shafait (faisal.shafait@dfki.de) // Reviewer: // Primary Repository: // Web Sites: www.iupr.org, www.dfki.de #ifndef h_ocrctextlinerastextended__ #define h_ocrctextlinerastextended__ #include "colib.h" #include "iarith.h" #include "ocr-char-stats.h" namespace ocropus { using namespace colib; ///////////////////////////////////////////////////////////////////// /// /// \struct TextLineParam4line /// Purpose: Textline parameters /// ////////////////////////////////////////////////////////////////////// struct TextLineParam4line { float c,m,d; // c is y-intercept, m is slope, d is the line of descenders float a,x; // a is ascender height, x is the x-height void print(FILE *stream=stdout){ fprintf(stream,"%.3f %f %.2f\n",c,m,d); } }; ///////////////////////////////////////////////////////////////////// /// /// \struct CTextlineRAST4line /// Purpose: 4line implementation of the constrained textline finding /// algorithm using RAST. Returns parameters of text-lines in /// descending order of quality. 
///
//////////////////////////////////////////////////////////////////////

// Number of line parameters handled by CTextlineRAST4line; this matches the
// five all_params[] entries listed inside the struct.
static const int ntl4params = 5;

// NOTE(review): several declarations in this header name what are presumably
// colib class templates (vecni, narray, autodel, heap) without any template
// argument list, and the body of final() below breaks off in the middle of an
// expression. It looks as if the "<...>" portions of the original text were
// lost. Restore them from the original header before compiling; the code
// below is deliberately left exactly as found.
struct CTextlineRAST4line {
    CTextlineRAST4line();
    virtual ~CTextlineRAST4line(){ }

    // Search settings. Their values are not assigned in this header;
    // setDefaultParameters() is declared further down.
    int generation;
    bool lsq;               // selects sqinfluence() over rinfluence() in influence()
    double epsilon;
    int maxsplits;
    double delta;
    double adelta;
    float min_length;
    int min_gap;
    double min_q;
    int min_count;
    int max_results;
    bool use_whitespace;

    // The parameters are:
    // all_params[0] = y-intercept of baseline
    // all_params[1] = slope
    // all_params[2] = descender distance from baseline
    // all_params[3] = xheight
    // all_params[4] = ascender distance from xheight
    typedef colib::vecni Parameters;   // NOTE(review): template arguments appear to be missing
    double splitscale[ntl4params];
    Parameters all_params;
    Parameters empty_parameters;

    // Takes the angle atan2(v.y, v.x) as an interval and returns a vec2i
    // built from the cosine and sine of that angle.
    // NOTE(review): the result type is vec2i; if that is an integer vector
    // the cos/sin values would be truncated -- confirm this is intended.
    colib::vec2i normalized(colib::vec2i v) {
        colib::interval a = atan2(v.y,v.x);
        return colib::vec2i(cos(a),sin(a));
    }

    // Forwards (d, epsilon) to sqinfluence() when lsq is true and to
    // rinfluence() otherwise. The lsq and epsilon parameters shadow the
    // members of the same name, so the arguments are used, not the members.
    inline colib::interval influence(bool lsq,colib::interval d,double epsilon) {
        if(lsq) return sqinfluence(d,epsilon);
        else return rinfluence(d,epsilon);
    }

    typedef colib::narray Matches;     // NOTE(review): template arguments appear to be missing
    colib::rectarray cboxes;
    colib::rectarray wboxes;
    colib::narray used;                // NOTE(review): template arguments appear to be missing

    // NOTE(review): text is missing from here on. The return expression
    // stops after p[0].width(), the closing brace of final() is absent, and
    // what follows looks like the tail of a typedef that introduces CState.
    // Everything in between cannot be recovered from this file.
    bool final(colib::interval q,const Parameters &p) {
        return p[0].width() CState;
    heap queue;                        // NOTE(review): template arguments appear to be missing
    colib::narray results;             // NOTE(review): template arguments appear to be missing
    colib::autodel linestats;          // NOTE(review): template arguments appear to be missing
    Matches all_matches;

    void setDefaultParameters();       // non-virtual
    void setMaxSlope(double max_slope);
    void setMaxYintercept(double ymin, double ymax);
    void prepare();
    void makeSubStates(colib::narray &substates,CState &state);
    int wboxIntersection(CState &top);
    void search();

    // Virtual entry points; CTextlineRASTExtended below redeclares all three.
    virtual void pushResult(CState &result);
    virtual void extract(colib::narray &textlines,
                         colib::autodel &charstats);
    virtual void extract(colib::narray &textlines,
                         colib::rectarray &columns,
                         colib::autodel &charstats);
};

// Factory function for CTextlineRAST4line; defined outside this header.
CTextlineRAST4line *make_CTextlineRAST4line();

/////////////////////////////////////////////////////////////////////
///
/// \struct TextLineExtended
/// Purpose: Textline bounding box and its attributes
///
//////////////////////////////////////////////////////////////////////
struct TextLineExtended : TextLineParam4line{
    // Empty constructor body: the inherited float fields c, m, d, a and x
    // are not assigned here.
    TextLineExtended(){ }

    // Copies the five line parameters from tl; bbox is not set by this
    // constructor.
    // NOTE(review): tl is only read but is taken by non-const reference, so
    // const objects and temporaries cannot be passed; the constructor is
    // also usable as an implicit conversion from TextLineParam4line.
    TextLineExtended(TextLineParam4line &tl){
        c = tl.c;
        m = tl.m;
        d = tl.d;
        a = tl.a;
        x = tl.x;
    }

    colib::rectangle bbox;

    // Writes one line to stream: bbox.x0 bbox.y0 bbox.x1 bbox.y1 followed
    // by c m d a x. This hides TextLineParam4line::print, which writes only
    // c, m and d.
    void print(FILE *stream=stdout){
        fprintf(stream,"%d %d %d %d ",bbox.x0,bbox.y0,bbox.x1,bbox.y1);
        fprintf(stream,"%.3f %f %.2f %.2f %.2f\n",c,m,d,a,x);
    }
};

/////////////////////////////////////////////////////////////////////
///
/// \struct CTextlineRASTExtended
/// Purpose: Constrained Textline finding using RAST
///
//////////////////////////////////////////////////////////////////////
struct CTextlineRASTExtended : CTextlineRAST4line{
    CTextlineRASTExtended();
    ~CTextlineRASTExtended(){ }

    // fraction of area covered by line bounding box
    // so that char_box is included in line_box
    float minoverlap;
    // rejection threshold for the height of a box = tr*xheight
    float min_box_height;
    // average distance between words
    int word_gap;

    int min_height;
    int assign_boxes;
    bool aggressive;
    int extend;
    int pagewidth;
    int pageheight;
    colib::rectarray cboxes_all;
    colib::narray used_all;            // NOTE(review): template arguments appear to be missing
    colib::narray result_lines;        // NOTE(review): template arguments appear to be missing

    // setDefaultParameters() is not virtual in CTextlineRAST4line, so this
    // declaration hides the base version instead of overriding it: a call
    // made through a CTextlineRAST4line pointer or reference runs the base
    // version.
    void setDefaultParameters();

    // pushResult() and both extract() overloads are declared virtual in
    // CTextlineRAST4line with the same parameter lists as written here.
    void pushResult(CState &result);
    void extract(colib::narray &textlines,
                 colib::autodel &charstats);
    void extract(colib::narray &textlines,
                 colib::rectarray &columns,
                 colib::autodel &charstats);
};

// Factory function for CTextlineRASTExtended; defined outside this header.
CTextlineRASTExtended *make_CTextlineRASTExtended();

} // namespace ocropus

#endif