// -*- C++ -*- // Copyright 2006-2007 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz // or its licensors, as applicable. // Copyright 1995-2005 by Thomas M. Breuel // // You may not use this file except under the terms of the accompanying license. // // Licensed under the Apache License, Version 2.0 (the "License"); you // may not use this file except in compliance with the License. You may // obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Project: bpnet -- Neural Network Classifier // File: grouping.cc // Purpose: train and recognize lines // Responsible: mezhirov // Reviewer: rangoni // Primary Repository: // Web Sites: www.iupr.org, www.dfki.de // TODO: add garbage collection of unreachable components #include #include #include #include #include #include "colib.h" #include "imgio.h" #include "imglib.h" #include "grid.h" #include "line-info.h" #include "segmentation.h" #include "ocr-utils.h" #include "logger.h" #include "lattice.h" #include "ocr-segmentations.h" #include "beam-search.h" #include "narray-util.h" #include "make-garbage.h" #include "seg-eval.h" using namespace colib; using namespace iulib; using namespace ocropus; //#define FLT_MAX 3.402823466e+38F namespace { // log raw input line, binarized and segmented verion while aligning Logger align_log("align"); // log raw input line, binarized and segmented version Logger line_ocr_log("line_ocr.hilevel"); // log segments, with pictures, ids and transitions Logger line_ocr_graph_log("line_ocr.graph"); // log all the transitions, with associated costs and labels Logger line_ocr_transitions_log("line_ocr.transitions"); // log all the transitions, with the associated n-best costs, pictures // and labels, as a 'list' // n is set by ndispclasses, by default, ndispclasses=-1 and display // all the costs and labels Logger line_ocr_transition_sorted_log("line_ocr.transition_sorted"); // log all the transitions, with the associated n-best costs, pictures // and labels, as a 'graph' (html table) // n is set by ndispclasses, by default, ndispclasses=-1 and display // all the costs and labels Logger line_ocr_transition_graph_log("line_ocr.transition_graph"); struct Mean2 { float x,y,n,minx; Mean2() { x = 0; y = 0; n = 0; minx=FLT_MAX; } void add(float nx,float ny) { if(nx= 0); ASSERT(segid_to >= 0); ASSERT(endpoint >= 0); ASSERT(points.length() == characters.length()); ASSERT(costs.length() == characters.length()); } /// Add the chain of transitions to the language model. /// The start is searched in `vtable' if `start_is_virtual'. /// The end is searched in `vtable' if `endpoint_is_virtual'. /// Otherwise, they're assumed to be node indices. /// (This virtual stuff is necessary because /// we just can't know some node indices /// when the Transition is created). /// All segments will be bound to all characters in the idmap. /// \param vtable - the table of indices to search for the endpoint void add_to(IGenericFst &fst, intarray &vtable, idmap &im, floatarray &debug) { check(); int prev = start_is_virtual ? vtable[points[0]] : points[0]; if (line_ocr_transition_sorted_log.enabled || line_ocr_transition_graph_log.enabled) { debug.resize(characters.length(), 5); // store information about transitions } for(int i = 0; i < characters.length(); i++) { for(int seg = segid_from; seg < segid_to; seg++) im.associate(id, seg + 1); int t; if(i == characters.length() - 1) { if(endpoint_is_virtual) { t = vtable[endpoint]; } else { t = endpoint; } } else { t = points[i+1]; } fst.addTransition(prev,t,characters[i].ord(),costs[i],id); nustring char_text; char_text.resize(1); char_text[0] = nuchar(characters[i].ord()); char *buf = char_text.newUtf8Encode(); line_ocr_transitions_log.format( "transition `%s' (%d) id %d from %d to %d costs %f", buf, characters[i].ord(), id, prev, t, costs[i]); delete[] buf; if (line_ocr_transition_sorted_log.enabled || line_ocr_transition_graph_log.enabled) { debug(i,0) = characters[i].ord(); debug(i,1) = id; debug(i,2) = prev; debug(i,3) = t; debug(i,4) = costs[i]; } prev = t; } } }; } void sort_trans(intarray &charcode_t, intarray &id_t, intarray &prev_t, intarray &trans_t, floatarray &cost_t, intarray &cuts) { if (line_ocr_transition_sorted_log.enabled || line_ocr_transition_graph_log.enabled) { int n = cost_t.dim(0); // number of transitions int i_beg = 0, i_end = 0; // indexes of begin and end transition int v_beg = prev_t(0), v_end = trans_t(0); // values of begin and end transition int x, y; intarray permut; // preparing permuatation table permut.resize(n); //fill(permut,-1); // not necessary cuts.resize(n); //fill(cuts, 0); // not necessary for (int i = 0; i < n; i++) { permut(i) = i; // defaut order x = prev_t(i); // new transition values y = trans_t(i); if ((x != v_beg) || (y != v_end)) { // if different from the last time? i_end = i; // new end quicksort(permut, cost_t, i_beg, i_end); // sort the segment i_beg = i; // update the new beginning v_beg = x; v_end = y; // and the new end } cuts(i) = i - i_beg; // and number of classes in the transition } quicksort(permut, cost_t, i_beg, n); // for the last segment permute(charcode_t,permut); // permute charcodes //permute(id_t,permut); // id_t should be the same in a segment permute(cost_t,permut); // and costs } } static void add_all_transitions(IGenericFst &fst, idmap &im, intarray &vtable, objlist &transitions, objlist &subimages) { intarray charcode_t, id_t, prev_t, trans_t, cuts; floatarray cost_t, debug; for(int i = 0; i < transitions.length(); i++) { transitions[i].add_to(fst, vtable, im, debug); // get debug info also if (line_ocr_transition_sorted_log.enabled || line_ocr_transition_graph_log.enabled) { for(int j = 0; j < debug.dim(0); j++) { // store information in narrays charcode_t.push((int)debug(j,0)); id_t.push((int)debug(j,1)); prev_t.push((int)debug(j,2)); trans_t.push((int)debug(j,3)); cost_t.push(debug(j,4)); // just for the reverse sorting } } } if (line_ocr_transition_sorted_log.enabled || line_ocr_transition_graph_log.enabled) { sort_trans(charcode_t,id_t,prev_t,trans_t,cost_t,cuts); } #if 0 line_ocr_transition_sorted_log.transitions_in_list(maxcomp,ndispclasses, cuts,cost_t,id_t,prev_t,trans_t,charcode_t,subimages); // log in 'list' mode line_ocr_transition_graph_log.transitions_in_graph(maxcomp,ndispclasses, cuts,cost_t,id_t,prev_t,trans_t,charcode_t,subimages); // log in 'graph' mode #endif } static void renumber_components_by_x_coordinate(intarray &image, int nlabels) { narray means(nlabels); for(int i=0;i &transitions, int &fst_nodes_allocated, ICharacterClassifier &classification, int fst_node_from, int from, int to, int id) { for(int k = 0; k < classification.length(); k++) { nustring variant; classification.cls(variant, k); ASSERT(variant.length()==1); float cost_per_char = classification.cost(k) / variant.length(); Transition &t = transitions.push(); t.segid_from = from; t.segid_to = to; t.id = id; t.resize(variant.length()); // FIXME: normalization doesn't work. I would suggest: // double sum_scores = sum(scores), but I'm not sure if this is correct // in case of multiple character output. In this case we have probably // to calculate this: // double sum_scores = 0; // for(int c = 0; c < classes.dim(0); c++) // sum_scores += scores(c,m); // inside the loop where we calculate t.costs[m] = -log(scores(k,m)); // or better calculating it ones at the beginnig of the function and // store it into floatarray sum_score and then use it in the loop. But // in general I don't think that normalization is the right thing to // do. One other problem with normalization could be, that we ignore // garbage. //double sum_scores = 0; //for(int m = 0; m < classes.dim(1); m++) // sum_scores += scores(k,m); //ASSERT(sum_scores > 0); for(int m = 0; m < variant.length(); m++) { t.characters[m] = variant[m]; //t.costs[m] = -log(scores(k,m) / sum_scores); t.costs[m] = cost_per_char; } t.points[0] = fst_node_from; // allocate intermediate nodes for(int m = 1; m < variant.length(); m++) t.points[m] = fst_nodes_allocated++; t.endpoint = to; t.endpoint_is_virtual = true; // we can't know the node of cut `to' yet t.check(); line_ocr_transitions_log.format( "cached transition from %d (fst index %d) to (virtual) %d\n", from, fst_node_from, to); } } static int extract_merged_component(bytearray &sub, intarray &image, rectangle r, int i, int j, float slope, int maxShift) { sub.resize(r.width(), r.height() + maxShift + 1); // 'erase' sub => paint all white fill(sub, 255); int numpixels=0; int shift = 0; for(int x=r.x0;x=i&&pixel<=j); int pX, pY; pX = x-r.x0; pY = y-r.y0; // shift the sub image according to slope // (use x center of bbox for correction) if(x==r.x0) { shift = 0; if(slope > 0.f) shift += maxShift; //shift -= (int) (slope * ((float) (r.x0+r.x1))/2.f); shift -= (int) (slope * (float)x); } // draw baseline (for debugging) //if(pY + shift == basepoint) sub(pX, pY + shift) = 200; if(pixel) { // paint data in black sub(pX, pY + shift) = 0; numpixels++; } } return numpixels; } /*static void check_classes_and_scores(nustring &classes, floatarray &scores) { ASSERT(classes.rank() == 2); // multi-character output ASSERT(samedims(classes, scores)); for(int i = 0; i < classes.length1d(); i++) { if(classes.at1d(i).ord()) CHECK_CONDITION(scores.at1d(i) > -1e-6); } }*/ void draw_line(intarray &a, int y, int color) { if(y < 0 || y >= a.dim(1)) return; for(int x = 0; x < a.dim(0); x++) a(x, y) = color; } void log_line_info(Logger &l, const char *description, bytearray &image, int baseline, int xheight, int ascender, int descender) { if(l.enabled) { intarray aux; copy(aux, image); draw_line(aux, ascender, 0x77FF00); draw_line(aux, descender, 0x00FF00); draw_line(aux, baseline, 0xFF0000); draw_line(aux, xheight, 0xFF7700); l(description, aux); } } void log_line_info(Logger &l, const char *description, intarray &image, int baseline, int xheight, int ascender, int descender) { if(l.enabled) { intarray aux; copy(aux, image); ocropus::make_line_segmentation_white(aux); draw_line(aux, ascender, 0x77FF00); draw_line(aux, descender, 0x00FF00); draw_line(aux, baseline, 0xFF0000); draw_line(aux, xheight, 0xFF7700); l(description, aux); } } // FIXME this function is way too long, please refactor --tmb static void lineOCR(IGenericFst &fst, idmap &im, intarray &orig_image, ICharacterClassifier &classifier, bool use_line_info = true) { intarray image; check_line_segmentation(orig_image); copy(image, orig_image); line_ocr_log("line_segmentation_white",image); make_line_segmentation_black(image); float bm,bb,bd,be; if(baseline) { if(sscanf(baseline,"%f,%f,%f,%f",&bm,&bb,&bd,&be)!=4) throw "baseline info format error"; bd = bb-bd; } int w0,h0,w1,h1; if(size) { if(sscanf(size,"%d,%d,%d,%d",&w0,&h0,&w1,&h1)!=4) throw "size info format error"; } int sw, sh; if(smallsize) { if(sscanf(smallsize,"%d,%d",&sw,&sh)!=2) throw "smallsize info format error"; } int nlabels = renumber_labels(image,1); line_ocr_log.format("nlabels (number of segments)= %d",nlabels); renumber_components_by_x_coordinate(image, nlabels); int lastInto = -1; int lastRightEdge = 100000; narray spaces_added; spaces_added.resize(nlabels + 1); fill(spaces_added, false); narray bboxes; bounding_boxes(bboxes, image); float intercept; float slope; float xheight; float descender_sink; float ascender_rise; if(!get_extended_line_info(intercept,slope,xheight,descender_sink, ascender_rise,image)) { intercept = 0; slope = 0; xheight = 0; descender_sink = 0; ascender_rise = 0; } int basepoint = (int) intercept; #if 0 // FIXME line_ocr_log.format("extended_line_info raw, basepoint=%d, " "slope=%f, xheigth=%f, descender_sink=%f, " "ascender_rise=%f, image height=%d",basepoint,slope,xheight, descender_sink,ascender_rise,image.dim(1)); line_ocr_log("extended_line_info raw", image, basepoint, (int)(basepoint+xheight), (int)(basepoint-descender_sink), (int)(basepoint+xheight+ascender_rise)); #endif int maxShift = abs((int) (slope * (float) image.dim(0))); if (slope > 0.f) basepoint += maxShift; if (basepoint == 0) basepoint = 1; #if 0 // FIXME line_ocr_log.format("extended_line_info corrected, basepoint=%d, " "slope=%f, xheigth=%f, descender_sink=%f, ascender_rise=%f, image height=%d", basepoint,slope,xheight,descender_sink,ascender_rise,image.dim(1)); line_ocr_log("extended_line_info corrected", image, basepoint, (int)(basepoint+xheight), (int)(basepoint+xheight+ascender_rise), (int)(basepoint-descender_sink)); #endif ASSERT(xheight>0); ASSERTWARN(basepoint - descender_sink >= 0); ASSERTWARN(basepoint + xheight < image.dim(1)); ASSERTWARN(basepoint + xheight + ascender_rise < image.dim(1)); // ________________________________________________________________________ // extract ranges of connected components // Note: the older script "classify_simple.sh" relied on a slightly different version of the following code // if you intend to use a classifier based on connected components, please check out revision 180 // We are going to translate the segment ids into FST node ids. // One segment interval (a continuous sequence of segments) may produce // more then one FST nodes, for example, for ligatures. intarray fst_node_ids(nlabels); fill(fst_node_ids, -1); int fst_nodes_allocated = 0; objlist transitions; narray has_incoming_arcs(nlabels); narray has_outgoing_arcs(nlabels); fill(has_incoming_arcs, false); fill(has_outgoing_arcs, false); intarray fst_afterspace_ids(nlabels); fill(fst_afterspace_ids, -1); int offset = 1; int id = 1; // start with id 1 objlist sub_images; for(int i=1;ii && (bboxes(k).x0-bboxes(k-1).x1) > maxgap) if((k>i) && (((bboxes(k).x0-r.x1) > maxgap) || //since bboxes(k-1).x1 might be smaller then r.x1 (mergeabove && (r.x1>=r.x0) && ((bboxes(k).y0r.y0))))) { // only merge if one cc is above other // not sure if(r.x1>=r.x0) is necessary here, because we already know that k>i ... if(maxonly==1) offset=k-i; skip = true; break; } if(smallsize) { // don't add small connected "sub"-components, e.g. single pixel... if((bboxes(k).width()be) { line_ocr_graph_log.format("dropping component %d-%d because it's not on the baseline", from, into); continue; } } // the merged component must be within size range if(size) { if(r.width()w1||r.height()h1) continue; } int bottomBak = 0, topBak = 0; bottomBak = r.y0; topBak = r.y1; r.y0 = 0; r.y1 = image.dim(1); ///////////////////////////////////////////////////// // extract pixels belonging to the merged component bytearray sub; int npixels = extract_merged_component(sub, image, r, i, j, slope, maxShift); r.y0 = bottomBak; r.y1 = topBak; // Re-enabled the checks because connectivity seems to be fixed -IM // only add merged components that have a minimum number of pixels if(npixels<4) continue; // only add merged components that have a minimum "blackness" float blackness = ((float)npixels)/(float)(r.width()*r.height()); if(blackness<0.1) continue; // scaled is not used... // bytearray scaled(16,16); // rescale(scaled,sub); line_ocr_graph_log.format("segment from %d to %d, boundingbox=(%d,%d,%d,%d)", from, into, r.x0,r.y0,r.x1,r.y1); // Allocate a FST node for the starting segment if not done yet if(fst_node_ids[from] == -1) { fst_node_ids[from] = fst_nodes_allocated++; } int fst_node_from = fst_node_ids[from]; if(fst_afterspace_ids[from] != -1) { fst_node_from = fst_afterspace_ids[from]; } else if((from == lastInto) || (r.x0 - lastRightEdge > xheight / 2.)) { // add two arcs in parallel one allowing for a space (t1), the // other one not (t2); an id of 0 means that it's an EPSILON // add whitespace before the `from' segment (EPSILON:SPACE/0.0) Transition &t1 = transitions.push(); t1.segid_from = from; t1.segid_to = from; t1.id = 0; t1.resize(1); t1.characters[0] = nuchar(' '); // FIXME: add heuristic function that adjusts the weights for // adding a space //float space_width = float(r.x0-lastRightEdge); //float a = 1.0; //float b = xheight/2; //float space_prob = 1/(1+exp(-(a*space_width-b))); //float space_weight = -log(space_prob); //t1.costs[0] = space_weight; t1.costs[0] = 0.; t1.points(0) = lastInto; t1.start_is_virtual = true; // add no whitespace before the `from' segment (EPSILON:EPSILON/0.0) Transition &t2 = transitions.push(); t2.segid_from = from; t2.segid_to = from; t2.id = 0; t2.resize(1); t2.characters[0] = nuchar(' '); //FIXME // FIXME: add heuristic function that adjusts the weights for // not adding a space // float no_space_weight = 1-space_prob; // t2.costs[0] = -log(no_space_weight); t2.costs[0] = 0; // FIXME t2.costs[0] = 10000;//FLT_MAX; // -log(1- space_prob=1) t2.points(0) = lastInto; t2.start_is_virtual = true; //printf("space_width: %g xheight/2: %f space_weight: %f " // "no_space_weight: %g\n",space_width,xheight/2, // space_weight,no_space_weight); line_ocr_graph_log.format("adding (possible) space from %d to %d", fst_node_from, fst_nodes_allocated); fst_node_from = fst_nodes_allocated++; fst_afterspace_ids[from] = fst_node_from; t1.endpoint = fst_node_from; t1.endpoint_is_virtual = false; t1.check(); t2.endpoint = fst_node_from; t2.endpoint_is_virtual = false; t2.check(); } lastInto = into; lastRightEdge = r.x1; // classify and add if(use_line_info) { float baseline = intercept + r.x0 * slope; float ascender = baseline + xheight + descender_sink; classifier.setImage(sub, (int) (baseline + .5), (int) (baseline + xheight + .5), (int) (baseline - descender_sink + .5), (int) (ascender + .5)); } else { classifier.setImage(sub); } if (line_ocr_transition_sorted_log.enabled || line_ocr_transition_graph_log.enabled) { copy(sub_images.push(),sub); } //check_classes_and_scores(classes, scores); line_ocr_graph_log.format("adding segment from %d to %d (id=%d)", from, into, id); line_ocr_graph_log("picture ", sub); line_ocr_graph_log.format("fst_nodes_allocated (before) is %d", fst_nodes_allocated); add_all_variants(transitions, fst_nodes_allocated, classifier, fst_node_from, from, into, id++); line_ocr_graph_log.format("fst_nodes_allocated (after) is %d", fst_nodes_allocated); if(classifier.length()) { has_incoming_arcs[into] = true; has_outgoing_arcs[from] = true; } } } for(int i = nlabels - 1; i >= 0; i--) { // fix connectivity: if a node has no outgoing arcs, // copy a vtable node from some future one if(i < nlabels - 1 && !has_outgoing_arcs[i]) fst_node_ids[i] = fst_node_ids[i + 1]; else if(has_incoming_arcs[i] && (fst_node_ids[i] == -1)) { line_ocr_graph_log.format("vtable of %d points to %d\n", i, fst_nodes_allocated); fst_node_ids[i] = fst_nodes_allocated++; } } if(!fst_nodes_allocated) fst_nodes_allocated = 1; for(int i=0;i classifier; autodel segmenter; bool use_line_info; NewGroupingLineOCR(ICharacterClassifier *c, ISegmentLine *s, bool use_line_info) : classifier(c), segmenter(s), use_line_info(use_line_info) { } const char *description() { return "NewGroupingLineOCR"; } void init(const char **) { } void recognizeLine(IGenericFst &result,/*idmap &components, intarray &segmentation,*/bytearray &image) { line_ocr_log("input", image); bytearray binarized; binarize_simple(binarized, image); line_ocr_log("binarized", binarized); intarray segmentation; segmenter->charseg(segmentation, binarized); char temp[1024]; sprintf(temp,"raw from %s", segmenter->description()); line_ocr_log(temp, segmentation); sprintf(temp,"recolor from %s", segmenter->description()); line_ocr_log.recolor(temp, segmentation); idmap components; lineOCR(result,components,segmentation,*classifier,use_line_info); line_ocr_log("result", result); } void recognizeLineSeg(intarray &true_seg, IGenericFst &result, bytearray &image, bool useit) { //throw "not yet implemented"; line_ocr_log("input", image); bytearray binarized; binarize_simple(binarized, image); line_ocr_log("binarized", binarized); line_ocr_log("true segmentation raw", true_seg); line_ocr_log.recolor("true segmentation recolor", true_seg); intarray seg; segmenter->charseg(seg, binarized); if(line_ocr_log.enabled) { char temp[1024]; sprintf(temp,"raw from %s", segmenter->description()); line_ocr_log(temp, seg); sprintf(temp,"recolor from %s", segmenter->description()); line_ocr_log.recolor(temp, seg); int nover, nunder, nmis; evaluate_segmentation(nover,nunder,nmis,true_seg,seg,0); line_ocr_log.format("evaluate_segmentation says: segments in model: %d," " segments from segmenter: %d, over_seg: %d," " under_seg: %d, miss: %d", renumber_labels(true_seg,1)-1, renumber_labels(seg,1)-1, nover, nunder, nmis); Evaluator E; intarray ground_err; intarray seg_err; int nfalarm; segeval_full(true_seg, seg, E, ground_err, seg_err, nover, nunder, nmis, nfalarm, 0, 0); line_ocr_log.format( "segeval_full says: " "total_over_seg: %d, " "over_seg_char: %d, under_seg: %d, " "miss: %d, false_alarm: %d", E.mover-E.mcount, nover, nunder, nmis, nfalarm); line_ocr_log("segeval_full says: ground_err", ground_err); line_ocr_log("segeval_full says: seg_err", seg_err); } idmap components; lineOCR(result,components,(useit)?true_seg:seg,*classifier,use_line_info); line_ocr_log("result", result); } virtual void addTrainingLine(intarray &trueseg, bytearray &image, nustring &chars) { make_line_segmentation_black(trueseg); rectarray bboxes; bounding_boxes(bboxes, trueseg); float intercept; float slope; float xheight; float descender_sink; float ascender_rise; if(use_line_info) { if(!get_extended_line_info(intercept, slope,xheight,descender_sink, ascender_rise,trueseg)) { intercept = 0; slope = 0; xheight = 0; descender_sink = 0; ascender_rise = 0; } line_ocr_log("intercept", intercept); line_ocr_log("slope", slope); line_ocr_log("xheight", xheight); } for(int i = 1; i < bboxes.length(); i++) { intarray segment; rectangle &b = bboxes[i]; extract_subimage(segment,trueseg,b.x0,b.y0,b.x1,b.y1); bytearray subimage; extract_segment(subimage,segment,i); nustring char_text; char_text.resize(1); char_text[0] = chars[i - 1]; if(use_line_info) { float baseline = intercept + b.x0 * slope; float ascender = baseline + xheight + descender_sink; classifier->addTrainingChar(subimage, (int) (baseline + .5), (int) (baseline + xheight + .5), (int) (baseline - descender_sink + .5), (int) (baseline + xheight + ascender + .5), char_text); } else { classifier->addTrainingChar(subimage, char_text); } } rectarray garbage_bboxes; narray garbage; make_garbage(garbage_bboxes, garbage, trueseg, *segmenter); for(int i = 0; i < garbage.length(); i++) { nustring char_text; char_text.resize(1); char_text[0] = nuchar(0xAC); if(use_line_info) { float baseline = intercept + garbage_bboxes[i].x0 * slope; float ascender = baseline + xheight + descender_sink; classifier->addTrainingChar(garbage[i], (int) (baseline + .5), (int) (baseline + xheight + .5), (int) (baseline - descender_sink + .5), (int) (baseline + xheight + ascender + .5), char_text); } else { classifier->addTrainingChar(garbage[i], char_text); } } } virtual void startTraining(const char *type="adaptation") { classifier->startTraining(type); } virtual void addTrainingLine(bytearray &image,nustring &transcription) { autodel fst(make_StandardFst()); int k = transcription.length(); floatarray costs(k); intarray ids(k); for(int i = 0; i < k; i++) { costs[i] = 0; ids[i] = i + 1; } fst->setString(transcription, costs, ids); costs.clear(); nustring chars; intarray trueseg; align(chars, trueseg, costs, image, *fst); addTrainingLine(trueseg, image, chars); } virtual void finishTraining() { classifier->finishTraining(); } virtual void align( nustring &chars, intarray &result, floatarray &result_costs, bytearray &image, IGenericFst &transcription) { align_log("alignment: ", image); if(align_log.enabled) { nustring s; transcription.bestpath(s); align_log("ground truth", s); } bytearray binarized; binarize_simple(binarized, image); align_log("binarized", binarized); intarray segmentation; segmenter->charseg(segmentation, binarized); align_log.recolor("overseg", segmentation); idmap components; autodel fst(make_StandardFst()); lineOCR(*fst,components,segmentation,*classifier,use_line_info); // align intarray ids; intarray vertices; intarray vertices2; intarray outputs; floatarray costs; beam_search_in_composition(ids, vertices, vertices2, outputs, costs, *fst, transcription); // remove zeros from ids intarray ids_cleaned; for(int i = 0; i < ids.length(); i++) { if(ids[i]) { ids_cleaned.push(ids[i]); align_log("id", ids[i]); if(align_log.enabled) { intarray segs; components.segments_of_id(segs, ids[i]); align_log("id's segs", segs); } } } // build the output string for(int i = 0; i < outputs.length(); i++) { if(outputs[i]) { chars.push(nuchar(outputs[i])); align_log.format("cost for %c is %f", outputs[i], costs[i]); result_costs.push(costs[i]); } } // push the accept cost result_costs.push(costs[costs.length() - 1]); align_log.format("accept cost is %f", costs[costs.length() - 1]); align_log("chars", chars); // recolor ocr_result_to_charseg(result, components, ids_cleaned, segmentation); align_log.recolor("result", result); } virtual void save(FILE *f) { classifier->save(f); } virtual void load(FILE *f) { classifier->load(f); } }; IRecognizeLine *make_NewGroupingLineOCR(ICharacterClassifier *classifier, ISegmentLine *segmenter, bool use_line_info) { return new NewGroupingLineOCR(classifier, segmenter, use_line_info); } }