// -*- C++ -*- // Copyright 2006-2007 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz // or its licensors, as applicable. // // You may not use this file except under the terms of the accompanying license. // // Licensed under the Apache License, Version 2.0 (the "License"); you // may not use this file except in compliance with the License. You may // obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Project: // File: ocr-utils.cc // Purpose: miscellaneous routines // Responsible: mezhirov // Reviewer: // Primary Repository: // Web Sites: www.iupr.org, www.dfki.de /* NOTE(review): this copy of the file appears damaged. Line breaks were collapsed, so every line comment now swallows the code that follows it on the same physical line, and text that stood between an opening angle bracket and the next closing one (the system include name, template arguments, loop conditions, whole statements) seems to have been stripped. Restore the file from the primary repository before building. The comments added below describe only what is still visible here. */ #include #include "colib.h" #include "imgio.h" #include "imglib.h" #include "ocr-utils.h" #include "sysutil.h" #include "ocr-segmentations.h" using namespace iulib; using namespace colib; namespace ocropus { param_bool bgcheck("bgcheck", true, "abort on detecting an inverted image"); /* Replaces every element v of a with 255 - v, in place. */ void invert(bytearray &a) { int n = a.length1d(); for (int i = 0; i < n; i++) { a.at1d(i) = 255 - a.at1d(i); } } /* Clips crop_rect to the bounds of source, resizes result to the clipped box plus pad on every side, fills it with def_val, and copies source pixels only at positions where mask is nonzero. CHECK_ARG requires background_seems_black(mask). */ void crop_masked(bytearray &result, bytearray &source, rectangle crop_rect, bytearray &mask, int def_val, int pad) { CHECK_ARG(background_seems_black(mask)); rectangle box(0, 0, source.dim(0), source.dim(1)); box.intersect(crop_rect); result.resize(box.width() + 2 * pad, box.height() + 2 * pad); fill(result, def_val); for(int x = 0; x < box.width(); x++) { for(int y = 0; y < box.height(); y++) { if(mask(x + box.x0, y + box.y0)) result(x + pad, y + pad) = source(x + box.x0, y + box.y0); } } } /* Sums the pixels of rows 0 and top over the full width and of columns 0 and right over rows 1..top-1, then returns the integer quotient sum / ((right + top) * 2). NOTE(review): for a 1x1 array right + top is 0, so the divisor is zero - confirm callers never pass one. */ int average_on_border(colib::bytearray &a) { int sum = 0; int right = a.dim(0) - 1; int top = a.dim(1) - 1; for(int x = 0; x < a.dim(0); x++) sum += a(x, 0); for(int x = 
0; x < a.dim(0); x++) sum += a(x, top); for(int y = 1; y < top; y++) sum += a(0, y); for(int y = 1; y < top; y++) sum += a(right, y); // If average border intensity is between 127-128, inverting the // image does not work correctly float average_border_intensity = sum / ((right + top) * 2.0); ASSERTWARN(average_border_intensity<=127 || average_border_intensity>=128); return sum / ((right + top) * 2); } // FIXME use imgmorph stuff /* NOTE(review): the body of blit2d and the header of the routine that follows it are cut off in this copy. */ void blit2d(bytearray &dest, const bytearray &src, int shift_x, int shift_y) { int w = src.dim(0); int h = src.dim(1); for (int x=0;x &bboxes,int max_height) { int i,d; histogram.resize(max_height); for(i=0;i=0 && d &bboxes,int h,int min_height,float smooth) { int i; int best_i; float best_v; floatarray histogram; boxes_height_histogram(histogram, bboxes, h); gauss1d(histogram,smooth); best_i = -1; best_v = -1.0; for(i=min_height;i bboxes; bounding_boxes(bboxes, seg); return estimate_boxes_height(bboxes, seg.dim(1), 10, 4); } /* Prints a message to stderr and calls exit(0) when stream is null. NOTE(review): the drawing code after the image is filled with 0xff is truncated in this copy. */ void plot_hist(FILE *stream, floatarray &hist){ if(!stream){ fprintf(stderr,"Unable to open histogram image stream.\n"); exit(0); } int maxval = 1000; int len = hist.length(); narray image(len, maxval); fill(image,0xff); for(int x=0; x=width) ? width-1 : r.x1; bottom = (r.y1>=height) ? height-1 : r.y1; if(right <= left || bottom <= top) return; for(int x=left; x=width) ? width-1 : r.x1; bottom = (r.y1>=height) ? height-1 : r.y1; if(right <= left || bottom <= top) return; for(int x=left; x=width) ? width-1 : r.x1; bottom = (r.y1>=height) ? 
height-1 : r.y1; if(right < left || bottom < top) return; int x,y; if(!inverted){ for(x=left; x<=right; x++){ image(x,top) &=color; } for(x=left; x<=right; x++){ image(x,bottom) &=color; } for(y=top; y<=bottom; y++){ image(left,y) &=color; } for(y=top; y<=bottom; y++){ image(right,y) &=color; } }else{ for(x=left; x<=right; x++){ image(x,top) |=color; } for(x=left; x<=right; x++){ image(x,bottom) |=color; } for(y=top; y<=bottom; y++){ image(left,y) |=color; } for(y=top; y<=bottom; y++){ image(right,y) |=color; } } } /* Draws the one-pixel outline of r, clipped to the image bounds: color is AND-ed into the outline pixels, or OR-ed when inverted is true. Returns without drawing when the clipped rectangle is empty. */ void paint_box_border(bytearray &image, rectangle r, byte color, bool inverted){ int width = image.dim(0); int height = image.dim(1); int left, top, right, bottom; left = (r.x0<0) ? 0 : r.x0; top = (r.y0<0) ? 0 : r.y0; right = (r.x1>=width) ? width-1 : r.x1; bottom = (r.y1>=height) ? height-1 : r.y1; if(right < left || bottom < top) return; int x,y; if(!inverted){ for(x=left; x<=right; x++){ image(x,top) &=color; } for(x=left; x<=right; x++){ image(x,bottom) &=color; } for(y=top; y<=bottom; y++){ image(left,y) &=color; } for(y=top; y<=bottom; y++){ image(right,y) &=color; } }else{ for(x=left; x<=right; x++){ image(x,top) |=color; } for(x=left; x<=right; x++){ image(x,bottom) |=color; } for(y=top; y<=bottom; y++){ image(left,y) |=color; } for(y=top; y<=bottom; y++){ image(right,y) |=color; } } } /* Returns immediately when factor is 0. NOTE(review): the loop over the boxes and the header of the next routine are truncated in this copy. */ static void subsample_boxes(narray &boxes, int factor) { int len = boxes.length(); if (factor == 0) return; for(int i=0; i &rects, int downsample_factor, int color){ int ds = downsample_factor; if(ds <= 0) ds = 1; int width = in.dim(0); int height = in.dim(1); int xdim = width/ds; int ydim = height/ds; out.resize(xdim, ydim); for(int ix=0; ix boxes; copy(boxes,rects); if(ds > 1) subsample_boxes(boxes, ds); for(int i=0, len=boxes.length(); i &rects, int downsample_factor, int color, int border_color){ int ds = downsample_factor; if(ds <= 0) ds = 1; int width = in.dim(0); int height = in.dim(1); int xdim = width/ds; int ydim = height/ds; out.resize(xdim, ydim); 
for(int ix=0; ix boxes; copy(boxes,rects); if(ds > 1) subsample_boxes(boxes, ds); for(int i=0, len=boxes.length(); i bboxes; bounding_boxes(bboxes, seg); intarray tops, bottoms; makelike(tops, bboxes); makelike(bottoms, bboxes); for(int i = 0; i < bboxes.length(); i++) { tops[i] = bboxes[i].y1; bottoms[i] = bboxes[i].y0; } baseline = median(bottoms) + 1; xheight = median(tops) - baseline; descender = baseline - 0.4 * xheight; ascender = baseline + 2 * xheight; } // File-local version string; NULL until set_version_string() stores one. static const char *version_string = NULL; // Returns the stored version string (NULL if none has been stored). const char *get_version_string() { return version_string; } // Stores new_version_string when nothing is stored yet; otherwise only ASSERTs that the argument is non-null and strcmp-equal to the stored string. Only the pointer is kept, no copy is made. void set_version_string(const char *new_version_string) { if (version_string) { ASSERT(new_version_string && !strcmp(version_string, new_version_string)); } else { version_string = new_version_string; } } Timers::Timers() { } /* Prints one "time NAME VALUE" line to stderr for each of the six timers. */ void Timers::report() { fprintf(stderr,"time binarizer %g\n",*binarizer); fprintf(stderr,"time cleanup %g\n",*cleanup); fprintf(stderr,"time page_segmenter %g\n",*page_segmenter); fprintf(stderr,"time line_segmenter %g\n",*line_segmenter); fprintf(stderr,"time ocr %g\n",*ocr); fprintf(stderr,"time langmod %g\n",*langmod); } /* Calls reset() on each of the six timers. */ void Timers::reset() { binarizer.reset(); cleanup.reset(); page_segmenter.reset(); line_segmenter.reset(); ocr.reset(); langmod.reset(); } #if 0 // FIXME static Timers ocr_timers; void report_ocr_timings() { ocr_timers.report(); } void reset_ocr_timings() { ocr_timers.reset(); } Timers &get_ocr_timings() { return ocr_timers; } #endif /* Requires stdev and m_x to have equal length and to be NaN-free. Subtracts m_x(d) from feature(d) and, in the first branch, also divides by stdev(d). NOTE(review): the loop bound and the branch condition are truncated in this copy; presumably the division is used when stdev(d) is positive - confirm against the repository. */ void normalize_input_classify(floatarray &feature,doublearray &stdev,doublearray &m_x) { CHECK_ARG(stdev.length()==m_x.length()); ASSERT(is_nan_free(m_x)); ASSERT(is_nan_free(stdev)); int ninput = m_x.length(); // normalize for(int d=0;d0) { feature(d) = (feature(d)-m_x(d))/stdev(d); } else { feature(d) = feature(d)-m_x(d); //var=0: all the same; } } } void align_segmentation(intarray &segmentation,narray &bboxes) { intarray temp; make_line_segmentation_black(segmentation); 
renumber_labels(segmentation,1); int nsegs = max(segmentation)+1; intarray counts; counts.resize(nsegs,bboxes.length()); fill(counts,0); for(int i=0;imc) { mj = j; mc = counts(i,j); } } segmap(i) = mj; } for(int i=0;i &bboxes) { intarray charseg; copy(charseg,overseg); align_segmentation(charseg,bboxes); idmap_of_correspondences(result,charseg,overseg); } namespace { /* Resizes a to m.dim(1). NOTE(review): the copy loop and the routines that follow are truncated in this copy. */ void getrow(intarray &a,intarray &m,int i) { a.resize(m.dim(1)); for(int j=0;j tolerance) { nover++; } else { nmis++; } } } for(int j=1;j tolerance) { nunder++; } else { nmis++; } } } } void ocr_result_to_charseg(intarray &cseg,idmap &map,intarray &ids,intarray &segmentation,bool map_all) { make_line_segmentation_black(segmentation); makelike(cseg,segmentation); fill(cseg,0); intarray cseg_to_char; cseg_to_char.resize(max(segmentation)+1); fill(cseg_to_char,-1); for(int i=0;i // or something. // segments shouldn't be shared between characters //if(!(cseg_to_char(cs)==-1 || cseg_to_char(cs)==chars_allocated)) // throw "segments are shared between multiple ids"; cseg_to_char(cs) = i + 1; } } for(int i=0;i &bboxes,intarray &segmentation) { make_line_segmentation_black(segmentation); CHECK_ARG(max(segmentation)<100000); intarray counts(max(segmentation)+1,bboxes.length()); fill(counts,0); for(int i=0;i void remove_small_components(narray &bimage,int mw,int mh) { intarray image; copy(image,bimage); label_components(image); narray rects; bounding_boxes(rects,image); bytearray good(rects.length()); for(int i=0;i(narray &,int,int); template void remove_small_components(narray &,int,int); template void remove_marginal_components(narray &bimage,int x0,int y0,int x1,int y1) { intarray image; copy(image,bimage); label_components(image); narray rects; bounding_boxes(rects,image); if(rects.length()>0) { x1 = bimage.dim(0)-x1; y1 = bimage.dim(1)-y1; bytearray good(rects.length()); fill(good, 1); for(int i=0;i= ? 
if(r.x1 < x0 || r.x0 > x1 || r.y1 < y0 || r.y0 > y1) { good[i] = 0; } } for(int i=0;i(narray &,int,int,int,int); template void remove_marginal_components(narray &,int,int,int,int); /* Inverts line in place, labels the result and computes the bounding boxes of the labels; lower and upper are 33% and 67% of the height. NOTE(review): the test that clears good[i] and the final loop are truncated in this copy. */ void remove_neighbour_line_components(bytearray &line) { invert(line); intarray image; copy(image,line); label_components(image); narray rects; bounding_boxes(rects,image); if(rects.length()>0) { int h = line.dim(1); int lower = int(h*0.33); int upper = int(h*0.67); bytearray good(rects.length()); fill(good, 1); for(int i=0;i upper)) ) good[i] = 0; } for(int i=0;i &components, const char *s, const char *delimiters) { /* Clears components, then splits s at every character found in delimiters and pushes each non-empty piece as a NUL-terminated strbuf. */ components.clear(); if(!*s) return; while(1) { const char *p = s; while(*p && !strchr(delimiters, *p)) p++; int len = p - s; if(len) { strbuf &item = components.push(); item.ensure(len + 1); strncpy(item, s, len); item[len] = '\0'; } if(!*p) return; s = p + 1; } } /* Uses the midpoint of max(image) and min(image) as the threshold. NOTE(review): the per-pixel loop and the header of the next routine are truncated in this copy. */ int binarize_simple(bytearray &result, bytearray &image) { int threshold = (max(image)+min(image))/2; makelike(result,image); for(int i=0;i &hist){ /* Divides every bin of hist by the total, in place, then accumulates bins from index 0 until the running sum reaches 0.5; the returned index is one past the last bin added. */ int index= 0; float partial_sum = 0, sum = 0 ; for(int i = 0; i < hist.length(); i++) sum += hist(i); for(int j = 0; j < hist.length(); j++) hist(j) /= sum; while(partial_sum < 0.5) { partial_sum += hist(index); index++; } return index; } /* Formats the message printf-style into a function-static 1000-byte buffer with vsnprintf and throws a pointer to that buffer as const char *. */ void throw_fmt(const char *format, ...) 
{ va_list v; va_start(v, format); static char buf[1000]; vsnprintf(buf, sizeof(buf), format, v); va_end(v); throw (const char *) buf; } /* When the bgcheck parameter is true, applies CHECK_CONDITION to background_seems_black(a); otherwise does nothing. */ void optional_check_background_is_darker(colib::bytearray &a) { if(bgcheck) { CHECK_CONDITION(background_seems_black(a)); } } /* When the bgcheck parameter is true, applies CHECK_CONDITION to background_seems_white(a); otherwise does nothing. */ void optional_check_background_is_lighter(colib::bytearray &a) { if(bgcheck) { CHECK_CONDITION(background_seems_white(a)); } } /* NOTE(review): the clipping and fill loops of paint_rectangles and the header of the formatting helper that follows are truncated in this copy; rectangle i is painted with color i+1. */ void paint_rectangles(intarray &image,rectarray &rectangles) { int w = image.dim(0), h = image.dim(1); for(int i=0;i=w?w-1:r.x1; r.y1 = r.y1>=h?h-1:r.y1; int color = i+1; for(int x=r.x0;x len) { len = written; result.ensure(len); va_start(v, format); written = vsnprintf(result, len + 1, format, v); va_end(v); } str = result; } /* Builds a one-element nustring holding code, assigns its UTF-8 encoding (from newUtf8Encode) to sb, and releases the temporary buffer with delete[]. */ void code_to_strbuf(strbuf &sb, int code) { nuchar ch(code); nustring ns(1); ns[0] = ch; char* buf = ns.newUtf8Encode(); sb = buf; delete[] buf; } /* NOTE(review): the three rotate templates below resize out (dimensions swapped for 90 and 270, unchanged for 180); their pixel loops and explicit instantiation arguments are truncated in this copy. */ template void rotate_90(narray &out, narray &in) { out.resize(in.dim(1),in.dim(0)); for (int x=0;x(narray &,narray &); template void rotate_270(narray &out, narray &in) { out.resize(in.dim(1), in.dim(0)); for (int x=0;x(narray &,narray &); template void rotate_180(narray &out, narray &in) { out.resize(in.dim(0), in.dim(1)); for (int x=0;x(narray &,narray &); }