// Copyright 2007 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz // or its licensors, as applicable. // // You may not use this file except under the terms of the accompanying license. // // Licensed under the Apache License, Version 2.0 (the "License"); you // may not use this file except in compliance with the License. You may // obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Project: ocr-bpnet // File: feature-extractor.h // Purpose: feature extractor class // Responsible: kapry // Reviewer: // Primary Repository: // Web Sites: www.iupr.org, www.dfki.de #include "feature-extractor.h" #include "imglib.h" #include "checks.h" #include "ocr-utils.h" //#include "peel.cc" #include "logger.h" using namespace colib; using namespace iulib; using namespace ocropus; namespace { Logger logger("fe"); } #define STANDARD_Y 40 #define MAX_REL_SIZE 1.5 #define IN(i,j,image) ((i>=0)&&(i<(image).dim(0))&&(j>=0)&&(j<(image).dim(1))) #define IN_BOX(i,j,box) ((i>=(box).x0)&&(i<=(box).x1)&&(j>=(box).y0)&&(j<=(box).y1)) // if boundary pixel: DO delete... // WE ASSUME A BINARY IMAGE! static int n_x[8] = {0,1,1,1,0,-1,-1,-1}; static int n_y[8] = {1,1,0,-1,-1,-1,0,1}; // _____________________________________________________________________________ void embed_img_with_white(bytearray &out,bytearray &in,int size) { out.resize(in.dim(0)+2*size,in.dim(1)+2*size); fill(out,255); for (int i=0;i 0 would be nice! int xmin=img.dim(0)-1,xmax=0,ymin=img.dim(1)-1,ymax=0; for (int i=0; ixmax?i:xmax; ymax=j>ymax?j:ymax; } return rectangle(xmin,ymin,max(xmax,1),max(ymax,1)); } rectangle find_one_box(bytearray &img, int T) { int xmin=img.dim(0)-1,xmax=0,ymin=img.dim(1)-1,ymax=0; for (int i=0; ixmax?i:xmax; ymax=j>ymax?j:ymax; } } } return rectangle(xmin,ymin,max(xmax,1),max(ymax,1)); } float interpolate_bilinear_from_gray(bytearray &img,float x,float y) { int valll, valhl, vallh, valhh; int xl = (int)x; int yl = (int)y; float u = x - xl; float v = y - yl; float coll, cohl, colh, cohh; coll = (1-u)*(1-v); cohl = u*(1-v); colh = (1-u)*v; cohh = u*v; if (IN(xl,yl,img)) valll = img(xl,yl); else valll = 255; if (IN(xl,yl+1,img)) vallh = img(xl,yl+1); else vallh = 255; if (IN(xl+1,yl,img)) valhl = img(xl+1,yl); else valhl = 255; if (IN(xl+1,yl+1,img)) valhh = img(xl+1,yl+1); else valhh = 255; return coll*valll + cohl*valhl + colh*vallh + cohh*valhh; } // T Threshold, z.B. 128. byte interpolate_bin(bytearray &img,float x,float y, byte T) { byte valll, valhl, vallh, valhh; int xl = (int)x; int yl = (int)y; float u = x - xl; float v = y - yl; float coll, cohl, colh, cohh; coll = (1-u)*(1-v); cohl = u*(1-v); colh = (1-u)*v; cohh = u*v; if (IN(xl,yl,img)) valll = img(xl,yl); else valll = 0; if (IN(xl,yl+1,img)) vallh = img(xl,yl+1); else vallh = 0; if (IN(xl+1,yl,img)) valhl = img(xl+1,yl); else valhl = 0; if (IN(xl+1,yl+1,img)) valhh = img(xl+1,yl+1); else valhh = 0; if (valll > 0) valll = 255; if (vallh > 0) vallh = 255; if (valhl > 0) valhl = 255; if (valhh > 0) valhh = 255; float val = coll*valll + cohl*valhl + colh*vallh + cohh*valhh; if (val 0) valll = 255; if (vallh > 0) vallh = 255; if (valhl > 0) valhl = 255; if (valhh > 0) valhh = 255; return coll*valll + cohl*valhl + colh*vallh + cohh*valhh; } // scale box from source image to box in dest image void scale_box_gray_from_gray(bytearray &dst, rectangle bDst, bytearray &src, rectangle bSrc) { int w = bDst.width(); int h = bDst.height(); float u,v; float xSrc, ySrc; for (int i=0; i<=w; i++) for (int j=0; j<=h; j++) { u = (float)i/w; v = (float)j/h; xSrc = bSrc.x0 + u * (bSrc.x1-bSrc.x0); ySrc = bSrc.y0 + v * (bSrc.y1-bSrc.y0); if(IN(bDst.x0+i,bDst.y0+j,dst)) { int val = (int)interpolate_bilinear_from_gray(src,xSrc,ySrc); dst(bDst.x0+i,bDst.y0+j) = val; } } } // scale box from source image to box in dest image void scale_box_gray(bytearray &dst, rectangle bDst, bytearray &src, rectangle bSrc) { int w = bDst.width(); int h = bDst.height(); float u,v; float xSrc, ySrc; for (int i=0; i<=w; i++) for (int j=0; j<=h; j++) { u = (float)i/w; v = (float)j/h; xSrc = bSrc.x0 + u * (bSrc.x1-bSrc.x0); ySrc = bSrc.y0 + v * (bSrc.y1-bSrc.y0); if(IN(bDst.x0+i,bDst.y0+j,dst)) { int val = (int)interpolate_bilinear(src,xSrc,ySrc); dst(bDst.x0+i,bDst.y0+j) = val; } } } // scale box from source image to box in dest image void scale_box_bin_from_gray(bytearray &dst, rectangle bDst, bytearray &src, rectangle bSrc, int T) { int w = bDst.width(); int h = bDst.height(); float u,v; float xSrc, ySrc; for (int i=0; i<=w; i++) for (int j=0; j<=h; j++) { u = (float)i/w; v = (float)j/h; xSrc = bSrc.x0 + u * (bSrc.x1-bSrc.x0); ySrc = bSrc.y0 + v * (bSrc.y1-bSrc.y0); if(IN(bDst.x0+i,bDst.y0+j,dst)) { float val = (int)interpolate_bilinear_from_gray(src,xSrc,ySrc); dst(bDst.x0+i,bDst.y0+j) = val=high) do{x-=d;} while(x>=high); return x; } bool color_neighbors(bytearray &img,rectangle b,point p,int color) { int x = p.x; int y = p.y; bool change=false; for (int i=-1;i<=1;i++) for (int j=-1;j<=1;j++) if ((i!=0||j!=0) && IN_BOX(x+i,y+j,b) && IN(x+i,y+j,img) && img(x+i,y+j)==255) { set(img,x+i,y+j,color); change=true; } return change; } bool is_boundary_pixel_wang(colib::bytearray &img, point p, int flag) { int x = p.x; int y = p.y; if (img(x,y)!=0) return false; byte vals[8]; for (int i=0; i<8; i++) vals[i] = get(img,x+n_x[i],y+n_y[i])<255?0:255; // some of the neighbors may be labelled with 1, but in fact are 0! // first criterion: only ONE white-black-passage int bw_count = 0; for (int i=0; i<8; i++) if (vals[i]==255 && vals[(i+1)%8]==0) bw_count++; if (bw_count>1) return false; // second criterion: at least three black pixel neighbors int b_count = 0; for (int i=0; i<8; i++) if (vals[i]==0) b_count++; if (b_count<2 || b_count>6) return false; // according to wang: two possibilities. if (flag==1) if (vals[0]*vals[2]*vals[4]==0 && vals[2]*vals[4]*vals[6]==0 && vals[5]>0) return true; if (flag==2) if (vals[0]*vals[2]*vals[6]==0 && vals[0]*vals[4]*vals[6]==0 && vals[1]>0) return true; return false; } bool mark_boundary_pixels(colib::bytearray &img, rectangle b, int flag) { bool marked = false; for (int i=b.x0; i<=b.x1; i++) for (int j=b.y0; j<=b.y1; j++) { if (is_boundary_pixel_wang(img,point(i,j),flag)) { set(img,i,j,1); marked = true; } } return marked; } void delete_boundary_pixels(colib::bytearray &img, rectangle b) { for (int i=b.x0; i<=b.x1; i++) for (int j=b.y0; j<=b.y1; j++) if (get(img,i,j)==1) set(img,i,j,255); } void thinning_box(colib::bytearray &img, rectangle b) { bool change1,change2; do { change1 = change2 = false; change1 = mark_boundary_pixels(img,b,1); if (change1) delete_boundary_pixels(img,b); change2 = mark_boundary_pixels(img,b,2); if (change2) delete_boundary_pixels(img,b); } while(change1||change2); } int num_bin_neighbors(bytearray &img,int x,int y) { int n_count=0; for (int i=0; i<8; i++) if (get(img,x+n_x[i],y+n_y[i])!=255) n_count++; return n_count; } rectangle scale_feature_box(rectangle b_in, int w_dst, int h_dst) { // calc dst box int w = b_in.width(); int h = b_in.height(); rectangle dst_box; if (w>h) { float dst_height = h_dst * h/w; float m = 0.5*(h_dst-1); dst_box=rectangle(0,(int)(m-dst_height/2),w_dst-1,(int)(m+dst_height/2)); } else { float dst_width = w_dst * (h>0?(float)w/h:0); float m = 0.5*(w_dst-1); // make box a bit wider for letters like 'i' and 'l' dst_box=rectangle((int)(m-dst_width/2-1),0,(int)(m+dst_width/2+1),h_dst-1); } return dst_box.intersection(rectangle(0,0,w_dst-1,h_dst-1)); } float interpolate_bilinear_0(floatarray &img,float x,float y) { float valll, valhl, vallh, valhh; int xl = (int)x; int yl = (int)y; float u = x - xl; float v = y - yl; float coll, cohl, colh, cohh; coll = (1-u)*(1-v); cohl = u*(1-v); colh = (1-u)*v; cohh = u*v; if (IN(xl,yl,img)) valll = img(xl,yl); else valll = 0; if (IN(xl,yl+1,img)) vallh = img(xl,yl+1); else vallh = 0; if (IN(xl+1,yl,img)) valhl = img(xl+1,yl); else valhl = 0; if (IN(xl+1,yl+1,img)) valhh = img(xl+1,yl+1); else valhh = 0; return coll*valll + cohl*valhl + colh*vallh + cohh*valhh; } void scale_box_0(floatarray &dst, rectangle bDst,floatarray &src, rectangle bSrc) { int w = bDst.width(); int h = bDst.height(); float u,v; float xSrc, ySrc; for (int i=0; i<=w; i++) { for (int j=0; j<=h; j++) { u = (float)i/w; v = (float)j/h; xSrc = bSrc.x0 + u * (bSrc.x1-bSrc.x0); ySrc = bSrc.y0 + v * (bSrc.y1-bSrc.y0); dst(bDst.x0+i,bDst.y0+j) = (int)interpolate_bilinear_0(src,xSrc,ySrc); } } } rectangle whole_image(bytearray &img) { return rectangle(0,0,img.dim(0)-1,img.dim(1)-1); } bool skeletal_key_point(colib::bytearray &img, point p) { int n_neighbors=num_bin_neighbors(img,p.x,p.y); return (n_neighbors==1 || n_neighbors>2); } bool skeletal_end_point(colib::bytearray &img, point p) { int n_neighbors=num_bin_neighbors(img,p.x,p.y); return (n_neighbors<=1); } bool skeletal_junction_point(colib::bytearray &img, point p) { int n_neighbors=num_bin_neighbors(img,p.x,p.y); return (n_neighbors>2); } void mark_skeletal_points(colib::bytearray &img,colib::bytearray &img2) { fill(img2, 0); for (int i=0; i *s,int mode) { int i = p.x; int j = p.y; if(IN(i,j-1,img)) { if (img(i ,j-1) == 255) { s->push(point(i ,j-1)); } } if (mode==8) { if (IN(i+1,j-1,img)) { if (img(i+1,j-1) == 255) { s->push(point(i+1,j-1)); } } } if (IN(i+1,j ,img)) { if (img(i+1,j ) == 255) { s->push(point(i+1,j )); } } if (mode==8) { if (IN(i+1,j+1,img)) { if (img(i+1,j+1) == 255) { s->push(point(i+1,j+1)); } } } if (IN(i ,j+1,img)) { if (img(i ,j+1) == 255) { s->push(point(i ,j+1)); } } if (mode==8) { if (IN(i-1,j+1,img)) { if (img(i-1,j+1) == 255) { s->push(point(i-1,j+1)); } } } if (IN(i-1,j ,img)) { if (img(i-1,j ) == 255) { s->push(point(i-1,j )); } } if (mode==8) { if (IN(i-1,j-1,img)) { if (img(i-1,j-1) == 255) { s->push(point(i-1,j-1)); } } } } void grow_white(bytearray &src,point seed){ int MODE = 4; narray pixels; pixels.clear(); pixels.push(seed); while (pixels.length()) { point p = pixels.pop(); if (src(p.x,p.y)!=255) continue; set(src,p.x,p.y,0); push_neighbors_255(src, p, &pixels, MODE); } } bool white_pixel_on_boundary(bytearray &img, point *p) { for (int i=0; ix = i; p->y = 0; return true; } if (img(i,img.dim(1)-1)==255) { p->x = i; p->y = img.dim(1)-1; return true; } } for (int j=0; jx = 0; p->y = j; return true; } if (img(img.dim(0)-1,j)==255) { p->x = img.dim(0)-1; p->y = j; return true; } } return false; } void binarize_inclusions(colib::bytearray &img) { // find some white pixel and grow from there. point white_pix; while(white_pixel_on_boundary(img, &white_pix)) { grow_white(img,white_pix); } } void preprocess(bytearray &image) { rectangle red_box = find_one_box(image,150); bytearray img_tmp; img_tmp.resize((int)round(STANDARD_Y*(float)red_box.width()/red_box.height()),STANDARD_Y); if(red_box.width()!=0&&red_box.height()!=0) { scale_box_gray_from_gray(img_tmp,whole_image(img_tmp),image,red_box); move(image,img_tmp); } } void remove_isolated_pixels(colib::bytearray &out,colib::bytearray &in,float d,byte T) { int window = min(in.dim(0),in.dim(1)); window = max(window,1); window = int(window * d +.5); window = max(window,1); copy(out,in); for (int i=0;i gradients; floatarray grad0; floatarray grad1; floatarray grad2; floatarray grad3; gradients.clear(); move(gradients.push(),grad0); move(gradients.push(),grad1); move(gradients.push(),grad2); move(gradients.push(),grad3); for(int k=0;k=0 && image(i,j)==255;--i) { bay1(i,j) = 255; } } for (int i=0;i=0 && image(i,j)==255;--j) bay3(i,j) = 255; } gauss2d(bay0,1,1); gauss2d(bay1,1,1); gauss2d(bay2,1,1); gauss2d(bay3,1,1); rectangle dst_box = rectangle(0,0,DIM_X-1,DIM_Y-1); floatarray bay0rescaled; bay0rescaled.resize(DIM_X,DIM_Y); fill(bay0rescaled,0); scale_box_0(bay0rescaled,dst_box,bay0,image_box); //rescale(bay0rescaled,bay0,DIM_X,DIM_Y); floatarray bay1rescaled; bay1rescaled.resize(DIM_X,DIM_Y); fill(bay1rescaled,0); scale_box_0(bay1rescaled,dst_box,bay1,image_box); //rescale(bay1rescaled,bay1,DIM_X,DIM_Y); floatarray bay2rescaled; bay2rescaled.resize(DIM_X,DIM_Y); fill(bay2rescaled,0); scale_box_0(bay2rescaled,dst_box,bay2,image_box); //rescale(bay2rescaled,bay2,DIM_X,DIM_Y); floatarray bay3rescaled; bay3rescaled.resize(DIM_X,DIM_Y); fill(bay3rescaled,0); scale_box_0(bay3rescaled,dst_box,bay3,image_box); //rescale(bay3rescaled,bay3,DIM_X,DIM_Y); bays_feature.resize(4*DIM_X*DIM_Y); int c = 0; for(int i=0;i0); position_feature.resize(DIM_X); fill(position_feature,0); int x,y; // go through input_image linewise from bottom and find lowest part of the char bool leave = false; for(y=0;(y < input_image.dim(1))&&!leave;++y) { for (x=0; x < input_image.dim(0); ++x) { if ( input_image(x,y) == 0) { // found black pixel, make sure it's not alone int neighbors = 0; // check one to the right if (x < input_image.dim(0) - 1) { if ( input_image(x+1,y) == 0) ++neighbors; } if (x > 0 && y < input_image.dim(1) - 1) { // check one above if ( input_image(x,y+1) == 0) ++neighbors; // check to the left of one above if ( input_image(x-1,y+1) == 0) ++neighbors; // check to the right of one above if ((x < input_image.dim(0) - 1) && ( input_image(x+1,y+1)) == 0) ++neighbors; } // leave if we found at least one neighbor if (neighbors > 0) { leave = true; break; } } } } // distance from the baspoint of the text line float position = (float) (y - basepoint); // relative position in units of xheight float relativePosition = 0.f; // we assume a possible range of 'positions' between // 0.5 * xheight below baseline and 2.0 * xheight above baseline // that's why we must add 0.5 to position before we divide it by the // entire range of values (0.5 + 2.0 = 2.5) relativePosition = ((position + 0.5 * (float) xheight) / (2.5 * (float) xheight)); // unary coding for (int j=0; j < position_feature.dim(0); ++j) { if (((float)j / (float)(position_feature.dim(0))) < relativePosition) { position_feature(j) = 255; } } gauss1d(position_feature, 1.5); } void FeatureExtractor::calculate_relsize_feature(floatarray &relsize_feature, bytearray &input_image) { ALWAYS_ASSERT(have_line_info); CHECK_CONDITION(xheight>0); rectangle box = find_one_box(input_image); float height = box.height(); float rel_size = height/xheight; // unary coding relsize_feature.resize(DIM_X); for (int j=0;j(rel_size/MAX_REL_SIZE)) { relsize_feature(j)=0; } else relsize_feature(j)=255; } gauss1d(relsize_feature,1.5); } void FeatureExtractor::feat2image(bytearray &image,floatarray &feature) { // rotate+flip: image, inclusion, skeleton, skeleton points int c=0; int DIM_X_NFEAT = feature.length()/DIM_Y; image.resize(DIM_X_NFEAT,DIM_Y); for(int i=0;isetFeatureSize(10,10); FE->setRemoveIsolatedPixels(false); FE->have_line_info = false; return FE; } FeatureExtractor *make_FeatureExtractor(int x, int y) { FeatureExtractor* FE = new FeatureExtractor(); FE->setFeatureSize(x,y); FE->setRemoveIsolatedPixels(false); FE->have_line_info = false; return FE; } };