// NOTE(review): this copy of classify-chars.cc appears to be damaged and should
// be restored from the repository before it is compiled or edited:
//   * line breaks have been collapsed, so every `//` comment below now also
//     swallows the code that follows it on the same physical line;
//   * text between '<' and '>' looks like it was stripped (e.g.
//     `autodel classifier;`, `objlist variants;`, `dynamic_cast(classifier.ptr())`,
//     `for(int i=0;idiscriminant(result, features);`), so template arguments,
//     loop bounds and some whole method bodies are missing;
//   * the text stops in the middle of `train()` in namespace ocropus
//     (`for(int i=0; i`).
//
// Outline of what is still visible:
//   * Anonymous namespace: eight Logger objects ("classchar.conf_train",
//     "classchar.conf_test", "classchar.conf_train_reduced",
//     "classchar.conf_test_reduced", "classchar.chars", "classchar.garbages",
//     "classchar.f_chars", "classchar.f_garbages"), nb_max_features = 9,
//     the possible_features table (nine FeatureExtractor::FeatureType values),
//     the matching possible_feature_names table, and the feature mask string
//     OUR_PREVIOUS_FEATURES = "111111100".
//   * struct LineCharacterClassifier : ICharacterClassifier
//       - cls(result, i) copies variants[i] into result; cost(i) returns
//         costs[i]; length() asserts that variants and costs have the same
//         length and returns variants.length().
//       - load(stream) loads the class map, then the classifier, sets
//         init = true and reads one token; if it is "dim_features" it reads
//         dim_f_x and dim_f_y and another token, and if that token is
//         "features" it reads a count that is asserted to be
//         <= nb_max_features. Per the original comment, read_check_point is
//         deliberately not used so that older saved classifiers without
//         feature information can still be loaded.
//       - save(stream) (only partly visible) writes a "dim_features"
//         checkpoint, dim_f_x, dim_f_y, a "features" checkpoint and
//         nb_max_features.
//       - addTrainingChar(...) (two variants, both partly visible) throws the
//         string "addTrainingChar cannot handle multiple characters" when
//         characters.length() != 1. When init is true the class is looked up
//         with map.get_class_no_add() and the sample is skipped if that
//         returns -1; otherwise map.get_class() is used. GARBAGE samples are
//         only added after a rand()/float(RAND_MAX) comparison whose right-hand
//         side is missing here -- presumably garbage_portion; TODO confirm
//         against the repository copy.
//       - finishTraining() sets "ninput" and "noutput" on the classifier when
//         init is false, calls classifier->train(), logs the train/test
//         confusion matrices (full and reduced) when the dynamic_cast result
//         BC is non-null, and finally sets init = true.
//       - set(key, value) handles "garbage_portion" (stored in
//         garbage_portion) and "degrade" (stored as bool in usedegrade) and
//         forwards every other key to classifier->set().
// -*- C++ -*- // Copyright 2006-2007 Deutsches Forschungszentrum fuer Kuenstliche Intelligenz // or its licensors, as applicable. // // You may not use this file except under the terms of the accompanying license. // // Licensed under the Apache License, Version 2.0 (the "License"); you // may not use this file except in compliance with the License. You may // obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Project: ocr-bpnet - neural network classifier // File: classify-chars.cc // Purpose: train/recognize characters with feature extraction and classmap information // Responsible: kapry // Reviewer: rangoni // Primary Repository: // Web Sites: www.iupr.org, www.dfki.de #include "colib.h" #include "classmap.h" #include "charlib.h" #include "classify-chars.h" #include "feature-extractor.h" #include "narray-io.h" #include "additions.h" #include "confusion-matrix.h" #include "bpnet.h" #include "didegrade.h" using namespace ocropus; using namespace colib; using namespace iupr_bpnet; namespace { // log the final confusion matrix for the training dataset Logger logger_confusion_map_train("classchar.conf_train"); // log the final confusion matrix for the testing dataset Logger logger_confusion_map_test("classchar.conf_test"); // log the final confusion matrix in a list-layout for the training dataset Logger logger_conf_map_train_reduced("classchar.conf_train_reduced"); // log the final confusion matrix in a list-layout for the testing dataset Logger logger_conf_map_test_reduced("classchar.conf_test_reduced"); // log the characters added to the classifier, they are normalized in a 40*40 box Logger 
logger_chars("classchar.chars"); // log the garbages added to the classifier, they are normalized in a 40*40 box Logger logger_garbage("classchar.garbages"); // log the feature map of the characters added to the classifier Logger logger_f_chars("classchar.f_chars"); // log the feature map of the garbages added to the classifier Logger logger_f_garbage("classchar.f_garbages"); int nb_max_features = 9; FeatureExtractor::FeatureType possible_features[] = { FeatureExtractor::BAYS, FeatureExtractor::GRAD, FeatureExtractor::INCL, FeatureExtractor::IMAGE, FeatureExtractor::SKEL, FeatureExtractor::SKELPTS, FeatureExtractor::RELSIZE, FeatureExtractor::SKEL2, FeatureExtractor::HISTO }; char possible_feature_names[][10] = {"BAYS","GRAD","INCL","IMAGE","SKEL", "SKELPTS","RELSIZE","SKEL2","HISTO"}; char OUR_PREVIOUS_FEATURES[] = "111111100"; // char OUR_FEATURES[] = "111101110"; } struct LineCharacterClassifier : ICharacterClassifier { autodel classifier; autodel extractor; ClassMap map; objlist variants; floatarray costs; bool output_garbage; int ninput; bool init; intarray whichfeatures; int dim_f_x, dim_f_y; bool usedegrade; // if set to true, the characters and garbages are degraded on the fly when feeding the classifier float garbage_portion; // 0.0 means no garbage, 1.0 means all garbage void append_our_features(FeatureExtractor &extractor, floatarray &features, bytearray &image, bool with_line_info = true) { for(int i=0;idiscriminant(result, features); for(int i=0; isetLineInfo(baseline,xheight_y-baseline); append_our_features(*extractor, features, in); floatarray result; classifier->discriminant(result, features); for(int i=0;ifeat2image(png, features); additions::save_char(png,where); } virtual void cls(nustring &result, int i) { copy(result, variants[i]); } virtual float cost(int i) { return costs[i]; } virtual int length() { ASSERT(variants.length() == costs.length()); return variants.length(); } virtual void load(FILE *stream) { // the load method does not use the 
read_check_point since it is // necessary to handle previously trained and saved classifiers which // do not include 'feature' information map.load(stream); classifier->load(stream); init = true; char aux[1024];aux[0]='\0'; CHECK_CONDITION(fscanf(stream,"%1000s",aux)==1); if(strcmp(aux,"dim_features") == 0) { // if feature size information CHECK_CONDITION(fscanf(stream,"%d",&dim_f_x)==1); CHECK_CONDITION(fscanf(stream,"%d",&dim_f_y)==1); CHECK_CONDITION(fscanf(stream,"%s",aux)==1); if(strcmp(aux,"features") == 0) { // if feature name information int tmp; CHECK_CONDITION(fscanf(stream,"%d",&tmp)==1); ASSERT(tmp<=nb_max_features); for(int i=0; isave(stream); write_checkpoint(stream,"dim_features"); fprintf(stream, "%d\n", dim_f_x); fprintf(stream, "%d\n", dim_f_y); write_checkpoint(stream,"features"); fprintf(stream, "%d\n", nb_max_features); for(int i=0;isetLineInfo(base_y, xheight_y - base_y); if(usedegrade) { degrade(image); } append_our_features(*extractor, features, image); ninput = features.length(); int cls; ASSERTWARN(characters[0].ord()!=32); // maybe it's an error to put a white space if(init) { cls = map.get_class_no_add(characters[0].ord()); if(cls!=-1) { if(characters[0].ord() != GARBAGE) { classifier->add(features, cls); } else { if((rand()/float(RAND_MAX))add(features, cls); } } } } else { cls = map.get_class(characters[0].ord()); if(characters[0].ord() != GARBAGE) { classifier->add(features, cls); } else { if((rand()/float(RAND_MAX))add(features, cls); } } } } virtual void addTrainingChar(bytearray &image, nustring &characters) { if(characters.length() != 1) { throw "addTrainingChar cannot handle multiple characters"; } floatarray features; append_our_features(*extractor, features, image, false); ninput = features.length(); int cls; if(init) { cls = map.get_class_no_add(characters[0].ord()); if(cls!=-1) { if(characters[0].ord() != GARBAGE) { classifier->add(features, cls); } else { if((rand()/float(RAND_MAX))add(features, cls); } } } } else { cls = 
map.get_class(characters[0].ord()); if(characters[0].ord() != GARBAGE) { classifier->add(features, cls); } else { if((rand()/float(RAND_MAX))add(features, cls); } } } } virtual void startTraining(const char *type) { } virtual void finishTraining() { if(!init) { classifier->set("ninput", ninput); classifier->set("noutput", map.length()); } classifier->train(); BpnetClassifier* BC = dynamic_cast(classifier.ptr()); if (BC) { strbuf aux; logger_confusion_map_train.format("Final train confusion matrix"); BC->best_confusion_train->logMatrix(aux,map); logger_confusion_map_train.html((char*)aux); logger_confusion_map_test.format("Final test confusion matrix"); BC->best_confusion_test->logMatrix(aux,map); logger_confusion_map_test.html((char*)aux); //printf("===== Train =====\n"); //BC->best_confusion_train->printReduced(stdout,map); //printf("===== Test =====\n"); //BC->best_confusion_test->printReduced(stdout,map); logger_conf_map_train_reduced.format("Final train reduced confusion matrix"); BC->best_confusion_train->logReducedConfusion(aux,map); logger_conf_map_train_reduced.html((char*)aux); logger_conf_map_test_reduced.format("Final test reduced confusion matrix"); BC->best_confusion_test->logReducedConfusion(aux,map); logger_conf_map_test_reduced.html((char*)aux); } init = true; } virtual void set(const char *key, double value) { if(strcmp(key,"garbage_portion") == 0) { garbage_portion = value; } else if(strcmp(key,"degrade") == 0) { usedegrade = bool(value); } else { classifier->set(key, value); } } }; namespace ocropus { void train(ICharacterClassifier &classifier, ICharacterLibrary &charlib) { for(int i=0; i