 
            * scribo/core/line_info.hh, * scribo/text/recognition.hh: Here. --- scribo/ChangeLog | 7 ++ scribo/scribo/core/line_info.hh | 11 ++- scribo/scribo/text/recognition.hh | 189 +++++++++++++++++++++++++++++-------- 3 files changed, 167 insertions(+), 40 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index a105435..e8fe131 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,3 +1,10 @@ +2011-11-29 Guillaume Lazzara <z@lrde.epita.fr> + + Make recognition faster and store recognition confidence. + + * scribo/core/line_info.hh, + * scribo/text/recognition.hh: Here. + 2011-11-23 Guillaume Lazzara <z@lrde.epita.fr> Regenerate mk files. diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh index 2913f81..b121205 100644 --- a/scribo/scribo/core/line_info.hh +++ b/scribo/scribo/core/line_info.hh @@ -137,6 +137,7 @@ namespace scribo // sqrt(Max(VAR(R), VAR(G), VAR(B))) float color_reliability_; + float text_confidence_; std::string text_; std::string html_text_; @@ -237,9 +238,12 @@ namespace scribo bool indented() const; bool has_text() const; + // Returns the percentage of confidence of the recognized text. If + // no text has been recognized, it returns -1. + float text_confidence() const; const std::string& text() const; const std::string& html_text() const; - void update_text(const std::string& str); + void update_text(const std::string& str, float confidence); bool is_valid() const; @@ -400,6 +404,8 @@ namespace scribo reading_orientation_ = 0.; indented_ = false; + + text_confidence_ = -1; } } // end of namespace scribo::internal @@ -769,8 +775,9 @@ namespace scribo template <typename L> void - line_info<L>::update_text(const std::string& str) + line_info<L>::update_text(const std::string& str, float confidence = 1.0) { + data_->text_confidence_ = confidence; data_->text_ = str; data_->html_text_ = scribo::internal::html_markups_replace(str); } diff --git a/scribo/scribo/text/recognition.hh b/scribo/scribo/text/recognition.hh index b7e5c91..358ff24 100644 --- a/scribo/scribo/text/recognition.hh +++ b/scribo/scribo/text/recognition.hh @@ -59,6 +59,8 @@ # include <tesseract/baseapi.h> +# include <tesseract/ocrclass.h> +# include <tesseract/resultiterator.h> # if !defined HAVE_TESSERACT_2 && !defined HAVE_TESSERACT_3 @@ -100,6 +102,7 @@ namespace scribo # ifndef MLN_INCLUDE_ONLY +# ifdef HAVE_TESSERACT_2 template <typename L> void recognition(line_set<L>& lines, const char *language) @@ -108,19 +111,8 @@ namespace scribo // Initialize Tesseract. -# ifdef HAVE_TESSERACT_2 - TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL); -# else // HAVE_TESSERACT_3 - tesseract::TessBaseAPI tess; - if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1) - { - std::cout << "Error: cannot initialize tesseract!" << std::endl; - abort(); - } - tess.SetPageSegMode(tesseract::PSM_SINGLE_LINE); - -# endif // HAVE_TESSERACT_2 + TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL); typedef mln_ch_value(L,bool) I; @@ -148,7 +140,8 @@ namespace scribo for_all_elements(e, lines(i).component_ids()) { unsigned comp_id = comps(e); - data::fill(((text_ima | comp_set(comp_id).bbox()).rw() | (pw::value(lbl) == pw::cst(comp_id))).rw(), + data::fill(((text_ima | comp_set(comp_id).bbox()).rw() + | (pw::value(lbl) == pw::cst(comp_id))).rw(), true); } @@ -170,7 +163,6 @@ namespace scribo data::paste_without_localization(text_ima, line_image); // Recognize characters. -# ifdef HAVE_TESSERACT_2 char* s = TessBaseAPI::TesseractRect( (unsigned char*) line_image.buffer(), sizeof (bool), // Pixel size. @@ -179,15 +171,6 @@ namespace scribo 0, // Top line_image.ncols(), // n cols line_image.nrows()); // n rows -# else // HAVE_TESSERACT_3 - tess.SetImage( - (unsigned char*) line_image.buffer(), - line_image.ncols(), // n cols - line_image.nrows(), // n rows - sizeof (bool), // Pixel size. - line_image.ncols() * sizeof (bool)); // Row_offset - char* s = tess.GetUTF8Text(); -# endif // ! HAVE_TESSERACT_2 if (s != 0) { @@ -216,16 +199,7 @@ namespace scribo mln_precondition(line.is_valid()); // Initialize Tesseract. -# ifdef HAVE_TESSERACT_2 TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL); -# else // HAVE_TESSERACT_3 - tesseract::TessBaseAPI tess; - if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1) - { - std::cout << "Error: cannot initialize tesseract!" << std::endl; - abort(); - } -# endif // ! HAVE_TESSERACT_2 std::ofstream file; if (!output_file.empty()) @@ -244,7 +218,6 @@ namespace scribo border::resize(text_ima, 0); // Recognize characters. -# ifdef HAVE_TESSERACT_2 char* s = TessBaseAPI::TesseractRect( (unsigned char*) text_ima.buffer(), sizeof (bool), // Pixel size. @@ -253,16 +226,152 @@ namespace scribo 0, // Top text_ima.ncols(), // n cols text_ima.nrows()); // n rows + + if (s != 0) + { + if (!output_file.empty()) + { + std::string str(s); + str = str.substr(0, str.length() - 1); + file << line.domain().bbox().pmin().row() + << " " + << line.domain().bbox().pmin().col() + << " " + << line.domain().bbox().pmax().row() + << " " + << line.domain().bbox().pmax().col() + << " " + << str; + } + } + + // The string has been allocated by Tesseract. We must free it. + delete [] s; + + if (!output_file.empty()) + file.close(); + + trace::exiting("scribo::text::recognition"); + } + + + # else // HAVE_TESSERACT_3 + + + + template <typename L> + void + recognition(line_set<L>& lines, const char *language) + { + trace::entering("scribo::text::recognition"); + + + // Initialize Tesseract. + tesseract::TessBaseAPI tess; + if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1) + { + std::cout << "Error: cannot initialize tesseract!" << std::endl; + abort(); + } + tess.SetPageSegMode(tesseract::PSM_SINGLE_LINE); + + typedef mln_ch_value(L,bool) I; + + + /// Use text bboxes with Tesseract + for_all_lines(i, lines) + { + if (! lines(i).is_textline()) + continue; + + mln_domain(I) box = lines(i).bbox(); + + // Make sure characters are isolated from the borders. + // Help Tesseract. + // FIXME: not needed anymore in tesseract 3 ? + // + box.enlarge(2); + + I text_ima(box); + data::fill(text_ima, false); + + // Careful : background is set to 'False' + const component_set<L>& comp_set = lines.components(); + const L& lbl = comp_set.labeled_image(); + + // Extract each character components to create the line image. + const mln::util::array<component_id_t>& comps = lines(i).component_ids(); + for_all_elements(e, lines(i).component_ids()) + { + unsigned comp_id = comps(e); + data::fill(((text_ima | comp_set(comp_id).bbox()).rw() + | (pw::value(lbl) == pw::cst(comp_id))).rw(), + true); + } + + /// Improve text quality. + text::clean_inplace(lines(i), text_ima); + + // Recognize characters. + tess.SetImage( + (unsigned char*) &text_ima(text_ima.domain().pmin()), + text_ima.ncols(), // n cols + text_ima.nrows(), // n rows + sizeof (bool), // Pixel size. + text_ima.ncols() * sizeof (bool) + 2 * text_ima.border()); // Row_offset + + char *s = tess.GetUTF8Text(); + if (s != 0) + { + tesseract::ResultIterator *it = tess.GetIterator(); + std::string str(s); + str = str.substr(0, str.length() - 2); + lines(i).update_text(str, it->Confidence(tesseract::RIL_TEXTLINE)); + } + + delete[] s; + } + + trace::exiting("scribo::text::recognition"); + } + + + + template <typename I> + void + recognition(const Image<I>& line_, + const char *language, + const std::string& output_file = std::string()) + { + trace::entering("scribo::text::recognition"); + + const I& line = exact(line_); + mln_precondition(line.is_valid()); + + // Initialize Tesseract. + tesseract::TessBaseAPI tess; + if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1) + { + std::cout << "Error: cannot initialize tesseract!" << std::endl; + abort(); + } + + std::ofstream file; + if (!output_file.empty()) + file.open(output_file.c_str()); + + mln_domain(I) box = line.domain(); + + // Recognize characters. char* s = tess.TesseractRect( - (unsigned char*) text_ima.buffer(), + (unsigned char*) &line(line.domain().pmin()), sizeof (bool), // Pixel size. - text_ima.ncols() * sizeof (bool), // Row_offset + line.ncols() * sizeof (bool) + line.border() * 2, // Row_offset 0, // Left 0, // Top - text_ima.ncols(), // n cols - text_ima.nrows()); // n rows -# endif // ! HAVE_TESSERACT_2 + line.ncols(), // n cols + line.nrows()); // n rows if (s != 0) { @@ -292,6 +401,10 @@ namespace scribo } +# endif // ! HAVE_TESSERACT_2 + + + # endif // ! MLN_INCLUDE_ONLY -- 1.7.2.5