olena: olena-2.0-576-g056a7da scribo/text/recognition.hh: Aesthetic changes.

--- scribo/ChangeLog | 4 + scribo/scribo/text/recognition.hh | 171 ++++++++++++++++++------------------- 2 files changed, 89 insertions(+), 86 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 412f271..bd66473 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,9 @@ 2013-06-25 Roland Levillain <roland@lrde.epita.fr> + * scribo/text/recognition.hh: Aesthetic changes. + +2013-06-25 Roland Levillain <roland@lrde.epita.fr> + Restrict workaround-related locale changes to LC_NUMERIC. * scribo/text/recognition.hh: Here. diff --git a/scribo/scribo/text/recognition.hh b/scribo/scribo/text/recognition.hh index d74ae5c..00e18d5 100644 --- a/scribo/scribo/text/recognition.hh +++ b/scribo/scribo/text/recognition.hh @@ -38,9 +38,9 @@ # define HAVE_TESSERACT_2 # endif +# include <clocale> # include <ostream> -# include <clocale> # include <mln/core/image/dmorph/image_if.hh> # include <mln/core/concept/neighborhood.hh> @@ -79,13 +79,11 @@ namespace scribo namespace text { - using namespace mln; - /*! \brief Passes the text bboxes to Tesseract (OCR). \param[in] lines The lines of text. \param[in] language The language which should be recognized by - Tesseract. (fra, en, ...) + Tesseract. (fra, en, ...) \ingroup grpocr */ @@ -98,9 +96,9 @@ namespace scribo \param[in] line Image of text line. \param[in] language The language which should be recognized by - Tesseract. (fra, en, ...) + Tesseract. (fra, en, ...) \param[in] output_file If set, store the recognized text in - this file. + this file. \ingroup grpocr */ @@ -114,12 +112,15 @@ namespace scribo # ifndef MLN_INCLUDE_ONLY - # ifdef HAVE_TESSERACT_2 + + template <typename L> void recognition(line_set<L>& lines, const char *language) { + using namespace mln; + mln_trace("scribo::text::recognition"); /* Tesseract is known to have issues while reading training @@ -142,8 +143,7 @@ namespace scribo typedef mln_ch_value(L,bool) I; - - /// Use text bboxes with Tesseract + /// Use text bboxes with Tesseract. 
for_all_lines(i, lines) { if (! lines(i).is_textline()) @@ -158,12 +158,13 @@ namespace scribo I text_ima(box); data::fill(text_ima, false); - // Careful : background is set to 'False' + // Careful: background is set to 'false'. const component_set<L>& comp_set = lines.components(); const L& lbl = comp_set.labeled_image(); - // Extract each character components to create the line image. - const mln::util::array<component_id_t>& comps = lines(i).component_ids(); + // Extract each character component to create the line image. + const mln::util::array<component_id_t>& comps = + lines(i).component_ids(); for_all_elements(e, lines(i).component_ids()) { unsigned comp_id = comps(e); @@ -192,10 +193,10 @@ namespace scribo // Recognize characters. char* s = TessBaseAPI::TesseractRect( (unsigned char*) line_image.buffer(), - sizeof (bool), // Pixel size. - line_image.ncols() * sizeof (bool), // Row_offset - 0, // Left - 0, // Top + sizeof (bool), // pixel size + line_image.ncols() * sizeof (bool), // row offset + 0, // left + 0, // top line_image.ncols(), // n cols line_image.nrows()); // n rows @@ -207,12 +208,11 @@ namespace scribo } // The string has been allocated by Tesseract. It must be released. - delete [] s; + delete[] s; } // Restore the default locale from the environment. setlocale(LC_NUMERIC, ""); - } @@ -222,6 +222,8 @@ namespace scribo const char *language, const std::string& output_file) { + using namespace mln; + mln_trace("scribo::text::recognition"); const I& line = exact(line_); @@ -252,52 +254,51 @@ namespace scribo // Recognize characters. char* s = TessBaseAPI::TesseractRect( (unsigned char*) text_ima.buffer(), - sizeof (bool), // Pixel size. 
- text_ima.ncols() * sizeof (bool), // Row_offset - 0, // Left - 0, // Top + sizeof (bool), // pixel size + text_ima.ncols() * sizeof (bool), // row offset + 0, // left + 0, // top text_ima.ncols(), // n cols text_ima.nrows()); // n rows - if (s != 0) + if (s != 0) { if (!output_file.empty()) - { - std::string str(s); - str = str.substr(0, str.length() - 1); - file << line.domain().bbox().pmin().row() - << " " - << line.domain().bbox().pmin().col() - << " " - << line.domain().bbox().pmax().row() - << " " - << line.domain().bbox().pmax().col() - << " " - << str; - } + { + std::string str(s); + str = str.substr(0, str.length() - 1); + file << line.domain().bbox().pmin().row() + << " " + << line.domain().bbox().pmin().col() + << " " + << line.domain().bbox().pmax().row() + << " " + << line.domain().bbox().pmax().col() + << " " + << str; + } } - // The string has been allocated by Tesseract. We must free it. - delete [] s; + // The string has been allocated by Tesseract. We must free it. + delete[] s; - if (!output_file.empty()) - file.close(); - - // See the above explanations about setlocale. - setlocale(LC_NUMERIC, ""); + if (!output_file.empty()) + file.close(); + // See the above explanations about setlocale. + setlocale(LC_NUMERIC, ""); } - # else // HAVE_TESSERACT_3 - template <typename L> void recognition(line_set<L>& lines, const char *language) { + using namespace mln; + mln_trace("scribo::text::recognition"); // See the above explanations about setlocale. @@ -314,8 +315,7 @@ namespace scribo typedef mln_ch_value(L,bool) I; - - /// Use text bboxes with Tesseract + // Use text bboxes with Tesseract. for_all_lines(i, lines) { if (! lines(i).is_textline()) @@ -332,12 +332,13 @@ namespace scribo I text_ima(box); data::fill(text_ima, false); - // Careful : background is set to 'False' + // Careful: background is set to 'false'. 
const component_set<L>& comp_set = lines.components(); const L& lbl = comp_set.labeled_image(); - // Extract each character components to create the line image. - const mln::util::array<component_id_t>& comps = lines(i).component_ids(); + // Extract each character component to create the line image. + const mln::util::array<component_id_t>& comps = + lines(i).component_ids(); for_all_elements(e, lines(i).component_ids()) { unsigned comp_id = comps(e); @@ -352,10 +353,11 @@ namespace scribo // Recognize characters. tess.SetImage( (unsigned char*) &text_ima(text_ima.domain().pmin()), - text_ima.ncols(), // n cols - text_ima.nrows(), // n rows - sizeof (bool), // Pixel size. - text_ima.ncols() * sizeof (bool) + 2 * text_ima.border()); // Row_offset + text_ima.ncols(), // n cols + text_ima.nrows(), // n rows + sizeof (bool), // pixel size + text_ima.ncols() * sizeof (bool) + + 2 * text_ima.border()); // row offset char *s = tess.GetUTF8Text(); if (s != 0) @@ -374,13 +376,14 @@ namespace scribo } - template <typename I> void recognition(const Image<I>& line_, const char *language, const std::string& output_file) { + using namespace mln; + mln_trace("scribo::text::recognition"); const I& line = exact(line_); @@ -404,48 +407,44 @@ namespace scribo // Recognize characters. char* s = tess.TesseractRect( (unsigned char*) &line(line.domain().pmin()), - sizeof (bool), // Pixel size. 
- line.ncols() * sizeof (bool) + line.border() * 2, // Row_offset - 0, // Left - 0, // Top - line.ncols(), // n cols - line.nrows()); // n rows - - if (s != 0) + sizeof (bool), // pixel size + line.ncols() * sizeof (bool) + line.border() * 2, // row offset + 0, // left + 0, // top + line.ncols(), // n cols + line.nrows()); // n rows + + if (s != 0) { if (!output_file.empty()) - { - std::string str(s); - str = str.substr(0, str.length() - 1); - file << line.domain().bbox().pmin().row() - << " " - << line.domain().bbox().pmin().col() - << " " - << line.domain().bbox().pmax().row() - << " " - << line.domain().bbox().pmax().col() - << " " - << str; - } + { + std::string str(s); + str = str.substr(0, str.length() - 1); + file << line.domain().bbox().pmin().row() + << " " + << line.domain().bbox().pmin().col() + << " " + << line.domain().bbox().pmax().row() + << " " + << line.domain().bbox().pmax().col() + << " " + << str; + } } - // The string has been allocated by Tesseract. We must free it. - delete [] s; + // The string has been allocated by Tesseract. We must free it. + delete[] s; - if (!output_file.empty()) - file.close(); - - // Restore the default locale from the environment. - setlocale(LC_NUMERIC, ""); + if (!output_file.empty()) + file.close(); + // Restore the default locale from the environment. + setlocale(LC_NUMERIC, ""); } # endif // ! HAVE_TESSERACT_2 - - - # endif // ! MLN_INCLUDE_ONLY } // end of namespace scribo::text -- 1.7.10.4
participants (1)
- Roland Levillain