
* text/clean.hh: Use upsampling::eagle. * text/recognition.hh: Make sure there is a border around the text even after having resized images. --- scribo/ChangeLog | 9 +++++++++ scribo/text/clean.hh | 3 ++- scribo/text/recognition.hh | 32 ++++++++++++++++++++------------ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 7b779f7..f4fce54 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,14 @@ 2010-06-25 Guillaume Lazzara <z@lrde.epita.fr> + Improve text recognition. + + * text/clean.hh: Use upsampling::eagle. + + * text/recognition.hh: Make sure there is a border around the + text even after having resized images. + +2010-06-25 Guillaume Lazzara <z@lrde.epita.fr> + * binarization/sauvola_ms.hh: Fix an invalid test. 2010-06-25 Guillaume Lazzara <z@lrde.epita.fr> diff --git a/scribo/text/clean.hh b/scribo/text/clean.hh index 341d64c..1a9e458 100644 --- a/scribo/text/clean.hh +++ b/scribo/text/clean.hh @@ -68,6 +68,7 @@ #include <scribo/core/line_info.hh> #include <scribo/upsampling/bs2x.hh> +#include <scribo/upsampling/eagle.hh> #include <scribo/subsampling/bilinear.hh> @@ -119,7 +120,7 @@ namespace scribo << fact << std::endl; while (fact < 0.90) { - output = scribo::upsampling::bs2x(output); // 2x upsampling + output = scribo::upsampling::eagle(output); // 2x upsampling fact *= 2.0f; // std::cout << "fact = " << fact // << " - output.domain = " << output.domain() diff --git a/scribo/text/recognition.hh b/scribo/text/recognition.hh index a3ccd50..35d343a 100644 --- a/scribo/text/recognition.hh +++ b/scribo/text/recognition.hh @@ -43,6 +43,7 @@ # include <mln/util/array.hh> # include <mln/data/fill.hh> # include <mln/data/paste.hh> +# include <mln/data/paste_without_localization.hh> # include <mln/pw/all.hh> # include <mln/core/alias/w_window2d_int.hh> @@ -125,6 +126,7 @@ namespace scribo std::cout << "x_height = " << lines(i).x_height() << std::endl; mln_domain(I) box = lines(i).bbox(); + // Make sure characters are isolated from the borders. // Help Tesseract. box.enlarge(2); @@ -146,23 +148,29 @@ namespace scribo /// Improve text quality. - /// text_ima_cleaned domain is larger than text_ima's. - I text_ima_cleaned = text::clean(lines(i), text_ima); -// mln::io::pbm::save(text_ima_cleaned, mln::debug::filename("line.pbm", debug_id++)); + /// text_ima_cleaned domain may be larger than text_ima's. + text::clean_inplace(lines(i), text_ima); +// mln::io::pbm::save(text_ima_cleaned, mln::debug::filename("line.pbm", debug_id++)); + // Make sure characters are isolated from the borders. + // Help Tesseract. + mln_domain(I) lbox = text_ima.domain(); + lbox.enlarge(lines(i).char_space() + 2); + I line_image(lbox, 0); // Make sure there is no border! + data::fill(line_image, false); + data::paste_without_localization(text_ima, line_image); - // Make sure there is no border. - border::resize(text_ima_cleaned, 0); +// mln::io::pbm::save(line_image, mln::debug::filename("line_image.pbm", debug_id++)); // Recognize characters. char* s = TessBaseAPI::TesseractRect( - (unsigned char*) text_ima_cleaned.buffer(), - sizeof (bool), // Pixel size. - text_ima_cleaned.ncols() * sizeof (bool), // Row_offset - 0, // Left - 0, // Top - text_ima_cleaned.ncols(), // n cols - text_ima_cleaned.nrows()); // n rows + (unsigned char*) line_image.buffer(), + sizeof (bool), // Pixel size. + line_image.ncols() * sizeof (bool), // Row_offset + 0, // Left + 0, // Top + line_image.ncols(), // n cols + line_image.nrows()); // n rows if (s != 0) -- 1.5.6.5