
URL: https://svn.lrde.epita.fr/svn/oln/branches/cleanup-2008/milena/sandbox ChangeLog: 2008-11-02 Jimmy Ma <jimmy.ma@lrde.epita.fr> Cleanup OCR preprocessing. * garrigues/ocr/ocr_with_preprocess.cc, garrigues/ocr/ocr_without_preprocess.cc: Cleanup the call to tesseract. * garrigues/ocr/tesseract_wrap.hh: New. Provide a Tesseract wrapper which is also able to compute an approximate confidence value of the recognition based on tesseract internal data. --- ocr_with_preprocess.cc | 64 ++++++++++++++++++-------------- ocr_without_preprocess.cc | 24 ++++-------- tesseract_wrap.hh | 91 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 135 insertions(+), 44 deletions(-) Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc =================================================================== --- branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc (revision 2759) +++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc (revision 2760) @@ -53,7 +53,7 @@ #include <mln/labeling/regional_maxima.hh> #include <mln/morpho/dilation.hh> -#include <tesseract/baseapi.h> +#include "tesseract_wrap.hh" // _COMPILATION_ // g++ -DNDEBUG -O3 -I../../.. 
ocr.cc -L/usr/lib -ltesseract_full -lpthread @@ -61,19 +61,6 @@ // Call tesseract // lang: expected language -template <typename T> -char* tesseract(const char* lang, const mln::image2d<T>& input) -{ - TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL); - char* s = TessBaseAPI::TesseractRect( - (unsigned char*) input.buffer(), - sizeof (T), - input.ncols() * sizeof (T), - 0, 0, - input.ncols(), - input.nrows()); - return s; -} int main(int argc, char** argv) { @@ -92,7 +79,12 @@ io::pbm::load(input, argv[1]); - char* s = tesseract("fra", input); + { + image2d<int_u8> tmp = clone(cast_image<int_u8>(input)); + float score = 0; + char* s = tesseract("fra", tmp, &score); + std::cerr << "Tesseract result: (score " << score << ")" << std::endl; std::cout << s; - free(s); + delete[] s; + } } Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/tesseract_wrap.hh =================================================================== --- branches/cleanup-2008/milena/sandbox/garrigues/ocr/tesseract_wrap.hh (revision 0) +++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/tesseract_wrap.hh (revision 2760) @@ -0,0 +1,91 @@ +#ifndef TESSERACT_WRAP_HH +# define TESSERACT_WRAP_HH + +# include <cassert> + +# include <mln/core/image/image2d.hh> + +# include <tesseract/baseapi.h> + +struct TessWrap : public TessBaseAPI +{ + static int InitWithLanguage(const char* datapath, const char* outputbase, + const char* language, const char* configfile, + bool numeric_mode, int argc, char* argv[]) + { + return TessBaseAPI::InitWithLanguage(datapath, outputbase, + language, configfile, + numeric_mode, argc, argv); + } + + static char* TesseractRect(const unsigned char* imagedata, + int bytes_per_pixel, + int bytes_per_line, + int left, int top, int width, int height, + float* score) + { + int kMinRectSize = 10; // Quick and dirty... (value imported from tesseract src) + if (width < kMinRectSize || height < kMinRectSize) + return NULL; // Nothing worth doing. 
+ + // Copy/Threshold the image to the tesseract global page_image. + CopyImageToTesseract(imagedata, bytes_per_pixel, bytes_per_line, + left, top, width, height); + + { // RecognizeToString + BLOCK_LIST* block_list = FindLinesCreateBlockList(); + PAGE_RES* page_res = Recognize(block_list, NULL); + + char* string; + int* lengths; + float* costs; + int* x0; + int* y0; + int* x1; + int* y1; + // Retrieve Tesseract internal data to compute the quality score. + int n = TessBaseAPI::TesseractExtractResult(&string, &lengths, &costs, &x0, &y0, &x1, &y1, page_res); + + float average_uncertainty = 0.f; + for (int i = 0; i < n; ++i) + average_uncertainty += costs[i]; + + if (n) + *score = average_uncertainty / n; + else + *score = -1; + + // Some memory cleanup + delete[] string; + delete[] lengths; + delete[] costs; + delete[] x0; + delete[] y0; + delete[] x1; + delete[] y1; + + return TesseractToText(page_res); + } + } +}; + +// Call tesseract +// lang: expected language; score: non-null out-parameter for the recognition score +template <typename T> +char* tesseract(const char* lang, const mln::image2d<T>& input, float* score) +{ + assert(score); + TessWrap::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL); + char* s = TessWrap::TesseractRect( + (unsigned char*) input.buffer(), + sizeof (T), + input.ncols() * sizeof (T), + 0, 0, + input.ncols(), + input.nrows(), + score); + return s; +} + + +#endif // ! TESSERACT_WRAP_HH Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc =================================================================== --- branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc (revision 2759) +++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc (revision 2760) @@ -57,28 +57,24 @@ #include <mln/io/pbm/save.hh> #include <mln/logical/not.hh> -#include <tesseract/baseapi.h> - +#include "tesseract_wrap.hh" // _COMPILATION_ // g++ -DNDEBUG -O3 -I../../.. 
ocr.cc -L/usr/lib -ltesseract_full -lpthread - -// Call tesseract -// lang: expected language -template <typename T> -char* tesseract(const char* lang, const mln::image2d<T>& input) -{ - TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL); - char* s = TessBaseAPI::TesseractRect( - (unsigned char*) input.buffer(), - sizeof (T), - input.ncols() * sizeof (T), - 0, 0, - input.ncols(), - input.nrows()); - return s; +#if 0 +# define TEST(Var) \ + { \ + image2d<int_u8> tmp = clone(cast_image<int_u8>(Var)); \ + float score = 0.f; \ + char* s = tesseract("fra", tmp, &score); \ + std::cerr << #Var << ": " << score << std::endl << s; \ + delete[] s; \ } +#else +# define TEST(Var) +#endif + int main(int argc, char** argv) { @@ -96,20 +92,23 @@ mln::border::thickness = 0; io::pbm::load(input, argv[1]); + TEST(input); // Resize - std::cerr << "Enlarge the image" << std::endl; + //std::cerr << "Enlarge the image" << std::endl; image2d<int_u8> enlarged = enlarge(logical::not_(input), 2); //image2d<bool> enlarged = geom::resize(logical::not_(input), 4); - io::pgm::save(enlarged, "1_enlage.pgm"); + io::pgm::save(enlarged, "1_enlarge.pgm"); + TEST(enlarged); // Blur. - std::cerr << "Blur the enlarged image" << std::endl; + //std::cerr << "Blur the enlarged image" << std::endl; // image2d<int_u8> blur = linear::gaussian(fun::p2v::ternary(pw::value(enlarged), pw::cst(int_u8(255)), pw::cst(int_u8(0))) | enlarged.domain(), // 4); - image2d<int_u8> blur = linear::gaussian(enlarged, 1); + image2d<int_u8> blur = linear::gaussian(clone(enlarged), 1); io::pgm::save(blur, "2_gaussian.pgm"); + TEST(blur); // Crest. // image2d<bool> c = crest(enlarged, blur, c4()); @@ -120,7 +119,7 @@ // Threshold image2d<bool> binary; { - std::cerr << "Threshold the blur image" << std::endl; + //std::cerr << "Threshold the blur image" << std::endl; // // Compute the histogram. 
// histo::data<int_u8> h = histo::compute(blur); @@ -141,26 +140,35 @@ binary(p) = blur(p) > 100; io::pbm::save(binary, "3_threshold.pbm"); + TEST(binary); } // Skeleton - std::cerr << "Compute the skeleton" << std::endl; + //std::cerr << "Compute the skeleton" << std::endl; image2d<bool> skel = skeleton(binary, 4); io::pbm::save(skel, "4_skeleton.pbm"); + TEST(skel); // Dilation - std::cerr << "Dilate the skeleton" << std::endl; + //std::cerr << "Dilate the skeleton" << std::endl; win::octagon2d oct(7); for (unsigned i = 0; i < 1; i++) skel = morpho::dilation(skel, oct); io::pbm::save(skel, "5_dilation.pbm"); + TEST(skel); io::pbm::save(skel, argv[2]); - std::cerr << "Text recognition" << std::endl; - char* s = tesseract("fra", clone(logical::not_(skel))); - std::cerr << "Tesseract result:"<< std::endl; + //std::cerr << "Text recognition" << std::endl; + //char* s = tesseract("fra", clone(logical::not_(skel))); + { + image2d<int_u8> tmp = clone(cast_image<int_u8>(skel)); + float score = 0; + char* s = tesseract("fra", tmp, &score); + std::cerr << "Tesseract result: (score " << score << ")" << std::endl; std::cout << s; - free(s); + delete[] s; + } + }