last-svn-commit-736-g50ac33a Various fixes in Scribo.

* scribo/core/component_info.hh: Initialize type_ attribute. * scribo/core/line_info.hh: Rename accumulators. * scribo/debug/usage.hh: Update copyright. * scribo/toolchain/internal/text_in_doc_functor.hh: Initialize input_cleaned local variable. * tests/toolchain/nepomuk/text_extraction.cc: Support both Tesseract 2 and 3. --- scribo/ChangeLog | 16 ++++++++++++ scribo/scribo/core/component_info.hh | 8 +++--- scribo/scribo/core/line_info.hh | 26 ++++++++++--------- scribo/scribo/debug/usage.hh | 6 ++-- .../toolchain/internal/text_in_doc_functor.hh | 8 ++++- scribo/tests/toolchain/nepomuk/text_extraction.cc | 8 +++++- 6 files changed, 50 insertions(+), 22 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 7982a78..22ed824 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,21 @@ 2011-01-18 Guillaume Lazzara <z@lrde.epita.fr> + Various fixes in Scribo. + + * scribo/core/component_info.hh: Initialize type_ attribute. + + * scribo/core/line_info.hh: Rename accumulators. + + * scribo/debug/usage.hh: Update copyright. + + * scribo/toolchain/internal/text_in_doc_functor.hh: Initialize + input_cleaned local variable. + + * tests/toolchain/nepomuk/text_extraction.cc: Support both + Tesseract 2 and 3. + +2011-01-18 Guillaume Lazzara <z@lrde.epita.fr> + * demo/viewer/Makefile.am: Explicitly link to libtiff. 2011-01-13 Guillaume Lazzara <z@lrde.epita.fr> diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh index 8345266..4ed6db7 100644 --- a/scribo/scribo/core/component_info.hh +++ b/scribo/scribo/core/component_info.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -97,7 +97,7 @@ namespace scribo inline component_info::component_info() - : id_(0), tag_(component::Ignored) + : id_(0), tag_(component::Ignored), type_(component::Undefined) { } @@ -109,7 +109,7 @@ namespace scribo const mln::point2d& mass_center, unsigned card) : id_(id), bbox_(bbox), mass_center_(mass_center), card_(card), - tag_(component::None), line_id_(0) + tag_(component::None), type_(component::Undefined), line_id_(0) { } diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh index ed31d17..54a5094 100644 --- a/scribo/scribo/core/line_info.hh +++ b/scribo/scribo/core/line_info.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -801,8 +801,8 @@ namespace scribo typedef mln::value::int_u<12> median_data_t; typedef mln::accu::stat::median_h<median_data_t> median_t; median_t - absolute_meanline, - absolute_baseline, + meanline, + baseline, char_space, char_width; @@ -871,13 +871,15 @@ namespace scribo if (bb.width() <= 1000) char_width.take(bb.width()); - // Meanline (compute an absolute value, from the top left - // corner of the image). - absolute_meanline.take(bb.pmin().row() - ref_line); + // Meanline (compute an absolute value, from the top left corner + // of the highest character bounding box, excluding + // punctuation). + meanline.take(bb.pmin().row() - ref_line); - // Baseline (compute an absolute value, from the top left - // corner of the image). - absolute_baseline.take(bb.pmax().row() - ref_line); + // Baseline (compute an absolute value, from the top left corner + // of the highest character bounding box, excluding + // punctuation). + baseline.take(bb.pmax().row() - ref_line); } // Finalization @@ -899,8 +901,8 @@ namespace scribo char_width_ = char_width.to_result(); mln::def::coord - absolute_baseline_r = absolute_baseline.to_result() + ref_line, - absolute_meanline_r = absolute_meanline.to_result() + ref_line; + absolute_baseline_r = baseline.to_result() + ref_line, + absolute_meanline_r = meanline.to_result() + ref_line; baseline_ = absolute_baseline_r; meanline_ = absolute_meanline_r; diff --git a/scribo/scribo/debug/usage.hh b/scribo/scribo/debug/usage.hh index 7a284e3..28ab145 100644 --- a/scribo/scribo/debug/usage.hh +++ b/scribo/scribo/debug/usage.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -68,7 +68,7 @@ namespace scribo << std::endl; std::cout << "-----------" << std::endl; - std::cout << "EPITA/LRDE - Scribo 2008-2010" << std::endl; + std::cout << "EPITA/LRDE - Scribo 2008-2011" << std::endl; return 1; } diff --git a/scribo/scribo/toolchain/internal/text_in_doc_functor.hh b/scribo/scribo/toolchain/internal/text_in_doc_functor.hh index fdbee90..16e981d 100644 --- a/scribo/scribo/toolchain/internal/text_in_doc_functor.hh +++ b/scribo/scribo/toolchain/internal/text_in_doc_functor.hh @@ -1,4 +1,5 @@ -// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE) +// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory +// (LRDE) // // This file is part of Olena. // @@ -132,7 +133,10 @@ namespace scribo text_in_doc_functor<I>::operator()(const Image<I>& input) { // Remove separators - mln_ch_value(I,bool) separators, input_cleaned; + mln_ch_value(I,bool) + separators, + input_cleaned = exact(input); + if (enable_line_seps) { on_new_progress_label("Find vertical separators..."); diff --git a/scribo/tests/toolchain/nepomuk/text_extraction.cc b/scribo/tests/toolchain/nepomuk/text_extraction.cc index c475aa9..30051ae 100644 --- a/scribo/tests/toolchain/nepomuk/text_extraction.cc +++ b/scribo/tests/toolchain/nepomuk/text_extraction.cc @@ -1,4 +1,5 @@ -// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE) +// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory +// (LRDE) // // This file is part of Olena. // @@ -43,6 +44,11 @@ int main() QSet<QString> words = scribo::toolchain::nepomuk::text_extraction(ima); mln_assertion(words.size() == 1); + +#ifndef HAVE_TESSERACT_3 mln_assertion(words.contains("Wildly")); +#else // HAVE_TESSERACT_2 + mln_assertion(words.contains("wildly")); +#endif // ! HAVE_TESSERACT_3 return 0; } -- 1.5.6.5
participants (1)
-
Guillaume Lazzara