last-svn-commit-882-gf8dae9b Improve results.

* scribo/primitive/extract/lines_h_thick_and_thin.hh: Fix structural element length. * scribo/primitive/extract/non_text_hdoc.hh: Filter too small images. * scribo/text/paragraphs.hh: Make it compile. * scribo/toolchain/internal/content_in_hdoc_functor.hh: Denoise larger objects. Fix vseparator image and binary_image_wo_seps. * src/primitive/extract/lines_thick_and_thin.cc: Be less severe on parameters. --- scribo/ChangeLog | 16 +++++++ .../primitive/extract/lines_h_thick_and_thin.hh | 15 ++++++- scribo/scribo/primitive/extract/non_text_hdoc.hh | 4 ++ scribo/scribo/text/paragraphs.hh | 44 ++++++++++--------- .../toolchain/internal/content_in_hdoc_functor.hh | 15 ++++--- scribo/src/content_in_hdoc.cc | 2 +- .../src/primitive/extract/lines_thick_and_thin.cc | 2 +- 7 files changed, 67 insertions(+), 31 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 5e29749..bfe43b2 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,3 +1,19 @@ +2011-05-19 Guillaume Lazzara <z@lrde.epita.fr> + + Improve results. + + * scribo/primitive/extract/lines_h_thick_and_thin.hh: Fix structural element length. + + * scribo/primitive/extract/non_text_hdoc.hh: Filter too small images. + + * scribo/text/paragraphs.hh: Make it compile. + + * scribo/toolchain/internal/content_in_hdoc_functor.hh: Denoise + larger objects. Fix vseparator image and binary_image_wo_seps. + + * src/primitive/extract/lines_thick_and_thin.cc: Be less severe on + parameters. + 2011-05-18 Julien Marquegnies <marquegnies@lrde.epita.fr> Improve paragraph grouping for historical documents. 
diff --git a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh index b9b44b4..511da9f 100644 --- a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh +++ b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh @@ -49,6 +49,7 @@ # include <mln/pw/all.hh> # include <mln/core/routine/duplicate.hh> # include <mln/win/rectangle2d.hh> +# include <mln/win/hline2d.hh> # include <mln/morpho/dilation.hh> # include <mln/data/convert.hh> @@ -63,6 +64,7 @@ # include <scribo/core/def/lbl_type.hh> # include <scribo/primitive/internal/rd.hh> +# include <scribo/debug/logger.hh> namespace scribo @@ -485,8 +487,15 @@ namespace scribo mln_concrete(I) mask = internal::detect_thick(tags); internal::add_thin(tags, mask); + + debug::logger().log_image(debug::AuxiliaryResults, + mask, "lines_h_thick_and_thin_mask"); + image2d<bool> output = internal::rd3_fast(mask, binary_image, - length, delta); + 2 * length, 2 * delta); + + debug::logger().log_image(debug::AuxiliaryResults, + output, "lines_h_thick_and_thin_output_before_filter"); // Remove invalid lines typedef scribo::def::lbl_type V; @@ -499,6 +508,10 @@ namespace scribo if (bbox(e).width() < filter_factor * length || bbox(e).width() / bbox(e).height() < 3) data::fill(((output | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false); + debug::logger().log_image(debug::Results, + output, "lines_h_thick_and_thin_output"); + + trace::exiting("scribo::primitive::extract::lines_h_thick_and_thin"); return output; } diff --git a/scribo/scribo/primitive/extract/non_text_hdoc.hh b/scribo/scribo/primitive/extract/non_text_hdoc.hh index 4924189..97e1f0e 100644 --- a/scribo/scribo/primitive/extract/non_text_hdoc.hh +++ b/scribo/scribo/primitive/extract/non_text_hdoc.hh @@ -47,6 +47,8 @@ # include <scribo/filter/objects_small.hh> # include <scribo/filter/objects_on_border.hh> +# include <scribo/filter/objects_v_thin.hh> +# include <scribo/filter/objects_h_thin.hh> 
//DEBUG #include <mln/util/timer.hh> @@ -111,6 +113,8 @@ namespace scribo elements = scribo::filter::components_small(elements, 200); elements = scribo::filter::components_on_border(elements); + elements = scribo::filter::objects_v_thin(elements, 100); + elements = scribo::filter::objects_h_thin(elements, 100); // Debug { diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh index 9a59e02..af01581 100644 --- a/scribo/scribo/text/paragraphs.hh +++ b/scribo/scribo/text/paragraphs.hh @@ -30,10 +30,10 @@ namespace scribo //------------------------------------- // Extracting root of links //------------------------------------- - template <typename T> + template <typename L> inline unsigned - find_root(util::array<T>& parent, unsigned x) + find_root(line_links<L>& parent, unsigned x) { unsigned tmp_x = x; @@ -50,10 +50,10 @@ namespace scribo return x; } - template <typename T> + template <typename L> inline void - set_root(util::array<T>& parent, unsigned x, const unsigned root) + set_root(line_links<L>& parent, unsigned x, const unsigned root) { while (parent(x) != x && parent(x) != root) { @@ -799,27 +799,29 @@ namespace scribo } } - // Only debug + // Post link processing + + const line_links<L> backup = output.duplicate(); + for (unsigned i = 0; i < output.nelements(); ++i) + { + const line_id_t current_neighbor = backup(i); + output(i) = scribo::internal::find_root(output, i); + const line_id_t root_index = output(i); + + for (unsigned j = 0; j < right.nelements(); ++j) + { + if (i != j && + current_neighbor != i && + right(j) == i) + scribo::internal::set_root(output, j, root_index); + } + } + + // Only debug // { // image2d<value::rgb8> debug = data::convert(value::rgb8(), input); - // const util::array<value::int_u16> backup = output; - // for (unsigned i = 0; i < output.nelements(); ++i) - // { - // const value::int_u16 current_neighbor = backup(i); - // output(i) = internal::find_root(output, i); - // const value::int_u16 root_index 
= output(i); - - // for (unsigned j = 0; j < right.nelements(); ++j) - // { - // if (i != j && - // current_neighbor != i && - // right(j) == i) - // internal::set_root(output, j, root_index); - // } - // } - // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements()); // for_all_lines(l, lines) // if (lines(l).is_textline()) diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh index 92db8a7..e7d14ff 100644 --- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh +++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh @@ -182,14 +182,14 @@ namespace scribo mln_ch_value(I,bool) vseparators = preprocessing::rotate_90( primitive::extract::lines_h_thick_and_thin( - preprocessing::rotate_90(processed_image), 101, 3, 0.05, 0.80, 2), false), + preprocessing::rotate_90(processed_image), 101, 3, 0.2, 0.6, 1), false), hseparators = primitive::extract::lines_h_thick_and_thin( processed_image, 101, 3); doc.set_vline_separators(vseparators); doc.set_hline_separators(hseparators); - separators = vseparators; + separators = duplicate(vseparators); separators += hseparators; border::resize(processed_image, border::thickness); @@ -202,7 +202,6 @@ namespace scribo input_cleaned = primitive::remove::separators(processed_image, separators); - doc.set_binary_image_wo_seps(input_cleaned); on_progress(); } @@ -231,7 +230,7 @@ namespace scribo { on_new_progress_label("Denoise..."); - input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), 3); + input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), 10); // Debug # ifndef SCRIBO_NDEBUG @@ -242,13 +241,15 @@ namespace scribo on_progress(); } + doc.set_binary_image_wo_seps(input_cleaned); + /// Finding components. on_new_progress_label("Finding components..."); + // NOTE: Component features computation is disabled. 
V ncomponents; component_set<L> - components = scribo::primitive::extract::components(original_image, - input_cleaned, + components = scribo::primitive::extract::components(input_cleaned, c8(), ncomponents); @@ -269,7 +270,7 @@ namespace scribo on_new_progress_label("Filtering components"); - components = scribo::filter::components_small(components, 3); + components = scribo::filter::components_small(components, 10); on_progress(); diff --git a/scribo/src/content_in_hdoc.cc b/scribo/src/content_in_hdoc.cc index e0d2258..ed15693 100644 --- a/scribo/src/content_in_hdoc.cc +++ b/scribo/src/content_in_hdoc.cc @@ -200,7 +200,7 @@ int main(int argc, char* argv[]) // Saving results std::cout << "Saving results..." << std::endl; - scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page); +// scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page); std::cout << "End of process - " << t << std::endl; diff --git a/scribo/src/primitive/extract/lines_thick_and_thin.cc b/scribo/src/primitive/extract/lines_thick_and_thin.cc index f574b3d..fb78862 100644 --- a/scribo/src/primitive/extract/lines_thick_and_thin.cc +++ b/scribo/src/primitive/extract/lines_thick_and_thin.cc @@ -91,7 +91,7 @@ int main(int argc, char *argv[]) input, length, delta); I vseparators = preprocessing::rotate_90( primitive::extract::lines_h_thick_and_thin( - preprocessing::rotate_90(input), length, delta, 0.05, 0.80, 2), false); + preprocessing::rotate_90(input), length, delta, 0.1, 0.80, 2), false); I separators = duplicate(vseparators); separators += hseparators; -- 1.5.6.5
Participants (1): Guillaume Lazzara