last-svn-commit-874-gcb19d2d Various small fixes in Scribo.

* scribo/core/document.hh: Store binary image without separators. * scribo/filter/object_links_bbox_overlap.hh: Fix use of object_links structure. * scribo/io/img/internal/debug_img_visitor.hh: Do not draw invalid elements. * scribo/make/text_components_image.hh: Add a precondition. * scribo/primitive/extract/alignments.hh: Add debug guards. * scribo/primitive/extract/lines_h_pattern.hh, * scribo/primitive/extract/lines_v_pattern.hh: Fix structural element used for dilation. * scribo/text/merging.hh: Reindent comments. * scribo/src/Makefile.am: Add content_in_hdoc target. --- scribo/ChangeLog | 24 ++++++++++++++++++++ scribo/scribo/core/document.hh | 21 +++++++++++++++++ scribo/scribo/filter/object_links_bbox_overlap.hh | 19 +++++++++------ scribo/scribo/io/img/internal/debug_img_visitor.hh | 13 ++++++++-- scribo/scribo/make/text_components_image.hh | 1 + scribo/scribo/primitive/extract/alignments.hh | 24 +++++++++++++++++++- scribo/scribo/primitive/extract/lines_h_pattern.hh | 10 +++----- scribo/scribo/primitive/extract/lines_v_pattern.hh | 10 +++----- scribo/scribo/text/merging.hh | 5 ++- scribo/src/Makefile.am | 22 ++++++++++++++++- 10 files changed, 121 insertions(+), 28 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 8e3c903..df242f1 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,29 @@ 2011-05-16 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + Various small fixes in Scribo. + + * scribo/core/document.hh: Store binary image without separators. + + * scribo/filter/object_links_bbox_overlap.hh: Fix use of + object_links structure. + + * scribo/io/img/internal/debug_img_visitor.hh: Do not draw invalid + elements. + + * scribo/make/text_components_image.hh: Add a precondition. + + * scribo/primitive/extract/alignments.hh: Add debug guards. + + * scribo/primitive/extract/lines_h_pattern.hh, + * scribo/primitive/extract/lines_v_pattern.hh: Fix structural + element used for dilation. + + * scribo/text/merging.hh: Reindent comments. + + * scribo/src/Makefile.am: Add content_in_hdoc target. + +2011-05-16 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + Add holder information to component_info. * scribo/core/component_info.hh, diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh index 0fe2be3..98c438d 100644 --- a/scribo/scribo/core/document.hh +++ b/scribo/scribo/core/document.hh @@ -116,11 +116,15 @@ namespace scribo const mln::image2d<bool>& binary_image() const; void set_binary_image(const mln::image2d<bool>& binary_image); + const mln::image2d<bool>& binary_image_wo_seps() const; + void set_binary_image_wo_seps(const mln::image2d<bool>& binary_image_wo_seps); + private: std::string filename_; mln::image2d<mln::value::rgb8> image_; mln::image2d<bool> binary_image_; + mln::image2d<bool> binary_image_wo_seps_; paragraph_set<L> parset_; component_set<L> elements_; @@ -437,6 +441,23 @@ namespace scribo template <typename L> + const mln::image2d<bool>& + document<L>::binary_image_wo_seps() const + { + return binary_image_wo_seps_; + } + + + template <typename L> + void + document<L>::set_binary_image_wo_seps( + const mln::image2d<bool>& binary_image_wo_seps) + { + binary_image_wo_seps_ = binary_image_wo_seps; + } + + + template <typename L> bool operator==(const document<L>& lhs, const document<L>& rhs) { diff --git a/scribo/scribo/filter/object_links_bbox_overlap.hh b/scribo/scribo/filter/object_links_bbox_overlap.hh index 3bf3c50..a93d849 100644 --- a/scribo/scribo/filter/object_links_bbox_overlap.hh +++ b/scribo/scribo/filter/object_links_bbox_overlap.hh @@ -78,13 +78,16 @@ namespace scribo mln_precondition(links.is_valid()); const component_set<L>& components = links.components(); - object_links<L> output(links); + object_links<L> output = links.duplicate(); - for_all_comps(i, components) - if (components(i).is_valid() && links(i) && links(i) != i) + bool has_intersection; + mln_site(L) pmin, pmax; + float ratio_i, ratio_link_i; + + for_all_links(i, links) + if (links.is_linked(i)) { - bool has_intersection = true; - mln_site(L) pmin, pmax; + has_intersection = true; for (unsigned dim = 0; dim < mln_site_(L)::dim; ++dim) { pmin[dim] = math::max(components(i).bbox().pmin()[dim], @@ -103,9 +106,9 @@ namespace scribo continue; mln_box(L) interbbox(pmin, pmax); - float - ratio_i = interbbox.nsites() /(float)components(i).bbox().nsites(), - ratio_link_i = interbbox.nsites() /(float)components(links(i)).bbox().nsites(); + + ratio_i = interbbox.nsites() /(float)components(i).bbox().nsites(); + ratio_link_i = interbbox.nsites() /(float)components(links(i)).bbox().nsites(); if (ratio_i >= max_overlap_ratio || ratio_link_i >= max_overlap_ratio) diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh index 5ad1dd3..7d1d3d7 100644 --- a/scribo/scribo/io/img/internal/debug_img_visitor.hh +++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh @@ -130,11 +130,16 @@ namespace scribo { // Prepare element edges + L lbl = duplicate(doc.elements().labeled_image()); + for_all_comps(c, doc.elements()) + if (! doc.elements()(c).is_valid()) + data::fill(((lbl | doc.elements()(c).bbox()).rw() + | (pw::value(lbl) == pw::cst(c))).rw(), 0); + // FIXME: UGLY! Too slow! scribo::def::lbl_type nlabels; component_set<L> elts = primitive::extract::components( - data::convert(bool(), mln::subsampling::antialiased(doc.elements().labeled_image(), - output_ratio)), + data::convert(bool(), mln::subsampling::antialiased(lbl, output_ratio)), c8(), nlabels); @@ -150,11 +155,13 @@ namespace scribo } else for_all_comps(c, doc.elements()) + { elts(c).update_type(doc.elements()(c).type()); + elts(c).update_tag(doc.elements()(c).tag()); + } elt_edge = morpho::elementary::gradient_external(elts.labeled_image(), c8()); -// const component_set<L>& elts = doc.elements(); for_all_comps(e, elts) if (elts(e).is_valid()) elts(e).accept(*this); diff --git a/scribo/scribo/make/text_components_image.hh b/scribo/scribo/make/text_components_image.hh index 522505e..1a30a04 100644 --- a/scribo/scribo/make/text_components_image.hh +++ b/scribo/scribo/make/text_components_image.hh @@ -65,6 +65,7 @@ namespace scribo trace::entering("scribo::make::text_components_image"); mln_precondition(doc.is_open()); + mln_precondition(doc.has_text()); mln_ch_value(L,bool) output; initialize(output, doc.image()); diff --git a/scribo/scribo/primitive/extract/alignments.hh b/scribo/scribo/primitive/extract/alignments.hh index 1e3d835..7dbf683 100644 --- a/scribo/scribo/primitive/extract/alignments.hh +++ b/scribo/scribo/primitive/extract/alignments.hh @@ -50,6 +50,7 @@ # include <scribo/core/def/lbl_type.hh> # include <scribo/primitive/extract/components.hh> # include <scribo/filter/object_links_aligned.hh> +# include <scribo/filter/object_links_bbox_overlap.hh> # include <scribo/filter/object_groups_small.hh> # include <scribo/preprocessing/denoise_fg.hh> # include <scribo/primitive/link/internal/link_single_dmax_ratio_aligned_delta_base.hh> @@ -252,7 +253,9 @@ namespace scribo : super_(components, dmax_f, delta, delta_direction), bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup) { +# ifndef SCRIBO_NDEBUG debug_ = data::convert(value::rgb8(), data::convert(bool(), bbox_ima)); +# endif // ! SCRIBO_NDEBUG } void compute_next_site_(P& p) @@ -286,7 +289,9 @@ namespace scribo for (; p.col() <= this->components_(nbh).bbox().pmax().col() && (bbox_ima_(p) == 0);) { +# ifndef SCRIBO_NDEBUG debug_(p) = literal::violet; +# endif // ! SCRIBO_NDEBUG ++p.col(); } @@ -304,7 +309,9 @@ namespace scribo for (; p.col() <= this->components_(nbh).bbox().pmax().col() && (bbox_ima_(p) == 0);) { +# ifndef SCRIBO_NDEBUG debug_(p) = literal::violet; +# endif // ! SCRIBO_NDEBUG ++p.col(); } @@ -322,7 +329,9 @@ namespace scribo L bbox_ima_; unsigned delta_ws_lookup_; +# ifndef SCRIBO_NDEBUG image2d<value::rgb8> debug_; +# endif // ! SCRIBO_NDEBUG }; @@ -346,7 +355,9 @@ namespace scribo : super_(components, dmax_f, delta, delta_direction), bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup) { +# ifndef SCRIBO_NDEBUG debug_ = data::convert(value::rgb8(), data::convert(bool(), bbox_ima)); +# endif // ! SCRIBO_NDEBUG } void compute_next_site_(P& p) @@ -381,7 +392,9 @@ namespace scribo for (; p.col() > this->components_(nbh).bbox().pmin().col() && (bbox_ima_(p) == 0);) { +# ifndef SCRIBO_NDEBUG debug_(p) = literal::violet; +# endif // ! SCRIBO_NDEBUG --p.col(); } @@ -399,7 +412,9 @@ namespace scribo for (; p.col() > this->components_(nbh).bbox().pmin().col() && (bbox_ima_(p) == 0);) { +# ifndef SCRIBO_NDEBUG debug_(p) = literal::violet; +# endif // ! SCRIBO_NDEBUG --p.col(); } @@ -417,7 +432,9 @@ namespace scribo L bbox_ima_; unsigned delta_ws_lookup_; +# ifndef SCRIBO_NDEBUG image2d<value::rgb8> debug_; +# endif // ! SCRIBO_NDEBUG }; @@ -656,11 +673,13 @@ namespace scribo top_links = primitive::link::merge_double_link_closest_aligned(left, right, anchor::StrictTopCenter); + // Remove links if component bboxes overlap too much. + top_links = filter::object_links_bbox_overlap(top_links, 0.80f); + // Remove groups with not enough links. top_groups = primitive::group::from_single_link(top_links); top_groups = filter::object_groups_small(top_groups, min_card); - // Compute char_width and char_space statistics. // // Here, we also compute max_char_width, in case other @@ -889,6 +908,9 @@ namespace scribo bot_links = primitive::link::merge_double_link_closest_aligned(left, right, anchor::StrictBottomCenter); + // Remove links if component bboxes overlap too much. + bot_links = filter::object_links_bbox_overlap(bot_links, 0.80f); + // Remove groups with not enough links. bot_groups = primitive::group::from_single_link(bot_links); diff --git a/scribo/scribo/primitive/extract/lines_h_pattern.hh b/scribo/scribo/primitive/extract/lines_h_pattern.hh index 6a1f7f0..3cedf53 100644 --- a/scribo/scribo/primitive/extract/lines_h_pattern.hh +++ b/scribo/scribo/primitive/extract/lines_h_pattern.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -106,12 +106,10 @@ namespace scribo mln_concrete(I) output = lines_pattern(input, length, 1, win); - unsigned new_length = length / 2 + delta; - new_length += 1 - (new_length % 2); // Guaranty that new_length is odd. - mln_concrete(I) output_dil = morpho::dilation(output, - win::rectangle2d(3, new_length)); + win::rectangle2d(2 * delta + 1, + length + 2)); output = scribo::primitive::internal::rd(output, input * output_dil); diff --git a/scribo/scribo/primitive/extract/lines_v_pattern.hh b/scribo/scribo/primitive/extract/lines_v_pattern.hh index 8a103ac..2908c8b 100644 --- a/scribo/scribo/primitive/extract/lines_v_pattern.hh +++ b/scribo/scribo/primitive/extract/lines_v_pattern.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -88,12 +88,10 @@ namespace scribo mln_concrete(I) output = lines_pattern(input, length, 0, win); - unsigned new_length = length / 2 + delta; - new_length += 1 - (new_length % 2); // Guaranty that new_length is odd. - mln_concrete(I) output_dil = morpho::dilation(output, - win::rectangle2d(new_length, 3)); + win::rectangle2d(2 * delta + 1, + length + 2)); output = scribo::primitive::internal::rd(output, input * output_dil); diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh index c94f9f5..f691188 100644 --- a/scribo/scribo/text/merging.hh +++ b/scribo/scribo/text/merging.hh @@ -1,4 +1,5 @@ -// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE) +// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory +// (LRDE) // // This file is part of Olena. // @@ -657,7 +658,7 @@ namespace scribo x---------------x | | | mc | - ml x x x mr + ml x x x mr | | | | x---------------x diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am index 6360a56..6ab5d7d 100644 --- a/scribo/src/Makefile.am +++ b/scribo/src/Makefile.am @@ -1,5 +1,5 @@ -# Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -# (LRDE). +# Copyright (C) 2009, 2010, 2011 EPITA Research and Development +# Laboratory (LRDE). # # This file is part of Olena. # @@ -116,6 +116,24 @@ if HAVE_QT content_in_doc_LDADD = $(LDADD) \ $(QT_LIBS) + utilexec_PROGRAMS += content_in_hdoc + content_in_hdoc_SOURCES = content_in_hdoc.cc + content_in_hdoc_CPPFLAGS = $(AM_CPPFLAGS) \ + $(TESSERACT_CPPFLAGS) \ + $(TIFF_CPPFLAGS) \ + $(MAGICKXX_CPPFLAGS) \ + $(QT_CPPFLAGS) -DHAVE_QT + content_in_hdoc_CXXFLAGS = $(AM_CXXFLAGS) \ + $(QT_CXXFLAGS) + content_in_hdoc_LDFLAGS = $(AM_LDFLAGS) \ + $(TESSERACT_LDFLAGS) \ + $(TIFF_LDFLAGS) \ + $(MAGICKXX_LDFLAGS) \ + $(QT_LDFLAGS) \ + -lpthread + content_in_hdoc_LDADD = $(LDADD) \ + $(QT_LIBS) + utilexec_PROGRAMS += non_text_components non_text_components_SOURCES = non_text_components.cc -- 1.5.6.5
participants (1)
-
Guillaume Lazzara