last-svn-commit-830-g9cfcb85 Various fixes in Scribo.

* scribo/core/component_set.hh: Update FIXMEs.
  (update_labeled_image_()): New method.
* scribo/core/def/color_type.hh: Introduce a new global type.
* scribo/core/line_info.hh: Compute line pixels area and order component list by localization.
* scribo/core/line_set.hh: Add operator<<.
* scribo/core/object_groups.hh,
* scribo/core/object_links.hh (is_valid(unsigned)): new method.
* scribo/debug/links_image.hh: Make bounding box drawing optional.
* scribo/filter/object_groups_small.hh: Return a temporary result.
* scribo/filter/objects_h_thick.hh,
* scribo/filter/objects_h_thin.hh,
* scribo/filter/objects_v_thick.hh,
* scribo/filter/objects_v_thin.hh: Fix variable names and make these routines work properly.
* scribo/toolchain/content_in_doc.hh,
* scribo/preprocessing/rotate_90.hh: Fix preconditions.
* scribo/primitive/extract/non_text.hh: Remove useless precondition.
* scribo/primitive/link/internal/link_single_dmax_ratio_aligned_base.hh: Remove a useless method.
* scribo/primitive/link/internal/link_single_dmax_ratio_base.hh: Remove a useless (void).
* scribo/text/extract_lines.hh: Use dmax_default functor.
* src/debug/show_groups_bboxes.cc: Check hratio while linking.
* src/preprocessing/denoise_fg.cc: Fix usage.
* scribo/tests/toolchain/nepomuk/Makefile.am: Check if Magick++ is available.
--- scribo/ChangeLog | 47 ++++++++++++++++ scribo/scribo/core/component_set.hh | 21 ++++++-- .../core/def/color_type.hh} | 25 +++++---- scribo/scribo/core/line_info.hh | 56 ++++++++++++++++++- scribo/scribo/core/line_set.hh | 17 ++++++ scribo/scribo/core/object_groups.hh | 14 ++++- scribo/scribo/core/object_links.hh | 11 ++++ scribo/scribo/debug/links_image.hh | 9 ++- scribo/scribo/filter/object_groups_small.hh | 30 +++++++++-- scribo/scribo/filter/objects_h_thick.hh | 42 +++++++------- scribo/scribo/filter/objects_h_thin.hh | 6 +- scribo/scribo/filter/objects_v_thick.hh | 42 +++++++------- scribo/scribo/filter/objects_v_thin.hh | 6 +- scribo/scribo/preprocessing/rotate_90.hh | 2 + scribo/scribo/primitive/extract/non_text.hh | 2 - .../link_single_dmax_ratio_aligned_base.hh | 5 -- .../link/internal/link_single_dmax_ratio_base.hh | 3 +- scribo/scribo/text/extract_lines.hh | 8 ++-- scribo/scribo/toolchain/content_in_doc.hh | 4 +- scribo/src/debug/show_groups_bboxes.cc | 16 +++++- scribo/src/preprocessing/denoise_fg.cc | 6 ++- scribo/tests/toolchain/nepomuk/Makefile.am | 2 + 22 files changed, 277 insertions(+), 97 deletions(-) copy scribo/{demo/demat/src/process_args.hh => scribo/core/def/color_type.hh} (78%) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index e81959d..d388268 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,52 @@ 2011-04-05 Guillaume Lazzara <z@lrde.epita.fr> + Various fixes in Scribo. + + * scribo/core/component_set.hh: Update FIXMEs. + (update_labeled_image_()): New method. + + * scribo/core/def/color_type.hh: Introduce a new global type. + + * scribo/core/line_info.hh: Compute line pixels area and order + component list by localization. + + * scribo/core/line_set.hh: Add operator<<. + + * scribo/core/object_groups.hh, + * scribo/core/object_links.hh (is_valid(unsigned)): new method. + + * scribo/debug/links_image.hh: Make bounding box drawing optional. + + * scribo/filter/object_groups_small.hh: Return a temporary result. 
+ + * scribo/filter/objects_h_thick.hh, + * scribo/filter/objects_h_thin.hh, + * scribo/filter/objects_v_thick.hh, + * scribo/filter/objects_v_thin.hh: Fix variable names and make + these routines work properly. + + * scribo/toolchain/content_in_doc.hh, + * scribo/preprocessing/rotate_90.hh: Fix preconditions. + + * scribo/primitive/extract/non_text.hh: Remove useless precondition. + + * scribo/primitive/link/internal/link_single_dmax_ratio_aligned_base.hh: + Remove a useless method. + + * scribo/primitive/link/internal/link_single_dmax_ratio_base.hh: + Remove a useless (void). + + * scribo/text/extract_lines.hh: Use dmax_default functor. + + * src/debug/show_groups_bboxes.cc: Check hratio while linking. + + * src/preprocessing/denoise_fg.cc: Fix usage. + + * scribo/tests/toolchain/nepomuk/Makefile.am: Check if Magick++ is + available. + +2011-04-05 Guillaume Lazzara <z@lrde.epita.fr> + * scribo/text/extract_lines_wo_merge.hh: New line extraction routine. diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh index a63ed6c..d729802 100644 --- a/scribo/scribo/core/component_set.hh +++ b/scribo/scribo/core/component_set.hh @@ -223,6 +223,8 @@ namespace scribo // L& labeled_image_(); + void update_labeled_image_(const L& lbl); + /// Return the underlying labeled image where invalid components /// have been erased. /// @@ -232,7 +234,7 @@ namespace scribo /// @} - private: + protected: /// Duplicate the underlying image and create a new component_set. void init_(const component_set<L>& model); @@ -276,7 +278,7 @@ namespace scribo const mln_value(L)& ncomps) : ima_(ima), ncomps_(ncomps) { - initialize(separators_, ima); // FIXME: do we really want that? 
+ initialize(separators_, ima); // FIXME: to be removed mln::data::fill(separators_, false); typedef mln::accu::shape::bbox<mln_site(L)> bbox_accu_t; @@ -299,7 +301,7 @@ namespace scribo component::Type type) : ima_(ima), ncomps_(ncomps) { - initialize(separators_, ima); // FIXME: do we really want that? + initialize(separators_, ima); // FIXME: to be removed mln::data::fill(separators_, false); fill_infos(attribs, type); @@ -313,7 +315,7 @@ namespace scribo component::Type type) : ima_(ima), ncomps_(ncomps) { - initialize(separators_, ima); // FIXME: do we really want that? + initialize(separators_, ima); // FIXME: to be removed mln::data::fill(separators_, false); fill_infos(attribs, type); @@ -326,7 +328,7 @@ namespace scribo const mln::util::array<scribo::component_info>& infos) : ima_(ima), ncomps_(ncomps), infos_(infos) { - initialize(separators_, ima); // FIXME: do we really want that? + initialize(separators_, ima); // FIXME: to be removed mln::data::fill(separators_, false); } @@ -528,6 +530,15 @@ namespace scribo template <typename L> inline + void + component_set<L>::update_labeled_image_(const L& lbl) + { + data_->ima_ = lbl; + } + + + template <typename L> + inline bool component_set<L>::is_valid() const { diff --git a/scribo/demo/demat/src/process_args.hh b/scribo/scribo/core/def/color_type.hh similarity index 78% copy from scribo/demo/demat/src/process_args.hh copy to scribo/scribo/core/def/color_type.hh index 5ca72cf..7acd334 100644 --- a/scribo/demo/demat/src/process_args.hh +++ b/scribo/scribo/core/def/color_type.hh @@ -1,4 +1,4 @@ -// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE) +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) // // This file is part of Olena. // @@ -23,25 +23,26 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU General Public License. 
-#ifndef SCRIBO_DEMO_SHARED_SRC_PROCESS_ARGS_HH -# define SCRIBO_DEMO_SHARED_SRC_PROCESS_ARGS_HH +#ifndef SCRIBO_CORE_COLOR_TYPE_HH +# define SCRIBO_CORE_COLOR_TYPE_HH + +/// \file +/// +/// Global type definition for labels. + +# include <mln/value/rgb8.hh> namespace scribo { - namespace demo + namespace def { - struct process_args - { - unsigned scale; - }; + typedef mln::value::rgb8 color_type; - - } // end of namespace scribo::demo + } // end of namespace scribo::def } // end of namespace scribo - -#endif // !SCRIBO_DEMO_SHARED_SRC_PROCESS_ARGS_HH +#endif // ! SCRIBO_CORE_DEF_HH diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh index 866e3f4..749e627 100644 --- a/scribo/scribo/core/line_info.hh +++ b/scribo/scribo/core/line_info.hh @@ -84,6 +84,9 @@ namespace scribo mln::box2d ebbox_; mln::util::array<component_id_t> components_; + // The number of pixels used for line characters. + unsigned pixel_area_; + // Values relative to the line bbox. int baseline_; int meanline_; @@ -176,6 +179,8 @@ namespace scribo const mln::util::array<component_id_t>& component_ids() const; unsigned card() const; + unsigned pixel_area() const; + int baseline() const; int meanline() const; int ascent() const; @@ -348,6 +353,26 @@ namespace scribo indented_ = false; } + + + // sort_comp_ids functor + + template <typename L> + sort_comp_ids<L>::sort_comp_ids(const component_set<L>& comp_set) + : comps_(comp_set) + { + } + + + template <typename L> + bool + sort_comp_ids<L>::operator()(const component_id_t& l, + const component_id_t& r) const + { + return comps_(l).bbox().pmin().col() < comps_(r).bbox().pmin().col() + && comps_(l).bbox().pmax().col() < comps_(r).bbox().pmax().col(); + } + } // end of namespace scribo::internal @@ -381,7 +406,7 @@ namespace scribo template <typename L> inline line_info<L>::line_info(const line_info<L>& other) - : id_(0) + : parent_t(other), id_(0) { //data_->hidden_ = false; copy_data(other); @@ -487,6 +512,7 @@ namespace 
scribo return data_->components_; } + template <typename L> unsigned line_info<L>::card() const @@ -496,6 +522,14 @@ namespace scribo template <typename L> + unsigned + line_info<L>::pixel_area() const + { + return data_->pixel_area_; + } + + + template <typename L> int line_info<L>::baseline() const { @@ -885,6 +919,8 @@ namespace scribo char_space, char_width; + unsigned pixel_area = 0; + mln::accu::shape::bbox<P> bbox; mln::def::coord ref_line = mln_max(mln::def::coord); @@ -899,7 +935,7 @@ namespace scribo // Ignore punctuation for stats computation but not for bbox // computation. - if (data_->holder_.components()(c).type() == component::Punctuation) + if (comp_set(c).type() == component::Punctuation) continue; ref_line = mln::math::min(comp_set(c).bbox().pmin().row(), ref_line); @@ -911,6 +947,8 @@ namespace scribo { unsigned c = data_->components_(i); + pixel_area += comp_set(c).card(); + const mln::box2d& bb = comp_set(c).bbox(); // Bounding box. @@ -918,7 +956,7 @@ namespace scribo // Ignore punctuation for stats computation but not for bbox // computation. - if (data_->holder_.components()(c).type() == component::Punctuation) + if (comp_set(c).type() == component::Punctuation) continue; @@ -965,9 +1003,21 @@ namespace scribo // Finalization { + // Tag data_->tag_ = line::None; + + // Bbox data_->bbox_ = bbox.to_result(); + // Pixel area + data_->pixel_area_ = pixel_area; + + // Order component ids according to component localization (left + // to right). 
+ std::sort(data_->components_.hook_std_vector_().begin(), + data_->components_.hook_std_vector_().end(), + internal::sort_comp_ids<L>(comp_set)); + // Char space if (char_space.card() < 2) data_->char_space_ = 0; diff --git a/scribo/scribo/core/line_set.hh b/scribo/scribo/core/line_set.hh index bfa9240..f1e443b 100644 --- a/scribo/scribo/core/line_set.hh +++ b/scribo/scribo/core/line_set.hh @@ -174,6 +174,9 @@ namespace scribo mln::util::tracked_ptr< internal::line_set_data<L> > data_; }; + template <typename L> + std::ostream& + operator<<(std::ostream& ostr, const line_set<L>& lines); namespace make { @@ -435,6 +438,20 @@ namespace scribo } + + template <typename L> + std::ostream& + operator<<(std::ostream& ostr, const line_set<L>& lines) + { + ostr << "line_set[" << std::endl; + for_all_lines(i, lines) + ostr << lines(i); + ostr << "]" << std::endl; + + return ostr; + } + + // Make routines. namespace make diff --git a/scribo/scribo/core/object_groups.hh b/scribo/scribo/core/object_groups.hh index bbfaf6e..2a4b0b1 100644 --- a/scribo/scribo/core/object_groups.hh +++ b/scribo/scribo/core/object_groups.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. 
// @@ -86,6 +86,7 @@ namespace scribo void init_(const object_links<L>& links); bool is_valid() const; + bool is_valid(unsigned comp_id) const; unsigned nelements() const; @@ -190,6 +191,15 @@ namespace scribo } template <typename L> + bool + object_groups<L>::is_valid(unsigned comp_id) const + { + mln_assertion(is_valid()); + mln_assertion(comp_id < data_->links_.nelements()); + return data_->links_(comp_id) != 0; + } + + template <typename L> unsigned object_groups<L>::nelements() const { diff --git a/scribo/scribo/core/object_links.hh b/scribo/scribo/core/object_links.hh index 2c2eea1..1fbcd5a 100644 --- a/scribo/scribo/core/object_links.hh +++ b/scribo/scribo/core/object_links.hh @@ -84,6 +84,7 @@ namespace scribo const component_set<L>& components() const; bool is_valid() const; + bool is_valid(unsigned comp_id) const; unsigned nelements() const; @@ -181,6 +182,16 @@ namespace scribo template <typename L> + bool + object_links<L>::is_valid(unsigned comp_id) const + { + mln_precondition(is_valid()); + mln_precondition(comp_id < data_->comp_to_link_.nelements()); + return data_->comp_to_link_(comp_id) != 0; + } + + + template <typename L> unsigned object_links<L>::nelements() const { diff --git a/scribo/scribo/debug/links_image.hh b/scribo/scribo/debug/links_image.hh index 76447b7..47394be 100644 --- a/scribo/scribo/debug/links_image.hh +++ b/scribo/scribo/debug/links_image.hh @@ -55,7 +55,8 @@ namespace scribo mln_ch_value(I,value::rgb8) links_image(const Image<I>& input_, const object_links<L>& links, - anchor::Type anchor); + anchor::Type anchor, + bool draw_bboxes = true); # ifndef MLN_INCLUDE_ONLY @@ -65,7 +66,8 @@ namespace scribo mln_ch_value(I,value::rgb8) links_image(const Image<I>& input_, const object_links<L>& links, - anchor::Type anchor) + anchor::Type anchor, + bool draw_bboxes) { trace::entering("scribo::debug::links_image"); const I& input = exact(input_); @@ -78,7 +80,8 @@ namespace scribo image2d<value::rgb8> links_image = 
data::convert(value::rgb8(), input); - scribo::draw::bounding_boxes(links_image, comps, literal::blue); + if (draw_bboxes) + scribo::draw::bounding_boxes(links_image, comps, literal::blue); for_all_links(l, links) if (links(l) != l) diff --git a/scribo/scribo/filter/object_groups_small.hh b/scribo/scribo/filter/object_groups_small.hh index f736a50..8dd244c 100644 --- a/scribo/scribo/filter/object_groups_small.hh +++ b/scribo/scribo/filter/object_groups_small.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -50,6 +50,7 @@ namespace scribo \param[in] groups Information about object groups. \param[in] n_links The minimum number of links per group. + \param[out] group_size Return the group sizes _before_ filtering. \return A copy of object group in which small groups have been removed. @@ -57,9 +58,17 @@ namespace scribo template <typename L> object_groups<L> object_groups_small(const object_groups<L>& groups, + unsigned n_links, + mln::util::array<unsigned>& group_size); + + // \overload + template <typename L> + object_groups<L> + object_groups_small(const object_groups<L>& groups, unsigned n_links); + # ifndef MLN_INCLUDE_ONLY @@ -67,14 +76,15 @@ namespace scribo inline object_groups<L> object_groups_small(const object_groups<L>& groups, - unsigned n_links) + unsigned n_links, + mln::util::array<unsigned>& group_size) { trace::entering("scribo::filter::object_groups_small"); mln_precondition(groups.is_valid()); // Counting the number of objects per group. 
- mln::util::array<unsigned> group_size(groups.nelements(), 0); + group_size = mln::util::array<unsigned>(groups.nelements(), 0); for_all_groups(i, group_size) ++group_size[groups(i)]; @@ -90,6 +100,18 @@ namespace scribo } + template <typename L> + inline + object_groups<L> + object_groups_small(const object_groups<L>& groups, + unsigned n_links) + { + mln::util::array<unsigned> group_size; + return object_groups_small(groups, n_links, group_size); + } + + + # endif // ! MLN_INCLUDE_ONLY } // end of namespace scribo::filter diff --git a/scribo/scribo/filter/objects_h_thick.hh b/scribo/scribo/filter/objects_h_thick.hh index 86dc8dc..06b75dc 100644 --- a/scribo/scribo/filter/objects_h_thick.hh +++ b/scribo/scribo/filter/objects_h_thick.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -46,12 +46,12 @@ namespace scribo using namespace mln; - /// Remove objects horizontaly thicker or equal to \p min_thickness. + /// Remove objects horizontaly thicker or equal to \p max_thickness. /// /// \param[in] input_ A binary image. /// \param[in] nbh_ A neighborhood used in labeling algorithms. /// \param[in] label_type The label type used for labeling. - /// \param[in] min_thickness The maximum thickness value. + /// \param[in] max_thickness The maximum thickness value. /// /// \result A binary image without thick objects. // @@ -61,13 +61,13 @@ namespace scribo objects_h_thick(const Image<I>& input_, const Neighborhood<N>& nbh_, const V& label_type, - unsigned min_thickness); + unsigned max_thickness); - /// Remove objects horizontaly thicker or equal to \p min_thickness. + /// Remove objects horizontaly thicker or equal to \p max_thickness. /// /// \param[in] comps Component data. - /// \param[in] min_thickness The minimum thickness value. + /// \param[in] max_thickness The minimum thickness value. 
/// /// \result A component data set without too thick components. // @@ -75,7 +75,7 @@ namespace scribo inline component_set<L> objects_h_thick(const component_set<L>& comps, - unsigned min_thickness); + unsigned max_thickness); @@ -95,20 +95,20 @@ namespace scribo /// Constructor /// /// \param[in] comps Component data. - /// \param[in] min_thickness the minimum thickness allowed. + /// \param[in] max_thickness the minimum thickness allowed. // h_thick_object_filter(const component_set<L>& comps, - unsigned min_thickness) - : comps_(comps), min_thickness_(min_thickness) + unsigned max_thickness) + : comps_(comps), max_thickness_(max_thickness) { } /// Constructor /// - /// \param[in] min_thickness the minimum thickness allowed. + /// \param[in] max_thickness the minimum thickness allowed. // - h_thick_object_filter(unsigned min_thickness) - : min_thickness_(min_thickness) + h_thick_object_filter(unsigned max_thickness) + : max_thickness_(max_thickness) { } @@ -120,14 +120,14 @@ namespace scribo } /// Return false if the component is thicker than - /// \p min_thickness_. + /// \p max_thickness_. /// /// \param[in] l An image value. bool operator()(const mln_value(L)& l) const { if (l == literal::zero) return false; - return comps_(l).bbox().height() > min_thickness_; + return comps_(l).bbox().width() < max_thickness_; } @@ -135,7 +135,7 @@ namespace scribo component_set<L> comps_; /// The minimum thickness. 
- unsigned min_thickness_; + unsigned max_thickness_; }; @@ -148,7 +148,7 @@ namespace scribo objects_thick(const Image<I>& input_, const Neighborhood<N>& nbh_, const V& label_type, - unsigned min_thickness) + unsigned max_thickness) { trace::entering("scribo::filter::objects_h_thick"); @@ -158,7 +158,7 @@ namespace scribo mln_precondition(input.is_valid()); mln_precondition(nbh.is_valid()); - internal::h_thick_object_filter<V> functor(min_thickness); + internal::h_thick_object_filter<V> functor(max_thickness); mln_concrete(I) output = internal::compute(input, nbh, label_type, functor); @@ -171,11 +171,11 @@ namespace scribo inline component_set<L> objects_h_thick(const component_set<L>& comps, - unsigned min_thickness) + unsigned max_thickness) { trace::entering("scribo::filter::objects_h_thick"); - internal::h_thick_object_filter<L> functor(comps, min_thickness); + internal::h_thick_object_filter<L> functor(comps, max_thickness); component_set<L> output = internal::compute(comps, functor); trace::exiting("scribo::filter::objects_h_thick"); diff --git a/scribo/scribo/filter/objects_h_thin.hh b/scribo/scribo/filter/objects_h_thin.hh index 43bcdfc..8caa2cc 100644 --- a/scribo/scribo/filter/objects_h_thin.hh +++ b/scribo/scribo/filter/objects_h_thin.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -129,7 +129,7 @@ namespace scribo { if (l == literal::zero) return false; - return comps_(l).bbox().ncols() > min_thinness_; + return comps_(l).bbox().width() > min_thinness_; } /// Component bounding boxes. 
diff --git a/scribo/scribo/filter/objects_v_thick.hh b/scribo/scribo/filter/objects_v_thick.hh index 1db00cb..ad1a4df 100644 --- a/scribo/scribo/filter/objects_v_thick.hh +++ b/scribo/scribo/filter/objects_v_thick.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -46,12 +46,12 @@ namespace scribo using namespace mln; - /// Remove components verticaly thicker or equal to \p min_thickness. + /// Remove components verticaly thicker or equal to \p max_thickness. /// /// \param[in] input_ A binary image. /// \param[in] nbh_ A neighborhood used in labeling algorithms. /// \param[in] label_type The label type used for labeling. - /// \param[in] min_thickness The maximum thickness value. + /// \param[in] max_thickness The maximum thickness value. /// /// \result A binary image without thick components. // @@ -61,13 +61,13 @@ namespace scribo objects_v_thick(const Image<I>& input_, const Neighborhood<N>& nbh_, const V& label_type, - unsigned min_thickness); + unsigned max_thickness); - /// Remove components verticaly thicker or equal to \p min_thickness. + /// Remove components verticaly thicker or equal to \p max_thickness. /// /// \param[in] comps A component set. - /// \param[in] min_thickness The maximum thickness value. + /// \param[in] max_thickness The maximum thickness value. /// /// \result An object image without too thick components. // @@ -75,7 +75,7 @@ namespace scribo inline component_set<L> objects_v_thick(const component_set<L>& comps, - unsigned min_thickness); + unsigned max_thickness); @@ -95,20 +95,20 @@ namespace scribo /// Constructor /// /// \param[in] comps A component set. - /// \param[in] min_thickness the maximum thickness allowed. + /// \param[in] max_thickness the maximum thickness allowed. 
// v_thick_object_filter(const component_set<L>& comps, - unsigned min_thickness) - : comps_(comps), min_thickness_(min_thickness) + unsigned max_thickness) + : comps_(comps), max_thickness_(max_thickness) { } /// Constructor /// - /// \param[in] min_thickness the maximum thickness allowed. + /// \param[in] max_thickness the maximum thickness allowed. // - v_thick_object_filter(unsigned min_thickness) - : min_thickness_(min_thickness) + v_thick_object_filter(unsigned max_thickness) + : max_thickness_(max_thickness) { } @@ -120,14 +120,14 @@ namespace scribo } /// Return false if the components is thicker than - /// \p min_thickness_. + /// \p max_thickness_. /// /// \param[in] l An image value. bool operator()(const mln_value(L)& l) const { if (l == literal::zero) return false; - return comps_(l).bbox().nrows() > min_thickness_; + return comps_(l).bbox().height() < max_thickness_; } @@ -135,7 +135,7 @@ namespace scribo component_set<L> comps_; /// The min thickness. - unsigned min_thickness_; + unsigned max_thickness_; }; @@ -148,7 +148,7 @@ namespace scribo objects_thick(const Image<I>& input_, const Neighborhood<N>& nbh_, const V& label_type, - unsigned min_thickness) + unsigned max_thickness) { trace::entering("scribo::filter::objects_v_thick"); @@ -158,7 +158,7 @@ namespace scribo mln_precondition(input.is_valid()); mln_precondition(nbh.is_valid()); - internal::v_thick_object_filter<V> functor(min_thickness); + internal::v_thick_object_filter<V> functor(max_thickness); mln_concrete(I) output = internal::compute(input, nbh, label_type, functor); @@ -171,11 +171,11 @@ namespace scribo inline component_set<L> objects_v_thick(const component_set<L>& comps, - unsigned min_thickness) + unsigned max_thickness) { trace::entering("scribo::filter::objects_v_thick"); - internal::v_thick_object_filter<L> functor(comps, min_thickness); + internal::v_thick_object_filter<L> functor(comps, max_thickness); component_set<L> output = internal::compute(comps, functor); 
trace::exiting("scribo::filter::objects_v_thick"); diff --git a/scribo/scribo/filter/objects_v_thin.hh b/scribo/scribo/filter/objects_v_thin.hh index de11cec..f2280eb 100644 --- a/scribo/scribo/filter/objects_v_thin.hh +++ b/scribo/scribo/filter/objects_v_thin.hh @@ -1,5 +1,5 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) // // This file is part of Olena. // @@ -130,7 +130,7 @@ namespace scribo { if (l == literal::zero) return false; - return comps_.bbox(l).nrows() > min_thinness_; + return comps_(l).bbox().height() > min_thinness_; } /// Component bounding boxes. diff --git a/scribo/scribo/preprocessing/rotate_90.hh b/scribo/scribo/preprocessing/rotate_90.hh index 523b82f..8db703d 100644 --- a/scribo/scribo/preprocessing/rotate_90.hh +++ b/scribo/scribo/preprocessing/rotate_90.hh @@ -80,6 +80,8 @@ namespace scribo const I& input = exact(input_); mln_precondition(input.is_valid()); + mln_precondition(input.domain().pmin().row() > 0); + mln_precondition(input.domain().pmin().col() > 0); // Works only on one block images. mlc_is(mln_trait_image_value_access(I), diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text.hh index cbc6ea2..e2a6a03 100644 --- a/scribo/scribo/primitive/extract/non_text.hh +++ b/scribo/scribo/primitive/extract/non_text.hh @@ -308,8 +308,6 @@ namespace scribo t.start(); mln_precondition(doc.is_valid()); - - mln_precondition(doc.has_line_seps()); mln_precondition(doc.has_text()); // FIXME: Do these images exist elsewhere? 
diff --git a/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_aligned_base.hh b/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_aligned_base.hh index 56879bb..64c89ca 100644 --- a/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_aligned_base.hh +++ b/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_aligned_base.hh @@ -102,11 +102,6 @@ namespace scribo # endif // ! SCRIBO_NDEBUG } - void compute_next_site_(P& p) - { - ++p.col(); - } - mln_site(L) start_point_(unsigned current_object, anchor::Type anchor) diff --git a/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_base.hh b/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_base.hh index 3abe663..d99cbee 100644 --- a/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_base.hh +++ b/scribo/scribo/primitive/link/internal/link_single_dmax_ratio_base.hh @@ -139,9 +139,8 @@ namespace scribo inline mln_site(L) link_single_dmax_ratio_base<L, F, E>::start_point_(unsigned current_object, - anchor::Type anchor) + anchor::Type anchor) { - (void) anchor; return internal::compute_anchor(this->components_, current_object, anchor); } diff --git a/scribo/scribo/text/extract_lines.hh b/scribo/scribo/text/extract_lines.hh index e69a249..53fd742 100644 --- a/scribo/scribo/text/extract_lines.hh +++ b/scribo/scribo/text/extract_lines.hh @@ -123,15 +123,15 @@ namespace scribo object_links<L> left_link = primitive::link::with_single_left_link_dmax_ratio( comps, - primitive::link::internal::dmax_width_and_height(1), -// primitive::link::internal::dmax_default(1), +// primitive::link::internal::dmax_width_and_height(1), + primitive::link::internal::dmax_default(1), anchor::MassCenter); object_links<L> right_link = primitive::link::with_single_right_link_dmax_ratio( comps, - primitive::link::internal::dmax_width_and_height(1), -// primitive::link::internal::dmax_default(1), +// primitive::link::internal::dmax_width_and_height(1), + 
primitive::link::internal::dmax_default(1), anchor::MassCenter); // Validating left and right links. diff --git a/scribo/scribo/toolchain/content_in_doc.hh b/scribo/scribo/toolchain/content_in_doc.hh index 4469afa..8f6f7a4 100644 --- a/scribo/scribo/toolchain/content_in_doc.hh +++ b/scribo/scribo/toolchain/content_in_doc.hh @@ -63,8 +63,8 @@ namespace scribo bool enable_ocr = true, const std::string& language = std::string("eng")) { - mln_precondition(input.is_valid()); - mln_precondition(input_preproc.is_valid()); + mln_precondition(exact(input).is_valid()); + mln_precondition(exact(input_preproc).is_valid()); internal::content_in_doc_functor<J> f("noname"); f.enable_denoising = denoise; diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc index b323976..c7457ad 100644 --- a/scribo/src/debug/show_groups_bboxes.cc +++ b/scribo/src/debug/show_groups_bboxes.cc @@ -30,15 +30,18 @@ #include <scribo/primitive/link/merge_double_link.hh> #include <scribo/primitive/link/internal/dmax_width_and_height.hh> +#include <scribo/filter/object_links_bbox_h_ratio.hh> + int main(int argc, char *argv[]) { using namespace mln; using namespace scribo; using namespace scribo::primitive; - if (argc != 3) + if (argc != 3 && argc != 4) { - std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl; + std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm [hratio = 2.5]" << std::endl; + std::cerr << "Group components and makes sure size ratio is correct." 
<< std::endl; return 1; } @@ -73,9 +76,16 @@ int main(int argc, char *argv[]) merged_links = primitive::link::merge_double_link(left_link, right_link); + float hratio = 2.5f; + if (argc == 4) + hratio = atof(argv[3]); + + object_links<L> hratio_filtered_links + = filter::object_links_bbox_h_ratio(merged_links, hratio); + object_groups<L> - groups = group::from_single_link(merged_links); + groups = group::from_single_link(hratio_filtered_links); line_set<L> lines(groups); diff --git a/scribo/src/preprocessing/denoise_fg.cc b/scribo/src/preprocessing/denoise_fg.cc index 307e8f6..a7a5bea 100644 --- a/scribo/src/preprocessing/denoise_fg.cc +++ b/scribo/src/preprocessing/denoise_fg.cc @@ -1,4 +1,5 @@ -// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE) +// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory +// (LRDE) // // This file is part of Olena. // @@ -35,7 +36,8 @@ const char *args_desc[][2] = { { "input.pbm", "A binary image. 'True' for objects, 'False'\ for the background." }, - { "fg_min_card", "The minimum neighbor count to be set to false." }, + { "fg_min_card", "The minimum neighbor count to be set to false." + " (common value : 3 or 4)" }, {0, 0} }; diff --git a/scribo/tests/toolchain/nepomuk/Makefile.am b/scribo/tests/toolchain/nepomuk/Makefile.am index 331fb6f..ce75f17 100644 --- a/scribo/tests/toolchain/nepomuk/Makefile.am +++ b/scribo/tests/toolchain/nepomuk/Makefile.am @@ -26,6 +26,7 @@ check_PROGRAMS = if HAVE_QT if HAVE_TESSERACT +if HAVE_MAGICKXX check_PROGRAMS += text_extraction text_extraction_SOURCES = text_extraction.cc @@ -42,6 +43,7 @@ text_extraction_LDFLAGS = $(QT_LDFLAGS) $(LDFLAGS) \ -lpthread text_extraction_LDADD = $(QT_LIBS) $(LDADD) +endif HAVE_MAGICKXX endif HAVE_TESSERACT endif HAVE_QT -- 1.5.6.5
participants (1)
-
Guillaume Lazzara