last-svn-commit-908-g3f5a6b2 Handle drop capital components.

* scribo/core/tag/component.hh: New tag. * scribo/postprocessing/images_to_drop_capital.hh: New routine. --- scribo/ChangeLog | 8 +++ scribo/scribo/core/tag/component.hh | 7 ++- .../images_to_drop_capital.hh} | 66 +++++++++++++------- 3 files changed, 58 insertions(+), 23 deletions(-) copy scribo/scribo/{filter/images_in_paragraph.hh => postprocessing/images_to_drop_capital.hh} (64%) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index c8b3503..b10058f 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,13 @@ 2011-06-07 Guillaume Lazzara <z@lrde.epita.fr> + Handle drop capital components. + + * scribo/core/tag/component.hh: New tag. + + * scribo/postprocessing/images_to_drop_capital.hh: New routine. + +2011-06-07 Guillaume Lazzara <z@lrde.epita.fr> + Save component outlines instead of bboxes. * scribo/io/img/internal/debug_img_visitor.hh, diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh index dc9db90..d5afb36 100644 --- a/scribo/scribo/core/tag/component.hh +++ b/scribo/scribo/core/tag/component.hh @@ -60,7 +60,8 @@ namespace scribo WhitespaceSeparator, Noise, Punctuation, - Image + Image, + DropCapital }; @@ -135,6 +136,8 @@ namespace scribo break; case Image: str = "Image"; + case DropCapital: + str = "DropCapital"; break; } @@ -159,6 +162,8 @@ namespace scribo return Punctuation; else if (str == "Image") return Image; + else if (str == "DropCapital") + return DropCapital; return Undefined; } diff --git a/scribo/scribo/filter/images_in_paragraph.hh b/scribo/scribo/postprocessing/images_to_drop_capital.hh similarity index 64% copy from scribo/scribo/filter/images_in_paragraph.hh copy to scribo/scribo/postprocessing/images_to_drop_capital.hh index 3cf64e1..ca76609 100644 --- a/scribo/scribo/filter/images_in_paragraph.hh +++ b/scribo/scribo/postprocessing/images_to_drop_capital.hh @@ -23,12 +23,12 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU 
General Public License. -#ifndef SCRIBO_FILTER_IMAGES_IN_PARAGRAPH_HH -# define SCRIBO_FILTER_IMAGES_IN_PARAGRAPH_HH +#ifndef SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH +# define SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH /// \file /// -/// Invalidate false positive separators. +/// Set type for specific images to Drop Capital component. /// \fixme Share same test canvas as text::merging. @@ -40,34 +40,31 @@ namespace scribo { - namespace filter + namespace postprocessing { using namespace mln; - /// Invalidate false positive separators. + /// Set type for specific images to Drop Capital component. /// /// \param[in] separators A paragraph set. /// - /// \return A doc with invalidated separators. - /// - /// Warning: it does not remove separators from separator - /// image. It only invalidate separator components in their - /// respective component_set. - /// + /// \return A doc with images tagged as drop capital if such + /// images have been found. + // template <typename L> void - images_in_paragraph(document<L>& doc); + images_to_drop_capital(document<L>& doc); # ifndef MLN_INCLUDE_ONLY template <typename L> void - images_in_paragraph(document<L>& doc) + images_to_drop_capital(document<L>& doc) { - trace::entering("scribo::filter::images_in_paragraph"); + trace::entering("scribo::postprocessing::images_to_drop_capital"); mln_precondition(doc.is_valid()); @@ -82,6 +79,8 @@ namespace scribo if (doc.paragraphs()(p).is_valid()) mln::draw::box_plain(billboard, doc.paragraphs()(p).bbox(), true); + float min_img_size = 0.2 * (doc.image().domain().width() + + doc.image().domain().height()); component_set<L> elts = doc.elements(); for_all_comps(c, elts) @@ -91,29 +90,52 @@ namespace scribo const bool tl = billboard(b_.pmin()); const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); - const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); + const bool mb = billboard.at_(b_.pmax().row(), b_.pcenter().col()); const bool mc = 
billboard.at_(b_.pcenter().row(), b_.pcenter().col()); const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); const bool br = billboard(b_.pmax()); - // This separator is included in an element (picture, drawing...) - // => Ignore it. - if (tl && tr && ml && mc && mr && bl && br) - elts(c).update_tag(component::Ignored); + typedef mln::util::set<int> set_t; + set_t s; + s.insert(tl); + s.insert(tr); + s.insert(mb); + s.insert(mc); + s.insert(mr); + s.insert(bl); + s.insert(br); + + if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0))) + continue; + + float elt_size = elts(c).bbox().width() + elts(c).bbox().height(); + for_all_elements(e, s) + if (s[e] != 0 + && (mc != 0 && mc == s[e] + && ((tl == mc && bl == mc) + || (tr == mc && br == mc) + || (bl == mc && br == mc) + || (tl == mc && tr == mc) + || (br == mc && mr == mc && mb == mc))) + && (elt_size < min_img_size)) + { + elts(c).update_type(component::DropCapital); + break; + } } // FIXME: warning this call may produce inconsistent data // Ignored components are still in the separator image... doc.set_elements(elts); - trace::exiting("scribo::filter::images_in_paragraph"); + trace::exiting("scribo::postprocessing::images_to_drop_capital"); } # endif // ! MLN_INCLUDE_ONLY - } // end of namespace scribo::filter + } // end of namespace scribo::postprocessing } // end of namespace scribo -#endif // ! SCRIBO_FILTER_IMAGES_IN_PARAGRAPH_HH +#endif // ! SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH -- 1.5.6.5
participants (1)
-
Guillaume Lazzara