last-svn-commit-905-g7142b16 BACKUP

--- milena/mln/convert/from_to.hxx | 7 +- milena/mln/draw/polygon.hh | 105 +++++++ milena/mln/util/object_id.hh | 41 +++- scribo/scribo/core/paragraph_info.hh | 52 +++- scribo/scribo/core/tag/component.hh | 7 +- scribo/scribo/core/tag/paragraph.hh | 14 + scribo/scribo/filter/images_in_paragraph.hh | 8 +- scribo/scribo/filter/paragraphs_bbox_overlap.hh | 145 +++++++---- scribo/scribo/filter/paragraphs_in_borders.hh | 140 ++++++++++ scribo/scribo/filter/paragraphs_in_image.hh | 29 ++- scribo/scribo/filter/separators_in_borders.hh | 206 ++++++++++++++ scribo/scribo/filter/separators_in_element.hh | 84 +++--- scribo/scribo/filter/separators_in_paragraph.hh | 92 ++++--- scribo/scribo/filter/separators_vert_in_borders.hh | 143 ++++++++++ scribo/scribo/io/img/internal/debug_img_visitor.hh | 69 ++---- scribo/scribo/io/img/internal/full_img_visitor.hh | 39 ++- scribo/scribo/io/xml/internal/page_xml_visitor.hh | 23 ++- .../scribo/io/xml/internal/print_image_coords.hh | 6 +- .../scribo/io/xml/internal/print_page_preambule.hh | 13 +- scribo/scribo/io/xml/internal/time_info.hh | 75 +++++ .../postprocessing/images_to_drop_capital.hh | 141 ++++++++++ scribo/scribo/text/paragraphs_closing.hh | 284 ++++++++++++-------- .../toolchain/internal/content_in_hdoc_functor.hh | 58 ++++- scribo/scribo/util/box_is_included.hh | 74 +++++ scribo/scribo/util/component_precise_outline.hh | 83 ++++-- 25 files changed, 1549 insertions(+), 389 deletions(-) create mode 100644 milena/mln/draw/polygon.hh create mode 100644 scribo/scribo/filter/paragraphs_in_borders.hh create mode 100644 scribo/scribo/filter/separators_in_borders.hh create mode 100644 scribo/scribo/filter/separators_vert_in_borders.hh create mode 100644 scribo/scribo/io/xml/internal/time_info.hh create mode 100644 scribo/scribo/postprocessing/images_to_drop_capital.hh create mode 100644 scribo/scribo/util/box_is_included.hh diff --git a/milena/mln/convert/from_to.hxx b/milena/mln/convert/from_to.hxx index cc7cc15..7891e9a 100644 --- 
a/milena/mln/convert/from_to.hxx +++ b/milena/mln/convert/from_to.hxx @@ -1,4 +1,4 @@ -// Copyright (C) 2008, 2009, 2010 EPITA Research and Development +// Copyright (C) 2008, 2009, 2010, 2011 EPITA Research and Development // Laboratory (LRDE) // // This file is part of Olena. @@ -84,6 +84,7 @@ namespace mln namespace util { template <typename T> class array; + template <typename Tag, typename V> class object_id; } namespace value { @@ -473,6 +474,10 @@ namespace mln from_to(from.second(), to.second()); } + // util::object_id<Tag,V> -> V. + template <typename Tag, typename V> + void from_to_(const util::object_id<Tag,V>& from, V& to_); + } // end of namespace mln::convert::over_load } // end of namespace mln::convert diff --git a/milena/mln/draw/polygon.hh b/milena/mln/draw/polygon.hh new file mode 100644 index 0000000..5c6c917 --- /dev/null +++ b/milena/mln/draw/polygon.hh @@ -0,0 +1,105 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_DRAW_POLYGON_HH +# define MLN_DRAW_POLYGON_HH + +/// \file +/// +/// Draw a polygon in an image. +/// \fixme Add specializations for horizontal polygons (use pointers/memset). + +# include <mln/core/concept/image.hh> +# include <mln/core/site_set/p_array.hh> +# include <mln/draw/line.hh> + + +namespace mln +{ + + namespace draw + { + + /*! Draw a polygon at level \p v in image \p ima. + * + * \param[in,out] ima The image to be drawn. + * \param[in] par The polygon site set. + * \param[in] v The value to assign to all drawn pixels. + * \param[in] output_ratio Each site of \p par is divided by this value before drawing. + * \pre \p ima has to be initialized. + * \pre \p par has more than one site. + */ + template <typename I> + void polygon(Image<I>& ima, + const p_array<mln_site(I)>& par, + const mln_value(I)& v, + unsigned output_ratio); + + // \overload Same as above with \p output_ratio set to 1. + template <typename I> + void polygon(Image<I>& ima, + const p_array<mln_site(I)>& par, + const mln_value(I)& v); + + +# ifndef MLN_INCLUDE_ONLY + + template <typename I> + void polygon(Image<I>& ima_, + const p_array<mln_site(I)>& par, + const mln_value(I)& v, + unsigned output_ratio) + { + I& ima = exact(ima_); + mln_precondition(ima.is_valid()); + mln_precondition(par.nelements() > 1); + + mln_site(I) p_last, tmp; + mln_piter(p_array<mln_site(I)>) p(par); + p_last = par[0] / output_ratio; + for_all(p) + { + tmp = p / output_ratio; + draw::line(ima, p_last, tmp, v); + p_last = tmp; + } + } + + template <typename I> + void polygon(Image<I>& ima, + const p_array<mln_site(I)>& par, + const mln_value(I)& v) + { + polygon(ima, par, v, 1); + } + +# endif // !
MLN_INCLUDE_ONLY + + } // end of namespace mln::draw + +} // end of namespace mln + + +#endif // ! MLN_DRAW_POLYGON_HH diff --git a/milena/mln/util/object_id.hh b/milena/mln/util/object_id.hh index d7db929..794abc4 100644 --- a/milena/mln/util/object_id.hh +++ b/milena/mln/util/object_id.hh @@ -38,6 +38,24 @@ namespace mln { + // Forward declaration + namespace util { template <typename Tag, typename V> class object_id; } + + namespace convert + { + + namespace over_load + { + + // object_id<Tag,V> -> V. + template <typename Tag, typename V> + void from_to_(const util::object_id<Tag,V>& from, V& to_); + + } // end of namespace mln::convert::over_load + + } // end of namespace mln::convert + + namespace util { @@ -90,9 +108,12 @@ namespace mln bool operator<(const object_id<Tag,V>& lhs, const object_id<Tag,V>& rhs); + } // end of namespace mln::util # ifndef MLN_INCLUDE_ONLY + namespace util + { template <typename Tag, typename V> inline @@ -205,10 +226,26 @@ namespace mln return lhs.value() < rhs.value(); } + } // end of namespace mln::util -# endif // ! MLN_INCLUDE_ONLY + namespace convert + { - } // end of namespace mln::util + namespace over_load + { + + // object_id<Tag,V> -> V. + template <typename Tag, typename V> + void from_to_(const util::object_id<Tag,V>& from, V& to_) + { + to_ = from.value(); + } + + } // end of namespace mln::convert::over_load + + } // end of namespace mln::convert + +# endif // ! 
MLN_INCLUDE_ONLY } // end of namespace mln diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh index 90db7da..1029913 100644 --- a/scribo/scribo/core/paragraph_info.hh +++ b/scribo/scribo/core/paragraph_info.hh @@ -28,6 +28,7 @@ # include <scribo/core/line_info.hh> # include <scribo/core/line_links.hh> +# include <scribo/core/tag/paragraph.hh> # include <mln/util/array.hh> # include <mln/accu/shape/bbox.hh> @@ -79,6 +80,11 @@ namespace scribo void set_delta_baseline(const int delta_baseline); int delta_baseline() const; + void fast_merge(paragraph_info<L>& info); + + void update_tag(paragraph::Tag tag); + paragraph::Tag tag() const; + private: mln::util::array<line_id_t> line_ids_; mln::accu::shape::bbox<mln_site(L)> bbox_; @@ -88,7 +94,7 @@ namespace scribo float color_reliability_; int delta_baseline_; - bool needs_stats_update_; + paragraph::Tag tag_; bool is_valid_; }; @@ -103,13 +109,13 @@ namespace scribo template <typename L> paragraph_info<L>::paragraph_info() - : needs_stats_update_(false), is_valid_(false) + : tag_(paragraph::None), is_valid_(false) { } template <typename L> paragraph_info<L>::paragraph_info(const line_links<L>& llinks) - : llinks_(llinks), needs_stats_update_(false), is_valid_(true) + : llinks_(llinks), tag_(paragraph::None), is_valid_(true) { } @@ -121,7 +127,7 @@ namespace scribo bbox_.take(line.bbox()); // More data may need to be updated! 
- needs_stats_update_ = true; + tag_ = paragraph::Needs_Precise_Stats_Update; } template <typename L> @@ -206,14 +212,14 @@ namespace scribo bool paragraph_info<L>::needs_stats_update() const { - return needs_stats_update_; + return tag_ == paragraph::Needs_Precise_Stats_Update; } template <typename L> void paragraph_info<L>::force_stats_update() { - if (!needs_stats_update_) + if (!needs_stats_update()) return; const line_set<L>& lines = llinks_.lines(); @@ -259,7 +265,7 @@ namespace scribo // FIXME: Update paragraph stats - needs_stats_update_ = false; + tag_ = paragraph::None; } template <typename L> @@ -277,6 +283,38 @@ namespace scribo } template <typename L> + void + paragraph_info<L>::fast_merge(paragraph_info<L>& other) + { + tag_ = paragraph::Needs_Precise_Stats_Update; + other.update_tag(paragraph::Merged); + other.invalidate(); + + // Merge bboxes. + bbox_.take(other.bbox()); + + // Update delta_baseline + // FIXME: delta base line should be updated correctly!! + set_delta_baseline(std::max(other.delta_baseline_, delta_baseline_)); + + line_ids_.append(other.line_ids()); + } + + template <typename L> + void + paragraph_info<L>::update_tag(paragraph::Tag tag) + { + tag_ = tag; + } + + template <typename L> + paragraph::Tag + paragraph_info<L>::tag() const + { + return tag_; + } + + template <typename L> bool operator==(const paragraph_info<L>& lhs, const paragraph_info<L>& rhs) { diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh index dc9db90..d5afb36 100644 --- a/scribo/scribo/core/tag/component.hh +++ b/scribo/scribo/core/tag/component.hh @@ -60,7 +60,8 @@ namespace scribo WhitespaceSeparator, Noise, Punctuation, - Image + Image, + DropCapital }; @@ -135,6 +136,9 @@ namespace scribo break; case Image: str = "Image"; + break; + case DropCapital: + str = "DropCapital"; break; } @@ -159,6 +163,8 @@ namespace scribo return Punctuation; else if (str == "Image") return Image; + else if (str == "DropCapital") + return DropCapital;
return Undefined; } diff --git a/scribo/scribo/core/tag/paragraph.hh b/scribo/scribo/core/tag/paragraph.hh index 14dd579..9a11a45 100644 --- a/scribo/scribo/core/tag/paragraph.hh +++ b/scribo/scribo/core/tag/paragraph.hh @@ -36,6 +36,20 @@ namespace scribo // Paragraph id tag. struct ParagraphId; + namespace paragraph + { + + + enum Tag + { + None = 0, + Needs_Precise_Stats_Update, + Merged + }; + + + } // end of namespace scribo::paragraph + } // end of namespace scribo diff --git a/scribo/scribo/filter/images_in_paragraph.hh b/scribo/scribo/filter/images_in_paragraph.hh index e05b202..3cf64e1 100644 --- a/scribo/scribo/filter/images_in_paragraph.hh +++ b/scribo/scribo/filter/images_in_paragraph.hh @@ -101,12 +101,12 @@ namespace scribo // => Ignore it. if (tl && tr && ml && mc && mr && bl && br) elts(c).update_tag(component::Ignored); - - // FIXME: warning this call may produce inconsistent data - // Ignored components are still in the separator image... - doc.set_elements(elts); } + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... 
+ doc.set_elements(elts); + trace::exiting("scribo::filter::images_in_paragraph"); } diff --git a/scribo/scribo/filter/paragraphs_bbox_overlap.hh b/scribo/scribo/filter/paragraphs_bbox_overlap.hh index d40d42f..188a77e 100644 --- a/scribo/scribo/filter/paragraphs_bbox_overlap.hh +++ b/scribo/scribo/filter/paragraphs_bbox_overlap.hh @@ -126,70 +126,113 @@ namespace scribo const box2d& b_ = parset(cur_id).bbox(); - if (parset(cur_id).nlines() > 1) + if (parset(cur_id).nlines() > 3) { mln::draw::box_plain(billboard, b_, cur_id); continue; } - const unsigned tl = billboard(b_.pmin()); - const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); - const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); - const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); - const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); - const unsigned br = billboard(b_.pmax()); - - typedef std::set<unsigned> set_t; - set_t labels; - labels.insert(tl); - labels.insert(tl); - labels.insert(tr); - labels.insert(ml); - labels.insert(mc); - labels.insert(mr); - labels.insert(bl); - labels.insert(br); - - for (set_t::const_iterator it = labels.begin(); - it != labels.end(); - ++it) - if (not_to_ignore(*it)) - { - box2d b2 = output(*it).bbox(); - box2d b_i = scribo::util::box_intersection(b_, b2); - volatile float - b_ratio = b_i.nsites() / (float)b_.nsites(), - b2_ratio = b_i.nsites() / (float)b2.nsites(); + // Box is mostly in the background => do nothing. + if (mc == 0) + { + mln::draw::box_plain(billboard, b_, cur_id); + continue; + } + else // Bbox center is inside another box. Check if we can + // merge the current box with it. + { + // Consider other potential overlapping bboxes. 
+ const unsigned tl = billboard(b_.pmin()); + const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); + const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); + const unsigned br = billboard(b_.pmax()); + + typedef std::set<unsigned> set_t; + set_t labels; + labels.insert(tl); + labels.insert(tr); + labels.insert(mc); + labels.insert(bl); + labels.insert(br); + + // FIXME: check that there are at least 3 points (including + // the center) in another paragraph. + + // The potential merged bbox is already ignored or the + // current bbox overlaps with several bboxes. + // => Ignore current bbox . + // + if (!not_to_ignore(mc) + || (labels.size() > 1 && labels.find(0) == labels.end())) + { + mln::draw::box_plain(billboard, b_, cur_id); // Really? + not_to_ignore(cur_id) = false; + continue; + } - if (b2_ratio == 1) + for (set_t::const_iterator it = labels.begin(); + it != labels.end(); ++it) + if (*it) { - // Merge paragraphs and redraw the new bbox. - output(cur_id).fast_merge(output(*it)); - mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id); + mln_assertion(*it != mc); + + box2d b2 = output(*it).bbox(); + box2d b_i = scribo::util::box_intersection(b_, b2); + volatile float + b_ratio = b_i.nsites() / (float)b_.nsites(); + + // If the bbox is widely included in another box. + if (b_ratio > 0.8) + { + output(mc).fast_merge(output(cur_id)); + mln::draw::box_plain(billboard, parset(mc).bbox(), mc); + } + else + mln::draw::box_plain(billboard, parset(cur_id).bbox(), cur_id); + break; } - else if (b_ratio == 1) - { - // Merge paragraphs and redraw the new bbox. 
- output(*it).fast_merge(output(cur_id)); - mln::draw::box_plain(billboard, output(*it).bbox(), *it); - } - else if ((b_ratio > 0.4 || b2_ratio > 0.9)) - { - // si b_ est inclus dans une boite dont le nombre de - // comp > 4 => invalid juste b_ sinon => invalid b_ et - // b2 - not_to_ignore(cur_id) = false; - - if (parset(*it).nlines() < 4) - not_to_ignore(*it) = false; - } - } - mln::draw::box_plain(billboard, b_, cur_id); + } } + // if (not_to_ignore(*it)) + // { + // box2d b2 = output(*it).bbox(); + // box2d b_i = scribo::util::box_intersection(b_, b2); + + // volatile float + // b_ratio = b_i.nsites() / (float)b_.nsites(), + // b2_ratio = b_i.nsites() / (float)b2.nsites(); + + // if (b2_ratio == 1) + // { + // // Merge paragraphs and redraw the new bbox. + // output(cur_id).fast_merge(output(*it)); + // mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id); + // } + // else if (b_ratio == 1) + // { + // // Merge paragraphs and redraw the new bbox. + // output(*it).fast_merge(output(cur_id)); + // mln::draw::box_plain(billboard, output(*it).bbox(), *it); + // } + // else if ((b_ratio > 0.4 || b2_ratio > 0.9)) + // { + // // si b_ est inclus dans une boite dont le nombre de + // // comp > 4 => invalid juste b_ sinon => invalid b_ et + // // b2 + // not_to_ignore(cur_id) = false; + + // if (parset(*it).nlines() < 4) + // not_to_ignore(*it) = false; + // } + // } + + // mln::draw::box_plain(billboard, b_, cur_id); + // } + output.invalidate(not_to_ignore); for_all_paragraphs(p, output) diff --git a/scribo/scribo/filter/paragraphs_in_borders.hh b/scribo/scribo/filter/paragraphs_in_borders.hh new file mode 100644 index 0000000..8953282 --- /dev/null +++ b/scribo/scribo/filter/paragraphs_in_borders.hh @@ -0,0 +1,140 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. 
+// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH +# define SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH + +/// \file +/// +/// Invalidate false positive paragraphs. +/// \fixme Share same test canvas as text::merging. + + +# include <mln/core/concept/image.hh> +# include <scribo/core/component_set.hh> +# include <scribo/core/document.hh> +# include <scribo/util/box_is_included.hh> + +namespace scribo +{ + + namespace filter + { + + using namespace mln; + + + /// Invalidate paragraphs located close to the image borders. + /// The border size is min(43, 0.02 * image width), for all four borders. + /// \param[in,out] doc A document structure. + /// + /// Warning: paragraphs are not removed. A valid paragraph whose + /// bounding box is included in a border area is only invalidated; + /// the paragraph set is then passed back to doc.set_paragraphs(). + /// + /// \verbatim + /// + /// ----------- + /// |_!____!__| + /// | ! !
<--------- Paragraphs located in this area are + /// | ! ! | invalidated. + /// | ! ! | + /// |_!____!__| + /// | ! ! | + /// ----------- + /// + /// \endverbatim + // + template <typename L> + void + paragraphs_in_borders(document<L>& doc); + + +# ifndef MLN_INCLUDE_ONLY + + template <typename L> + void + paragraphs_in_borders(document<L>& doc) + { + trace::entering("scribo::filter::paragraphs_in_borders"); + + mln_precondition(doc.is_valid()); + + const mln::image2d<mln::value::rgb8>& ima = doc.image(); + + unsigned border_size = std::min(43., 0.02 * ima.domain().width()); + + /// pt + /// ptl X------X--- + /// |_!____!__X ptr + /// | ! ! | + /// | ! ! | + /// | ! ! | + /// pbl X_!____!__| + /// | ! ! | + /// --X-------X + /// pb pbr + /// + point2d + ptl = ima.domain().pmin(), + pt(geom::min_row(ima), geom::max_col(ima) - border_size), + ptr(border_size, geom::max_col(ima)), + pbr = ima.domain().pmax(), + pb(geom::max_row(ima), border_size), + pbl(geom::max_row(ima) - border_size, geom::min_col(ima)); + + box2d + bt(ptl, ptr), + br(pt, pbr), + bb(pbl, pbr), + bl(ptl, pb); + + // Paragraphs (processed only when the document has text) + if (doc.has_text()) + { + paragraph_set<L> parset = doc.paragraphs(); + for_all_paragraphs(p, parset) + if (parset(p).is_valid()) + if (util::box_is_included(parset(p).bbox(), bt) + || util::box_is_included(parset(p).bbox(), br) + || util::box_is_included(parset(p).bbox(), bb) + || util::box_is_included(parset(p).bbox(), bl)) + { + parset(p).invalidate(); + } + + doc.set_paragraphs(parset); + } + + trace::exiting("scribo::filter::paragraphs_in_borders"); + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::filter + +} // end of namespace scribo + +#endif // !
SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH diff --git a/scribo/scribo/filter/paragraphs_in_image.hh b/scribo/scribo/filter/paragraphs_in_image.hh index 1029430..f67b863 100644 --- a/scribo/scribo/filter/paragraphs_in_image.hh +++ b/scribo/scribo/filter/paragraphs_in_image.hh @@ -89,8 +89,6 @@ namespace scribo && doc.elements()(e).type() == component::Image) mln::draw::box_plain(billboard, doc.elements()(e).bbox(), true); - mln::io::pbm::save(billboard, "billboard_parimage.pbm"); - const paragraph_set<L>& parset = doc.paragraphs(); mln::util::array<bool> not_to_ignore(parset.nelements() + 1, true); not_to_ignore(0) = false; @@ -101,15 +99,34 @@ namespace scribo const bool tl = billboard(b_.pmin()), tr = billboard.at_(b_.pmin().row(), b_.pmax().col()), - ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()), mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()), - mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()), bl = billboard.at_(b_.pmax().row(), b_.pmin().col()), br = billboard(b_.pmax()); + typedef mln::util::set<int> set_t; + set_t s; + s.insert(tl); + s.insert(tr); + s.insert(mc); + s.insert(bl); + s.insert(br); + + if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0))) + continue; + // The paragraph is fully included in an image. 
- if (tl && tr && ml && mc && mr && bl && br) - not_to_ignore(cur_id) = false; + for_all_elements(e, s) + if (s[e] != 0 + && (mc != 0 && mc == s[e] + && ((tl == mc && bl == mc) + || (tr == mc && br == mc) + || (tl == mc && tr == mc) + || (bl == mc && br == mc)))) + { +// if (tl && tr && ml && mc && mr && bl && br) + not_to_ignore(cur_id) = false; + break; + } } paragraph_set<L> output = parset.duplicate(); diff --git a/scribo/scribo/filter/separators_in_borders.hh b/scribo/scribo/filter/separators_in_borders.hh new file mode 100644 index 0000000..8ccb6b1 --- /dev/null +++ b/scribo/scribo/filter/separators_in_borders.hh @@ -0,0 +1,206 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. 
+ +#ifndef SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH +# define SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH + +/// \file +/// +/// Invalidate false positive separators. +/// \fixme Share same test canvas as text::merging. + + +# include <mln/core/concept/image.hh> +# include <scribo/core/component_set.hh> +# include <scribo/core/document.hh> +# include <scribo/util/box_is_included.hh> + +namespace scribo +{ + + namespace filter + { + + using namespace mln; + + + /// Invalidate separators located close to the image borders. + /// + /// \param[in,out] doc A document structure. + /// + /// Warning: it does not remove separators from separator + /// image. It only invalidate separator components in their + /// respective component_set. + /// + /// \verbatim + /// + /// ----------- + /// |_!____!__| + /// | ! ! <--------- Separators located in this area are + /// | ! ! | invalidated. + /// | ! ! | + /// |_!____!__| + /// | ! ! | + /// ----------- + /// + /// \endverbatim + // + template <typename L> + void + separators_in_borders(document<L>& doc, float vratio, float hratio); + + +# ifndef MLN_INCLUDE_ONLY + + template <typename L> + void + separators_in_borders(document<L>& doc, float vratio, float hratio) + { + trace::entering("scribo::filter::separators_in_borders"); + + mln_precondition(doc.is_valid()); + + const mln::image2d<mln::value::rgb8>& ima = doc.image(); + + // Horizontal separators + if (doc.has_hline_seps()) + { + unsigned border_size = hratio * std::min(ima.domain().width(), ima.domain().height()); + + /// pt + /// ptl X------X--- + /// |_!____!__X ptr + /// | ! ! | + /// | ! ! | + /// | ! ! | + /// pbl X_!____!__| + /// | ! ! 
| + /// --X-------X + /// pb pbr + /// + point2d + ptl = ima.domain().pmin(), + pt(geom::min_row(ima), geom::max_col(ima) - border_size), + ptr(border_size, geom::max_col(ima)), + pbr = ima.domain().pmax(), + pb(geom::max_row(ima), border_size), + pbl(geom::max_row(ima) - border_size, geom::min_col(ima)); + + box2d + bt(ptl, ptr), + br(pt, pbr), + bb(pbl, pbr), + bl(ptl, pb); + + + component_set<L> hline = doc.hline_seps_comps().duplicate(); + for_all_comps(c, hline) + if (hline(c).is_valid()) + if (util::box_is_included(hline(c).bbox(), bt) + || util::box_is_included(hline(c).bbox(), br) + || util::box_is_included(hline(c).bbox(), bb) + || util::box_is_included(hline(c).bbox(), bl)) + { + hline(c).update_tag(component::Ignored); + } + + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_hline_separators(doc.hline_seps(), hline); + } + + + // Vertical separators + if (doc.has_vline_seps()) + { + unsigned border_size = vratio * std::min(ima.domain().width(), ima.domain().height()); + + /// pt + /// ptl X------X--- + /// |_!____!__X ptr + /// | ! ! | + /// | ! ! | + /// | ! ! | + /// pbl X_!____!__| + /// | ! ! 
| + /// --X-------X + /// pb pbr + /// + point2d + ptl = ima.domain().pmin(), + pt(geom::min_row(ima), geom::max_col(ima) - border_size), + ptr(border_size, geom::max_col(ima)), + pbr = ima.domain().pmax(), + pb(geom::max_row(ima), border_size), + pbl(geom::max_row(ima) - border_size, geom::min_col(ima)); + + box2d + bt(ptl, ptr), + br(pt, pbr), + bb(pbl, pbr), + bl(ptl, pb); + + + component_set<L> vline = doc.vline_seps_comps().duplicate(); + for_all_comps(c, vline) + if (vline(c).is_valid()) + { + if (util::box_is_included(vline(c).bbox(), bt) + || util::box_is_included(vline(c).bbox(), br) + || util::box_is_included(vline(c).bbox(), bb) + || util::box_is_included(vline(c).bbox(), bl)) + { + // std::cout << vline(c).bbox() << " is included in "; + // if (util::box_is_included(vline(c).bbox(), bt)) + // std::cout << bt << std::endl; + // if (util::box_is_included(vline(c).bbox(), br)) + // std::cout << br << std::endl; + // if (util::box_is_included(vline(c).bbox(), bb)) + // std::cout << bb << std::endl; + // if (util::box_is_included(vline(c).bbox(), bl)) + // std::cout << bl << std::endl; + + vline(c).update_tag(component::Ignored); + } + // else + // { + // std::cout << vline(c).bbox() << " is not included in " << bt << " - " << br << " - " << bb << " - " << bl << std::endl; + // } + } + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_vline_separators(doc.vline_seps(), vline); + } + + trace::exiting("scribo::filter::separators_in_borders"); + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::filter + +} // end of namespace scribo + +#endif // ! 
SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH diff --git a/scribo/scribo/filter/separators_in_element.hh b/scribo/scribo/filter/separators_in_element.hh index 228d82f..a8b0ebb 100644 --- a/scribo/scribo/filter/separators_in_element.hh +++ b/scribo/scribo/filter/separators_in_element.hh @@ -90,26 +90,26 @@ namespace scribo { component_set<L> hline = doc.hline_seps_comps().duplicate(); for_all_comps(c, hline) - { - const mln_box(L)& b_ = hline(c).bbox(); - - const bool tl = billboard(b_.pmin()); - const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); - const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); - const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); - const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); - const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); - const bool br = billboard(b_.pmax()); - - // This separator is included in an element (picture, drawing...) - // => Ignore it. - if (tl && tr && ml && mc && mr && bl && br) - hline(c).update_tag(component::Ignored); - - // FIXME: warning this call may produce inconsistent data - // Ignored components are still in the separator image... - doc.set_hline_separators(doc.hline_seps(), hline); - } + if (hline(c).is_valid()) + { + const mln_box(L)& b_ = hline(c).bbox(); + + const bool tl = billboard(b_.pmin()); + const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); + const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); + const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); + const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); + const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); + const bool br = billboard(b_.pmax()); + + // This separator is included in an element (picture, drawing...) + // => Ignore it. 
+ if (tl && tr && ml && mc && mr && bl && br) + hline(c).update_tag(component::Ignored); + } + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_hline_separators(doc.hline_seps(), hline); } // Vertical separators @@ -117,29 +117,29 @@ namespace scribo { component_set<L> vline = doc.vline_seps_comps().duplicate(); for_all_comps(c, vline) - { - const mln_box(L)& b_ = vline(c).bbox(); - - const bool tl = billboard(b_.pmin()); - const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); - const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); - const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); - const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); - const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); - const bool br = billboard(b_.pmax()); - - // This separator is included in an element (picture, drawing...) - // => Ignore it. - if (tl && tr && ml && mc && mr && bl && br) - vline(c).update_tag(component::Ignored); - - // FIXME: warning this call may produce inconsistent data - // Ignored components are still in the separator image... - doc.set_vline_separators(doc.vline_seps(), vline); - } - - trace::exiting("scribo::filter::separators_in_element"); + if (vline(c).is_valid()) + { + const mln_box(L)& b_ = vline(c).bbox(); + + const bool tl = billboard(b_.pmin()); + const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); + const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); + const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); + const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); + const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); + const bool br = billboard(b_.pmax()); + + // This separator is included in an element (picture, drawing...) + // => Ignore it. 
+ if (tl && tr && ml && mc && mr && bl && br) + vline(c).update_tag(component::Ignored); + } + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_vline_separators(doc.vline_seps(), vline); } + + trace::exiting("scribo::filter::separators_in_element"); } # endif // ! MLN_INCLUDE_ONLY diff --git a/scribo/scribo/filter/separators_in_paragraph.hh b/scribo/scribo/filter/separators_in_paragraph.hh index 3e7a150..7c157be 100644 --- a/scribo/scribo/filter/separators_in_paragraph.hh +++ b/scribo/scribo/filter/separators_in_paragraph.hh @@ -58,14 +58,14 @@ namespace scribo /// template <typename L> void - separators_in_paragraph(document<L>& doc); + separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned vmax_size); # ifndef MLN_INCLUDE_ONLY template <typename L> void - separators_in_paragraph(document<L>& doc) + separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned vmax_size) { trace::entering("scribo::filter::separators_in_paragraph"); @@ -90,26 +90,28 @@ namespace scribo { component_set<L> hline = doc.hline_seps_comps().duplicate(); for_all_comps(c, hline) - { - const mln_box(L)& b_ = hline(c).bbox(); - - const bool tl = billboard(b_.pmin()); - const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); - const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); - const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); - const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); - const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); - const bool br = billboard(b_.pmax()); - - // This separator is included in an element (picture, drawing...) - // => Ignore it. - if (tl && tr && ml && mc && mr && bl && br) - hline(c).update_tag(component::Ignored); - - // FIXME: warning this call may produce inconsistent data - // Ignored components are still in the separator image... 
- doc.set_hline_separators(doc.hline_seps(), hline); - } + if (hline(c).is_valid()) + { + const mln_box(L)& b_ = hline(c).bbox(); + + const bool tl = billboard(b_.pmin()); + const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); + const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); + const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); + const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); + const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); + const bool br = billboard(b_.pmax()); + + // This separator is included in an element (picture, drawing...) + // => Ignore it. + if (tl && tr && ml && mc && mr && bl && br + && hline(c).bbox().width() < hmax_size) + hline(c).update_tag(component::Ignored); + } + + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_hline_separators(doc.hline_seps(), hline); } // Vertical separators @@ -117,29 +119,31 @@ namespace scribo { component_set<L> vline = doc.vline_seps_comps().duplicate(); for_all_comps(c, vline) - { - const mln_box(L)& b_ = vline(c).bbox(); - - const bool tl = billboard(b_.pmin()); - const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); - const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); - const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); - const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); - const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); - const bool br = billboard(b_.pmax()); - - // This separator is included in an element (picture, drawing...) - // => Ignore it. - if (tl && tr && ml && mc && mr && bl && br) - vline(c).update_tag(component::Ignored); - - // FIXME: warning this call may produce inconsistent data - // Ignored components are still in the separator image... 
- doc.set_vline_separators(doc.vline_seps(), vline); - } - - trace::exiting("scribo::filter::separators_in_paragraph"); + if (vline(c).is_valid()) + { + const mln_box(L)& b_ = vline(c).bbox(); + + const bool tl = billboard(b_.pmin()); + const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); + const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()); + const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); + const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); + const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); + const bool br = billboard(b_.pmax()); + + // This separator is included in an element (picture, drawing...) + // => Ignore it. + if (tl && tr && ml && mc && mr && bl && br + && vline(c).bbox().height() < vmax_size) + vline(c).update_tag(component::Ignored); + } + + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_vline_separators(doc.vline_seps(), vline); } + + trace::exiting("scribo::filter::separators_in_paragraph"); } # endif // ! MLN_INCLUDE_ONLY diff --git a/scribo/scribo/filter/separators_vert_in_borders.hh b/scribo/scribo/filter/separators_vert_in_borders.hh new file mode 100644 index 0000000..4a9e806 --- /dev/null +++ b/scribo/scribo/filter/separators_vert_in_borders.hh @@ -0,0 +1,143 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH +# define SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH + +/// \file +/// +/// Invalidate false positive separators. +/// \fixme Share same test canvas as text::merging. + + +# include <mln/core/concept/image.hh> +# include <scribo/core/component_set.hh> +# include <scribo/core/document.hh> +# include <scribo/util/box_is_included.hh> + + +namespace scribo +{ + + namespace filter + { + + using namespace mln; + + + /// Invalidate separators located close to the image borders. + /// + /// \param[in,out] doc A document structure. + /// + /// Warning: it does not remove separators from separator + /// image. It only invalidates separator components in their + /// respective component_set. + /// + /// \verbatim + /// + /// ----------- + /// |_!____!__| + /// | ! ! <--------- Separators located in this area are + /// | ! ! | invalidated. + /// | ! ! | + /// |_!____!__| + /// | ! ! 
| + /// ----------- + /// + /// \endverbatim + // + template <typename L> + void + separators_vert_in_borders(document<L>& doc); + + +# ifndef MLN_INCLUDE_ONLY + + template <typename L> + void + separators_vert_in_borders(document<L>& doc) + { + trace::entering("scribo::filter::separators_vert_in_borders"); + + mln_precondition(doc.is_valid()); + + const mln::image2d<mln::value::rgb8>& ima = doc.image(); + + float border_size = std::min(43., 0.05 * ima.domain().width()); + + /// pt + /// ptl X------X--- + /// |_!____!__X ptr + /// | ! ! | + /// | ! ! | + /// | ! ! | + /// pbl X_!____!__| + /// | ! ! | + /// --X-------X + /// pb pbr + /// + point2d + ptl = ima.domain().pmin(), + pt(geom::min_row(ima), geom::max_col(ima) - border_size), + ptr(border_size, geom::max_col(ima)), + pbr = ima.domain().pmax(), + pb(geom::max_row(ima), border_size), + pbl(geom::max_row(ima) - border_size, geom::min_col(ima)); + + box2d + bt(ptl, ptr), + br(pt, pbr), + bb(pbl, pbr), + bl(ptl, pb); + + // Vertical separators + if (doc.has_vline_seps()) + { + component_set<L> vline = doc.vline_seps_comps().duplicate(); + for_all_comps(c, vline) + if (vline(c).is_valid()) + if (util::box_is_included(vline(c).bbox(), bt) + || util::box_is_included(vline(c).bbox(), br) + || util::box_is_included(vline(c).bbox(), bb) + || util::box_is_included(vline(c).bbox(), bl)) + { + vline(c).update_tag(component::Ignored); + } + + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_vline_separators(doc.vline_seps(), vline); + } + + trace::exiting("scribo::filter::separators_vert_in_borders"); + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::filter + +} // end of namespace scribo + +#endif // ! 
SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh index a4715f5..520a743 100644 --- a/scribo/scribo/io/img/internal/debug_img_visitor.hh +++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh @@ -34,7 +34,7 @@ # include <mln/core/image/image2d.hh> # include <mln/value/rgb8.hh> -# include <mln/draw/site_set.hh> +# include <mln/draw/polygon.hh> # include <mln/subsampling/antialiased.hh> # include <mln/morpho/elementary/gradient_external.hh> @@ -45,7 +45,7 @@ # include <scribo/util/component_precise_outline.hh> # include <scribo/io/img/internal/draw_edges.hh> - +# include <scribo/text/paragraphs_closing.hh> namespace scribo @@ -85,9 +85,6 @@ namespace scribo mln::image2d<value::rgb8>& output; unsigned output_ratio; - // FIXME: we would like its type to be L. - mutable image2d<scribo::def::lbl_type> lbl_sub; - private: // Methods box2d compute_bbox(const box2d& b) const; }; @@ -97,7 +94,7 @@ namespace scribo # ifndef MLN_INCLUDE_ONLY - inline + inline box2d debug_img_visitor::compute_bbox(const box2d& b) const { @@ -131,50 +128,11 @@ namespace scribo // Page elements (Pictures, ...) if (doc.has_elements()) { - // Prepare element edges - - // L lbl = duplicate(doc.elements().labeled_image()); - // for_all_comps(c, doc.elements()) - // if (! doc.elements()(c).is_valid()) - // data::fill(((lbl | doc.elements()(c).bbox()).rw() - // | (pw::value(lbl) == pw::cst(c))).rw(), 0); - - // const L& lbl = doc.lines().components().labeled_image(); - // lbl_sub = mln::subsampling::antialiased(lbl, output_ratio); - - // mln::io::pgm::save(data::wrap(value::int_u8(), lbl), "lbl.pgm"); - // mln::io::pgm::save(data::wrap(value::int_u8(), lbl_sub), "lbl_sub.pgm"); - - // // FIXME: UGLY! Too slow! 
- // scribo::def::lbl_type nlabels; - // component_set<L> elts = primitive::extract::components( - // data::convert(bool(), lbl_sub), - // c8(), - // nlabels); - - // Preserving elements tags - // if (doc.elements().nelements() != elts.nelements()) - // { - // std::cerr << "Warnig: could not preserve element type in " - // << "img debug output." << std::endl; - // std::cerr << "The number of non text element has changed while " - // << "subsampling images : " - // << doc.elements().nelements() << " vs " - // << elts.nelements() << std::endl; - // } - // else - // for_all_comps(c, doc.elements()) - // { - // elts(c).update_type(doc.elements()(c).type()); - // elts(c).update_tag(doc.elements()(c).tag()); - // } - for_all_comps(e, doc.elements()) if (doc.elements()(e).is_valid()) doc.elements()(e).accept(*this); } - // line seraparators if (doc.has_vline_seps()) for_all_comps(c, doc.vline_seps_comps()) @@ -198,23 +156,28 @@ namespace scribo scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv(); const L& lbl = info.holder().labeled_image(); p_array<point2d> - par = scribo::util::component_precise_outline( - extend((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id)), 0)); + par = scribo::util::component_precise_outline(lbl | info.bbox(), id); switch (info.type()) { case component::HorizontalLineSeparator: case component::VerticalLineSeparator: { - mln::draw::site_set(output, par, literal::cyan, output_ratio); + mln::draw::polygon(output, par, literal::cyan, output_ratio); } break; + case component::DropCapital: + { + mln::draw::polygon(output, par, literal::violet, output_ratio); + } + break; + default: case component::Image: { - mln::draw::site_set(output, par, literal::orange, output_ratio); + mln::draw::polygon(output, par, literal::orange, output_ratio); } break; } @@ -228,6 +191,9 @@ namespace scribo { const line_set<L>& lines = parset.lines(); + // Prepare paragraph outlines. 
+ L par_clo = text::paragraphs_closing(parset); + for_all_paragraphs(p, parset) if (parset(p).is_valid()) { @@ -235,10 +201,11 @@ namespace scribo for_all_paragraph_lines(lid, line_ids) { - line_id_t l = line_ids(lid); - lines(l).accept(*this); + line_id_t l = line_ids(lid); + lines(l).accept(*this); } + // Adjust bbox to output image size. box2d b = compute_bbox(parset(p).bbox()); b.enlarge(1); b.crop_wrt(output.domain()); diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh index f2c0f5c..7b20970 100644 --- a/scribo/scribo/io/img/internal/full_img_visitor.hh +++ b/scribo/scribo/io/img/internal/full_img_visitor.hh @@ -34,7 +34,7 @@ # include <mln/core/image/image2d.hh> # include <mln/value/rgb8.hh> -# include <mln/draw/site_set.hh> +# include <mln/draw/polygon.hh> # include <mln/draw/box.hh> # include <scribo/core/internal/doc_serializer.hh> @@ -137,22 +137,27 @@ namespace scribo scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv(); const L& lbl = info.holder().labeled_image(); p_array<point2d> - par = scribo::util::component_precise_outline((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id))); + par = scribo::util::component_precise_outline(lbl | info.bbox(), id); switch (info.type()) { case component::HorizontalLineSeparator: case component::VerticalLineSeparator: { - mln::draw::site_set(output, par, literal::cyan); + mln::draw::polygon(output, par, literal::cyan); } break; + case component::DropCapital: + { + mln::draw::polygon(output, par, literal::violet); + } + break; default: case component::Image: { - mln::draw::site_set(output, par, literal::orange); + mln::draw::polygon(output, par, literal::orange); } break; } @@ -164,20 +169,18 @@ namespace scribo void full_img_visitor::visit(const paragraph_set<L>& parset) const { - const line_set<L>& lines = parset.lines(); + // const line_set<L>& lines = parset.lines(); + + // Prepare paragraph outlines. 
+ L par_clo = text::paragraphs_closing(parset); for_all_paragraphs(p, parset) if (parset(p).is_valid()) { - const mln::util::array<line_id_t>& line_ids = parset(p).line_ids(); + p_array<point2d> par = scribo::util::component_precise_outline(par_clo + | parset(p).bbox(), p); - for_all_paragraph_lines(lid, line_ids) - { - line_id_t l = line_ids(lid); - lines(l).accept(*this); - } - - mln::draw::box(output, parset(p).bbox(), literal::blue); + mln::draw::polygon(output, par, literal::blue); } } @@ -186,7 +189,15 @@ namespace scribo void full_img_visitor::visit(const line_info<L>& line) const { - mln::draw::box(output, line.bbox(), literal::red); +// mln::draw::box(output, line.bbox(), literal::red); + + point2d + pmin = line.bbox().pmin(), + pmax = line.bbox().pmax(); + pmax.row() = line.baseline(); + pmin.row() = line.baseline(); + + mln::draw::line(output, pmin, pmax, literal::red); } #endif // MLN_INCLUDE_ONLY diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh index 1659a85..bbdd3e2 100644 --- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh @@ -40,6 +40,7 @@ # include <scribo/io/xml/internal/print_box_coords.hh> # include <scribo/io/xml/internal/print_page_preambule.hh> # include <scribo/io/xml/internal/compute_text_colour.hh> +# include <scribo/text/paragraphs_closing.hh> namespace scribo @@ -160,8 +161,7 @@ namespace scribo scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv(); const L& lbl = info.holder().labeled_image(); p_array<point2d> - par = scribo::util::component_precise_outline( - extend((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id)), 0)); + par = scribo::util::component_precise_outline(lbl | info.bbox(), id); switch (info.type()) { @@ -189,6 +189,17 @@ namespace scribo break; } + case component::DropCapital: + { + output << " <TextRegion id=\"r" << id << "\" " + << " Type=\"Drop_Capital\">" + << std::endl; + 
+ internal::print_image_coords(output, par, " "); + + output << " </TextRegion>" << std::endl; + break; + } default: case component::Image: @@ -216,9 +227,15 @@ namespace scribo { const line_set<L>& lines = parset.lines(); + // Prepare paragraph outlines. + L par_clo = text::paragraphs_closing(parset); + for_all_paragraphs(p, parset) if (parset(p).is_valid()) { + p_array<mln_site(L)> par = scribo::util::component_precise_outline(par_clo + | parset(p).bbox(), p); + const mln::util::array<line_id_t>& line_ids = parset(p).line_ids(); // FIXME: compute that information on the whole paragraph @@ -245,7 +262,7 @@ namespace scribo // <Unicode></Unicode> // </TextEquiv> - internal::print_box_coords(output, parset(p).bbox(), " "); + internal::print_image_coords(output, par, " "); output << " </TextRegion>" << std::endl; } diff --git a/scribo/scribo/io/xml/internal/print_image_coords.hh b/scribo/scribo/io/xml/internal/print_image_coords.hh index ebfe402..41c4e30 100644 --- a/scribo/scribo/io/xml/internal/print_image_coords.hh +++ b/scribo/scribo/io/xml/internal/print_image_coords.hh @@ -69,15 +69,15 @@ namespace scribo const S& b = exact(b_); mln_precondition(b.is_valid()); - ostr << sc << "<coords>" << std::endl; + ostr << sc << "<Coords>" << std::endl; mln_piter(S) p(b); for_all(p) - ostr << sp << "<point x=\"" << p.col() + ostr << sp << "<Point x=\"" << p.col() << "\" y=\"" << p.row() << "\"/>" << std::endl; - ostr << sc << "</coords>" << std::endl; + ostr << sc << "</Coords>" << std::endl; } diff --git a/scribo/scribo/io/xml/internal/print_page_preambule.hh b/scribo/scribo/io/xml/internal/print_page_preambule.hh index bcb6b33..9f00c60 100644 --- a/scribo/scribo/io/xml/internal/print_page_preambule.hh +++ b/scribo/scribo/io/xml/internal/print_page_preambule.hh @@ -30,10 +30,10 @@ /// /// \brief Print PAGE XML format preambule. 
-# include <ctime> # include <fstream> # include <mln/core/alias/box2d.hh> # include <scribo/core/document.hh> +# include <scribo/io/xml/internal/time_info.hh> namespace scribo { @@ -75,17 +75,10 @@ namespace scribo else output << "<PcGts>" << std::endl; - - time_t cur_time = time(NULL); - tm * time_struct; - time_struct = localtime(&cur_time); - char time_info[55]; - strftime(time_info, 55, "%Y-%m-%dT%H:%M:%S", time_struct); - output << " <Metadata>" << std::endl; output << " <Creator>LRDE</Creator>" << std::endl; - output << " <Created>" << time_info << "</Created>" << std::endl; - output << " <LastChange>" << time_info << "</LastChange>" << std::endl; + output << " <Created>" << time_info() << "</Created>" << std::endl; + output << " <LastChange>" << time_info() << "</LastChange>" << std::endl; output << " <Comments>Generated by Scribo from Olena.</Comments>" << std::endl; output << " </Metadata>" << std::endl; diff --git a/scribo/scribo/io/xml/internal/time_info.hh b/scribo/scribo/io/xml/internal/time_info.hh new file mode 100644 index 0000000..6adc49a --- /dev/null +++ b/scribo/scribo/io/xml/internal/time_info.hh @@ -0,0 +1,75 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH +# define SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH + +/// \file +/// +/// Get formatted time info for PAGE XML format. + +# include <ctime> + +namespace scribo +{ + + namespace io + { + + namespace xml + { + + namespace internal + { + + using namespace mln; + +# ifndef MLN_INCLUDE_ONLY + + std::string time_info() + { + time_t cur_time = time(NULL); + tm * time_struct; + time_struct = localtime(&cur_time); + char time_info_[55]; + strftime(time_info_, 55, "%Y-%m-%dT%H:%M:%S", time_struct); + std::string output(time_info_); + + return output; + } + + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::io::xml::internal + + } // end of namespace scribo::io::xml + + } // end of namespace scribo::io + +} // end of namespace scribo + + +#endif // ! SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH diff --git a/scribo/scribo/postprocessing/images_to_drop_capital.hh b/scribo/scribo/postprocessing/images_to_drop_capital.hh new file mode 100644 index 0000000..ca76609 --- /dev/null +++ b/scribo/scribo/postprocessing/images_to_drop_capital.hh @@ -0,0 +1,141 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. 
+// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH +# define SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH + +/// \file +/// +/// Set type for specific images to Drop Capital component. +/// \fixme Share same test canvas as text::merging. + + +# include <mln/core/concept/image.hh> +# include <scribo/core/component_set.hh> +# include <scribo/core/document.hh> + + +namespace scribo +{ + + namespace postprocessing + { + + using namespace mln; + + + /// Set type for specific images to Drop Capital component. + /// + /// \param[in,out] doc A document structure. + /// + /// Images identified as drop capitals are retyped directly in + /// \p doc; nothing is returned. + // + template <typename L> + void + images_to_drop_capital(document<L>& doc); + + +# ifndef MLN_INCLUDE_ONLY + + template <typename L> + void + images_to_drop_capital(document<L>& doc) + { + trace::entering("scribo::postprocessing::images_to_drop_capital"); + + mln_precondition(doc.is_valid()); + + if (! 
doc.has_elements()) + return; + + mln_ch_value(L,bool) billboard; + initialize(billboard, doc.image()); + data::fill(billboard, false); + + for_all_comps(p, doc.paragraphs()) + if (doc.paragraphs()(p).is_valid()) + mln::draw::box_plain(billboard, doc.paragraphs()(p).bbox(), true); + + float min_img_size = 0.2 * (doc.image().domain().width() + + doc.image().domain().height()); + + component_set<L> elts = doc.elements(); + for_all_comps(c, elts) + if (elts(c).is_valid() && elts(c).type() == component::Image) + { + const mln_box(L)& b_ = elts(c).bbox(); + + const bool tl = billboard(b_.pmin()); + const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col()); + const bool mb = billboard.at_(b_.pmax().row(), b_.pcenter().col()); + const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()); + const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()); + const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col()); + const bool br = billboard(b_.pmax()); + + typedef mln::util::set<int> set_t; + set_t s; + s.insert(tl); + s.insert(tr); + s.insert(mb); + s.insert(mc); + s.insert(mr); + s.insert(bl); + s.insert(br); + + if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0))) + continue; + + float elt_size = elts(c).bbox().width() + elts(c).bbox().height(); + for_all_elements(e, s) + if (s[e] != 0 + && (mc != 0 && mc == s[e] + && ((tl == mc && bl == mc) + || (tr == mc && br == mc) + || (bl == mc && br == mc) + || (tl == mc && tr == mc) + || (br == mc && mr == mc && mb == mc))) + && (elt_size < min_img_size)) + { + elts(c).update_type(component::DropCapital); + break; + } + } + + // FIXME: warning this call may produce inconsistent data + // Ignored components are still in the separator image... + doc.set_elements(elts); + + trace::exiting("scribo::postprocessing::images_to_drop_capital"); + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::postprocessing + +} // end of namespace scribo + +#endif // ! 
SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh index ec1d5c8..efc5259 100644 --- a/scribo/scribo/text/paragraphs_closing.hh +++ b/scribo/scribo/text/paragraphs_closing.hh @@ -31,6 +31,9 @@ /// /// Paragraphs closing using CRLA. +# include <mln/draw/line.hh> +# include <scribo/draw/line_components.hh> + namespace scribo { @@ -39,162 +42,217 @@ namespace scribo using namespace mln; - template< typename L, typename V > - void - paragraphs_closing(image2d<V>& output, - const paragraph_set<L>& par_set, - const box2d& domain); + template< typename L> + mln_concrete(L) + paragraphs_closing(const paragraph_set<L>& parset); + # ifndef MLN_INCLUDE_ONLY - template< typename V > - inline - void horizontal_CRLA(const image2d<V>& input, - image2d<V>& output, - const mln::util::array<int>& deltas) + namespace internal { - mln_piter(image2d<V>) p(input.domain()); - int count = 0; - unsigned nrows = input.nrows(); - unsigned ncols = input.ncols(); - V last_pixel_value = 0; - for (unsigned i = 0; i < nrows; ++i) + template<typename L> + inline + void horizontal_CRLA(const Image<L>& input_, + Image<L>& output_, + const mln::util::array<int>& deltas) { - for (unsigned j = 0; j < ncols; ++j) - { - const V& current_pixel = input.at_(i, j); + const L& input = exact(input_); + L& output = exact(output_); + mln_precondition(input.is_valid()); + mln_precondition(output.is_valid()); + + mln_piter(L) p(input.domain()); + int count = 0; + unsigned nrows = input.nrows(); + unsigned ncols = input.ncols(); + mln_value(L) last_pixel_value = 0; - if (!current_pixel) + for (unsigned i = 0; i < nrows; ++i) + { + for (unsigned j = 0; j < ncols; ++j) { - if (last_pixel_value) + const mln_value(L)& current_pixel = input.at_(i, j); + + if (!current_pixel) { - unsigned k = j + 1; - for (; !input.at_(i, k) && (k < ncols); ++k); + if (last_pixel_value) + { + unsigned k = j + 1; + for (; !(input.at_(i, k)) && (k 
< ncols); ++k); - count = k - j; - const int threshold = deltas(last_pixel_value - 1); + count = k - j; + const int threshold = deltas(last_pixel_value); - if (last_pixel_value == input.at_(i, k) && count < threshold) - for (unsigned l = j; l <= k; ++l) - output.at_(i, l) = last_pixel_value; + if (last_pixel_value == input.at_(i, k) && count < threshold) + for (unsigned l = j; l <= k; ++l) + output.at_(i, l) = last_pixel_value; - j = k; - last_pixel_value = 0; + j = k; + last_pixel_value = 0; + } + } + else + { + output.at_(i, j) = current_pixel; + last_pixel_value = current_pixel; } - } - else - { - output.at_(i, j) = current_pixel; - last_pixel_value = current_pixel; } } } - } - template< typename V > - inline - void vertical_CRLA(const image2d<V>& input, - image2d<V>& output, - const mln::util::array<int>& deltas) - { - mln_piter(image2d<V>) p(input.domain()); - int count = 0; - unsigned nrows = input.nrows(); - unsigned ncols = input.ncols(); - V last_pixel_value = 0; - - for (unsigned j = 0; j < ncols; ++j) + template<typename L> + inline + void vertical_CRLA(const Image<L>& input_, + Image<L>& output_, + const mln::util::array<int>& deltas) { - for (unsigned i = 0; i < nrows; ++i) - { - const V& current_pixel = input.at_(i, j); - - if (!current_pixel) - { - if (last_pixel_value) - { - unsigned k = i + 1; - for (; !input.at_(k, j) && (k < nrows); ++k); + const L& input = exact(input_); + L& output = exact(output_); + mln_precondition(input.is_valid()); + mln_precondition(output.is_valid()); - count = k - i; - const int threshold = deltas(last_pixel_value - 1); + mln_piter(L) p(input.domain()); + int count = 0; + unsigned nrows = input.nrows(); + unsigned ncols = input.ncols(); + mln_value(L) last_pixel_value = 0; - if (last_pixel_value == input.at_(k, j) - && count < threshold) - for (unsigned l = i; l <= k; ++l) - output.at_(l, j) = last_pixel_value; + for (unsigned j = 0; j < ncols; ++j) + { + for (unsigned i = 0; i < nrows; ++i) + { + const mln_value(L)& 
current_pixel = input.at_(i, j); - i = k; - last_pixel_value = 0; + if (!current_pixel) + { + if (last_pixel_value) + { + unsigned k = i + 1; + for (; !(input.at_(k, j)) && (k < nrows); ++k); + + count = k - i; + const int threshold = deltas(last_pixel_value); + + if (last_pixel_value == input.at_(k, j) + && count < threshold) + for (unsigned l = i; l <= k; ++l) + output.at_(l, j) = last_pixel_value; + + i = k; + last_pixel_value = 0; + } + } + else + { + output.at_(i, j) = current_pixel; + last_pixel_value = current_pixel; } - } - else - { - output.at_(i, j) = current_pixel; - last_pixel_value = current_pixel; } } } - } - template< typename V > - inline - void CRLA(const image2d<V>& input, - image2d<V>& output, - const mln::util::array<int>& deltas, - const mln::util::array<int>& deltas_factor) - { - horizontal_CRLA(input, output, deltas_factor); - vertical_CRLA(output, output, deltas); - horizontal_CRLA(output, output, deltas_factor); - } + template<typename L> + inline + void CRLA(const Image<L>& input, + Image<L>& output, + const mln::util::array<int>& deltas, + const mln::util::array<int>& deltas_factor) + { + horizontal_CRLA(input, output, deltas_factor); + + debug::logger().log_image(debug::AuxiliaryResults, + output, + "paragraph_closing_horizontal_CRLA"); + + + vertical_CRLA(output, output, deltas); + + debug::logger().log_image(debug::AuxiliaryResults, + output, + "paragraph_closing_vertical_CRLA"); + + horizontal_CRLA(output, output, deltas_factor); + } + + } // end of namespace scribo::text::internal + - template< typename L, typename V > - void - paragraphs_closing(image2d<V>& output, - const paragraph_set<L>& par_set, - const box2d& domain) + template<typename L> + mln_concrete(L) + paragraphs_closing(const paragraph_set<L>& parset) { trace::entering("scribo::text::paragraphs_closing"); - image2d<V> debug(domain); + // FIXME: 'debug' may be useless. 
+ mln_concrete(L) output, debug; + initialize(output, parset.lines().components().labeled_image()); + initialize(debug, output); - mln::util::array<int> deltas; - deltas.reserve(par_set.nelements()); - mln::util::array<int> deltas_factor; - deltas_factor.reserve(par_set.nelements()); + mln::util::array<int> deltas(parset.nelements() + 1, 0); + mln::util::array<int> deltas_factor(parset.nelements() + 1, 0); data::fill(debug, 0); data::fill(output, 0); - const line_set<L>& lines = par_set.lines(); + const line_set<L>& lines = parset.lines(); - for_all_paragraphs(p, par_set) - { - const paragraph_info<L>& current_par = par_set(p); - const mln::util::array<line_id_t>& line_ids = current_par.line_ids(); - const unsigned nelements = line_ids.nelements(); - - for (unsigned i = 0; i < nelements; ++i) + for_all_paragraphs(p, parset) + if (parset(p).is_valid()) { - const line_id_t& line_id = line_ids(i); - const line_info<L>& current_line = lines(line_id); + const paragraph_info<L>& current_par = parset(p); + const mln::util::array<line_id_t>& line_ids = current_par.line_ids(); - draw::line_components(debug, current_line, p); - } + line_id_t last_id = line_ids[0]; + for_all_elements(i, line_ids) + { + const line_id_t& line_id = line_ids(i); + const line_info<L>& current_line = lines(line_id); + + scribo::draw::line_components(debug, current_line, p); + + // HACK DISCLAIMER : this line is drawn in order to be + // sure that every line will be reduced to a single + // component after closing. It is necessary to reduce a + // paragraph to one component in order to extract its + // outline correctly for xml/debug output. 
+ component_id_t last_comp = lines(line_id).component_ids()(0); + for_all_elements(i, lines(line_id).component_ids()) + { + const unsigned c = lines(line_id).component_ids()(i); + mln::draw::line(debug, + lines.components()(c).mass_center(), + lines.components()(last_comp).mass_center(), + p); + last_comp = c; + } - int delta_baseline = current_par.delta_baseline(); + // mln::draw::line(debug, current_line.bbox().pcenter(), lines(last_id).bbox().pcenter(), p); + // last_id = line_id; + } - if (delta_baseline % 2 == 0) + int delta_baseline = current_par.delta_baseline(); + + if (delta_baseline % 2 == 0) --delta_baseline; - deltas.append(delta_baseline); - deltas_factor.append(3 * delta_baseline); - } - CRLA(debug, output, deltas, deltas_factor); + deltas(p) = 2 * delta_baseline; // Vertical + deltas_factor(p) = 3 * delta_baseline; // Horizontal + } + + debug::logger().log_image(debug::AuxiliaryResults, + debug, + "paragraph_closing_input_CRLA"); + + internal::CRLA(debug, output, deltas, deltas_factor); + + debug::logger().log_image(debug::Results, + output, + "paragraph_closing"); - trace::exiting("scribo::draw::line_components"); + trace::exiting("scribo::text::paragraphs_closing"); + return output; } # endif diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh index e0c5b50..24d24a3 100644 --- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh +++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh @@ -53,8 +53,10 @@ # include <scribo/filter/objects_small.hh> # include <scribo/filter/paragraphs_bbox_overlap.hh> # include <scribo/filter/paragraphs_in_image.hh> +# include <scribo/filter/paragraphs_in_borders.hh> # include <scribo/filter/separators_in_element.hh> # include <scribo/filter/separators_in_paragraph.hh> +# include <scribo/filter/separators_in_borders.hh> # include <scribo/filter/images_in_paragraph.hh> # include 
<scribo/primitive/group/from_single_link.hh> @@ -66,6 +68,8 @@ # include <scribo/preprocessing/denoise_fg.hh> +# include <scribo/postprocessing/images_to_drop_capital.hh> + # include <scribo/text/recognition.hh> # include <scribo/text/merging.hh> # include <scribo/text/link_lines.hh> @@ -84,6 +88,7 @@ # include <scribo/io/xml/save.hh> +#include <scribo/io/img/save.hh> namespace scribo { @@ -201,12 +206,22 @@ namespace scribo // Vertical and horizontal separators { + unsigned closing_size = std::min(0.01 * doc.image().domain().width(), + 0.01 * doc.image().domain().height()); + win::hline2d hl(closing_size); + + // Apply a closing::structural in order to disconnected + // parts of a single separator. mln_ch_value(I,bool) vseparators = preprocessing::rotate_90( - primitive::extract::lines_h_thick_and_thin( - preprocessing::rotate_90(processed_image), 101, 3, 0.2, 0.6, 10), false), - hseparators = primitive::extract::lines_h_thick_and_thin( - processed_image, 101, 3); + morpho::closing::structural( + primitive::extract::lines_h_thick_and_thin( + preprocessing::rotate_90(processed_image), + 101, 3, 0.2, 0.6, 10), hl), false), + + hseparators = morpho::closing::structural( + primitive::extract::lines_h_thick_and_thin( + processed_image, 101, 3), hl); doc.set_vline_separators(vseparators); doc.set_hline_separators(hseparators); @@ -509,9 +524,11 @@ namespace scribo on_new_progress_label("Filtering paragraphs"); - parset = filter::paragraphs_bbox_overlap(parset); + paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset); + doc.set_paragraphs(parset_f); - doc.set_paragraphs(parset); + // parset = filter::paragraphs_bbox_overlap(parset); + // doc.set_paragraphs(parset); on_progress(); @@ -540,16 +557,38 @@ namespace scribo on_progress(); +// TEMPORARY DEBUG + on_new_progress_label("Saving debug data"); + doc.set_paragraphs(parset); + scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage); + scribo::io::img::save(doc, 
"full_wo_filter.png", scribo::io::img::DebugWithImage); + doc.set_paragraphs(parset_f); + on_progress(); +// END OF TEMPORARY DEBUG + on_new_progress_label("Cleanup miscellaneous false positive"); filter::separators_in_element(doc); - filter::separators_in_paragraph(doc); + filter::separators_in_paragraph(doc, 81, 121); + filter::separators_in_borders(doc, 0.05, 0.02); + filter::paragraphs_in_image(doc); - filter::images_in_paragraph(doc); + filter::paragraphs_in_borders(doc); on_progress(); + on_new_progress_label("Rebuild extracted images"); + elements = scribo::primitive::extract::non_text_hdoc(doc, closing_size); + doc.set_elements(elements); + + on_progress(); + + on_new_progress_label("Tag images as drop capital"); + + postprocessing::images_to_drop_capital(doc); + + on_progress(); // Saving results if (save_doc_as_xml) @@ -564,6 +603,9 @@ namespace scribo on_end(); + + sleep(10); + return doc; } diff --git a/scribo/scribo/util/box_is_included.hh b/scribo/scribo/util/box_is_included.hh new file mode 100644 index 0000000..dc3f791 --- /dev/null +++ b/scribo/scribo/util/box_is_included.hh @@ -0,0 +1,74 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef SCRIBO_UTIL_BOX_IS_INCLUDED_HH +# define SCRIBO_UTIL_BOX_IS_INCLUDED_HH + +/// \file +/// +/// Check whether a box is included in another one. + + +#include <mln/core/site_set/box.hh> + +namespace scribo +{ + + namespace util + { + using namespace mln; + + /// \brief Check whether a box is included in another one. + /// + /// \return true if \p lhs is included in \p rhs. + // + template <typename P> + bool + box_is_included(const box<P>& lhs, const box<P>& rhs); + + +# ifndef MLN_INCLUDE_ONLY + + template <typename P> + bool + box_is_included(const box<P>& lhs, const box<P>& rhs) + { + trace::entering("scribo::util::box_is_included"); + + for (unsigned i = 0; i < P::dim; ++i) + if (!(lhs.pmin()[i] >= rhs.pmin()[i] && lhs.pmax()[i] <= rhs.pmax()[i])) + return false; + + trace::exiting("scribo::util::box_is_included"); + return true; + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::util + +} // end of namespace scribo + +#endif // ! 
SCRIBO_UTIL_BOX_IS_INCLUDED_HH diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh index 490b814..70fc995 100644 --- a/scribo/scribo/util/component_precise_outline.hh +++ b/scribo/scribo/util/component_precise_outline.hh @@ -40,9 +40,15 @@ # include <mln/io/ppm/save.hh> # include <mln/data/convert.hh> # include <mln/opt/at.hh> +# include <mln/extension/fill.hh> # include <iostream> +#include <mln/io/pgm/save.hh> +#include <mln/data/wrap.hh> +#include <mln/data/convert.hh> + + namespace scribo { @@ -79,7 +85,8 @@ namespace scribo template <typename I> void find_first_point(const I& input, - point2d& p) + point2d& p, + const mln_value(I)& id) { const mln::def::coord mid_row = geom::min_row(input) + (geom::nrows(input) >> 1); @@ -87,7 +94,7 @@ namespace scribo for (mln::def::coord i = geom::min_col(input); i <= geom::max_col(input); ++i) { - if (opt::at(input, mid_row, i)) + if (opt::at(input, mid_row, i) == id) { p.row() = mid_row; p.col() = i; @@ -100,14 +107,15 @@ namespace scribo void left_up(int& direction, const I& input, - const point2d& cur_pt) + const point2d& cur_pt, + const mln_value(I)& id) { const point2d p2(cur_pt.row() + offset[direction][5][1], cur_pt.col() + offset[direction][5][0]); const point2d p3(cur_pt.row() + offset[direction][7][1], cur_pt.col() + offset[direction][7][0]); - if (!input(p2) && input(p3)) + if ((input(p2) != id) && (input(p3) == id)) { direction = 3; return; @@ -130,7 +138,8 @@ namespace scribo void right_up(int& direction, const I& input, - const point2d& cur_pt) + const point2d& cur_pt, + const mln_value(I)& id) { const point2d p1(cur_pt.row() + offset[direction][0][1], cur_pt.col() + offset[direction][0][0]); @@ -139,7 +148,7 @@ namespace scribo const point2d p3(cur_pt.row() + offset[direction][7][1], cur_pt.col() + offset[direction][7][0]); - if (!input(p2) && (input(p1) || input(p3))) + if ((input(p2) != id) && ((input(p1) == id) || (input(p3) == id))) { direction = 
0; return; @@ -162,14 +171,15 @@ namespace scribo void right_down(int& direction, const I& input, - const point2d& cur_pt) + const point2d& cur_pt, + const mln_value(I)& id) { const point2d p2(cur_pt.row() + offset[direction][5][1], cur_pt.col() + offset[direction][5][0]); const point2d p3(cur_pt.row() + offset[direction][7][1], cur_pt.col() + offset[direction][7][0]); - if (!input(p2) && input(p3)) + if ((input(p2) != id) && (input(p3) == id)) { direction = 1; return; @@ -192,7 +202,8 @@ namespace scribo void left_down(int& direction, const I& input, - const point2d& cur_pt) + const point2d& cur_pt, + const mln_value(I)& id) { const point2d p1(cur_pt.row() + offset[direction][0][1], cur_pt.col() + offset[direction][0][0]); @@ -201,7 +212,7 @@ namespace scribo const point2d p3(cur_pt.row() + offset[direction][7][1], cur_pt.col() + offset[direction][7][0]); - if (!input(p2) && (input(p1) || input(p3))) + if ((input(p2) != id) && ((input(p1) == id) || (input(p3) == id))) { direction = 2; return; @@ -225,17 +236,18 @@ namespace scribo void find_next_point(const I& input, point2d& cur_pt, - int& direction) + int& direction, + const mln_value(I)& id) { unsigned i = 0; point2d tmp; switch (direction) { - case 0: left_up(direction, input, cur_pt); break; - case 1: right_up(direction , input, cur_pt); break; - case 2: right_down(direction, input, cur_pt); break; - case 3: left_down(direction, input, cur_pt); break; + case 0: left_up(direction, input, cur_pt, id); break; + case 1: right_up(direction , input, cur_pt, id); break; + case 2: right_down(direction, input, cur_pt, id); break; + case 3: left_down(direction, input, cur_pt, id); break; } for (; i < 8; ++i) @@ -243,7 +255,7 @@ namespace scribo tmp = point2d(cur_pt.row() + offset[direction][i][1], cur_pt.col() + offset[direction][i][0]); - if (input.domain().has(tmp) && input(tmp)) + if (input(tmp) == id) break; } @@ -263,7 +275,7 @@ namespace scribo } void - filter_points(mln::p_array<point2d>& points, + 
filter_points(const mln::p_array<point2d>& points, mln::p_array<point2d>& waypoints) { const unsigned nelements = points.nsites(); @@ -330,33 +342,35 @@ namespace scribo template <typename I> mln::p_array<point2d> - component_precise_outline(const Image<I>& input_) + component_precise_outline(const Image<I>& input_, const mln_value(I)& id) { trace::entering("scribo::util::component_precise_outline"); const I& input = exact(input_); typedef mln_site(I) P; - point2d start_pt; - int direction = 0; + extension::fill(input, 0); + mln::p_array<P> points; points.reserve(std::max(geom::ncols(input), geom::nrows(input))); - internal::find_first_point(input, start_pt); + point2d start_pt; + int direction = 0; + + internal::find_first_point(input, start_pt, id); P cur_pt = start_pt; - internal::find_next_point(input, cur_pt, direction); + internal::find_next_point(input, cur_pt, direction, id); points.append(cur_pt); while (cur_pt != start_pt) { - internal::find_next_point(input, cur_pt, direction); + internal::find_next_point(input, cur_pt, direction, id); points.append(cur_pt); } - - internal::find_next_point(input, cur_pt, direction); + internal::find_next_point(input, cur_pt, direction, id); const std::vector<point2d>& vec_points = points.hook_std_vector_(); @@ -367,16 +381,27 @@ namespace scribo while (cur_pt != start_pt) { - internal::find_next_point(input, cur_pt, direction); + internal::find_next_point(input, cur_pt, direction, id); points.append(cur_pt); } } - // mln::p_array<P> waypoints; - // internal::filter_points(points, waypoints); + std::cout << "Before filter points - " << points.nsites() << std::endl; + + mln::p_array<P> waypoints; + internal::filter_points(points, waypoints); + + std::cout << "After filter points - " << waypoints.nsites() << std::endl; trace::exiting("scribo::util::component_precise_outline"); - return points; + return waypoints; + } + + template <typename I> + mln::p_array<point2d> + component_precise_outline(const Image<I>& input) + { + 
return component_precise_outline(input, true); } # endif // ! MLN_INCLUDE_ONLY -- 1.5.6.5
participants (1)
-
Guillaume Lazzara