---
milena/mln/convert/from_to.hxx | 7 +-
milena/mln/draw/polygon.hh | 105 +++++++
milena/mln/util/object_id.hh | 41 +++-
scribo/scribo/core/paragraph_info.hh | 52 +++-
scribo/scribo/core/tag/component.hh | 7 +-
scribo/scribo/core/tag/paragraph.hh | 14 +
scribo/scribo/filter/images_in_paragraph.hh | 8 +-
scribo/scribo/filter/paragraphs_bbox_overlap.hh | 145 +++++++----
scribo/scribo/filter/paragraphs_in_borders.hh | 140 ++++++++++
scribo/scribo/filter/paragraphs_in_image.hh | 29 ++-
scribo/scribo/filter/separators_in_borders.hh | 206 ++++++++++++++
scribo/scribo/filter/separators_in_element.hh | 84 +++---
scribo/scribo/filter/separators_in_paragraph.hh | 92 ++++---
scribo/scribo/filter/separators_vert_in_borders.hh | 143 ++++++++++
scribo/scribo/io/img/internal/debug_img_visitor.hh | 69 ++----
scribo/scribo/io/img/internal/full_img_visitor.hh | 39 ++-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 23 ++-
.../scribo/io/xml/internal/print_image_coords.hh | 6 +-
.../scribo/io/xml/internal/print_page_preambule.hh | 13 +-
scribo/scribo/io/xml/internal/time_info.hh | 75 +++++
.../postprocessing/images_to_drop_capital.hh | 141 ++++++++++
scribo/scribo/text/paragraphs_closing.hh | 284 ++++++++++++--------
.../toolchain/internal/content_in_hdoc_functor.hh | 58 ++++-
scribo/scribo/util/box_is_included.hh | 74 +++++
scribo/scribo/util/component_precise_outline.hh | 83 ++++--
25 files changed, 1549 insertions(+), 389 deletions(-)
create mode 100644 milena/mln/draw/polygon.hh
create mode 100644 scribo/scribo/filter/paragraphs_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_vert_in_borders.hh
create mode 100644 scribo/scribo/io/xml/internal/time_info.hh
create mode 100644 scribo/scribo/postprocessing/images_to_drop_capital.hh
create mode 100644 scribo/scribo/util/box_is_included.hh
diff --git a/milena/mln/convert/from_to.hxx b/milena/mln/convert/from_to.hxx
index cc7cc15..7891e9a 100644
--- a/milena/mln/convert/from_to.hxx
+++ b/milena/mln/convert/from_to.hxx
@@ -1,4 +1,4 @@
-// Copyright (C) 2008, 2009, 2010 EPITA Research and Development
+// Copyright (C) 2008, 2009, 2010, 2011 EPITA Research and Development
// Laboratory (LRDE)
//
// This file is part of Olena.
@@ -84,6 +84,7 @@ namespace mln
namespace util {
template <typename T> class array;
+ template <typename Tag, typename V> class object_id;
}
namespace value {
@@ -473,6 +474,10 @@ namespace mln
from_to(from.second(), to.second());
}
+ // util::object_id<Tag,V> -> V.
+ template <typename Tag, typename V>
+ void from_to_(const util::object_id<Tag,V>& from, V& to_);
+
} // end of namespace mln::convert::over_load
} // end of namespace mln::convert
diff --git a/milena/mln/draw/polygon.hh b/milena/mln/draw/polygon.hh
new file mode 100644
index 0000000..5c6c917
--- /dev/null
+++ b/milena/mln/draw/polygon.hh
@@ -0,0 +1,105 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef MLN_DRAW_POLYGON_HH
+# define MLN_DRAW_POLYGON_HH
+
+/// \file
+///
+/// Draw a polygon in an image.
+/// \fixme Add specializations for horizontal polygons (use pointers/memset).
+
+# include <mln/core/concept/image.hh>
+# include <mln/core/site_set/p_array.hh>
+# include <mln/draw/line.hh>
+
+
+namespace mln
+{
+
+ namespace draw
+ {
+
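+    /*! Draw a polygon at level \p v in image \p ima.
+     *
+     * \param[in,out] ima The image to be drawn.
+     * \param[in] par The polygon site set.
+     * \param[in] v The value to assign to all drawn pixels.
+     * \param[in] output_ratio The divisor applied to every site of
+     *            \p par before it is drawn.
+     *
+     * \pre \p ima has to be initialized.
+     * \pre \p par has to contain at least two sites.
+     */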
+ template <typename I>
+ void polygon(Image<I>& ima,
+ const p_array<mln_site(I)>& par,
+ const mln_value(I)& v,
+ unsigned output_ratio);
+
+ // \overload
+ template <typename I>
+ void polygon(Image<I>& ima,
+ const p_array<mln_site(I)>& par,
+ const mln_value(I)& v);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+    /// Draw the outline described by \p par in \p ima_.
+    ///
+    /// Every site of \p par is divided by \p output_ratio, then each
+    /// site is joined to the previous one with draw::line() using
+    /// value \p v.  No segment is added between the last and the
+    /// first site.
+    ///
+    /// \pre \p ima_ is valid.
+    /// \pre \p par holds at least two sites.
+    /// \pre \p output_ratio is not null.
+    template <typename I>
+    void polygon(Image<I>& ima_,
+                 const p_array<mln_site(I)>& par,
+                 const mln_value(I)& v,
+                 unsigned output_ratio)
+    {
+      I& ima = exact(ima_);
+      mln_precondition(ima.is_valid());
+      mln_precondition(par.nelements() > 1);
+      // Sites are divided by output_ratio below: reject a null ratio
+      // rather than running into a division by zero.
+      mln_precondition(output_ratio != 0);
+
+      mln_site(I) p_last, tmp;
+      mln_piter(p_array<mln_site(I)>) p(par);
+      p_last = par[0] / output_ratio;
+      for_all(p)
+      {
+        tmp = p / output_ratio;
+        draw::line(ima, p_last, tmp, v);
+        p_last = tmp;
+      }
+    }
+
+    /// \overload
+    /// Forward to the four-argument polygon() with an output ratio
+    /// of 1.
+    template <typename I>
+    void polygon(Image<I>& ima,
+                 const p_array<mln_site(I)>& par,
+                 const mln_value(I)& v)
+    {
+      const unsigned no_scaling = 1;
+      polygon(ima, par, v, no_scaling);
+    }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace mln::draw
+
+} // end of namespace mln
+
+
+#endif // ! MLN_DRAW_POLYGON_HH
diff --git a/milena/mln/util/object_id.hh b/milena/mln/util/object_id.hh
index d7db929..794abc4 100644
--- a/milena/mln/util/object_id.hh
+++ b/milena/mln/util/object_id.hh
@@ -38,6 +38,24 @@
namespace mln
{
+ // Forward declaration
+ namespace util { template <typename Tag, typename V> class object_id; }
+
+ namespace convert
+ {
+
+ namespace over_load
+ {
+
+ // object_id<Tag,V> -> V.
+ template <typename Tag, typename V>
+ void from_to_(const util::object_id<Tag,V>& from, V& to_);
+
+ } // end of namespace mln::convert::over_load
+
+ } // end of namespace mln::convert
+
+
namespace util
{
@@ -90,9 +108,12 @@ namespace mln
bool
operator<(const object_id<Tag,V>& lhs, const object_id<Tag,V>&
rhs);
+ } // end of namespace mln::util
# ifndef MLN_INCLUDE_ONLY
+ namespace util
+ {
template <typename Tag, typename V>
inline
@@ -205,10 +226,26 @@ namespace mln
return lhs.value() < rhs.value();
}
+ } // end of namespace mln::util
-# endif // ! MLN_INCLUDE_ONLY
+ namespace convert
+ {
- } // end of namespace mln::util
+ namespace over_load
+ {
+
+      // object_id<Tag,V> -> V.
+      // Copy the value held by \p from into \p to_.
+      template <typename Tag, typename V>
+      inline
+      void from_to_(const util::object_id<Tag,V>& from, V& to_)
+      {
+        to_ = from.value();
+      }
+
+ } // end of namespace mln::convert::over_load
+
+ } // end of namespace mln::convert
+
+# endif // ! MLN_INCLUDE_ONLY
} // end of namespace mln
diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh
index 90db7da..1029913 100644
--- a/scribo/scribo/core/paragraph_info.hh
+++ b/scribo/scribo/core/paragraph_info.hh
@@ -28,6 +28,7 @@
# include <scribo/core/line_info.hh>
# include <scribo/core/line_links.hh>
+# include <scribo/core/tag/paragraph.hh>
# include <mln/util/array.hh>
# include <mln/accu/shape/bbox.hh>
@@ -79,6 +80,11 @@ namespace scribo
void set_delta_baseline(const int delta_baseline);
int delta_baseline() const;
+ void fast_merge(paragraph_info<L>& info);
+
+ void update_tag(paragraph::Tag tag);
+ paragraph::Tag tag() const;
+
private:
mln::util::array<line_id_t> line_ids_;
mln::accu::shape::bbox<mln_site(L)> bbox_;
@@ -88,7 +94,7 @@ namespace scribo
float color_reliability_;
int delta_baseline_;
- bool needs_stats_update_;
+ paragraph::Tag tag_;
bool is_valid_;
};
@@ -103,13 +109,13 @@ namespace scribo
template <typename L>
paragraph_info<L>::paragraph_info()
- : needs_stats_update_(false), is_valid_(false)
+ : tag_(paragraph::None), is_valid_(false)
{
}
template <typename L>
paragraph_info<L>::paragraph_info(const line_links<L>& llinks)
- : llinks_(llinks), needs_stats_update_(false), is_valid_(true)
+ : llinks_(llinks), tag_(paragraph::None), is_valid_(true)
{
}
@@ -121,7 +127,7 @@ namespace scribo
bbox_.take(line.bbox());
// More data may need to be updated!
- needs_stats_update_ = true;
+ tag_ = paragraph::Needs_Precise_Stats_Update;
}
template <typename L>
@@ -206,14 +212,14 @@ namespace scribo
bool
paragraph_info<L>::needs_stats_update() const
{
- return needs_stats_update_;
+ return tag_ == paragraph::Needs_Precise_Stats_Update;
}
template <typename L>
void
paragraph_info<L>::force_stats_update()
{
- if (!needs_stats_update_)
+ if (!needs_stats_update())
return;
const line_set<L>& lines = llinks_.lines();
@@ -259,7 +265,7 @@ namespace scribo
// FIXME: Update paragraph stats
- needs_stats_update_ = false;
+ tag_ = paragraph::None;
}
template <typename L>
@@ -277,6 +283,38 @@ namespace scribo
}
+  /// Merge \p other into this paragraph without recomputing precise
+  /// statistics.
+  ///
+  /// This paragraph is tagged paragraph::Needs_Precise_Stats_Update;
+  /// \p other is tagged paragraph::Merged and invalidated.  The
+  /// bounding box of \p other is added to the one of this paragraph,
+  /// the largest of the two delta baselines is kept and the line ids
+  /// of \p other are appended to the ones of this paragraph.
   template <typename L>
+  void
+  paragraph_info<L>::fast_merge(paragraph_info<L>& other)
+  {
+    // Flag both sides of the merge.
+    this->tag_ = paragraph::Needs_Precise_Stats_Update;
+    other.update_tag(paragraph::Merged);
+    other.invalidate();
+
+    // Extend the bounding box with the one of the merged paragraph.
+    bbox_.take(other.bbox());
+
+    // FIXME: delta base line should be updated correctly!!
+    // For now, keep the largest of both values.
+    const int max_delta_baseline = std::max(other.delta_baseline_,
+                                            delta_baseline_);
+    set_delta_baseline(max_delta_baseline);
+
+    // Gather the line ids of both paragraphs.
+    line_ids_.append(other.line_ids());
+  }
+
+  /// Replace the tag of this paragraph with \p tag.
+  template <typename L>
+  void
+  paragraph_info<L>::update_tag(paragraph::Tag tag)
+  {
+    this->tag_ = tag;
+  }
+
+  /// Return the tag currently set on this paragraph.
+  template <typename L>
+  paragraph::Tag
+  paragraph_info<L>::tag() const
+  {
+    return this->tag_;
+  }
+
+ template <typename L>
bool
operator==(const paragraph_info<L>& lhs, const paragraph_info<L>&
rhs)
{
diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh
index dc9db90..d5afb36 100644
--- a/scribo/scribo/core/tag/component.hh
+++ b/scribo/scribo/core/tag/component.hh
@@ -60,7 +60,8 @@ namespace scribo
WhitespaceSeparator,
Noise,
Punctuation,
- Image
+ Image,
+ DropCapital
};
@@ -135,6 +136,9 @@ namespace scribo
           break;
         case Image:
           str = "Image";
+          break;
+        case DropCapital:
+          str = "DropCapital";
           break;
         }
@@ -159,6 +162,8 @@ namespace scribo
return Punctuation;
else if (str == "Image")
return Image;
+ else if (str == "DropCapital")
+ return DropCapital;
return Undefined;
}
diff --git a/scribo/scribo/core/tag/paragraph.hh b/scribo/scribo/core/tag/paragraph.hh
index 14dd579..9a11a45 100644
--- a/scribo/scribo/core/tag/paragraph.hh
+++ b/scribo/scribo/core/tag/paragraph.hh
@@ -36,6 +36,20 @@ namespace scribo
// Paragraph id tag.
struct ParagraphId;
+ namespace paragraph
+ {
+
+
+    /// State attached to a paragraph.
+    enum Tag
+    {
+      /// Default tag: nothing specific to report.
+      None = 0,
+      /// The paragraph has been modified and its statistics have not
+      /// been recomputed yet.
+      Needs_Precise_Stats_Update = 1,
+      /// The paragraph has been merged into another one.
+      Merged = 2
+    };
+
+
+ } // end of namespace scribo::paragraph
+
} // end of namespace scribo
diff --git a/scribo/scribo/filter/images_in_paragraph.hh
b/scribo/scribo/filter/images_in_paragraph.hh
index e05b202..3cf64e1 100644
--- a/scribo/scribo/filter/images_in_paragraph.hh
+++ b/scribo/scribo/filter/images_in_paragraph.hh
@@ -101,12 +101,12 @@ namespace scribo
// => Ignore it.
if (tl && tr && ml && mc && mr && bl
&& br)
elts(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_elements(elts);
}
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_elements(elts);
+
trace::exiting("scribo::filter::images_in_paragraph");
}
diff --git a/scribo/scribo/filter/paragraphs_bbox_overlap.hh
b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
index d40d42f..188a77e 100644
--- a/scribo/scribo/filter/paragraphs_bbox_overlap.hh
+++ b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
@@ -126,70 +126,113 @@ namespace scribo
const box2d& b_ = parset(cur_id).bbox();
- if (parset(cur_id).nlines() > 1)
+ if (parset(cur_id).nlines() > 3)
{
mln::draw::box_plain(billboard, b_, cur_id);
continue;
}
- const unsigned tl = billboard(b_.pmin());
- const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const unsigned br = billboard(b_.pmax());
-
- typedef std::set<unsigned> set_t;
- set_t labels;
- labels.insert(tl);
- labels.insert(tl);
- labels.insert(tr);
- labels.insert(ml);
- labels.insert(mc);
- labels.insert(mr);
- labels.insert(bl);
- labels.insert(br);
-
- for (set_t::const_iterator it = labels.begin();
- it != labels.end();
- ++it)
- if (not_to_ignore(*it))
- {
- box2d b2 = output(*it).bbox();
- box2d b_i = scribo::util::box_intersection(b_, b2);
- volatile float
- b_ratio = b_i.nsites() / (float)b_.nsites(),
- b2_ratio = b_i.nsites() / (float)b2.nsites();
+ // Box is mostly in the background => do nothing.
+ if (mc == 0)
+ {
+ mln::draw::box_plain(billboard, b_, cur_id);
+ continue;
+ }
+ else // Bbox center is inside another box. Check if we can
+ // merge the current box with it.
+ {
+ // Consider other potential overlapping bboxes.
+ const unsigned tl = billboard(b_.pmin());
+ const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const unsigned br = billboard(b_.pmax());
+
+ typedef std::set<unsigned> set_t;
+ set_t labels;
+ labels.insert(tl);
+ labels.insert(tr);
+ labels.insert(mc);
+ labels.insert(bl);
+ labels.insert(br);
+
+ // FIXME: check that there are at least 3 points (including
+ // the center) in another paragraph.
+
+ // The potential merged bbox is already ignored or the
+ // current bbox overlaps with several bboxes.
+ // => Ignore current bbox .
+ //
+ if (!not_to_ignore(mc)
+ || (labels.size() > 1 && labels.find(0) == labels.end()))
+ {
+ mln::draw::box_plain(billboard, b_, cur_id); // Really?
+ not_to_ignore(cur_id) = false;
+ continue;
+ }
- if (b2_ratio == 1)
+ for (set_t::const_iterator it = labels.begin();
+ it != labels.end(); ++it)
+ if (*it)
{
- // Merge paragraphs and redraw the new bbox.
- output(cur_id).fast_merge(output(*it));
- mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id);
+ mln_assertion(*it != mc);
+
+ box2d b2 = output(*it).bbox();
+ box2d b_i = scribo::util::box_intersection(b_, b2);
+ volatile float
+ b_ratio = b_i.nsites() / (float)b_.nsites();
+
+ // If the bbox is widely included in another box.
+ if (b_ratio > 0.8)
+ {
+ output(mc).fast_merge(output(cur_id));
+ mln::draw::box_plain(billboard, parset(mc).bbox(), mc);
+ }
+ else
+ mln::draw::box_plain(billboard, parset(cur_id).bbox(), cur_id);
+ break;
}
- else if (b_ratio == 1)
- {
- // Merge paragraphs and redraw the new bbox.
- output(*it).fast_merge(output(cur_id));
- mln::draw::box_plain(billboard, output(*it).bbox(), *it);
- }
- else if ((b_ratio > 0.4 || b2_ratio > 0.9))
- {
- // si b_ est inclus dans une boite dont le nombre de
- // comp > 4 => invalid juste b_ sinon => invalid b_ et
- // b2
- not_to_ignore(cur_id) = false;
-
- if (parset(*it).nlines() < 4)
- not_to_ignore(*it) = false;
- }
- }
- mln::draw::box_plain(billboard, b_, cur_id);
+ }
}
+ // if (not_to_ignore(*it))
+ // {
+ // box2d b2 = output(*it).bbox();
+ // box2d b_i = scribo::util::box_intersection(b_, b2);
+
+ // volatile float
+ // b_ratio = b_i.nsites() / (float)b_.nsites(),
+ // b2_ratio = b_i.nsites() / (float)b2.nsites();
+
+ // if (b2_ratio == 1)
+ // {
+ // // Merge paragraphs and redraw the new bbox.
+ // output(cur_id).fast_merge(output(*it));
+ // mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id);
+ // }
+ // else if (b_ratio == 1)
+ // {
+ // // Merge paragraphs and redraw the new bbox.
+ // output(*it).fast_merge(output(cur_id));
+ // mln::draw::box_plain(billboard, output(*it).bbox(), *it);
+ // }
+ // else if ((b_ratio > 0.4 || b2_ratio > 0.9))
+ // {
+ // // si b_ est inclus dans une boite dont le nombre de
+ // // comp > 4 => invalid juste b_ sinon => invalid b_ et
+ // // b2
+ // not_to_ignore(cur_id) = false;
+
+ // if (parset(*it).nlines() < 4)
+ // not_to_ignore(*it) = false;
+ // }
+ // }
+
+ // mln::draw::box_plain(billboard, b_, cur_id);
+ // }
+
output.invalidate(not_to_ignore);
for_all_paragraphs(p, output)
diff --git a/scribo/scribo/filter/paragraphs_in_borders.hh
b/scribo/scribo/filter/paragraphs_in_borders.hh
new file mode 100644
index 0000000..8953282
--- /dev/null
+++ b/scribo/scribo/filter/paragraphs_in_borders.hh
@@ -0,0 +1,140 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
+# define SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive paragraphs.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate paragraphs located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+    /// Warning: paragraphs are not removed from the document. They
+    /// are only invalidated in the paragraph set, which is then set
+    /// back in \p doc.
+ ///
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Paragraphs located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ paragraphs_in_borders(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+    /// Invalidate the valid paragraphs of \p doc whose bounding box is
+    /// included in one of the four bands running along the image
+    /// borders.
+    ///
+    /// The band thickness is min(43, 2% of the image width),
+    /// truncated to an integer.  Nothing is done if \p doc has no
+    /// text.
+    ///
+    /// \pre \p doc is valid.
+    template <typename L>
+    void
+    paragraphs_in_borders(document<L>& doc)
+    {
+      trace::entering("scribo::filter::paragraphs_in_borders");
+
+      mln_precondition(doc.is_valid());
+
+      const mln::image2d<mln::value::rgb8>& ima = doc.image();
+
+      unsigned border_size = std::min(43., 0.02 * ima.domain().width());
+
+      ///       pt
+      /// ptl X------X---
+      ///     |_!____!__X ptr
+      ///     | !    !  |
+      ///     | !    !  |
+      ///     | !    !  |
+      /// pbl X_!____!__|
+      ///     | !    !  |
+      ///     --X-------X
+      ///       pb     pbr
+      ///
+      // Every offset is taken from the matching domain bound, so the
+      // bands stay correct when the domain does not start at (0,0).
+      point2d
+        ptl = ima.domain().pmin(),
+        pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+        ptr(geom::min_row(ima) + border_size, geom::max_col(ima)),
+        pbr = ima.domain().pmax(),
+        pb(geom::max_row(ima), geom::min_col(ima) + border_size),
+        pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+      // Top, right, bottom and left bands.
+      box2d
+        bt(ptl, ptr),
+        br(pt, pbr),
+        bb(pbl, pbr),
+        bl(ptl, pb);
+
+      // Horizontal paragraphs
+      if (doc.has_text())
+      {
+        paragraph_set<L> parset = doc.paragraphs();
+        for_all_paragraphs(p, parset)
+          if (parset(p).is_valid())
+            if (util::box_is_included(parset(p).bbox(), bt)
+                || util::box_is_included(parset(p).bbox(), br)
+                || util::box_is_included(parset(p).bbox(), bb)
+                || util::box_is_included(parset(p).bbox(), bl))
+            {
+              parset(p).invalidate();
+            }
+
+        doc.set_paragraphs(parset);
+      }
+
+      trace::exiting("scribo::filter::paragraphs_in_borders");
+    }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
diff --git a/scribo/scribo/filter/paragraphs_in_image.hh
b/scribo/scribo/filter/paragraphs_in_image.hh
index 1029430..f67b863 100644
--- a/scribo/scribo/filter/paragraphs_in_image.hh
+++ b/scribo/scribo/filter/paragraphs_in_image.hh
@@ -89,8 +89,6 @@ namespace scribo
&& doc.elements()(e).type() == component::Image)
mln::draw::box_plain(billboard, doc.elements()(e).bbox(), true);
- mln::io::pbm::save(billboard, "billboard_parimage.pbm");
-
const paragraph_set<L>& parset = doc.paragraphs();
mln::util::array<bool> not_to_ignore(parset.nelements() + 1, true);
not_to_ignore(0) = false;
@@ -101,15 +99,34 @@ namespace scribo
const bool
tl = billboard(b_.pmin()),
tr = billboard.at_(b_.pmin().row(), b_.pmax().col()),
- ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()),
mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()),
- mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()),
bl = billboard.at_(b_.pmax().row(), b_.pmin().col()),
br = billboard(b_.pmax());
+ typedef mln::util::set<int> set_t;
+ set_t s;
+ s.insert(tl);
+ s.insert(tr);
+ s.insert(mc);
+ s.insert(bl);
+ s.insert(br);
+
+ if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0)))
+ continue;
+
// The paragraph is fully included in an image.
- if (tl && tr && ml && mc && mr && bl &&
br)
- not_to_ignore(cur_id) = false;
+ for_all_elements(e, s)
+ if (s[e] != 0
+ && (mc != 0 && mc == s[e]
+ && ((tl == mc && bl == mc)
+ || (tr == mc && br == mc)
+ || (tl == mc && tr == mc)
+ || (bl == mc && br == mc))))
+ {
+// if (tl && tr && ml && mc && mr && bl
&& br)
+ not_to_ignore(cur_id) = false;
+ break;
+ }
}
paragraph_set<L> output = parset.duplicate();
diff --git a/scribo/scribo/filter/separators_in_borders.hh
b/scribo/scribo/filter/separators_in_borders.hh
new file mode 100644
index 0000000..8ccb6b1
--- /dev/null
+++ b/scribo/scribo/filter/separators_in_borders.hh
@@ -0,0 +1,206 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
+# define SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive separators.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate separators located close to the image borders.
+ ///
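+    /// \param[in,out] doc A document structure.
+    /// \param[in] vratio Thickness of the border area used for
+    ///                   vertical separators, as a ratio of the
+    ///                   smallest image dimension.
+    /// \param[in] hratio Thickness of the border area used for
+    ///                   horizontal separators, as a ratio of the
+    ///                   smallest image dimension.
+    ///
+    /// Warning: it does not remove separators from the separator
+    /// image. It only tags separator components as ignored in their
+    /// respective component_set.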
+ ///
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Separators located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ separators_in_borders(document<L>& doc, float vratio, float hratio);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+    namespace internal
+    {
+
+      /// Tag as component::Ignored every valid component of \p comps
+      /// whose bounding box is included in one of the four bands of
+      /// thickness \p border_size running along the borders of \p ima.
+      ///
+      ///       pt
+      /// ptl X------X---
+      ///     |_!____!__X ptr
+      ///     | !    !  |
+      ///     | !    !  |
+      ///     | !    !  |
+      /// pbl X_!____!__|
+      ///     | !    !  |
+      ///     --X-------X
+      ///       pb     pbr
+      ///
+      template <typename L>
+      void
+      ignore_comps_in_borders(component_set<L>& comps,
+                              const mln::image2d<mln::value::rgb8>& ima,
+                              unsigned border_size)
+      {
+        // Every offset is taken from the matching domain bound, so the
+        // bands stay correct when the domain does not start at (0,0).
+        point2d
+          ptl = ima.domain().pmin(),
+          pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+          ptr(geom::min_row(ima) + border_size, geom::max_col(ima)),
+          pbr = ima.domain().pmax(),
+          pb(geom::max_row(ima), geom::min_col(ima) + border_size),
+          pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+        // Top, right, bottom and left bands.
+        box2d
+          bt(ptl, ptr),
+          br(pt, pbr),
+          bb(pbl, pbr),
+          bl(ptl, pb);
+
+        for_all_comps(c, comps)
+          if (comps(c).is_valid())
+            if (scribo::util::box_is_included(comps(c).bbox(), bt)
+                || scribo::util::box_is_included(comps(c).bbox(), br)
+                || scribo::util::box_is_included(comps(c).bbox(), bb)
+                || scribo::util::box_is_included(comps(c).bbox(), bl))
+            {
+              comps(c).update_tag(component::Ignored);
+            }
+      }
+
+    } // end of namespace scribo::filter::internal
+
+
+    /// Tag as ignored the horizontal and vertical separators of
+    /// \p doc located in the border area of the image.
+    ///
+    /// The border thickness is \p hratio (horizontal separators) or
+    /// \p vratio (vertical separators) times the smallest image
+    /// dimension.  A kind of separator is left untouched if \p doc
+    /// does not hold it.
+    ///
+    /// \pre \p doc is valid.
+    template <typename L>
+    void
+    separators_in_borders(document<L>& doc, float vratio, float hratio)
+    {
+      trace::entering("scribo::filter::separators_in_borders");
+
+      mln_precondition(doc.is_valid());
+
+      const mln::image2d<mln::value::rgb8>& ima = doc.image();
+
+      // Horizontal separators
+      if (doc.has_hline_seps())
+      {
+        unsigned border_size = hratio * std::min(ima.domain().width(), ima.domain().height());
+
+        // Work on a copy of the component set.
+        component_set<L> hline = doc.hline_seps_comps().duplicate();
+        internal::ignore_comps_in_borders(hline, ima, border_size);
+
+        // FIXME: warning this call may produce inconsistent data
+        // Ignored components are still in the separator image...
+        doc.set_hline_separators(doc.hline_seps(), hline);
+      }
+
+
+      // Vertical separators
+      if (doc.has_vline_seps())
+      {
+        unsigned border_size = vratio * std::min(ima.domain().width(), ima.domain().height());
+
+        // Work on a copy of the component set.
+        component_set<L> vline = doc.vline_seps_comps().duplicate();
+        internal::ignore_comps_in_borders(vline, ima, border_size);
+
+        // FIXME: warning this call may produce inconsistent data
+        // Ignored components are still in the separator image...
+        doc.set_vline_separators(doc.vline_seps(), vline);
+      }
+
+      trace::exiting("scribo::filter::separators_in_borders");
+    }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
diff --git a/scribo/scribo/filter/separators_in_element.hh
b/scribo/scribo/filter/separators_in_element.hh
index 228d82f..a8b0ebb 100644
--- a/scribo/scribo/filter/separators_in_element.hh
+++ b/scribo/scribo/filter/separators_in_element.hh
@@ -90,26 +90,26 @@ namespace scribo
{
component_set<L> hline = doc.hline_seps_comps().duplicate();
for_all_comps(c, hline)
- {
- const mln_box(L)& b_ = hline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl
&& br)
- hline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_hline_separators(doc.hline_seps(), hline);
- }
+ if (hline(c).is_valid())
+ {
+ const mln_box(L)& b_ = hline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl
&& br)
+ hline(c).update_tag(component::Ignored);
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
}
// Vertical separators
@@ -117,29 +117,29 @@ namespace scribo
{
component_set<L> vline = doc.vline_seps_comps().duplicate();
for_all_comps(c, vline)
- {
- const mln_box(L)& b_ = vline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl
&& br)
- vline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_vline_separators(doc.vline_seps(), vline);
- }
-
- trace::exiting("scribo::filter::separators_in_element");
+ if (vline(c).is_valid())
+ {
+ const mln_box(L)& b_ = vline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl
&& br)
+ vline(c).update_tag(component::Ignored);
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
}
+
+ trace::exiting("scribo::filter::separators_in_element");
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/filter/separators_in_paragraph.hh
b/scribo/scribo/filter/separators_in_paragraph.hh
index 3e7a150..7c157be 100644
--- a/scribo/scribo/filter/separators_in_paragraph.hh
+++ b/scribo/scribo/filter/separators_in_paragraph.hh
@@ -58,14 +58,14 @@ namespace scribo
///
template <typename L>
void
- separators_in_paragraph(document<L>& doc);
+ separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned
vmax_size);
# ifndef MLN_INCLUDE_ONLY
template <typename L>
void
- separators_in_paragraph(document<L>& doc)
+ separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned
vmax_size)
{
trace::entering("scribo::filter::separators_in_paragraph");
@@ -90,26 +90,28 @@ namespace scribo
{
component_set<L> hline = doc.hline_seps_comps().duplicate();
for_all_comps(c, hline)
- {
- const mln_box(L)& b_ = hline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl
&& br)
- hline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_hline_separators(doc.hline_seps(), hline);
- }
+ if (hline(c).is_valid())
+ {
+ const mln_box(L)& b_ = hline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl
&& br
+ && hline(c).bbox().width() < hmax_size)
+ hline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
}
// Vertical separators
@@ -117,29 +119,31 @@ namespace scribo
{
component_set<L> vline = doc.vline_seps_comps().duplicate();
for_all_comps(c, vline)
- {
- const mln_box(L)& b_ = vline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl
&& br)
- vline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_vline_separators(doc.vline_seps(), vline);
- }
-
- trace::exiting("scribo::filter::separators_in_paragraph");
+ if (vline(c).is_valid())
+ {
+ const mln_box(L)& b_ = vline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl
&& br
+ && vline(c).bbox().height() < vmax_size)
+ vline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
}
+
+ trace::exiting("scribo::filter::separators_in_paragraph");
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/filter/separators_vert_in_borders.hh
b/scribo/scribo/filter/separators_vert_in_borders.hh
new file mode 100644
index 0000000..4a9e806
--- /dev/null
+++ b/scribo/scribo/filter/separators_vert_in_borders.hh
@@ -0,0 +1,143 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
+# define SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive separators.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate separators located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Warning: it does not remove separators from the separator
+ /// image. It only invalidates separator components in their
+ /// respective component_set.
+ ///
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Separators located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ separators_vert_in_borders(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ void
+ separators_vert_in_borders(document<L>& doc)
+ {
+ trace::entering("scribo::filter::separators_vert_in_borders");
+
+ mln_precondition(doc.is_valid());
+
+ const mln::image2d<mln::value::rgb8>& ima = doc.image();
+ // Border thickness: 5% of the image width, capped at 43.
+ float border_size = std::min(43., 0.05 * ima.domain().width());
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(geom::min_row(ima) + border_size, geom::max_col(ima)),
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), geom::min_col(ima) + border_size),
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+ // One box per border strip (top, right, bottom, left), all relative to the domain corners.
+ box2d
+ bt(ptl, ptr),
+ br(pt, pbr),
+ bb(pbl, pbr),
+ bl(ptl, pb);
+
+ // Tag as Ignored each valid vertical separator for which util::box_is_included() holds against a strip.
+ if (doc.has_vline_seps())
+ {
+ component_set<L> vline = doc.vline_seps_comps().duplicate();
+ for_all_comps(c, vline)
+ if (vline(c).is_valid())
+ if (util::box_is_included(vline(c).bbox(), bt)
+ || util::box_is_included(vline(c).bbox(), br)
+ || util::box_is_included(vline(c).bbox(), bb)
+ || util::box_is_included(vline(c).bbox(), bl))
+ {
+ vline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
+ }
+
+ trace::exiting("scribo::filter::separators_vert_in_borders");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh
b/scribo/scribo/io/img/internal/debug_img_visitor.hh
index a4715f5..520a743 100644
--- a/scribo/scribo/io/img/internal/debug_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -34,7 +34,7 @@
# include <mln/core/image/image2d.hh>
# include <mln/value/rgb8.hh>
-# include <mln/draw/site_set.hh>
+# include <mln/draw/polygon.hh>
# include <mln/subsampling/antialiased.hh>
# include <mln/morpho/elementary/gradient_external.hh>
@@ -45,7 +45,7 @@
# include <scribo/util/component_precise_outline.hh>
# include <scribo/io/img/internal/draw_edges.hh>
-
+# include <scribo/text/paragraphs_closing.hh>
namespace scribo
@@ -85,9 +85,6 @@ namespace scribo
mln::image2d<value::rgb8>& output;
unsigned output_ratio;
- // FIXME: we would like its type to be L.
- mutable image2d<scribo::def::lbl_type> lbl_sub;
-
private: // Methods
box2d compute_bbox(const box2d& b) const;
};
@@ -97,7 +94,7 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
+ inline
box2d
debug_img_visitor::compute_bbox(const box2d& b) const
{
@@ -131,50 +128,11 @@ namespace scribo
// Page elements (Pictures, ...)
if (doc.has_elements())
{
- // Prepare element edges
-
- // L lbl = duplicate(doc.elements().labeled_image());
- // for_all_comps(c, doc.elements())
- // if (! doc.elements()(c).is_valid())
- // data::fill(((lbl | doc.elements()(c).bbox()).rw()
- // | (pw::value(lbl) == pw::cst(c))).rw(), 0);
-
- // const L& lbl = doc.lines().components().labeled_image();
- // lbl_sub = mln::subsampling::antialiased(lbl, output_ratio);
-
- // mln::io::pgm::save(data::wrap(value::int_u8(), lbl), "lbl.pgm");
- // mln::io::pgm::save(data::wrap(value::int_u8(), lbl_sub),
"lbl_sub.pgm");
-
- // // FIXME: UGLY! Too slow!
- // scribo::def::lbl_type nlabels;
- // component_set<L> elts = primitive::extract::components(
- // data::convert(bool(), lbl_sub),
- // c8(),
- // nlabels);
-
- // Preserving elements tags
- // if (doc.elements().nelements() != elts.nelements())
- // {
- // std::cerr << "Warnig: could not preserve element type in "
- // << "img debug output." << std::endl;
- // std::cerr << "The number of non text element has changed while
"
- // << "subsampling images : "
- // << doc.elements().nelements() << " vs "
- // << elts.nelements() << std::endl;
- // }
- // else
- // for_all_comps(c, doc.elements())
- // {
- // elts(c).update_type(doc.elements()(c).type());
- // elts(c).update_tag(doc.elements()(c).tag());
- // }
-
for_all_comps(e, doc.elements())
if (doc.elements()(e).is_valid())
doc.elements()(e).accept(*this);
}
-
// line seraparators
if (doc.has_vline_seps())
for_all_comps(c, doc.vline_seps_comps())
@@ -198,23 +156,28 @@ namespace scribo
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(
- extend((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id)), 0));
+ par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
switch (info.type())
{
case component::HorizontalLineSeparator:
case component::VerticalLineSeparator:
{
- mln::draw::site_set(output, par, literal::cyan, output_ratio);
+ mln::draw::polygon(output, par, literal::cyan, output_ratio);
}
break;
+ case component::DropCapital:
+ {
+ mln::draw::polygon(output, par, literal::violet, output_ratio);
+ }
+ break;
+
default:
case component::Image:
{
- mln::draw::site_set(output, par, literal::orange, output_ratio);
+ mln::draw::polygon(output, par, literal::orange, output_ratio);
}
break;
}
@@ -228,6 +191,9 @@ namespace scribo
{
const line_set<L>& lines = parset.lines();
+ // Prepare paragraph outlines.
+ L par_clo = text::paragraphs_closing(parset);
+
for_all_paragraphs(p, parset)
if (parset(p).is_valid())
{
@@ -235,10 +201,11 @@ namespace scribo
for_all_paragraph_lines(lid, line_ids)
{
- line_id_t l = line_ids(lid);
- lines(l).accept(*this);
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
}
+ // Adjust bbox to output image size.
box2d b = compute_bbox(parset(p).bbox());
b.enlarge(1);
b.crop_wrt(output.domain());
diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh
b/scribo/scribo/io/img/internal/full_img_visitor.hh
index f2c0f5c..7b20970 100644
--- a/scribo/scribo/io/img/internal/full_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/full_img_visitor.hh
@@ -34,7 +34,7 @@
# include <mln/core/image/image2d.hh>
# include <mln/value/rgb8.hh>
-# include <mln/draw/site_set.hh>
+# include <mln/draw/polygon.hh>
# include <mln/draw/box.hh>
# include <scribo/core/internal/doc_serializer.hh>
@@ -137,22 +137,27 @@ namespace scribo
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline((lbl | info.bbox()) | (pw::value(lbl)
== pw::cst(id)));
+ par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
switch (info.type())
{
case component::HorizontalLineSeparator:
case component::VerticalLineSeparator:
{
- mln::draw::site_set(output, par, literal::cyan);
+ mln::draw::polygon(output, par, literal::cyan);
}
break;
+ case component::DropCapital:
+ {
+ mln::draw::polygon(output, par, literal::violet);
+ }
+ break;
default:
case component::Image:
{
- mln::draw::site_set(output, par, literal::orange);
+ mln::draw::polygon(output, par, literal::orange);
}
break;
}
@@ -164,20 +169,18 @@ namespace scribo
void
full_img_visitor::visit(const paragraph_set<L>& parset) const
{
- const line_set<L>& lines = parset.lines();
+ // const line_set<L>& lines = parset.lines();
+
+ // Prepare paragraph outlines.
+ L par_clo = text::paragraphs_closing(parset);
for_all_paragraphs(p, parset)
if (parset(p).is_valid())
{
- const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+ p_array<point2d> par = scribo::util::component_precise_outline(par_clo
+ | parset(p).bbox(), p);
- for_all_paragraph_lines(lid, line_ids)
- {
- line_id_t l = line_ids(lid);
- lines(l).accept(*this);
- }
-
- mln::draw::box(output, parset(p).bbox(), literal::blue);
+ mln::draw::polygon(output, par, literal::blue);
}
}
@@ -186,7 +189,15 @@ namespace scribo
void
full_img_visitor::visit(const line_info<L>& line) const
{
- mln::draw::box(output, line.bbox(), literal::red);
+// mln::draw::box(output, line.bbox(), literal::red);
+
+ point2d
+ pmin = line.bbox().pmin(),
+ pmax = line.bbox().pmax();
+ pmax.row() = line.baseline();
+ pmin.row() = line.baseline();
+
+ mln::draw::line(output, pmin, pmax, literal::red);
}
#endif // MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 1659a85..bbdd3e2 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -40,6 +40,7 @@
# include <scribo/io/xml/internal/print_box_coords.hh>
# include <scribo/io/xml/internal/print_page_preambule.hh>
# include <scribo/io/xml/internal/compute_text_colour.hh>
+# include <scribo/text/paragraphs_closing.hh>
namespace scribo
@@ -160,8 +161,7 @@ namespace scribo
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(
- extend((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id)), 0));
+ par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
switch (info.type())
{
@@ -189,6 +189,17 @@ namespace scribo
break;
}
+ case component::DropCapital:
+ {
+ output << " <TextRegion id=\"r" << id <<
"\" "
+ << " Type=\"Drop_Capital\">"
+ << std::endl;
+
+ internal::print_image_coords(output, par, " ");
+
+ output << " </TextRegion>" << std::endl;
+ break;
+ }
default:
case component::Image:
@@ -216,9 +227,15 @@ namespace scribo
{
const line_set<L>& lines = parset.lines();
+ // Prepare paragraph outlines.
+ L par_clo = text::paragraphs_closing(parset);
+
for_all_paragraphs(p, parset)
if (parset(p).is_valid())
{
+ p_array<mln_site(L)> par = scribo::util::component_precise_outline(par_clo
+ | parset(p).bbox(), p);
+
const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
// FIXME: compute that information on the whole paragraph
@@ -245,7 +262,7 @@ namespace scribo
// <Unicode></Unicode>
// </TextEquiv>
- internal::print_box_coords(output, parset(p).bbox(), " ");
+ internal::print_image_coords(output, par, " ");
output << " </TextRegion>" << std::endl;
}
diff --git a/scribo/scribo/io/xml/internal/print_image_coords.hh
b/scribo/scribo/io/xml/internal/print_image_coords.hh
index ebfe402..41c4e30 100644
--- a/scribo/scribo/io/xml/internal/print_image_coords.hh
+++ b/scribo/scribo/io/xml/internal/print_image_coords.hh
@@ -69,15 +69,15 @@ namespace scribo
const S& b = exact(b_);
mln_precondition(b.is_valid());
- ostr << sc << "<coords>" << std::endl;
+ ostr << sc << "<Coords>" << std::endl;
mln_piter(S) p(b);
for_all(p)
- ostr << sp << "<point x=\"" << p.col()
+ ostr << sp << "<Point x=\"" << p.col()
<< "\" y=\"" << p.row() <<
"\"/>"
<< std::endl;
- ostr << sc << "</coords>" << std::endl;
+ ostr << sc << "</Coords>" << std::endl;
}
diff --git a/scribo/scribo/io/xml/internal/print_page_preambule.hh
b/scribo/scribo/io/xml/internal/print_page_preambule.hh
index bcb6b33..9f00c60 100644
--- a/scribo/scribo/io/xml/internal/print_page_preambule.hh
+++ b/scribo/scribo/io/xml/internal/print_page_preambule.hh
@@ -30,10 +30,10 @@
///
/// \brief Print PAGE XML format preambule.
-# include <ctime>
# include <fstream>
# include <mln/core/alias/box2d.hh>
# include <scribo/core/document.hh>
+# include <scribo/io/xml/internal/time_info.hh>
namespace scribo
{
@@ -75,17 +75,10 @@ namespace scribo
else
output << "<PcGts>" << std::endl;
-
- time_t cur_time = time(NULL);
- tm * time_struct;
- time_struct = localtime(&cur_time);
- char time_info[55];
- strftime(time_info, 55, "%Y-%m-%dT%H:%M:%S", time_struct);
-
output << " <Metadata>" << std::endl;
output << " <Creator>LRDE</Creator>" <<
std::endl;
- output << " <Created>" << time_info <<
"</Created>" << std::endl;
- output << " <LastChange>" << time_info <<
"</LastChange>" << std::endl;
+ output << " <Created>" << time_info() <<
"</Created>" << std::endl;
+ output << " <LastChange>" << time_info() <<
"</LastChange>" << std::endl;
output << " <Comments>Generated by Scribo from
Olena.</Comments>" << std::endl;
output << " </Metadata>" << std::endl;
diff --git a/scribo/scribo/io/xml/internal/time_info.hh
b/scribo/scribo/io/xml/internal/time_info.hh
new file mode 100644
index 0000000..6adc49a
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/time_info.hh
@@ -0,0 +1,75 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH
+# define SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH
+
+/// \file
+///
+/// Get formatted time info for PAGE XML format.
+
+# include <ctime>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+# ifndef MLN_INCLUDE_ONLY
+
+ /// \return The current local time formatted as "YYYY-MM-DDTHH:MM:SS",
+ /// or an empty string on failure. 'inline' because it is defined in a header.
+ inline std::string time_info()
+ {
+ const time_t cur_time = time(NULL);
+ // localtime() may return NULL; passing that on to strftime() is undefined.
+ const tm* time_struct = localtime(&cur_time);
+ char time_info_[55];
+ if (!time_struct
+ || !strftime(time_info_, sizeof time_info_, "%Y-%m-%dT%H:%M:%S", time_struct))
+ return std::string();
+ return std::string(time_info_);
+ }
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH
diff --git a/scribo/scribo/postprocessing/images_to_drop_capital.hh
b/scribo/scribo/postprocessing/images_to_drop_capital.hh
new file mode 100644
index 0000000..ca76609
--- /dev/null
+++ b/scribo/scribo/postprocessing/images_to_drop_capital.hh
@@ -0,0 +1,141 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
+# define SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
+
+/// \file
+///
+/// Set type for specific images to Drop Capital component.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+
+
+namespace scribo
+{
+
+ namespace postprocessing
+ {
+
+ using namespace mln;
+
+
+ /// Set type for specific images to Drop Capital component.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Elements of type Image that are recognized as drop capitals
+ /// have their type changed to DropCapital in \p doc.
+ //
+ template <typename L>
+ void
+ images_to_drop_capital(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ void
+ images_to_drop_capital(document<L>& doc)
+ {
+ trace::entering("scribo::postprocessing::images_to_drop_capital");
+ mln_precondition(doc.is_valid());
+
+ if (! doc.has_elements())
+ {
+ // Early exit: still close the trace scope opened above.
+ trace::exiting("scribo::postprocessing::images_to_drop_capital");
+ return;
+ }
+
+ // Mask set to true inside the bounding box of each valid paragraph.
+ mln_ch_value(L,bool) billboard;
+ initialize(billboard, doc.image());
+ data::fill(billboard, false);
+ for_all_comps(p, doc.paragraphs())
+ if (doc.paragraphs()(p).is_valid())
+ mln::draw::box_plain(billboard, doc.paragraphs()(p).bbox(), true);
+
+ float min_img_size = 0.2 * (doc.image().domain().width()
+ + doc.image().domain().height());
+ // Only valid elements currently typed as Image are examined.
+ component_set<L> elts = doc.elements();
+ for_all_comps(c, elts)
+ if (elts(c).is_valid() && elts(c).type() == component::Image)
+ {
+ const mln_box(L)& b_ = elts(c).bbox();
+ // Sample the mask at seven points of the element bounding box.
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool mb = billboard.at_(b_.pmax().row(), b_.pcenter().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+ typedef mln::util::set<int> set_t;
+ set_t s;
+ s.insert(tl);
+ s.insert(tr);
+ s.insert(mb);
+ s.insert(mc);
+ s.insert(mr);
+ s.insert(bl);
+ s.insert(br);
+ // NOTE(review): only 0/1 values are inserted, so this guard presumably never fires - confirm.
+ if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0)))
+ continue;
+ float elt_size = elts(c).bbox().width() + elts(c).bbox().height();
+ for_all_elements(e, s)
+ if (s[e] != 0
+ && (mc != 0 && mc == s[e]
+ && ((tl == mc && bl == mc)
+ || (tr == mc && br == mc)
+ || (bl == mc && br == mc)
+ || (tl == mc && tr == mc)
+ || (br == mc && mr == mc && mb == mc)))
+ && (elt_size < min_img_size))
+ {
+ elts(c).update_type(component::DropCapital);
+ break;
+ }
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_elements(elts);
+ trace::exiting("scribo::postprocessing::images_to_drop_capital");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::postprocessing
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
diff --git a/scribo/scribo/text/paragraphs_closing.hh
b/scribo/scribo/text/paragraphs_closing.hh
index ec1d5c8..efc5259 100644
--- a/scribo/scribo/text/paragraphs_closing.hh
+++ b/scribo/scribo/text/paragraphs_closing.hh
@@ -31,6 +31,9 @@
///
/// Paragraphs closing using CRLA.
+# include <mln/draw/line.hh>
+# include <scribo/draw/line_components.hh>
+
namespace scribo
{
@@ -39,162 +42,217 @@ namespace scribo
using namespace mln;
- template< typename L, typename V >
- void
- paragraphs_closing(image2d<V>& output,
- const paragraph_set<L>& par_set,
- const box2d& domain);
+ template< typename L>
+ mln_concrete(L)
+ paragraphs_closing(const paragraph_set<L>& parset);
+
# ifndef MLN_INCLUDE_ONLY
- template< typename V >
- inline
- void horizontal_CRLA(const image2d<V>& input,
- image2d<V>& output,
- const mln::util::array<int>& deltas)
+ namespace internal
{
- mln_piter(image2d<V>) p(input.domain());
- int count = 0;
- unsigned nrows = input.nrows();
- unsigned ncols = input.ncols();
- V last_pixel_value = 0;
- for (unsigned i = 0; i < nrows; ++i)
+ template<typename L>
+ inline
+ void horizontal_CRLA(const Image<L>& input_,
+ Image<L>& output_,
+ const mln::util::array<int>& deltas)
{
- for (unsigned j = 0; j < ncols; ++j)
- {
- const V& current_pixel = input.at_(i, j);
+ const L& input = exact(input_);
+ L& output = exact(output_);
+ mln_precondition(input.is_valid());
+ mln_precondition(output.is_valid());
+
+ mln_piter(L) p(input.domain());
+ int count = 0;
+ unsigned nrows = input.nrows();
+ unsigned ncols = input.ncols();
+ mln_value(L) last_pixel_value = 0;
- if (!current_pixel)
+ for (unsigned i = 0; i < nrows; ++i)
+ {
+ for (unsigned j = 0; j < ncols; ++j)
{
- if (last_pixel_value)
+ const mln_value(L)& current_pixel = input.at_(i, j);
+
+ if (!current_pixel)
{
- unsigned k = j + 1;
- for (; !input.at_(i, k) && (k < ncols); ++k);
+ if (last_pixel_value)
+ {
+ unsigned k = j + 1;
+ for (; !(input.at_(i, k)) && (k < ncols); ++k);
- count = k - j;
- const int threshold = deltas(last_pixel_value - 1);
+ count = k - j;
+ const int threshold = deltas(last_pixel_value);
- if (last_pixel_value == input.at_(i, k) && count < threshold)
- for (unsigned l = j; l <= k; ++l)
- output.at_(i, l) = last_pixel_value;
+ if (last_pixel_value == input.at_(i, k) && count < threshold)
+ for (unsigned l = j; l <= k; ++l)
+ output.at_(i, l) = last_pixel_value;
- j = k;
- last_pixel_value = 0;
+ j = k;
+ last_pixel_value = 0;
+ }
+ }
+ else
+ {
+ output.at_(i, j) = current_pixel;
+ last_pixel_value = current_pixel;
}
- }
- else
- {
- output.at_(i, j) = current_pixel;
- last_pixel_value = current_pixel;
}
}
}
- }
- template< typename V >
- inline
- void vertical_CRLA(const image2d<V>& input,
- image2d<V>& output,
- const mln::util::array<int>& deltas)
- {
- mln_piter(image2d<V>) p(input.domain());
- int count = 0;
- unsigned nrows = input.nrows();
- unsigned ncols = input.ncols();
- V last_pixel_value = 0;
-
- for (unsigned j = 0; j < ncols; ++j)
+ template<typename L>
+ inline
+ void vertical_CRLA(const Image<L>& input_,
+ Image<L>& output_,
+ const mln::util::array<int>& deltas)
{
- for (unsigned i = 0; i < nrows; ++i)
- {
- const V& current_pixel = input.at_(i, j);
-
- if (!current_pixel)
- {
- if (last_pixel_value)
- {
- unsigned k = i + 1;
- for (; !input.at_(k, j) && (k < nrows); ++k);
+ const L& input = exact(input_);
+ L& output = exact(output_);
+ mln_precondition(input.is_valid());
+ mln_precondition(output.is_valid());
- count = k - i;
- const int threshold = deltas(last_pixel_value - 1);
+ mln_piter(L) p(input.domain());
+ int count = 0;
+ unsigned nrows = input.nrows();
+ unsigned ncols = input.ncols();
+ mln_value(L) last_pixel_value = 0;
- if (last_pixel_value == input.at_(k, j)
- && count < threshold)
- for (unsigned l = i; l <= k; ++l)
- output.at_(l, j) = last_pixel_value;
+ for (unsigned j = 0; j < ncols; ++j)
+ {
+ for (unsigned i = 0; i < nrows; ++i)
+ {
+ const mln_value(L)& current_pixel = input.at_(i, j);
- i = k;
- last_pixel_value = 0;
+ if (!current_pixel)
+ {
+ if (last_pixel_value)
+ {
+ unsigned k = i + 1;
+ for (; !(input.at_(k, j)) && (k < nrows); ++k);
+
+ count = k - i;
+ const int threshold = deltas(last_pixel_value);
+
+ if (last_pixel_value == input.at_(k, j)
+ && count < threshold)
+ for (unsigned l = i; l <= k; ++l)
+ output.at_(l, j) = last_pixel_value;
+
+ i = k;
+ last_pixel_value = 0;
+ }
+ }
+ else
+ {
+ output.at_(i, j) = current_pixel;
+ last_pixel_value = current_pixel;
}
- }
- else
- {
- output.at_(i, j) = current_pixel;
- last_pixel_value = current_pixel;
}
}
}
- }
- template< typename V >
- inline
- void CRLA(const image2d<V>& input,
- image2d<V>& output,
- const mln::util::array<int>& deltas,
- const mln::util::array<int>& deltas_factor)
- {
- horizontal_CRLA(input, output, deltas_factor);
- vertical_CRLA(output, output, deltas);
- horizontal_CRLA(output, output, deltas_factor);
- }
+ template<typename L>
+ inline
+ void CRLA(const Image<L>& input,
+ Image<L>& output,
+ const mln::util::array<int>& deltas,
+ const mln::util::array<int>& deltas_factor)
+ {
+ horizontal_CRLA(input, output, deltas_factor);
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ output,
+ "paragraph_closing_horizontal_CRLA");
+
+
+ vertical_CRLA(output, output, deltas);
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ output,
+ "paragraph_closing_vertical_CRLA");
+
+ horizontal_CRLA(output, output, deltas_factor);
+ }
+
+ } // end of namespace scribo::text::internal
+
- template< typename L, typename V >
- void
- paragraphs_closing(image2d<V>& output,
- const paragraph_set<L>& par_set,
- const box2d& domain)
+ template<typename L>
+ mln_concrete(L)
+ paragraphs_closing(const paragraph_set<L>& parset)
{
trace::entering("scribo::text::paragraphs_closing");
- image2d<V> debug(domain);
+ // FIXME: 'debug' may be useless.
+ mln_concrete(L) output, debug;
+ initialize(output, parset.lines().components().labeled_image());
+ initialize(debug, output);
- mln::util::array<int> deltas;
- deltas.reserve(par_set.nelements());
- mln::util::array<int> deltas_factor;
- deltas_factor.reserve(par_set.nelements());
+ mln::util::array<int> deltas(parset.nelements() + 1, 0);
+ mln::util::array<int> deltas_factor(parset.nelements() + 1, 0);
data::fill(debug, 0);
data::fill(output, 0);
- const line_set<L>& lines = par_set.lines();
+ const line_set<L>& lines = parset.lines();
- for_all_paragraphs(p, par_set)
- {
- const paragraph_info<L>& current_par = par_set(p);
- const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
- const unsigned nelements = line_ids.nelements();
-
- for (unsigned i = 0; i < nelements; ++i)
+ for_all_paragraphs(p, parset)
+ if (parset(p).is_valid())
{
- const line_id_t& line_id = line_ids(i);
- const line_info<L>& current_line = lines(line_id);
+ const paragraph_info<L>& current_par = parset(p);
+ const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
- draw::line_components(debug, current_line, p);
- }
+ line_id_t last_id = line_ids[0];
+ for_all_elements(i, line_ids)
+ {
+ const line_id_t& line_id = line_ids(i);
+ const line_info<L>& current_line = lines(line_id);
+
+ scribo::draw::line_components(debug, current_line, p);
+
+ // HACK DISCLAIMER : this line is drawn in order to be
+ // sure that every line will be reduced to a single
+ // component after closing. It is necessary to reduce a
+ // paragraph to one component in order to extract its
+ // outline correctly for xml/debug output.
+ component_id_t last_comp = lines(line_id).component_ids()(0);
+ for_all_elements(i, lines(line_id).component_ids())
+ {
+ const unsigned c = lines(line_id).component_ids()(i);
+ mln::draw::line(debug,
+ lines.components()(c).mass_center(),
+ lines.components()(last_comp).mass_center(),
+ p);
+ last_comp = c;
+ }
- int delta_baseline = current_par.delta_baseline();
+ // mln::draw::line(debug, current_line.bbox().pcenter(),
lines(last_id).bbox().pcenter(), p);
+ // last_id = line_id;
+ }
- if (delta_baseline % 2 == 0)
+ int delta_baseline = current_par.delta_baseline();
+
+ if (delta_baseline % 2 == 0)
--delta_baseline;
- deltas.append(delta_baseline);
- deltas_factor.append(3 * delta_baseline);
- }
- CRLA(debug, output, deltas, deltas_factor);
+ deltas(p) = 2 * delta_baseline; // Vertical
+ deltas_factor(p) = 3 * delta_baseline; // Horizontal
+ }
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ debug,
+ "paragraph_closing_input_CRLA");
+
+ internal::CRLA(debug, output, deltas, deltas_factor);
+
+ debug::logger().log_image(debug::Results,
+ output,
+ "paragraph_closing");
- trace::exiting("scribo::draw::line_components");
+ trace::exiting("scribo::text::paragraphs_closing");
+ return output;
}
# endif
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index e0c5b50..24d24a3 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -53,8 +53,10 @@
# include <scribo/filter/objects_small.hh>
# include <scribo/filter/paragraphs_bbox_overlap.hh>
# include <scribo/filter/paragraphs_in_image.hh>
+# include <scribo/filter/paragraphs_in_borders.hh>
# include <scribo/filter/separators_in_element.hh>
# include <scribo/filter/separators_in_paragraph.hh>
+# include <scribo/filter/separators_in_borders.hh>
# include <scribo/filter/images_in_paragraph.hh>
# include <scribo/primitive/group/from_single_link.hh>
@@ -66,6 +68,8 @@
# include <scribo/preprocessing/denoise_fg.hh>
+# include <scribo/postprocessing/images_to_drop_capital.hh>
+
# include <scribo/text/recognition.hh>
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
@@ -84,6 +88,7 @@
# include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
namespace scribo
{
@@ -201,12 +206,22 @@ namespace scribo
// Vertical and horizontal separators
{
+ unsigned closing_size = std::min(0.01 * doc.image().domain().width(),
+ 0.01 * doc.image().domain().height());
+ win::hline2d hl(closing_size);
+
+ // Apply a closing::structural in order to connect the
+ // disconnected parts of a single separator.
mln_ch_value(I,bool)
vseparators = preprocessing::rotate_90(
- primitive::extract::lines_h_thick_and_thin(
- preprocessing::rotate_90(processed_image), 101, 3, 0.2, 0.6, 10), false),
- hseparators = primitive::extract::lines_h_thick_and_thin(
- processed_image, 101, 3);
+ morpho::closing::structural(
+ primitive::extract::lines_h_thick_and_thin(
+ preprocessing::rotate_90(processed_image),
+ 101, 3, 0.2, 0.6, 10), hl), false),
+
+ hseparators = morpho::closing::structural(
+ primitive::extract::lines_h_thick_and_thin(
+ processed_image, 101, 3), hl);
doc.set_vline_separators(vseparators);
doc.set_hline_separators(hseparators);
@@ -509,9 +524,11 @@ namespace scribo
on_new_progress_label("Filtering paragraphs");
- parset = filter::paragraphs_bbox_overlap(parset);
+ paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset);
+ doc.set_paragraphs(parset_f);
- doc.set_paragraphs(parset);
+ // parset = filter::paragraphs_bbox_overlap(parset);
+ // doc.set_paragraphs(parset);
on_progress();
@@ -540,16 +557,38 @@ namespace scribo
on_progress();
+// TEMPORARY DEBUG
+ on_new_progress_label("Saving debug data");
+ doc.set_paragraphs(parset);
+ scribo::io::img::save(doc, "debug_wo_filter.png",
scribo::io::img::DebugWoImage);
+ scribo::io::img::save(doc, "full_wo_filter.png",
scribo::io::img::DebugWithImage);
+ doc.set_paragraphs(parset_f);
+ on_progress();
+// END OF TEMPORARY DEBUG
+
on_new_progress_label("Cleanup miscellaneous false positive");
filter::separators_in_element(doc);
- filter::separators_in_paragraph(doc);
+ filter::separators_in_paragraph(doc, 81, 121);
+ filter::separators_in_borders(doc, 0.05, 0.02);
+
filter::paragraphs_in_image(doc);
- filter::images_in_paragraph(doc);
+ filter::paragraphs_in_borders(doc);
on_progress();
+ on_new_progress_label("Rebuild extracted images");
+ elements = scribo::primitive::extract::non_text_hdoc(doc, closing_size);
+ doc.set_elements(elements);
+
+ on_progress();
+
+ on_new_progress_label("Tag images as drop capital");
+
+ postprocessing::images_to_drop_capital(doc);
+
+ on_progress();
// Saving results
if (save_doc_as_xml)
@@ -564,6 +603,9 @@ namespace scribo
on_end();
+
+ sleep(10);
+
return doc;
}
diff --git a/scribo/scribo/util/box_is_included.hh
b/scribo/scribo/util/box_is_included.hh
new file mode 100644
index 0000000..dc3f791
--- /dev/null
+++ b/scribo/scribo/util/box_is_included.hh
@@ -0,0 +1,74 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_UTIL_BOX_IS_INCLUDED_HH
+# define SCRIBO_UTIL_BOX_IS_INCLUDED_HH
+
+/// \file
+///
+/// Check whether a box is included in another one.
+
+
+#include <mln/core/site_set/box.hh>
+
+namespace scribo
+{
+
+ namespace util
+ {
+ using namespace mln;
+
+ /// \brief Check whether a box is included in another one.
+ ///
+ /// \return true if \p lhs is included in \p rhs.
+ //
+ template <typename P>
+ bool
+ box_is_included(const box<P>& lhs, const box<P>& rhs);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename P>
+ bool
+ box_is_included(const box<P>& lhs, const box<P>& rhs)
+ {
+ trace::entering("scribo::util::box_is_included");
+ // lhs is included iff, on every axis, its [pmin, pmax] range lies
+ // within that of rhs.  No early return: trace::exiting must always run.
+ bool included = true;
+ for (unsigned i = 0; included && i < P::dim; ++i)
+ included = (lhs.pmin()[i] >= rhs.pmin()[i] && lhs.pmax()[i] <= rhs.pmax()[i]);
+ trace::exiting("scribo::util::box_is_included");
+ return included;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::util
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_UTIL_BOX_IS_INCLUDED_HH
diff --git a/scribo/scribo/util/component_precise_outline.hh
b/scribo/scribo/util/component_precise_outline.hh
index 490b814..70fc995 100644
--- a/scribo/scribo/util/component_precise_outline.hh
+++ b/scribo/scribo/util/component_precise_outline.hh
@@ -40,9 +40,15 @@
# include <mln/io/ppm/save.hh>
# include <mln/data/convert.hh>
# include <mln/opt/at.hh>
+# include <mln/extension/fill.hh>
# include <iostream>
+#include <mln/io/pgm/save.hh>
+#include <mln/data/wrap.hh>
+#include <mln/data/convert.hh>
+
+
namespace scribo
{
@@ -79,7 +85,8 @@ namespace scribo
template <typename I>
void
find_first_point(const I& input,
- point2d& p)
+ point2d& p,
+ const mln_value(I)& id)
{
const mln::def::coord
mid_row = geom::min_row(input) + (geom::nrows(input) >> 1);
@@ -87,7 +94,7 @@ namespace scribo
for (mln::def::coord i = geom::min_col(input);
i <= geom::max_col(input); ++i)
{
- if (opt::at(input, mid_row, i))
+ if (opt::at(input, mid_row, i) == id)
{
p.row() = mid_row;
p.col() = i;
@@ -100,14 +107,15 @@ namespace scribo
void
left_up(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p2(cur_pt.row() + offset[direction][5][1],
cur_pt.col() + offset[direction][5][0]);
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && input(p3))
+ if ((input(p2) != id) && (input(p3) == id))
{
direction = 3;
return;
@@ -130,7 +138,8 @@ namespace scribo
void
right_up(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p1(cur_pt.row() + offset[direction][0][1],
cur_pt.col() + offset[direction][0][0]);
@@ -139,7 +148,7 @@ namespace scribo
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && (input(p1) || input(p3)))
+ if ((input(p2) != id) && ((input(p1) == id) || (input(p3) == id)))
{
direction = 0;
return;
@@ -162,14 +171,15 @@ namespace scribo
void
right_down(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p2(cur_pt.row() + offset[direction][5][1],
cur_pt.col() + offset[direction][5][0]);
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && input(p3))
+ if ((input(p2) != id) && (input(p3) == id))
{
direction = 1;
return;
@@ -192,7 +202,8 @@ namespace scribo
void
left_down(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p1(cur_pt.row() + offset[direction][0][1],
cur_pt.col() + offset[direction][0][0]);
@@ -201,7 +212,7 @@ namespace scribo
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && (input(p1) || input(p3)))
+ if ((input(p2) != id) && ((input(p1) == id) || (input(p3) == id)))
{
direction = 2;
return;
@@ -225,17 +236,18 @@ namespace scribo
void
find_next_point(const I& input,
point2d& cur_pt,
- int& direction)
+ int& direction,
+ const mln_value(I)& id)
{
unsigned i = 0;
point2d tmp;
switch (direction)
{
- case 0: left_up(direction, input, cur_pt); break;
- case 1: right_up(direction , input, cur_pt); break;
- case 2: right_down(direction, input, cur_pt); break;
- case 3: left_down(direction, input, cur_pt); break;
+ case 0: left_up(direction, input, cur_pt, id); break;
+ case 1: right_up(direction , input, cur_pt, id); break;
+ case 2: right_down(direction, input, cur_pt, id); break;
+ case 3: left_down(direction, input, cur_pt, id); break;
}
for (; i < 8; ++i)
@@ -243,7 +255,7 @@ namespace scribo
tmp = point2d(cur_pt.row() + offset[direction][i][1],
cur_pt.col() + offset[direction][i][0]);
- if (input.domain().has(tmp) && input(tmp))
+ if (input(tmp) == id)
break;
}
@@ -263,7 +275,7 @@ namespace scribo
}
void
- filter_points(mln::p_array<point2d>& points,
+ filter_points(const mln::p_array<point2d>& points,
mln::p_array<point2d>& waypoints)
{
const unsigned nelements = points.nsites();
@@ -330,33 +342,35 @@ namespace scribo
template <typename I>
mln::p_array<point2d>
- component_precise_outline(const Image<I>& input_)
+ component_precise_outline(const Image<I>& input_, const mln_value(I)&
id)
{
trace::entering("scribo::util::component_precise_outline");
const I& input = exact(input_);
typedef mln_site(I) P;
- point2d start_pt;
- int direction = 0;
+ extension::fill(input, 0);
+
mln::p_array<P> points;
points.reserve(std::max(geom::ncols(input), geom::nrows(input)));
- internal::find_first_point(input, start_pt);
+ point2d start_pt;
+ int direction = 0;
+
+ internal::find_first_point(input, start_pt, id);
P cur_pt = start_pt;
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
points.append(cur_pt);
while (cur_pt != start_pt)
{
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
points.append(cur_pt);
}
-
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
const std::vector<point2d>& vec_points = points.hook_std_vector_();
@@ -367,16 +381,27 @@ namespace scribo
while (cur_pt != start_pt)
{
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
points.append(cur_pt);
}
}
- // mln::p_array<P> waypoints;
- // internal::filter_points(points, waypoints);
+ std::cout << "Before filter points - " << points.nsites()
<< std::endl;
+
+ mln::p_array<P> waypoints;
+ internal::filter_points(points, waypoints);
+
+ std::cout << "After filter points - " << waypoints.nsites()
<< std::endl;
trace::exiting("scribo::util::component_precise_outline");
- return points;
+ return waypoints;
+ }
+
+ template <typename I>
+ mln::p_array<point2d>
+ component_precise_outline(const Image<I>& input)
+ {
+ return component_precise_outline(input, true);
}
# endif // ! MLN_INCLUDE_ONLY
--
1.5.6.5