---
scribo/ChangeLog | 4 +
scribo/scribo/primitive/extract/elements.hh | 227 +++++++++++++++++++++++++++
2 files changed, 231 insertions(+), 0 deletions(-)
create mode 100644 scribo/scribo/primitive/extract/elements.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 9c65829..e738a8a 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,9 @@
2010-11-15 Guillaume Lazzara <z(a)lrde.epita.fr>
+ * scribo/primitive/extract/elements.hh: New routine.
+
+2010-11-15 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* scribo/primitive/extract/separators_nonvisible.hh: New routine.
2010-11-15 Guillaume Lazzara <z(a)lrde.epita.fr>
diff --git a/scribo/scribo/primitive/extract/elements.hh
b/scribo/scribo/primitive/extract/elements.hh
new file mode 100644
index 0000000..c083988
--- /dev/null
+++ b/scribo/scribo/primitive/extract/elements.hh
@@ -0,0 +1,227 @@
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Find the elements of a document which are not text.
+///
+/// \fixme To be optimized!
+
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/data/fill.hh>
+# include <mln/util/array.hh>
+# include <mln/labeling/compute.hh>
+# include <mln/labeling/relabel.hh>
+# include <mln/accu/math/count.hh>
+# include <mln/pw/all.hh>
+
+# include <mln/value/label_8.hh>
+# include <mln/value/rgb8.hh>
+
+# include <scribo/core/macros.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/line_set.hh>
+# include <scribo/filter/objects_small.hh>
+
+#include <mln/clustering/kmean_rgb.hh>
+#include <mln/fun/v2v/rgb8_to_rgbn.hh>
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace extract
+ {
+
+
+ template <typename L, typename I>
+ component_set<L>
+ elements(const document<L>& doc, const Image<I>& input);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ namespace internal
+ {
+
+        /// \brief Ordering functor on component ids.
+        ///
+        /// Orders ids by decreasing bounding box size (number of sites
+        /// of the bounding box); when two bounding boxes have the same
+        /// size, the greater id comes first.
+        template <typename L>
+        struct order_bbox
+        {
+          /// \param[in] comps The component set the compared ids refer to.
+          ///
+          /// The constructor is explicit so that a component set is
+          /// never silently converted into a comparison functor.
+          explicit order_bbox(const scribo::component_set<L>& comps)
+            : comps_(comps)
+          {
+          }
+
+          /// \return True if component \p c1 must be ordered before
+          /// component \p c2.
+          bool operator()(const unsigned& c1, const unsigned& c2) const
+          {
+            // Compute each bounding box size once per comparison.
+            const unsigned
+              nsites1 = comps_(c1).bbox().nsites(),
+              nsites2 = comps_(c2).bbox().nsites();
+
+            if (nsites1 == nsites2)
+              return c1 > c2;
+            return nsites1 > nsites2;
+          }
+
+          /// Copy of the component set given at construction.
+          scribo::component_set<L> comps_;
+        };
+
+ } // end of namespace scribo::primitive::extract::internal
+
+
+
+ // FACADE
+
+      /// \brief Extract the non-text elements of a document.
+      ///
+      /// The bounding boxes of the text lines of \p doc are masked
+      /// out.  The remaining pixels of doc.image(), converted to
+      /// mln::value::rgb<5>, are clustered with
+      /// mln::clustering::kmean_rgb into 3 labels and the most
+      /// populated label is discarded as background.  The remaining
+      /// pixels are filtered with filter::components_small (threshold
+      /// 40) and labeled with c8().  Finally, a component whose four
+      /// bounding box corners are already covered by previously drawn
+      /// (larger or equal) bounding boxes is tagged component::Ignored.
+      ///
+      /// \param[in] doc    A valid document; its text lines and its
+      ///                   image are read.
+      /// \param[in] input_ A valid image; it is only used to
+      ///                   initialize the working mask.
+      ///                   NOTE(review): presumably it must have the
+      ///                   same domain as doc.image() - confirm.
+      ///
+      /// \return The component set of the extracted elements, inner
+      /// elements being tagged component::Ignored.
+      template <typename L, typename I>
+      component_set<L>
+      elements(const document<L>& doc, const Image<I>& input_)
+      {
+        trace::entering("scribo::primitive::extract::elements");
+
+        const I& input = exact(input_);
+        mln_precondition(doc.is_valid());
+        mln_precondition(input.is_valid());
+
+        const line_set<L>& lines = doc.text();
+
+        // Element extraction
+
+        image2d<value::label_8> img_lbl8;
+        {
+          // Mask of the sites to be clustered: everything except the
+          // bounding boxes of the text lines.
+          image2d<bool> content;
+          initialize(content, input);
+          data::fill(content, true);
+
+          for_all_lines(l, lines)
+            if (lines(l).type() == line::Text)
+              data::fill((content | lines(l).bbox()).rw(), false);
+
+          typedef mln::value::rgb<5> t_rgb5;
+          typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
+
+          image2d<t_rgb5>
+            img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
+
+          img_lbl8 =
+            mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)),
+                                                 3, 10, 10).unmorph_();
+          // Sites outside the mask get label 0.
+          data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+
+          // Number of sites per label.
+          mln::util::array<unsigned>
+            card = mln::labeling::compute(accu::math::count<value::label_8>(),
+                                          img_lbl8, img_lbl8, 3);
+
+          // The most populated label is considered as the background.
+          unsigned max = 0, bg_id = 0;
+          for_all_ncomponents(c, 3)
+            if (card(c) > max)
+            {
+              max = card(c);
+              bg_id = c;
+            }
+
+          // Relabel: label 0 and the background label are mapped to
+          // false, the other labels to true.
+          mln::fun::i2v::array<bool> f(4, true);
+          f(0) = false;
+          f(bg_id) = false;
+          labeling::relabel_inplace(img_lbl8, 4, f);
+        }
+
+
+        component_set<L> output;
+
+        std::cout << "Removing small elements" << std::endl;
+        {
+          // Binary image of the sites having a non-null label.
+          image2d<bool> elts;
+          initialize(elts, img_lbl8);
+          data::fill(elts, false);
+          data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
+
+          scribo::def::lbl_type nlabels;
+          elts = filter::components_small(elts, c8(), nlabels, 40);
+
+          output = primitive::extract::components(elts, c8(), nlabels);
+        }
+
+
+        std::cout << "Ignoring inner elements" << std::endl;
+
+        {
+          // FIXME: We would like to use the convex hull instead of the bbox.
+          internal::order_bbox<L> func(output);
+          util::array<unsigned> box_ordered_comps;
+          // Component ids range over [1, nelements()]: the upper bound
+          // is inclusive, otherwise the last component would never be
+          // processed below.
+          for (unsigned i = 1; i <= output.nelements(); ++i)
+            box_ordered_comps.append(i);
+          // Sort ids according to internal::order_bbox (largest
+          // bounding box first).
+          std::sort(box_ordered_comps.hook_std_vector_().begin(),
+                    box_ordered_comps.hook_std_vector_().end(), func);
+
+          // Union of the bounding boxes of the components kept so far.
+          image2d<bool> merged_elts;
+          initialize(merged_elts, img_lbl8);
+          data::fill(merged_elts, false);
+          for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
+          {
+            unsigned c = box_ordered_comps(i);
+            // The two remaining corners of the bounding box:
+            // (pmin.row, pmax.col) and (pmax.row, pmin.col).
+            point2d
+              pminright = output(c).bbox().pmin(),
+              pmaxleft = output(c).bbox().pmax();
+            pminright.col() = output(c).bbox().pmax().col();
+            pmaxleft.col() = output(c).bbox().pmin().col();
+
+            // Only the four corners are tested; they may be covered
+            // by different boxes.
+            if (merged_elts(output(c).bbox().pmin())
+                && merged_elts(output(c).bbox().pmax())
+                && merged_elts(pminright)
+                && merged_elts(pmaxleft))
+              output(c).update_tag(component::Ignored);
+            else
+              mln::draw::box_plain(merged_elts, output(c).bbox(), true);
+          }
+        }
+
+        // Disabled debug output.
+//      mln::io::pbm::save(merged_elts, "merged_elts.pbm");
+
+//      mln::util::array<image2d<value::rgb8> > elt_ima;
+//      unsigned i = 0;
+//      for_all_comps(c, elt_comp)
+//        if (elt_comp(c).is_valid())
+//        {
+//          elt_ima.append(preprocessing::crop(doc.image(), elt_comp(c).bbox()));
+//          mln::io::ppm::save(elt_ima(i), mln::debug::filename("elt.ppm", i));
+//          ++i;
+//        }
+
+
+        trace::exiting("scribo::primitive::extract::elements");
+        return output;
+      }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
+ } // end of namespace scribo::primitive::extract
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH
--
1.5.6.5