* scribo/layout/docstrum.hh,
* scribo/layout/internal/docstrum/all.hh,
* scribo/layout/internal/docstrum/bbox2d.hh,
* scribo/layout/internal/docstrum/between_closure.hh,
* scribo/layout/internal/docstrum/components_size.hh,
* scribo/layout/internal/docstrum/distances.hh,
* scribo/layout/internal/docstrum/extract_lines.hh,
* scribo/layout/internal/docstrum/extract_paragraphs.hh,
* scribo/layout/internal/docstrum/kd_hyperrect.hh,
* scribo/layout/internal/docstrum/kd_node.hh,
* scribo/layout/internal/docstrum/kd_tree.hh,
* scribo/layout/internal/docstrum/labeling.hh,
* scribo/layout/internal/docstrum/line.hh,
* scribo/layout/internal/docstrum/nearest_neighbors.hh,
* scribo/layout/internal/docstrum/paragraph.hh,
* scribo/layout/internal/docstrum/within_closure.hh,
* tests/layout/docstrum-line.pgm,
* tests/layout/docstrum-paragraph.pgm,
* tests/layout/docstrum.cc: New.
* tests/layout/Makefile.am: New target and extra dist files.
---
scribo/ChangeLog | 26 ++
scribo/scribo/layout/docstrum.hh | 256 +++++++++++
scribo/scribo/layout/internal/docstrum/all.hh | 43 ++
scribo/scribo/layout/internal/docstrum/bbox2d.hh | 161 +++++++
.../layout/internal/docstrum/between_closure.hh | 353 +++++++++++++++
.../layout/internal/docstrum/components_size.hh | 181 ++++++++
.../scribo/layout/internal/docstrum/distances.hh | 167 +++++++
.../layout/internal/docstrum/extract_lines.hh | 128 ++++++
.../layout/internal/docstrum/extract_paragraphs.hh | 116 +++++
.../layout/internal/docstrum/kd_hyperrect.hh | 210 +++++++++
scribo/scribo/layout/internal/docstrum/kd_node.hh | 182 ++++++++
scribo/scribo/layout/internal/docstrum/kd_tree.hh | 472 ++++++++++++++++++++
scribo/scribo/layout/internal/docstrum/labeling.hh | 347 ++++++++++++++
scribo/scribo/layout/internal/docstrum/line.hh | 229 ++++++++++
.../layout/internal/docstrum/nearest_neighbors.hh | 265 +++++++++++
.../scribo/layout/internal/docstrum/paragraph.hh | 126 ++++++
.../layout/internal/docstrum/within_closure.hh | 175 ++++++++
scribo/tests/layout/Makefile.am | 11 +-
scribo/tests/layout/docstrum-line.pgm | Bin 0 -> 3118 bytes
scribo/tests/layout/docstrum-paragraph.pgm | Bin 0 -> 3118 bytes
scribo/tests/layout/docstrum.cc | 64 +++
21 files changed, 3509 insertions(+), 3 deletions(-)
create mode 100644 scribo/scribo/layout/docstrum.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/all.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/bbox2d.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/between_closure.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/components_size.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/distances.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/extract_lines.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/extract_paragraphs.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/kd_hyperrect.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/kd_node.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/kd_tree.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/labeling.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/line.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/nearest_neighbors.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/paragraph.hh
create mode 100644 scribo/scribo/layout/internal/docstrum/within_closure.hh
create mode 100644 scribo/tests/layout/docstrum-line.pgm
create mode 100644 scribo/tests/layout/docstrum-paragraph.pgm
create mode 100644 scribo/tests/layout/docstrum.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 90de65c..878d729 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,29 @@
+2013-05-02 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Add Docstrum document layout analysis algorithm.
+
+ * scribo/layout/docstrum.hh,
+ * scribo/layout/internal/docstrum/all.hh,
+ * scribo/layout/internal/docstrum/bbox2d.hh,
+ * scribo/layout/internal/docstrum/between_closure.hh,
+ * scribo/layout/internal/docstrum/components_size.hh,
+ * scribo/layout/internal/docstrum/distances.hh,
+ * scribo/layout/internal/docstrum/extract_lines.hh,
+ * scribo/layout/internal/docstrum/extract_paragraphs.hh,
+ * scribo/layout/internal/docstrum/kd_hyperrect.hh,
+ * scribo/layout/internal/docstrum/kd_node.hh,
+ * scribo/layout/internal/docstrum/kd_tree.hh,
+ * scribo/layout/internal/docstrum/labeling.hh,
+ * scribo/layout/internal/docstrum/line.hh,
+ * scribo/layout/internal/docstrum/nearest_neighbors.hh,
+ * scribo/layout/internal/docstrum/paragraph.hh,
+ * scribo/layout/internal/docstrum/within_closure.hh,
+ * tests/layout/docstrum-line.pgm,
+ * tests/layout/docstrum-paragraph.pgm,
+ * tests/layout/docstrum.cc: New.
+
+ * tests/layout/Makefile.am: New target and extra dist files.
+
2013-04-26 Guillaume Lazzara <z(a)lrde.epita.fr>
* scribo/binarization/sauvola_ms_split.hh: Make use of all arguments.
diff --git a/scribo/scribo/layout/docstrum.hh b/scribo/scribo/layout/docstrum.hh
new file mode 100644
index 0000000..8291861
--- /dev/null
+++ b/scribo/scribo/layout/docstrum.hh
@@ -0,0 +1,256 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Docstrum layout analysis algorithm.
+
+#ifndef SCRIBO_LAYOUT_DOCSTRUM_HH
+# define SCRIBO_LAYOUT_DOCSTRUM_HH
+
+#include <iostream>
+
+#include <mln/io/pbm/load.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/util/timer.hh>
+#include <mln/data/convert.hh>
+#include <mln/draw/box.hh>
+#include <mln/literal/colors.hh>
+#include <mln/core/image/image2d.hh>
+
+
+#include <scribo/layout/internal/docstrum/all.hh>
+
+using namespace mln;
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ using namespace mln;
+
+ /*!
+ \brief Extraction type for Docstrum algorithm.
+
+ Defines how the components should be grouped in Docstrum
+ algorithm.
+
+ \sa scribo::docstrum
+ */
+ enum docstrum_extraction_type
+ {
+ Line, ///< Group by lines
+ Paragraph ///< Group by paragraphs.
+ };
+
+
+ /*!
+ \brief Docstrum layout analysis algorithm.
+
+ \param[in] ima A binary 2D document image.
+
+ \param[in] extraction_type how the components should be grouped
+ in results.
+
+ \return An image of Paragraph or lines bounding boxes (w.r.t to
+ \p extraction_type). Small elements are set to 1 and large ones
+ to 2.
+
+ This algorithm has been implemented w.r.t. the description in
+ paper \cite ogorman1993pami.
+
+ \ingroup grpalgolayout
+ */
+ template <typename I>
+ mln_ch_value(I,value::int_u8)
+ docstrum(const Image<I>& ima, docstrum_extraction_type extraction_type);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ using namespace scribo::layout::internal::docstrum;
+
+ template <typename I>
+ mln_ch_value(I,value::int_u8)
+ docstrum(const Image<I>& ima_, docstrum_extraction_type extraction_type)
+ {
+ mln_trace("scribo::layout::docstrum");
+
+ const I& ima = exact(ima_);
+ mln_precondition(ima.is_valid());
+ mln_precondition(extraction_type == Line
+ || extraction_type == Paragraph);
+ mlc_equal(mln_trait_image_kind(I),
+ mln::trait::image::kind::binary)::check();
+
+ //-------------------------
+ // Component labeling
+ //-------------------------
+
+ std::vector<bbox2d> bbox_list;
+ image2d<unsigned> labeled_img(ima.domain());
+ data::fill(labeled_img, 0);
+ unsigned nlabels = 0;
+
+ component_labeling(ima, nlabels, bbox_list, labeled_img, 3);
+
+ //-------------------------
+ // Separation in two groups
+ //-------------------------
+
+ std::vector<bbox2d> small_comp;
+ std::vector<bbox2d> big_comp;
+ small_comp.reserve(nlabels / 2);
+ big_comp.reserve(nlabels / 2);
+
+ compute_comp_size(bbox_list, small_comp, big_comp, 3.0f);
+ bbox_list.clear();
+
+ //-------------------------
+ // Extract lines from small components
+ //-------------------------
+
+ std::vector<line> small_lines;
+ int small_within_distance = 0;
+ int small_between_distance = 0;
+ extract_lines(small_comp, small_lines, small_within_distance,
+ small_between_distance, math::pi / 6, math::pi / 6, 5);
+
+ //-------------------------
+ // Extract lines from big components
+ //-------------------------
+
+ std::vector<line> big_lines;
+ int big_within_distance = 0;
+ int big_between_distance = 0;
+ extract_lines(big_comp, big_lines, big_within_distance,
+ big_between_distance, math::pi / 6, math::pi / 6, 5);
+
+ //-------------------------
+ // Extract paragraphs if requested
+ //-------------------------
+
+ std::vector<paragraph> small_paragraphs;
+ std::vector<paragraph> big_paragraphs;
+
+ if (extraction_type == Paragraph)
+ {
+ //-------------------------
+ // Extract paragraphs from small components
+ //-------------------------
+
+ extract_paragraphs(small_lines, small_paragraphs, 1.5f *
+ small_within_distance, 1.3f *
+ small_between_distance, math::pi / 6);
+
+ //-------------------------
+ // Extract paragraphs from big components
+ //-------------------------
+
+ extract_paragraphs(big_lines, big_paragraphs, 1.5f *
+ big_within_distance, 2.0f *
+ small_between_distance, math::pi / 6);
+ }
+
+
+ //-------------------------
+ // Saving
+ //-------------------------
+
+ mln_ch_value(I,value::int_u8) output(ima.domain());
+ data::fill(output, literal::zero);
+
+ if (extraction_type == Line)
+ {
+ {
+ const unsigned small_nlines = small_lines.size();
+
+ for (unsigned i = 0; i < small_nlines; ++i)
+ {
+ const bbox2d& b = small_lines[i].bbox();
+ box2d box(b.pmin(), b.pmax());
+
+ draw::box(output, box, 1u);
+ }
+ }
+
+ {
+ const unsigned big_nlines = big_lines.size();
+
+ for (unsigned i = 0; i < big_nlines; ++i)
+ {
+ const bbox2d& b = big_lines[i].bbox();
+ box2d box(b.pmin(), b.pmax());
+
+ draw::box(output, box, 2u);
+ }
+ }
+ }
+ else if (extraction_type == Paragraph)
+ {
+ {
+ const unsigned small_nparas = small_paragraphs.size();
+
+ for (unsigned i = 0; i < small_nparas; ++i)
+ {
+ const bbox2d& b = small_paragraphs[i].bbox();
+ box2d box(b.pmin(), b.pmax());
+
+ draw::box(output, box, 1u);
+ }
+ }
+
+ {
+ const unsigned big_nparas = big_paragraphs.size();
+
+ for (unsigned i = 0; i < big_nparas; ++i)
+ {
+ const bbox2d& b = big_paragraphs[i].bbox();
+ box2d box(b.pmin(), b.pmax());
+
+ draw::box(output, box, 2u);
+ }
+ }
+ }
+ else
+ {
+ mln_trace_warning("Invalid extraction type!");
+ abort();
+ }
+
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_DOCSTRUM_HH
diff --git a/scribo/scribo/layout/internal/docstrum/all.hh
b/scribo/scribo/layout/internal/docstrum/all.hh
new file mode 100644
index 0000000..0c83d1b
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/all.hh
@@ -0,0 +1,43 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Internal utilities related to Docstrum layout analysis
+/// algorithm.
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_ALL_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_ALL_HH
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+# include <scribo/layout/internal/docstrum/labeling.hh>
+# include <scribo/layout/internal/docstrum/components_size.hh>
+# include <scribo/layout/internal/docstrum/distances.hh>
+# include <scribo/layout/internal/docstrum/line.hh>
+# include <scribo/layout/internal/docstrum/paragraph.hh>
+# include <scribo/layout/internal/docstrum/extract_lines.hh>
+# include <scribo/layout/internal/docstrum/extract_paragraphs.hh>
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_ALL_HH
diff --git a/scribo/scribo/layout/internal/docstrum/bbox2d.hh
b/scribo/scribo/layout/internal/docstrum/bbox2d.hh
new file mode 100644
index 0000000..13efc07
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/bbox2d.hh
@@ -0,0 +1,161 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_BBOX2D_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_BBOX2D_HH
+
+# include <mln/core/alias/point2d.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+ class bbox2d
+ {
+ public:
+ bbox2d();
+
+ bbox2d(const unsigned n,
+ const point2d& pmin,
+ const point2d& pmax);
+
+ point2d& pmin();
+
+ const point2d& pmin() const;
+
+ point2d& pmax();
+
+ const point2d& pmax() const;
+
+ point2d pcenter() const;
+
+ unsigned bbox_number() const;
+
+ void set_bbox_number(const unsigned id);
+
+ private:
+ unsigned n_;
+ point2d pmin_;
+ point2d pmax_;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //---------------------------
+ // Bounding box 2D
+ //---------------------------
+
+ inline
+ bbox2d::bbox2d()
+ : n_(0), pmin_(point2d(0, 0)), pmax_(point2d(0, 0))
+ {
+ }
+
+ inline
+ bbox2d::bbox2d(const unsigned n,
+ const point2d& pmin,
+ const point2d& pmax)
+ : n_(n), pmin_(pmin), pmax_(pmax)
+ {
+ }
+
+ inline
+ point2d&
+ bbox2d::pmin()
+ {
+ return pmin_;
+ }
+
+ inline
+ const point2d&
+ bbox2d::pmin() const
+ {
+ return pmin_;
+ }
+
+ inline
+ point2d&
+ bbox2d::pmax()
+ {
+ return pmax_;
+ }
+
+ inline
+ const point2d&
+ bbox2d::pmax() const
+ {
+ return pmax_;
+ }
+
+ inline
+ point2d
+ bbox2d::pcenter() const
+ {
+ point2d p(pmin_.row() + (pmax_.row() - pmin_.row()) / 2,
+ pmin_.col() + (pmax_.col() - pmin_.col()) / 2);
+
+ return p;
+ }
+
+ inline
+ unsigned
+ bbox2d::bbox_number() const
+ {
+ return n_;
+ }
+
+ inline
+ void
+ bbox2d::set_bbox_number(const unsigned id)
+ {
+ n_ = id;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_BBOX2D_HH
diff --git a/scribo/scribo/layout/internal/docstrum/between_closure.hh
b/scribo/scribo/layout/internal/docstrum/between_closure.hh
new file mode 100644
index 0000000..d466734
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/between_closure.hh
@@ -0,0 +1,353 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_BETWEEN_CLOSURE_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_BETWEEN_CLOSURE_HH
+
+# include <vector>
+
+# include <scribo/layout/internal/docstrum/line.hh>
+# include <scribo/layout/internal/docstrum/paragraph.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+ float
+ compute_lines_angle(const line& l1, const line& l2);
+
+
+ void
+ project_point(const line& destination,
+ const point2d& p,
+ point2d& projection);
+
+
+ float
+ compute_parallel_distance(const line& origin,
+ const line& destination,
+ point2d& p_center);
+
+ float
+ point_to_line_distance(const float slope,
+ const float y_intercept,
+ const point2d& p_center);
+
+
+ void
+ between_closure(const std::vector<line>& lines,
+ std::vector<paragraph>& paragraphs,
+ const std::vector< std::vector<unsigned> >& clusters,
+ const float within_distance,
+ const float between_distance,
+ const float theta);
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Compute angle between two lines
+ //-------------------------
+
+ inline
+ float compute_lines_angle(const line& l1,
+ const line& l2)
+ {
+ const point2d& l1_begin = l1.line_begin();
+ const point2d& l1_end = l1.line_end();
+ const point2d& l2_begin = l2.line_begin();
+ const point2d& l2_end = l2.line_end();
+
+ const float delta_y_l1 = l1_end.row() - l1_begin.row();
+ const float delta_x_l1 = l1_end.col() - l1_begin.col();
+ const float delta_y_l2 = l2_end.row() - l2_begin.row();
+ const float delta_x_l2 = l2_end.col() - l2_begin.col();
+
+ return (atan(delta_y_l2 / delta_x_l2) - atan(delta_y_l1 / delta_x_l1));
+ }
+
+ //-------------------------
+ // Projection of a point
+ //-------------------------
+
+ inline
+ void project_point(const line& destination,
+ const point2d& p,
+ point2d& projection)
+ {
+ const point2d& destination_begin = destination.line_begin();
+ const point2d& destination_end = destination.line_end();
+
+ const int delta_x = destination_end.col() - destination_begin.col();
+ const int delta_y = destination_end.row() - destination_begin.row();
+ const float length = (delta_x * delta_x) + (delta_y * delta_y);
+ float r = (destination_begin.row() - p.row()) * (-delta_y) -
+ (destination_begin.col() - p.col()) * (delta_x);
+ r /= length;
+
+ projection.row() = destination_begin.row() + r * delta_y;
+ projection.col() = destination_begin.col() + r * delta_x;
+ }
+
+ //-------------------------
+ // Compute parallel distance
+ //-------------------------
+
+ inline
+ float compute_parallel_distance(const line& origin,
+ const line& destination,
+ point2d& p_center)
+ {
+ mln_trace("scribo::layout::internal::docstrum::compute_parallel_distance");
+
+ const point2d& origin_begin = origin.line_begin();
+ const point2d& origin_end = origin.line_end();
+ const point2d& destination_begin = destination.line_begin();
+ const point2d& destination_end = destination.line_end();
+ point2d projection_begin;
+ point2d projection_end;
+
+ project_point(destination, origin_begin, projection_begin);
+ project_point(destination, origin_end, projection_end);
+
+ point2d overlap_begin;
+ point2d overlap_end;
+ float overlap_length = 0.0f;
+ // float parallel_distance_left = 0;
+ // float parallel_distance_right = 0;
+
+ if (projection_begin.col() > destination_end.col() ||
+ projection_end.col() < destination_begin.col())
+ return -1.0f;
+
+ if (projection_begin.col() < destination_begin.col())
+ overlap_begin = destination_begin;
+ else
+ overlap_begin = projection_begin;
+
+ if (projection_end.col() < destination_end.col())
+ overlap_end = projection_end;
+ else
+ overlap_end = destination_end;
+
+ p_center = point2d(overlap_begin.row() + (overlap_end.row() -
+ overlap_begin.row()) / 2,
+ overlap_begin.col() + (overlap_end.col() -
+ overlap_begin.col()) / 2);
+
+ // const int delta_x_left = destination_begin.col() - projection_begin.col();
+ // const int delta_y_left = destination_begin.row() - projection_begin.row();
+ // const int delta_x_right = destination_end.col() - projection_end.col();
+ // const int delta_y_right = destination_end.row() - projection_end.row();
+
+ // parallel_distance_left = sqrt(delta_x_left * delta_x_left +
+ // delta_y_left * delta_y_left);
+ // parallel_distance_right = sqrt(delta_x_right * delta_x_right +
+ // delta_y_right * delta_y_right);
+
+ const int over_delta_x = overlap_end.col() - overlap_begin.col();
+ const int over_delta_y = overlap_end.row() - overlap_begin.row();
+
+ overlap_length = sqrt(over_delta_y * over_delta_y + over_delta_x
+ * over_delta_x);
+
+ return overlap_length;
+
+ // return std::min(parallel_distance_left, parallel_distance_right);
+ }
+
+ //-------------------------
+ // Point to line distance
+ //-------------------------
+
+ inline
+ float point_to_line_distance(const float slope,
+ const float y_intercept,
+ const point2d& p_center)
+ {
+ return (std::fabs(-slope * p_center.col() +
+ p_center.row() - y_intercept) /
+ sqrt(slope * slope + 1));
+ }
+
+ //-------------------------
+ // Between closure
+ //-------------------------
+
+ inline
+ void between_closure(const std::vector<line>& lines,
+ std::vector<paragraph>& paragraphs,
+ const std::vector< std::vector<unsigned> >& clusters,
+ const float within_distance,
+ const float between_distance,
+ const float theta)
+ {
+ mln_trace("scribo::layout::internal::docstrum::between_closure");
+
+ // FIXME: why taking this value ?
+ (void) within_distance;
+
+ const unsigned nelements = lines.size();
+ std::set<unsigned> already_seen;
+ unsigned paragraph_count = 0;
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const line& l = lines[i];
+ const unsigned id = l.id();
+
+ if (already_seen.find(id) == already_seen.end())
+ {
+ const std::vector<unsigned>& neighbors = clusters[i];
+
+ std::vector<unsigned>::const_iterator it_begin = neighbors.begin();
+ std::vector<unsigned>::const_iterator it_end = neighbors.end();
+
+ std::vector<const line*> propagation;
+ std::vector<const line*> next_propagation;
+
+ // Initialize a new paragraph with the current line
+ paragraph p(&l, paragraph_count);
+ ++paragraph_count;
+ already_seen.insert(id);
+
+ // Firstly, we try to link the current line with its closest
+ // neighbors
+ for (; it_begin != it_end; ++it_begin)
+ {
+ // We first retrieve the corresponding line
+ const line& ln = lines[(*it_begin)];
+ const unsigned id_ln = ln.id();
+
+ if (already_seen.find(id_ln) == already_seen.end())
+ {
+ // Compute the angle between the two lines
+ const float angle = compute_lines_angle(l, ln);
+
+ if (angle > -theta && angle < theta)
+ {
+ point2d p_center;
+ const float p_distance = compute_parallel_distance(l, ln, p_center);
+
+ if (p_distance >= 0)
+ {
+ const float distance_to_line =
+ point_to_line_distance(l.slope(), l.y_intercept(), p_center);
+
+ if (distance_to_line <= between_distance)
+ {
+ p.add_line(&ln);
+ already_seen.insert(id_ln);
+ propagation.push_back(&lines[id_ln]);
+ }
+ }
+ }
+ }
+ }
+
+ // Then, we propagate the linking to our neighbors
+ while (! propagation.empty())
+ {
+ next_propagation.clear();
+ const unsigned prop_nelements = propagation.size();
+
+ for (unsigned j = 0; j < prop_nelements; ++j)
+ {
+ const std::vector<unsigned>& neighbors =
+ clusters[propagation[j]->id()];
+ const line& ln1 = lines[propagation[j]->id()];
+
+ std::vector<unsigned>::const_iterator it_begin = neighbors.begin();
+ std::vector<unsigned>::const_iterator it_end = neighbors.end();
+
+ // Firstly, we try to link the current line with its closest
+ // neighbors
+ for (; it_begin != it_end; ++it_begin)
+ {
+ // We first retrieve the corresponding line
+ const line& ln = lines[(*it_begin)];
+ const unsigned id_ln = ln.id();
+
+ if (already_seen.find(id_ln) == already_seen.end())
+ {
+ // Compute the angle between the two lines
+ const float angle = compute_lines_angle(ln1, ln);
+
+ if (angle > -theta && angle < theta)
+ {
+ point2d p_center;
+ const float p_distance = compute_parallel_distance(ln1, ln, p_center);
+
+ if (p_distance >= 0)
+ {
+ const float distance_to_line = point_to_line_distance(ln1.slope(),
+ ln1.y_intercept(), p_center);
+
+ if (distance_to_line <= between_distance)
+ {
+ p.add_line(&ln);
+ already_seen.insert(id_ln);
+ next_propagation.push_back(&lines[id_ln]);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ propagation.clear();
+ propagation = next_propagation;
+ }
+
+ paragraphs.push_back(p);
+ }
+ }
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_BETWEEN_CLOSURE_HH
diff --git a/scribo/scribo/layout/internal/docstrum/components_size.hh
b/scribo/scribo/layout/internal/docstrum/components_size.hh
new file mode 100644
index 0000000..02a8fea
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/components_size.hh
@@ -0,0 +1,181 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_COMPONENT_SIZE_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_COMPONENT_SIZE_HH
+
+# include <vector>
+
+# include <mln/core/image/image2d.hh>
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+ void
+ compute_comp_size(const std::vector<bbox2d>& bbox_list,
+ std::vector<bbox2d>& small_comp,
+ std::vector<bbox2d>& big_comp,
+ const float max_size_factor);
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Compute components size
+ //-------------------------
+
+ inline
+ void
+ compute_comp_size(const std::vector<bbox2d>& bbox_list,
+ std::vector<bbox2d>& small_comp,
+ std::vector<bbox2d>& big_comp,
+ const float max_size_factor)
+ {
+ mln_trace("scribo::layout::internal::docstrum::compute_comp_size");
+
+ std::vector<bbox2d>::const_iterator it_begin = bbox_list.begin();
+ std::vector<bbox2d>::const_iterator it_end = bbox_list.end();
+ std::map<int, int> size_hist;
+ std::vector<int> smoothed_size_hist;
+ std::vector<int> sizes;
+ sizes.reserve(bbox_list.size());
+ int max_value = 0;
+ int comp_size = 0;
+ int window = 1;
+
+ // Compute the histogram and find the dominant characters size
+ for (; it_begin != it_end; ++it_begin)
+ {
+ const bbox2d& box = (*it_begin);
+ const unsigned height = box.pmax().row() - box.pmin().row();
+ const unsigned width = box.pmax().col() - box.pmin().col();
+ const int size = round(sqrt(width * height));
+
+ ++size_hist[size];
+
+ if (size > max_value)
+ max_value = size;
+
+ sizes.push_back(size);
+ }
+
+ const int nelements = max_value + 1;
+ smoothed_size_hist.resize(nelements);
+
+ const int factor = 2 * window + 1;
+ max_value = 0;
+
+ // Smoothed distribution
+ for (int i = 0; i < nelements; ++i)
+ {
+ int count = 0;
+
+ for (int j = i - window; j <= i + window; ++j)
+ {
+ if (j < 0 || j >= nelements)
+ continue;
+
+ if (size_hist.find(j) != size_hist.end())
+ count += size_hist[j];
+ }
+
+ smoothed_size_hist[i] = count / factor;
+
+ if (smoothed_size_hist[i] > max_value)
+ {
+ comp_size = i;
+ max_value = smoothed_size_hist[i];
+ }
+ }
+
+ // std::map<unsigned, unsigned> size_hist;
+ // unsigned max_value = 0;
+ // unsigned comp_size = 0;
+ // std::vector<unsigned> sizes;
+ // sizes.reserve(bbox_list.size());
+
+ // // Compute the histogram and find the dominant characters size
+
+ // for (; it_begin != it_end; ++it_begin)
+ // {
+ // const bbox2d& box = (*it_begin);
+ // const unsigned height = box.pmax().row() - box.pmin().row();
+ // const unsigned width = box.pmax().col() - box.pmin().col();
+ // const unsigned size = sqrt(width * height);
+ // const unsigned hist_value = ++size_hist[size];
+
+ // sizes.push_back(size);
+
+ // if (hist_value > max_value)
+ // {
+ // comp_size = size;
+ // max_value = hist_value;
+ // }
+ // }
+
+ // Separate components into two classes
+ it_begin = bbox_list.begin();
+ std::vector<int>::iterator it_size_begin = sizes.begin();
+ const float dominant_size = comp_size * max_size_factor;
+
+ for (; it_begin != it_end; ++it_begin, ++it_size_begin)
+ {
+ const bbox2d& box = (*it_begin);
+ const float size = (*it_size_begin);
+
+ if (size < dominant_size)
+ small_comp.push_back(box);
+ else
+ big_comp.push_back(box);
+ }
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_COMPONENT_SIZE_HH
diff --git a/scribo/scribo/layout/internal/docstrum/distances.hh
b/scribo/scribo/layout/internal/docstrum/distances.hh
new file mode 100644
index 0000000..f548712
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/distances.hh
@@ -0,0 +1,167 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_DISTANCES_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_DISTANCES_HH
+
+# include <vector>
+
+# include <mln/core/image/image2d.hh>
+
+# include <scribo/layout/internal/docstrum/nearest_neighbors.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+ int
+ within_line_distance(const std::vector<nearestn>& clusters,
+ const float theta_h);
+
+
+ int
+ between_line_distance(const std::vector<nearestn>& clusters,
+ const float theta_v);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Within-line distance
+ //-------------------------
+
+ inline
+ int
+ within_line_distance(const std::vector<nearestn>& clusters,
+ const float theta_h)
+ {
+ mln_trace("scribo::layout::internal::docstrum::within_line_distance");
+
+ std::vector<nearestn>::const_iterator it_begin = clusters.begin();
+ std::vector<nearestn>::const_iterator it_end = clusters.end();
+ std::map<int, int> distance_hist;
+ int max_value = 0;
+ int mean_distance = 0;
+
+ for (; it_begin != it_end; ++it_begin)
+ {
+ const std::vector<nnpoint>& neighbors =
+ (*it_begin).nearest_points();
+ const unsigned nelements = (*it_begin).ninserted();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const nnpoint& nnp = neighbors[i];
+ const float angle_h = nnp.angle_h;
+
+ if (angle_h > -theta_h && angle_h < theta_h)
+ {
+ const int distance = nnp.distance;
+ const int value = ++distance_hist[distance];
+
+ if (value > max_value)
+ {
+ max_value = value;
+ mean_distance = distance;
+ }
+ }
+ }
+ }
+
+ return mean_distance;
+ }
+
+
+
+ //-------------------------
+ // Between-line distance
+ //-------------------------
+
+ inline
+ int
+ between_line_distance(const std::vector<nearestn>& clusters,
+ const float theta_v)
+ {
+ mln_trace("scribo::layout::internal::docstrum::between_line_distance");
+
+ std::vector<nearestn>::const_iterator it_begin = clusters.begin();
+ std::vector<nearestn>::const_iterator it_end = clusters.end();
+ std::map<int, int> distance_hist;
+ int max_value = 0;
+ int mean_distance = 0;
+
+ for (; it_begin != it_end; ++it_begin)
+ {
+ const std::vector<nnpoint>& neighbors = (*it_begin).nearest_points();
+ const unsigned nelements = (*it_begin).ninserted();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const nnpoint& nnp = neighbors[i];
+ const float angle_v = nnp.angle_v;
+
+ if (angle_v > -theta_v && angle_v < theta_v)
+ {
+ const int distance = nnp.distance;
+ const int value = ++distance_hist[distance];
+
+ if (value > max_value)
+ {
+ max_value = value;
+ mean_distance = distance;
+ }
+ }
+ }
+ }
+
+ return mean_distance;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_DISTANCES_HH
diff --git a/scribo/scribo/layout/internal/docstrum/extract_lines.hh
b/scribo/scribo/layout/internal/docstrum/extract_lines.hh
new file mode 100644
index 0000000..18ceb00
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/extract_lines.hh
@@ -0,0 +1,128 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_EXTRACT_LINES_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_EXTRACT_LINES_HH
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+# include <scribo/layout/internal/docstrum/line.hh>
+# include <scribo/layout/internal/docstrum/kd_tree.hh>
+# include <scribo/layout/internal/docstrum/nearest_neighbors.hh>
+# include <scribo/layout/internal/docstrum/within_closure.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+ void extract_lines(std::vector<bbox2d>& comp,
+ std::vector<line>& lines,
+ int& within_distance,
+ int& between_distance,
+ const float theta_h,
+ const float theta_v,
+ const int k);
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Extract lines
+ //-------------------------
+
+ inline
+ void
+ extract_lines(std::vector<bbox2d>& comp,
+ std::vector<line>& lines,
+ int& within_distance,
+ int& between_distance,
+ const float theta_h,
+ const float theta_v,
+ const int k)
+ {
+ mln_trace("scribo::layout::internal::docstrum::extract_lines");
+
+ // Build a KD tree for the nearest neighbors search
+ const unsigned nelements = comp.size();
+ kd_tree kd(nelements);
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ // Reset the bbox id of each element of small_comp for indexing
+ comp[i].set_bbox_number(i);
+
+ // Insert the bbox in the KD-tree
+ const bbox2d* b = &(comp[i]);
+ kd.insert(b);
+ }
+
+ // Compute the K nearest neighbors
+ std::vector<nearestn> knn;
+ knn.reserve(nelements);
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const bbox2d* b = &(comp[i]);
+ const point2d b_center = b->pcenter();
+ nearestn n(k, b);
+
+ kd.find_nnearest(b, n);
+ n.compute_angles(b_center);
+ n.root_distances();
+ knn.push_back(n);
+ }
+
+ // Compute within line and between line mean distances
+ within_distance = within_line_distance(knn, theta_h);
+ between_distance = between_line_distance(knn, theta_v);
+
+ // Transitive closure on within-line nearest neighbor pairings
+ within_closure(lines, knn, within_distance, 3, theta_h);
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_EXTRACT_LINES_HH
diff --git a/scribo/scribo/layout/internal/docstrum/extract_paragraphs.hh
b/scribo/scribo/layout/internal/docstrum/extract_paragraphs.hh
new file mode 100644
index 0000000..0c523fd
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/extract_paragraphs.hh
@@ -0,0 +1,116 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_EXTRACT_PARAGRAPHS_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_EXTRACT_PARAGRAPHS_HH
+
+# include <scribo/layout/internal/docstrum/line.hh>
+# include <scribo/layout/internal/docstrum/paragraph.hh>
+# include <scribo/layout/internal/docstrum/kd_tree.hh>
+# include <scribo/layout/internal/docstrum/between_closure.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+ void
+ extract_paragraphs(const std::vector<line>& lines,
+ std::vector<paragraph>& paragraphs,
+ const float within_distance,
+ const float between_distance,
+ const float theta);
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Extract paragraphs
+ //-------------------------
+
+ inline
+ void extract_paragraphs(const std::vector<line>& lines,
+ std::vector<paragraph>& paragraphs,
+ const float within_distance,
+ const float between_distance,
+ const float theta)
+ {
+ mln_trace("scribo::layout::internal::docstrum::extract_paragraphs");
+
+ // Build a KD tree for nearest line neighbor search
+ const unsigned nelements = lines.size();
+ kd_tree kd(nelements);
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ // Insert the bbox in the KD-tree
+ const bbox2d* b = &(lines[i].bbox());
+ kd.insert(b);
+ }
+
+ // Compute the nearest neighbors for each line
+ std::vector< std::vector<unsigned> > knn;
+ knn.reserve(nelements);
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ std::vector<unsigned> neighbors;
+ const line& l = lines[i];
+
+ kd.find_nnearest_within_range(neighbors, 3.0f * between_distance,
+ &l.bbox(), l.slope(), l.y_intercept());
+
+ knn.push_back(neighbors);
+ }
+
+ // Transitive closure on between-line nearest neighbors pairings
+ // using parallel and perpendicular distances
+ between_closure(lines, paragraphs, knn, within_distance,
+ between_distance, theta);
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_EXTRACT_PARAGRAPHS_HH
diff --git a/scribo/scribo/layout/internal/docstrum/kd_hyperrect.hh
b/scribo/scribo/layout/internal/docstrum/kd_hyperrect.hh
new file mode 100644
index 0000000..4cdefb8
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/kd_hyperrect.hh
@@ -0,0 +1,210 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_HYPERRECT_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_HYPERRECT_HH
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+
+ class kd_hyperrect
+ {
+ public:
+ kd_hyperrect();
+
+ int min_coord(const int dim) const;
+ int max_coord(const int dim) const;
+ void set_min_coord(const int dim, const int value);
+ void set_max_coord(const int dim, const int value);
+
+ void init(const bbox2d* bbox);
+
+ void extend(const bbox2d* bbox);
+
+ int distance(const bbox2d* bbox);
+
+ int vertical_distance(const bbox2d* bbox);
+
+ private:
+ int min_[2];
+ int max_[2];
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ //---------------------------
+ // KD hyperrectangle
+ //---------------------------
+
+ inline
+ kd_hyperrect::kd_hyperrect()
+ {
+ }
+
+ inline
+ int
+ kd_hyperrect::min_coord(const int dim) const
+ {
+ return min_[dim];
+ }
+
+ inline
+ int
+ kd_hyperrect::max_coord(const int dim) const
+ {
+ return max_[dim];
+ }
+
+ inline
+ void
+ kd_hyperrect::set_min_coord(const int dim, const int value)
+ {
+ min_[dim] = value;
+ }
+
+ inline
+ void
+ kd_hyperrect::set_max_coord(const int dim, const int value)
+ {
+ max_[dim] = value;
+ }
+
+ inline
+ void
+ kd_hyperrect::init(const bbox2d* bbox)
+ {
+ const point2d& center = bbox->pcenter();
+ min_[0] = max_[0] = center.col();
+ min_[1] = max_[1] = center.row();
+ }
+
+ inline
+ void
+ kd_hyperrect::extend(const bbox2d* bbox)
+ {
+ const point2d& center = bbox->pcenter();
+ const int x = center.col();
+ const int y = center.row();
+
+ if (x < min_[0])
+ min_[0] = x;
+ if (y < min_[1])
+ min_[1] = y;
+
+ if (x > max_[0])
+ max_[0] = x;
+ if (y > max_[1])
+ max_[1] = y;
+ }
+
+ inline
+ int
+ kd_hyperrect::distance(const bbox2d* bbox)
+ {
+ int distance = 0;
+ const point2d& center = bbox->pcenter();
+ const int x = center.col();
+ const int y = center.row();
+
+ if (x < min_[0])
+ {
+ const int tmp = min_[0] - x;
+ distance += (tmp * tmp);
+ }
+ else if (x > max_[0])
+ {
+ const int tmp = max_[0] - x;
+ distance += (tmp * tmp);
+ }
+
+ if (y < min_[1])
+ {
+ const int tmp = min_[1] - y;
+ distance += (tmp * tmp);
+ }
+ else if (y > max_[1])
+ {
+ const int tmp = max_[1] - y;
+ distance += (tmp * tmp);
+ }
+
+ return distance;
+ }
+
+ inline
+ int
+ kd_hyperrect::vertical_distance(const bbox2d* bbox)
+ {
+ int distance = 0;
+ const point2d& center = bbox->pcenter();
+ const int y = center.row();
+
+ if (y < min_[1])
+ {
+ const int tmp = min_[1] - y;
+ distance += (tmp * tmp);
+ }
+ else if (y > max_[1])
+ {
+ const int tmp = max_[1] - y;
+ distance += (tmp * tmp);
+ }
+
+ return distance;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_HYPERRECT_HH
diff --git a/scribo/scribo/layout/internal/docstrum/kd_node.hh
b/scribo/scribo/layout/internal/docstrum/kd_node.hh
new file mode 100644
index 0000000..9826128
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/kd_node.hh
@@ -0,0 +1,182 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_NODE_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_NODE_HH
+
+#include <iostream>
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+ class kd_node
+ {
+ public:
+ kd_node(const bbox2d* bbox, const int father, const int dim);
+
+ bool is_leaf() const;
+
+ int dim() const;
+ const bbox2d* bbox() const;
+ int father() const;
+ int left_son() const;
+ int right_son() const;
+ int coord(const int dim) const;
+
+ int x() const;
+ int y() const;
+
+ void set_left_son(const int son_index);
+ void set_right_son(const int son_index);
+
+ private:
+ int dim_;
+ const bbox2d* bbox_;
+ int father_;
+ int left_son_;
+ int right_son_;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //---------------------------
+ // KD node
+ //---------------------------
+
+ inline
+ kd_node::kd_node(const bbox2d* bbox, const int father, const int dim)
+ : dim_(dim), bbox_(bbox), father_(father), left_son_(-1), right_son_(-1)
+ {
+ }
+
+ inline
+ bool
+ kd_node::is_leaf() const
+ {
+ return (left_son_ == -1 && right_son_ == -1);
+ }
+
+ inline
+ int
+ kd_node::dim() const
+ {
+ return dim_;
+ }
+
+ inline
+ const bbox2d*
+ kd_node::bbox() const
+ {
+ return bbox_;
+ }
+
+ inline
+ int
+ kd_node::father() const
+ {
+ return father_;
+ }
+
+ inline
+ int
+ kd_node::left_son() const
+ {
+ return left_son_;
+ }
+
+ inline
+ int
+ kd_node::right_son() const
+ {
+ return right_son_;
+ }
+
+ inline
+ int
+ kd_node::coord(const int dim) const
+ {
+ if (not dim)
+ return bbox_->pcenter().col();
+ else
+ return bbox_->pcenter().row();
+ }
+
+ inline
+ int kd_node::x() const
+ {
+ return bbox_->pcenter().col();
+ }
+
+ inline
+ int
+ kd_node::y() const
+ {
+ return bbox_->pcenter().row();
+ }
+
+ inline
+ void
+ kd_node::set_left_son(const int son_index)
+ {
+ left_son_ = son_index;
+ }
+
+ inline
+ void
+ kd_node::set_right_son(const int son_index)
+ {
+ right_son_ = son_index;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_NODE_HH
diff --git a/scribo/scribo/layout/internal/docstrum/kd_tree.hh
b/scribo/scribo/layout/internal/docstrum/kd_tree.hh
new file mode 100644
index 0000000..74b247a
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/kd_tree.hh
@@ -0,0 +1,472 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_TREE_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_TREE_HH
+
+#include <vector>
+#include <climits>
+#include <cmath>
+
+# include <scribo/layout/internal/docstrum/kd_node.hh>
+# include <scribo/layout/internal/docstrum/kd_hyperrect.hh>
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+# include <scribo/layout/internal/docstrum/nearest_neighbors.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+ class kd_tree
+ {
+ public:
+ static const int nb_dims = 2;
+
+ kd_tree(const int nb_nodes);
+
+ int find_parent(const bbox2d* bbox,
+ int* const dim) const;
+
+ void insert(const bbox2d* bbox);
+
+ void build_kd_tree(const std::vector<const bbox2d*>& points);
+
+ int euclidean_distance(const bbox2d* bbox1,
+ const bbox2d* bbox2);
+
+ float point_to_line_distance(const float slope,
+ const float y_intercept,
+ const bbox2d* bbox);
+
+ void nn(const bbox2d* bbox,
+ const int node_index,
+ nearestn& n);
+
+ void nn_within_range(const bbox2d* bbox,
+ const int node_index,
+ const float max_distance,
+ std::vector<unsigned>& neighbors_id,
+ const float slope,
+ const float y_intercept);
+
+ void find_nnearest(const bbox2d* bbox,
+ nearestn& n);
+
+ void find_nnearest_within_range(std::vector<unsigned>& neighbors_id,
+ const float max_distance,
+ const bbox2d* bbox,
+ const float slope,
+ const float y_intercept);
+
+ bool empty();
+
+ int size();
+
+ private:
+ bool root_inserted_;
+ int nodes_inserted_;
+ kd_hyperrect hyperrect_;
+ std::vector<kd_node> nodes_;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //---------------------------
+ // KD tree
+ //---------------------------
+
+ inline
+ kd_tree::kd_tree(const int nb_nodes)
+ : root_inserted_(false), nodes_inserted_(0)
+ {
+ nodes_.reserve(nb_nodes);
+ }
+
+ inline
+ int
+ kd_tree::find_parent(const bbox2d* bbox,
+ int* const dim) const
+ {
+ int node_index = 0;
+ int father_index = 0;
+ const point2d& center = bbox->pcenter();
+ int coord_value = 0;
+
+ while (node_index != -1 && not nodes_[node_index].is_leaf())
+ {
+ (not *dim) ? coord_value = center.col() : coord_value = center.row();
+
+ const kd_node& current_node = nodes_[node_index];
+ father_index = node_index;
+
+ int node_coord_value = current_node.coord(*dim);
+
+ if (coord_value <= node_coord_value)
+ node_index = current_node.left_son();
+ else
+ node_index = current_node.right_son();
+
+ *dim = (*dim + 1) % 2;
+ }
+
+ if (node_index == -1)
+ {
+ node_index = father_index;
+ *dim = (*dim + 1) % 2;
+ }
+
+ return node_index;
+ }
+
+ inline
+ void
+ kd_tree::insert(const bbox2d* bbox)
+ {
+ if (not root_inserted_)
+ {
+ kd_node node(bbox, 0, 0);
+ root_inserted_ = true;
+ hyperrect_.init(bbox);
+ nodes_.push_back(node);
+ }
+ else
+ {
+ int dim = 0;
+ const int node_index = find_parent(bbox, &dim);
+ kd_node node(bbox, node_index, (dim + 1) % 2);
+
+ const point2d& center = bbox->pcenter();
+ int coord_value = 0;
+
+ (not dim) ? coord_value = center.col() : coord_value = center.row();
+
+ if (coord_value <= nodes_[node_index].coord(dim))
+ nodes_[node_index].set_left_son(nodes_.size());
+ else
+ nodes_[node_index].set_right_son(nodes_.size());
+
+ hyperrect_.extend(bbox);
+ nodes_.push_back(node);
+ }
+
+ ++nodes_inserted_;
+ }
+
+ inline
+ void
+ kd_tree::build_kd_tree(const std::vector<const bbox2d*>& points)
+ {
+ const int nb_points = points.size();
+
+ for (int i = 0; i < nb_points; ++i)
+ insert(points[i]);
+ }
+
+ inline
+ int
+ kd_tree::euclidean_distance(const bbox2d* bbox1,
+ const bbox2d* bbox2)
+ {
+ const point2d& center1 = bbox1->pcenter();
+ const point2d& center2 = bbox2->pcenter();
+ const int dx = center1.col() - center2.col();
+ const int dy = center1.row() - center2.row();
+
+ return (dx * dx + dy * dy);
+ }
+
+ inline
+ float
+ kd_tree::point_to_line_distance(const float slope,
+ const float y_intercept,
+ const bbox2d* bbox)
+ {
+ const point2d& center = bbox->pcenter();
+
+ return (std::fabs(-slope * center.col() +
+ center.row() - y_intercept) /
+ sqrt(slope * slope + 1));
+ }
+
+ inline
+ void
+ kd_tree::nn(const bbox2d* bbox,
+ const int node_index,
+ nearestn& n)
+ {
+ if (node_index == -1)
+ return;
+
+ const kd_node& node = nodes_[node_index];
+ const bbox2d* b = node.bbox();
+
+ if (node.is_leaf())
+ {
+ const int d = euclidean_distance(bbox, b);
+
+ if (d < n.get_highest_distance() && bbox != b)
+ n.add_point(nnpoint(b, d));
+
+ return;
+ }
+
+ int nearer_sub_tree = -1;
+ int further_sub_tree = -1;
+ bool nearer_h = false; // hyperrect.min
+ bool further_h = false; //
+ const int dim = node.dim();
+ int coord_value = 0;
+
+ (not dim) ? coord_value = bbox->pcenter().col() :
+ coord_value = bbox->pcenter().row();
+
+ if (coord_value <= node.coord(dim))
+ {
+ nearer_sub_tree = node.left_son();
+ further_sub_tree = node.right_son();
+ nearer_h = true; // hyperrect.max
+ }
+ else
+ {
+ nearer_sub_tree = node.right_son();
+ further_sub_tree = node.left_son();
+ further_h = true; // hyperrect.max
+ }
+
+ if (nearer_sub_tree > 0)
+ {
+ if (nearer_h)
+ {
+ const int tmp = hyperrect_.max_coord(dim);
+ hyperrect_.set_max_coord(dim, node.coord(dim));
+ nn(bbox, nearer_sub_tree, n);
+ hyperrect_.set_max_coord(dim, tmp);
+ }
+ else
+ {
+ const int tmp = hyperrect_.min_coord(dim);
+ hyperrect_.set_min_coord(dim, node.coord(dim));
+ nn(bbox, nearer_sub_tree, n);
+ hyperrect_.set_min_coord(dim, tmp);
+ }
+ }
+
+ const int d = euclidean_distance(bbox, b);
+
+ if (d < n.get_highest_distance() && bbox != b)
+ n.add_point(nnpoint(b, d));
+
+ if (further_sub_tree > 0)
+ {
+ if (further_h)
+ {
+ const int tmp = hyperrect_.max_coord(dim);
+ hyperrect_.set_max_coord(dim, node.coord(dim));
+
+ if (hyperrect_.distance(bbox) < n.get_highest_distance())
+ nn(bbox, further_sub_tree, n);
+
+ hyperrect_.set_max_coord(dim, tmp);
+ }
+ else
+ {
+ const int tmp = hyperrect_.min_coord(dim);
+ hyperrect_.set_min_coord(dim, node.coord(dim));
+
+ if (hyperrect_.distance(bbox) < n.get_highest_distance())
+ nn(bbox, further_sub_tree, n);
+
+ hyperrect_.set_min_coord(dim, tmp);
+ }
+ }
+ }
+
+ inline
+ void
+ kd_tree::nn_within_range(const bbox2d* bbox,
+ const int node_index,
+ const float max_distance,
+ std::vector<unsigned>& neighbors_id,
+ const float slope,
+ const float y_intercept)
+ {
+ if (node_index == -1)
+ return;
+
+ const kd_node& node = nodes_[node_index];
+ const bbox2d* b = node.bbox();
+
+ if (node.is_leaf())
+ {
+ const float d = point_to_line_distance(slope, y_intercept, b);
+
+ if (d < max_distance && bbox != b)
+ neighbors_id.push_back(b->bbox_number());
+
+ return;
+ }
+
+ int nearer_sub_tree = -1;
+ int further_sub_tree = -1;
+ bool nearer_h = false; // hyperrect.min
+ bool further_h = false; //
+ const int dim = node.dim();
+ int coord_value = 0;
+
+ (not dim) ? coord_value = bbox->pcenter().col() :
+ coord_value = bbox->pcenter().row();
+
+ if (coord_value <= node.coord(dim))
+ {
+ nearer_sub_tree = node.left_son();
+ further_sub_tree = node.right_son();
+ nearer_h = true; // hyperrect.max
+ }
+ else
+ {
+ nearer_sub_tree = node.right_son();
+ further_sub_tree = node.left_son();
+ further_h = true; // hyperrect.max
+ }
+
+ if (nearer_sub_tree > 0)
+ {
+ if (nearer_h)
+ {
+ const int tmp = hyperrect_.max_coord(dim);
+ hyperrect_.set_max_coord(dim, node.coord(dim));
+ nn_within_range(bbox, nearer_sub_tree, max_distance,
+ neighbors_id, slope, y_intercept);
+ hyperrect_.set_max_coord(dim, tmp);
+ }
+ else
+ {
+ const int tmp = hyperrect_.min_coord(dim);
+ hyperrect_.set_min_coord(dim, node.coord(dim));
+ nn_within_range(bbox, nearer_sub_tree, max_distance,
+ neighbors_id, slope, y_intercept);
+ hyperrect_.set_min_coord(dim, tmp);
+ }
+ }
+
+ const float d = point_to_line_distance(slope, y_intercept, b);
+
+ if (d < max_distance && bbox != b)
+ neighbors_id.push_back(b->bbox_number());
+
+ if (further_sub_tree > 0)
+ {
+ if (further_h)
+ {
+ const int tmp = hyperrect_.max_coord(dim);
+ hyperrect_.set_max_coord(dim, node.coord(dim));
+
+ if (hyperrect_.vertical_distance(bbox) < max_distance * max_distance)
+ nn_within_range(bbox, further_sub_tree, max_distance,
+ neighbors_id, slope, y_intercept);
+
+ hyperrect_.set_max_coord(dim, tmp);
+ }
+ else
+ {
+ const int tmp = hyperrect_.min_coord(dim);
+ hyperrect_.set_min_coord(dim, node.coord(dim));
+
+ if (hyperrect_.vertical_distance(bbox) < max_distance * max_distance)
+ nn_within_range(bbox, further_sub_tree, max_distance,
+ neighbors_id, slope, y_intercept);
+
+ hyperrect_.set_min_coord(dim, tmp);
+ }
+ }
+ }
+
+ inline
+ void
+ kd_tree::find_nnearest(const bbox2d* bbox,
+ nearestn& n)
+ {
+ if (not root_inserted_)
+ return;
+
+ nn(bbox, 0, n);
+ }
+
+ inline
+ void
+ kd_tree::find_nnearest_within_range(std::vector<unsigned>& neighbors_id,
+ const float max_distance,
+ const bbox2d* bbox,
+ const float slope,
+ const float y_intercept)
+ {
+ if (not root_inserted_)
+ return;
+
+ nn_within_range(bbox, 0, max_distance, neighbors_id, slope, y_intercept);
+ }
+
+ inline
+ bool
+ kd_tree::empty()
+ {
+ return nodes_inserted_ == 0;
+ }
+
+ inline
+ int
+ kd_tree::size()
+ {
+ return nodes_inserted_;
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_KD_TREE_HH
diff --git a/scribo/scribo/layout/internal/docstrum/labeling.hh
b/scribo/scribo/layout/internal/docstrum/labeling.hh
new file mode 100644
index 0000000..c0269da
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/labeling.hh
@@ -0,0 +1,347 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_LABELING_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_LABELING_HH
+
+# include <vector>
+
+# include <mln/core/image/image2d.hh>
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+ struct labeling_position
+ {
+ labeling_position(const int x_, const int y_);
+
+ int x;
+ int y;
+ };
+
+
+ void
+ propagate(const labeling_position& position,
+ std::vector< labeling_position >& next_position_list,
+ const image2d<bool>& input, image2d<unsigned>& labeled_image,
+ const unsigned nb_labels, bbox2d& bbox);
+
+
+ void
+ component_labeling(const image2d<bool>& input,
+ unsigned& nb_labels,
+ std::vector<bbox2d>& bbox_list,
+ image2d<unsigned>& labeled_image,
+ const unsigned min_size);
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //---------------------------
+ // Labeling position
+ //---------------------------
+
+ labeling_position::labeling_position(const int x_, const int y_)
+ : x(x_),
+ y(y_)
+ {
+ }
+
+ //---------------------------
+ // Propagate labeling
+ //---------------------------
+
+ inline
+ void propagate(const labeling_position& position,
+ std::vector< labeling_position >& next_position_list,
+ const image2d<bool>& input, image2d<unsigned>&
labeled_image,
+ const unsigned nb_labels, bbox2d& bbox)
+ {
+ mln_trace("scribo::layout::internal::docstrum::component_labeling");
+
+ mln_assertion(input.is_valid());
+ mln_assertion(labeled_image.is_valid());
+
+ int x = position.x;
+ int y = position.y;
+ int nx;
+ int ny;
+ int ncols = input.ncols();
+ int nrows = input.nrows();
+
+ bool increment_xlr = false;
+ bool increment_ylr = false;
+ bool decrement_xul = false;
+ bool decrement_yul = false;
+
+ point2d& pmin = bbox.pmin();
+ point2d& pmax = bbox.pmax();
+ const int xul = pmin.col();
+ const int yul = pmin.row();
+ const int xlr = pmax.col();
+ const int ylr = pmax.row();
+
+ {
+ nx = x + 1;
+
+ if (nx < ncols)
+ {
+ unsigned& pixel = labeled_image.at_(y, nx);
+ if (input.at_(y, nx) && !pixel)
+ {
+ if (xlr < nx)
+ increment_xlr = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(nx, y));
+ }
+ }
+ }
+
+ {
+ nx = x + 1;
+ ny = y + 1;
+
+ if (nx < ncols && ny < nrows)
+ {
+ unsigned& pixel = labeled_image.at_(ny, nx);
+ if (input.at_(ny, nx) && !pixel)
+ {
+ if (xlr < nx)
+ increment_xlr = true;
+ if (ylr < ny)
+ increment_ylr = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(nx, ny));
+ }
+ }
+ }
+
+ {
+ ny = y + 1;
+
+ if (ny < nrows)
+ {
+ unsigned& pixel = labeled_image.at_(ny, x);
+ if (input.at_(ny, x) && !pixel)
+ {
+ if (ylr < ny)
+ increment_ylr = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(x, ny));
+ }
+ }
+ }
+
+ {
+ nx = x - 1;
+ ny = y + 1;
+
+ if (nx >= 0 && ny < nrows)
+ {
+ unsigned& pixel = labeled_image.at_(ny, nx);
+ if (input.at_(ny, nx) && !pixel)
+ {
+ if (xul > nx)
+ decrement_xul = true;
+ if (ylr < ny)
+ increment_ylr = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(nx, ny));
+ }
+ }
+ }
+
+ {
+ nx = x - 1;
+
+ if (nx >= 0)
+ {
+ unsigned& pixel = labeled_image.at_(y, nx);
+ if (input.at_(y, nx) && !pixel)
+ {
+ if (xul > nx)
+ decrement_xul = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(nx, y));
+ }
+ }
+ }
+
+ {
+ nx = x - 1;
+ ny = y - 1;
+
+ if (nx >= 0 && ny >= 0)
+ {
+ unsigned& pixel = labeled_image.at_(ny, nx);
+ if (input.at_(ny, nx) && !pixel)
+ {
+ if (xul > nx)
+ decrement_xul = true;
+ if (yul > ny)
+ decrement_yul = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(nx, ny));
+ }
+ }
+ }
+
+ {
+ ny = y - 1;
+
+ if (ny >= 0)
+ {
+ unsigned& pixel = labeled_image.at_(ny, x);
+ if (input.at_(ny, x) && !pixel)
+ {
+ if (yul > ny)
+ decrement_yul = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(x, ny));
+ }
+ }
+ }
+
+ {
+ nx = x + 1;
+ ny = y - 1;
+
+ if (nx < ncols && ny >= 0)
+ {
+ unsigned& pixel = labeled_image.at_(ny, nx);
+ if (input.at_(ny, nx) && !pixel)
+ {
+ if (xlr < nx)
+ increment_xlr = true;
+ if (yul > ny)
+ decrement_yul = true;
+ pixel = nb_labels;
+ next_position_list.push_back(labeling_position(nx, ny));
+ }
+ }
+ }
+
+ if (increment_xlr)
+ ++pmax.col();
+ if (increment_ylr)
+ ++pmax.row();
+ if (decrement_xul)
+ --pmin.col();
+ if (decrement_yul)
+ --pmin.row();
+ }
+
+ //---------------------------
+ // Component labeling
+ //---------------------------
+
+ inline
+ void component_labeling(const image2d<bool>& input,
+ unsigned& nb_labels,
+ std::vector<bbox2d>& bbox_list,
+ image2d<unsigned>& labeled_image,
+ const unsigned min_size)
+ {
+ mln_trace("scribo::layout::internal::docstrum::component_labeling");
+
+ mln_assertion(input.is_valid());
+ mln_assertion(labeled_image.is_valid());
+
+ std::vector<labeling_position> position_list;
+ std::vector<labeling_position> next_position_list;
+ unsigned nrows = input.nrows();
+ unsigned ncols = input.ncols();
+
+ next_position_list.reserve(8);
+
+ for (unsigned i = 0; i < nrows; ++i)
+ {
+ for (unsigned j = 0; j < ncols; ++j)
+ {
+ if (input.at_(i, j) && !labeled_image.at_(i, j))
+ {
+ ++nb_labels;
+ position_list.clear();
+ position_list.push_back(labeling_position(j, i));
+
+ labeled_image.at_(i, j) = nb_labels;
+
+ point2d p(i, j);
+ bbox2d bbox(nb_labels, p, p);
+
+ while (!position_list.empty())
+ {
+ unsigned nb_positions = position_list.size();
+ next_position_list.clear();
+
+ for (unsigned k = 0; k < nb_positions; ++k)
+ {
+ const labeling_position& position = position_list[k];
+
+ propagate(position, next_position_list, input,
+ labeled_image, nb_labels, bbox);
+ }
+
+ position_list.clear();
+ position_list = next_position_list;
+ }
+
+ const unsigned height = bbox.pmax().row() - bbox.pmin().row();
+ const unsigned width = bbox.pmax().col() - bbox.pmin().col();
+
+ if (height > min_size || width > min_size)
+ bbox_list.push_back(bbox);
+ }
+ }
+ }
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_LABELING_HH
diff --git a/scribo/scribo/layout/internal/docstrum/line.hh
b/scribo/scribo/layout/internal/docstrum/line.hh
new file mode 100644
index 0000000..23a9e10
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/line.hh
@@ -0,0 +1,229 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_LINE_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_LINE_HH
+
+# include <queue>
+
+# include <mln/core/image/image2d.hh>
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+# include <scribo/layout/internal/docstrum/nearest_neighbors.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+ class line
+ {
+ public:
+ line(const nnpoint* p, const unsigned id);
+
+ void add_component(const nnpoint* p);
+
+ void linear_regression();
+
+ const bbox2d& bbox() const;
+
+ float slope() const;
+
+ float y_intercept() const;
+
+ const point2d& line_begin() const;
+
+ const point2d& line_end() const;
+
+ unsigned id() const;
+
+ private:
+ std::vector<const nnpoint*> comp_;
+ float slope_;
+ float y_intercept_;
+ point2d line_begin_;
+ point2d line_end_;
+ bbox2d bbox_;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Line
+ //-------------------------
+
+ inline
+ line::line(const nnpoint* p,
+ const unsigned id)
+ {
+ bbox_.pmin() = p->bbox->pmin();
+ bbox_.pmax() = p->bbox->pmax();
+ bbox_.set_bbox_number(id);
+
+ comp_.push_back(p);
+ slope_ = 0.0f;
+ y_intercept_ = 0.0f;
+ }
+
+ inline
+ void
+ line::add_component(const nnpoint* p)
+ {
+ comp_.push_back(p);
+
+ const point2d& pmin = p->bbox->pmin();
+ const point2d& pmax = p->bbox->pmax();
+ point2d& bbox_pmin = bbox_.pmin();
+ point2d& bbox_pmax = bbox_.pmax();
+
+ if (bbox_pmin.col() > pmin.col())
+ bbox_pmin.col() = pmin.col();
+
+ if (bbox_pmin.row() > pmin.row())
+ bbox_pmin.row() = pmin.row();
+
+ if (bbox_pmax.col() < pmax.col())
+ bbox_pmax.col() = pmax.col();
+
+ if (bbox_pmax.row() < pmax.row())
+ bbox_pmax.row() = pmax.row();
+ }
+
+ inline
+ void
+ line::linear_regression()
+ {
+ const int nelements = comp_.size();
+
+ if (nelements == 1)
+ {
+ const bbox2d* b = comp_[0]->bbox;
+ slope_ = 0.0f;
+ y_intercept_ = b->pmin().row() + 0.5f * (b->pmax().row() -
b->pmin().row());
+ line_begin_ = point2d(y_intercept_, bbox_.pmin().col());
+ line_end_ = point2d(y_intercept_, bbox_.pmax().col());
+
+ return;
+ }
+
+ int sum_x = 0;
+ int sum_y = 0;
+ int sum_xy = 0;
+ int sum_x_square = 0;
+
+ std::vector<const nnpoint*>::iterator it_begin = comp_.begin();
+ std::vector<const nnpoint*>::iterator it_end = comp_.end();
+
+ for (; it_begin != it_end; ++it_begin)
+ {
+ const point2d center = (*it_begin)->bbox->pcenter();
+ const int x = center.col();
+ const int y = center.row();
+
+ sum_x += x;
+ sum_y += y;
+ sum_xy += x * y;
+ sum_x_square += x * x;
+ }
+
+ slope_ = (float) (nelements * sum_xy - sum_x * sum_y) /
+ (nelements * sum_x_square - sum_x * sum_x);
+ y_intercept_ = ((float) sum_y / nelements) -
+ slope_ * ((float) sum_x / nelements);
+
+ const int y_begin = slope_ * bbox_.pmin().col() + y_intercept_;
+ const int y_end = slope_ * bbox_.pmax().col() + y_intercept_;
+ line_begin_ = point2d(y_begin, bbox_.pmin().col());
+ line_end_ = point2d(y_end, bbox_.pmax().col());
+ }
+
+ inline
+ const bbox2d&
+ line::bbox() const
+ {
+ return bbox_;
+ }
+
+ inline
+ float
+ line::slope() const
+ {
+ return slope_;
+ }
+
+ inline
+ float
+ line::y_intercept() const
+ {
+ return y_intercept_;
+ }
+
+ inline
+ const point2d&
+ line::line_begin() const
+ {
+ return line_begin_;
+ }
+
+ inline
+ const point2d&
+ line::line_end() const
+ {
+ return line_end_;
+ }
+
+ inline
+ unsigned
+ line::id() const
+ {
+ return bbox_.bbox_number();
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_LINE_HH
diff --git a/scribo/scribo/layout/internal/docstrum/nearest_neighbors.hh
b/scribo/scribo/layout/internal/docstrum/nearest_neighbors.hh
new file mode 100644
index 0000000..0846540
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/nearest_neighbors.hh
@@ -0,0 +1,265 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_NEAREST_NEIGHBORS_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_NEAREST_NEIGHBORS_HH
+
+# include <vector>
+# include <climits>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/math/pi.hh>
+
+# include <scribo/layout/internal/docstrum/bbox2d.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+ struct nnpoint
+ {
+ nnpoint();
+ nnpoint(const bbox2d* b, const unsigned d);
+
+ void compute_angle(const point2d& origin);
+
+ void root_distance();
+
+ bool operator< (const nnpoint& p) const;
+
+ unsigned id() const;
+
+ const bbox2d* bbox;
+ int distance;
+ float angle_h;
+ float angle_v;
+ };
+
+
+ class nearestn
+ {
+ public:
+ nearestn();
+ nearestn(const int n, const bbox2d* bbox);
+
+ void add_point(const nnpoint& p);
+ void compute_angles(const point2d& origin);
+
+ void root_distances();
+ int get_highest_distance() const;
+
+ const std::vector<nnpoint>& nearest_points() const;
+
+ int ninserted() const;
+
+ const nnpoint* nnp() const;
+
+ private:
+ std::vector<nnpoint> neighbors_;
+ int inserted_;
+ int n_;
+ int highest_distance_;
+ nnpoint nnpoint_;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //---------------------------
+ // Nearest neighbor point
+ //---------------------------
+
+ inline
+ nnpoint::nnpoint()
+ : bbox(0), distance(0), angle_h(0.0f), angle_v(0.0f)
+ {
+ }
+
+ inline
+ nnpoint::nnpoint(const bbox2d* b, const unsigned d)
+ : bbox(b), distance(d), angle_h(0.0f), angle_v(0.0f)
+ {
+ }
+
+ inline
+ void
+ nnpoint::compute_angle(const point2d& origin)
+ {
+ const point2d center = bbox->pcenter();
+ const float delta_x = center.col() - origin.col();
+ const float delta_y = center.row() - origin.row();
+
+ if (delta_x == 0 && delta_y > 0)
+ angle_h = math::pi / 2;
+ else if (delta_x == 0 && delta_y < 0)
+ angle_h = -math::pi / 2;
+ else
+ {
+ angle_h = atan(delta_y / delta_x);
+ if (angle_h > 0)
+ angle_v = angle_h - math::pi / 2;
+ else
+ angle_v = angle_h + math::pi / 2;
+ }
+ }
+
+ inline
+ void
+ nnpoint::root_distance()
+ {
+ distance = sqrt(distance);
+ }
+
+ inline
+ bool
+ nnpoint::operator< (const nnpoint& p) const
+ {
+ return (distance < p.distance);
+ }
+
+ inline
+ unsigned
+ nnpoint::id() const
+ {
+ return bbox->bbox_number();
+ }
+
+
+ //---------------------------
+ // Nearest neighbors
+ //---------------------------
+
+
+ inline
+ nearestn::nearestn()
+ : inserted_(0), n_(0), highest_distance_(INT_MAX)
+ {
+ }
+
+ inline
+ nearestn::nearestn(const int n,
+ const bbox2d* bbox)
+ : inserted_(0), n_(n), highest_distance_(INT_MAX)
+ {
+ for (int i = 0; i < n_; ++i)
+ neighbors_.push_back(nnpoint());
+
+ nnpoint_ = nnpoint(bbox, 0);
+ }
+
+ inline
+ void
+ nearestn::add_point(const nnpoint& p)
+ {
+ int i = 0;
+
+ while (i < n_ && i < inserted_ && neighbors_[i] < p)
+ ++i;
+
+ if (i < n_)
+ {
+ for (int j = inserted_; j > i; --j)
+ neighbors_[j] = neighbors_[j - 1];
+
+ neighbors_[i] = p;
+
+ if (inserted_ < n_)
+ ++inserted_;
+
+ if (inserted_ >= n_)
+ highest_distance_ = neighbors_[n_ - 1].distance;
+ }
+ }
+
+ inline
+ void
+ nearestn::compute_angles(const point2d& origin)
+ {
+ for (int i = 0; i < inserted_; ++i)
+ neighbors_[i].compute_angle(origin);
+ }
+
+ inline
+ void
+ nearestn::root_distances()
+ {
+ for (int i = 0; i < inserted_; ++i)
+ neighbors_[i].root_distance();
+ }
+
+ inline
+ int
+ nearestn::get_highest_distance() const
+ {
+ return highest_distance_;
+ }
+
+ inline
+ const std::vector<nnpoint>&
+ nearestn::nearest_points() const
+ {
+ return neighbors_;
+ }
+
+ inline
+ int
+ nearestn::ninserted() const
+ {
+ return inserted_;
+ }
+
+ inline
+ const nnpoint*
+ nearestn::nnp() const
+ {
+ return &nnpoint_;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_NEAREST_NEIGHBORS_HH
diff --git a/scribo/scribo/layout/internal/docstrum/paragraph.hh
b/scribo/scribo/layout/internal/docstrum/paragraph.hh
new file mode 100644
index 0000000..66cdbf1
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/paragraph.hh
@@ -0,0 +1,126 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_PARAGRAPH_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_PARAGRAPH_HH
+
+# include <scribo/layout/internal/docstrum/line.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+ class paragraph
+ {
+ public:
+ paragraph(const line* l,
+ const unsigned id);
+
+ void add_line(const line* l);
+
+ const bbox2d& bbox() const;
+
+ private:
+ std::vector<const line*> lines_;
+ bbox2d bbox_;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Paragraph
+ //-------------------------
+
+ inline
+ paragraph::paragraph(const line* l,
+ const unsigned id)
+ {
+ const bbox2d& box = l->bbox();
+
+ bbox_.pmin() = box.pmin();
+ bbox_.pmax() = box.pmax();
+ bbox_.set_bbox_number(id);
+
+ lines_.push_back(l);
+ }
+
+ inline
+ void
+ paragraph::add_line(const line* l)
+ {
+ lines_.push_back(l);
+
+ const point2d& pmin = l->bbox().pmin();
+ const point2d& pmax = l->bbox().pmax();
+ point2d& bbox_pmin = bbox_.pmin();
+ point2d& bbox_pmax = bbox_.pmax();
+
+ if (bbox_pmin.col() > pmin.col())
+ bbox_pmin.col() = pmin.col();
+
+ if (bbox_pmin.row() > pmin.row())
+ bbox_pmin.row() = pmin.row();
+
+ if (bbox_pmax.col() < pmax.col())
+ bbox_pmax.col() = pmax.col();
+
+ if (bbox_pmax.row() < pmax.row())
+ bbox_pmax.row() = pmax.row();
+ }
+
+ inline
+ const bbox2d&
+ paragraph::bbox() const
+ {
+ return bbox_;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_PARAGRAPH_HH
diff --git a/scribo/scribo/layout/internal/docstrum/within_closure.hh
b/scribo/scribo/layout/internal/docstrum/within_closure.hh
new file mode 100644
index 0000000..dc1630d
--- /dev/null
+++ b/scribo/scribo/layout/internal/docstrum/within_closure.hh
@@ -0,0 +1,175 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_WITHIN_CLOSURE_HH
+# define SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_WITHIN_CLOSURE_HH
+
+# include <vector>
+
+# include <scribo/layout/internal/docstrum/line.hh>
+# include <scribo/layout/internal/docstrum/distances.hh>
+
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ namespace docstrum
+ {
+
+ using namespace mln;
+
+
+ void
+ within_closure(std::vector<line>& lines,
+ const std::vector<nearestn>& clusters,
+ const int within_distance,
+ const int distance_factor,
+ const float theta_h);
+
+# ifndef MLN_INCLUDE_ONLY
+
+ //-------------------------
+ // Within closure
+ //-------------------------
+
+ inline
+ void
+ within_closure(std::vector<line>& lines,
+ const std::vector<nearestn>& clusters,
+ const int within_distance,
+ const int distance_factor,
+ const float theta_h)
+ {
+ mln_trace("scribo::layout::internal::docstrum::within_closure");
+
+ const int max_within_distance = distance_factor * within_distance;
+
+ std::vector<nearestn>::const_iterator it_begin = clusters.begin();
+ std::vector<nearestn>::const_iterator it_end = clusters.end();
+ std::set<unsigned> already_seen;
+ unsigned line_count = 0;
+
+ for (; it_begin != it_end; ++it_begin)
+ {
+ const nnpoint* n = (*it_begin).nnp();
+ const unsigned id = n->id();
+
+ if (already_seen.find(id) == already_seen.end())
+ {
+ const std::vector<nnpoint>& neighbors = (*it_begin).nearest_points();
+ const unsigned nelements = (*it_begin).ninserted();
+
+ std::vector<const nearestn*> propagation;
+ std::vector<const nearestn*> next_propagation;
+
+ // Initialize a new line with the current component
+ line l(n, line_count);
+ ++line_count;
+
+ // Firstly, we try link the current component n with its closest
+ // neighbors
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const nnpoint& nnp = neighbors[i];
+ const float angle_h = nnp.angle_h;
+ const int distance = nnp.distance;
+ const unsigned id = nnp.id();
+
+ if (distance <= max_within_distance &&
+ angle_h > -theta_h && angle_h < theta_h)
+ {
+ if (already_seen.find(id) == already_seen.end())
+ {
+ l.add_component(&nnp);
+ already_seen.insert(id);
+ propagation.push_back(&clusters[id]);
+ }
+ }
+ }
+
+ // Then, we propagate the linking to our neighbors
+ while (! propagation.empty())
+ {
+ next_propagation.clear();
+ const unsigned nelements = propagation.size();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const std::vector<nnpoint>& neighbors =
propagation[i]->nearest_points();
+ const unsigned nelements = (*it_begin).ninserted();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const nnpoint& nnp = neighbors[i];
+ const float angle_h = nnp.angle_h;
+ const int distance = nnp.distance;
+ const unsigned id = nnp.id();
+
+ if (distance <= max_within_distance &&
+ angle_h > -theta_h && angle_h < theta_h)
+ {
+ if (already_seen.find(id) == already_seen.end())
+ {
+ l.add_component(&nnp);
+ already_seen.insert(id);
+ next_propagation.push_back(&clusters[id]);
+ }
+ }
+ }
+ }
+
+ propagation.clear();
+ propagation = next_propagation;
+ }
+
+ // Compute linear regression of the centroids of the line's
+ // components
+ l.linear_regression();
+
+ lines.push_back(l);
+ }
+ }
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal::docstrum
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_DOCSTRUM_WITHIN_CLOSURE_HH
diff --git a/scribo/tests/layout/Makefile.am b/scribo/tests/layout/Makefile.am
index 09990b1..d59ef17 100644
--- a/scribo/tests/layout/Makefile.am
+++ b/scribo/tests/layout/Makefile.am
@@ -1,5 +1,4 @@
-# Copyright (C) 2013 EPITA Research and Development
-# Laboratory (LRDE).
+# Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE).
#
# This file is part of Olena.
#
@@ -17,6 +16,12 @@
include $(top_srcdir)/scribo/tests/tests.mk
-check_PROGRAMS = xy_cut
+check_PROGRAMS = \
+ docstrum \
+ xy_cut
+
+EXTRA_DIST = \
+ docstrum-line.pgm \
+ docstrum-paragraph.pgm
TESTS = $(check_PROGRAMS)
diff --git a/scribo/tests/layout/docstrum-line.pgm
b/scribo/tests/layout/docstrum-line.pgm
new file mode 100644
index 0000000..7cf1195
Binary files /dev/null and b/scribo/tests/layout/docstrum-line.pgm differ
diff --git a/scribo/tests/layout/docstrum-paragraph.pgm
b/scribo/tests/layout/docstrum-paragraph.pgm
new file mode 100644
index 0000000..924f3d7
Binary files /dev/null and b/scribo/tests/layout/docstrum-paragraph.pgm differ
diff --git a/scribo/tests/layout/docstrum.cc b/scribo/tests/layout/docstrum.cc
new file mode 100644
index 0000000..0979e3d
--- /dev/null
+++ b/scribo/tests/layout/docstrum.cc
@@ -0,0 +1,64 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <mln/core/image/image2d.hh>
+#include <scribo/layout/docstrum.hh>
+#include <mln/io/pbm/load.hh>
+#include <mln/io/pgm/load.hh>
+#include <mln/data/compare.hh>
+
+
+#include "tests/data.hh"
+
+using namespace mln;
+
+int main()
+{
+
+ image2d<bool> input;
+ io::pbm::load(input, SCRIBO_IMG_DIR "/text_to_group.pbm");
+
+ // Line mode
+ {
+ image2d<value::int_u8>
+ output = scribo::layout::docstrum(input, scribo::layout::Line);
+ image2d<value::int_u8> ref;
+ io::pgm::load(ref, SCRIBO_TESTS_DIR "/layout/docstrum-line.pgm");
+ mln_assertion(output == ref);
+ }
+
+ // Paragraph mode
+ {
+ image2d<value::int_u8>
+ output = scribo::layout::docstrum(input, scribo::layout::Paragraph);
+ image2d<value::int_u8> ref;
+ io::pgm::load(ref, SCRIBO_TESTS_DIR "/layout/docstrum-paragraph.pgm");
+ mln_assertion(output == ref);
+ }
+
+ return 0;
+}
--
1.7.2.5