* scribo/layout/internal/hist_info.hh,
* scribo/layout/internal/node.hh,
* scribo/layout/xy_cut.hh,
* tests/layout/Makefile.am,
* tests/layout/xy_cut.cc: New.
* tests/Makefile.am: Add new subdir.
---
scribo/ChangeLog | 13 +
.../scribo/layout/internal/hist_info.hh | 43 +-
scribo/scribo/layout/internal/node.hh | 156 ++++++++
scribo/scribo/layout/xy_cut.hh | 405 ++++++++++++++++++++
scribo/tests/Makefile.am | 1 +
scribo/tests/{text => layout}/Makefile.am | 5 +-
.../lines_h_pattern.cc => layout/xy_cut.cc} | 32 +-
7 files changed, 615 insertions(+), 40 deletions(-)
copy milena/mln/io/abort.hh => scribo/scribo/layout/internal/hist_info.hh (66%)
create mode 100644 scribo/scribo/layout/internal/node.hh
create mode 100644 scribo/scribo/layout/xy_cut.hh
copy scribo/tests/{text => layout}/Makefile.am (87%)
copy scribo/tests/{primitive/extract/lines_h_pattern.cc => layout/xy_cut.cc} (71%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 66341a3..ce793fa 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2013-03-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Import Julien Marquegnies's implementation of XY-Cut layout
+ analysis algorithm.
+
+ * scribo/layout/internal/hist_info.hh,
+ * scribo/layout/internal/node.hh,
+ * scribo/layout/xy_cut.hh,
+ * tests/layout/Makefile.am,
+ * tests/layout/xy_cut.cc: New.
+
+ * tests/Makefile.am: Add new subdir.
+
+2013-03-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Remove deprecated programs.
* demo/demat/demat.pro,
diff --git a/milena/mln/io/abort.hh b/scribo/scribo/layout/internal/hist_info.hh
similarity index 66%
copy from milena/mln/io/abort.hh
copy to scribo/scribo/layout/internal/hist_info.hh
index 9d06e7b..13de128 100644
--- a/milena/mln/io/abort.hh
+++ b/scribo/scribo/layout/internal/hist_info.hh
@@ -1,5 +1,4 @@
-// Copyright (C) 2007, 2008, 2009 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -24,44 +23,46 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef MLN_IO_ABORT_HH
-# define MLN_IO_ABORT_HH
-
/// \file
+///
+/// \brief Histogram information used in XY-Cut algorithm.
-/// Define a function which aborts a process in io module.
-
-# include <cstdlib>
-# include <iostream>
+#ifndef SCRIBO_LAYOUT_INTERNAL_HIST_INFO_HH
+# define SCRIBO_LAYOUT_INTERNAL_HIST_INFO_HH
-namespace mln
+namespace scribo
{
- namespace io
+ namespace layout
{
namespace internal
{
- /// The way to abort when an error occur in io processing.
- void abort();
+ /// \internal \brief Histogram information used in XY-Cut algorithm.
+ struct hist_info
+ {
+ hist_info();
+
+ unsigned horizontal;
+ unsigned vertical;
+ };
# ifndef MLN_INCLUDE_ONLY
- inline
- void abort()
+ hist_info::hist_info()
+ : horizontal(0),
+ vertical(0)
{
- std::cerr << "I/O error, aborting." << std::endl;
- std::abort();
}
# endif // ! MLN_INCLUDE_ONLY
- } // end of namespace mln::io::internal
+ } // end of namespace scribo::layout::internal
- } // end of namespace mln::io
+ } // end of namespace scribo::layout
-} // end of namespace mln
+} // end of namespace scribo
-#endif // ! MLN_IO_ABORT_HH
+#endif // ! SCRIBO_LAYOUT_INTERNAL_HIST_INFO_HH
diff --git a/scribo/scribo/layout/internal/node.hh
b/scribo/scribo/layout/internal/node.hh
new file mode 100644
index 0000000..5d937ea
--- /dev/null
+++ b/scribo/scribo/layout/internal/node.hh
@@ -0,0 +1,156 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Tree node class used in XY-Cut algorithm.
+
+
+#ifndef SCRIBO_LAYOUT_INTERNAL_NODE_HH
+# define SCRIBO_LAYOUT_INTERNAL_NODE_HH
+
+# include <mln/core/alias/box2d.hh>
+
+namespace scribo
+{
+
+ namespace layout
+ {
+
+ namespace internal
+ {
+
+ /// \internal \brief Tree node class used in XY-Cut algorithm.
+ template <typename B>
+ class node
+ {
+ public:
+ node();
+ node(const B& bbox);
+
+ ~node();
+
+ const B& get_bbox() const;
+
+ node<B>* get_ls();
+ const node<B>* get_ls() const;
+ node<B>* get_rs();
+ const node<B>* get_rs() const;
+
+ bool is_leaf() const;
+
+ void set_ls(node<B>* n);
+ void set_rs(node<B>* n);
+
+ private:
+ B bbox_;
+ node<B>* ls_;
+ node<B>* rs_;
+ };
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename B>
+ node<B>::node()
+ : ls_(0),
+ rs_(0)
+ {
+ }
+
+ template <typename B>
+ node<B>::node(const B& bbox)
+ : bbox_(bbox),
+ ls_(0),
+ rs_(0)
+ {
+ }
+
+ template <typename B>
+ node<B>::~node()
+ {
+ if (ls_)
+ delete ls_;
+
+ if (rs_)
+ delete rs_;
+ }
+
+ template <typename B>
+ const B& node<B>::get_bbox() const
+ {
+ return bbox_;
+ }
+
+ template <typename B>
+ node<B>* node<B>::get_ls()
+ {
+ return ls_;
+ }
+
+ template <typename B>
+ const node<B>* node<B>::get_ls() const
+ {
+ return ls_;
+ }
+
+ template <typename B>
+ node<B>* node<B>::get_rs()
+ {
+ return rs_;
+ }
+
+ template <typename B>
+ const node<B>* node<B>::get_rs() const
+ {
+ return rs_;
+ }
+
+ template <typename B>
+ bool node<B>::is_leaf() const
+ {
+ return ls_ == rs_;
+ }
+
+ template <typename B>
+ void node<B>::set_ls(node<B>* n)
+ {
+ ls_ = n;
+ }
+
+ template <typename B>
+ void node<B>::set_rs(node<B>* n)
+ {
+ rs_ = n;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout::internal
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_INTERNAL_NODE_HH
diff --git a/scribo/scribo/layout/xy_cut.hh b/scribo/scribo/layout/xy_cut.hh
new file mode 100644
index 0000000..d6818e9
--- /dev/null
+++ b/scribo/scribo/layout/xy_cut.hh
@@ -0,0 +1,405 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief XY-Cut layout analysis algorithm.
+
+#ifndef SCRIBO_LAYOUT_XY_CUT_HH
+# define SCRIBO_LAYOUT_XY_CUT_HH
+
+# include <mln/geom/min_row.hh>
+# include <mln/geom/min_col.hh>
+# include <mln/geom/max_row.hh>
+# include <mln/geom/max_col.hh>
+# include <mln/core/concept/image.hh>
+# include <mln/core/macros.hh>
+# include <mln/util/array.hh>
+# include <scribo/layout/internal/hist_info.hh>
+# include <scribo/layout/internal/node.hh>
+
+namespace scribo
+{
+
+ namespace layout
+ {
+ using namespace mln;
+
+ /// \brief XY-Cut layout analysis algorithm.
+ /*! This algorithm is an implementation made from \cite
+ nagy.1992.computer7 : "A prototype document image analysis
+ system for technical journals.".
+
+ It recusively subdivides empty spaces in the document until a
+ minimum division size is reached. The latter is defined with \p
+ min_height and \p min_width.
+
+ \input[in] ima A binary image.
+ \input[in] min_height The minimum height of a subdivision.
+ \input[in] min_width The minimum width of a subdivision.
+
+ \return An array of component group bounding boxes.
+ */
+ template <typename I>
+ mln::util::array<mln_box(I)>
+ xy_cut(const Image<I>& ima, int min_height, int min_width);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ namespace internal
+ {
+
+
+ //-------------------------
+ // Compute horizontal hist info
+ //-------------------------
+
+ template <typename I>
+ void compute_hist_info_h(const Image<I>& input_,
+ mln_ch_value(I,internal::hist_info)& hist)
+ {
+ trace::entering("compute_hist_info_h");
+
+ const I& input = exact(input_);
+
+ mln_precondition(input.is_valid());
+ mln_precondition(hist.is_valid());
+
+ unsigned accumulator = 0;
+
+ for (unsigned i = geom::min_row(input); i <= geom::max_row(input); ++i)
+ {
+ accumulator = 0;
+ for (unsigned j = geom::min_col(input); j <= geom::max_col(input); ++j)
+ {
+ if (input.at_(i, j))
+ ++accumulator;
+
+ hist.at_(i, j).horizontal = accumulator;
+ }
+ }
+
+ trace::exiting("compute_hist_info_h");
+ }
+
+
+ //-------------------------
+ // Compute vertical hist info
+ //-------------------------
+
+ template <typename I>
+ void compute_hist_info_v(const Image<I>& input_,
+ mln_ch_value(I,internal::hist_info)& hist)
+ {
+ trace::entering("compute_hist_info_v");
+
+ const I& input = exact(input_);
+
+ mln_precondition(input.is_valid());
+ mln_precondition(hist.is_valid());
+
+ unsigned accumulator = 0;
+
+ for (unsigned j = geom::min_col(input); j <= geom::max_col(input); ++j)
+ {
+ accumulator = 0;
+ for (unsigned i = geom::min_row(input); i <= geom::max_row(input); ++i)
+ {
+ if (input.at_(i, j))
+ ++accumulator;
+
+ hist.at_(i, j).vertical = accumulator;
+ }
+ }
+
+ trace::exiting("compute_hist_info_v");
+ }
+
+
+
+ template <typename H>
+ void horizontal_whitespace(const Image<H>& hist_,
+ const mln_site(H)& pmin,
+ const mln_site(H)& pmax,
+ int& max_height,
+ int& ws_min_row,
+ int& ws_max_row)
+ {
+ trace::entering("horizontal_whitespace");
+
+ typedef mln_value(H) V;
+ mlc_is(V,internal::hist_info)::check();
+
+ const H& hist = exact(hist_);
+
+ mln_precondition(hist.is_valid());
+
+ const int x_min = pmin.col();
+ const int x_max = pmax.col();
+ const int y_min = pmin.row();
+ const int y_max = pmax.row();
+
+ for (int y = y_min; y <= y_max; ++y)
+ {
+ const int n_white_pixels = hist.at_(y, x_max).horizontal -
+ hist.at_(y, x_min).horizontal;
+
+ if (!n_white_pixels)
+ {
+ int r = y + 1; // r : row
+
+ // While there is a gap in the histogram
+ for (; r <= y_max && !(hist.at_(r, x_max).horizontal -
+ hist.at_(r, x_min).horizontal); ++r);
+
+ const int count = r - y;
+
+ if (count > max_height)
+ {
+ max_height = count;
+ ws_min_row = (y == y_min) ? y_min : y - 1;
+ ws_max_row = r;
+ }
+
+ y = r;
+ }
+ }
+
+ trace::exiting("horizontal_whitespace");
+ }
+
+
+ template <typename H>
+ void vertical_whitespace(const Image<H>& hist_,
+ const mln_site(H)& pmin,
+ const mln_site(H)& pmax,
+ int& max_width,
+ int& ws_min_col,
+ int& ws_max_col)
+ {
+ trace::entering("vertical_whitespace");
+
+ typedef mln_value(H) V;
+ mlc_is(V,internal::hist_info)::check();
+
+ const H& hist = exact(hist_);
+
+ mln_precondition(hist.is_valid());
+
+ const int x_min = pmin.col();
+ const int x_max = pmax.col();
+ const int y_min = pmin.row();
+ const int y_max = pmax.row();
+
+ for (int x = x_min; x <= x_max; ++x)
+ {
+ const int n_white_pixels = hist.at_(y_max, x).vertical -
+ hist.at_(y_min, x).vertical;
+
+ if (!n_white_pixels)
+ {
+ int c = x + 1; // c : column
+
+ // While there is a gap in the histogram
+ for (; c <= x_max && !(hist.at_(y_max, c).vertical -
+ hist.at_(y_min, c).vertical); ++c);
+
+ const int count = c - x;
+
+ if (count > max_width)
+ {
+ max_width = count;
+ ws_min_col = (x == x_min) ? x_min : x - 1;
+ ws_max_col = c;
+ }
+
+ x = c;
+ }
+ }
+
+ trace::exiting("vertical_whitespace");
+ }
+
+
+ template <typename H>
+ void do_xy_cut(const Image<H>& hist_,
+ node<mln_box(H)>* root,
+ const mln_box(H)& domain,
+ const bool horizontal,
+ const bool should_stop,
+ const int min_height,
+ const int min_width)
+ {
+ trace::entering("do_xy_cut");
+
+ typedef mln_value(H) V;
+ mlc_is(V,internal::hist_info)::check();
+
+ const H& hist = exact(hist_);
+
+ mln_precondition(hist.is_valid());
+ mln_precondition(root != 0);
+ mln_precondition(domain.is_valid());
+
+ const mln_site(H)& pmin = domain.pmin();
+ const mln_site(H)& pmax = domain.pmax();
+ const int x_min = pmin.col();
+ const int x_max = pmax.col();
+ const int y_min = pmin.row();
+ const int y_max = pmax.row();
+
+ if (horizontal)
+ {
+ int max_height = 0;
+ int ws_min_row = 0; // Whitespace min and max
+ int ws_max_row = 0; // rows
+
+ horizontal_whitespace(hist, pmin, pmax, max_height, ws_min_row, ws_max_row);
+
+ if (max_height < min_height || max_height == (pmax.row() - pmin.row()))
+ {
+ if (should_stop)
+ return;
+
+ do_xy_cut(hist, root, domain, false, true, min_height, min_width);
+ }
+ else
+ {
+ typedef mln_box(H) B;
+ node<B>* ls = (ws_min_row == y_min) ? 0 :
+ new node<B>(box2d(pmin, point2d(ws_min_row, pmax.col())));
+ node<B>* rs = (ws_max_row >= y_max) ? 0 :
+ new node<B>(box2d(point2d(ws_max_row, pmin.col()), pmax));
+
+ root->set_ls(ls);
+ root->set_rs(rs);
+
+ if (ls)
+ do_xy_cut(hist, ls, ls->get_bbox(), false, false, min_height, min_width);
+ if (rs)
+ do_xy_cut(hist, rs, rs->get_bbox(), false, false, min_height, min_width);
+ }
+ }
+ else
+ {
+ int max_width = 0;
+ int ws_min_col = 0; // Whitespace min and max
+ int ws_max_col = 0; // cols
+
+ vertical_whitespace(hist, pmin, pmax, max_width, ws_min_col, ws_max_col);
+
+ if (max_width < min_width || max_width == (pmax.col() - pmin.col()))
+ {
+ if (should_stop)
+ return;
+
+ do_xy_cut(hist, root, domain, true, true, min_height, min_width);
+ }
+ else
+ {
+ typedef mln_box(H) B;
+ node<B>* ls = (ws_min_col == x_min) ? 0 :
+ new node<B>(box2d(pmin, point2d(pmax.row(), ws_min_col)));
+ node<B>* rs = (ws_max_col >= x_max) ? 0 :
+ new node<B>(box2d(point2d(pmin.row(), ws_max_col), pmax));
+
+ root->set_ls(ls);
+ root->set_rs(rs);
+
+ if (ls)
+ do_xy_cut(hist, ls, ls->get_bbox(), true, false, min_height, min_width);
+ if (rs)
+ do_xy_cut(hist, rs, rs->get_bbox(), true, false, min_height, min_width);
+ }
+ }
+
+ trace::exiting("do_xy_cut");
+ }
+
+
+ template <typename B>
+ void xy_dfs(const node<B>* root, util::array<B>& result)
+ {
+ if (root->is_leaf())
+ {
+ result.append(root->get_bbox());
+ return;
+ }
+
+ const node<B>* ls = root->get_ls();
+ const node<B>* rs = root->get_rs();
+
+ if (ls)
+ xy_dfs(root->get_ls(), result);
+ if (rs)
+ xy_dfs(root->get_rs(), result);
+ }
+
+ } // end of namespace scribo::layout::internal
+
+
+
+ template <typename I>
+ mln::util::array<mln_box(I)>
+ xy_cut(const Image<I>& ima_, int min_height, int min_width)
+ {
+ trace::entering("scribo::layout::xy_cut");
+
+ typedef mln_value(I) V;
+ typedef mln_box(I) B;
+ mlc_is(V,bool)::check();
+
+ const I& ima = exact(ima_);
+ mln_precondition(ima.is_valid());
+
+ // Compute histogram
+ mln_ch_value(I,internal::hist_info) hist(ima.domain());
+ internal::compute_hist_info_h(ima, hist);
+ internal::compute_hist_info_v(ima, hist);
+
+ // XY-Cut
+ internal::node<B>* n = new internal::node<B>(ima.domain());
+ internal::do_xy_cut(hist, n, ima.domain(), true, false, min_height, min_width);
+
+ // Build output.
+ mln::util::array<B> output;
+ internal::xy_dfs(n, output);
+
+ // Clear temporary data.
+ delete n;
+
+ trace::exiting("scribo::layout::xy_cut");
+ return output;
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::layout
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_LAYOUT_XY_CUT_HH
diff --git a/scribo/tests/Makefile.am b/scribo/tests/Makefile.am
index dd4fd9d..4d6ce66 100644
--- a/scribo/tests/Makefile.am
+++ b/scribo/tests/Makefile.am
@@ -45,6 +45,7 @@ SUBDIRS = \
core \
estim \
filter \
+ layout \
preprocessing \
primitive \
table \
diff --git a/scribo/tests/text/Makefile.am b/scribo/tests/layout/Makefile.am
similarity index 87%
copy from scribo/tests/text/Makefile.am
copy to scribo/tests/layout/Makefile.am
index 0d48025..09990b1 100644
--- a/scribo/tests/text/Makefile.am
+++ b/scribo/tests/layout/Makefile.am
@@ -1,4 +1,5 @@
-# Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE).
+# Copyright (C) 2013 EPITA Research and Development
+# Laboratory (LRDE).
#
# This file is part of Olena.
#
@@ -16,6 +17,6 @@
include $(top_srcdir)/scribo/tests/tests.mk
-check_PROGRAMS =
+check_PROGRAMS = xy_cut
TESTS = $(check_PROGRAMS)
diff --git a/scribo/tests/primitive/extract/lines_h_pattern.cc
b/scribo/tests/layout/xy_cut.cc
similarity index 71%
copy from scribo/tests/primitive/extract/lines_h_pattern.cc
copy to scribo/tests/layout/xy_cut.cc
index 64c09f8..3601d64 100644
--- a/scribo/tests/primitive/extract/lines_h_pattern.cc
+++ b/scribo/tests/layout/xy_cut.cc
@@ -23,29 +23,27 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-// \file
+/// \file
-#include <mln/core/image/image2d.hh>
-#include <mln/data/compare.hh>
+#include <iostream>
#include <mln/io/pbm/load.hh>
-#include <scribo/primitive/extract/lines_h_pattern.hh>
+#include <mln/make/box2d.hh>
+#include <scribo/layout/xy_cut.hh>
#include "tests/data.hh"
-int main()
+using namespace mln;
+
+int main(int argc, char** argv)
{
- using namespace mln;
- using namespace scribo;
image2d<bool> input;
- io::pbm::load(input, SCRIBO_IMG_DIR "/lines_discontinued.pbm");
-
- image2d<bool> ref;
- io::pbm::load(ref,
- SCRIBO_TESTS_DIR "/primitive/extract/lines_h_pattern.ref.pbm");
-
- image2d<bool>
- vlines = primitive::extract::lines_h_pattern(input, 51, 3);
-
- mln_assertion(vlines == ref);
+ io::pbm::load(input, SCRIBO_IMG_DIR "/alignment_3.pbm");
+ util::array<box2d> bb = scribo::layout::xy_cut(input, 10, 10);
+
+ mln_assertion(bb.nelements() == 4);
+ mln_assertion(bb[0] == make::box2d(20,66, 38,140));
+ mln_assertion(bb[1] == make::box2d(11,171, 38,221));
+ mln_assertion(bb[2] == make::box2d(0,237, 38,384));
+ mln_assertion(bb[3] == make::box2d(50,60, 215,402));
}
--
1.7.2.5