
* scribo/layout/internal/hist_info.hh, * scribo/layout/internal/node.hh, * scribo/layout/xy_cut.hh, * tests/layout/Makefile.am, * tests/layout/xy_cut.cc: New. * tests/Makefile.am: Add new subdir. --- scribo/ChangeLog | 13 + .../scribo/layout/internal/hist_info.hh | 43 +- scribo/scribo/layout/internal/node.hh | 156 ++++++++ scribo/scribo/layout/xy_cut.hh | 405 ++++++++++++++++++++ scribo/tests/Makefile.am | 1 + scribo/tests/{text => layout}/Makefile.am | 5 +- .../lines_h_pattern.cc => layout/xy_cut.cc} | 32 +- 7 files changed, 615 insertions(+), 40 deletions(-) copy milena/mln/io/abort.hh => scribo/scribo/layout/internal/hist_info.hh (66%) create mode 100644 scribo/scribo/layout/internal/node.hh create mode 100644 scribo/scribo/layout/xy_cut.hh copy scribo/tests/{text => layout}/Makefile.am (87%) copy scribo/tests/{primitive/extract/lines_h_pattern.cc => layout/xy_cut.cc} (71%) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 66341a3..ce793fa 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,18 @@ 2013-03-07 Guillaume Lazzara <z@lrde.epita.fr> + Import Julien Marquegnies's implementation of XY-Cut layout + analysis algorithm. + + * scribo/layout/internal/hist_info.hh, + * scribo/layout/internal/node.hh, + * scribo/layout/xy_cut.hh, + * tests/layout/Makefile.am, + * tests/layout/xy_cut.cc: New. + + * tests/Makefile.am: Add new subdir. + +2013-03-07 Guillaume Lazzara <z@lrde.epita.fr> + Remove deprecated programs. * demo/demat/demat.pro, diff --git a/milena/mln/io/abort.hh b/scribo/scribo/layout/internal/hist_info.hh similarity index 66% copy from milena/mln/io/abort.hh copy to scribo/scribo/layout/internal/hist_info.hh index 9d06e7b..13de128 100644 --- a/milena/mln/io/abort.hh +++ b/scribo/scribo/layout/internal/hist_info.hh @@ -1,5 +1,4 @@ -// Copyright (C) 2007, 2008, 2009 EPITA Research and Development -// Laboratory (LRDE) +// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE) // // This file is part of Olena. // @@ -24,44 +23,46 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU General Public License. -#ifndef MLN_IO_ABORT_HH -# define MLN_IO_ABORT_HH - /// \file +/// +/// \brief Histogram information used in XY-Cut algorithm. -/// Define a function which aborts a process in io module. - -# include <cstdlib> -# include <iostream> +#ifndef SCRIBO_LAYOUT_INTERNAL_HIST_INFO_HH +# define SCRIBO_LAYOUT_INTERNAL_HIST_INFO_HH -namespace mln +namespace scribo { - namespace io + namespace layout { namespace internal { - /// The way to abort when an error occur in io processing. - void abort(); + /// \internal \brief Histogram information used in XY-Cut algorithm. + struct hist_info + { + hist_info(); + + unsigned horizontal; + unsigned vertical; + }; # ifndef MLN_INCLUDE_ONLY - inline - void abort() + hist_info::hist_info() + : horizontal(0), + vertical(0) { - std::cerr << "I/O error, aborting." << std::endl; - std::abort(); } # endif // ! MLN_INCLUDE_ONLY - } // end of namespace mln::io::internal + } // end of namespace scribo::layout::internal - } // end of namespace mln::io + } // end of namespace scribo::layout -} // end of namespace mln +} // end of namespace scribo -#endif // ! MLN_IO_ABORT_HH +#endif // ! SCRIBO_LAYOUT_INTERNAL_HIST_INFO_HH diff --git a/scribo/scribo/layout/internal/node.hh b/scribo/scribo/layout/internal/node.hh new file mode 100644 index 0000000..5d937ea --- /dev/null +++ b/scribo/scribo/layout/internal/node.hh @@ -0,0 +1,156 @@ +// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +/// \file +/// +/// \brief Tree node class used in XY-Cut algorithm. + + +#ifndef SCRIBO_LAYOUT_INTERNAL_NODE_HH +# define SCRIBO_LAYOUT_INTERNAL_NODE_HH + +# include <mln/core/alias/box2d.hh> + +namespace scribo +{ + + namespace layout + { + + namespace internal + { + + /// \internal \brief Tree node class used in XY-Cut algorithm. + template <typename B> + class node + { + public: + node(); + node(const B& bbox); + + ~node(); + + const B& get_bbox() const; + + node<B>* get_ls(); + const node<B>* get_ls() const; + node<B>* get_rs(); + const node<B>* get_rs() const; + + bool is_leaf() const; + + void set_ls(node<B>* n); + void set_rs(node<B>* n); + + private: + B bbox_; + node<B>* ls_; + node<B>* rs_; + }; + +# ifndef MLN_INCLUDE_ONLY + + template <typename B> + node<B>::node() + : ls_(0), + rs_(0) + { + } + + template <typename B> + node<B>::node(const B& bbox) + : bbox_(bbox), + ls_(0), + rs_(0) + { + } + + template <typename B> + node<B>::~node() + { + if (ls_) + delete ls_; + + if (rs_) + delete rs_; + } + + template <typename B> + const B& node<B>::get_bbox() const + { + return bbox_; + } + + template <typename B> + node<B>* node<B>::get_ls() + { + return ls_; + } + + template <typename B> + const node<B>* node<B>::get_ls() const + { + return ls_; + } + + template <typename B> + node<B>* node<B>::get_rs() + { + return rs_; + } + + template <typename B> + const node<B>* node<B>::get_rs() const + { + return rs_; + } + + template <typename B> + bool node<B>::is_leaf() const + { + return ls_ == rs_; + } + + template <typename B> + void node<B>::set_ls(node<B>* n) + { + ls_ = n; + } + + template <typename B> + void node<B>::set_rs(node<B>* n) + { + rs_ = n; + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::layout::internal + + } // end of namespace scribo::layout + +} // end of namespace scribo + +#endif // ! SCRIBO_LAYOUT_INTERNAL_NODE_HH diff --git a/scribo/scribo/layout/xy_cut.hh b/scribo/scribo/layout/xy_cut.hh new file mode 100644 index 0000000..d6818e9 --- /dev/null +++ b/scribo/scribo/layout/xy_cut.hh @@ -0,0 +1,405 @@ +// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +/// \file +/// +/// \brief XY-Cut layout analysis algorithm. + +#ifndef SCRIBO_LAYOUT_XY_CUT_HH +# define SCRIBO_LAYOUT_XY_CUT_HH + +# include <mln/geom/min_row.hh> +# include <mln/geom/min_col.hh> +# include <mln/geom/max_row.hh> +# include <mln/geom/max_col.hh> +# include <mln/core/concept/image.hh> +# include <mln/core/macros.hh> +# include <mln/util/array.hh> +# include <scribo/layout/internal/hist_info.hh> +# include <scribo/layout/internal/node.hh> + +namespace scribo +{ + + namespace layout + { + using namespace mln; + + /// \brief XY-Cut layout analysis algorithm. + /*! This algorithm is an implementation made from \cite + nagy.1992.computer7 : "A prototype document image analysis + system for technical journals.". + + It recusively subdivides empty spaces in the document until a + minimum division size is reached. The latter is defined with \p + min_height and \p min_width. + + \input[in] ima A binary image. + \input[in] min_height The minimum height of a subdivision. + \input[in] min_width The minimum width of a subdivision. + + \return An array of component group bounding boxes. + */ + template <typename I> + mln::util::array<mln_box(I)> + xy_cut(const Image<I>& ima, int min_height, int min_width); + + +# ifndef MLN_INCLUDE_ONLY + + namespace internal + { + + + //------------------------- + // Compute horizontal hist info + //------------------------- + + template <typename I> + void compute_hist_info_h(const Image<I>& input_, + mln_ch_value(I,internal::hist_info)& hist) + { + trace::entering("compute_hist_info_h"); + + const I& input = exact(input_); + + mln_precondition(input.is_valid()); + mln_precondition(hist.is_valid()); + + unsigned accumulator = 0; + + for (unsigned i = geom::min_row(input); i <= geom::max_row(input); ++i) + { + accumulator = 0; + for (unsigned j = geom::min_col(input); j <= geom::max_col(input); ++j) + { + if (input.at_(i, j)) + ++accumulator; + + hist.at_(i, j).horizontal = accumulator; + } + } + + trace::exiting("compute_hist_info_h"); + } + + + //------------------------- + // Compute vertical hist info + //------------------------- + + template <typename I> + void compute_hist_info_v(const Image<I>& input_, + mln_ch_value(I,internal::hist_info)& hist) + { + trace::entering("compute_hist_info_v"); + + const I& input = exact(input_); + + mln_precondition(input.is_valid()); + mln_precondition(hist.is_valid()); + + unsigned accumulator = 0; + + for (unsigned j = geom::min_col(input); j <= geom::max_col(input); ++j) + { + accumulator = 0; + for (unsigned i = geom::min_row(input); i <= geom::max_row(input); ++i) + { + if (input.at_(i, j)) + ++accumulator; + + hist.at_(i, j).vertical = accumulator; + } + } + + trace::exiting("compute_hist_info_v"); + } + + + + template <typename H> + void horizontal_whitespace(const Image<H>& hist_, + const mln_site(H)& pmin, + const mln_site(H)& pmax, + int& max_height, + int& ws_min_row, + int& ws_max_row) + { + trace::entering("horizontal_whitespace"); + + typedef mln_value(H) V; + mlc_is(V,internal::hist_info)::check(); + + const H& hist = exact(hist_); + + mln_precondition(hist.is_valid()); + + const int x_min = pmin.col(); + const int x_max = pmax.col(); + const int y_min = pmin.row(); + const int y_max = pmax.row(); + + for (int y = y_min; y <= y_max; ++y) + { + const int n_white_pixels = hist.at_(y, x_max).horizontal - + hist.at_(y, x_min).horizontal; + + if (!n_white_pixels) + { + int r = y + 1; // r : row + + // While there is a gap in the histogram + for (; r <= y_max && !(hist.at_(r, x_max).horizontal - + hist.at_(r, x_min).horizontal); ++r); + + const int count = r - y; + + if (count > max_height) + { + max_height = count; + ws_min_row = (y == y_min) ? y_min : y - 1; + ws_max_row = r; + } + + y = r; + } + } + + trace::exiting("horizontal_whitespace"); + } + + + template <typename H> + void vertical_whitespace(const Image<H>& hist_, + const mln_site(H)& pmin, + const mln_site(H)& pmax, + int& max_width, + int& ws_min_col, + int& ws_max_col) + { + trace::entering("vertical_whitespace"); + + typedef mln_value(H) V; + mlc_is(V,internal::hist_info)::check(); + + const H& hist = exact(hist_); + + mln_precondition(hist.is_valid()); + + const int x_min = pmin.col(); + const int x_max = pmax.col(); + const int y_min = pmin.row(); + const int y_max = pmax.row(); + + for (int x = x_min; x <= x_max; ++x) + { + const int n_white_pixels = hist.at_(y_max, x).vertical - + hist.at_(y_min, x).vertical; + + if (!n_white_pixels) + { + int c = x + 1; // c : column + + // While there is a gap in the histogram + for (; c <= x_max && !(hist.at_(y_max, c).vertical - + hist.at_(y_min, c).vertical); ++c); + + const int count = c - x; + + if (count > max_width) + { + max_width = count; + ws_min_col = (x == x_min) ? x_min : x - 1; + ws_max_col = c; + } + + x = c; + } + } + + trace::exiting("vertical_whitespace"); + } + + + template <typename H> + void do_xy_cut(const Image<H>& hist_, + node<mln_box(H)>* root, + const mln_box(H)& domain, + const bool horizontal, + const bool should_stop, + const int min_height, + const int min_width) + { + trace::entering("do_xy_cut"); + + typedef mln_value(H) V; + mlc_is(V,internal::hist_info)::check(); + + const H& hist = exact(hist_); + + mln_precondition(hist.is_valid()); + mln_precondition(root != 0); + mln_precondition(domain.is_valid()); + + const mln_site(H)& pmin = domain.pmin(); + const mln_site(H)& pmax = domain.pmax(); + const int x_min = pmin.col(); + const int x_max = pmax.col(); + const int y_min = pmin.row(); + const int y_max = pmax.row(); + + if (horizontal) + { + int max_height = 0; + int ws_min_row = 0; // Whitespace min and max + int ws_max_row = 0; // rows + + horizontal_whitespace(hist, pmin, pmax, max_height, ws_min_row, ws_max_row); + + if (max_height < min_height || max_height == (pmax.row() - pmin.row())) + { + if (should_stop) + return; + + do_xy_cut(hist, root, domain, false, true, min_height, min_width); + } + else + { + typedef mln_box(H) B; + node<B>* ls = (ws_min_row == y_min) ? 0 : + new node<B>(box2d(pmin, point2d(ws_min_row, pmax.col()))); + node<B>* rs = (ws_max_row >= y_max) ? 0 : + new node<B>(box2d(point2d(ws_max_row, pmin.col()), pmax)); + + root->set_ls(ls); + root->set_rs(rs); + + if (ls) + do_xy_cut(hist, ls, ls->get_bbox(), false, false, min_height, min_width); + if (rs) + do_xy_cut(hist, rs, rs->get_bbox(), false, false, min_height, min_width); + } + } + else + { + int max_width = 0; + int ws_min_col = 0; // Whitespace min and max + int ws_max_col = 0; // cols + + vertical_whitespace(hist, pmin, pmax, max_width, ws_min_col, ws_max_col); + + if (max_width < min_width || max_width == (pmax.col() - pmin.col())) + { + if (should_stop) + return; + + do_xy_cut(hist, root, domain, true, true, min_height, min_width); + } + else + { + typedef mln_box(H) B; + node<B>* ls = (ws_min_col == x_min) ? 0 : + new node<B>(box2d(pmin, point2d(pmax.row(), ws_min_col))); + node<B>* rs = (ws_max_col >= x_max) ? 0 : + new node<B>(box2d(point2d(pmin.row(), ws_max_col), pmax)); + + root->set_ls(ls); + root->set_rs(rs); + + if (ls) + do_xy_cut(hist, ls, ls->get_bbox(), true, false, min_height, min_width); + if (rs) + do_xy_cut(hist, rs, rs->get_bbox(), true, false, min_height, min_width); + } + } + + trace::exiting("do_xy_cut"); + } + + + template <typename B> + void xy_dfs(const node<B>* root, util::array<B>& result) + { + if (root->is_leaf()) + { + result.append(root->get_bbox()); + return; + } + + const node<B>* ls = root->get_ls(); + const node<B>* rs = root->get_rs(); + + if (ls) + xy_dfs(root->get_ls(), result); + if (rs) + xy_dfs(root->get_rs(), result); + } + + } // end of namespace scribo::layout::internal + + + + template <typename I> + mln::util::array<mln_box(I)> + xy_cut(const Image<I>& ima_, int min_height, int min_width) + { + trace::entering("scribo::layout::xy_cut"); + + typedef mln_value(I) V; + typedef mln_box(I) B; + mlc_is(V,bool)::check(); + + const I& ima = exact(ima_); + mln_precondition(ima.is_valid()); + + // Compute histogram + mln_ch_value(I,internal::hist_info) hist(ima.domain()); + internal::compute_hist_info_h(ima, hist); + internal::compute_hist_info_v(ima, hist); + + // XY-Cut + internal::node<B>* n = new internal::node<B>(ima.domain()); + internal::do_xy_cut(hist, n, ima.domain(), true, false, min_height, min_width); + + // Build output. + mln::util::array<B> output; + internal::xy_dfs(n, output); + + // Clear temporary data. + delete n; + + trace::exiting("scribo::layout::xy_cut"); + return output; + } + + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::layout + +} // end of namespace scribo + +#endif // ! SCRIBO_LAYOUT_XY_CUT_HH diff --git a/scribo/tests/Makefile.am b/scribo/tests/Makefile.am index dd4fd9d..4d6ce66 100644 --- a/scribo/tests/Makefile.am +++ b/scribo/tests/Makefile.am @@ -45,6 +45,7 @@ SUBDIRS = \ core \ estim \ filter \ + layout \ preprocessing \ primitive \ table \ diff --git a/scribo/tests/text/Makefile.am b/scribo/tests/layout/Makefile.am similarity index 87% copy from scribo/tests/text/Makefile.am copy to scribo/tests/layout/Makefile.am index 0d48025..09990b1 100644 --- a/scribo/tests/text/Makefile.am +++ b/scribo/tests/layout/Makefile.am @@ -1,4 +1,5 @@ -# Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE). +# Copyright (C) 2013 EPITA Research and Development +# Laboratory (LRDE). # # This file is part of Olena. # @@ -16,6 +17,6 @@ include $(top_srcdir)/scribo/tests/tests.mk -check_PROGRAMS = +check_PROGRAMS = xy_cut TESTS = $(check_PROGRAMS) diff --git a/scribo/tests/primitive/extract/lines_h_pattern.cc b/scribo/tests/layout/xy_cut.cc similarity index 71% copy from scribo/tests/primitive/extract/lines_h_pattern.cc copy to scribo/tests/layout/xy_cut.cc index 64c09f8..3601d64 100644 --- a/scribo/tests/primitive/extract/lines_h_pattern.cc +++ b/scribo/tests/layout/xy_cut.cc @@ -23,29 +23,27 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU General Public License. -// \file +/// \file -#include <mln/core/image/image2d.hh> -#include <mln/data/compare.hh> +#include <iostream> #include <mln/io/pbm/load.hh> -#include <scribo/primitive/extract/lines_h_pattern.hh> +#include <mln/make/box2d.hh> +#include <scribo/layout/xy_cut.hh> #include "tests/data.hh" -int main() +using namespace mln; + +int main(int argc, char** argv) { - using namespace mln; - using namespace scribo; image2d<bool> input; - io::pbm::load(input, SCRIBO_IMG_DIR "/lines_discontinued.pbm"); - - image2d<bool> ref; - io::pbm::load(ref, - SCRIBO_TESTS_DIR "/primitive/extract/lines_h_pattern.ref.pbm"); - - image2d<bool> - vlines = primitive::extract::lines_h_pattern(input, 51, 3); - - mln_assertion(vlines == ref); + io::pbm::load(input, SCRIBO_IMG_DIR "/alignment_3.pbm"); + util::array<box2d> bb = scribo::layout::xy_cut(input, 10, 10); + + mln_assertion(bb.nelements() == 4); + mln_assertion(bb[0] == make::box2d(20,66, 38,140)); + mln_assertion(bb[1] == make::box2d(11,171, 38,221)); + mln_assertion(bb[2] == make::box2d(0,237, 38,384)); + mln_assertion(bb[3] == make::box2d(50,60, 215,402)); } -- 1.7.2.5