This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch icdar/hdlac2011 has been updated
via 93197d7eb85b70237cdfa3c2613051a5d19f1455 (commit)
via 5454dc1932181c13ef468b63d6a31334f2c2c8af (commit)
from 563c43fdb996c6891301d4ac83e8547f12a50e57 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
93197d7 Fix small fixes [to cleanup]
5454dc1 Make different algorithms for doc and hdoc use cases.
-----------------------------------------------------------------------
Summary of changes:
scribo/ChangeLog | 21 +
scribo/demo/viewer/viewer.cc | 10 +-
scribo/demo/viewer/xml_widget.cc | 2 +-
scribo/scribo/core/component_features_data.hh | 1 +
scribo/scribo/core/group_info.hh | 11 +
scribo/scribo/core/paragraph_set.hh | 29 +
.../scribo/io/xml/internal/compute_text_colour.hh | 1 +
.../io/xml/internal/extended_page_xml_visitor.hh | 49 +-
scribo/scribo/io/xml/internal/time_info.hh | 1 +
scribo/scribo/io/xml/save.hh | 2 +-
scribo/scribo/text/extract_paragraphs.hh | 1091 ++++++++++++++++
scribo/scribo/text/extract_paragraphs_hdoc.hh | 1327 ++++++++++++++++++++
scribo/scribo/text/merging.hh | 81 +-
scribo/scribo/text/{merging.hh => merging_hdoc.hh} | 10 +-
scribo/scribo/text/paragraphs.hh | 1270 -------------------
.../toolchain/internal/content_in_doc_functor.hh | 16 +-
.../toolchain/internal/content_in_hdoc_functor.hh | 9 +-
.../internal/text_in_doc_preprocess_functor.hh | 2 +
scribo/scribo/util/color_to_hex.hh | 1 +
scribo/scribo/util/component_precise_outline.hh | 5 +
20 files changed, 2575 insertions(+), 1364 deletions(-)
create mode 100644 scribo/scribo/text/extract_paragraphs.hh
create mode 100644 scribo/scribo/text/extract_paragraphs_hdoc.hh
copy scribo/scribo/text/{merging.hh => merging_hdoc.hh} (99%)
delete mode 100644 scribo/scribo/text/paragraphs.hh
hooks/post-receive
--
Olena, a generic and efficient image processing platform
* scribo/core/paragraph_set.hh: Add an overload of
make::paragraph().
* scribo/text/paragraphs.hh: Move...
* scribo/text/extract_paragraphs.hh,
* scribo/text/extract_paragraphs_hdoc.hh: ... here.
* scribo/text/merging.hh: Remove holder() calls.
* scribo/text/merging_hdoc.hh: New.
* scribo/toolchain/internal/content_in_doc_functor.hh: Fix compilation.
* scribo/toolchain/internal/content_in_hdoc_functor.hh: Make use
of hdoc algorithms.
---
scribo/ChangeLog | 21 +
scribo/scribo/core/paragraph_set.hh | 29 +
scribo/scribo/text/extract_paragraphs.hh | 1091 ++++++++++++++++
scribo/scribo/text/extract_paragraphs_hdoc.hh | 1327 ++++++++++++++++++++
scribo/scribo/text/merging.hh | 81 +-
scribo/scribo/text/{merging.hh => merging_hdoc.hh} | 10 +-
scribo/scribo/text/paragraphs.hh | 1270 -------------------
.../toolchain/internal/content_in_doc_functor.hh | 4 +-
.../toolchain/internal/content_in_hdoc_functor.hh | 9 +-
9 files changed, 2510 insertions(+), 1332 deletions(-)
create mode 100644 scribo/scribo/text/extract_paragraphs.hh
create mode 100644 scribo/scribo/text/extract_paragraphs_hdoc.hh
copy scribo/scribo/text/{merging.hh => merging_hdoc.hh} (99%)
delete mode 100644 scribo/scribo/text/paragraphs.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 394c247..d1678c5 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,24 @@
+2011-06-20 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Make different algorithms for doc and hdoc use cases.
+
+ * scribo/core/paragraph_set.hh: Add an overload of
+ make::paragraph().
+
+ * scribo/text/paragraphs.hh: Move...
+
+ * scribo/text/extract_paragraphs.hh,
+ * scribo/text/extract_paragraphs_hdoc.hh: ... here.
+
+ * scribo/text/merging.hh: Remove holder() calls.
+
+ * scribo/text/merging_hdoc.hh: New.
+
+ * scribo/toolchain/internal/content_in_doc_functor.hh: Fix compilation.
+
+ * scribo/toolchain/internal/content_in_hdoc_functor.hh: Make use
+ of hdoc algorithms.
+
2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
Add a new toolchain for ICDAR contests.
diff --git a/scribo/scribo/core/paragraph_set.hh b/scribo/scribo/core/paragraph_set.hh
index 9214f20..c21359d 100644
--- a/scribo/scribo/core/paragraph_set.hh
+++ b/scribo/scribo/core/paragraph_set.hh
@@ -377,6 +377,35 @@ namespace scribo
}
+ template <typename L>
+ scribo::paragraph_set<L>
+ paragraph(const line_links<L>& llinks)
+ {
+ // The argument is const, so the links are resolved on a duplicate.
+ line_links<L> roots = llinks.duplicate();
+
+ // Replace every link by the root of the chain it belongs to.
+ for_all_links(l, roots)
+ {
+ roots(l) = internal::find_root(roots, l);
+ }
+
+ // Turn root ids into paragraph ids. relabelfun() is expected to
+ // store the number of paragraphs in nparagraphs -- TODO confirm.
+ unsigned nparagraphs;
+ mln::fun::i2v::array<unsigned> relabel
+ = mln::make::relabelfun(roots.line_to_link(),
+ roots.nelements() - 1,
+ nparagraphs);
+
+ paragraph_set<L> parset(roots, nparagraphs);
+ const scribo::line_set<L>& all_lines = roots.lines();
+
+ // Dispatch each line having a non-null link to its paragraph.
+ for_all_links(l, roots)
+ {
+ if (roots(l))
+ {
+ value::int_u16 id = relabel(l);
+ parset(id).add_line(all_lines(l));
+ }
+ }
+
+ // Refresh the statistics once all the lines have been added.
+ for_all_paragraphs(p, parset)
+ {
+ parset(p).force_stats_update();
+ }
+
+ return parset;
+ }
+
// FIXME: move that code into paragraph_set constructor?
template <typename L>
scribo::paragraph_set<L>
diff --git a/scribo/scribo/text/extract_paragraphs.hh b/scribo/scribo/text/extract_paragraphs.hh
new file mode 100644
index 0000000..e8e9ac6
--- /dev/null
+++ b/scribo/scribo/text/extract_paragraphs.hh
@@ -0,0 +1,1091 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+/// \brief Routines linking text lines together in order to extract paragraphs.
+///
+
+
+#ifndef SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
+# define SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
+
+#include <mln/util/array.hh>
+#include <mln/accu/shape/bbox.hh>
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/draw/box.hh>
+#include <mln/data/convert.hh>
+#include <mln/value/int_u16.hh>
+#include <mln/value/label_16.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/geom/rotate.hh>
+#include <mln/literal/colors.hh>
+
+#include <scribo/core/macros.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/core/line_links.hh>
+#include <scribo/core/line_info.hh>
+#include <scribo/core/paragraph_set.hh>
+
+using namespace mln;
+
+namespace scribo
+{
+
+ namespace text
+ {
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ namespace internal
+ {
+
+//---------------------------------------------------------------------
+// Extracting the root of a chain of links (union-find "find").
+//
+// \param[in,out] parent  Link array; parent(i) == i marks a root.
+// \param[in]     x       Index of the element whose root is wanted.
+//
+// \return The index of the root reached by following parent() from x.
+//
+// Every element visited on the way is re-linked directly to the root
+// (path compression), so later lookups on the same chain are shorter.
+//---------------------------------------------------------------------
+ template <typename T>
+ inline
+ unsigned
+ find_root(mln::util::array<T>& parent, unsigned x)
+ {
+ // First pass: walk up the chain to locate the root.
+ unsigned root = x;
+ while (parent(root) != root)
+ root = parent(root);
+
+ // Second pass: make every element of the chain point to the root.
+ // The next hop is saved before parent(x) is overwritten so that x
+ // itself, and not only its parent, gets re-linked.
+ while (parent(x) != x)
+ {
+ const unsigned next = parent(x);
+ parent(x) = root;
+ x = next;
+ }
+
+ return root;
+ }
+
+
+//---------------------------------------------------------------------
+// Cut the links between lines that do not fit the paragraph criteria.
+// 'output' starts as a copy of 'left'; a link is cut by making a line its own neighbor.
+//---------------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void paragraph_links(const line_links<L>& left,
+ const line_links<L>& right,
+ line_links<L>& output,
+ const line_set<L>& lines)
+ {
+ output = left.duplicate();
+
+ // const unsigned nlines = lines.nelements();
+
+ // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
+ // for (unsigned l = 0; l < nlines; ++l)
+ // {
+ // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
+ // }
+ // mln::io::ppm::save(links, "out_links.ppm");
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Neighbors
+
+ const line_id_t left_nbh = output(l);
+ const line_id_t right_nbh = right(l);
+ const line_id_t lol_nbh = output(left_nbh);
+
+ // Line features
+ const float x_height = lines(l).x_height();
+ const float left_x_height = lines(left_nbh).x_height();
+ const float right_x_height = lines(right_nbh).x_height();
+
+ const box2d& left_line_bbox = lines(left_nbh).bbox();
+ const box2d& current_line_bbox = lines(l).bbox();
+ const box2d& right_line_bbox = lines(right_nbh).bbox();
+ const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
+
+ const int lline_col_min = left_line_bbox.pmin().col();
+ const int cline_col_min = current_line_bbox.pmin().col();
+ const int rline_col_min = right_line_bbox.pmin().col();
+ const int lolline_col_min = lol_line_bbox.pmin().col();
+
+ const int lline_col_max = left_line_bbox.pmax().col();
+ const int cline_col_max = current_line_bbox.pmax().col();
+ const int rline_col_max = right_line_bbox.pmax().col();
+
+ const int lline_cw = lines(left_nbh).char_width();
+ const int cline_cw = lines(l).char_width();
+ const int rline_cw = lines(right_nbh).char_width();
+ // Maximal x variation to consider two lines vertically aligned
+ const int delta_alignment = cline_cw;
+
+ // Checks the baseline distances of the two neighbors
+ {
+ // Current line baseline
+ const int c_baseline = lines(l).baseline();
+
+ // Baseline distance with the left and right neighbors
+ const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
+ const int rc_baseline = c_baseline -lines(right_nbh).baseline();
+
+ // Max baseline distance between the two neighbors
+ // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
+ // const float delta_baseline_min = std::min(lc_baseline,
+ // rc_baseline);
+
+ // Only two lines, meaning the current line has only one neighbor
+ bool two_lines = false;
+
+ // If the current line has no left neighbor (null baseline difference)
+ if (lc_baseline == 0)
+ {
+ // ror : right neighbor of the right neighbor
+ const line_id_t ror_nbh = right(right_nbh);
+ //const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+
+ // If the current line has a ror
+ if (ror_nbh != right_nbh
+ && output(ror_nbh) == right_nbh)
+ {
+ // Distance between the current line and the right neighbor
+ const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
+ // Distance between the right neighbor and the ror
+ const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
+ // ror x_height
+ const float ror_x_height = lines(ror_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its right neighbor
+ if (right_distance > 1.4f * ror_distance
+ && std::max(ror_x_height, right_x_height) <
+ 1.2f * std::min(ror_x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // We determine the distance between the two lines
+ const float distance = lines(l).meanline() - lines(right_nbh).baseline();
+ two_lines = true;
+
+ // If the distance between the two lines is greater than
+ // twice the minimum x height of the two lines then we cut
+ // the link between them
+ if (distance > 2.0f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float min_char_width = std::min(rline_cw, cline_cw);
+ const float max_char_width = std::max(rline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its right neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ if (output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // If the current line has no right neighbor (null baseline difference)
+ else if (rc_baseline == 0)
+ {
+ // lol : left neighbor of the left neighbor
+
+ // If the left neighbor of the current line has a left neighbor
+ if (lol_nbh != left_nbh)
+ {
+ // Distance between the current line and its left neighbor
+ const float left_distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+ // Distance between the left neighbor and the left
+ // neighbor of its left neighbor
+ const float lol_distance = lines(lol_nbh).meanline() -
+ lines(left_nbh).baseline();
+ // lol x height
+ const float lol_x_height = lines(lol_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its left neighbor
+ if (left_distance > 1.4f * lol_distance
+ && std::max(lol_x_height, left_x_height) <
+ 1.2f * std::min(lol_x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // Distance between the current line and its left neighbor
+ const float distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+
+ two_lines = true;
+
+ // If the distance is greater than twice the min x height
+ // of the two lines
+ if (distance > 2.0f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float min_char_width = std::min(lline_cw, cline_cw);
+ const float max_char_width = std::max(lline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its left neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // The current line has at least one left and one right neighbor
+ else // if (delta_baseline_max >= delta_baseline_min)
+ {
+ // Distance between the left and the current line
+ const float left_distance =
+ lines(left_nbh).meanline() - lines(l).baseline();
+ // Distance between the right and the current line
+ const float right_distance =
+ lines(l).meanline() - lines(right_nbh).baseline();
+
+ // If the left line is too far compared to the right one
+ // we cut the link with it
+ if (left_distance > 1.2f * right_distance
+ && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ // If the right line is too far compared to the left one
+ // we cut the link with it
+ else if (right_distance > 1.2f * left_distance
+ && std::max(x_height, right_x_height) > 1.2f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ // If the distance between the baseline of the left
+ // neighbor and the baseline of the current line is
+ // greater than the one between the current line baseline
+ // and the right line baseline we have to study the text
+ // features of the left line first, then of the right line
+ if (lc_baseline > rc_baseline)
+ {
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ else
+ {
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ // If we arrive here, it means that the lines in the
+ // neighborhood of the current line are quite similar. We can
+ // then begin to study the indentations in order to determine
+ // the beginning of new paragraphs
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_aligned = false;
+ bool left_lol_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_llol = std::abs(lline_col_min - lolline_col_min);
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (dx_llol < delta_alignment)
+ left_lol_aligned = true;
+
+ if (left_right_aligned && left_lol_aligned)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ dx_lrc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________
+// |___________________| End of the paragraph - Current line
+// ________________________
+// |________________________| Beginning of a new one
+// ___________________________
+// |___________________________| Left of left of current line
+//
+// End of paragraph case : we try to find an end to the current paragraph
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_max_aligned = false;
+ bool left_current_min_aligned = false;
+ bool lol_current_min_aligned = false;
+ const bool lol_is_left = output(left_nbh) == left_nbh;
+ const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
+ const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
+ const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
+
+ if (dx_lr_max < delta_alignment)
+ left_right_max_aligned = true;
+
+ if (dx_lc_min < delta_alignment)
+ left_current_min_aligned = true;
+
+ if (dx_lolc_min < delta_alignment)
+ lol_current_min_aligned = true;
+
+ if (!left_current_min_aligned && left_right_max_aligned &&
+ (lol_current_min_aligned || lol_is_left))
+ {
+ const int dx_lrc = std::abs(lline_col_max - cline_col_max);
+ const int l_char_width = lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ cline_col_max < lline_col_max &&
+ cline_col_min < lline_col_min &&
+ (lline_col_min > lolline_col_min || lol_is_left))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above at the end of a column.
+//
+//-----------------------------------------------------------------------------
+
+ if (left_nbh == l)
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (right_ror_min_aligned)
+ {
+ const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
+ const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_rrorc > l_char_width &&
+ dx_rrorc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_max >= rline_col_max)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ }
+
+
+ // Debug only
+
+ // {
+ // image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
+
+ // for (unsigned i = 0; i < output.nelements(); ++i)
+ // output(i) = scribo::make::internal::find_root(output, i);
+
+ // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
+ // for_all_lines(l, lines)
+ // if (lines(l).is_textline())
+ // {
+ // // if (lines(i).is_textline())
+ // // {
+ // // mln::draw::box(debug, lines(i).bbox(), literal::red);
+ // nbbox(output(l)).take(lines(l).bbox());
+ // // }
+ // }
+
+ // for (unsigned i = 0; i < nbbox.nelements(); ++i)
+ // if (nbbox(i).is_valid())
+ // {
+ // box2d b = nbbox(i).to_result();
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // }
+
+ // mln::io::ppm::save(debug, "out_paragraph.ppm");
+ // }
+
+ }
+
+//-------------------------------------------------------------
+// Preparation of the lines before linking them.
+// For each text line we draw its top and its bottom in the block image.
+// With i = l + 1, where l is the id of the line, the top of the line
+// is drawn with the value 2 * i in the block image and the
+// bottom with 2 * i + 1. The rotated bounding box is stored in rbbox(l).
+//
+//-------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void prepare_lines(const box2d& domain,
+ const line_set<L>& lines,
+ L& blocks,
+ mln::util::array<box2d>& rbbox)
+ {
+ std::map< int, std::vector< const box2d* > > drawn_lines;
+ // const unsigned nlines = lines.nelements();
+
+ // For each text line
+ //for (unsigned l = 0; l < nlines; ++l)
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Rotation of the bounding box (angle -90, around the domain center)
+ box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
+// rbbox.append(b);
+ rbbox(l) = b;
+
+ const unsigned index = l + 1;
+ const unsigned even_index = 2 * index;
+ const unsigned odd_index = even_index + 1;
+
+ // Top of the line
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the top of the
+ // line: move to the next column while a drawn segment overlaps
+
+ const int col = b.pmax().col() + col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second;
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+
+ if (min_row - max_row <= 0)
+ break;
+ }
+
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+
+ // Bottom of the line
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the bottom of
+ // the line: move to the previous column while a drawn segment overlaps
+
+ const int col = b.pmin().col() - col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second;
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+
+ if (min_row - max_row <= 0)
+ break;
+ }
+
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+ }
+ }
+
+ template <typename L>
+ inline
+ void
+ process_left_link(L& blocks,
+ const mln::util::array<box2d>& rbbox,
+ const line_set<L>& lines,
+ line_links<L>& left)
+ {
+ typedef scribo::def::lbl_type V;
+
+ // At the beginning each text line is its own neighbor (0 for the others)
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ left(l) = l;
+ else
+ left(l) = 0;
+
+ // const unsigned nlines = lines.nelements();
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Left
+ {
+ // Left margin: number of columns between c and the domain's first column
+ int
+ nleftima = c.col() - blocks.domain().pmin().col(),
+ // Left search distance
+ nleft = std::min(nleftima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p - nleft - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; --p, --p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 0) // Looking for the top of a line (even label)
+ && left((*p2 >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 0) // Looking for the top of a line (even label)
+ && left((*p >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the top of the
+ // line. We are then looking for the bottom of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ // _________________________ |
+ // |_________________________| => Current line | Search direction
+ // v
+ // => First encountered top line
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second top |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ //
+ //
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p + 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; --nbh_p)
+ {
+ if ((*nbh_p) != literal::zero) // Not the background
+ {
+ if ((*nbh_p) % 2 == 0)// We have found the top of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the bottom of a line. We are looking if
+ // we have already encountered the top of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ (*nbh_p) - 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p > pstop
+ // Avoid loops
+ && left(((*nbh_p - 1) >> 1) - 1) != l)
+ left(l) = ((*nbh_p - 1) >> 1) - 1;
+
+ // We have found a complete line so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+ if (*nbh_p == end_p)
+ left(l) = (*nbh_p_copy >> 1) - 1;
+ }
+ }
+ }
+ }
+
+
+ // We assume that the lines have been rotated (rbbox holds the rotated boxes)
+ template <typename L>
+ inline
+ void
+ process_right_link(L& blocks,
+ const mln::util::array<box2d>& rbbox,
+ const line_set<L>& lines,
+ line_links<L>& right)
+ {
+ typedef scribo::def::lbl_type V;
+
+ // At the beginning each text line is its own neighbor (0 for the others)
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ right(l) = l;
+ else
+ right(l) = 0;
+
+ // const unsigned nlines = lines.nelements();
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Right
+ {
+ int
+ nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
+ nright = std::min(nrightima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p + nright - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; ++p, ++p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 1) // Looking for the bottom of a line (odd label)
+ && right(((*p2 - 1) >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 1) // Looking for the bottom of a line (odd label)
+ && right(((*p - 1) >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the bottom of the
+ // line. We are then looking for the top of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ //
+ //
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second bottom |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ // => First encountered bottom line
+ // _________________________ ^
+ // |_________________________| => Current line | Search direction
+ // |
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p - 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; ++nbh_p)
+ {
+ if (*nbh_p != literal::zero) // Not the background
+ {
+ if (*nbh_p % 2 == 1) // We have found the bottom of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the top of a line. We are looking if
+ // we have already encountered the bottom of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ *nbh_p + 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p < pstop
+ // Avoid loops
+ && right((*nbh_p >> 1) - 1) != l)
+ right(l) = (*nbh_p >> 1) - 1;
+
+ // We have found a complete line, so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+
+ if (*nbh_p == end_p)
+ right(l) = ((*nbh_p_copy - 1) >> 1) - 1;
+ }
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------
+// Finalizing the links by merging information extracted from the left
+// and right links
+//-----------------------------------------------------------------------
+
+ /// Reconcile the left and right link tables. For every text line l,
+ /// a neighbor of l that is still linked to itself in the opposite
+ /// table gets linked back to l.
+ ///
+ /// \param[in,out] left  Left links; left(right(l)) may be set to l.
+ /// \param[in,out] right Right links; right(left(l)) may be set to l.
+ /// \param[in]     lines Line set providing the lines to visit.
+ template< typename L >
+ inline
+ void finalize_links(line_links<L>& left,
+                     line_links<L>& right,
+                     const line_set<L>& lines)
+ {
+   for_all_lines(l, lines)
+   {
+     if (! lines(l).is_textline())
+       continue;
+
+     // Read both neighbors before any table is modified.
+     const unsigned to_left = left(l);
+     const unsigned to_right = right(l);
+
+     // My left neighbor is its own right neighbor: I become its right one.
+     line_id_t& right_of_left = right(to_left);
+     if (right_of_left == to_left)
+       right_of_left = l;
+
+     // My right neighbor is its own left neighbor: I become its left one.
+     line_id_t& left_of_right = left(to_right);
+     if (left_of_right == to_right)
+       left_of_right = l;
+   }
+ }
+
+ } // end of namespace scribo::text::internal
+
+ /// Group the text lines of \p lines into paragraphs.
+ ///
+ /// Line boxes are drawn in a label image defined on the input domain
+ /// rotated by -90 degrees; left and right neighbors are then searched
+ /// in that image, reconciled, filtered, and turned into a paragraph set.
+ /// Progress messages are written to std::cout.
+ ///
+ /// \param[in,out] lines The line set (passed on to the internal steps).
+ /// \param[in] input A binary image; only its domain is used here.
+ ///
+ /// \return The paragraph set built from the final line links.
+ template <typename L>
+ inline
+ paragraph_set<L>
+ extract_paragraphs(line_set<L>& lines,
+                    const image2d<bool>& input)
+ {
+   typedef scribo::def::lbl_type V;
+
+   // Label image defined on the rotated domain, filled with 0.
+   image2d<V> blocks(geom::rotate(input.domain(), -90,
+                                  input.domain().pcenter()));
+   data::fill(blocks, 0);
+
+   // Rotated bounding boxes, indexed by line id (hence the + 1).
+   mln::util::array<box2d> rbbox;
+   rbbox.resize(lines.nelements() + 1);
+
+   // Link tables; id 0 is always mapped to 0.
+   line_links<L> left(lines), right(lines), output(lines);
+   left(0) = 0;
+   right(0) = 0;
+   output(0) = 0;
+
+   std::cout << "Preparing lines" << std::endl;
+   internal::prepare_lines(input.domain(), lines, blocks, rbbox);
+   // Debug: io::pgm::save(blocks, "blocks.pgm");
+   std::cout << "Linking left" << std::endl;
+   internal::process_left_link(blocks, rbbox, lines, left);
+   std::cout << "Linking right" << std::endl;
+   internal::process_right_link(blocks, rbbox, lines, right);
+   std::cout << "Finalizing links" << std::endl;
+   internal::finalize_links(left, right, lines);
+   std::cout << "Extracting paragraphs" << std::endl;
+   internal::paragraph_links(left, right, output, lines);
+   return make::paragraph(output);
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::text
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
diff --git a/scribo/scribo/text/extract_paragraphs_hdoc.hh b/scribo/scribo/text/extract_paragraphs_hdoc.hh
new file mode 100644
index 0000000..57d3713
--- /dev/null
+++ b/scribo/scribo/text/extract_paragraphs_hdoc.hh
@@ -0,0 +1,1327 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Paragraph extraction from text lines ("hdoc" use case).
+
+
+#ifndef SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HDOC_HH
+# define SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HDOC_HH
+
+#include <mln/util/array.hh>
+#include <mln/accu/shape/bbox.hh>
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/draw/box.hh>
+#include <mln/data/convert.hh>
+#include <mln/value/int_u16.hh>
+#include <mln/value/label_16.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/geom/rotate.hh>
+#include <mln/literal/colors.hh>
+
+#include <scribo/core/macros.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/core/line_links.hh>
+#include <scribo/core/line_info.hh>
+#include <scribo/core/paragraph_set.hh>
+
+using namespace mln;
+
+
+namespace scribo
+{
+
+ namespace text
+ {
+
+ template <typename L>
+ inline
+ paragraph_set<L>
+ extract_paragraphs_hdoc(line_set<L>& lines,
+ const image2d<bool>& input);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ namespace internal
+ {
+
+ /// Tell whether a separator lies between two lines.
+ ///
+ /// The scan starts from line \p l1_ when the center of its bounding box
+ /// has a strictly smaller row than the center of \p l2_, and from
+ /// \p l2_ otherwise. Rows are visited in increasing order, from the row
+ /// after the center of the starting box up to the first row of the
+ /// other box. On each row three columns of the starting box are read in
+ /// the separator image: the center column, and the left and right
+ /// borders moved inwards by a quarter of the half-width.
+ ///
+ /// \param[in] lines A line set whose components provide separators.
+ /// \param[in] l1_ Id of the first line.
+ /// \param[in] l2_ Id of the second line.
+ ///
+ /// \return True if a visited site is set in the separator image.
+ ///
+ /// \pre lines.components().has_separators()
+ template <typename L>
+ inline
+ bool
+ between_horizontal_separator(const line_set<L>& lines,
+                              const line_id_t& l1_,
+                              const line_id_t& l2_)
+ {
+   const line_info<L>& first = lines(l1_);
+   const line_info<L>& second = lines(l2_);
+
+   // Separators must have been found in the image.
+   mln_precondition(lines.components().has_separators());
+
+   const box2d& first_bbox = first.bbox();
+   const box2d& second_bbox = second.bbox();
+   const mln_ch_value(L, bool)&
+     separators = lines.components().separators();
+
+   // Rows are compared as unsigned values.
+   const unsigned first_center_row = first_bbox.pcenter().row();
+   const unsigned second_center_row = second_bbox.pcenter().row();
+
+   // Box we start from, and box we walk towards.
+   const bool from_first = first_center_row < second_center_row;
+   const box2d& from = from_first ? first_bbox : second_bbox;
+   const box2d& to = from_first ? second_bbox : first_bbox;
+
+   // A quarter of the half-width of the starting box.
+   const unsigned inset =
+     ((from.pcenter().col() - from.pmin().col()) >> 2);
+
+   // The three columns that are probed on every row.
+   const unsigned center_col = from.pcenter().col();
+   const unsigned left_col = from.pmin().col() + inset;
+   const unsigned right_col = from.pmax().col() - inset;
+
+   // Last row to probe.
+   const unsigned last_row = to.pmin().row();
+
+   unsigned row = from.pcenter().row();
+   while (row < last_row)
+   {
+     ++row;
+     const bool hit = separators.at_(row, center_col)
+       || separators.at_(row, left_col)
+       || separators.at_(row, right_col);
+     if (hit)
+       return true;
+   }
+
+   return false;
+ }
+
+
+ /// Check whether a line other than \p index is linked to \p index in
+ /// \p right, starts at a smaller column than \p current_line, and has
+ /// a baseline closer to the baseline of \p current_line than half of
+ /// the smaller of their two x-heights.
+ template <typename L>
+ bool may_have_another_left_link(const line_links<L>& right,
+                                 const line_id_t& index,
+                                 const line_id_t& current_line,
+                                 const line_set<L>& lines)
+ {
+   const line_info<L>& cur = lines(current_line);
+   const point2d& cur_pmin = cur.bbox().pmin();
+   const unsigned cur_xh = cur.x_height();
+   for_all_links(i, right)
+     if (i != index && right(i) == index)
+     {
+       const line_info<L>& other = lines(i);
+       const unsigned other_xh = other.x_height();
+       const float tolerance = 0.5f * std::min(cur_xh, other_xh);
+       const bool starts_before = other.bbox().pmin().col() < cur_pmin.col();
+       if (starts_before
+           && std::abs(cur.baseline() - other.baseline()) < tolerance)
+         return true;
+     }
+   return false;
+ }
+
+//---------------------------------------------------------------------
+// This method starts from a copy of the left links (output) and cuts the
+// links between lines that do not fit the different criteria below
+//---------------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void paragraph_links(const line_links<L>& left,
+ line_links<L>& right,
+ line_links<L>& output,
+ const line_set<L>& lines)
+ {
+ output = left.duplicate();
+
+ // const unsigned nlines = lines.nelements();
+
+ // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
+ // for (unsigned l = 0; l < nlines; ++l)
+ // {
+ // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
+ // }
+ // mln::io::ppm::save(links, "out_links.ppm");
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Neighbors (lol : left neighbor of the left neighbor)
+
+ line_id_t left_nbh = output(l);
+ line_id_t right_nbh = right(l);
+ line_id_t lol_nbh = output(left_nbh);
+
+ // const line_info<L>& left_line = lines(left_nbh);
+ // const line_info<L>& current_line = lines(l);
+ // const line_info<L>& right_line = lines(right_nbh);
+
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, right_nbh, l))
+ {
+ output(right_nbh) = right_nbh;
+ right_nbh = l;
+ }
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, l, left_nbh))
+ {
+ output(l) = l;
+ left_nbh = l;
+ lol_nbh = l;
+ }
+
+ // Line features
+ const float x_height = lines(l).x_height();
+ const float left_x_height = lines(left_nbh).x_height();
+ const float right_x_height = lines(right_nbh).x_height();
+
+ const box2d& left_line_bbox = lines(left_nbh).bbox();
+ const box2d& current_line_bbox = lines(l).bbox();
+ const box2d& right_line_bbox = lines(right_nbh).bbox();
+ const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
+
+ const int lline_col_min = left_line_bbox.pmin().col();
+ const int cline_col_min = current_line_bbox.pmin().col();
+ const int rline_col_min = right_line_bbox.pmin().col();
+ const int lolline_col_min = lol_line_bbox.pmin().col();
+
+ const int lline_col_max = left_line_bbox.pmax().col();
+ const int cline_col_max = current_line_bbox.pmax().col();
+ const int rline_col_max = right_line_bbox.pmax().col();
+
+ const int lline_cw = lines(left_nbh).char_width();
+ const int cline_cw = lines(l).char_width();
+ const int rline_cw = lines(right_nbh).char_width();
+ // Maximal x variation to consider two lines vertically aligned
+ const int delta_alignment = cline_cw;
+
+
+ // Checks the baseline distances of the two neighbors
+ {
+ // Current line baseline
+ const int c_baseline = lines(l).baseline();
+
+ // Baseline distance with the left and right neighbors
+ const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
+ const int rc_baseline = c_baseline -lines(right_nbh).baseline();
+
+ // Max baseline distance between the two neighbors
+ // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
+ // const float delta_baseline_min = std::min(lc_baseline,
+ // rc_baseline);
+
+ // Only two lines, meaning the current line has only one neighbor
+ bool two_lines = false;
+
+ // If the current line has no left neighbor (null baseline difference)
+ if (lc_baseline == 0)
+ {
+ // ror : right neighbor of the right neighbor
+ const line_id_t ror_nbh = right(right_nbh);
+ //const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+
+ // If the current line has a ror
+ if (ror_nbh != right_nbh
+ && output(ror_nbh) == right_nbh)
+ {
+ // Distance between the current line and the right neighbor
+ const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
+ // Distance between the right neighbor and the ror
+ const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
+ // ror x_height
+ const float ror_x_height = lines(ror_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its right neighbor
+ if (right_distance > 1.4f * ror_distance
+ && std::max(ror_x_height, right_x_height) <
+ 1.4f * std::min(ror_x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // We determine the distance between the two lines
+ const float distance = lines(l).meanline() - lines(right_nbh).baseline();
+ two_lines = true;
+
+ // If the distance between the two lines is greater than
+ // twice the minimum x height of the two lines then we cut
+ // the link between them
+ if (distance > 2.0f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float min_char_width = std::min(rline_cw, cline_cw);
+ const float max_char_width = std::max(rline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its right neighbor
+ if ((max_x_height > min_x_height * 1.4f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ if (output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // If the current line has no right neighbor (null baseline difference)
+ else if (rc_baseline == 0)
+ {
+ // lol : left neighbor of the left neighbor
+
+ // If the left neighbor of the current line has a left neighbor
+ if (lol_nbh != left_nbh)
+ {
+ // Distance between the current line and its left neighbor
+ const float left_distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+ // Distance between the left neighbor and the left
+ // neighbor of its left neighbor
+ const float lol_distance = lines(lol_nbh).meanline() -
+ lines(left_nbh).baseline();
+ // lol x height
+ const float lol_x_height = lines(lol_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its left neighbor
+ if (left_distance > 1.4f * lol_distance
+ && std::max(lol_x_height, left_x_height) <
+ 1.4f * std::min(lol_x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // Distance between the current line and its left neighbor
+ const float distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+
+ two_lines = true;
+
+ // If the distance is greater than twice the min x height
+ // of the two lines
+ if (distance > 2.0f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float min_char_width = std::min(lline_cw, cline_cw);
+ const float max_char_width = std::max(lline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its left neighbor
+ if ((max_x_height > min_x_height * 1.4f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // The current line has at least one left and one right neighbor
+ else // if (delta_baseline_max >= 1.1 * delta_baseline_min)
+ {
+ // Distance between the left and the current line
+ const float
+ left_distance = left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
+ // Distance between the right and the current line
+ const float
+ right_distance = current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();;
+
+ // If the left line is too far compared to the right one
+ // we cut the link with it
+ if (left_distance > 1.5f * right_distance
+ && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ // If the right line is too far compared to the left one
+ // we cut the link with it
+ else if (right_distance > 1.5f * left_distance
+ && std::max(x_height, right_x_height) >= 1.2f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ // If the distance between the baseline of the left
+ // neighbor and the baseline of the current line is
+ // greater than the one between the current line baseline
+ // and the right line baseline we have to study the text
+ // features of the left line first, then of the right line
+ if (lc_baseline > rc_baseline)
+ {
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+
+ if ((max_x_height > min_x_height * 1.4f) &&
+ !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.4f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ else
+ {
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+
+ if ((max_x_height > min_x_height * 1.4f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.4f)
+ && !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ // If we arrive here, it means that the lines in the
+ // neighborhood of the current line are quite similar. We can
+ // then begin to study the indentations in order to determine
+ // the beginning of new paragraphs
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above (left, right and lol neighbors are aligned).
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_aligned = false;
+ bool left_lol_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_llol = std::abs(lline_col_min - lolline_col_min);
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (dx_llol < delta_alignment)
+ left_lol_aligned = true;
+
+ if (left_right_aligned && left_lol_aligned)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ dx_lrc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ const line_id_t out_right_nbh = output(right_nbh);
+
+ if (out_right_nbh != l)
+ {
+ if (output(out_right_nbh) == l)
+ output(out_right_nbh) = out_right_nbh;
+ right(l) = l;
+ }
+ else
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________
+// |___________________| End of the paragraph - Current line
+// ________________________
+// |________________________| Beginning of a new one
+// ___________________________
+// |___________________________| Left of left of current line
+//
+// End of paragraph case : we try to find an end to the current paragraph
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_max_aligned = false;
+ bool left_current_min_aligned = false;
+ bool lol_current_min_aligned = false;
+ const bool lol_is_left = output(left_nbh) == left_nbh;
+ const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
+ const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
+ const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
+
+ if (dx_lr_max < delta_alignment)
+ left_right_max_aligned = true;
+
+ if (dx_lc_min < delta_alignment)
+ left_current_min_aligned = true;
+
+ if (dx_lolc_min < delta_alignment)
+ lol_current_min_aligned = true;
+
+ if (!left_current_min_aligned && left_right_max_aligned &&
+ (lol_current_min_aligned || lol_is_left))
+ {
+ const int dx_lrc = std::abs(lline_col_max - cline_col_max);
+ const int l_char_width = lines(l).char_width();
+ const int dx_indent = std::abs(std::max(lline_col_min,
+ rline_col_min) - cline_col_min);
+
+ if (dx_lrc > l_char_width &&
+ dx_indent < 4 * delta_alignment &&
+ cline_col_max < lline_col_max &&
+ cline_col_min < lline_col_min &&
+ (lline_col_min > lolline_col_min || lol_is_left))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above (left, right and ror neighbors are aligned).
+//
+//-----------------------------------------------------------------------------
+
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ bool left_right_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (right_ror_min_aligned && left_right_aligned &&
+ ror_nbh != right_nbh)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ !may_have_another_left_link(right, right_nbh, l, lines) &&
+ dx_lrc < 10.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ const line_id_t out_right_nbh = output(right_nbh);
+
+ if (out_right_nbh != l)
+ {
+ if (output(out_right_nbh) == l)
+ output(out_right_nbh) = out_right_nbh;
+ right(l) = l;
+ }
+ else
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________
+// |___________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+//
+// Variant : the left neighbor and the ror are aligned while the right
+// neighbor is not. Here the right link of the right neighbor is reset.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool left_ror_aligned = false;
+ const int dx_lror = std::abs(lline_col_min - rorline_col_min);
+
+ if (dx_lror < delta_alignment)
+ left_ror_aligned = true;
+
+ if (left_ror_aligned)
+ {
+ const int left_ror_col_min = std::min(lline_col_min, rorline_col_min);
+ const int dx_lrorc = std::abs(left_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+ const int dx_lrorr = std::abs(left_ror_col_min - rline_col_min);
+ const int dx_crmax = std::abs(rline_col_max - cline_col_max);
+
+ if (dx_lrorc > l_char_width &&
+ dx_lrorr > 5 * l_char_width &&
+ dx_lrorr > dx_lrorc &&
+ dx_crmax > 5 * l_char_width &&
+ !may_have_another_left_link(right, right_nbh, l, lines) &&
+ dx_lrorc < 10.0f * l_char_width &&
+ cline_col_min > rorline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ right(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+
+// Strange case : the right neighbor starts after the center column of the current line
+ {
+ if (rline_col_min > current_line_bbox.pcenter().col()
+ && !may_have_another_left_link(right, right_nbh, l, lines)
+ && cline_col_max < rline_col_max
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above at the end of a column.
+//
+//-----------------------------------------------------------------------------
+
+ if (left_nbh == l)
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (right_ror_min_aligned)
+ {
+ const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
+ const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_rrorc > l_char_width &&
+ dx_rrorc < 10.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_max >= rline_col_max)
+ {
+ const line_id_t out_right_nbh = output(right_nbh);
+
+ if (out_right_nbh != l)
+ {
+ if (output(out_right_nbh) == l)
+ output(out_right_nbh) = out_right_nbh;
+ right(l) = l;
+ }
+ else
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ }
+
+ // Only debug
+ // {
+ // image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
+
+ // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
+ // for_all_lines(l, lines)
+ // if (lines(l).is_textline())
+ // {
+ // // if (lines(i).is_textline())
+ // // {
+ // // mln::draw::box(debug, lines(i).bbox(), literal::red);
+ // nbbox(output(l)).take(lines(l).bbox());
+ // // }
+ // }
+
+ // for (unsigned i = 0; i < nbbox.nelements(); ++i)
+ // if (nbbox(i).is_valid())
+ // {
+ // box2d b = nbbox(i).to_result();
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // }
+
+ // mln::io::ppm::save(debug, "out_paragraph.ppm");
+ // }
+
+ }
+
+//-------------------------------------------------------------
+// Preparation of the lines before linking them.
+// For each text line we draw the top and the bottom of it.
+// Assuming that l is the id of the line and i = l + 1, the top of the
+// line is drawn with the value 2 * i in the block image and the
+// bottom with the value 2 * i + 1.
+//
+//-------------------------------------------------------------
+
+ /// Draw one side (top or bottom) of a rotated line box.
+ ///
+ /// Candidate columns are start_col, start_col + step,
+ /// start_col + 2 * step, and so on. A candidate is rejected when a
+ /// box already registered for it in \p drawn_lines shares at least
+ /// one row with \p b. In the first accepted column, the segment
+ /// covering the rows of \p b is drawn with \p value and \p stored
+ /// is registered for that column.
+ ///
+ /// NOTE(review): the search is not bounded by the domain of
+ /// \p blocks; whether drawing outside of it is safe should be
+ /// confirmed.
+ ///
+ /// \param[in,out] blocks Image in which the segment is drawn.
+ /// \param[in,out] drawn_lines Boxes already drawn, indexed by
+ ///                            column; updated with \p stored.
+ /// \param[in] b Rotated bounding box of the line.
+ /// \param[in] stored Address registered in \p drawn_lines; it must
+ ///                   stay valid as long as \p drawn_lines is used.
+ /// \param[in] start_col First candidate column.
+ /// \param[in] step Column increment between candidates (+1 or -1).
+ /// \param[in] value Value drawn in \p blocks.
+ ///
+ /// \pre step != 0 (otherwise the search cannot progress).
+ template <typename L>
+ inline
+ void
+ draw_in_free_column(L& blocks,
+                     std::map< int, std::vector< const box2d* > >& drawn_lines,
+                     const box2d& b,
+                     const box2d* stored,
+                     int start_col,
+                     int step,
+                     unsigned value)
+ {
+   typedef std::vector< const box2d* > boxes_t;
+   typedef std::map< int, boxes_t > columns_t;
+
+   // Endless on purpose: the loop is left when a free column is found.
+   for (int col = start_col; ; col += step)
+   {
+     // Is there already a box on this column that intersects b?
+     bool is_free = true;
+     const columns_t::iterator it = drawn_lines.find(col);
+
+     if (it != drawn_lines.end())
+     {
+       const boxes_t& boxes = it->second;
+       const unsigned nboxes = boxes.size();
+
+       for (unsigned i = 0; is_free && i < nboxes; ++i)
+       {
+         const box2d* box = boxes[i];
+         const int min_row = std::max(b.pmin().row(), box->pmin().row());
+         const int max_row = std::min(b.pmax().row(), box->pmax().row());
+
+         // The row ranges of b and of this box intersect: the column
+         // is already taken.
+         if (min_row - max_row <= 0)
+           is_free = false;
+       }
+     }
+
+     // Free column found: draw, register the box and stop.
+     if (is_free)
+     {
+       mln::draw::line(blocks, point2d(b.pmin().row(), col),
+                       point2d(b.pmax().row(), col), value);
+       drawn_lines[col].push_back(stored);
+       return;
+     }
+   }
+ }
+
+ /// Draw the top and the bottom of every text line in \p blocks and
+ /// store the rotated bounding boxes.
+ ///
+ /// Each side is a segment at a fixed column, spanning the rows of the
+ /// rotated box. Two segments never share a column over intersecting
+ /// rows: when the wanted column is taken, the next one is tried
+ /// (increasing columns for tops, decreasing columns for bottoms).
+ ///
+ /// \param[in] domain Box whose center is used as the rotation center.
+ /// \param[in] lines The line set; only text lines are processed.
+ /// \param[in,out] blocks Image receiving the top/bottom segments.
+ /// \param[out] rbbox rbbox(l) receives the bounding box of line l
+ ///                   rotated by -90 degrees.
+ ///
+ /// \pre rbbox can be indexed by every line id of \p lines.
+ template <typename L>
+ inline
+ void prepare_lines(const box2d& domain,
+                    const line_set<L>& lines,
+                    L& blocks,
+                    mln::util::array<box2d>& rbbox)
+ {
+   // For each column, the boxes of the lines already drawn in it. The
+   // pointers refer to elements of rbbox, which is not resized in this
+   // function.
+   std::map< int, std::vector< const box2d* > > drawn_lines;
+
+   for_all_lines(l, lines)
+     if (lines(l).is_textline())
+     {
+       // Rotation of the bounding box
+       const box2d b = geom::rotate(lines(l).bbox(), -90,
+                                    domain.pcenter());
+       rbbox(l) = b;
+
+       // Values drawn for this line. Both are at least 2, so they
+       // never collide with 0.
+       const unsigned index = l + 1;
+       const unsigned even_index = 2 * index;
+       const unsigned odd_index = even_index + 1;
+
+       // Top of the line: first free column from pmax().col() onwards.
+       draw_in_free_column(blocks, drawn_lines, b, &(rbbox[l]),
+                           b.pmax().col(), 1, even_index);
+
+       // Bottom of the line: first free column from pmin().col()
+       // backwards.
+       draw_in_free_column(blocks, drawn_lines, b, &(rbbox[l]),
+                           b.pmin().col(), -1, odd_index);
+     }
+ }
+ // For each text line, walk the pixel pointers of `blocks` backwards from the center of its box in `rbbox` and store in `left` the id of the first suitable line mark met; a text line without neighbor stays linked to itself.
+ template <typename L>
+ inline
+ void
+ process_left_link(L& blocks,
+ const mln::util::array<box2d>& rbbox,
+ const line_set<L>& lines,
+ line_links<L>& left)
+ {
+ typedef scribo::def::lbl_type V;
+
+ // At the beginning each text line is its own neighbor; any other line is linked to 0
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ left(l) = l;
+ else
+ left(l) = 0;
+
+ // const unsigned nlines = lines.nelements();
+
+ // For each line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search: 1.5 times the x-height of the line
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box: c is the box center, q has the same column but lies a quarter of the way from the first row to the center row
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Left
+ {
+ // Left margin: number of columns between the center and the first column of the blocks domain
+ int
+ nleftima = c.col() - blocks.domain().pmin().col(),
+ // Left search distance (half the box width plus dmax), clamped to that margin
+ nleft = std::min(nleftima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p - nleft - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; --p, --p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 0) // Even label: top mark of a line (see the diagram below)
+ && left((*p2 >> 1) - 1) != l) // No loops: the decoded line must not already be linked to l
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 0) // Even label: top mark of a line (see the diagram below)
+ && left((*p >> 1) - 1) != l) // No loops: the decoded line must not already be linked to l
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the top of the
+ // line. We are then looking for the bottom of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ // _________________________ |
+ // |_________________________| => Current line | Search direction
+ // v
+ // => First encountered top line
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second top |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ //
+ //
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p + 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; --nbh_p)
+ {
+ if ((*nbh_p) != literal::zero) // Not the background
+ {
+ if ((*nbh_p) % 2 == 0)// We have found the top of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the bottom of a line. We are looking if
+ // we have already encountered the top of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ (*nbh_p) - 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p > pstop
+ // Avoid loops
+ && left(((*nbh_p - 1) >> 1) - 1) != l)
+ left(l) = ((*nbh_p - 1) >> 1) - 1;
+
+ // We have found a complete line so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+ if (*nbh_p == end_p)
+ left(l) = (*nbh_p_copy >> 1) - 1;
+ }
+ }
+ }
+ }
+
+
+ // We assume that the lines have been rotated. For each text line, walk the pixel pointers of `blocks` forwards from the center of its box in `rbbox` and store in `right` the id of the first suitable line mark met; a text line without neighbor stays linked to itself.
+ template <typename L>
+ inline
+ void
+ process_right_link(L& blocks,
+ const mln::util::array<box2d>& rbbox,
+ const line_set<L>& lines,
+ line_links<L>& right)
+ {
+ typedef scribo::def::lbl_type V;
+
+ // At the beginning each text line is its own neighbor; any other line is linked to 0
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ right(l) = l;
+ else
+ right(l) = 0;
+
+ // const unsigned nlines = lines.nelements();
+
+ // For each line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search: 1.5 times the x-height of the line
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box: c is the box center, q has the same column but lies a quarter of the way from the last row to the center row
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Right
+ { // nrightima: columns from the center up to the end of the blocks domain; nright: search distance (half the box width plus dmax) clamped to it
+ int
+ nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
+ nright = std::min(nrightima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p + nright - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit. The test is `<` rather than `!=` because pstop lies before p when nright is 0: the scan must then be skipped instead of running past the image buffer
+ for (; p < pstop; ++p, ++p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 1) // Odd label: bottom mark of a line
+ && right(((*p2 - 1) >> 1) - 1) != l) // No loops: the decoded line must not already be linked to l
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 1) // Odd label: bottom mark of a line
+ && right(((*p - 1) >> 1) - 1) != l) // No loops: the decoded line must not already be linked to l
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the bottom of the
+ // line. We are then looking for the top of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ //
+ //
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second bottom |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ // => First encountered bottom line
+ // _________________________ ^
+ // |_________________________| => Current line | Search direction
+ // |
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p - 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; ++nbh_p)
+ {
+ if (*nbh_p != literal::zero) // Not the background
+ {
+ if (*nbh_p % 2 == 1) // We have found the bottom of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the top of a line. We are looking if
+ // we have already encountered the bottom of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ *nbh_p + 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p < pstop
+ // Avoid loops
+ && right((*nbh_p >> 1) - 1) != l)
+ right(l) = (*nbh_p >> 1) - 1;
+
+ // We have found a complete line, so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+
+ if (*nbh_p == end_p)
+ right(l) = ((*nbh_p_copy - 1) >> 1) - 1;
+ }
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------
+// Finalizing the links by merging information extracted from the left
+// and right links: a text line claims a neighbor on one side when that neighbor is still linked to itself on the opposite side
+//-----------------------------------------------------------------------
+
+ template< typename L >
+ inline
+ void finalize_links(line_links<L>& left,
+ line_links<L>& right,
+ const line_set<L>& lines)
+ {
+ // const unsigned nlines = lines.nelements();
+
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ const unsigned left_value = left(l); // Both links of l are read before any of them is patched below
+ const unsigned right_value = right(l);
+
+ // If the right neighbor of my left neighbor is itself then its
+ // right neighbor is me
+ {
+ line_id_t& v = right(left_value);
+
+ if (v == left_value)
+ v = l;
+ }
+
+ // If the left neighbor of my right neighbor is itself then its
+ // left neighbor is me
+ {
+ line_id_t& v = left(right_value);
+
+ if (v == right_value)
+ v = l;
+ }
+ }
+ }
+
+ } // end of namespace scribo::text::internal
+
+ // Build a paragraph set from `lines`: prepare a rotated label image, compute the left and right links between lines, reconcile and filter them, then make the paragraphs from the filtered links.
+ template <typename L>
+ inline
+ paragraph_set<L>
+ extract_paragraphs_hdoc(line_set<L>& lines,
+ const image2d<bool>& input)
+ {
+ typedef scribo::def::lbl_type V;
+
+ image2d<V> blocks(geom::rotate(input.domain(), -90, input.domain().pcenter())); // Label image defined on the input domain rotated by -90 around its center
+ data::fill(blocks, 0); // Everything starts as background
+
+ // util::array< line_info<L> > lines_info;
+
+ // for_all_lines(l, lines)
+ // {
+ // if (lines(l).is_textline())
+ // lines_info.append(lines(l));
+ // }
+
+/// const unsigned nlines = lines_info.nelemnts();
+ mln::util::array<box2d> rbbox;
+ line_links<L> left(lines);
+ left(0) = 0;
+ line_links<L> right(lines);
+ right(0) = 0;
+ line_links<L> output(lines);
+ output(0) = 0;
+
+ rbbox.resize(lines.nelements() + 1); // One box per line, plus one extra slot
+
+// std::cout << "Preparing lines" << std::endl;
+ prepare_lines(input.domain(), lines , blocks, rbbox); // NOTE(review): presumably fills rbbox and draws the line marks into blocks - confirm against its definition
+// io::pgm::save(blocks, "blocks.pgm");
+// std::cout << "Linking left" << std::endl;
+ process_left_link(blocks, rbbox, lines , left);
+// std::cout << "Linking right" << std::endl;
+ process_right_link(blocks, rbbox, lines , right);
+// std::cout << "Finalizing links" << std::endl;
+ finalize_links(left, right, lines );
+ // std::cout << "Finalizing merging" << std::endl;
+ // finalize_line_merging(left, right, lines);
+// std::cout << "Extracting paragraphs" << std::endl;
+ filter::paragraph_links(left, right, output, lines); // Filtered links are written to output
+
+ paragraph_set<L> par_set = make::paragraph(output, right);
+ return par_set;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::text
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HDOC_HH
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index 31a5ed4..dd5762a 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -1,5 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -181,7 +180,10 @@ namespace scribo
l1 = my_find_root(parent, l1);
l2 = my_find_root(parent, l2);
if (l1 == l2)
- return l1;
+ {
+ std::cerr << "what! in'do_union': already merged!!!" << std::endl;
+ return l1;
+ }
swap_ordering(l1, l2);
parent[l2] = l1; // The smallest label value is root.
@@ -224,16 +226,16 @@ namespace scribo
template <typename L>
- bool between_separators(const scribo::line_set<L>& lines,
+ bool between_separators(const line_set<L>& lines,
const line_id_t& l1_,
const line_id_t& l2_)
{
- const scribo::line_info<L>& l1 = lines(l1_);
- const scribo::line_info<L>& l2 = lines(l2_);
-
// No separators found in image.
mln_precondition(lines.components().has_separators());
+ const scribo::line_info<L>& l1 = lines(l1_);
+ const scribo::line_info<L>& l2 = lines(l2_);
+
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -295,11 +297,11 @@ namespace scribo
*/
template <typename L>
bool lines_can_merge(scribo::line_set<L>& lines,
- const scribo::line_id_t& l1_,
- const scribo::line_id_t& l2_)
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
scribo::line_info<L>& l1 = lines(l1_);
- scribo::line_info<L>& l2 = lines(l2_);
+ const scribo::line_info<L>& l2 = lines(l2_);
// Parameters.
const float x_ratio_max = 1.7f;
@@ -432,12 +434,13 @@ namespace scribo
*/
template <typename L>
- bool non_text_and_text_can_merge(scribo::line_set<L>& lines,
- const scribo::line_id_t& l_cur_, // current
- const scribo::line_id_t l_ted_) // touched
+ bool non_text_and_text_can_merge(line_set<L>& lines,
+ const line_id_t& l_cur_, // current
+ const line_id_t& l_ted_) // touched
{
scribo::line_info<L>& l_cur = lines(l_cur_);
- scribo::line_info<L>& l_ted = lines(l_ted_);
+ const scribo::line_info<L>& l_ted = lines(l_ted_);
+
if (l_cur.type() == line::Text || l_ted.type() != line::Text)
return false;
@@ -470,8 +473,8 @@ namespace scribo
if (l_cur_height < l_ted_x_height
&& l_cur_height > 0.05f * l_ted_x_height
&& float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
- && dx < 2 * l_ted_cw
- && l_cur_pmin.row() < l_ted.baseline())
+ && dx < l_ted_cw
+ && l_cur_pmin.row() < l_ted_pmax.row())
{
l_cur.update_type(line::Punctuation);
return true;
@@ -634,10 +637,8 @@ namespace scribo
image2d<unsigned> billboard(domain);
data::fill(billboard, 0);
-# ifndef SCRIBO_NDEBUG
image2d<value::int_u8> log(domain);
data::fill(log, 0);
-# endif // ! SCRIBO_NDEBUG
const unsigned n = v.size();
unsigned l_;
@@ -669,7 +670,7 @@ namespace scribo
x---------------x
| |
| mc |
- ml x x x mr
+ ml x x x mr
| |
| |
x---------------x
@@ -752,12 +753,11 @@ namespace scribo
// vertically aligned
// Obviously no separators between the two lines
if ((l_info.card() <= 5 ||
- (std::abs(l_info.baseline() - mc_info.baseline())
- < 5 && std::abs(l_info.meanline() -
- mc_info.meanline()) < 5))
- && dx < l_ted_cw && dy < 0
- && not (lines.components().has_separators()
- && between_separators(lines, l, mc)))
+ (std::abs(l_info.baseline() - mc_info.baseline()) < 5
+ && std::abs(l_info.meanline() - mc_info.meanline()) < 5))
+ && dx < l_ted_cw && dy < 0
+ && not (lines.components().has_separators()
+ && between_separators(lines, l, mc)))
l = do_union(lines, l, mc, parent);
// }
@@ -776,10 +776,9 @@ namespace scribo
// l_ = do_union(lines, mc, l, parent);
// draw_box(billboard, lines(l_).ebbox(), l_);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 126);
-# endif // ! SCRIBO_NDEBUG
+
}
else // FIXME: Remove! since included in a non-text-line, so not drawn, so inclusion impossible!!!!!!!!!!
@@ -789,11 +788,8 @@ namespace scribo
// a non-text-line (probably a drawing or a frame) includes a text line
draw_box(billboard, lines(l).ebbox(), l);
-
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 100);
-# endif // ! SCRIBO_NDEBUG
}
}
@@ -832,10 +828,8 @@ namespace scribo
// it may change of label (take the one of the included line).
draw_box(billboard, lines(l_).ebbox(), l_);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 128);
-# endif // ! SCRIBO_NDEBUG
}
}
}
@@ -848,15 +842,11 @@ namespace scribo
{
++count_new_txtline;
draw_box(billboard, lines(l).ebbox(), l);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 127);
-# endif // ! SCRIBO_NDEBUG
}
-# ifndef SCRIBO_NDEBUG
else
- draw_box(log, b, 1);
-# endif // ! SCRIBO_NDEBUG
+ draw_box(log, b, 1);
}
}
else
@@ -873,8 +863,8 @@ namespace scribo
if (lcand == 0) // Skip background.
continue;
- // if (lines(lcand).type() != line::Text)
- // std::cerr << "again!" << std::endl;
+ if (lines(lcand).type() != line::Text)
+ std::cerr << "again!" << std::endl;
if (lines(l_).type() == line::Text)
@@ -886,22 +876,15 @@ namespace scribo
l_ = do_union(lines, l_, lcand, parent);
draw_box(billboard, lines(l_).ebbox(), l_);
-
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 151);
-# endif // ! SCRIBO_NDEBUG
-
continue;
}
else
{
++count_WTF;
-
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 255);
-# endif // ! SCRIBO_NDEBUG
// (*) SEE BELOW
draw_box(billboard, lines(l_).ebbox(), l_);
@@ -918,20 +901,15 @@ namespace scribo
l_ = do_union(lines, l_, lcand, parent);
draw_box(billboard, lines(l_).ebbox(), l_);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 169);
-# endif // ! SCRIBO_NDEBUG
-
continue;
}
-# ifndef SCRIBO_NDEBUG
else
{
// Log:
draw_box(log, b, 254);
}
-# endif // ! SCRIBO_NDEBUG
}
@@ -1081,6 +1059,7 @@ namespace scribo
// ts = t.stop();
// std::cout << "time " << ts << std::endl;
+
lines.force_stats_update();
return lines;
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging_hdoc.hh
similarity index 99%
copy from scribo/scribo/text/merging.hh
copy to scribo/scribo/text/merging_hdoc.hh
index 31a5ed4..6f89d2a 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging_hdoc.hh
@@ -24,8 +24,8 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_TEXT_MERGING_HH
-# define SCRIBO_TEXT_MERGING_HH
+#ifndef SCRIBO_TEXT_MERGING_HDOC_HH
+# define SCRIBO_TEXT_MERGING_HDOC_HH
/// \file
///
@@ -82,7 +82,7 @@ namespace scribo
//
template <typename L>
line_set<L>
- merging(const scribo::line_set<L>& lines);
+ merging_hdoc(const scribo::line_set<L>& lines);
# ifndef MLN_INCLUDE_ONLY
@@ -1094,7 +1094,7 @@ namespace scribo
template <typename L>
line_set<L>
- merging(const scribo::line_set<L>& lines)
+ merging_hdoc(const scribo::line_set<L>& lines)
{
using namespace mln;
@@ -1116,4 +1116,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_TEXT_MERGING_HH
+#endif // ! SCRIBO_TEXT_MERGING_HDOC_HH
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
deleted file mode 100644
index e37f610..0000000
--- a/scribo/scribo/text/paragraphs.hh
+++ /dev/null
@@ -1,1270 +0,0 @@
-#include <mln/util/array.hh>
-#include <mln/accu/shape/bbox.hh>
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-#include <mln/draw/box.hh>
-#include <mln/data/convert.hh>
-#include <mln/value/int_u16.hh>
-#include <mln/value/label_16.hh>
-#include <mln/value/int_u8.hh>
-#include <mln/value/rgb8.hh>
-#include <mln/io/ppm/save.hh>
-#include <mln/io/pgm/save.hh>
-#include <mln/geom/rotate.hh>
-#include <mln/literal/colors.hh>
-
-#include <scribo/core/macros.hh>
-#include <scribo/core/line_set.hh>
-#include <scribo/core/line_links.hh>
-#include <scribo/core/line_info.hh>
-#include <scribo/core/paragraph_set.hh>
-
-using namespace mln;
-
-namespace scribo
-{
- namespace filter
- {
-
- template <typename L>
- inline
- bool
- between_horizontal_separator(const line_set<L>& lines,
- const line_id_t& l1_,
- const line_id_t& l2_)
- {
- const line_info<L>& l1 = lines(l1_);
- const line_info<L>& l2 = lines(l2_);
-
- // No separators found in image.
- mln_precondition(lines.components().has_separators());
-
- const box2d& l1_bbox = l1.bbox();
- const box2d& l2_bbox = l2.bbox();
-
- unsigned
- row1 = l1_bbox.pcenter().row(),
- row2 = l2_bbox.pcenter().row();
- const mln_ch_value(L, bool)&
- separators = lines.components().separators();
-
- unsigned row;
- unsigned col_ptr;
- unsigned left_col_ptr;
- unsigned right_col_ptr;
- unsigned end;
-
- if (row1 < row2)
- {
- row1 = l1_bbox.pmax().row();
- row2 = l2_bbox.pmin().row();
-
- const unsigned quarter =
- ((l1_bbox.pcenter().col() - l1_bbox.pmin().col()) >> 2);
-
- row = l1_bbox.pcenter().row();
- col_ptr = l1_bbox.pcenter().col();
- left_col_ptr = l1_bbox.pmin().col() + quarter;
- right_col_ptr = l1_bbox.pmax().col() - quarter;
- end = row2;
- }
- else
- {
- row2 = l2_bbox.pmax().row();
- row1 = l1_bbox.pmin().row();
-
- const unsigned quarter =
- ((l2_bbox.pcenter().col() - l2_bbox.pmin().col()) >> 2);
-
- row = l2_bbox.pcenter().row();
- col_ptr = l2_bbox.pcenter().col();
- left_col_ptr = l2_bbox.pmin().col() + quarter;
- right_col_ptr = l2_bbox.pmax().col() - quarter;
- end = row1;
- }
-
- // If sep_ptr is true, then a separator is reached.
- while (row < end)
- {
- ++row;
- if (separators.at_(row, col_ptr)
- || separators.at_(row, left_col_ptr)
- || separators.at_(row, right_col_ptr))
- return true;
- }
-
- return false;
- }
-
-
- template <typename L>
- bool may_have_another_left_link(const line_links<L>& right,
- const line_id_t& index,
- const line_id_t& current_line,
- const line_set<L>& lines)
- {
- const line_info<L>& l = lines(current_line);
- const point2d& pmin = l.bbox().pmin();
- const unsigned x1 = l.x_height();
-
- for_all_links(i, right)
- if (i != index && right(i) == index)
- {
- const line_info<L>& l_info = lines(i);
- const unsigned x2 = l_info.x_height();
-
- const float delta_max = 0.5f * std::min(x1, x2);
-
- if (l_info.bbox().pmin().col() < pmin.col()
- && std::abs(l.baseline() - l_info.baseline()) < delta_max
- )
- return true;
- }
-
- return false;
- }
-
-//---------------------------------------------------------------------
-// This method aims to cut the links between lines that do not fit the
-// different criteria
-//---------------------------------------------------------------------
-
- template <typename L>
- inline
- void paragraph_links(const line_links<L>& left,
- line_links<L>& right,
- line_links<L>& output,
- const line_set<L>& lines)
- {
- output = left.duplicate();
-
- // const unsigned nlines = lines.nelements();
-
- // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
- // for (unsigned l = 0; l < nlines; ++l)
- // {
- // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
- // }
- // mln::io::ppm::save(links, "out_links.ppm");
-
- // For each line
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Neighbors
-
- line_id_t left_nbh = output(l);
- line_id_t right_nbh = right(l);
- line_id_t lol_nbh = output(left_nbh);
-
- // const line_info<L>& left_line = lines(left_nbh);
- // const line_info<L>& current_line = lines(l);
- // const line_info<L>& right_line = lines(right_nbh);
-
- if (lines.components().has_separators() &&
- between_horizontal_separator(lines, right_nbh, l))
- {
- output(right_nbh) = right_nbh;
- right_nbh = l;
- }
- if (lines.components().has_separators() &&
- between_horizontal_separator(lines, l, left_nbh))
- {
- output(l) = l;
- left_nbh = l;
- lol_nbh = l;
- }
-
- // Line features
- const float x_height = lines(l).x_height();
- const float left_x_height = lines(left_nbh).x_height();
- const float right_x_height = lines(right_nbh).x_height();
-
- const box2d& left_line_bbox = lines(left_nbh).bbox();
- const box2d& current_line_bbox = lines(l).bbox();
- const box2d& right_line_bbox = lines(right_nbh).bbox();
- const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
-
- const int lline_col_min = left_line_bbox.pmin().col();
- const int cline_col_min = current_line_bbox.pmin().col();
- const int rline_col_min = right_line_bbox.pmin().col();
- const int lolline_col_min = lol_line_bbox.pmin().col();
-
- const int lline_col_max = left_line_bbox.pmax().col();
- const int cline_col_max = current_line_bbox.pmax().col();
- const int rline_col_max = right_line_bbox.pmax().col();
-
- const int lline_cw = lines(left_nbh).char_width();
- const int cline_cw = lines(l).char_width();
- const int rline_cw = lines(right_nbh).char_width();
- // Maximal x variation to consider two lines vertically aligned
- const int delta_alignment = cline_cw;
-
-
- // Checks the baseline distances of the two neighbors
- {
- // Current line baseline
- const int c_baseline = lines(l).baseline();
-
- // Baseline distance with the left and right neighbors
- const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
- const int rc_baseline = c_baseline -lines(right_nbh).baseline();
-
- // Max baseline distance between the two neighbors
- // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
- // const float delta_baseline_min = std::min(lc_baseline,
- // rc_baseline);
-
- // Only two lines, meaning the current line has only one neighbor
- bool two_lines = false;
-
- // If the current line has no left neighbor
- if (lc_baseline == 0)
- {
- // ror : right neighbor of the right neighbor
- const line_id_t ror_nbh = right(right_nbh);
- //const box2d& ror_line_bbox = lines(ror_nbh).bbox();
-
- // If the current line has a ror
- if (ror_nbh != right_nbh
- && output(ror_nbh) == right_nbh)
- {
- // Distance between the current line and the right neighbor
- const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
- // Distance between the right neighbor and the ror
- const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
- // ror x_height
- const float ror_x_height = lines(ror_nbh).x_height();
-
- // Conditions to cut the link between the current line
- // and its right neighbor
- if (right_distance > 1.4f * ror_distance
- && std::max(ror_x_height, right_x_height) <
- 1.4f * std::min(ror_x_height, right_x_height)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- // Otherwise we only have a group of two lines
- else
- {
- // We determine the distance between the two lines
- const float distance = lines(l).meanline() - lines(right_nbh).baseline();
- two_lines = true;
-
- // If the distance between the two lines is greater than
- // the minimum x height of the two lines then we cut the
- // link between them
- if (distance > 2.0f * std::min(x_height, right_x_height)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
-
- // Lines features
- const float min_x_height = std::min(x_height, right_x_height);
- const float max_x_height = std::max(x_height, right_x_height);
- const float min_char_width = std::min(rline_cw, cline_cw);
- const float max_char_width = std::max(rline_cw, cline_cw);
-
- // Condition to cut the link between the current line and
- // its right neighbor
- if ((max_x_height > min_x_height * 1.4f) &&
- !(max_char_width <= 1.2f * min_char_width))
- {
- if (output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
-
- // If we only have two lines we stop the study
- if (two_lines)
- continue;
- }
- // If the current line has no right neighbor
- else if (rc_baseline == 0)
- {
- // lol : left neighbor of the left neighbor
-
- // If the left neighbor of the current line has a left neighbor
- if (lol_nbh != left_nbh)
- {
- // Distance between the current line and its left neighbor
- const float left_distance = lines(left_nbh).meanline() -
- lines(l).baseline();
- // Distance between the left neighbor and the left
- // neighbor of its left neighbor
- const float lol_distance = lines(lol_nbh).meanline() -
- lines(left_nbh).baseline();
- // lol x height
- const float lol_x_height = lines(lol_nbh).x_height();
-
- // Conditions to cut the link between the current line
- // and its left neighbor
- if (left_distance > 1.4f * lol_distance
- && std::max(lol_x_height, left_x_height) <
- 1.4f * std::min(lol_x_height, left_x_height))
- {
- output(l) = l;
- continue;
- }
- }
- // Otherwise we only have a group of two lines
- else
- {
- // Distance between the current line and it left neighbor
- const float distance = lines(left_nbh).meanline() -
- lines(l).baseline();
-
- two_lines = true;
-
- // If the distance is greater than the min x height
- // between the two lines
- if (distance > 2.0f * std::min(x_height, left_x_height))
- {
- output(l) = l;
- continue;
- }
- }
-
- // Lines features
- const float min_x_height = std::min(x_height, left_x_height);
- const float max_x_height = std::max(x_height, left_x_height);
- const float min_char_width = std::min(lline_cw, cline_cw);
- const float max_char_width = std::max(lline_cw, cline_cw);
-
- // Condition to cut the link between the current line and
- // its left neighbor
- if ((max_x_height > min_x_height * 1.4f) &&
- !(max_char_width <= 1.2f * min_char_width))
- {
- output(l) = l;
- continue;
- }
-
- // If we only have two lines we stop the study
- if (two_lines)
- continue;
- }
- // The current line has at least one left and one right neighbor
- else // if (delta_baseline_max >= 1.1 * delta_baseline_min)
- {
- // Distance between the left and the current line
- const float
- left_distance = left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
- // Distance between the right and the current line
- const float
- right_distance = current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();;
-
- // If the left line is too far compared to the right one
- // we cut the link with it
- if (left_distance > 1.5f * right_distance
- && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
- {
- output(l) = l;
- continue;
- }
- // If the right line is too far compared to the left one
- // we cut the link with it
- else if (right_distance > 1.5f * left_distance
- && std::max(x_height, right_x_height) >= 1.2f * std::min(x_height, right_x_height)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
-
- // If the distance between the baseline of the left
- // neighbor and the baseline of the current line is
- // greater than the one between the current line baseline
- // and the right line baseline we have to study the texte
- // features of the right and left lines
- if (lc_baseline > rc_baseline)
- {
- const float cw_max = std::max(lline_cw, cline_cw);
- const float cw_min = std::min(lline_cw, cline_cw);
- const float min_x_height = std::min(x_height, left_x_height);
- const float max_x_height = std::max(x_height, left_x_height);
-
- if ((max_x_height > min_x_height * 1.4f) &&
- !(cw_max <= 1.2f * cw_min))
- {
- output(l) = l;
- continue;
- }
-
- {
- const float min_x_height = std::min(x_height, right_x_height);
- const float max_x_height = std::max(x_height, right_x_height);
- const float cw_max = std::max(rline_cw, cline_cw);
- const float cw_min = std::min(rline_cw, cline_cw);
-
- if ((max_x_height > min_x_height * 1.4f)
- && !(cw_max <= 1.2f * cw_min)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
- else
- {
- const float cw_max = std::max(rline_cw, cline_cw);
- const float cw_min = std::min(rline_cw, cline_cw);
- const float min_x_height = std::min(x_height, right_x_height);
- const float max_x_height = std::max(x_height, right_x_height);
-
- if ((max_x_height > min_x_height * 1.4f)
- && !(cw_max <= 1.2f * cw_min)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
-
- {
- const float min_x_height = std::min(x_height, left_x_height);
- const float max_x_height = std::max(x_height, left_x_height);
- const float cw_max = std::max(lline_cw, cline_cw);
- const float cw_min = std::min(lline_cw, cline_cw);
-
- if ((max_x_height > min_x_height * 1.4f)
- && !(cw_max <= 1.2f * cw_min))
- {
- output(l) = l;
- continue;
- }
- }
- }
- }
- }
-
- // If we arrive here, it means than the lines in the
- // neighborhood of the current line are quite similar. We can
- // then begin to study the indentations in order to determine
- // the beginning of new paragraphs
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ________________________
-// |________________________|
-// ___________________________
-// |___________________________|
-// ___________________________
-// |___________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above.
-//
-//-----------------------------------------------------------------------------
-
- {
- // Check if the current line neighbors are aligned
- bool left_right_aligned = false;
- bool left_lol_aligned = false;
- const int dx_lr = std::abs(lline_col_min - rline_col_min);
- const int dx_llol = std::abs(lline_col_min - lolline_col_min);
-
- if (dx_lr < delta_alignment)
- left_right_aligned = true;
-
- if (dx_llol < delta_alignment)
- left_lol_aligned = true;
-
- if (left_right_aligned && left_lol_aligned)
- {
- const int left_right_col_min = std::min(lline_col_min, rline_col_min);
- const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
-
- if (dx_lrc > l_char_width &&
- dx_lrc < 3.0f * l_char_width &&
- cline_col_min > rline_col_min &&
- cline_col_min > lline_col_min)
- {
- const line_id_t out_right_nbh = output(right_nbh);
-
- if (out_right_nbh != l)
- {
- if (output(out_right_nbh) == l)
- output(out_right_nbh) = out_right_nbh;
- right(l) = l;
- }
- else
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________________
-// |___________________| End of the paragraph - Current line
-// ________________________
-// |________________________| Beginning of a new one
-// ___________________________
-// |___________________________| Left of left of current line
-//
-// End of paragraph case : we try to find an end to the current paragraph
-//
-//-----------------------------------------------------------------------------
-
- {
- // Check if the current line neighbors are aligned
- bool left_right_max_aligned = false;
- bool left_current_min_aligned = false;
- bool lol_current_min_aligned = false;
- const bool lol_is_left = output(left_nbh) == left_nbh;
- const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
- const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
- const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
-
- if (dx_lr_max < delta_alignment)
- left_right_max_aligned = true;
-
- if (dx_lc_min < delta_alignment)
- left_current_min_aligned = true;
-
- if (dx_lolc_min < delta_alignment)
- lol_current_min_aligned = true;
-
- if (!left_current_min_aligned && left_right_max_aligned &&
- (lol_current_min_aligned || lol_is_left))
- {
- const int dx_lrc = std::abs(lline_col_max - cline_col_max);
- const int l_char_width = lines(l).char_width();
- const int dx_indent = std::abs(std::max(lline_col_min,
- rline_col_min) - cline_col_min);
-
- if (dx_lrc > l_char_width &&
- dx_indent < 4 * delta_alignment &&
- cline_col_max < lline_col_max &&
- cline_col_min < lline_col_min &&
- (lline_col_min > lolline_col_min || lol_is_left))
- {
- output(l) = l;
- continue;
- }
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________________________
-// |___________________________|
-// ________________________
-// |________________________|
-// ___________________________
-// |___________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above.
-//
-//-----------------------------------------------------------------------------
-
- {
- const line_id_t ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
- const int rorline_col_min = ror_line_bbox.pmin().col();
-
- bool right_ror_min_aligned = false;
- bool left_right_aligned = false;
- const int dx_lr = std::abs(lline_col_min - rline_col_min);
- const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
-
- if (dx_rror_min < delta_alignment)
- right_ror_min_aligned = true;
-
- if (dx_lr < delta_alignment)
- left_right_aligned = true;
-
- if (right_ror_min_aligned && left_right_aligned &&
- ror_nbh != right_nbh)
- {
- const int left_right_col_min = std::min(lline_col_min, rline_col_min);
- const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
-
- if (dx_lrc > l_char_width &&
- !may_have_another_left_link(right, right_nbh, l, lines) &&
- dx_lrc < 10.0f * l_char_width &&
- cline_col_min > rline_col_min &&
- cline_col_min > lline_col_min)
- {
- const line_id_t out_right_nbh = output(right_nbh);
-
- if (out_right_nbh != l)
- {
- if (output(out_right_nbh) == l)
- output(out_right_nbh) = out_right_nbh;
- right(l) = l;
- }
- else
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________
-// |___________|
-// ________________________
-// |________________________|
-// ___________________________
-// |___________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above.
-//
-//-----------------------------------------------------------------------------
-
- {
- const line_id_t ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
- const int rorline_col_min = ror_line_bbox.pmin().col();
-
- bool left_ror_aligned = false;
- const int dx_lror = std::abs(lline_col_min - rorline_col_min);
-
- if (dx_lror < delta_alignment)
- left_ror_aligned = true;
-
- if (left_ror_aligned)
- {
- const int left_ror_col_min = std::min(lline_col_min, rorline_col_min);
- const int dx_lrorc = std::abs(left_ror_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
- const int dx_lrorr = std::abs(left_ror_col_min - rline_col_min);
- const int dx_crmax = std::abs(rline_col_max - cline_col_max);
-
- if (dx_lrorc > l_char_width &&
- dx_lrorr > 5 * l_char_width &&
- dx_lrorr > dx_lrorc &&
- dx_crmax > 5 * l_char_width &&
- !may_have_another_left_link(right, right_nbh, l, lines) &&
- dx_lrorc < 10.0f * l_char_width &&
- cline_col_min > rorline_col_min &&
- cline_col_min > lline_col_min)
- {
- right(right_nbh) = right_nbh;
- continue;
- }
- }
- }
-
-
-// Strange case
- {
- if (rline_col_min > current_line_bbox.pcenter().col()
- && !may_have_another_left_link(right, right_nbh, l, lines)
- && cline_col_max < rline_col_max
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________________________
-// |___________________________|
-// ________________________
-// |________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above at the end of a column.
-//
-//-----------------------------------------------------------------------------
-
- if (left_nbh == l)
- {
- const line_id_t ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
- const int rorline_col_min = ror_line_bbox.pmin().col();
-
- bool right_ror_min_aligned = false;
- const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
-
- if (dx_rror_min < delta_alignment)
- right_ror_min_aligned = true;
-
- if (right_ror_min_aligned)
- {
- const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
- const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
-
- if (dx_rrorc > l_char_width &&
- dx_rrorc < 10.0f * l_char_width &&
- cline_col_min > rline_col_min &&
- cline_col_max >= rline_col_max)
- {
- const line_id_t out_right_nbh = output(right_nbh);
-
- if (out_right_nbh != l)
- {
- if (output(out_right_nbh) == l)
- output(out_right_nbh) = out_right_nbh;
- right(l) = l;
- }
- else
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
- }
-
- // Only debug
- // {
- // image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
-
- // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
- // for_all_lines(l, lines)
- // if (lines(l).is_textline())
- // {
- // // if (lines(i).is_textline())
- // // {
- // // mln::draw::box(debug, lines(i).bbox(), literal::red);
- // nbbox(output(l)).take(lines(l).bbox());
- // // }
- // }
-
- // for (unsigned i = 0; i < nbbox.nelements(); ++i)
- // if (nbbox(i).is_valid())
- // {
- // box2d b = nbbox(i).to_result();
- // mln::draw::box(debug, b, literal::orange);
- // b.enlarge(1);
- // mln::draw::box(debug, b, literal::orange);
- // b.enlarge(1);
- // mln::draw::box(debug, b, literal::orange);
- // }
-
- // mln::io::ppm::save(debug, "out_paragraph.ppm");
- // }
-
- }
- }
-
-//-------------------------------------------------------------
-// Preparation of the lines before linking them.
-// For each text line, its bounding box is rotated and stored in rbbox,
-// then its top and its bottom are drawn in the block image. With
-// i = l + 1, l being the line id, the top is drawn with the value
-// 2 * i and the bottom with 2 * i + 1, on a column where no already
-// drawn box overlaps it.
-//-------------------------------------------------------------
-
- template <typename L>
- inline
- void prepare_lines(const box2d& domain,
- const line_set<L>& lines,
- L& blocks,
- mln::util::array<box2d>& rbbox) // Indexed by line id below: the caller must have sized it
- {
- std::map< int, std::vector< const box2d* > > drawn_lines; // Boxes already drawn, by column
- // const unsigned nlines = lines.nelements();
-
- // For each text line
- //for (unsigned l = 0; l < nlines; ++l)
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Rotation of the bounding box (angle argument -90, around the center of the domain)
- box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
-// rbbox.append(b);
- rbbox(l) = b;
-
- const unsigned index = l + 1;
- const unsigned even_index = 2 * index; // Value used for the top of the line
- const unsigned odd_index = even_index + 1; // Value used for the bottom of the line
-
- // Top of the line
- {
- bool not_finished = true;
- int col_offset = 0;
-
- while (not_finished)
- {
- // Looking for a column in the image to draw the top of the
- // line, starting from the last column of the rotated box
-
- const int col = b.pmax().col() + col_offset;
- std::map< int, std::vector< const box2d* > >::iterator it
- = drawn_lines.find(col);
-
- if (it != drawn_lines.end())
- {
- const std::vector< const box2d* >& lines = (*it).second; // Shadows the `lines` parameter
- const unsigned nb_lines = lines.size();
- unsigned i = 0;
-
- for (i = 0; i < nb_lines; ++i)
- {
- const box2d* box = lines[i];
- const int min_row = std::max(b.pmin().row(), box->pmin().row());
- const int max_row = std::min(b.pmax().row(), box->pmax().row());
-
- if (min_row - max_row <= 0) // Row ranges intersect: this column is taken
- break;
- }
-
- if (i == nb_lines) // No overlap with the boxes drawn on this column
- {
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), even_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l])); // NOTE(review): pointer into rbbox, which must not be resized meanwhile
- }
- else
- ++col_offset; // Try the next column
- }
- else
- { // Nothing drawn on this column yet
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), even_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l]));
- }
- }
- }
-
- // Bottom of the line
- {
- bool not_finished = true;
- int col_offset = 0;
-
- while (not_finished)
- {
- // Looking for a column in the image to draw the bottom of
- // the line, starting from the first column of the rotated box
-
- const int col = b.pmin().col() - col_offset;
- std::map< int, std::vector< const box2d* > >::iterator it
- = drawn_lines.find(col);
-
- if (it != drawn_lines.end())
- {
- const std::vector< const box2d* >& lines = (*it).second; // Shadows the `lines` parameter
- const unsigned nb_lines = lines.size();
- unsigned i = 0;
-
- for (i = 0; i < nb_lines; ++i)
- {
- const box2d* box = lines[i];
- const int min_row = std::max(b.pmin().row(), box->pmin().row());
- const int max_row = std::min(b.pmax().row(), box->pmax().row());
-
- if (min_row - max_row <= 0) // Row ranges intersect: this column is taken
- break;
- }
-
- if (i == nb_lines) // No overlap with the boxes drawn on this column
- {
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), odd_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l]));
- }
- else
- ++col_offset; // Try the previous column (col decreases with col_offset)
- }
- else
- { // Nothing drawn on this column yet
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), odd_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l]));
- }
- }
- }
- }
- }
- // Computes left(l) for each line l by scanning the block image (see prepare_lines) towards decreasing columns.
- template <typename L>
- inline
- void
- process_left_link(L& blocks,
- const mln::util::array<box2d>& rbbox,
- const line_set<L>& lines,
- line_links<L>& left)
- {
- typedef scribo::def::lbl_type V;
-
- // At the beginning each text line is its own neighbor; any other line is linked to 0
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- left(l) = l;
- else
- left(l) = 0;
-
- // const unsigned nlines = lines.nelements();
-
- // For each text line; left(l) stays l when no suitable neighbor is met
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Max distance for the line search
- int dmax = 1.5f * lines(l).x_height();
-
- // Starting points in the current line box: its center, and a point of the same column closer to the first row
- point2d c = rbbox(l).pcenter();
- point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col());
-
- int
- midcol = (rbbox(l).pmax().col()
- - rbbox(l).pmin().col()) / 2; // Half of the column extent of the rotated box
-
- // Left
- {
- // Left margin: number of columns between the center and the first column of the image
- int
- nleftima = c.col() - blocks.domain().pmin().col(),
- // Left search distance, limited by the left margin
- nleft = std::min(nleftima, midcol + dmax);
-
- V
- // Starting points in the box
- *p = &blocks(c),
- *p2 = &blocks(q),
- // End of search
- *pstop = p - nleft - 1,
- // Line neighbor
- *nbh_p = 0;
-
- // While we haven't found a neighbor or reached the limit (NOTE(review): assumes the columns of a row are contiguous in memory)
- for (; p != pstop; --p, --p2)
- {
- if (*p2 != literal::zero // Not the background
- && ((*p2 % 2) == 0) // Even value: top of a line (see prepare_lines)
- && left((*p2 >> 1) - 1) != l) // No loops ((v >> 1) - 1 is the id of the line drawn with v)
- {
- // Neighbor found, we stop the search
- nbh_p = p2;
- break;
- }
-
- if (*p != literal::zero // Not the background
- && ((*p % 2) == 0) // Even value: top of a line (see prepare_lines)
- && left((*p >> 1) - 1) != l) // No loops
- {
- // Neighbor found, we stop the search
- nbh_p = p;
- break;
- }
- }
-
- // If a neighbor was found, then we have found the top of the
- // line. We are then looking for the bottom of the encountered
- // line. If during the search process we find a complete line
- // included in the touched line, this line is considered as
- // the neighbor under certain conditions (see below)
-
- //---------------------------------------------------------------
- // _________________________ |
- // |_________________________| => Current line | Search direction
- // v
- // => First encountered top line
- // __________________________________________________ 2Q
- // | Q |
- // | _________________________ |2P
- // | |_____________P___________| => Second top |2P + 1
- // | line |
- // |__________________________________________________|2Q + 1
- //
- //
- //---------------------------------------------------------------
-
- if (nbh_p)
- {
- std::vector<V> lines_nbh; // Top values met so far
- const V end_p = *nbh_p + 1; // Value of the bottom of the encountered line
- const V* nbh_p_copy = nbh_p;
-
- for (; *nbh_p != end_p; --nbh_p) // NOTE(review): no check against the image bounds in this walk
- {
- if ((*nbh_p) != literal::zero) // Not the background
- {
- if ((*nbh_p) % 2 == 0)// We have found the top of
- // another line
- lines_nbh.push_back(*nbh_p);
- else
- {
- // We have found the bottom of a line. We are looking if
- // we have already encountered the top of this
- // line. If so, we link the current line with this one
- // under certain conditions:
-
- if (std::find(lines_nbh.begin(), lines_nbh.end(),
- (*nbh_p) - 1) != lines_nbh.end())
- {
- // If we can link the complete line with the current line
- if (// It must be in the search range
- nbh_p > pstop
- // Avoid loops
- && left(((*nbh_p - 1) >> 1) - 1) != l)
- left(l) = ((*nbh_p - 1) >> 1) - 1;
-
- // We have found a complete line so we stop the search
- break;
- }
- }
- }
- }
-
-
- // If we haven't found any included line in the first
- // neighbor, then the line is considered as the neighbor of
- // the current line
- if (*nbh_p == end_p)
- left(l) = (*nbh_p_copy >> 1) - 1;
- }
- }
- }
- }
-
- // Computes right(l) for each line l by scanning the block image (see prepare_lines) towards increasing columns.
- // We assume that the lines have been rotated (rbbox holds their rotated bounding boxes)
- template <typename L>
- inline
- void
- process_right_link(L& blocks,
- const mln::util::array<box2d>& rbbox,
- const line_set<L>& lines,
- line_links<L>& right)
- {
- typedef scribo::def::lbl_type V;
-
- // At the beginning each text line is its own neighbor; any other line is linked to 0
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- right(l) = l;
- else
- right(l) = 0;
-
- // const unsigned nlines = lines.nelements();
-
- // For each text line; right(l) stays l when no suitable neighbor is met
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Max distance for the line search
- int dmax = 1.5f * lines(l).x_height();
-
- // Starting points in the current line box: its center, and a point of the same column closer to the last row
- point2d c = rbbox(l).pcenter();
- point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col());
-
- int
- midcol = (rbbox(l).pmax().col()
- - rbbox(l).pmin().col()) / 2; // Half of the column extent of the rotated box
-
- // Right
- {
- int
- nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(), // Right margin in the image
- nright = std::min(nrightima, midcol + dmax); // Right search distance, limited by the margin
-
- V
- // Starting points in the box
- *p = &blocks(c),
- *p2 = &blocks(q),
- // End of search
- *pstop = p + nright - 1,
- // Line neighbor
- *nbh_p = 0;
-
- // While we haven't found a neighbor or reached the limit (NOTE(review): assumes the columns of a row are contiguous in memory)
- for (; p != pstop; ++p, ++p2)
- {
- if (*p2 != literal::zero // Not the background
- && ((*p2 % 2) == 1) // Odd value: bottom of a line (see prepare_lines)
- && right(((*p2 - 1) >> 1) - 1) != l) // No loops (((v - 1) >> 1) - 1 is the id of the line drawn with v)
- {
- // Neighbor found, we stop the search
- nbh_p = p2;
- break;
- }
-
- if (*p != literal::zero // Not the background
- && ((*p % 2) == 1) // Odd value: bottom of a line (see prepare_lines)
- && right(((*p - 1) >> 1) - 1) != l) // No loops
- {
- // Neighbor found, we stop the search
- nbh_p = p;
- break;
- }
- }
-
- // If a neighbor was found, then we have found the bottom of the
- // line. We are then looking for the top of the encountered
- // line. If during the search process we find a complete line
- // included in the touched line, this line is considered as
- // the neighbor under certain conditions (see below)
-
- //---------------------------------------------------------------
- //
- //
- // __________________________________________________ 2Q
- // | Q |
- // | _________________________ |2P
- // | |_____________P___________| => Second bottom |2P + 1
- // | line |
- // |__________________________________________________|2Q + 1
- // => First encountered bottom line
- // _________________________ ^
- // |_________________________| => Current line | Search direction
- // |
- //---------------------------------------------------------------
-
- if (nbh_p)
- {
- std::vector<V> lines_nbh; // Bottom values met so far
- const V end_p = *nbh_p - 1; // Value of the top of the encountered line
- const V* nbh_p_copy = nbh_p;
-
- for (; *nbh_p != end_p; ++nbh_p) // NOTE(review): no check against the image bounds in this walk
- {
- if (*nbh_p != literal::zero) // Not the background
- {
- if (*nbh_p % 2 == 1) // We have found the bottom of
- // another line
- lines_nbh.push_back(*nbh_p);
- else
- {
- // We have found the top of a line. We are looking if
- // we have already encountered the bottom of this
- // line. If so, we link the current line with this one
- // under certain conditions:
-
- if (std::find(lines_nbh.begin(), lines_nbh.end(),
- *nbh_p + 1) != lines_nbh.end())
- {
- // If we can link the complete line with the current line
- if (// It must be in the search range
- nbh_p < pstop
- // Avoid loops
- && right((*nbh_p >> 1) - 1) != l)
- right(l) = (*nbh_p >> 1) - 1;
-
- // We have found a complete line, so we stop the search
- break;
- }
- }
- }
- }
-
- // If we haven't found any included line in the first
- // neighbor, then the line is considered as the neighbor of
- // the current line
-
- if (*nbh_p == end_p)
- right(l) = ((*nbh_p_copy - 1) >> 1) - 1;
- }
- }
- }
- }
-
-//-----------------------------------------------------------------------
-// Finalizing the links by merging information extracted from the left
-// and right links: for each text line l, if the left (resp. right) neighbor of l
-// is still its own right (resp. left) neighbor, it is linked back to l.
-//-----------------------------------------------------------------------
- template< typename L >
- inline
- void finalize_links(line_links<L>& left,
- line_links<L>& right,
- const line_set<L>& lines)
- {
- // const unsigned nlines = lines.nelements();
-
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- const unsigned left_value = left(l);
- const unsigned right_value = right(l);
-
- // If the right neighbor of my left neighbor is itself then its
- // right neighbor is me
- {
- line_id_t& v = right(left_value); // Reference: the assignment below updates the link itself
-
- if (v == left_value)
- v = l;
- }
-
- // If the left neighbor of my right neighbor is itself then its
- // left neighbor is me
- {
- line_id_t& v = left(right_value); // Reference: the assignment below updates the link itself
-
- if (v == right_value)
- v = l;
- }
- }
- }
- // Entry point: builds a paragraph set from the lines; input is only used here for its domain.
- template <typename L>
- inline
- paragraph_set<L>
- extract_paragraphs(line_set<L>& lines,
- const image2d<bool>& input)
- {
- typedef scribo::def::lbl_type V;
-
- image2d<V> blocks(geom::rotate(input.domain(), -90, input.domain().pcenter())); // Block image on the rotated domain
- data::fill(blocks, 0); // 0 is tested as the background value by the linking functions
-
- // util::array< line_info<L> > lines_info;
-
- // for_all_lines(l, lines)
- // {
- // if (lines(l).is_textline())
- // lines_info.append(lines(l));
- // }
-
-/// const unsigned nlines = lines_info.nelemnts();
- mln::util::array<box2d> rbbox;
- line_links<L> left(lines);
- left(0) = 0;
- line_links<L> right(lines);
- right(0) = 0;
- line_links<L> output(lines);
- output(0) = 0;
-
- rbbox.resize(lines.nelements() + 1); // prepare_lines indexes rbbox by line id; presumably ids go up to nelements() - TODO confirm
-
-// std::cout << "Preparing lines" << std::endl;
- prepare_lines(input.domain(), lines , blocks, rbbox);
-// io::pgm::save(blocks, "blocks.pgm");
-// std::cout << "Linking left" << std::endl;
- process_left_link(blocks, rbbox, lines , left);
-// std::cout << "Linking right" << std::endl;
- process_right_link(blocks, rbbox, lines , right);
-// std::cout << "Finalizing links" << std::endl;
- finalize_links(left, right, lines );
- // std::cout << "Finalizing merging" << std::endl;
- // finalize_line_merging(left, right, lines);
-// std::cout << "Extracting paragraphs" << std::endl;
- filter::paragraph_links(left, right, output, lines); // Presumably fills output with the links kept inside paragraphs - TODO confirm
-
- paragraph_set<L> par_set = make::paragraph(output, right);
- return par_set;
- }
-}
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index 4f2c074..52ee5b0 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -63,7 +63,7 @@
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
-# include <scribo/text/paragraphs.hh>
+# include <scribo/text/extract_paragraphs.hh>
# include <scribo/make/debug_filename.hh>
@@ -541,7 +541,7 @@ namespace scribo
// scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
scribo::paragraph_set<L>
- parset = extract_paragraphs(lines, doc.binary_image());
+ parset = text::extract_paragraphs(lines, doc.binary_image());
doc.set_paragraphs(parset);
on_progress();
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index 5e98f3e..4a74aef 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -74,9 +74,9 @@
# include <scribo/text/recognition.hh>
# endif // ! SCRIBO_NOCR
-# include <scribo/text/merging.hh>
+# include <scribo/text/merging_hdoc.hh>
# include <scribo/text/link_lines.hh>
-# include <scribo/text/paragraphs.hh>
+# include <scribo/text/paragraphs_hdoc.hh>
# include <scribo/make/debug_filename.hh>
@@ -461,7 +461,7 @@ namespace scribo
on_new_progress_label("Merging segmented lines");
- lines = scribo::text::merging(lines);
+ lines = scribo::text::merging_hdoc(lines);
//===== DEBUG =====
@@ -525,7 +525,8 @@ namespace scribo
on_new_progress_label("Extracting paragraphs");
scribo::paragraph_set<L>
- parset = extract_paragraphs(lines, doc.binary_image());
+ parset = scribo::text::extract_paragraphs_hdoc(lines,
+ doc.binary_image());
on_progress();
--
1.5.6.5
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch fix-leak-remove_holder has been deleted
was a2c67cf6ac3577535d74469cece6919ed1e67c2e
-----------------------------------------------------------------------
a2c67cf6ac3577535d74469cece6919ed1e67c2e Add file.
-----------------------------------------------------------------------
hooks/post-receive
--
Olena, a generic and efficient image processing platform
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch unstable/scribo has been updated
via 3a86ff37896bf5b5788b25efa97a2c7a8c700a5d (commit)
from 3368692f6ecf9e857f8443caa3f8d60da470a1f9 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
3a86ff3 Adding horizontal separators check in paragraphs construction
-----------------------------------------------------------------------
Summary of changes:
scribo/scribo/text/paragraphs.hh | 116 ++++++++++++++++++++++++++++++++++----
1 files changed, 105 insertions(+), 11 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch Sylvain has been updated
via a794c88fd084cd17f09ce4228c6b80f138f42f47 (commit)
from 9d2089bedcc47c6b7acbe24719b0f41e12d4c2da (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
a794c88 Modified weights in the choose function.
-----------------------------------------------------------------------
Summary of changes:
scribo/ChangeLog | 6 ++++++
scribo/scribo/inverse_video/choose.hh | 4 ++--
2 files changed, 8 insertions(+), 2 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform