Olena-patches
Threads by month
- ----- 2025 -----
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- 9625 discussions

03 May '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch julien_temp_paragraphs has been updated
via 6de9f46259689a7e2e125e79da07c3bd7335d8be (commit)
from 8bae0713f5a4e26137e59a5cc6a3f2e947592f7d (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
6de9f46 Correction on line spacing
-----------------------------------------------------------------------
Summary of changes:
scribo/scribo/core/line_info.hh | 24 ++++++++++++------------
scribo/scribo/core/stats.hh | 4 ++--
scribo/scribo/text/merging.hh | 3 ++-
scribo/scribo/text/paragraphs.hh | 20 +++++++++-----------
4 files changed, 25 insertions(+), 26 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0
---
scribo/scribo/core/line_info.hh | 24 ++++++++++++------------
scribo/scribo/core/stats.hh | 4 ++--
scribo/scribo/text/merging.hh | 3 ++-
scribo/scribo/text/paragraphs.hh | 20 +++++++++-----------
4 files changed, 25 insertions(+), 26 deletions(-)
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index b44b379..a798e9b 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -1193,18 +1193,18 @@ namespace scribo
// incremented.
++used_comps;
- // // Compute boldness
- // boldness.take(comp_set(c).features().boldness);
- // sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
-
- // // Compute color
- // color_red.take(comp_set(c).features().color.red());
- // color_green.take(comp_set(c).features().color.green());
- // color_blue.take(comp_set(c).features().color.blue());
-
- // sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
- // sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
- // sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
+ // Compute boldness
+ boldness.take(comp_set(c).features().boldness);
+ sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
+
+ // Compute color
+ color_red.take(comp_set(c).features().color.red());
+ color_green.take(comp_set(c).features().color.green());
+ color_blue.take(comp_set(c).features().color.blue());
+
+ sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
+ sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
+ sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
// FIXME: we must guaranty here that the relationship is from
diff --git a/scribo/scribo/core/stats.hh b/scribo/scribo/core/stats.hh
index 570325c..bc24044 100644
--- a/scribo/scribo/core/stats.hh
+++ b/scribo/scribo/core/stats.hh
@@ -273,14 +273,14 @@ private:
&& left_distance <= std))
++cluster_index;
- clusters[i] = cluster_index;
+ clusters.push_back(cluster_index);
}
if (nelements > 1
&& data_[i] - data_[i - 1] > std)
++cluster_index;
- clusters[i] = cluster_index;
+ clusters.push_back(cluster_index);
clusters_.clear();
clusters_.reserve(cluster_index);
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index 8ee349b..453c6c3 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -460,7 +460,8 @@ namespace scribo
if (l_cur_height < l_ted_x_height
&& l_cur_height > 0.05f * l_ted_x_height
&& float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
- && dx < l_ted_cw)
+ && dx < l_ted_cw
+ && l_cur_pmin.row() < l_ted_pmax.row())
{
l_cur.update_type(line::Punctuation);
return true;
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index a3ff802..5cb253e 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -132,18 +132,16 @@ namespace scribo
{
// ror : right neighbor of the right neighbor
const value::int_u16 ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ // const box2d& ror_line_bbox = lines(ror_nbh).bbox();
// If the current line has a ror
if (ror_nbh != right_nbh
&& output(ror_nbh) == right_nbh)
{
// Distance between the current line and the right neighbor
- const float right_distance =
- current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();
+ const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
// Distance between the right neighbor and the ror
- const float ror_distance =
- right_line_bbox.pcenter().row() - ror_line_bbox.pcenter().row();
+ const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
// ror x_height
const float ror_x_height = lines(ror_nbh).x_height();
@@ -168,7 +166,7 @@ namespace scribo
// If the distance between the two lines is greater than
// the minimum x height of the two lines then we cut the
// link between them
- if (distance > std::min(x_height, right_x_height)
+ if (distance > 2.0f * std::min(x_height, right_x_height)
&& output(right_nbh) == l)
{
output(right_nbh) = right_nbh;
@@ -207,12 +205,12 @@ namespace scribo
if (lol_nbh != left_nbh)
{
// Distance between the current line and its left neighbor
- const float left_distance =
- left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
+ const float left_distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
// Distance between the left neighbor and the left
// neighbor of its left neighbor
- const float lol_distance =
- lol_line_bbox.pcenter().row() - left_line_bbox.pcenter().row();
+ const float lol_distance = lines(lol_nbh).meanline() -
+ lines(left_nbh).baseline();
// lol x height
const float lol_x_height = lines(lol_nbh).x_height();
@@ -237,7 +235,7 @@ namespace scribo
// If the distance is greater than the min x height
// between the two lines
- if (distance > std::min(x_height, left_x_height))
+ if (distance > 2.0f * std::min(x_height, left_x_height))
{
output(l) = l;
continue;
--
1.5.6.5
1
0
---
scribo/scribo/core/line_info.hh | 31 ++++++++++++++++---------------
scribo/scribo/core/stats.hh | 4 ++--
scribo/scribo/text/merging.hh | 3 ++-
scribo/scribo/text/paragraphs.hh | 20 +++++++++-----------
4 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index c683be4..949315a 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -1188,22 +1188,23 @@ namespace scribo
// incremented.
++used_comps;
+
// COMPUTE FEATURES DATA
- // if (comp_set(c).has_features())
- // {
- // // Compute boldness
- // boldness.take(comp_set(c).features().boldness);
- // sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
-
- // // Compute color
- // color_red.take(comp_set(c).features().color.red());
- // color_green.take(comp_set(c).features().color.green());
- // color_blue.take(comp_set(c).features().color.blue());
-
- // sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
- // sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
- // sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
- // }
+ if (comp_set(c).has_features())
+ {
+ // Compute boldness
+ boldness.take(comp_set(c).features().boldness);
+ sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
+
+ // Compute color
+ color_red.take(comp_set(c).features().color.red());
+ color_green.take(comp_set(c).features().color.green());
+ color_blue.take(comp_set(c).features().color.blue());
+
+ sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
+ sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
+ sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
+ }
// FIXME: we must guaranty here that the relationship is from
// right to left, otherwise, the space size computed between
diff --git a/scribo/scribo/core/stats.hh b/scribo/scribo/core/stats.hh
index 570325c..bc24044 100644
--- a/scribo/scribo/core/stats.hh
+++ b/scribo/scribo/core/stats.hh
@@ -273,14 +273,14 @@ private:
&& left_distance <= std))
++cluster_index;
- clusters[i] = cluster_index;
+ clusters.push_back(cluster_index);
}
if (nelements > 1
&& data_[i] - data_[i - 1] > std)
++cluster_index;
- clusters[i] = cluster_index;
+ clusters.push_back(cluster_index);
clusters_.clear();
clusters_.reserve(cluster_index);
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index 8ee349b..453c6c3 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -460,7 +460,8 @@ namespace scribo
if (l_cur_height < l_ted_x_height
&& l_cur_height > 0.05f * l_ted_x_height
&& float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
- && dx < l_ted_cw)
+ && dx < l_ted_cw
+ && l_cur_pmin.row() < l_ted_pmax.row())
{
l_cur.update_type(line::Punctuation);
return true;
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index a3ff802..5cb253e 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -132,18 +132,16 @@ namespace scribo
{
// ror : right neighbor of the right neighbor
const value::int_u16 ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ // const box2d& ror_line_bbox = lines(ror_nbh).bbox();
// If the current line has a ror
if (ror_nbh != right_nbh
&& output(ror_nbh) == right_nbh)
{
// Distance between the current line and the right neighbor
- const float right_distance =
- current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();
+ const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
// Distance between the right neighbor and the ror
- const float ror_distance =
- right_line_bbox.pcenter().row() - ror_line_bbox.pcenter().row();
+ const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
// ror x_height
const float ror_x_height = lines(ror_nbh).x_height();
@@ -168,7 +166,7 @@ namespace scribo
// If the distance between the two lines is greater than
// the minimum x height of the two lines then we cut the
// link between them
- if (distance > std::min(x_height, right_x_height)
+ if (distance > 2.0f * std::min(x_height, right_x_height)
&& output(right_nbh) == l)
{
output(right_nbh) = right_nbh;
@@ -207,12 +205,12 @@ namespace scribo
if (lol_nbh != left_nbh)
{
// Distance between the current line and its left neighbor
- const float left_distance =
- left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
+ const float left_distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
// Distance between the left neighbor and the left
// neighbor of its left neighbor
- const float lol_distance =
- lol_line_bbox.pcenter().row() - left_line_bbox.pcenter().row();
+ const float lol_distance = lines(lol_nbh).meanline() -
+ lines(left_nbh).baseline();
// lol x height
const float lol_x_height = lines(lol_nbh).x_height();
@@ -237,7 +235,7 @@ namespace scribo
// If the distance is greater than the min x height
// between the two lines
- if (distance > std::min(x_height, left_x_height))
+ if (distance > 2.0f * std::min(x_height, left_x_height))
{
output(l) = l;
continue;
--
1.5.6.5
1
0

03 May '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch unstable/scribo has been updated
via 397d9adad2d9d3854bb9f6b43fd3b0530af973e3 (commit)
via 25b4393efeaebd453f6c6f55616763076f145472 (commit)
via ff536c853e0777eec9610e8031c26ccfbf5dd791 (commit)
via 6404315b5301e09986e725b6cf33c5a15b62b887 (commit)
via 2fbfae871ca040fa80ac1470913c478fd59e6ad0 (commit)
via 41e20558341e32084c0c45a3590bcb75e91b6c6f (commit)
via 3f50eb758a0ec55dc4a498d10142906db32e4fe9 (commit)
via 7e736d6b5cd3431452a7814d9fb8d1d60f05584e (commit)
via 3ac6dd8d266f0303fd64775d59d37bd6fbf0cdc7 (commit)
via 1bcc1751c88f6a6e1e9850a5cbc0ef0650beff9c (commit)
via 78b802085aa8549923238090b701aa1654425ba9 (commit)
via d82fdf6b2d0142333ee5b365f6fba6cc47923f2e (commit)
via 52a1107eac0fb8d75c28dc12cb5ed9ee798d2ad0 (commit)
via 5b660b4e73e89805ebf3452c7257990922ce3a06 (commit)
via 52f8110a5258a88e6fb0d88ca4e97e24054a0270 (commit)
from f5a185dfd59d5ed9f804d92262899f0cada75631 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
No new revisions were added by this update.
-----------------------------------------------------------------------
Summary of changes:
scribo/ChangeLog | 190 +++++++++
scribo/scribo/core/component_features_data.hh | 24 +
scribo/scribo/core/component_info.hh | 17 +
scribo/scribo/core/component_set.hh | 36 +-
scribo/scribo/core/document.hh | 68 +++-
scribo/scribo/core/group_info.hh | 184 ++++++++
scribo/scribo/core/internal/sort_comp_ids.hh | 77 ++++
scribo/scribo/core/line_info.hh | 168 +++++---
scribo/scribo/core/line_links.hh | 12 +
scribo/scribo/core/line_set.hh | 66 ++--
scribo/scribo/core/object_groups.hh | 228 ++++++++---
scribo/scribo/core/object_links.hh | 81 +++-
scribo/scribo/core/paragraph_info.hh | 46 ++
scribo/scribo/core/paragraph_set.hh | 21 +
scribo/scribo/debug/bboxes_image.hh | 18 +-
scribo/scribo/debug/decision_image.hh | 8 +-
scribo/scribo/draw/bounding_box_links.hh | 16 +-
scribo/scribo/estim/object_groups_mean_width.hh | 99 +++++
scribo/scribo/estim/object_groups_v_thickness.hh | 106 -----
scribo/scribo/filter/object_groups_mean_width.hh | 99 +++++
scribo/scribo/filter/object_groups_size_ratio.hh | 24 +-
scribo/scribo/filter/object_groups_small.hh | 36 +--
scribo/scribo/filter/object_groups_v_thickness.hh | 99 -----
scribo/scribo/filter/object_groups_with_holes.hh | 2 +-
scribo/scribo/filter/object_links_bbox_overlap.hh | 6 +-
scribo/scribo/filter/object_links_bbox_ratio.hh | 10 +-
scribo/scribo/filter/object_links_left_aligned.hh | 19 +-
.../filter/object_links_non_aligned_simple.hh | 6 +-
scribo/scribo/filter/object_links_right_aligned.hh | 19 +-
scribo/scribo/io/xml/internal/full_xml_visitor.hh | 118 ++++--
scribo/scribo/io/xml/internal/save_image_to_xml.hh | 106 +++++
scribo/scribo/io/xml/load.hh | 164 +++++++-
scribo/scribo/make/text_components_image.hh | 8 +-
scribo/scribo/preprocessing/rotate_90.hh | 8 +-
scribo/scribo/primitive/extract/alignments.hh | 445 ++++++++++----------
.../primitive/extract/separators_nonvisible.hh | 229 +++--------
.../scribo/primitive/group/from_double_link_any.hh | 57 ++-
scribo/scribo/primitive/group/from_single_link.hh | 13 -
scribo/scribo/primitive/internal/is_link_valid.hh | 12 +-
.../primitive/link/internal/link_functor_base.hh | 2 +-
.../link/internal/link_several_dmax_base.hh | 10 +-
scribo/scribo/primitive/link/merge_double_link.hh | 17 +-
.../link/merge_double_link_closest_aligned.hh | 6 +-
.../toolchain/internal/content_in_doc_functor.hh | 54 ++--
scribo/scribo/toolchain/text_in_picture.hh | 10 +-
scribo/src/debug/show_paragraph_blocks.cc | 2 +
scribo/src/preprocessing/split_bg_fg.cc | 6 +-
scribo/tests/Makefile.am | 5 +
scribo/tests/binarization/Makefile.am | 13 +-
scribo/tests/binarization/sauvola.cc | 52 +++
scribo/tests/binarization/sauvola.ref.pbm | Bin 0 -> 32884 bytes
scribo/tests/binarization/sauvola_ms.cc | 53 +++
scribo/tests/binarization/sauvola_ms.ref.pbm | Bin 0 -> 32884 bytes
scribo/tests/core/Makefile.am | 8 +-
scribo/tests/core/object_groups.cc | 103 +++++
scribo/tests/data.hh.in | 3 +
scribo/tests/filter/Makefile.am | 28 ++-
scribo/tests/filter/object_groups_mean_width.cc | 73 ++++
scribo/tests/filter/object_groups_size_ratio.cc | 73 ++++
scribo/tests/filter/object_groups_small.cc | 79 ++++
scribo/tests/filter/object_groups_with_holes.cc | 73 ++++
scribo/tests/filter/object_links_bbox_h_ratio.cc | 69 +++
scribo/tests/filter/object_links_bbox_overlap.cc | 69 +++
scribo/tests/filter/object_links_bbox_w_ratio.cc | 69 +++
scribo/tests/filter/object_links_bottom_aligned.cc | 69 +++
scribo/tests/filter/object_links_center_aligned.cc | 70 +++
scribo/tests/filter/object_links_left_aligned.cc | 69 +++
scribo/tests/filter/object_links_right_aligned.cc | 69 +++
scribo/tests/filter/object_links_top_aligned.cc | 69 +++
scribo/tests/img/alignment_1.pbm | Bin 0 -> 12167 bytes
scribo/tests/img/alignment_1.png | Bin 3145 -> 0 bytes
scribo/tests/img/alignment_2.pbm | Bin 0 -> 12167 bytes
scribo/tests/img/alignment_2.png | Bin 3147 -> 0 bytes
scribo/tests/img/alignment_3.pbm | Bin 0 -> 12167 bytes
scribo/tests/img/alignment_3.png | Bin 3145 -> 0 bytes
scribo/tests/img/alignment_4.pbm | Bin 0 -> 12167 bytes
scribo/tests/img/alignment_4.png | Bin 3155 -> 0 bytes
scribo/tests/img/the_valleys.pbm | Bin 0 -> 1724 bytes
scribo/tests/primitive/extract/Makefile.am | 6 +
scribo/tests/primitive/extract/alignments.cc | 3 +-
scribo/tests/unit_test/cond_tests_qt | 1 +
scribo/tests/unit_test/unit-tests.mk | 16 +-
82 files changed, 3151 insertions(+), 1043 deletions(-)
create mode 100644 scribo/scribo/core/group_info.hh
create mode 100644 scribo/scribo/core/internal/sort_comp_ids.hh
create mode 100644 scribo/scribo/estim/object_groups_mean_width.hh
delete mode 100644 scribo/scribo/estim/object_groups_v_thickness.hh
create mode 100644 scribo/scribo/filter/object_groups_mean_width.hh
delete mode 100644 scribo/scribo/filter/object_groups_v_thickness.hh
create mode 100644 scribo/scribo/io/xml/internal/save_image_to_xml.hh
create mode 100644 scribo/tests/binarization/sauvola.cc
create mode 100644 scribo/tests/binarization/sauvola.ref.pbm
create mode 100644 scribo/tests/binarization/sauvola_ms.cc
create mode 100644 scribo/tests/binarization/sauvola_ms.ref.pbm
create mode 100644 scribo/tests/core/object_groups.cc
create mode 100644 scribo/tests/filter/object_groups_mean_width.cc
create mode 100644 scribo/tests/filter/object_groups_size_ratio.cc
create mode 100644 scribo/tests/filter/object_groups_small.cc
create mode 100644 scribo/tests/filter/object_groups_with_holes.cc
create mode 100644 scribo/tests/filter/object_links_bbox_h_ratio.cc
create mode 100644 scribo/tests/filter/object_links_bbox_overlap.cc
create mode 100644 scribo/tests/filter/object_links_bbox_w_ratio.cc
create mode 100644 scribo/tests/filter/object_links_bottom_aligned.cc
create mode 100644 scribo/tests/filter/object_links_center_aligned.cc
create mode 100644 scribo/tests/filter/object_links_left_aligned.cc
create mode 100644 scribo/tests/filter/object_links_right_aligned.cc
create mode 100644 scribo/tests/filter/object_links_top_aligned.cc
create mode 100644 scribo/tests/img/alignment_1.pbm
delete mode 100644 scribo/tests/img/alignment_1.png
create mode 100644 scribo/tests/img/alignment_2.pbm
delete mode 100644 scribo/tests/img/alignment_2.png
create mode 100644 scribo/tests/img/alignment_3.pbm
delete mode 100644 scribo/tests/img/alignment_3.png
create mode 100644 scribo/tests/img/alignment_4.pbm
delete mode 100644 scribo/tests/img/alignment_4.png
create mode 100644 scribo/tests/img/the_valleys.pbm
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0

03 May '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch julien_temp_paragraphs has been created
at 8bae0713f5a4e26137e59a5cc6a3f2e947592f7d (commit)
- Log -----------------------------------------------------------------
8bae071 Paragraphs extraction
-----------------------------------------------------------------------
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0
---
scribo/scribo/core/line_info.hh | 199 +++++-
scribo/scribo/core/stats.hh | 320 +++++++++
scribo/scribo/text/look_like_text_lines.hh | 16 +
scribo/scribo/text/merging.hh | 306 ++++++---
scribo/scribo/text/paragraphs.hh | 1027 ++++++++++++++++++++++++++++
5 files changed, 1767 insertions(+), 101 deletions(-)
create mode 100644 scribo/scribo/core/stats.hh
create mode 100644 scribo/scribo/text/paragraphs.hh
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index 73d7856..b44b379 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -59,6 +59,9 @@
# include <scribo/core/concept/serializable.hh>
+// DEBUG
+
+# include <scribo/core/stats.hh>
namespace scribo
{
@@ -138,6 +141,9 @@ namespace scribo
// Line set holding this element.
line_set<L> holder_;
+ // DEBUG
+ stats< float > meanline_clusters_;
+ stats< float > baseline_clusters_;
};
@@ -279,7 +285,8 @@ namespace scribo
/// @}
-
+ bool chars_same_width() const;
+ unsigned get_first_char_height() const;
/// Force a new computation of statistics.
void force_stats_update();
@@ -307,6 +314,9 @@ namespace scribo
void update_components_type(component::Type type);
+ int compute_baseline();
+ int compute_meanline();
+
private: // Attributes
line_id_t id_;
mln::util::tracked_ptr<data_t> data_;
@@ -817,7 +827,7 @@ namespace scribo
int
line_info<L>::delta_of_line() const
{
- return char_width() + 2 * char_space();
+ return 2 * char_width() + 2 * char_space();
// FIXME: choose between:
// not enough: char_width + char_space
// too much: 2 * char_width
@@ -967,6 +977,145 @@ namespace scribo
}
template <typename L>
+ bool
+ line_info<L>::chars_same_width() const
+ {
+ // Only for the case of two-character words
+ if (card() == 2)
+ {
+ const component_set<L>& comp_set = data_->holder_.components();
+
+ const unsigned c1 = data_->components_(0);
+ const unsigned c2 = data_->components_(1);
+
+ if (data_->holder_.components()(c1).type() == component::Punctuation
+ || data_->holder_.components()(c2).type() == component::Punctuation)
+ return false;
+
+ const mln::box2d& bb1 = comp_set(c1).bbox();
+ const mln::box2d& bb2 = comp_set(c2).bbox();
+
+ const float w1 = bb1.width();
+ const float h1 = bb1.height();
+ const float w2 = bb2.width();
+ const float h2 = bb2.height();
+
+ const float space = std::max(bb1.pmin().col(), bb2.pmin().col()) -
+ std::min(bb1.pmax().col(), bb2.pmax().col());
+
+ const int dy = bb1.pmax().row() - bb2.pmax().row();
+
+ // The two characters must be distinct
+ if (space < 0)
+ return false;
+
+ if (// Approximately the same width
+ ((std::max(w1, w2) / std::min(w1, w2)) > 1.1f ||
+ // One character must not be smaller than the space between
+ // the two characters
+ (w1 < space || w2 < space))
+ // If the two characters have a different width they must also
+ // have a different height
+ && not (std::max(h1, h2) / std::min(h1, h2) <= 1.5f))
+ return false;
+
+ // Approximately aligned on baseline
+ if (std::abs(dy) > 10)
+ return false;
+
+ return true;
+ }
+
+ return false;
+ }
+
+ template< typename L >
+ unsigned
+ line_info<L>::get_first_char_height() const
+ {
+ const component_set<L>& comp_set = data_->holder_.components();
+ const unsigned c1 = data_->components_(0);
+ const mln::box2d& bb1 = comp_set(c1).bbox();
+
+ return bb1.height();
+ }
+
+ template <typename L>
+ int
+ line_info<L>::compute_baseline()
+ {
+ const unsigned nelements = data_->baseline_clusters_.nelements();
+
+ if (nelements == 2)
+ return data_->baseline_clusters_.mean();
+
+ mln::util::array< cluster_stats< float > >& clusters_b = data_->baseline_clusters_.clusters();
+
+ unsigned index = 0;
+ float min_base = 0.0f;
+ const unsigned clusters_b_nelements = clusters_b.nelements();
+
+ for (unsigned i = 0; i < clusters_b_nelements; ++i)
+ {
+ const unsigned clusters_b_i_nelements = clusters_b[i].nelements();
+
+ if (clusters_b_i_nelements >= min_base * 2.0f)
+ {
+ min_base = clusters_b_i_nelements;
+ index = i;
+ }
+ else if (clusters_b_i_nelements >= 0.5f * min_base)
+ {
+ if (clusters_b_i_nelements > 1 &&
+ clusters_b[index].median() > clusters_b[i].median())
+ {
+ if (clusters_b_i_nelements > min_base)
+ min_base = clusters_b_i_nelements;
+ index = i;
+ }
+ }
+ }
+
+ if (clusters_b[index].nelements() <= 2 && nelements <= 5)
+ return data_->baseline_clusters_.mean();
+
+ return clusters_b[index].median();
+ }
+
+ template <typename L>
+ int
+ line_info<L>::compute_meanline()
+ {
+ mln::util::array< cluster_stats< float > >& clusters_m = data_->meanline_clusters_.clusters();
+
+ unsigned index = 0;
+ float max_mean = 0.0f;
+ const unsigned clusters_m_nelements = clusters_m.nelements();
+
+ for (unsigned i = 0; i < clusters_m_nelements; ++i)
+ {
+ const unsigned clusters_m_i_nelements = clusters_m[i].nelements();
+
+ if (clusters_m_i_nelements >= max_mean * 2.0f)
+ {
+ max_mean = clusters_m_i_nelements;
+ index = i;
+ }
+ else if (clusters_m_i_nelements >= 0.5f * max_mean)
+ {
+ if (clusters_m[index].median() < clusters_m[i].median())
+ {
+ if (clusters_m_i_nelements > max_mean)
+ max_mean = clusters_m_i_nelements;
+ index = i;
+ }
+ }
+ }
+
+ return clusters_m[index].median();
+ }
+
+ template <typename L>
void
line_info<L>::force_stats_update()
{
@@ -977,8 +1126,8 @@ namespace scribo
typedef mln::value::int_u<12> median_data_t;
typedef mln::accu::stat::median_h<median_data_t> median_t;
median_t
- meanline,
- baseline,
+ // meanline,
+ // baseline,
char_space,
char_width;
@@ -1002,6 +1151,10 @@ namespace scribo
mln::def::coord ref_line = mln_max(mln::def::coord);
+ // DEBUG
+ data_->baseline_clusters_.reset();
+ data_->meanline_clusters_.reset();
+
// Find a reference line to compute baselines and other attributes.
// Workaround to avoid overflow with int_u<12> in median accumulators.
//
@@ -1040,18 +1193,18 @@ namespace scribo
// incremented.
++used_comps;
- // Compute boldness
- boldness.take(comp_set(c).features().boldness);
- sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
+ // // Compute boldness
+ // boldness.take(comp_set(c).features().boldness);
+ // sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
- // Compute color
- color_red.take(comp_set(c).features().color.red());
- color_green.take(comp_set(c).features().color.green());
- color_blue.take(comp_set(c).features().color.blue());
+ // // Compute color
+ // color_red.take(comp_set(c).features().color.red());
+ // color_green.take(comp_set(c).features().color.green());
+ // color_blue.take(comp_set(c).features().color.blue());
- sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
- sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
- sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
+ // sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
+ // sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
+ // sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
// FIXME: we must guaranty here that the relationship is from
@@ -1086,12 +1239,14 @@ namespace scribo
// Meanline (compute an absolute value, from the top left corner
// of the highest character bounding box, excluding
// punctuation).
- meanline.take(bb.pmin().row() - ref_line);
+ // meanline.take(bb.pmin().row() - ref_line);
+ data_->meanline_clusters_.take(bb.pmin().row());
// Baseline (compute an absolute value, from the top left corner
// of the highest character bounding box, excluding
// punctuation).
- baseline.take(bb.pmax().row() - ref_line);
+ // baseline.take(bb.pmax().row() - ref_line);
+ data_->baseline_clusters_.take(bb.pmax().row());
}
// Finalization
@@ -1143,12 +1298,14 @@ namespace scribo
else
data_->char_width_ = char_width.to_result();
- mln::def::coord
- absolute_baseline_r = baseline.to_result() + ref_line,
- absolute_meanline_r = meanline.to_result() + ref_line;
+ // // mln::def::coord
+ // // absolute_baseline_r = baseline.to_result() + ref_line,
+ // // absolute_meanline_r = meanline.to_result() + ref_line;
- data_->baseline_ = absolute_baseline_r;
- data_->meanline_ = absolute_meanline_r;
+ // data_->baseline_ = absolute_baseline_r;
+ // data_->meanline_ = absolute_meanline_r;
+ data_->baseline_ = compute_baseline();
+ data_->meanline_ = compute_meanline();
data_->x_height_ = data_->baseline_ - data_->meanline_ + 1;
data_->d_height_ = data_->baseline_ - bbox.to_result().pmax().row();
data_->a_height_ = data_->baseline_ - bbox.to_result().pmin().row() + 1;
diff --git a/scribo/scribo/core/stats.hh b/scribo/scribo/core/stats.hh
new file mode 100644
index 0000000..570325c
--- /dev/null
+++ b/scribo/scribo/core/stats.hh
@@ -0,0 +1,320 @@
+#ifndef STATS_HH_
+# define STATS_HH_
+
+# include <vector>
+# include <algorithm>
+
+# include <mln/util/array.hh>
+
+using namespace mln;
+
+//---------------------------------------------------------------------------
+// compare_values
+//---------------------------------------------------------------------------
+
+template< typename T >
+struct compare_values
+{
+ bool operator() (const T& lhs,
+ const T& rhs)
+ {
+ return (lhs < rhs);
+ }
+};
+
+
+//---------------------------------------------------------------------------
+// cluster_stats
+//---------------------------------------------------------------------------
+
+template< typename T >
+class cluster_stats
+{
+public:
+ cluster_stats()
+ : mean_needs_update_(false), median_needs_update_(false),
+ variance_needs_update_(false), std_needs_update_(false), size_(0)
+ {
+ }
+
+ cluster_stats(const unsigned size)
+ : mean_needs_update_(false), median_needs_update_(false),
+ variance_needs_update_(false), std_needs_update_(false), size_(0)
+ {
+ data_.reserve(size);
+ }
+
+ void reset()
+ {
+ std::vector< T >& data = data_.hook_std_vector_();
+ data.clear();
+
+ size_ = 0;
+ needs_update();
+ }
+
+ void take(const T& value)
+ {
+ if (not size_)
+ {
+ min_ = value;
+ max_ = value;
+ }
+ else
+ {
+ if (value < min_)
+ min_ = value;
+ else if (value > max_)
+ max_ = value;
+ }
+
+ ++size_;
+ data_.append(value);
+ needs_update();
+ }
+
+ T mean()
+ {
+ if (mean_needs_update_)
+ {
+ mean_ = 0;
+
+ for (unsigned i = 0; i < size_; ++i)
+ mean_ += data_[i];
+
+ mean_ /= size_;
+ mean_needs_update_ = false;
+ }
+
+ return mean_;
+ }
+
+ T median()
+ {
+ if (median_needs_update_)
+ {
+ std::vector< T >& data = data_.hook_std_vector_();
+ std::sort(data.begin(), data.end(), compare_values< T >());
+
+ median_ = data[(size_ - 1) >> 1];
+ median_needs_update_ = false;
+ }
+
+ return median_;
+ }
+
+ T variance()
+ {
+ if (variance_needs_update_)
+ {
+ mean();
+ variance_ = 0;
+
+ for (unsigned i = 0; i < size_; ++i)
+ {
+ const T tmp = mean_ - data_[i];
+
+ variance_ += (tmp * tmp);
+ }
+
+ variance_ /= size_;
+ std_ = sqrt(variance_);
+
+ variance_needs_update_ = false;
+ std_needs_update_ = false;
+ }
+
+ return variance_;
+ }
+
+ T standard_deviation()
+ {
+ if (std_needs_update_)
+ variance();
+
+ return std_;
+ }
+
+ T min() { return min_; }
+ T max() { return max_; }
+
+ void sort()
+ {
+ std::vector< T >& data = data_.hook_std_vector_();
+ std::sort(data.begin(), data.end(), compare_values< T >());
+ }
+
+ unsigned nelements() { return size_; }
+
+ T operator[] (const unsigned index)
+ {
+ return data_[index];
+ }
+
+private:
+ void needs_update()
+ {
+ mean_needs_update_ = true;
+ median_needs_update_ = true;
+ variance_needs_update_ = true;
+ std_needs_update_ = true;
+ }
+
+private:
+ bool mean_needs_update_ : 1;
+ bool median_needs_update_ : 1;
+ bool variance_needs_update_ : 1;
+ bool std_needs_update_ : 1;
+ T mean_;
+ T median_;
+ T min_;
+ T max_;
+ T variance_;
+ T std_;
+ unsigned size_;
+ util::array< T > data_;
+};
+
+//---------------------------------------------------------------------------
+// stats
+//---------------------------------------------------------------------------
+
+template< typename T >
+class stats
+{
+public:
+ stats()
+ : clusters_need_update_(true), data_sorted_(false)
+ {
+ }
+
+ stats(const int size)
+ : clusters_need_update_(true), data_sorted_(false), data_(size)
+ {
+ }
+
+ void reset()
+ {
+ data_.reset();
+ std::vector< cluster_stats< T > >& clusters = clusters_.hook_std_vector_();
+ clusters.clear();
+ data_sorted_ = false;
+ clusters_need_update_ = true;
+ }
+
+ void take(const T& value)
+ {
+ data_.take(value);
+ clusters_need_update_ = true;
+ }
+
+ T mean()
+ {
+ return data_.mean();
+ }
+
+ T median()
+ {
+ return data_.median();
+ }
+
+ T variance()
+ {
+ return data_.variance();
+ }
+
+ T standard_deviation()
+ {
+ return data_.standard_deviation();
+ }
+
+ T min() { return data_.min(); }
+ T max() { return data_.max(); }
+
+ unsigned nelements() { return data_.nelements(); }
+
+ util::array< cluster_stats< T > >& clusters()
+ {
+ if (clusters_need_update_)
+ {
+ compute_clusters();
+ clusters_need_update_ = false;
+ }
+
+ return clusters_;
+ }
+
+private:
+ void compute_clusters()
+ {
+ std::vector< unsigned > clusters;
+ unsigned cluster_index = 1;
+
+ clusters.reserve(data_.nelements());
+
+ if (not data_sorted_)
+ {
+ data_.sort();
+ data_sorted_ = true;
+ }
+
+ unsigned i = 0;
+ const unsigned nelements = data_.nelements();
+
+ clusters[0] = cluster_index;
+ const T std = data_.standard_deviation();
+
+ for (i = 1; i < nelements - 1; ++i)
+ {
+ const T left_distance = data_[i] - data_[i - 1];
+ const T right_distance = data_[i + 1] - data_[i];
+
+ if (not ((left_distance <= 2 || left_distance < right_distance)
+ && left_distance <= std))
+ ++cluster_index;
+
+ clusters[i] = cluster_index;
+ }
+
+ if (nelements > 1
+ && data_[i] - data_[i - 1] > std)
+ ++cluster_index;
+
+ clusters[i] = cluster_index;
+
+ clusters_.clear();
+ clusters_.reserve(cluster_index);
+ cluster_index = 1;
+
+ i = 0;
+ while (i < nelements)
+ {
+ unsigned tmp = i;
+
+ while (tmp < nelements && clusters[tmp] == clusters[i])
+ ++tmp;
+
+ cluster_stats< T > cluster(tmp - i);
+
+ tmp = i;
+ while (tmp < nelements && clusters[tmp] == clusters[i])
+ {
+ cluster.take(data_[tmp]);
+ ++tmp;
+ }
+
+ clusters_.append(cluster);
+
+ i = tmp;
+ ++cluster_index;
+ }
+ }
+
+private:
+ bool clusters_need_update_ : 1;
+ bool data_sorted_ : 1;
+ cluster_stats< T > data_;
+ util::array< cluster_stats< T > > clusters_;
+};
+
+#endif /* STATS_HH_ */
diff --git a/scribo/scribo/text/look_like_text_lines.hh b/scribo/scribo/text/look_like_text_lines.hh
index 2ced7ce..80d9995 100644
--- a/scribo/scribo/text/look_like_text_lines.hh
+++ b/scribo/scribo/text/look_like_text_lines.hh
@@ -64,6 +64,22 @@ namespace scribo
inline
bool looks_like_a_text_line(const scribo::line_info<L>& l)
{
+ // Special case for two-letter words
+ if (l.card() == 2)
+ {
+ const float ratio = (float) l.bbox().width() / l.bbox().height();
+
+ if (// Minimal width / height ratio
+ ratio > 0.4f && ratio < 2.0f
+ // Minimal height
+ && l.bbox().height() >= 15
+ // Characters must have approximately the same width
+ && l.chars_same_width())
+ {
+ return true;
+ }
+ }
+
return
l.card() >= 3 // at least 3 components
&& l.bbox().height() > 10 // and minimal height
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index 26e2da7..8ee349b 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -188,21 +188,24 @@ namespace scribo
swap_ordering(l1, l2);
parent[l2] = l1; // The smallest label value is root.
- if (lines(l2).card() > lines(l1).card())
+ line_info<L>& l1_info = lines(l1);
+ line_info<L>& l2_info = lines(l2);
+
+ if (l2_info.card() > l1_info.card())
{
// we transfer data from the largest item to the root one.
- scribo::line_info<L> tmp = lines(l1);
- std::swap(lines(l1), lines(l2));
- lines(l1).fast_merge(tmp);
+ scribo::line_info<L> tmp = l1_info;
+ std::swap(l1_info, l2_info);
+ l1_info.fast_merge(tmp);
// We must set manually the tag for lines(l2) since it is
// not used directly in merge process so its tag cannot be
// updated automatically.
- lines(l2).update_tag(line::Merged);
- lines(l2).set_hidden(true);
+ l2_info.update_tag(line::Merged);
+ l2_info.set_hidden(true);
}
else
- lines(l1).fast_merge(lines(l2));
+ l1_info.fast_merge(l2_info);
// l1's tag is automatically set to line::Needs_Precise_Stats_Update
// l2's tag is automatically set to line::Merged
@@ -226,34 +229,57 @@ namespace scribo
bool between_separators(const scribo::line_info<L>& l1,
const scribo::line_info<L>& l2)
{
- // No separators found in image.
+ // No separators found in image.
mln_precondition(l1.holder().components().has_separators());
- unsigned
- col1 = l1.bbox().pcenter().col(),
- col2 = l2.bbox().pcenter().col();
+ const box2d& l1_bbox = l1.bbox();
+ const box2d& l2_bbox = l2.bbox();
+
+ const unsigned
+ col1 = l1_bbox.pcenter().col(),
+ col2 = l2_bbox.pcenter().col();
const mln_ch_value(L, bool)&
separators = l1.holder().components().separators();
+ // Checking for separators starting from 1 / 4, 3/ 4 and the
+ // center of the box
typedef const bool* sep_ptr_t;
- sep_ptr_t sep_ptr, end;
+ sep_ptr_t sep_ptr, sep_ptr_top, sep_ptr_bottom, end;
if (col1 < col2)
{
- sep_ptr = &separators(l1.bbox().pcenter());
+ const unsigned quarter =
+ ((l1_bbox.pcenter().row() - l1_bbox.pmin().row()) >> 1);
+
+ sep_ptr = &separators(l1_bbox.pcenter());
+ sep_ptr_top = &separators(point2d(l1_bbox.pmin().row() + quarter,
+ l1_bbox.pcenter().col()));
+ sep_ptr_bottom = &separators(point2d(l1_bbox.pmax().row() - quarter,
+ l1_bbox.pcenter().col()));
end = sep_ptr + col2 - col1;
}
else
{
- sep_ptr = &separators(l2.bbox().pcenter());
+ const unsigned quarter =
+ ((l2_bbox.pcenter().row() - l2_bbox.pmin().row()) >> 1);
+
+ sep_ptr = &separators(l2_bbox.pcenter());
+ sep_ptr_top = &separators(point2d(l2_bbox.pmin().row() + quarter,
+ l2_bbox.pcenter().col()));
+ sep_ptr_bottom = &separators(point2d(l2_bbox.pmax().row() - quarter,
+ l2_bbox.pcenter().col()));
end = sep_ptr + col1 - col2;
}
// If sep_ptr is true, then a separator is reached.
- while (!*sep_ptr && sep_ptr != end)
+ while (!*sep_ptr && !*sep_ptr_top && !*sep_ptr_bottom && sep_ptr != end)
+ {
++sep_ptr;
+ ++sep_ptr_top;
+ ++sep_ptr_bottom;
+ }
- return *sep_ptr;
+ return (*sep_ptr || *sep_ptr_top || *sep_ptr_bottom);
}
@@ -266,16 +292,78 @@ namespace scribo
*/
template <typename L>
- bool lines_can_merge(const scribo::line_info<L>& l1,
+ bool lines_can_merge(scribo::line_info<L>& l1,
const scribo::line_info<L>& l2)
{
// Parameters.
- const float x_ratio_max = 1.7, baseline_delta_max = 3;
+ const float x_ratio_max = 1.7f;
+ const float baseline_delta_max =
+ 0.5f * std::min(l1.x_height(), l2.x_height());
+
+ const box2d& l1_bbox = l1.bbox();
+ const box2d& l2_bbox = l2.bbox();
+
+ const point2d& l1_pmin = l1_bbox.pmin();
+ const point2d& l2_pmin = l2_bbox.pmin();
+ const point2d& l1_pmax = l1_bbox.pmax();
+ const point2d& l2_pmax = l2_bbox.pmax();
+
+ const bool l1_has_separators = l1.holder().components().has_separators();
+ const bool l1_l2_between_separators = (l1_has_separators) ?
+ between_separators(l1, l2) : false;
+ const float l_ted_cw = l2.char_width();
+
+ const float dx = std::max(l1_pmin.col(), l2_pmin.col())
+ - std::min(l1_pmax.col(), l2_pmax.col());
+ const float dy = std::max(l1_pmin.row(), l2_pmin.row())
+ - std::min(l1_pmax.row(), l2_pmax.row());
+
+ // Particular case of "
+ {
+ if (// Must have 2 characters
+ (l1.card() == 2
+ // The box height must be smaller than the touched line x height
+ && l1_bbox.height() < l2.x_height())
+ // The line must be vertically and horizontally close to
+ // the touched line
+ && (dx < l_ted_cw && dy < 0)
+ // No separator between the two lines
+ && not (l1_l2_between_separators))
+ {
+ // Line is then considered as punctuation
+ l1.update_type(line::Punctuation);
+ return true;
+ }
+ }
+
+ // Particular case like merging between a line and [5]
+ {
+ const mln::def::coord
+ top_row_l2 = l2_pmin.row(),
+ top_row_l1 = l1_pmin.row(),
+ bot_row = l2_pmax.row();
+ const float x1 = l1.x_height(), x2 = l2.x_height();
+ const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
+
+ if (// No separator
+ !l1_l2_between_separators
+ // The x height ration must be lower than 2
+ && (x_ratio < 2.0f)
+ // Baseline alignment
+ && (std::abs(bot_row - l1.baseline()) < baseline_delta_max)
+ // The top of the boxes must be aligned
+ && (std::abs(top_row_l2 - top_row_l1) < 5)
+ // Distance between the line and the touched line.
+ && dx < 5.0f * l_ted_cw)
+ {
+ return true;
+ }
+ }
// Similarity of x_height.
{
- float x1 = l1.x_height(), x2 = l2.x_height();
- float x_ratio = std::max(x1, x2) / std::min(x1, x2);
+ const float x1 = l1.x_height(), x2 = l2.x_height();
+ const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
if (x_ratio > x_ratio_max)
return false;
}
@@ -287,22 +375,21 @@ namespace scribo
}
// left / right
- unsigned
- col1 = l1.bbox().pcenter().col(),
- col2 = l2.bbox().pcenter().col();
+ const unsigned
+ col1 = l1_bbox.pcenter().col(),
+ col2 = l2_bbox.pcenter().col();
if (col1 < col2)
{
- if ((col1 + l1.bbox().width() / 4) >= (col2 - l2.bbox().width() / 4))
+ if ((col1 + l1_bbox.width() / 4) >= (col2 - l2_bbox.width() / 4))
return false;
}
else
- if ((col2 + l2.bbox().width() / 4) >= (col1 - l1.bbox().width() / 4))
+ if ((col2 + l2_bbox.width() / 4) >= (col1 - l1_bbox.width() / 4))
return false;
-
// Check that there is no separator in between.
- if (l1.holder().components().has_separators())
- return ! between_separators(l1, l2);
+ if (l1_has_separators)
+ return ! l1_l2_between_separators;
return true;
}
@@ -310,14 +397,14 @@ namespace scribo
- template <typename L>
- int horizontal_distance(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ inline
+ int horizontal_distance(const box2d& l1,
+ const box2d& l2)
{
- if (l1.bbox().pcenter().col() < l2.bbox().pcenter().col())
- return l2.bbox().pmin().col() - l1.bbox().pmax().col();
+ if (l1.pcenter().col() < l2.pcenter().col())
+ return l2.pmin().col() - l1.pmax().col();
else
- return l1.bbox().pmin().col() - l2.bbox().pmax().col();
+ return l1.pmin().col() - l2.pmax().col();
}
@@ -339,7 +426,7 @@ namespace scribo
*/
template <typename L>
- bool non_text_and_text_can_merge(const scribo::line_info<L>& l_cur, // current
+ bool non_text_and_text_can_merge(scribo::line_info<L>& l_cur, // current
const scribo::line_info<L>& l_ted) // touched
{
if (l_cur.type() == line::Text || l_ted.type() != line::Text)
@@ -353,22 +440,42 @@ namespace scribo
&& between_separators(l_cur, l_ted))
return false;
+ const box2d& l_cur_bbox = l_cur.bbox();
+ const box2d& l_ted_bbox = l_ted.bbox();
+
+ const point2d& l_cur_pmin = l_cur_bbox.pmin();
+ const point2d& l_ted_pmin = l_ted_bbox.pmin();
+ const point2d& l_cur_pmax = l_cur_bbox.pmax();
+ const point2d& l_ted_pmax = l_ted_bbox.pmax();
+
+ const float dx = std::max(l_cur_pmin.col(), l_ted_pmin.col())
+ - std::min(l_cur_pmax.col(), l_ted_pmax.col());
+ const float l_ted_cw = l_ted.char_width();
+ const float l_ted_x_height = l_ted.x_height();
+
+ const unsigned l_cur_height = l_cur_bbox.height();
+ const unsigned l_cur_width = l_cur_bbox.width();
// General case (for tiny components like --> ',:."; <--):
- if (l_cur.bbox().height() < l_ted.x_height()
- && float(l_cur.bbox().width()) / float(l_cur.card()) < l_ted.char_width())
+ if (l_cur_height < l_ted_x_height
+ && l_cur_height > 0.05f * l_ted_x_height
+ && float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
+ && dx < l_ted_cw)
+ {
+ l_cur.update_type(line::Punctuation);
return true;
-
+ }
// Special case for '---':
if (// small height:
- l_cur.bbox().height() < l_ted.x_height()
+ l_cur_height < l_ted_x_height
// // not so long width:
- && l_cur.bbox().width() < 5 * l_ted.char_width()
+ && l_cur_width > 0.8 * l_ted_cw
+ && l_cur_width < 5 * l_ted_cw
// align with the 'x' center:
&& std::abs((l_ted.baseline() + l_ted.meanline()) / 2 - l_cur.bbox().pcenter().row()) < 7
// tiny spacing:
- && unsigned(horizontal_distance(l_cur, l_ted)) < l_ted.char_width()
+ && unsigned(horizontal_distance(l_cur_bbox, l_ted_bbox)) < 2 * l_ted_cw
)
{
return true;
@@ -378,10 +485,11 @@ namespace scribo
// Special case
// Looking for alignement.
- mln::def::coord
+ const mln::def::coord
top_row = l_cur.bbox().pmin().row(),
bot_row = l_cur.bbox().pmax().row();
+ const box2d& l_ted_ebbox = l_ted.ebbox();
// std::cout << "top_row = " << top_row << " - bot_row = " << bot_row << std::endl;
// std::cout << std::abs(bot_row - l_ted.baseline())
@@ -402,10 +510,11 @@ namespace scribo
// << std::endl;
if ((std::abs(bot_row - l_ted.baseline()) < 5
- || std::abs(bot_row - l_ted.ebbox().pmax().row()) < 5)
+ || std::abs(bot_row - l_ted_ebbox.pmax().row()) < 5)
&&
(std::abs(top_row - l_ted.meanline()) < 5
- || std::abs(top_row - l_ted.ebbox().pmin().row()) < 5))
+ || std::abs(top_row - l_ted_ebbox.pmin().row()) < 5)
+ && dx < 5.0f * l_ted_cw)
{
return true;
}
@@ -536,35 +645,33 @@ namespace scribo
if (parent[l] != l) // not a root, so has already merged, thus ignore it
continue;
- box2d b = lines(l).bbox();
+ const box2d& b = lines(l).bbox();
- unsigned tl, tr, ml, mc, mr, bl, br;
+ // unsigned tl, tr, ml, mc, mr, bl, br;
+
+ const box2d& b_ = lines(l).ebbox();
+
+ /*
+ tl tr
+ x---------------x
+ | |
+ | mc |
+ ml x x x mr
+ | |
+ | |
+ x---------------x
+ bl br
+
+ */
- {
- box2d b_ = lines(l).ebbox();
-
- /*
- tl tr
- x---------------x
- | |
- | mc |
- ml x x x mr
- | |
- | |
- x---------------x
- bl br
-
- */
-
-
- tl = billboard(b_.pmin());
- tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- br = billboard(b_.pmax());
- }
+
+ const unsigned tl = billboard(b_.pmin());
+ const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const unsigned br = billboard(b_.pmax());
typedef std::set<unsigned> set_t;
std::set<unsigned> labels;
@@ -596,11 +703,47 @@ namespace scribo
{
// Main case: it is an "included" box (falling in an already drawn box)
- if (lines(l).type() == line::Text) // the current object IS a text line
+ const line_info<L>& l_info = lines(l);
+ const line_info<L>& mc_info = lines(mc);
+
+ if (l_info.type() == line::Text) // the current object IS a text line
{
- if (lines(mc).type() == line::Text) // included in a text line => weird
+ if (mc_info.type() == line::Text) // included in a text line => weird
{
++count_txtline_IN_txtline;
+
+ // Particular case of "
+ // {
+ // if ((lines(l).card() == 2 &&
+ // lines(l).bbox().height() < lines(mc).x_height()) &&
+ // not (lines(l).holder().components().has_separators()
+ // && between_separators(lines(l),
+ // lines(mc))))
+
+ const box2d& l_bbox = l_info.bbox();
+ const box2d& mc_bbox = mc_info.bbox();
+
+ const point2d& l_pmin = l_bbox.pmin();
+ const point2d& mc_pmin = mc_bbox.pmin();
+ const point2d& l_pmax = l_bbox.pmax();
+ const point2d& mc_pmax = mc_bbox.pmax();
+
+ const float dx = std::max(l_pmin.col(), mc_pmin.col())
+ - std::min(l_pmax.col(), mc_pmax.col());
+ const float dy = std::max(l_pmin.row(), mc_pmin.row())
+ - std::min(l_pmax.row(), mc_pmax.row());
+ const float l_ted_cw = mc_info.char_width();
+
+ // We accept a line included into another only if it
+ // is horizontally close to the line's bbox and
+ // vertically aligned
+ // Obviously no separators between the two lines
+ if (dx < l_ted_cw && dy < 0
+ && not (l_info.holder().components().has_separators()
+ && between_separators(l_info, mc_info)))
+ l = do_union(lines, l, mc, parent);
+ // }
+
// std::cout << "weird: inclusion of a txt_line in a txt_line!" << std::endl;
/// Merge is perform if the current line is a
@@ -649,15 +792,15 @@ namespace scribo
// could be noise or garbage... So adding new
// criterions could fix this issue.
//
- //if (!non_text_and_text_can_merge(lines(l), lines(mc)))
- // continue;
+ if (!non_text_and_text_can_merge(lines(l), lines(mc)))
+ continue;
// Avoid the case when a large title ebbox overlap
// with a text column. In that case, the title may
// merge with punctuation from the text.
- if (lines(l).holder().components().has_separators()
- && between_separators(lines(l), lines(mc)))
- continue;
+ // if (lines(l).holder().components().has_separators()
+ // && between_separators(lines(l), lines(mc)))
+ // continue;
// Mark current line as punctuation.
lines(l).update_type(line::Punctuation);
@@ -832,9 +975,12 @@ namespace scribo
bool operator()(const scribo::line_id_t& l1, const scribo::line_id_t& l2) const
{
- if (lines_(l1).bbox().nsites() == lines_(l2).bbox().nsites())
+ const unsigned l1_nsites = lines_(l1).bbox().nsites();
+ const unsigned l2_nsites = lines_(l2).bbox().nsites();
+
+ if (l1_nsites == l2_nsites)
return l1 < l2;
- return lines_(l1).bbox().nsites() < lines_(l2).bbox().nsites();
+ return l1_nsites < l2_nsites;
}
scribo::line_set<L> lines_;
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
new file mode 100644
index 0000000..a3ff802
--- /dev/null
+++ b/scribo/scribo/text/paragraphs.hh
@@ -0,0 +1,1027 @@
+#include <mln/util/array.hh>
+#include <mln/accu/shape/bbox.hh>
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/draw/box.hh>
+#include <mln/data/convert.hh>
+#include <mln/value/int_u16.hh>
+#include <mln/value/label_16.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/geom/rotate.hh>
+#include <mln/literal/colors.hh>
+
+#include <scribo/core/macros.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/core/line_info.hh>
+
+using namespace mln;
+
+namespace scribo
+{
+
+ namespace internal
+ {
+
+//-------------------------------------
+// Extracting root of links
+//-------------------------------------
+ template <typename T>
+ inline
+ unsigned
+ find_root(util::array<T>& parent, unsigned x)
+ {
+ unsigned tmp_x = x;
+
+ while (parent(tmp_x) != tmp_x)
+ tmp_x = parent(tmp_x);
+
+ while (parent(x) != x)
+ {
+ const unsigned tmp = parent(x);
+ x = parent(x);
+ parent(tmp) = tmp_x;
+ }
+
+ return x;
+ }
+ }
+
+ namespace filter
+ {
+
+//---------------------------------------------------------------------
+// This method aims to cut the links between lines that do not fit the
+// different criteria
+//---------------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void paragraph_links(const util::array<value::int_u16>& left,
+ const util::array<value::int_u16>& right,
+ util::array<value::int_u16>& output,
+ const util::array< line_info<L> >& lines,
+ const image2d<bool>& input)
+ {
+ output = left;
+
+ const unsigned nlines = lines.nelements();
+
+ // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
+ // for (unsigned l = 0; l < nlines; ++l)
+ // {
+ // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
+ // }
+ // mln::io::ppm::save(links, "out_links.ppm");
+
+ // For each line
+ for (unsigned l = 0; l < nlines; ++l)
+ {
+ // Neighbors
+
+ const value::int_u16 left_nbh = output(l);
+ const value::int_u16 right_nbh = right(l);
+ const value::int_u16 lol_nbh = output(left_nbh);
+
+ // Line features
+ const float x_height = lines(l).x_height();
+ const float left_x_height = lines(left_nbh).x_height();
+ const float right_x_height = lines(right_nbh).x_height();
+
+ const box2d& left_line_bbox = lines(left_nbh).bbox();
+ const box2d& current_line_bbox = lines(l).bbox();
+ const box2d& right_line_bbox = lines(right_nbh).bbox();
+ const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
+
+ const int lline_col_min = left_line_bbox.pmin().col();
+ const int cline_col_min = current_line_bbox.pmin().col();
+ const int rline_col_min = right_line_bbox.pmin().col();
+ const int lolline_col_min = lol_line_bbox.pmin().col();
+
+ const int lline_col_max = left_line_bbox.pmax().col();
+ const int cline_col_max = current_line_bbox.pmax().col();
+ const int rline_col_max = right_line_bbox.pmax().col();
+
+ const int lline_cw = lines(left_nbh).char_width();
+ const int cline_cw = lines(l).char_width();
+ const int rline_cw = lines(right_nbh).char_width();
+ // Maximal x variation to consider two lines vertically aligned
+ const int delta_alignment = cline_cw;
+
+ // Checks the baseline distances of the two neighbors
+ {
+ // Current line baseline
+ const int c_baseline = lines(l).baseline();
+
+ // Baseline distance with the left and right neighbors
+ const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
+ const int rc_baseline = c_baseline -lines(right_nbh).baseline();
+
+ // Max baseline distance between the two neighbors
+ // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
+ // const float delta_baseline_min = std::min(lc_baseline,
+ // rc_baseline);
+
+ // Only two lines, meaning the current line has only one neighbor
+ bool two_lines = false;
+
+ // If the current line has no left neighbor
+ if (lc_baseline == 0)
+ {
+ // ror : right neighbor of the right neighbor
+ const value::int_u16 ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+
+ // If the current line has a ror
+ if (ror_nbh != right_nbh
+ && output(ror_nbh) == right_nbh)
+ {
+ // Distance between the current line and the right neighbor
+ const float right_distance =
+ current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();
+ // Distance between the right neighbor and the ror
+ const float ror_distance =
+ right_line_bbox.pcenter().row() - ror_line_bbox.pcenter().row();
+ // ror x_height
+ const float ror_x_height = lines(ror_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its right neighbor
+ if (right_distance > 1.4f * ror_distance
+ && std::max(ror_x_height, right_x_height) <
+ 1.2f * std::min(ror_x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // We determine the distance between the two lines
+ const float distance = lines(l).meanline() - lines(right_nbh).baseline();
+ two_lines = true;
+
+ // If the distance between the two lines is greater than
+ // the minimum x height of the two lines then we cut the
+ // link between them
+ if (distance > std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float min_char_width = std::min(rline_cw, cline_cw);
+ const float max_char_width = std::max(rline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its right neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ if (output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // If the current line has no right neighbor
+ else if (rc_baseline == 0)
+ {
+ // lol : left neighbor of the left neighbor
+
+ // If the left neighbor of the current line has a left neighbor
+ if (lol_nbh != left_nbh)
+ {
+ // Distance between the current line and its left neighbor
+ const float left_distance =
+ left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
+ // Distance between the left neighbor and the left
+ // neighbor of its left neighbor
+ const float lol_distance =
+ lol_line_bbox.pcenter().row() - left_line_bbox.pcenter().row();
+ // lol x height
+ const float lol_x_height = lines(lol_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its left neighbor
+ if (left_distance > 1.4f * lol_distance
+ && std::max(lol_x_height, left_x_height) <
+ 1.2f * std::min(lol_x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // Distance between the current line and it left neighbor
+ const float distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+
+ two_lines = true;
+
+ // If the distance is greater than the min x height
+ // between the two lines
+ if (distance > std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float min_char_width = std::min(lline_cw, cline_cw);
+ const float max_char_width = std::max(lline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its left neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // The current line has at least one left and one right neighbor
+ else // if (delta_baseline_max >= delta_baseline_min)
+ {
+ // Distance between the left and the current line
+ const float left_distance =
+ lines(left_nbh).meanline() - lines(l).baseline();
+ // Distance between the right and the current line
+ const float right_distance =
+ lines(l).meanline() - lines(right_nbh).baseline();
+
+ // If the left line is too far compared to the right one
+ // we cut the link with it
+ if (left_distance > 1.2f * right_distance
+ && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ // If the right line is too far compared to the left one
+ // we cut the link with it
+ else if (right_distance > 1.2f * left_distance
+ && std::max(x_height, right_x_height) > 1.2f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ // If the distance between the baseline of the left
+ // neighbor and the baseline of the current line is
+ // greater than the one between the current line baseline
+ // and the right line baseline we have to study the texte
+ // features of the right and left lines
+ if (lc_baseline > rc_baseline)
+ {
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ else
+ {
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ // If we arrive here, it means than the lines in the
+ // neighborhood of the current line are quite similar. We can
+ // then begin to study the indentations in order to determine
+ // the beginning of new paragraphs
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_aligned = false;
+ bool left_lol_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_llol = std::abs(lline_col_min - lolline_col_min);
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (dx_llol < delta_alignment)
+ left_lol_aligned = true;
+
+ if (left_right_aligned && left_lol_aligned)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ dx_lrc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________
+// |___________________| End of the paragraph - Current line
+// ________________________
+// |________________________| Beginning of a new one
+// ___________________________
+// |___________________________| Left of left of current line
+//
+// End of paragraph case : we try to find an end to the current paragraph
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_max_aligned = false;
+ bool left_current_min_aligned = false;
+ bool lol_current_min_aligned = false;
+ const bool lol_is_left = output(left_nbh) == left_nbh;
+ const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
+ const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
+ const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
+
+ if (dx_lr_max < delta_alignment)
+ left_right_max_aligned = true;
+
+ if (dx_lc_min < delta_alignment)
+ left_current_min_aligned = true;
+
+ if (dx_lolc_min < delta_alignment)
+ lol_current_min_aligned = true;
+
+ if (!left_current_min_aligned && left_right_max_aligned &&
+ (lol_current_min_aligned || lol_is_left))
+ {
+ const int dx_lrc = std::abs(lline_col_max - cline_col_max);
+ const int l_char_width = lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ cline_col_max < lline_col_max &&
+ cline_col_min < lline_col_min &&
+ (lline_col_min > lolline_col_min || lol_is_left))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above at the end of a column.
+//
+//-----------------------------------------------------------------------------
+
+ if (left_nbh == l)
+ {
+ const value::int_u16 ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (right_ror_min_aligned)
+ {
+ const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
+ const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_rrorc > l_char_width &&
+ dx_rrorc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_max >= rline_col_max)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ }
+
+
+ // Only debug
+
+ {
+ image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
+
+ for (unsigned i = 0; i < output.nelements(); ++i)
+ output(i) = internal::find_root(output, i);
+
+ mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ // if (lines(i).is_textline())
+ // {
+ // mln::draw::box(debug, lines(i).bbox(), literal::red);
+ nbbox(output(i)).take(lines(i).bbox());
+ // }
+ }
+
+ for (unsigned i = 0; i < nbbox.nelements(); ++i)
+ if (nbbox(i).is_valid())
+ {
+ box2d b = nbbox(i).to_result();
+ mln::draw::box(debug, b, literal::orange);
+ b.enlarge(1);
+ mln::draw::box(debug, b, literal::orange);
+ b.enlarge(1);
+ mln::draw::box(debug, b, literal::orange);
+ }
+
+ mln::io::ppm::save(debug, "out_paragraph.ppm");
+ }
+
+ }
+ }
+
+//-------------------------------------------------------------
+// Preparation of the lines before linking them.
+// For each line we draw the top and the bottom of it.
+// Assuming than i is the number of the line. Then the top of the line
+// will be affected with the value 2 * i in the block image and the
+// bottom with 2 * i + 1.
+//
+//-------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void prepare_lines(const box2d& domain,
+ const util::array< line_info<L> >& lines,
+ image2d<value::int_u16>& blocks,
+ util::array<box2d>& rbbox)
+ {
+ std::map< int, std::vector< const box2d* > > drawn_lines;
+ const unsigned nlines = lines.nelements();
+
+ // For each line
+ for (unsigned l = 0; l < nlines; ++l)
+ {
+ // Rotation of the bounding box
+ box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
+ rbbox.append(b);
+
+ const unsigned index = l + 1;
+ const unsigned even_index = 2 * index;
+ const unsigned odd_index = even_index + 1;
+
+ // Top of the line
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the top of the
+ // line
+
+ const int col = b.pmax().col() + col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second;
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+
+ if (min_row - max_row <= 0)
+ break;
+ }
+
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+
+ // Bottom of the line
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the bottom of
+ // the line
+
+ const int col = b.pmin().col() - col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second;
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+
+ if (min_row - max_row <= 0)
+ break;
+ }
+
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+ }
+ }
+
+ template <typename L>
+ inline
+ void
+ process_left_link(image2d<value::int_u16>& blocks,
+ const util::array<box2d>& rbbox,
+ const util::array< line_info<L> >& lines,
+ util::array<value::int_u16>& left)
+ {
+ typedef value::int_u16 V;
+
+ // At the beginning each line is its own neighbor
+ for (unsigned i = 0; i < left.nelements(); ++i)
+ left(i) = i;
+
+ const unsigned nlines = lines.nelements();
+
+ // For each line
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(i).x_height();
+
+ // Starting points in the current line box
+ point2d c = rbbox(i).pcenter();
+ point2d q(rbbox(i).pmin().row() + ((c.row() - rbbox(i).pmin().row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(i).pmax().col()
+ - rbbox(i).pmin().col()) / 2;
+
+ // Left
+ {
+ // marge gauche
+ int
+ nleftima = c.col() - blocks.domain().pmin().col(),
+ // Distance gauche
+ nleft = std::min(nleftima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p - nleft - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; --p, --p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 0) // Looking for the bottom of a line
+ && left((*p2 >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 0) // Looking for the bottom of a line
+ && left((*p >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the top of the
+ // line. We are then looking for the bottom of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ // _________________________ |
+ // |_________________________| => Current line | Search direction
+ // v
+ // => First encountered top line
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second top |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ //
+ //
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p + 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; --nbh_p)
+ {
+ if ((*nbh_p) != literal::zero) // Not the background
+ {
+ if ((*nbh_p) % 2 == 0)// We have found the top of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the bottom of a line. We are looking if
+ // we have already encountered the top of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ (*nbh_p) - 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p > pstop
+ // Avoid loops
+ && left(((*nbh_p - 1) >> 1) - 1) != i)
+ left(i) = ((*nbh_p - 1) >> 1) - 1;
+
+ // We have found a complete line so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+ if (*nbh_p == end_p)
+ left(i) = (*nbh_p_copy >> 1) - 1;
+ }
+ }
+ }
+ }
+
+
+ // We assume that the lines have been rotated
+ template <typename L>
+ inline
+ void
+ process_right_link(image2d<value::int_u16>& blocks,
+ const util::array<box2d>& rbbox,
+ const util::array< line_info<L> >& lines,
+ util::array<value::int_u16>& right)
+ {
+ typedef value::int_u16 V;
+
+ // At the beginning each line is its own neighbor
+ for (unsigned i = 0; i < right.nelements(); ++i)
+ right(i) = i;
+
+ const unsigned nlines = lines.nelements();
+
+ // For each line
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(i).x_height();
+
+ // Starting points in the current line box
+ point2d c = rbbox(i).pcenter();
+ point2d q(rbbox(i).pmax().row() - ((rbbox(i).pmax().row() - c.row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(i).pmax().col()
+ - rbbox(i).pmin().col()) / 2;
+
+ // Right
+ {
+ int
+ nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
+ nright = std::min(nrightima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p + nright - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; ++p, ++p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 1) // Looking for the bottom of a line
+ && right(((*p2 - 1) >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 1) // Looking for the bottom of a line
+ && right(((*p - 1) >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the bottom of the
+ // line. We are then looking for the top of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ //
+ //
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second bottom |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ // => First encountered bottom line
+ // _________________________ ^
+ // |_________________________| => Current line | Search direction
+ // |
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p - 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; ++nbh_p)
+ {
+ if (*nbh_p != literal::zero) // Not the background
+ {
+ if (*nbh_p % 2 == 1) // We have found the bottom of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the top of a line. We are looking if
+ //we have already encountered the bottom of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ *nbh_p + 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p < pstop
+ // Avoid loops
+ && right((*nbh_p >> 1) - 1) != i)
+ right(i) = (*nbh_p >> 1) - 1;
+
+ // We have found a complete line, so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+
+ if (*nbh_p == end_p)
+ right(i) = ((*nbh_p_copy - 1) >> 1) - 1;
+ }
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------
+// Finalizing the links by merging information extracted from the left
+// and right links
+//-----------------------------------------------------------------------
+
+ template< typename L >
+ inline
+ void finalize_links(util::array<value::int_u16>& left,
+ util::array<value::int_u16>& right,
+ const util::array< line_info<L> >& lines)
+ {
+ const unsigned nlines = lines.nelements();
+
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ const unsigned left_value = left(i);
+ const unsigned right_value = right(i);
+
+ // If the right neighbor of my left neighbor is itself then its
+ // right neighbor is me
+ {
+ value::int_u16& v = right(left_value);
+
+ if (v == left_value)
+ v = i;
+ }
+
+ // If the left neighbor of my right neighbor is itself then its
+ // left neighbor is me
+ {
+ value::int_u16& v = left(right_value);
+
+ if (v == right_value)
+ v = i;
+ }
+ }
+ }
+
+ template <typename L>
+ inline
+ void extract_paragraphs(line_set<L>& lines,
+ const image2d<bool>& input)
+ {
+ typedef value::int_u16 V;
+
+ image2d<V> blocks(geom::rotate(input.domain(), -90, input.domain().pcenter()));
+ data::fill(blocks, 0);
+
+ util::array< line_info<L> > lines_info;
+
+ for_all_lines(l, lines)
+ {
+ if (lines(l).is_textline())
+ lines_info.append(lines(l));
+ }
+
+ const unsigned nlines = lines_info.nelements();
+ util::array<box2d> rbbox;
+ util::array<V> left(nlines);
+ util::array<V> right(nlines);
+ util::array<V> output;
+
+ rbbox.reserve(nlines);
+ output.reserve(nlines);
+
+ std::cout << "Preparing lines" << std::endl;
+ prepare_lines(input.domain(), lines_info, blocks, rbbox);
+// io::pgm::save(blocks, "blocks.pgm");
+ std::cout << "Linking left" << std::endl;
+ process_left_link(blocks, rbbox, lines_info, left);
+ std::cout << "Linking right" << std::endl;
+ process_right_link(blocks, rbbox, lines_info, right);
+ std::cout << "Finalizing links" << std::endl;
+ finalize_links(left, right, lines_info);
+ // std::cout << "Finalizing merging" << std::endl;
+ // finalize_line_merging(left, right, lines);
+ std::cout << "Extracting paragraphs" << std::endl;
+ filter::paragraph_links(left, right, output, lines_info, input);
+ }
+}
--
1.5.6.5
1
0
---
scribo/scribo/core/line_info.hh | 212 +++++-
scribo/scribo/core/stats.hh | 320 +++++++++
scribo/scribo/text/look_like_text_lines.hh | 16 +
scribo/scribo/text/merging.hh | 306 ++++++---
scribo/scribo/text/paragraphs.hh | 1027 ++++++++++++++++++++++++++++
5 files changed, 1774 insertions(+), 107 deletions(-)
create mode 100644 scribo/scribo/core/stats.hh
create mode 100644 scribo/scribo/text/paragraphs.hh
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index b8a8be3..c683be4 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -61,6 +61,9 @@
# include <scribo/core/internal/sort_comp_ids.hh>
# include <scribo/core/concept/serializable.hh>
+// DEBUG
+
+# include <scribo/core/stats.hh>
namespace scribo
{
@@ -145,6 +148,11 @@ namespace scribo
private:
void init_();
+ // DEBUG
+ stats< float > meanline_clusters_;
+ stats< float > baseline_clusters_;
+ };
+
};
} // end of namespace scribo::internal
@@ -273,7 +281,8 @@ namespace scribo
/// @}
-
+ bool chars_same_width() const;
+ unsigned get_first_char_height() const;
/// Force a new computation of statistics.
void force_stats_update();
@@ -301,6 +310,9 @@ namespace scribo
void update_components_type(component::Type type);
+ int compute_baseline();
+ int compute_meanline();
+
private: // Attributes
line_id_t id_;
mln::util::tracked_ptr<data_t> data_;
@@ -810,7 +822,7 @@ namespace scribo
int
line_info<L>::delta_of_line() const
{
- return char_width() + 2 * char_space();
+ return 2 * char_width() + 2 * char_space();
// FIXME: choose between:
// not enough: char_width + char_space
// too much: 2 * char_width
@@ -960,6 +972,145 @@ namespace scribo
}
template <typename L>
+ bool
+ line_info<L>::chars_same_width() const
+ {
+ // Only for the case of two-character words
+ if (card() == 2)
+ {
+ const component_set<L>& comp_set = data_->holder_.components();
+
+ const unsigned c1 = data_->components_(0);
+ const unsigned c2 = data_->components_(1);
+
+ if (data_->holder_.components()(c1).type() == component::Punctuation
+ || data_->holder_.components()(c2).type() == component::Punctuation)
+ return false;
+
+ const mln::box2d& bb1 = comp_set(c1).bbox();
+ const mln::box2d& bb2 = comp_set(c2).bbox();
+
+ const float w1 = bb1.width();
+ const float h1 = bb1.height();
+ const float w2 = bb2.width();
+ const float h2 = bb2.height();
+
+ const float space = std::max(bb1.pmin().col(), bb2.pmin().col()) -
+ std::min(bb1.pmax().col(), bb2.pmax().col());
+
+ const int dy = bb1.pmax().row() - bb2.pmax().row();
+
+ // The two characters must be distinct
+ if (space < 0)
+ return false;
+
+ if (// Approximately the same width
+ ((std::max(w1, w2) / std::min(w1, w2)) > 1.1f ||
+ // One character must not be smaller than the space between
+ // the two characters
+ (w1 < space || w2 < space))
+ // If the two characters have a different width they must also
+ // have a different height
+ && not (std::max(h1, h2) / std::min(h1, h2) <= 1.5f))
+ return false;
+
+ // Approximately aligned on baseline
+ if (std::abs(dy) > 10)
+ return false;
+
+ return true;
+ }
+
+ return false;
+ }
+
+ template< typename L >
+ unsigned
+ line_info<L>::get_first_char_height() const
+ {
+ const component_set<L>& comp_set = data_->holder_.components();
+ const unsigned c1 = data_->components_(0);
+ const mln::box2d& bb1 = comp_set(c1).bbox();
+
+ return bb1.height();
+ }
+
+ template <typename L>
+ int
+ line_info<L>::compute_baseline()
+ {
+ const unsigned nelements = data_->baseline_clusters_.nelements();
+
+ if (nelements == 2)
+ return data_->baseline_clusters_.mean();
+
+ mln::util::array< cluster_stats< float > >& clusters_b = data_->baseline_clusters_.clusters();
+
+ unsigned index = 0;
+ float min_base = 0.0f;
+ const unsigned clusters_b_nelements = clusters_b.nelements();
+
+ for (unsigned i = 0; i < clusters_b_nelements; ++i)
+ {
+ const unsigned clusters_b_i_nelements = clusters_b[i].nelements();
+
+ if (clusters_b_i_nelements >= min_base * 2.0f)
+ {
+ min_base = clusters_b_i_nelements;
+ index = i;
+ }
+ else if (clusters_b_i_nelements >= 0.5f * min_base)
+ {
+ if (clusters_b_i_nelements > 1 &&
+ clusters_b[index].median() > clusters_b[i].median())
+ {
+ if (clusters_b_i_nelements > min_base)
+ min_base = clusters_b_i_nelements;
+ index = i;
+ }
+ }
+ }
+
+ if (clusters_b[index].nelements() <= 2 && nelements <= 5)
+ return data_->baseline_clusters_.mean();
+
+ return clusters_b[index].median();
+ }
+
+ template <typename L>
+ int
+ line_info<L>::compute_meanline()
+ {
+ mln::util::array< cluster_stats< float > >& clusters_m = data_->meanline_clusters_.clusters();
+
+ unsigned index = 0;
+ float max_mean = 0.0f;
+ const unsigned clusters_m_nelements = clusters_m.nelements();
+
+ for (unsigned i = 0; i < clusters_m_nelements; ++i)
+ {
+ const unsigned clusters_m_i_nelements = clusters_m[i].nelements();
+
+ if (clusters_m_i_nelements >= max_mean * 2.0f)
+ {
+ max_mean = clusters_m_i_nelements;
+ index = i;
+ }
+ else if (clusters_m_i_nelements >= 0.5f * max_mean)
+ {
+ if (clusters_m[index].median() < clusters_m[i].median())
+ {
+ if (clusters_m_i_nelements > max_mean)
+ max_mean = clusters_m_i_nelements;
+ index = i;
+ }
+ }
+ }
+
+ return clusters_m[index].median();
+ }
+
+ template <typename L>
void
line_info<L>::force_stats_update()
{
@@ -970,8 +1121,8 @@ namespace scribo
typedef mln::value::int_u<12> median_data_t;
typedef mln::accu::stat::median_h<median_data_t> median_t;
median_t
- meanline,
- baseline,
+ // meanline,
+ // baseline,
char_space,
char_width;
@@ -995,6 +1146,10 @@ namespace scribo
mln::def::coord ref_line = mln_max(mln::def::coord);
+ // DEBUG
+ data_->baseline_clusters_.reset();
+ data_->meanline_clusters_.reset();
+
// Find a reference line to compute baselines and other attributes.
// Workaround to avoid overflow with int_u<12> in median accumulators.
//
@@ -1033,23 +1188,22 @@ namespace scribo
// incremented.
++used_comps;
-
// COMPUTE FEATURES DATA
- if (comp_set(c).has_features())
- {
- // Compute boldness
- boldness.take(comp_set(c).features().boldness);
- sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
-
- // Compute color
- color_red.take(comp_set(c).features().color.red());
- color_green.take(comp_set(c).features().color.green());
- color_blue.take(comp_set(c).features().color.blue());
-
- sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
- sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
- sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
- }
+ // if (comp_set(c).has_features())
+ // {
+ // // Compute boldness
+ // boldness.take(comp_set(c).features().boldness);
+ // sum2_boldness += mln::math::sqr<float>(comp_set(c).features().boldness);
+
+ // // Compute color
+ // color_red.take(comp_set(c).features().color.red());
+ // color_green.take(comp_set(c).features().color.green());
+ // color_blue.take(comp_set(c).features().color.blue());
+
+ // sum2_red += mln::math::sqr<unsigned>(comp_set(c).features().color.red());
+ // sum2_green += mln::math::sqr<unsigned>(comp_set(c).features().color.green());
+ // sum2_blue += mln::math::sqr<unsigned>(comp_set(c).features().color.blue());
+ // }
// FIXME: we must guaranty here that the relationship is from
// right to left, otherwise, the space size computed between
@@ -1083,12 +1237,14 @@ namespace scribo
// Meanline (compute an absolute value, from the top left corner
// of the highest character bounding box, excluding
// punctuation).
- meanline.take(bb.pmin().row() - ref_line);
+ // meanline.take(bb.pmin().row() - ref_line);
+ data_->meanline_clusters_.take(bb.pmin().row());
// Baseline (compute an absolute value, from the top left corner
// of the highest character bounding box, excluding
// punctuation).
- baseline.take(bb.pmax().row() - ref_line);
+ // baseline.take(bb.pmax().row() - ref_line);
+ data_->baseline_clusters_.take(bb.pmax().row());
}
// Finalization
@@ -1140,12 +1296,14 @@ namespace scribo
else
data_->char_width_ = char_width.to_result();
- mln::def::coord
- absolute_baseline_r = baseline.to_result() + ref_line,
- absolute_meanline_r = meanline.to_result() + ref_line;
+ // // mln::def::coord
+ // // absolute_baseline_r = baseline.to_result() + ref_line,
+ // // absolute_meanline_r = meanline.to_result() + ref_line;
- data_->baseline_ = absolute_baseline_r;
- data_->meanline_ = absolute_meanline_r;
+ // data_->baseline_ = absolute_baseline_r;
+ // data_->meanline_ = absolute_meanline_r;
+ data_->baseline_ = compute_baseline();
+ data_->meanline_ = compute_meanline();
data_->x_height_ = data_->baseline_ - data_->meanline_ + 1;
data_->d_height_ = data_->baseline_ - bbox.to_result().pmax().row();
data_->a_height_ = data_->baseline_ - bbox.to_result().pmin().row() + 1;
diff --git a/scribo/scribo/core/stats.hh b/scribo/scribo/core/stats.hh
new file mode 100644
index 0000000..570325c
--- /dev/null
+++ b/scribo/scribo/core/stats.hh
@@ -0,0 +1,320 @@
+#ifndef STATS_HH_
+# define STATS_HH_
+
+# include <vector>
+# include <algorithm>
+
+# include <mln/util/array.hh>
+
+using namespace mln;
+
+//---------------------------------------------------------------------------
+// compare_values
+//---------------------------------------------------------------------------
+
+template< typename T >
+struct compare_values
+{
+ bool operator() (const T& lhs,
+ const T& rhs)
+ {
+ return (lhs < rhs);
+ }
+};
+
+
+//---------------------------------------------------------------------------
+// cluster_stats
+//---------------------------------------------------------------------------
+
+template< typename T >
+class cluster_stats
+{
+public:
+ cluster_stats()
+ : mean_needs_update_(false), median_needs_update_(false),
+ variance_needs_update_(false), std_needs_update_(false), size_(0)
+ {
+ }
+
+ cluster_stats(const unsigned size)
+ : mean_needs_update_(false), median_needs_update_(false),
+ variance_needs_update_(false), std_needs_update_(false), size_(0)
+ {
+ data_.reserve(size);
+ }
+
+ void reset()
+ {
+ std::vector< T >& data = data_.hook_std_vector_();
+ data.clear();
+
+ size_ = 0;
+ needs_update();
+ }
+
+ void take(const T& value)
+ {
+ if (not size_)
+ {
+ min_ = value;
+ max_ = value;
+ }
+ else
+ {
+ if (value < min_)
+ min_ = value;
+ else if (value > max_)
+ max_ = value;
+ }
+
+ ++size_;
+ data_.append(value);
+ needs_update();
+ }
+
+ T mean()
+ {
+ if (mean_needs_update_)
+ {
+ mean_ = 0;
+
+ for (unsigned i = 0; i < size_; ++i)
+ mean_ += data_[i];
+
+ mean_ /= size_;
+ mean_needs_update_ = false;
+ }
+
+ return mean_;
+ }
+
+ T median()
+ {
+ if (median_needs_update_)
+ {
+ std::vector< T >& data = data_.hook_std_vector_();
+ std::sort(data.begin(), data.end(), compare_values< T >());
+
+ median_ = data[(size_ - 1) >> 1];
+ median_needs_update_ = false;
+ }
+
+ return median_;
+ }
+
+ T variance()
+ {
+ if (variance_needs_update_)
+ {
+ mean();
+ variance_ = 0;
+
+ for (unsigned i = 0; i < size_; ++i)
+ {
+ const T tmp = mean_ - data_[i];
+
+ variance_ += (tmp * tmp);
+ }
+
+ variance_ /= size_;
+ std_ = sqrt(variance_);
+
+ variance_needs_update_ = false;
+ std_needs_update_ = false;
+ }
+
+ return variance_;
+ }
+
+ T standard_deviation()
+ {
+ if (std_needs_update_)
+ variance();
+
+ return std_;
+ }
+
+ T min() { return min_; }
+ T max() { return max_; }
+
+ void sort()
+ {
+ std::vector< T >& data = data_.hook_std_vector_();
+ std::sort(data.begin(), data.end(), compare_values< T >());
+ }
+
+ unsigned nelements() { return size_; }
+
+ T operator[] (const unsigned index)
+ {
+ return data_[index];
+ }
+
+private:
+ void needs_update()
+ {
+ mean_needs_update_ = true;
+ median_needs_update_ = true;
+ variance_needs_update_ = true;
+ std_needs_update_ = true;
+ }
+
+private:
+ bool mean_needs_update_ : 1;
+ bool median_needs_update_ : 1;
+ bool variance_needs_update_ : 1;
+ bool std_needs_update_ : 1;
+ T mean_;
+ T median_;
+ T min_;
+ T max_;
+ T variance_;
+ T std_;
+ unsigned size_;
+ util::array< T > data_;
+};
+
+//---------------------------------------------------------------------------
+// stats
+//---------------------------------------------------------------------------
+
+template< typename T >
+class stats
+{
+public:
+ stats()
+ : clusters_need_update_(true), data_sorted_(false)
+ {
+ }
+
+ stats(const int size)
+ : clusters_need_update_(true), data_sorted_(false), data_(size)
+ {
+ }
+
+ void reset()
+ {
+ data_.reset();
+ std::vector< cluster_stats< T > >& clusters = clusters_.hook_std_vector_();
+ clusters.clear();
+ data_sorted_ = false;
+ clusters_need_update_ = true;
+ }
+
+ void take(const T& value)
+ {
+ data_.take(value);
+ clusters_need_update_ = true;
+ }
+
+ T mean()
+ {
+ return data_.mean();
+ }
+
+ T median()
+ {
+ return data_.median();
+ }
+
+ T variance()
+ {
+ return data_.variance();
+ }
+
+ T standard_deviation()
+ {
+ return data_.standard_deviation();
+ }
+
+ T min() { return data_.min(); }
+ T max() { return data_.max(); }
+
+ unsigned nelements() { return data_.nelements(); }
+
+ util::array< cluster_stats< T > >& clusters()
+ {
+ if (clusters_need_update_)
+ {
+ compute_clusters();
+ clusters_need_update_ = false;
+ }
+
+ return clusters_;
+ }
+
+private:
+ void compute_clusters()
+ {
+ std::vector< unsigned > clusters;
+ unsigned cluster_index = 1;
+
+ clusters.reserve(data_.nelements());
+
+ if (not data_sorted_)
+ {
+ data_.sort();
+ data_sorted_ = true;
+ }
+
+ unsigned i = 0;
+ const unsigned nelements = data_.nelements();
+
+ clusters[0] = cluster_index;
+ const T std = data_.standard_deviation();
+
+ for (i = 1; i < nelements - 1; ++i)
+ {
+ const T left_distance = data_[i] - data_[i - 1];
+ const T right_distance = data_[i + 1] - data_[i];
+
+ if (not ((left_distance <= 2 || left_distance < right_distance)
+ && left_distance <= std))
+ ++cluster_index;
+
+ clusters[i] = cluster_index;
+ }
+
+ if (nelements > 1
+ && data_[i] - data_[i - 1] > std)
+ ++cluster_index;
+
+ clusters[i] = cluster_index;
+
+ clusters_.clear();
+ clusters_.reserve(cluster_index);
+ cluster_index = 1;
+
+ i = 0;
+ while (i < nelements)
+ {
+ unsigned tmp = i;
+
+ while (tmp < nelements && clusters[tmp] == clusters[i])
+ ++tmp;
+
+ cluster_stats< T > cluster(tmp - i);
+
+ tmp = i;
+ while (tmp < nelements && clusters[tmp] == clusters[i])
+ {
+ cluster.take(data_[tmp]);
+ ++tmp;
+ }
+
+ clusters_.append(cluster);
+
+ i = tmp;
+ ++cluster_index;
+ }
+ }
+
+private:
+ bool clusters_need_update_ : 1;
+ bool data_sorted_ : 1;
+ cluster_stats< T > data_;
+ util::array< cluster_stats< T > > clusters_;
+};
+
+#endif /* STATS_HH_ */
diff --git a/scribo/scribo/text/look_like_text_lines.hh b/scribo/scribo/text/look_like_text_lines.hh
index 2ced7ce..80d9995 100644
--- a/scribo/scribo/text/look_like_text_lines.hh
+++ b/scribo/scribo/text/look_like_text_lines.hh
@@ -64,6 +64,22 @@ namespace scribo
inline
bool looks_like_a_text_line(const scribo::line_info<L>& l)
{
+ // Special case for two-letter words
+ if (l.card() == 2)
+ {
+ const float ratio = (float) l.bbox().width() / l.bbox().height();
+
+ if (// Minimal width / height ratio
+ ratio > 0.4f && ratio < 2.0f
+ // Minimal height
+ && l.bbox().height() >= 15
+ // Characters must have approximately the same width
+ && l.chars_same_width())
+ {
+ return true;
+ }
+ }
+
return
l.card() >= 3 // at least 3 components
&& l.bbox().height() > 10 // and minimal height
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index 26e2da7..8ee349b 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -188,21 +188,24 @@ namespace scribo
swap_ordering(l1, l2);
parent[l2] = l1; // The smallest label value is root.
- if (lines(l2).card() > lines(l1).card())
+ line_info<L>& l1_info = lines(l1);
+ line_info<L>& l2_info = lines(l2);
+
+ if (l2_info.card() > l1_info.card())
{
// we transfer data from the largest item to the root one.
- scribo::line_info<L> tmp = lines(l1);
- std::swap(lines(l1), lines(l2));
- lines(l1).fast_merge(tmp);
+ scribo::line_info<L> tmp = l1_info;
+ std::swap(l1_info, l2_info);
+ l1_info.fast_merge(tmp);
// We must set manually the tag for lines(l2) since it is
// not used directly in merge process so its tag cannot be
// updated automatically.
- lines(l2).update_tag(line::Merged);
- lines(l2).set_hidden(true);
+ l2_info.update_tag(line::Merged);
+ l2_info.set_hidden(true);
}
else
- lines(l1).fast_merge(lines(l2));
+ l1_info.fast_merge(l2_info);
// l1's tag is automatically set to line::Needs_Precise_Stats_Update
// l2's tag is automatically set to line::Merged
@@ -226,34 +229,57 @@ namespace scribo
bool between_separators(const scribo::line_info<L>& l1,
const scribo::line_info<L>& l2)
{
- // No separators found in image.
+ // No separators found in image.
mln_precondition(l1.holder().components().has_separators());
- unsigned
- col1 = l1.bbox().pcenter().col(),
- col2 = l2.bbox().pcenter().col();
+ const box2d& l1_bbox = l1.bbox();
+ const box2d& l2_bbox = l2.bbox();
+
+ const unsigned
+ col1 = l1_bbox.pcenter().col(),
+ col2 = l2_bbox.pcenter().col();
const mln_ch_value(L, bool)&
separators = l1.holder().components().separators();
+ // Checking for separators starting from 1 / 4, 3/ 4 and the
+ // center of the box
typedef const bool* sep_ptr_t;
- sep_ptr_t sep_ptr, end;
+ sep_ptr_t sep_ptr, sep_ptr_top, sep_ptr_bottom, end;
if (col1 < col2)
{
- sep_ptr = &separators(l1.bbox().pcenter());
+ const unsigned quarter =
+ ((l1_bbox.pcenter().row() - l1_bbox.pmin().row()) >> 1);
+
+ sep_ptr = &separators(l1_bbox.pcenter());
+ sep_ptr_top = &separators(point2d(l1_bbox.pmin().row() + quarter,
+ l1_bbox.pcenter().col()));
+ sep_ptr_bottom = &separators(point2d(l1_bbox.pmax().row() - quarter,
+ l1_bbox.pcenter().col()));
end = sep_ptr + col2 - col1;
}
else
{
- sep_ptr = &separators(l2.bbox().pcenter());
+ const unsigned quarter =
+ ((l2_bbox.pcenter().row() - l2_bbox.pmin().row()) >> 1);
+
+ sep_ptr = &separators(l2_bbox.pcenter());
+ sep_ptr_top = &separators(point2d(l2_bbox.pmin().row() + quarter,
+ l2_bbox.pcenter().col()));
+ sep_ptr_bottom = &separators(point2d(l2_bbox.pmax().row() - quarter,
+ l2_bbox.pcenter().col()));
end = sep_ptr + col1 - col2;
}
// If sep_ptr is true, then a separator is reached.
- while (!*sep_ptr && sep_ptr != end)
+ while (!*sep_ptr && !*sep_ptr_top && !*sep_ptr_bottom && sep_ptr != end)
+ {
++sep_ptr;
+ ++sep_ptr_top;
+ ++sep_ptr_bottom;
+ }
- return *sep_ptr;
+ return (*sep_ptr || *sep_ptr_top || *sep_ptr_bottom);
}
@@ -266,16 +292,78 @@ namespace scribo
*/
template <typename L>
- bool lines_can_merge(const scribo::line_info<L>& l1,
+ bool lines_can_merge(scribo::line_info<L>& l1,
const scribo::line_info<L>& l2)
{
// Parameters.
- const float x_ratio_max = 1.7, baseline_delta_max = 3;
+ const float x_ratio_max = 1.7f;
+ const float baseline_delta_max =
+ 0.5f * std::min(l1.x_height(), l2.x_height());
+
+ const box2d& l1_bbox = l1.bbox();
+ const box2d& l2_bbox = l2.bbox();
+
+ const point2d& l1_pmin = l1_bbox.pmin();
+ const point2d& l2_pmin = l2_bbox.pmin();
+ const point2d& l1_pmax = l1_bbox.pmax();
+ const point2d& l2_pmax = l2_bbox.pmax();
+
+ const bool l1_has_separators = l1.holder().components().has_separators();
+ const bool l1_l2_between_separators = (l1_has_separators) ?
+ between_separators(l1, l2) : false;
+ const float l_ted_cw = l2.char_width();
+
+ const float dx = std::max(l1_pmin.col(), l2_pmin.col())
+ - std::min(l1_pmax.col(), l2_pmax.col());
+ const float dy = std::max(l1_pmin.row(), l2_pmin.row())
+ - std::min(l1_pmax.row(), l2_pmax.row());
+
+ // Particular case of "
+ {
+ if (// Must have 2 characters
+ (l1.card() == 2
+ // The box height must be smaller than the touched line x height
+ && l1_bbox.height() < l2.x_height())
+ // The line must be vertically and horizontally close to
+ // the touched line
+ && (dx < l_ted_cw && dy < 0)
+ // No separator between the two lines
+ && not (l1_l2_between_separators))
+ {
+ // Line is then considered as punctuation
+ l1.update_type(line::Punctuation);
+ return true;
+ }
+ }
+
+ // Particular case like merging between a line and [5]
+ {
+ const mln::def::coord
+ top_row_l2 = l2_pmin.row(),
+ top_row_l1 = l1_pmin.row(),
+ bot_row = l2_pmax.row();
+ const float x1 = l1.x_height(), x2 = l2.x_height();
+ const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
+
+ if (// No separator
+ !l1_l2_between_separators
+ // The x height ration must be lower than 2
+ && (x_ratio < 2.0f)
+ // Baseline alignment
+ && (std::abs(bot_row - l1.baseline()) < baseline_delta_max)
+ // The top of the boxes must be aligned
+ && (std::abs(top_row_l2 - top_row_l1) < 5)
+ // Distance between the line and the touched line.
+ && dx < 5.0f * l_ted_cw)
+ {
+ return true;
+ }
+ }
// Similarity of x_height.
{
- float x1 = l1.x_height(), x2 = l2.x_height();
- float x_ratio = std::max(x1, x2) / std::min(x1, x2);
+ const float x1 = l1.x_height(), x2 = l2.x_height();
+ const float x_ratio = std::max(x1, x2) / std::min(x1, x2);
if (x_ratio > x_ratio_max)
return false;
}
@@ -287,22 +375,21 @@ namespace scribo
}
// left / right
- unsigned
- col1 = l1.bbox().pcenter().col(),
- col2 = l2.bbox().pcenter().col();
+ const unsigned
+ col1 = l1_bbox.pcenter().col(),
+ col2 = l2_bbox.pcenter().col();
if (col1 < col2)
{
- if ((col1 + l1.bbox().width() / 4) >= (col2 - l2.bbox().width() / 4))
+ if ((col1 + l1_bbox.width() / 4) >= (col2 - l2_bbox.width() / 4))
return false;
}
else
- if ((col2 + l2.bbox().width() / 4) >= (col1 - l1.bbox().width() / 4))
+ if ((col2 + l2_bbox.width() / 4) >= (col1 - l1_bbox.width() / 4))
return false;
-
// Check that there is no separator in between.
- if (l1.holder().components().has_separators())
- return ! between_separators(l1, l2);
+ if (l1_has_separators)
+ return ! l1_l2_between_separators;
return true;
}
@@ -310,14 +397,14 @@ namespace scribo
- template <typename L>
- int horizontal_distance(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ inline
+ int horizontal_distance(const box2d& l1,
+ const box2d& l2)
{
- if (l1.bbox().pcenter().col() < l2.bbox().pcenter().col())
- return l2.bbox().pmin().col() - l1.bbox().pmax().col();
+ if (l1.pcenter().col() < l2.pcenter().col())
+ return l2.pmin().col() - l1.pmax().col();
else
- return l1.bbox().pmin().col() - l2.bbox().pmax().col();
+ return l1.pmin().col() - l2.pmax().col();
}
@@ -339,7 +426,7 @@ namespace scribo
*/
template <typename L>
- bool non_text_and_text_can_merge(const scribo::line_info<L>& l_cur, // current
+ bool non_text_and_text_can_merge(scribo::line_info<L>& l_cur, // current
const scribo::line_info<L>& l_ted) // touched
{
if (l_cur.type() == line::Text || l_ted.type() != line::Text)
@@ -353,22 +440,42 @@ namespace scribo
&& between_separators(l_cur, l_ted))
return false;
+ const box2d& l_cur_bbox = l_cur.bbox();
+ const box2d& l_ted_bbox = l_ted.bbox();
+
+ const point2d& l_cur_pmin = l_cur_bbox.pmin();
+ const point2d& l_ted_pmin = l_ted_bbox.pmin();
+ const point2d& l_cur_pmax = l_cur_bbox.pmax();
+ const point2d& l_ted_pmax = l_ted_bbox.pmax();
+
+ const float dx = std::max(l_cur_pmin.col(), l_ted_pmin.col())
+ - std::min(l_cur_pmax.col(), l_ted_pmax.col());
+ const float l_ted_cw = l_ted.char_width();
+ const float l_ted_x_height = l_ted.x_height();
+
+ const unsigned l_cur_height = l_cur_bbox.height();
+ const unsigned l_cur_width = l_cur_bbox.width();
// General case (for tiny components like --> ',:."; <--):
- if (l_cur.bbox().height() < l_ted.x_height()
- && float(l_cur.bbox().width()) / float(l_cur.card()) < l_ted.char_width())
+ if (l_cur_height < l_ted_x_height
+ && l_cur_height > 0.05f * l_ted_x_height
+ && float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
+ && dx < l_ted_cw)
+ {
+ l_cur.update_type(line::Punctuation);
return true;
-
+ }
// Special case for '---':
if (// small height:
- l_cur.bbox().height() < l_ted.x_height()
+ l_cur_height < l_ted_x_height
// // not so long width:
- && l_cur.bbox().width() < 5 * l_ted.char_width()
+ && l_cur_width > 0.8 * l_ted_cw
+ && l_cur_width < 5 * l_ted_cw
// align with the 'x' center:
&& std::abs((l_ted.baseline() + l_ted.meanline()) / 2 - l_cur.bbox().pcenter().row()) < 7
// tiny spacing:
- && unsigned(horizontal_distance(l_cur, l_ted)) < l_ted.char_width()
+ && unsigned(horizontal_distance(l_cur_bbox, l_ted_bbox)) < 2 * l_ted_cw
)
{
return true;
@@ -378,10 +485,11 @@ namespace scribo
// Special case
// Looking for alignement.
- mln::def::coord
+ const mln::def::coord
top_row = l_cur.bbox().pmin().row(),
bot_row = l_cur.bbox().pmax().row();
+ const box2d& l_ted_ebbox = l_ted.ebbox();
// std::cout << "top_row = " << top_row << " - bot_row = " << bot_row << std::endl;
// std::cout << std::abs(bot_row - l_ted.baseline())
@@ -402,10 +510,11 @@ namespace scribo
// << std::endl;
if ((std::abs(bot_row - l_ted.baseline()) < 5
- || std::abs(bot_row - l_ted.ebbox().pmax().row()) < 5)
+ || std::abs(bot_row - l_ted_ebbox.pmax().row()) < 5)
&&
(std::abs(top_row - l_ted.meanline()) < 5
- || std::abs(top_row - l_ted.ebbox().pmin().row()) < 5))
+ || std::abs(top_row - l_ted_ebbox.pmin().row()) < 5)
+ && dx < 5.0f * l_ted_cw)
{
return true;
}
@@ -536,35 +645,33 @@ namespace scribo
if (parent[l] != l) // not a root, so has already merged, thus ignore it
continue;
- box2d b = lines(l).bbox();
+ const box2d& b = lines(l).bbox();
- unsigned tl, tr, ml, mc, mr, bl, br;
+ // unsigned tl, tr, ml, mc, mr, bl, br;
+
+ const box2d& b_ = lines(l).ebbox();
+
+ /*
+ tl tr
+ x---------------x
+ | |
+ | mc |
+ ml x x x mr
+ | |
+ | |
+ x---------------x
+ bl br
+
+ */
- {
- box2d b_ = lines(l).ebbox();
-
- /*
- tl tr
- x---------------x
- | |
- | mc |
- ml x x x mr
- | |
- | |
- x---------------x
- bl br
-
- */
-
-
- tl = billboard(b_.pmin());
- tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- br = billboard(b_.pmax());
- }
+
+ const unsigned tl = billboard(b_.pmin());
+ const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const unsigned br = billboard(b_.pmax());
typedef std::set<unsigned> set_t;
std::set<unsigned> labels;
@@ -596,11 +703,47 @@ namespace scribo
{
// Main case: it is an "included" box (falling in an already drawn box)
- if (lines(l).type() == line::Text) // the current object IS a text line
+ const line_info<L>& l_info = lines(l);
+ const line_info<L>& mc_info = lines(mc);
+
+ if (l_info.type() == line::Text) // the current object IS a text line
{
- if (lines(mc).type() == line::Text) // included in a text line => weird
+ if (mc_info.type() == line::Text) // included in a text line => weird
{
++count_txtline_IN_txtline;
+
+ // Particular case of "
+ // {
+ // if ((lines(l).card() == 2 &&
+ // lines(l).bbox().height() < lines(mc).x_height()) &&
+ // not (lines(l).holder().components().has_separators()
+ // && between_separators(lines(l),
+ // lines(mc))))
+
+ const box2d& l_bbox = l_info.bbox();
+ const box2d& mc_bbox = mc_info.bbox();
+
+ const point2d& l_pmin = l_bbox.pmin();
+ const point2d& mc_pmin = mc_bbox.pmin();
+ const point2d& l_pmax = l_bbox.pmax();
+ const point2d& mc_pmax = mc_bbox.pmax();
+
+ const float dx = std::max(l_pmin.col(), mc_pmin.col())
+ - std::min(l_pmax.col(), mc_pmax.col());
+ const float dy = std::max(l_pmin.row(), mc_pmin.row())
+ - std::min(l_pmax.row(), mc_pmax.row());
+ const float l_ted_cw = mc_info.char_width();
+
+ // We accept a line included into another only if it
+ // is horizontally close to the line's bbox and
+ // vertically aligned
+ // Obviously no separators between the two lines
+ if (dx < l_ted_cw && dy < 0
+ && not (l_info.holder().components().has_separators()
+ && between_separators(l_info, mc_info)))
+ l = do_union(lines, l, mc, parent);
+ // }
+
// std::cout << "weird: inclusion of a txt_line in a txt_line!" << std::endl;
/// Merge is perform if the current line is a
@@ -649,15 +792,15 @@ namespace scribo
// could be noise or garbage... So adding new
// criterions could fix this issue.
//
- //if (!non_text_and_text_can_merge(lines(l), lines(mc)))
- // continue;
+ if (!non_text_and_text_can_merge(lines(l), lines(mc)))
+ continue;
// Avoid the case when a large title ebbox overlap
// with a text column. In that case, the title may
// merge with punctuation from the text.
- if (lines(l).holder().components().has_separators()
- && between_separators(lines(l), lines(mc)))
- continue;
+ // if (lines(l).holder().components().has_separators()
+ // && between_separators(lines(l), lines(mc)))
+ // continue;
// Mark current line as punctuation.
lines(l).update_type(line::Punctuation);
@@ -832,9 +975,12 @@ namespace scribo
bool operator()(const scribo::line_id_t& l1, const scribo::line_id_t& l2) const
{
- if (lines_(l1).bbox().nsites() == lines_(l2).bbox().nsites())
+ const unsigned l1_nsites = lines_(l1).bbox().nsites();
+ const unsigned l2_nsites = lines_(l2).bbox().nsites();
+
+ if (l1_nsites == l2_nsites)
return l1 < l2;
- return lines_(l1).bbox().nsites() < lines_(l2).bbox().nsites();
+ return l1_nsites < l2_nsites;
}
scribo::line_set<L> lines_;
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
new file mode 100644
index 0000000..a3ff802
--- /dev/null
+++ b/scribo/scribo/text/paragraphs.hh
@@ -0,0 +1,1027 @@
+#include <mln/util/array.hh>
+#include <mln/accu/shape/bbox.hh>
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/draw/box.hh>
+#include <mln/data/convert.hh>
+#include <mln/value/int_u16.hh>
+#include <mln/value/label_16.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/geom/rotate.hh>
+#include <mln/literal/colors.hh>
+
+#include <scribo/core/macros.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/core/line_info.hh>
+
+using namespace mln;
+
+namespace scribo
+{
+
+ namespace internal
+ {
+
+//-------------------------------------
+// Extracting root of links
+//-------------------------------------
+ template <typename T>
+ inline
+ unsigned
+ find_root(util::array<T>& parent, unsigned x)
+ {
+ unsigned tmp_x = x;
+
+ while (parent(tmp_x) != tmp_x)
+ tmp_x = parent(tmp_x);
+
+ while (parent(x) != x)
+ {
+ const unsigned tmp = parent(x);
+ x = parent(x);
+ parent(tmp) = tmp_x;
+ }
+
+ return x;
+ }
+ }
+
+ namespace filter
+ {
+
+//---------------------------------------------------------------------
+// This method aims to cut the links between lines that do not fit the
+// different criteria
+//---------------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void paragraph_links(const util::array<value::int_u16>& left,
+ const util::array<value::int_u16>& right,
+ util::array<value::int_u16>& output,
+ const util::array< line_info<L> >& lines,
+ const image2d<bool>& input)
+ {
+ output = left;
+
+ const unsigned nlines = lines.nelements();
+
+ // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
+ // for (unsigned l = 0; l < nlines; ++l)
+ // {
+ // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
+ // }
+ // mln::io::ppm::save(links, "out_links.ppm");
+
+ // For each line
+ for (unsigned l = 0; l < nlines; ++l)
+ {
+ // Neighbors
+
+ const value::int_u16 left_nbh = output(l);
+ const value::int_u16 right_nbh = right(l);
+ const value::int_u16 lol_nbh = output(left_nbh);
+
+ // Line features
+ const float x_height = lines(l).x_height();
+ const float left_x_height = lines(left_nbh).x_height();
+ const float right_x_height = lines(right_nbh).x_height();
+
+ const box2d& left_line_bbox = lines(left_nbh).bbox();
+ const box2d& current_line_bbox = lines(l).bbox();
+ const box2d& right_line_bbox = lines(right_nbh).bbox();
+ const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
+
+ const int lline_col_min = left_line_bbox.pmin().col();
+ const int cline_col_min = current_line_bbox.pmin().col();
+ const int rline_col_min = right_line_bbox.pmin().col();
+ const int lolline_col_min = lol_line_bbox.pmin().col();
+
+ const int lline_col_max = left_line_bbox.pmax().col();
+ const int cline_col_max = current_line_bbox.pmax().col();
+ const int rline_col_max = right_line_bbox.pmax().col();
+
+ const int lline_cw = lines(left_nbh).char_width();
+ const int cline_cw = lines(l).char_width();
+ const int rline_cw = lines(right_nbh).char_width();
+ // Maximal x variation to consider two lines vertically aligned
+ const int delta_alignment = cline_cw;
+
+ // Checks the baseline distances of the two neighbors
+ {
+ // Current line baseline
+ const int c_baseline = lines(l).baseline();
+
+ // Baseline distance with the left and right neighbors
+ const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
+ const int rc_baseline = c_baseline -lines(right_nbh).baseline();
+
+ // Max baseline distance between the two neighbors
+ // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
+ // const float delta_baseline_min = std::min(lc_baseline,
+ // rc_baseline);
+
+ // Only two lines, meaning the current line has only one neighbor
+ bool two_lines = false;
+
+ // If the current line has no left neighbor
+ if (lc_baseline == 0)
+ {
+ // ror : right neighbor of the right neighbor
+ const value::int_u16 ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+
+ // If the current line has a ror
+ if (ror_nbh != right_nbh
+ && output(ror_nbh) == right_nbh)
+ {
+ // Distance between the current line and the right neighbor
+ const float right_distance =
+ current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();
+ // Distance between the right neighbor and the ror
+ const float ror_distance =
+ right_line_bbox.pcenter().row() - ror_line_bbox.pcenter().row();
+ // ror x_height
+ const float ror_x_height = lines(ror_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its right neighbor
+ if (right_distance > 1.4f * ror_distance
+ && std::max(ror_x_height, right_x_height) <
+ 1.2f * std::min(ror_x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // We determine the distance between the two lines
+ const float distance = lines(l).meanline() - lines(right_nbh).baseline();
+ two_lines = true;
+
+ // If the distance between the two lines is greater than
+ // the minimum x height of the two lines then we cut the
+ // link between them
+ if (distance > std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float min_char_width = std::min(rline_cw, cline_cw);
+ const float max_char_width = std::max(rline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its right neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ if (output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // If the current line has no right neighbor
+ else if (rc_baseline == 0)
+ {
+ // lol : left neighbor of the left neighbor
+
+ // If the left neighbor of the current line has a left neighbor
+ if (lol_nbh != left_nbh)
+ {
+ // Distance between the current line and its left neighbor
+ const float left_distance =
+ left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
+ // Distance between the left neighbor and the left
+ // neighbor of its left neighbor
+ const float lol_distance =
+ lol_line_bbox.pcenter().row() - left_line_bbox.pcenter().row();
+ // lol x height
+ const float lol_x_height = lines(lol_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its left neighbor
+ if (left_distance > 1.4f * lol_distance
+ && std::max(lol_x_height, left_x_height) <
+ 1.2f * std::min(lol_x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // Distance between the current line and it left neighbor
+ const float distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+
+ two_lines = true;
+
+ // If the distance is greater than the min x height
+ // between the two lines
+ if (distance > std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float min_char_width = std::min(lline_cw, cline_cw);
+ const float max_char_width = std::max(lline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its left neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // The current line has at least one left and one right neighbor
+ else // if (delta_baseline_max >= delta_baseline_min)
+ {
+ // Distance between the left and the current line
+ const float left_distance =
+ lines(left_nbh).meanline() - lines(l).baseline();
+ // Distance between the right and the current line
+ const float right_distance =
+ lines(l).meanline() - lines(right_nbh).baseline();
+
+ // If the left line is too far compared to the right one
+ // we cut the link with it
+ if (left_distance > 1.2f * right_distance
+ && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ // If the right line is too far compared to the left one
+ // we cut the link with it
+ else if (right_distance > 1.2f * left_distance
+ && std::max(x_height, right_x_height) > 1.2f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ // If the distance between the baseline of the left
+ // neighbor and the baseline of the current line is
+ // greater than the one between the current line baseline
+ // and the right line baseline we have to study the texte
+ // features of the right and left lines
+ if (lc_baseline > rc_baseline)
+ {
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ else
+ {
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ // If we arrive here, it means than the lines in the
+ // neighborhood of the current line are quite similar. We can
+ // then begin to study the indentations in order to determine
+ // the beginning of new paragraphs
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_aligned = false;
+ bool left_lol_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_llol = std::abs(lline_col_min - lolline_col_min);
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (dx_llol < delta_alignment)
+ left_lol_aligned = true;
+
+ if (left_right_aligned && left_lol_aligned)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ dx_lrc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________
+// |___________________| End of the paragraph - Current line
+// ________________________
+// |________________________| Beginning of a new one
+// ___________________________
+// |___________________________| Left of left of current line
+//
+// End of paragraph case : we try to find an end to the current paragraph
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_max_aligned = false;
+ bool left_current_min_aligned = false;
+ bool lol_current_min_aligned = false;
+ const bool lol_is_left = output(left_nbh) == left_nbh;
+ const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
+ const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
+ const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
+
+ if (dx_lr_max < delta_alignment)
+ left_right_max_aligned = true;
+
+ if (dx_lc_min < delta_alignment)
+ left_current_min_aligned = true;
+
+ if (dx_lolc_min < delta_alignment)
+ lol_current_min_aligned = true;
+
+ if (!left_current_min_aligned && left_right_max_aligned &&
+ (lol_current_min_aligned || lol_is_left))
+ {
+ const int dx_lrc = std::abs(lline_col_max - cline_col_max);
+ const int l_char_width = lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ cline_col_max < lline_col_max &&
+ cline_col_min < lline_col_min &&
+ (lline_col_min > lolline_col_min || lol_is_left))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above at the end of a column.
+//
+//-----------------------------------------------------------------------------
+
+ if (left_nbh == l)
+ {
+ const value::int_u16 ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (right_ror_min_aligned)
+ {
+ const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
+ const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_rrorc > l_char_width &&
+ dx_rrorc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_max >= rline_col_max)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ }
+
+
+ // Only debug
+
+ {
+ image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
+
+ for (unsigned i = 0; i < output.nelements(); ++i)
+ output(i) = internal::find_root(output, i);
+
+ mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ // if (lines(i).is_textline())
+ // {
+ // mln::draw::box(debug, lines(i).bbox(), literal::red);
+ nbbox(output(i)).take(lines(i).bbox());
+ // }
+ }
+
+ for (unsigned i = 0; i < nbbox.nelements(); ++i)
+ if (nbbox(i).is_valid())
+ {
+ box2d b = nbbox(i).to_result();
+ mln::draw::box(debug, b, literal::orange);
+ b.enlarge(1);
+ mln::draw::box(debug, b, literal::orange);
+ b.enlarge(1);
+ mln::draw::box(debug, b, literal::orange);
+ }
+
+ mln::io::ppm::save(debug, "out_paragraph.ppm");
+ }
+
+ }
+ }
+
+//-------------------------------------------------------------
+// Preparation of the lines before linking them.
+// For each line we draw the top and the bottom of it.
+// Assuming than i is the number of the line. Then the top of the line
+// will be affected with the value 2 * i in the block image and the
+// bottom with 2 * i + 1.
+//
+//-------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void prepare_lines(const box2d& domain,
+ const util::array< line_info<L> >& lines,
+ image2d<value::int_u16>& blocks,
+ util::array<box2d>& rbbox)
+ {
+ std::map< int, std::vector< const box2d* > > drawn_lines;
+ const unsigned nlines = lines.nelements();
+
+ // For each line
+ for (unsigned l = 0; l < nlines; ++l)
+ {
+ // Rotation of the bounding box
+ box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
+ rbbox.append(b);
+
+ const unsigned index = l + 1;
+ const unsigned even_index = 2 * index;
+ const unsigned odd_index = even_index + 1;
+
+ // Top of the line
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the top of the
+ // line
+
+ const int col = b.pmax().col() + col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second;
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+
+ if (min_row - max_row <= 0)
+ break;
+ }
+
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+
+ // Bottom of the line
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the bottom of
+ // the line
+
+ const int col = b.pmin().col() - col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second;
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+
+ if (min_row - max_row <= 0)
+ break;
+ }
+
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+ }
+ }
+
+ template <typename L>
+ inline
+ void
+ process_left_link(image2d<value::int_u16>& blocks,
+ const util::array<box2d>& rbbox,
+ const util::array< line_info<L> >& lines,
+ util::array<value::int_u16>& left)
+ {
+ typedef value::int_u16 V;
+
+ // At the beginning each line is its own neighbor
+ for (unsigned i = 0; i < left.nelements(); ++i)
+ left(i) = i;
+
+ const unsigned nlines = lines.nelements();
+
+ // For each line
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(i).x_height();
+
+ // Starting points in the current line box
+ point2d c = rbbox(i).pcenter();
+ point2d q(rbbox(i).pmin().row() + ((c.row() - rbbox(i).pmin().row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(i).pmax().col()
+ - rbbox(i).pmin().col()) / 2;
+
+ // Left
+ {
+ // marge gauche
+ int
+ nleftima = c.col() - blocks.domain().pmin().col(),
+ // Distance gauche
+ nleft = std::min(nleftima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p - nleft - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; --p, --p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 0) // Looking for the bottom of a line
+ && left((*p2 >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 0) // Looking for the bottom of a line
+ && left((*p >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the top of the
+ // line. We are then looking for the bottom of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ // _________________________ |
+ // |_________________________| => Current line | Search direction
+ // v
+ // => First encountered top line
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second top |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ //
+ //
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p + 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; --nbh_p)
+ {
+ if ((*nbh_p) != literal::zero) // Not the background
+ {
+ if ((*nbh_p) % 2 == 0)// We have found the top of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the bottom of a line. We are looking if
+ // we have already encountered the top of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ (*nbh_p) - 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p > pstop
+ // Avoid loops
+ && left(((*nbh_p - 1) >> 1) - 1) != i)
+ left(i) = ((*nbh_p - 1) >> 1) - 1;
+
+ // We have found a complete line so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+ if (*nbh_p == end_p)
+ left(i) = (*nbh_p_copy >> 1) - 1;
+ }
+ }
+ }
+ }
+
+
+ // We assume that the lines have been rotated
+ template <typename L>
+ inline
+ void
+ process_right_link(image2d<value::int_u16>& blocks,
+ const util::array<box2d>& rbbox,
+ const util::array< line_info<L> >& lines,
+ util::array<value::int_u16>& right)
+ {
+ typedef value::int_u16 V;
+
+ // At the beginning each line is its own neighbor
+ for (unsigned i = 0; i < right.nelements(); ++i)
+ right(i) = i;
+
+ const unsigned nlines = lines.nelements();
+
+ // For each line
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(i).x_height();
+
+ // Starting points in the current line box
+ point2d c = rbbox(i).pcenter();
+ point2d q(rbbox(i).pmax().row() - ((rbbox(i).pmax().row() - c.row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(i).pmax().col()
+ - rbbox(i).pmin().col()) / 2;
+
+ // Right
+ {
+ int
+ nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
+ nright = std::min(nrightima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p + nright - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; ++p, ++p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 1) // Looking for the bottom of a line
+ && right(((*p2 - 1) >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 1) // Looking for the bottom of a line
+ && right(((*p - 1) >> 1) - 1) != i) // No loops
+ {
+ // Neightbor found, we stop the research
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the bottom of the
+ // line. We are then looking for the top of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ //
+ //
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second bottom |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ // => First encountered bottom line
+ // _________________________ ^
+ // |_________________________| => Current line | Search direction
+ // |
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p - 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; ++nbh_p)
+ {
+ if (*nbh_p != literal::zero) // Not the background
+ {
+ if (*nbh_p % 2 == 1) // We have found the bottom of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the top of a line. We are looking if
+ //we have already encountered the bottom of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ *nbh_p + 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p < pstop
+ // Avoid loops
+ && right((*nbh_p >> 1) - 1) != i)
+ right(i) = (*nbh_p >> 1) - 1;
+
+ // We have found a complete line, so we stop the search
+ break;
+ }
+ }
+ }
+ }
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+
+ if (*nbh_p == end_p)
+ right(i) = ((*nbh_p_copy - 1) >> 1) - 1;
+ }
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------
+// Finalizing the links by merging information extracted from the left
+// and right links
+//-----------------------------------------------------------------------
+
+ template< typename L >
+ inline
+ void finalize_links(util::array<value::int_u16>& left,
+ util::array<value::int_u16>& right,
+ const util::array< line_info<L> >& lines)
+ {
+ const unsigned nlines = lines.nelements();
+
+ for (unsigned i = 0; i < nlines; ++i)
+ {
+ const unsigned left_value = left(i);
+ const unsigned right_value = right(i);
+
+ // If the right neighbor of my left neighbor is itself then its
+ // right neighbor is me
+ {
+ value::int_u16& v = right(left_value);
+
+ if (v == left_value)
+ v = i;
+ }
+
+ // If the left neighbor of my right neighbor is itself then its
+ // left neighbor is me
+ {
+ value::int_u16& v = left(right_value);
+
+ if (v == right_value)
+ v = i;
+ }
+ }
+ }
+
+ template <typename L>
+ inline
+ void extract_paragraphs(line_set<L>& lines,
+ const image2d<bool>& input)
+ {
+ typedef value::int_u16 V;
+
+ image2d<V> blocks(geom::rotate(input.domain(), -90, input.domain().pcenter()));
+ data::fill(blocks, 0);
+
+ util::array< line_info<L> > lines_info;
+
+ for_all_lines(l, lines)
+ {
+ if (lines(l).is_textline())
+ lines_info.append(lines(l));
+ }
+
+ const unsigned nlines = lines_info.nelements();
+ util::array<box2d> rbbox;
+ util::array<V> left(nlines);
+ util::array<V> right(nlines);
+ util::array<V> output;
+
+ rbbox.reserve(nlines);
+ output.reserve(nlines);
+
+ std::cout << "Preparing lines" << std::endl;
+ prepare_lines(input.domain(), lines_info, blocks, rbbox);
+// io::pgm::save(blocks, "blocks.pgm");
+ std::cout << "Linking left" << std::endl;
+ process_left_link(blocks, rbbox, lines_info, left);
+ std::cout << "Linking right" << std::endl;
+ process_right_link(blocks, rbbox, lines_info, right);
+ std::cout << "Finalizing links" << std::endl;
+ finalize_links(left, right, lines_info);
+ // std::cout << "Finalizing merging" << std::endl;
+ // finalize_line_merging(left, right, lines);
+ std::cout << "Extracting paragraphs" << std::endl;
+ filter::paragraph_links(left, right, output, lines_info, input);
+ }
+}
--
1.5.6.5
1
0
* tests/filter/Makefile.am: Add new targets.
* tests/filter/object_groups_mean_width.cc,
* tests/filter/object_groups_size_ratio.cc,
* tests/filter/object_groups_small.cc,
* tests/filter/object_groups_with_holes.cc,
* tests/filter/object_links_bbox_h_ratio.cc,
* tests/filter/object_links_bbox_overlap.cc,
* tests/filter/object_links_bbox_w_ratio.cc,
* tests/filter/object_links_bottom_aligned.cc,
* tests/filter/object_links_center_aligned.cc,
* tests/filter/object_links_left_aligned.cc,
* tests/filter/object_links_right_aligned.cc,
* tests/filter/object_links_top_aligned.cc: New.
* scribo/tests/img/the_valleys.pbm: New test data.
---
scribo/ChangeLog | 21 +++++
scribo/tests/filter/Makefile.am | 28 ++++++-
scribo/tests/filter/object_groups_mean_width.cc | 72 ++++++++++++++++
scribo/tests/filter/object_groups_size_ratio.cc | 71 ++++++++++++++++
scribo/tests/filter/object_groups_small.cc | 87 ++++++++++++++++++++
scribo/tests/filter/object_groups_with_holes.cc | 71 ++++++++++++++++
scribo/tests/filter/object_links_bbox_h_ratio.cc | 69 ++++++++++++++++
scribo/tests/filter/object_links_bbox_overlap.cc | 69 ++++++++++++++++
scribo/tests/filter/object_links_bbox_w_ratio.cc | 69 ++++++++++++++++
scribo/tests/filter/object_links_bottom_aligned.cc | 69 ++++++++++++++++
scribo/tests/filter/object_links_center_aligned.cc | 70 ++++++++++++++++
scribo/tests/filter/object_links_left_aligned.cc | 69 ++++++++++++++++
scribo/tests/filter/object_links_right_aligned.cc | 69 ++++++++++++++++
scribo/tests/filter/object_links_top_aligned.cc | 69 ++++++++++++++++
scribo/tests/img/the_valleys.pbm | Bin 0 -> 1724 bytes
15 files changed, 901 insertions(+), 2 deletions(-)
create mode 100644 scribo/tests/filter/object_groups_mean_width.cc
create mode 100644 scribo/tests/filter/object_groups_size_ratio.cc
create mode 100644 scribo/tests/filter/object_groups_small.cc
create mode 100644 scribo/tests/filter/object_groups_with_holes.cc
create mode 100644 scribo/tests/filter/object_links_bbox_h_ratio.cc
create mode 100644 scribo/tests/filter/object_links_bbox_overlap.cc
create mode 100644 scribo/tests/filter/object_links_bbox_w_ratio.cc
create mode 100644 scribo/tests/filter/object_links_bottom_aligned.cc
create mode 100644 scribo/tests/filter/object_links_center_aligned.cc
create mode 100644 scribo/tests/filter/object_links_left_aligned.cc
create mode 100644 scribo/tests/filter/object_links_right_aligned.cc
create mode 100644 scribo/tests/filter/object_links_top_aligned.cc
create mode 100644 scribo/tests/img/the_valleys.pbm
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index a0d4937..e7ac031 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,26 @@
2011-04-06 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Add more tests.
+
+ * tests/filter/Makefile.am: Add new targets.
+
+ * tests/filter/object_groups_mean_width.cc,
+ * tests/filter/object_groups_size_ratio.cc,
+ * tests/filter/object_groups_small.cc,
+ * tests/filter/object_groups_with_holes.cc,
+ * tests/filter/object_links_bbox_h_ratio.cc,
+ * tests/filter/object_links_bbox_overlap.cc,
+ * tests/filter/object_links_bbox_w_ratio.cc,
+ * tests/filter/object_links_bottom_aligned.cc,
+ * tests/filter/object_links_center_aligned.cc,
+ * tests/filter/object_links_left_aligned.cc,
+ * tests/filter/object_links_right_aligned.cc,
+ * tests/filter/object_links_top_aligned.cc: New.
+
+ * scribo/tests/img/the_valleys.pbm: New test data.
+
+2011-04-06 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Small fixes in Scribo.
* scribo/core/component_features_data.hh: Fix default
diff --git a/scribo/tests/filter/Makefile.am b/scribo/tests/filter/Makefile.am
index 8064646..8a27f91 100644
--- a/scribo/tests/filter/Makefile.am
+++ b/scribo/tests/filter/Makefile.am
@@ -1,5 +1,5 @@
-# Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-# (LRDE).
+# Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+# Laboratory (LRDE).
#
# This file is part of Olena.
#
@@ -19,11 +19,35 @@ include $(top_srcdir)/scribo/tests/tests.mk
check_PROGRAMS = \
objects_with_holes \
+ object_links_bbox_h_ratio \
+ object_links_bbox_w_ratio \
+ object_links_bbox_overlap \
+ object_links_bottom_aligned \
+ object_links_center_aligned \
+ object_links_left_aligned \
+ object_links_right_aligned \
+ object_links_top_aligned \
+ object_groups_mean_width \
+ object_groups_size_ratio \
+ object_groups_small \
+ object_groups_with_holes \
components_small \
components_large
objects_with_holes_SOURCES = objects_with_holes.cc
+object_links_bbox_h_ratio_SOURCES = object_links_bbox_h_ratio.cc
+object_links_bbox_w_ratio_SOURCES = object_links_bbox_w_ratio.cc
+object_links_bbox_overlap_SOURCES = object_links_bbox_overlap.cc
+object_links_bottom_aligned_SOURCES = object_links_bottom_aligned.cc
+object_links_center_aligned_SOURCES = object_links_center_aligned.cc
+object_links_left_aligned_SOURCES = object_links_left_aligned.cc
+object_links_right_aligned_SOURCES = object_links_right_aligned.cc
+object_links_top_aligned_SOURCES = object_links_top_aligned.cc
+object_groups_mean_width_SOURCES = object_groups_mean_width.cc
+object_groups_size_ratio_SOURCES = object_groups_size_ratio.cc
+object_groups_small_SOURCES = object_groups_small.cc
+object_groups_with_holes_SOURCES = object_groups_with_holes.cc
components_small_SOURCES = components_small.cc
components_large_SOURCES = components_large.cc
diff --git a/scribo/tests/filter/object_groups_mean_width.cc b/scribo/tests/filter/object_groups_mean_width.cc
new file mode 100644
index 0000000..81da661
--- /dev/null
+++ b/scribo/tests/filter/object_groups_mean_width.cc
@@ -0,0 +1,72 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/filter/object_groups_mean_width.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/the_valleys.pbm";
+
+ const unsigned ref[] = { 0, 1, 7, 3, 4, 3, 3, 7, 7, 3, 3, 3, 3 };
+ const unsigned filtered_ref[] = { 0, 0, 7, 3, 0, 3, 3, 7, 7, 3, 3, 3, 3 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ object_groups<L> groups = primitive::group::from_single_link(links);
+
+ for_all_groups(g, groups)
+ mln_assertion(groups(g) == ref[g]);
+
+ groups = filter::object_groups_mean_width(groups, 10);
+
+ for_all_groups(g, groups)
+ mln_assertion(groups(g) == filtered_ref[g]);
+
+}
diff --git a/scribo/tests/filter/object_groups_size_ratio.cc b/scribo/tests/filter/object_groups_size_ratio.cc
new file mode 100644
index 0000000..90e353a
--- /dev/null
+++ b/scribo/tests/filter/object_groups_size_ratio.cc
@@ -0,0 +1,71 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/filter/object_groups_size_ratio.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/the_valleys.pbm";
+
+ const unsigned ref[] = { 0, 1, 7, 3, 4, 3, 3, 7, 7, 3, 3, 3, 3 };
+ const unsigned filtered_ref[] = { 0, 1, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ object_groups<L> groups = primitive::group::from_single_link(links);
+
+ for_all_groups(g, groups)
+ mln_assertion(groups(g) == ref[g]);
+
+ groups = filter::object_groups_size_ratio(groups, 1.2, 0.3);
+
+ for_all_groups(g, groups)
+ mln_assertion(groups(g) == filtered_ref[g]);
+}
diff --git a/scribo/tests/filter/object_groups_small.cc b/scribo/tests/filter/object_groups_small.cc
new file mode 100644
index 0000000..165634d
--- /dev/null
+++ b/scribo/tests/filter/object_groups_small.cc
@@ -0,0 +1,87 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/filter/object_groups_small.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/the_valleys.pbm";
+
+ const unsigned ref[] = { 0, 1, 7, 3, 4, 3, 3, 7, 7, 3, 3, 3, 3 };
+ const unsigned filtered_ref[] = { 0, 0, 0, 3, 0, 3, 3, 0, 0, 3, 3, 3, 3 };
+ const unsigned size_ref[] = { 0, 1, 0, 7, 1, 0, 0, 3, 0, 0, 0, 0, 0 };
+
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ object_groups<L> groups = primitive::group::from_single_link(links);
+
+ for_all_groups(g, groups)
+ mln_assertion(groups(g) == ref[g]);
+
+ {
+ object_groups<L> groups2 = filter::object_groups_small(groups, 4);
+
+ for_all_groups(g, groups2)
+ mln_assertion(groups2(g) == filtered_ref[g]);
+ }
+
+ {
+ mln::util::array<unsigned> group_size;
+ object_groups<L> groups2 = filter::object_groups_small(groups, 4, group_size);
+
+ for_all_groups(g, groups2)
+ {
+ mln_assertion(groups2(g) == filtered_ref[g]);
+ mln_assertion(group_size(g) == size_ref[g]);
+ }
+ }
+
+}
diff --git a/scribo/tests/filter/object_groups_with_holes.cc b/scribo/tests/filter/object_groups_with_holes.cc
new file mode 100644
index 0000000..3b5b29f
--- /dev/null
+++ b/scribo/tests/filter/object_groups_with_holes.cc
@@ -0,0 +1,71 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/filter/object_groups_with_holes.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/the_valleys.pbm";
+
+ const unsigned ref[] = { 0, 1, 7, 3, 4, 3, 3, 7, 7, 3, 3, 3, 3 };
+ const unsigned filtered_ref[] = { 0, 0, 7, 3, 0, 3, 3, 7, 7, 3, 3, 3, 3 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ object_groups<L> groups = primitive::group::from_single_link(links);
+
+ for_all_groups(g, groups)
+ mln_assertion(groups(g) == ref[g]);
+
+ groups = filter::object_groups_with_holes(groups, 3);
+
+ for_all_groups(g, groups)
+ mln_assertion(groups(g) == filtered_ref[g]);
+}
diff --git a/scribo/tests/filter/object_links_bbox_h_ratio.cc b/scribo/tests/filter/object_links_bbox_h_ratio.cc
new file mode 100644
index 0000000..38be9b7
--- /dev/null
+++ b/scribo/tests/filter/object_links_bbox_h_ratio.cc
@@ -0,0 +1,69 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_bbox_h_ratio.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/phillip.pbm";
+
+ const unsigned ref[] = { 0, 2, 6, 8, 3, 4, 6, 9, 2, 4 };
+ const unsigned filtered_ref[] = { 0, 1, 6, 3, 3, 5, 6, 7, 8, 9 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_bbox_h_ratio(links, 1.2f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/filter/object_links_bbox_overlap.cc b/scribo/tests/filter/object_links_bbox_overlap.cc
new file mode 100644
index 0000000..a548e47
--- /dev/null
+++ b/scribo/tests/filter/object_links_bbox_overlap.cc
@@ -0,0 +1,69 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_bbox_overlap.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/wildly.pbm";
+
+ const unsigned ref[] = { 0, 1, 1 };
+ const unsigned filtered_ref[] = { 0, 1, 2 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_bbox_overlap(links, 0.1f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/filter/object_links_bbox_w_ratio.cc b/scribo/tests/filter/object_links_bbox_w_ratio.cc
new file mode 100644
index 0000000..9ea699a
--- /dev/null
+++ b/scribo/tests/filter/object_links_bbox_w_ratio.cc
@@ -0,0 +1,69 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_bbox_w_ratio.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/phillip.pbm";
+
+ const unsigned ref[] = { 0, 2, 6, 8, 3, 4, 6, 9, 2, 4 };
+ const unsigned filtered_ref[] = { 0, 1, 6, 8, 3, 5, 6, 7, 8, 4 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_bbox_w_ratio(links, 1.2f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/filter/object_links_bottom_aligned.cc b/scribo/tests/filter/object_links_bottom_aligned.cc
new file mode 100644
index 0000000..2fe783f
--- /dev/null
+++ b/scribo/tests/filter/object_links_bottom_aligned.cc
@@ -0,0 +1,69 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_bottom_aligned.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/phillip.pbm";
+
+ const unsigned ref[] = { 0, 2, 6, 8, 3, 4, 6, 9, 2, 4 };
+ const unsigned filtered_ref[] = { 0, 1, 6, 3, 3, 5, 6, 7, 2, 9 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_bottom_aligned(links, 1.f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/filter/object_links_center_aligned.cc b/scribo/tests/filter/object_links_center_aligned.cc
new file mode 100644
index 0000000..9c9e8cc
--- /dev/null
+++ b/scribo/tests/filter/object_links_center_aligned.cc
@@ -0,0 +1,70 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_center_aligned.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/phillip.pbm";
+
+ const unsigned ref[] = { 0, 2, 6, 8, 3, 4, 6, 9, 2, 4 };
+ const unsigned filtered_ref[] = { 0, 1, 2, 3, 3, 5, 6, 7, 8, 9 };
+
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_center_aligned(links, 1.f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/filter/object_links_left_aligned.cc b/scribo/tests/filter/object_links_left_aligned.cc
new file mode 100644
index 0000000..ce630f5
--- /dev/null
+++ b/scribo/tests/filter/object_links_left_aligned.cc
@@ -0,0 +1,69 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_left_aligned.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/phillip.pbm";
+
+ const unsigned ref[] = { 0, 2, 6, 8, 3, 4, 6, 9, 2, 4 };
+ const unsigned filtered_ref[] = { 0, 2, 2, 8, 4, 4, 6, 9, 2, 4 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_left_aligned(links, 80.f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/filter/object_links_right_aligned.cc b/scribo/tests/filter/object_links_right_aligned.cc
new file mode 100644
index 0000000..375ef9c
--- /dev/null
+++ b/scribo/tests/filter/object_links_right_aligned.cc
@@ -0,0 +1,69 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_right_aligned.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/phillip.pbm";
+
+ const unsigned ref[] = { 0, 2, 6, 8, 3, 4, 6, 9, 2, 4 };
+ const unsigned filtered_ref[] = { 0, 2, 2, 8, 4, 4, 6, 9, 2, 4 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_right_aligned(links, 80.f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/filter/object_links_top_aligned.cc b/scribo/tests/filter/object_links_top_aligned.cc
new file mode 100644
index 0000000..fe6730d
--- /dev/null
+++ b/scribo/tests/filter/object_links_top_aligned.cc
@@ -0,0 +1,69 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/load.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/filter/object_links_top_aligned.hh>
+
+#include "tests/data.hh"
+
+int main()
+{
+ using namespace scribo;
+ using namespace mln;
+
+ std::string img = SCRIBO_IMG_DIR "/phillip.pbm";
+
+ const unsigned ref[] = { 0, 2, 6, 8, 3, 4, 6, 9, 2, 4 };
+ const unsigned filtered_ref[] = { 0, 1, 2, 3, 3, 4, 6, 7, 8, 9 };
+
+ image2d<bool> input;
+ io::pbm::load(input, img.c_str());
+
+ typedef scribo::def::lbl_type V;
+ typedef image2d<V> L;
+
+ V nbboxes;
+ component_set<L>
+ text = primitive::extract::components(input, c8(), nbboxes);
+
+ object_links<L> links = primitive::link::with_single_left_link(text, 30);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == ref[l]);
+
+ links = scribo::filter::object_links_top_aligned(links, 1.f);
+
+ for_all_links(l, links)
+ mln_assertion(links(l) == filtered_ref[l]);
+}
diff --git a/scribo/tests/img/the_valleys.pbm b/scribo/tests/img/the_valleys.pbm
new file mode 100644
index 0000000000000000000000000000000000000000..3d40b8417a9c971de4352f791618f2c7727e0d92
GIT binary patch
literal 1724
zcmd_oKZ_GV6aerITLe<LtwMAMY_!Q;{vgRA8ALQ#+!=x#wueMK5dWNqAOt3$kaE9)
z@snsmij?^Xmt~8TH_H~8gJGZV?PRk-bLqvYCiB}jdGF0@*9v#p<Ia;uFP?V}*vn&f
z+&%kr`u-iOmdg7ljCH!74_LXfS841uYV2P7sKq{<oWFX*%B9Nv|24Gr?6A$+M~Cd`
z>D%+}*}7-tQn^s6R$1*q0kXBowyRx#<K%K<Z?>EI7s9`)u`sp7mTU2KSsHg#(Oj4#
zqnZzrp9)V=d8#mi9|B}EpQz+UHGY*WAAA7K?c~Q6jR0ak+0dA;V<jw6U88i`$8R?j
zA2C&`1Xb)(ov9M|#IZX~{@2tiZCwX&=|qWHcJmb;^g-tIKG#W&sM4XFziNBb&_z_u
z^=mi8QmEA7$d36Ga;Q~h8Fie1?jfD{n6vJQfrLmJb-h3(>i(L9c(cOzD~MF8)0OgZ
zh_R#*M4D7+?o8=Luo&)>VmxY?s+Xz&qi(PSDa_5>>01;FHv~{t_ywthN`3Y+weZ%t
z<@yUyxmBz;ph$pk*c<F5YIq6ps$P-l{3G6sGZ11=M*?$h_Jn5|k2hgOJV2E*Sg2IZ
zgTojoqe+MlaskUYQOqhZQ#*zMsYD$@l;}fLKTfj3*BO7uUcpegSUJYVmgaf+CD8yT
zqo1gPYYvhWa`QX3Hd7kF{ASY~>mqg+WDihhvS^#xz>!q-dfoyHFb_%H_;aZi6+l^D
mY*F(yDb3aP)f9AojpD@Z(%U=r)=sVN(m0v^>#_pR#Qp{`{Oqm(
literal 0
HcmV?d00001
--
1.5.6.5
1
0
* scribo/estim/object_groups_v_thickness.hh,
* scribo/filter/object_groups_v_thickness.hh: Rename as...
* scribo/estim/object_groups_mean_width.hh,
* scribo/filter/object_groups_mean_width.hh: ... this.
---
scribo/ChangeLog | 10 ++++++
..._v_thickness.hh => object_groups_mean_width.hh} | 26 ++++++++--------
..._v_thickness.hh => object_groups_mean_width.hh} | 32 ++++++++++----------
3 files changed, 39 insertions(+), 29 deletions(-)
rename scribo/scribo/estim/{object_groups_v_thickness.hh => object_groups_mean_width.hh} (75%)
rename scribo/scribo/filter/{object_groups_v_thickness.hh => object_groups_mean_width.hh} (68%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index e7ac031..0c785fc 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,15 @@
2011-04-06 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Rename few routines.
+
+ * scribo/estim/object_groups_v_thickness.hh,
+ * scribo/filter/object_groups_v_thickness.hh: Rename as...
+
+ * scribo/estim/object_groups_mean_width.hh,
+ * scribo/filter/object_groups_mean_width.hh: ... this.
+
+2011-04-06 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Add more tests.
* tests/filter/Makefile.am: Add new targets.
diff --git a/scribo/scribo/estim/object_groups_v_thickness.hh b/scribo/scribo/estim/object_groups_mean_width.hh
similarity index 75%
rename from scribo/scribo/estim/object_groups_v_thickness.hh
rename to scribo/scribo/estim/object_groups_mean_width.hh
index b626243..c74137a 100644
--- a/scribo/scribo/estim/object_groups_v_thickness.hh
+++ b/scribo/scribo/estim/object_groups_mean_width.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -23,13 +24,13 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_ESTIM_OBJECT_GROUPS_V_THICKNESS_HH
-# define SCRIBO_ESTIM_OBJECT_GROUPS_V_THICKNESS_HH
+#ifndef SCRIBO_ESTIM_OBJECT_GROUPS_MEAN_WIDTH_HH
+# define SCRIBO_ESTIM_OBJECT_GROUPS_MEAN_WIDTH_HH
/// \file
///
-/// \brief Estimate the mean object thickness for each group.
+/// \brief Estimate the mean object width for each group.
# include <mln/util/array.hh>
@@ -45,16 +46,16 @@ namespace scribo
using namespace mln;
- /*! \brief Estimate the mean object thickness for each group.
+ /*! \brief Estimate the mean object width for each group.
\param[in] groups Object groups information.
- \return An array of mean object thickness.
+ \return An array of mean object width.
*/
template <typename L>
mln::util::array<float>
- object_groups_v_thickness(const object_groups<L>& groups);
+ object_groups_mean_width(const object_groups<L>& groups);
@@ -63,9 +64,9 @@ namespace scribo
template <typename L>
util::array<float>
- object_groups_v_thickness(const object_groups<L>& groups)
+ object_groups_mean_width(const object_groups<L>& groups)
{
- trace::entering("scribo::estim::object_groups_v_thickness");
+ trace::entering("scribo::estim::object_groups_mean_width");
mln_precondition(groups.is_valid());
@@ -79,8 +80,7 @@ namespace scribo
for_all_comps(i, components)
if (components(i).is_valid())
{
- output(groups(i)) += components(i).bbox().pmax().row()
- - components(i).bbox().pmin().row();
+ output(groups(i)) += components(i).bbox().width();
++group_card(groups(i));
}
@@ -91,7 +91,7 @@ namespace scribo
else
output(i) = 0;
- trace::exiting("scribo::estim::object_groups_v_thickness");
+ trace::exiting("scribo::estim::object_groups_mean_width");
return output;
}
@@ -103,4 +103,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_ESTIM_OBJECT_GROUPS_V_THICKNESS_HH
+#endif // ! SCRIBO_ESTIM_OBJECT_GROUPS_MEAN_WIDTH_HH
diff --git a/scribo/scribo/filter/object_groups_v_thickness.hh b/scribo/scribo/filter/object_groups_mean_width.hh
similarity index 68%
rename from scribo/scribo/filter/object_groups_v_thickness.hh
rename to scribo/scribo/filter/object_groups_mean_width.hh
index 717b372..0668205 100644
--- a/scribo/scribo/filter/object_groups_v_thickness.hh
+++ b/scribo/scribo/filter/object_groups_mean_width.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -24,18 +24,18 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_FILTER_OBJECT_GROUPS_V_THICKNESS_HH
-# define SCRIBO_FILTER_OBJECT_GROUPS_V_THICKNESS_HH
+#ifndef SCRIBO_FILTER_OBJECT_GROUPS_MEAN_WIDTH_HH
+# define SCRIBO_FILTER_OBJECT_GROUPS_MEAN_WIDTH_HH
/// \file
///
-/// \brief Filter groups having their object mean thickness too low.
+/// \brief Filter groups having their object mean width too low.
# include <mln/util/array.hh>
# include <scribo/core/object_groups.hh>
-# include <scribo/estim/object_groups_v_thickness.hh>
+# include <scribo/estim/object_groups_mean_width.hh>
namespace scribo
@@ -47,10 +47,10 @@ namespace scribo
using namespace mln;
- /*! \brief Filter groups having their object mean thickness too low.
+ /*! \brief Filter groups having their object mean width too low.
\param[in] groups Object group information.
- \param[in] thickness Object group mean thickness must be greater
+ \param[in] width Object group mean width must be greater
or equal to this value.
\return Filtered object group information.
@@ -58,7 +58,7 @@ namespace scribo
*/
template <typename L>
object_groups<L>
- object_groups_v_thickness(const object_groups<L>& groups, float thickness);
+ object_groups_mean_width(const object_groups<L>& groups, float width);
# ifndef MLN_INCLUDE_ONLY
@@ -66,24 +66,24 @@ namespace scribo
template <typename L>
object_groups<L>
- object_groups_v_thickness(const object_groups<L>& groups, float thickness)
+ object_groups_mean_width(const object_groups<L>& groups, float width)
{
- trace::entering("scribo::filter::object_groups_v_thickness");
+ trace::entering("scribo::filter::object_groups_mean_width");
mln_precondition(groups.is_valid());
- mln_precondition(thickness >= 0);
+ mln_precondition(width >= 0);
mln::util::array<float>
- group_thickness = estim::object_groups_v_thickness(groups);
+ group_width = estim::object_groups_mean_width(groups);
object_groups<L> output = groups.duplicate();
output(0) = 0;
for (unsigned i = 1; i < output.nelements(); ++i)
if (groups.components()(i).is_valid()
- && group_thickness[groups(i)] < thickness)
+ && group_width[groups(i)] < width)
output(i) = 0;
- trace::exiting("scribo::filter::object_groups_v_thickness");
+ trace::exiting("scribo::filter::object_groups_mean_width");
return output;
}
@@ -96,4 +96,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_FILTER_OBJECT_GROUPS_V_THICKNESS_HH
+#endif // ! SCRIBO_FILTER_OBJECT_GROUPS_MEAN_WIDTH_HH
--
1.5.6.5
1
0

last-svn-commit-842-g78b8020 scribo/filter/object_groups_size_ratio.hh: Fix compilation.
by Guillaume Lazzara 03 May '11
by Guillaume Lazzara 03 May '11
03 May '11
---
scribo/ChangeLog | 4 ++++
scribo/scribo/filter/object_groups_size_ratio.hh | 18 +++++++++---------
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 0c785fc..0235162 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,9 @@
2011-04-06 Guillaume Lazzara <z(a)lrde.epita.fr>
+ * scribo/filter/object_groups_size_ratio.hh: Fix compilation.
+
+2011-04-06 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Rename few routines.
* scribo/estim/object_groups_v_thickness.hh,
diff --git a/scribo/scribo/filter/object_groups_size_ratio.hh b/scribo/scribo/filter/object_groups_size_ratio.hh
index fa59e7f..01676c4 100644
--- a/scribo/scribo/filter/object_groups_size_ratio.hh
+++ b/scribo/scribo/filter/object_groups_size_ratio.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -70,22 +70,22 @@ namespace scribo
// Counting the number of objects per group with a size ratio >
// max_ratio.
mln::util::array<unsigned>
- group_size(groups.size(), 0),
- invalid_object_in_group(groups.size(), 0);
+ group_size(groups.nelements(), 0),
+ invalid_object_in_group(groups.nelements(), 0);
for_all_comps(i, comps)
{
- if ((comps(i).bbox().nrows() / comps(i).bbox().ncols())
+ if ((comps(i).bbox().height() / comps(i).bbox().width())
>= max_size_ratio)
- ++invalid_object_in_group[groups[i]];
+ ++invalid_object_in_group(groups(i));
- ++group_size[groups[i]];
+ ++group_size(groups(i));
}
object_groups<L> output(groups);
output(0) = 0;
- for (unsigned i = 1; i < output.size(); ++i)
- if ((invalid_object_in_group[groups[i]] / static_cast<float>(group_size[groups[i]])) >= max_invalid_ratio_per_group
+ for (unsigned i = 1; i < output.nelements(); ++i)
+ if ((invalid_object_in_group(groups(i)) / static_cast<float>(group_size(groups(i)))) >= max_invalid_ratio_per_group
|| !comps(i).is_valid())
output(i) = 0;
--
1.5.6.5
1
0