Olena-patches
Threads by month
- ----- 2025 -----
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
June 2011
- 6 participants
- 92 discussions

23 Jun '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch icdar/hdlac2011 has been updated
via 93197d7eb85b70237cdfa3c2613051a5d19f1455 (commit)
via 5454dc1932181c13ef468b63d6a31334f2c2c8af (commit)
from 563c43fdb996c6891301d4ac83e8547f12a50e57 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
93197d7 Fix small fixes [to cleanup]
5454dc1 Make different algorithms for doc and hdoc use cases.
-----------------------------------------------------------------------
Summary of changes:
scribo/ChangeLog | 21 +
scribo/demo/viewer/viewer.cc | 10 +-
scribo/demo/viewer/xml_widget.cc | 2 +-
scribo/scribo/core/component_features_data.hh | 1 +
scribo/scribo/core/group_info.hh | 11 +
scribo/scribo/core/paragraph_set.hh | 29 +
.../scribo/io/xml/internal/compute_text_colour.hh | 1 +
.../io/xml/internal/extended_page_xml_visitor.hh | 49 +-
scribo/scribo/io/xml/internal/time_info.hh | 1 +
scribo/scribo/io/xml/save.hh | 2 +-
scribo/scribo/text/extract_paragraphs.hh | 1091 ++++++++++++++++
scribo/scribo/text/extract_paragraphs_hdoc.hh | 1327 ++++++++++++++++++++
scribo/scribo/text/merging.hh | 81 +-
scribo/scribo/text/{merging.hh => merging_hdoc.hh} | 10 +-
scribo/scribo/text/paragraphs.hh | 1270 -------------------
.../toolchain/internal/content_in_doc_functor.hh | 16 +-
.../toolchain/internal/content_in_hdoc_functor.hh | 9 +-
.../internal/text_in_doc_preprocess_functor.hh | 2 +
scribo/scribo/util/color_to_hex.hh | 1 +
scribo/scribo/util/component_precise_outline.hh | 5 +
20 files changed, 2575 insertions(+), 1364 deletions(-)
create mode 100644 scribo/scribo/text/extract_paragraphs.hh
create mode 100644 scribo/scribo/text/extract_paragraphs_hdoc.hh
copy scribo/scribo/text/{merging.hh => merging_hdoc.hh} (99%)
delete mode 100644 scribo/scribo/text/paragraphs.hh
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0

23 Jun '11
---
scribo/demo/viewer/viewer.cc | 10 ++--
scribo/demo/viewer/xml_widget.cc | 2 +-
scribo/scribo/core/component_features_data.hh | 1 +
scribo/scribo/core/group_info.hh | 11 ++++
.../scribo/io/xml/internal/compute_text_colour.hh | 1 +
.../io/xml/internal/extended_page_xml_visitor.hh | 49 +++++++++++++-------
scribo/scribo/io/xml/internal/time_info.hh | 1 +
scribo/scribo/io/xml/save.hh | 2 +-
.../toolchain/internal/content_in_doc_functor.hh | 12 ++---
.../internal/text_in_doc_preprocess_functor.hh | 2 +
scribo/scribo/util/color_to_hex.hh | 1 +
scribo/scribo/util/component_precise_outline.hh | 5 ++
12 files changed, 65 insertions(+), 32 deletions(-)
diff --git a/scribo/demo/viewer/viewer.cc b/scribo/demo/viewer/viewer.cc
index 173cb3b..12270e1 100644
--- a/scribo/demo/viewer/viewer.cc
+++ b/scribo/demo/viewer/viewer.cc
@@ -368,7 +368,7 @@ Viewer::add_text(QDomNode line)
QDomNode coords = line.firstChild();
- while (!coords.isNull() && !coords.toElement().tagName().contains("coords"))
+ while (!coords.isNull() && !coords.toElement().tagName().contains("Coords"))
coords = coords.nextSibling();
if (coords.isNull())
@@ -416,7 +416,7 @@ Viewer::add_region(QDomNode father, QString attr_id)
QString name = father.toElement().tagName();
region::RegionId id = static_cast<region::RegionId>(region_ids_[name]);
- while (!coords.isNull() && !coords.toElement().tagName().contains("coords"))
+ while (!coords.isNull() && !coords.toElement().tagName().contains("Coords"))
coords = coords.nextSibling();
if (coords.isNull())
@@ -471,7 +471,7 @@ Viewer::add_typo_lines(QDomNode father, QString attr_id)
// Looking for bbox coordinates.
{
QDomNode coords = father.firstChild();
- while (!coords.isNull() && !coords.toElement().tagName().contains("coords"))
+ while (!coords.isNull() && !coords.toElement().tagName().contains("Coords"))
coords = coords.nextSibling();
if (coords.isNull())
@@ -581,7 +581,7 @@ Viewer::load_xml(QString filename)
QDomElement root = doc.documentElement();
QDomNode page = root.firstChild();
- while (!page.isNull() && !page.toElement().tagName().contains("page"))
+ while (!page.isNull() && !page.toElement().tagName().contains("Page"))
page = page.nextSibling();
if (page.isNull())
@@ -611,7 +611,7 @@ Viewer::load_xml(QString filename)
QDomNode coords = region.firstChild();
- while (!coords.isNull() && !coords.toElement().tagName().contains("coords"))
+ while (!coords.isNull() && !coords.toElement().tagName().contains("Coords"))
coords = coords.nextSibling();
if (!container.isNull() && !coords.isNull())
diff --git a/scribo/demo/viewer/xml_widget.cc b/scribo/demo/viewer/xml_widget.cc
index 57d171f..642cf9a 100644
--- a/scribo/demo/viewer/xml_widget.cc
+++ b/scribo/demo/viewer/xml_widget.cc
@@ -86,7 +86,7 @@ void XmlWidget::NFS(QDomNode node, QTreeWidgetItem* item)
if (!node.isNull())
{
QString append;
- if (node.toElement().tagName().contains("point"))
+ if (node.toElement().tagName().contains("Point"))
{
QString x = node.toElement().attribute("x", "0");
QString y = node.toElement().attribute("y", "0");
diff --git a/scribo/scribo/core/component_features_data.hh b/scribo/scribo/core/component_features_data.hh
index b0a4e47..6064a0f 100644
--- a/scribo/scribo/core/component_features_data.hh
+++ b/scribo/scribo/core/component_features_data.hh
@@ -75,6 +75,7 @@ namespace scribo
}
+ inline
bool
operator==(const component_features_data& lhs,
const component_features_data& rhs)
diff --git a/scribo/scribo/core/group_info.hh b/scribo/scribo/core/group_info.hh
index 19a8ecc..05d5aae 100644
--- a/scribo/scribo/core/group_info.hh
+++ b/scribo/scribo/core/group_info.hh
@@ -80,11 +80,13 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
+ inline
group_info::group_info()
: valid_(false)
{
}
+ inline
group_info::group_info(unsigned id, const mln::util::array<component_id_t>& comps,
unsigned pixel_area, const box2d& bbox)
: id_(id), comps_(comps), valid_(true),
@@ -92,53 +94,62 @@ namespace scribo
{
}
+ inline
group_info::group_info(unsigned id, unsigned pixel_area, const box2d& bbox, bool valid)
: id_(id), valid_(valid),
pixel_area_(pixel_area), bbox_(bbox)
{
}
+ inline
const mln::util::array<component_id_t>&
group_info::component_ids() const
{
return comps_;
}
+ inline
mln::util::array<component_id_t>&
group_info::component_ids_()
{
return comps_;
}
+ inline
bool group_info::is_valid() const
{
return valid_;
}
+ inline
void
group_info::invalidate()
{
valid_ = false;
}
+ inline
unsigned
group_info::card() const
{
return comps_.nelements();
}
+ inline
unsigned
group_info::id() const
{
return id_;
}
+ inline
unsigned
group_info::pixel_area() const
{
return pixel_area_;
}
+ inline
const box2d&
group_info::bbox() const
{
diff --git a/scribo/scribo/io/xml/internal/compute_text_colour.hh b/scribo/scribo/io/xml/internal/compute_text_colour.hh
index 86fe454..f592c08 100644
--- a/scribo/scribo/io/xml/internal/compute_text_colour.hh
+++ b/scribo/scribo/io/xml/internal/compute_text_colour.hh
@@ -62,6 +62,7 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
+ inline
const char *
compute_text_colour(const value::rgb8& v)
{
diff --git a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
index d109827..da7a582 100644
--- a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
@@ -52,8 +52,12 @@
# include <scribo/io/xml/internal/print_page_preambule.hh>
# include <scribo/io/xml/internal/compute_text_colour.hh>
+# include <scribo/text/paragraphs_closing.hh>
+
+# include <scribo/util/component_precise_outline.hh>
# include <scribo/util/color_to_hex.hh>
+
namespace scribo
{
@@ -67,31 +71,29 @@ namespace scribo
{
- class extended_page_xml_visitor : public doc_serializer<extended_page_xml_visitor>
+ template <typename L>
+ class extended_page_xml_visitor
+ : public doc_serializer<extended_page_xml_visitor<L> >
{
public:
// Constructor
extended_page_xml_visitor(std::ofstream& out);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_set<L>& comp_set) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
- template <typename L>
void visit(const line_info<L>& line) const;
private: // Attributes
std::ofstream& output;
mutable image2d<scribo::def::lbl_type> elt_edge;
+ mutable L lbl_;
};
@@ -99,8 +101,8 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
- extended_page_xml_visitor::extended_page_xml_visitor(std::ofstream& out)
+ template <typename L>
+ extended_page_xml_visitor<L>::extended_page_xml_visitor(std::ofstream& out)
: output(out)
{
}
@@ -111,7 +113,7 @@ namespace scribo
//
template <typename L>
void
- extended_page_xml_visitor::visit(const document<L>& doc) const
+ extended_page_xml_visitor<L>::visit(const document<L>& doc) const
{
// Preambule
print_PAGE_preambule(output, doc, false);
@@ -150,8 +152,9 @@ namespace scribo
//
template <typename L>
void
- extended_page_xml_visitor::visit(const component_set<L>& comp_set) const
+ extended_page_xml_visitor<L>::visit(const component_set<L>& comp_set) const
{
+ lbl_ = comp_set.labeled_image();
for_all_comps(c, comp_set)
if (comp_set(c).is_valid())
comp_set(c).accept(*this);
@@ -162,8 +165,14 @@ namespace scribo
//
template <typename L>
void
- extended_page_xml_visitor::visit(const component_info<L>& info) const
+ extended_page_xml_visitor<L>::visit(const component_info<L>& info) const
{
+ // Getting component outline
+ scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
+ //const L& lbl = info.holder().labeled_image();
+ p_array<point2d>
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
+
switch (info.type())
{
case component::WhitespaceSeparator:
@@ -177,7 +186,7 @@ namespace scribo
<< " y_max=\"" << info.bbox().pmax().row() << "\""
<< ">" << std::endl;
- internal::print_box_coords(output, info.bbox(), " ");
+ internal::print_image_coords(output, par, " ");
output << " </whitespace_separator_region>" << std::endl;
break;
@@ -195,7 +204,7 @@ namespace scribo
<< " y_max=\"" << info.bbox().pmax().row() << "\""
<< ">" << std::endl;
- internal::print_box_coords(output, info.bbox(), " ");
+ internal::print_image_coords(output, par, " ");
output << " </vertical_separator_region>" << std::endl;
break;
@@ -213,7 +222,7 @@ namespace scribo
<< " y_max=\"" << info.bbox().pmax().row() << "\""
<< ">" << std::endl;
- internal::print_box_coords(output, info.bbox(), " ");
+ internal::print_image_coords(output, par, " ");
output << " </horizontal_separator_region>" << std::endl;
break;
@@ -250,13 +259,19 @@ namespace scribo
//
template <typename L>
void
- extended_page_xml_visitor::visit(const paragraph_set<L>& parset) const
+ extended_page_xml_visitor<L>::visit(const paragraph_set<L>& parset) const
{
const line_set<L>& lines = parset.lines();
+ // Prepare paragraph outlines.
+ L par_clo = text::paragraphs_closing(parset);
+
for_all_paragraphs(p, parset)
if (parset(p).is_valid())
{
+ p_array<mln_site(L)> par = scribo::util::component_precise_outline(par_clo
+ | parset(p).bbox(), p);
+
const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
// FIXME: compute that information on the whole paragraph
@@ -286,7 +301,7 @@ namespace scribo
output << "\">"
<< std::endl;
- internal::print_box_coords(output, parset(p).bbox(), " ");
+ internal::print_image_coords(output, parset(p).bbox(), " ");
// EXTENSIONS - Not officially supported
for_all_paragraph_lines(lid, line_ids)
@@ -303,7 +318,7 @@ namespace scribo
template <typename L>
void
- extended_page_xml_visitor::visit(const line_info<L>& line) const
+ extended_page_xml_visitor<L>::visit(const line_info<L>& line) const
{
if (line.has_text())
{
diff --git a/scribo/scribo/io/xml/internal/time_info.hh b/scribo/scribo/io/xml/internal/time_info.hh
index 6adc49a..760e856 100644
--- a/scribo/scribo/io/xml/internal/time_info.hh
+++ b/scribo/scribo/io/xml/internal/time_info.hh
@@ -48,6 +48,7 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
+ inline
std::string time_info()
{
time_t cur_time = time(NULL);
diff --git a/scribo/scribo/io/xml/save.hh b/scribo/scribo/io/xml/save.hh
index cc6905b..cc02c03 100644
--- a/scribo/scribo/io/xml/save.hh
+++ b/scribo/scribo/io/xml/save.hh
@@ -103,7 +103,7 @@ namespace scribo
template <typename L>
void save_page_extended(const document<L>& doc, std::ofstream& output)
{
- scribo::io::xml::internal::extended_page_xml_visitor f(output);
+ scribo::io::xml::internal::extended_page_xml_visitor<L> f(output);
doc.accept(f);
}
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index 52ee5b0..dde8196 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -256,14 +256,6 @@ namespace scribo
if (enable_line_seps)
components.add_separators(separators);
- // Debug
-# ifndef SCRIBO_NDEBUG
- debug::logger().log_image(debug::AuxiliaryResults,
- components.separators(),
- "all_separators");
-# endif // ! SCRIBO_NDEBUG
-
-
on_new_progress_label("Filtering components");
components = scribo::filter::components_small(components, 3);
@@ -373,6 +365,10 @@ namespace scribo
//===== DEBUG =====
# ifndef SCRIBO_NDEBUG
+ debug::logger().log_image(debug::AuxiliaryResults,
+ components.separators(),
+ "all_separators");
+
if (debug::logger().is_enabled())
{
if (enable_whitespace_seps)
diff --git a/scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh b/scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh
index 484e28e..d58b4b2 100644
--- a/scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh
+++ b/scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh
@@ -33,6 +33,8 @@
#include <mln/fun/v2v/rgb_to_luma.hh>
#include <mln/subsampling/antialiased.hh>
+#include <mln/util/timer.hh>
+
#include <scribo/binarization/sauvola.hh>
#include <scribo/binarization/sauvola_ms.hh>
diff --git a/scribo/scribo/util/color_to_hex.hh b/scribo/scribo/util/color_to_hex.hh
index e0ee33f..63dc81e 100644
--- a/scribo/scribo/util/color_to_hex.hh
+++ b/scribo/scribo/util/color_to_hex.hh
@@ -48,6 +48,7 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
+ inline
std::string color_to_hex(const value::rgb8& v)
{
std::string result = "#";
diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh
index 175160c..5c87fb4 100644
--- a/scribo/scribo/util/component_precise_outline.hh
+++ b/scribo/scribo/util/component_precise_outline.hh
@@ -122,6 +122,7 @@ namespace scribo
}
}
+ inline
void
left_up_after(int& direction,
const unsigned i)
@@ -155,6 +156,7 @@ namespace scribo
}
}
+ inline
void
right_up_after(int& direction,
const unsigned i)
@@ -186,6 +188,7 @@ namespace scribo
}
}
+ inline
void
right_down_after(int& direction,
const unsigned i)
@@ -219,6 +222,7 @@ namespace scribo
}
}
+ inline
void
left_down_after(int& direction,
const unsigned i)
@@ -274,6 +278,7 @@ namespace scribo
cur_pt = tmp;
}
+ inline
void
filter_points(const mln::p_array<point2d>& points,
mln::p_array<point2d>& waypoints)
--
1.5.6.5
1
0

last-svn-commit-924-g5454dc1 Make different algorithms for doc and hdoc use cases.
by Guillaume Lazzara 20 Jun '11
by Guillaume Lazzara 20 Jun '11
20 Jun '11
* scribo/core/paragraph_set.hh: Add an overload of
make::paragraph().
* scribo/text/paragraphs.hh: Move...
* scribo/text/extract_paragraphs.hh,
* scribo/text/extract_paragraphs_hdoc.hh: ... here.
* scribo/text/merging.hh: Remove holder() calls.
* scribo/text/merging_hdoc.hh: New.
* scribo/toolchain/internal/content_in_doc_functor.hh: Fix compilation.
* scribo/toolchain/internal/content_in_hdoc_functor.hh: Make use
of hdoc algorithms.
---
scribo/ChangeLog | 21 +
scribo/scribo/core/paragraph_set.hh | 29 +
scribo/scribo/text/extract_paragraphs.hh | 1091 ++++++++++++++++
scribo/scribo/text/extract_paragraphs_hdoc.hh | 1327 ++++++++++++++++++++
scribo/scribo/text/merging.hh | 81 +-
scribo/scribo/text/{merging.hh => merging_hdoc.hh} | 10 +-
scribo/scribo/text/paragraphs.hh | 1270 -------------------
.../toolchain/internal/content_in_doc_functor.hh | 4 +-
.../toolchain/internal/content_in_hdoc_functor.hh | 9 +-
9 files changed, 2510 insertions(+), 1332 deletions(-)
create mode 100644 scribo/scribo/text/extract_paragraphs.hh
create mode 100644 scribo/scribo/text/extract_paragraphs_hdoc.hh
copy scribo/scribo/text/{merging.hh => merging_hdoc.hh} (99%)
delete mode 100644 scribo/scribo/text/paragraphs.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 394c247..d1678c5 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,24 @@
+2011-06-20 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Make different algorithms for doc and hdoc use cases.
+
+ * scribo/core/paragraph_set.hh: Add an overload of
+ make::paragraph().
+
+ * scribo/text/paragraphs.hh: Move...
+
+ * scribo/text/extract_paragraphs.hh,
+ * scribo/text/extract_paragraphs_hdoc.hh: ... here.
+
+ * scribo/text/merging.hh: Remove holder() calls.
+
+ * scribo/text/merging_hdoc.hh: New.
+
+ * scribo/toolchain/internal/content_in_doc_functor.hh: Fix compilation.
+
+ * scribo/toolchain/internal/content_in_hdoc_functor.hh: Make use
+ of hdoc algorithms.
+
2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
Add a new toolchain for ICDAR contests.
diff --git a/scribo/scribo/core/paragraph_set.hh b/scribo/scribo/core/paragraph_set.hh
index 9214f20..c21359d 100644
--- a/scribo/scribo/core/paragraph_set.hh
+++ b/scribo/scribo/core/paragraph_set.hh
@@ -377,6 +377,35 @@ namespace scribo
}
+ /// Build a paragraph set from a set of line links.
+ ///
+ /// \param[in] llinks Line links; they are duplicated first and the
+ ///                   argument itself is not written to here.
+ ///
+ /// \return A paragraph_set holding, for every link entry that is
+ ///         non-null once replaced by its root, the corresponding
+ ///         line added to the paragraph given by the relabeling
+ ///         function.
+ template <typename L>
+ scribo::paragraph_set<L>
+ paragraph(const line_links<L>& llinks)
+ {
+   // Work on a duplicate so that each entry can be overwritten with
+   // the value returned by internal::find_root().
+   line_links<L> root_links = llinks.duplicate();
+
+   for_all_links(i, root_links)
+   {
+     root_links(i) = internal::find_root(root_links, i);
+   }
+
+   // npars is filled in by relabelfun().
+   // NOTE(review): presumably the number of distinct roots, i.e. the
+   // number of paragraphs -- confirm against mln::make::relabelfun.
+   unsigned npars;
+   mln::fun::i2v::array<unsigned>
+     relabel = mln::make::relabelfun(root_links.line_to_link(),
+                                     root_links.nelements() - 1, npars);
+   paragraph_set<L> parset(root_links, npars);
+
+   // Dispatch every linked line to its paragraph; entries whose link
+   // evaluates to false are skipped.
+   const scribo::line_set<L>& all_lines = root_links.lines();
+   for_all_links(i, root_links)
+   {
+     if (root_links(i))
+     {
+       value::int_u16 par_id = relabel(i);
+       parset(par_id).add_line(all_lines(i));
+     }
+   }
+
+   // Refresh the statistics of every paragraph once all lines are in.
+   for_all_paragraphs(par, parset)
+   {
+     parset(par).force_stats_update();
+   }
+
+   return parset;
+ }
+
// FIXME: move that code into paragraph_set constructor?
template <typename L>
scribo::paragraph_set<L>
diff --git a/scribo/scribo/text/extract_paragraphs.hh b/scribo/scribo/text/extract_paragraphs.hh
new file mode 100644
index 0000000..e8e9ac6
--- /dev/null
+++ b/scribo/scribo/text/extract_paragraphs.hh
@@ -0,0 +1,1091 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+///
+
+
+#ifndef SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
+# define SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
+
+#include <mln/util/array.hh>
+#include <mln/accu/shape/bbox.hh>
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/draw/box.hh>
+#include <mln/data/convert.hh>
+#include <mln/value/int_u16.hh>
+#include <mln/value/label_16.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/geom/rotate.hh>
+#include <mln/literal/colors.hh>
+
+#include <scribo/core/macros.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/core/line_links.hh>
+#include <scribo/core/line_info.hh>
+#include <scribo/core/paragraph_set.hh>
+
+using namespace mln;
+
+namespace scribo
+{
+
+ namespace text
+ {
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ namespace internal
+ {
+
+//-------------------------------------
+// Extracting root of links
+//-------------------------------------
+ /// Find the root of \p x in the forest encoded by \p parent.
+ ///
+ /// A node r is a root when parent(r) == r.
+ ///
+ /// \param[in,out] parent Parent array. On return, every node met on
+ ///                       the path from \p x to its root (including
+ ///                       \p x itself) points directly to that root.
+ /// \param[in]     x      Index of the node to look up.
+ ///
+ /// \return The index of the root of \p x.
+ template <typename T>
+ inline
+ unsigned
+ find_root(mln::util::array<T>& parent, unsigned x)
+ {
+   // First pass: climb up to the root.
+   unsigned root = x;
+   while (parent(root) != root)
+     root = parent(root);
+
+   // Second pass: path compression. The next node must be saved
+   // *before* re-parenting the current one. The previous code
+   // re-parented parent(x) instead of x, so the queried node (and
+   // most of its path) was never actually compressed.
+   while (parent(x) != x)
+   {
+     const unsigned next = parent(x);
+     parent(x) = root;
+     x = next;
+   }
+
+   return x;
+ }
+
+
+//---------------------------------------------------------------------
+// This method aims to cut the links between lines that do not fit the
+// different criteria
+//---------------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void paragraph_links(const line_links<L>& left,
+ const line_links<L>& right,
+ line_links<L>& output,
+ const line_set<L>& lines)
+ {
+ output = left.duplicate();
+
+ // const unsigned nlines = lines.nelements();
+
+ // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
+ // for (unsigned l = 0; l < nlines; ++l)
+ // {
+ // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
+ // }
+ // mln::io::ppm::save(links, "out_links.ppm");
+
+ // For each line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Neighbors
+
+ const line_id_t left_nbh = output(l);
+ const line_id_t right_nbh = right(l);
+ const line_id_t lol_nbh = output(left_nbh);
+
+ // Line features
+ const float x_height = lines(l).x_height();
+ const float left_x_height = lines(left_nbh).x_height();
+ const float right_x_height = lines(right_nbh).x_height();
+
+ const box2d& left_line_bbox = lines(left_nbh).bbox();
+ const box2d& current_line_bbox = lines(l).bbox();
+ const box2d& right_line_bbox = lines(right_nbh).bbox();
+ const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
+
+ const int lline_col_min = left_line_bbox.pmin().col();
+ const int cline_col_min = current_line_bbox.pmin().col();
+ const int rline_col_min = right_line_bbox.pmin().col();
+ const int lolline_col_min = lol_line_bbox.pmin().col();
+
+ const int lline_col_max = left_line_bbox.pmax().col();
+ const int cline_col_max = current_line_bbox.pmax().col();
+ const int rline_col_max = right_line_bbox.pmax().col();
+
+ const int lline_cw = lines(left_nbh).char_width();
+ const int cline_cw = lines(l).char_width();
+ const int rline_cw = lines(right_nbh).char_width();
+ // Maximal x variation to consider two lines vertically aligned
+ const int delta_alignment = cline_cw;
+
+ // Checks the baseline distances of the two neighbors
+ {
+ // Current line baseline
+ const int c_baseline = lines(l).baseline();
+
+ // Baseline distance with the left and right neighbors
+ const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
+ const int rc_baseline = c_baseline -lines(right_nbh).baseline();
+
+ // Max baseline distance between the two neighbors
+ // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
+ // const float delta_baseline_min = std::min(lc_baseline,
+ // rc_baseline);
+
+ // Only two lines, meaning the current line has only one neighbor
+ bool two_lines = false;
+
+ // If the current line has no left neighbor
+ if (lc_baseline == 0)
+ {
+ // ror : right neighbor of the right neighbor
+ const line_id_t ror_nbh = right(right_nbh);
+ //const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+
+ // If the current line has a ror
+ if (ror_nbh != right_nbh
+ && output(ror_nbh) == right_nbh)
+ {
+ // Distance between the current line and the right neighbor
+ const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
+ // Distance between the right neighbor and the ror
+ const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
+ // ror x_height
+ const float ror_x_height = lines(ror_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its right neighbor
+ if (right_distance > 1.4f * ror_distance
+ && std::max(ror_x_height, right_x_height) <
+ 1.2f * std::min(ror_x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // We determine the distance between the two lines
+ const float distance = lines(l).meanline() - lines(right_nbh).baseline();
+ two_lines = true;
+
+ // If the distance between the two lines is greater than
+ // twice the minimum x height of the two lines then we cut
+ // the link between them
+ if (distance > 2.0f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float min_char_width = std::min(rline_cw, cline_cw);
+ const float max_char_width = std::max(rline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its right neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ if (output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // If the current line has no right neighbor
+ else if (rc_baseline == 0)
+ {
+ // lol : left neighbor of the left neighbor
+
+ // If the left neighbor of the current line has a left neighbor
+ if (lol_nbh != left_nbh)
+ {
+ // Distance between the current line and its left neighbor
+ const float left_distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+ // Distance between the left neighbor and the left
+ // neighbor of its left neighbor
+ const float lol_distance = lines(lol_nbh).meanline() -
+ lines(left_nbh).baseline();
+ // lol x height
+ const float lol_x_height = lines(lol_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its left neighbor
+ if (left_distance > 1.4f * lol_distance
+ && std::max(lol_x_height, left_x_height) <
+ 1.2f * std::min(lol_x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // Distance between the current line and its left neighbor
+ const float distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+
+ two_lines = true;
+
+ // If the distance is greater than twice the min x height
+ // of the two lines
+ if (distance > 2.0f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float min_char_width = std::min(lline_cw, cline_cw);
+ const float max_char_width = std::max(lline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its left neighbor
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // The current line has at least one left and one right neighbor
+ else // if (delta_baseline_max >= delta_baseline_min)
+ {
+ // Distance between the left and the current line
+ const float left_distance =
+ lines(left_nbh).meanline() - lines(l).baseline();
+ // Distance between the right and the current line
+ const float right_distance =
+ lines(l).meanline() - lines(right_nbh).baseline();
+
+ // If the left line is too far compared to the right one
+ // we cut the link with it
+ if (left_distance > 1.2f * right_distance
+ && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ // If the right line is too far compared to the left one
+ // we cut the link with it
+ else if (right_distance > 1.2f * left_distance
+ && std::max(x_height, right_x_height) > 1.2f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ // If the distance between the baseline of the left
+ // neighbor and the baseline of the current line is
+ // greater than the one between the current line baseline
+ // and the right line baseline we have to study the texte
+ // features of the right and left lines
+ if (lc_baseline > rc_baseline)
+ {
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f) &&
+ !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ else
+ {
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.2f)
+ && !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ // If we arrive here, it means than the lines in the
+ // neighborhood of the current line are quite similar. We can
+ // then begin to study the indentations in order to determine
+ // the beginning of new paragraphs
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_aligned = false;
+ bool left_lol_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_llol = std::abs(lline_col_min - lolline_col_min);
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (dx_llol < delta_alignment)
+ left_lol_aligned = true;
+
+ if (left_right_aligned && left_lol_aligned)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ dx_lrc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________
+// |___________________| End of the paragraph - Current line
+// ________________________
+// |________________________| Beginning of a new one
+// ___________________________
+// |___________________________| Left of left of current line
+//
+// End of paragraph case : we try to find an end to the current paragraph
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_max_aligned = false;
+ bool left_current_min_aligned = false;
+ bool lol_current_min_aligned = false;
+ const bool lol_is_left = output(left_nbh) == left_nbh;
+ const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
+ const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
+ const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
+
+ if (dx_lr_max < delta_alignment)
+ left_right_max_aligned = true;
+
+ if (dx_lc_min < delta_alignment)
+ left_current_min_aligned = true;
+
+ if (dx_lolc_min < delta_alignment)
+ lol_current_min_aligned = true;
+
+ if (!left_current_min_aligned && left_right_max_aligned &&
+ (lol_current_min_aligned || lol_is_left))
+ {
+ const int dx_lrc = std::abs(lline_col_max - cline_col_max);
+ const int l_char_width = lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ cline_col_max < lline_col_max &&
+ cline_col_min < lline_col_min &&
+ (lline_col_min > lolline_col_min || lol_is_left))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above at the end of a column.
+//
+//-----------------------------------------------------------------------------
+
+ if (left_nbh == l)
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (right_ror_min_aligned)
+ {
+ const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
+ const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_rrorc > l_char_width &&
+ dx_rrorc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_max >= rline_col_max)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ }
+
+
+ // Only debug
+
+ // {
+ // image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
+
+ // for (unsigned i = 0; i < output.nelements(); ++i)
+ // output(i) = scribo::make::internal::find_root(output, i);
+
+ // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
+ // for_all_lines(l, lines)
+ // if (lines(l).is_textline())
+ // {
+ // // if (lines(i).is_textline())
+ // // {
+ // // mln::draw::box(debug, lines(i).bbox(), literal::red);
+ // nbbox(output(l)).take(lines(l).bbox());
+ // // }
+ // }
+
+ // for (unsigned i = 0; i < nbbox.nelements(); ++i)
+ // if (nbbox(i).is_valid())
+ // {
+ // box2d b = nbbox(i).to_result();
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // }
+
+ // mln::io::ppm::save(debug, "out_paragraph.ppm");
+ // }
+
+ }
+
+//-------------------------------------------------------------
+// Preparation of the lines before linking them.
+// For each text line we draw its top and its bottom in the block image.
+// With l the id of the line and i = l + 1, the top of the line is drawn
+// with the value 2 * i and the bottom with the value 2 * i + 1. The
+// rotated bounding box of each text line is stored in rbbox(l).
+//
+//-------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void prepare_lines(const box2d& domain,
+ const line_set<L>& lines,
+ L& blocks,
+ mln::util::array<box2d>& rbbox)
+ {
+ std::map< int, std::vector< const box2d* > > drawn_lines;
+ // drawn_lines: column -> boxes (entries of rbbox) already drawn in that column.
+
+ // For each text line
+ // (non-text lines are skipped: nothing is drawn or stored for them)
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Rotation of the bounding box (geom::rotate with -90 around domain.pcenter())
+ box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
+// rbbox.append(b);
+ rbbox(l) = b;
+ // index is l + 1, so neither drawn value can be 0 (0 is read as background by the link search).
+ const unsigned index = l + 1;
+ const unsigned even_index = 2 * index;
+ const unsigned odd_index = even_index + 1;
+
+ // Top of the line (drawn with even_index)
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the top of the
+ // line: we start at b.pmax().col() and increase the column on conflict
+
+ const int col = b.pmax().col() + col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second; // NOTE: shadows the `lines` parameter
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+ // Stop at the first recorded box whose row range intersects the one of b
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+ // [min_row, max_row] is the row intersection; it is non-empty when min_row <= max_row
+ if (min_row - max_row <= 0)
+ break;
+ }
+ // i == nb_lines: no conflict in this column, so we draw here; otherwise try the next column
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+
+ // Bottom of the line (drawn with odd_index)
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the bottom of
+ // the line: we start at b.pmin().col() and decrease the column on conflict
+
+ const int col = b.pmin().col() - col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second; // NOTE: shadows the `lines` parameter
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+ // Stop at the first recorded box whose row range intersects the one of b
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+ // [min_row, max_row] is the row intersection; it is non-empty when min_row <= max_row
+ if (min_row - max_row <= 0)
+ break;
+ }
+ // i == nb_lines: no conflict in this column, so we draw here; otherwise try the next column
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+ }
+ }
+
+ template <typename L>
+ inline
+ void
+ process_left_link(L& blocks, // image holding the values drawn by prepare_lines
+ const mln::util::array<box2d>& rbbox, // rotated bounding boxes, read as rbbox(l)
+ const line_set<L>& lines, // lines to link; only text lines are searched
+ line_links<L>& left) // output: left(l) is the neighbor found for l, or l itself
+ {
+ typedef scribo::def::lbl_type V;
+ // Finds, for each text line, its left neighbor by scanning `blocks` from the center of its rotated box.
+ // At the beginning each text line is its own neighbor (non-text lines get 0)
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ left(l) = l;
+ else
+ left(l) = 0;
+
+ // Values read in `blocks` are decoded as: even value v -> line (v >> 1) - 1, odd value v -> line ((v - 1) >> 1) - 1.
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search (1.5 times the x height, truncated to int)
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box: c is its center, q has the same column and a row a quarter of the way from the min row to c
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col());
+ // midcol: half of the column extent of the rotated box
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Left
+ {
+ // Left margin: number of columns between c and the min column of the domain of blocks
+ int
+ nleftima = c.col() - blocks.domain().pmin().col(),
+ // Left search distance, limited by the left margin
+ nleft = std::min(nleftima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search (excluded): nleft + 1 elements before p
+ *pstop = p - nleft - 1,
+ // Line neighbor (null as long as nothing has been found)
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit (p and p2 move together; p2 is tested first)
+ for (; p != pstop; --p, --p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 0) // Even value: looking for the top of a line
+ && left((*p2 >> 1) - 1) != l) // No loops: that line must not already have l as left neighbor
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 0) // Even value: looking for the top of a line
+ && left((*p >> 1) - 1) != l) // No loops: that line must not already have l as left neighbor
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the top of the
+ // line. We are then looking for the bottom of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ // _________________________ |
+ // |_________________________| => Current line | Search direction
+ // v
+ // => First encountered top line
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second top |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ //
+ //
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh; // top values met so far
+ const V end_p = *nbh_p + 1; // value of the bottom of the first encountered line
+ const V* nbh_p_copy = nbh_p; // remembers where the first top was met
+ // NOTE(review): this scan only ends on end_p or on a complete line; it has no bound of its own - confirm it cannot leave the image.
+ for (; *nbh_p != end_p; --nbh_p)
+ {
+ if ((*nbh_p) != literal::zero) // Not the background
+ {
+ if ((*nbh_p) % 2 == 0)// We have found the top of
+ // another line (or of the first one): remember it
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the bottom of a line. We are looking if
+ // we have already encountered the top of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ (*nbh_p) - 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range. NOTE(review): pstop derives from p; confirm this test is meaningful when the scan started from p2
+ nbh_p > pstop
+ // Avoid loops
+ && left(((*nbh_p - 1) >> 1) - 1) != l)
+ left(l) = ((*nbh_p - 1) >> 1) - 1;
+
+ // We have found a complete line so we stop the search (even if it was not linked)
+ break;
+ }
+ }
+ }
+ }
+
+
+ // If we haven't found any included line in the first
+ // neighbor (the scan stopped on its bottom), then the first
+ // encountered line is considered as the neighbor of the current line
+ if (*nbh_p == end_p)
+ left(l) = (*nbh_p_copy >> 1) - 1;
+ }
+ }
+ }
+ }
+
+
+ // Finds, for each text line, its right neighbor by scanning `blocks`. We assume that the lines have been rotated
+ template <typename L>
+ inline
+ void
+ process_right_link(L& blocks, // image holding the values drawn by prepare_lines
+ const mln::util::array<box2d>& rbbox, // rotated bounding boxes, read as rbbox(l)
+ const line_set<L>& lines, // lines to link; only text lines are searched
+ line_links<L>& right) // output: right(l) is the neighbor found for l, or l itself
+ {
+ typedef scribo::def::lbl_type V;
+
+ // At the beginning each text line is its own neighbor (non-text lines get 0)
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ right(l) = l;
+ else
+ right(l) = 0;
+
+ // Values read in `blocks` are decoded as: odd value v -> line ((v - 1) >> 1) - 1, even value v -> line (v >> 1) - 1.
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search (1.5 times the x height, truncated to int)
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box: c is its center, q has the same column and a row a quarter of the way from the max row to c
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col());
+ // midcol: half of the column extent of the rotated box
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Right
+ {
+ int
+ nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
+ nright = std::min(nrightima, midcol + dmax);
+ // nrightima: columns from c to the end of the image; nright: search distance limited by it
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search (excluded). NOTE(review): if nright is 0, pstop lies before p and cannot stop the scan below - confirm nright >= 1
+ *pstop = p + nright - 1,
+ // Line neighbor (null as long as nothing has been found)
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit (p and p2 move together; p2 is tested first)
+ for (; p != pstop; ++p, ++p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 1) // Odd value: looking for the bottom of a line
+ && right(((*p2 - 1) >> 1) - 1) != l) // No loops: that line must not already have l as right neighbor
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 1) // Odd value: looking for the bottom of a line
+ && right(((*p - 1) >> 1) - 1) != l) // No loops: that line must not already have l as right neighbor
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the bottom of the
+ // line. We are then looking for the top of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ //
+ //
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second bottom |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ // => First encountered bottom line
+ // _________________________ ^
+ // |_________________________| => Current line | Search direction
+ // |
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh; // bottom values met so far
+ const V end_p = *nbh_p - 1; // value of the top of the first encountered line
+ const V* nbh_p_copy = nbh_p; // remembers where the first bottom was met
+ // NOTE(review): this scan only ends on end_p or on a complete line; it has no bound of its own - confirm it cannot leave the image.
+ for (; *nbh_p != end_p; ++nbh_p)
+ {
+ if (*nbh_p != literal::zero) // Not the background
+ {
+ if (*nbh_p % 2 == 1) // We have found the bottom of
+ // another line (or of the first one): remember it
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the top of a line. We are looking if
+ // we have already encountered the bottom of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ *nbh_p + 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range. NOTE(review): pstop derives from p; confirm this test is meaningful when the scan started from p2
+ nbh_p < pstop
+ // Avoid loops
+ && right((*nbh_p >> 1) - 1) != l)
+ right(l) = (*nbh_p >> 1) - 1;
+
+ // We have found a complete line, so we stop the search (even if it was not linked)
+ break;
+ }
+ }
+ }
+ }
+
+ // If we haven't found any included line in the first
+ // neighbor (the scan stopped on its top), then the first
+ // encountered line is considered as the neighbor of the current line
+
+ if (*nbh_p == end_p)
+ right(l) = ((*nbh_p_copy - 1) >> 1) - 1;
+ }
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------
+// Finalizing the links by merging information extracted from the left
+// and right links
+//-----------------------------------------------------------------------
+
+ template< typename L >
+ inline
+ void finalize_links(line_links<L>& left,
+                     line_links<L>& right,
+                     const line_set<L>& lines)
+ {
+   // Reconcile the two link arrays. A line which is still linked to
+   // itself in one direction adopts the text line that designates it
+   // as its neighbor in the other direction. Both arrays are updated
+   // in place.
+   for_all_lines(l, lines)
+   {
+     // Only text lines contribute.
+     if (! lines(l).is_textline())
+       continue;
+
+     // Both neighbors of l are read before any link is rewritten.
+     const unsigned left_of_l = left(l);
+     const unsigned right_of_l = right(l);
+
+     // The left neighbor of l has no right neighbor of its own (it
+     // is linked to itself): l becomes its right neighbor.
+     line_id_t& right_of_left = right(left_of_l);
+     if (right_of_left == left_of_l)
+       right_of_left = l;
+
+     // The right neighbor of l has no left neighbor of its own (it
+     // is linked to itself): l becomes its left neighbor.
+     line_id_t& left_of_right = left(right_of_l);
+     if (left_of_right == right_of_l)
+       left_of_right = l;
+   }
+ }
+
+ } // end of namespace scribo::text::internal
+
+ template <typename L>
+ inline
+ paragraph_set<L>
+ extract_paragraphs(line_set<L>& lines,
+                    const image2d<bool>& input)
+ {
+   typedef scribo::def::lbl_type V;
+
+   // Image in which prepare_lines draws the top and bottom of each
+   // text line. Its domain is the input domain passed through
+   // geom::rotate (-90 around its center); it starts filled with 0.
+   image2d<V> blocks(geom::rotate(input.domain(), -90,
+                                  input.domain().pcenter()));
+   data::fill(blocks, 0);
+
+   // Rotated bounding boxes, with lines.nelements() + 1 slots so that
+   // the array can be indexed by line id.
+   mln::util::array<box2d> rbbox;
+   rbbox.resize(lines.nelements() + 1);
+
+   // Link arrays; entry 0 of each of them is set to 0.
+   line_links<L> left(lines);
+   line_links<L> right(lines);
+   line_links<L> output(lines);
+   left(0) = 0;
+   right(0) = 0;
+   output(0) = 0;
+
+   // Each step is announced on std::cout before it runs.
+   std::cout << "Preparing lines" << std::endl;
+   internal::prepare_lines(input.domain(), lines, blocks, rbbox);
+
+   std::cout << "Linking left" << std::endl;
+   internal::process_left_link(blocks, rbbox, lines, left);
+
+   std::cout << "Linking right" << std::endl;
+   internal::process_right_link(blocks, rbbox, lines, right);
+
+   std::cout << "Finalizing links" << std::endl;
+   internal::finalize_links(left, right, lines);
+
+   std::cout << "Extracting paragraphs" << std::endl;
+   internal::paragraph_links(left, right, output, lines);
+
+   // The paragraph set is built from the final links.
+   return make::paragraph(output);
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::text
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HH
diff --git a/scribo/scribo/text/extract_paragraphs_hdoc.hh b/scribo/scribo/text/extract_paragraphs_hdoc.hh
new file mode 100644
index 0000000..57d3713
--- /dev/null
+++ b/scribo/scribo/text/extract_paragraphs_hdoc.hh
@@ -0,0 +1,1327 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+///
+
+
+#ifndef SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HDOC_HH
+# define SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HDOC_HH
+
+#include <mln/util/array.hh>
+#include <mln/accu/shape/bbox.hh>
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/draw/box.hh>
+#include <mln/data/convert.hh>
+#include <mln/value/int_u16.hh>
+#include <mln/value/label_16.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/geom/rotate.hh>
+#include <mln/literal/colors.hh>
+
+#include <scribo/core/macros.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/core/line_links.hh>
+#include <scribo/core/line_info.hh>
+#include <scribo/core/paragraph_set.hh>
+
+using namespace mln;
+
+
+namespace scribo
+{
+
+ namespace text
+ {
+
+ template <typename L>
+ inline
+ paragraph_set<L>
+ extract_paragraphs_hdoc(line_set<L>& lines,
+ const image2d<bool>& input);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ namespace internal
+ {
+
+ template <typename L>
+ inline
+ bool
+ between_horizontal_separator(const line_set<L>& lines,
+                              const line_id_t& l1_,
+                              const line_id_t& l2_)
+ {
+   // Tells whether a separator pixel is met while walking from one of
+   // the two lines towards the other one.
+   //
+   // lines: the line set; its components must provide separators.
+   // l1_, l2_: ids of the two lines to test.
+   //
+   // Returns true as soon as one probed separator pixel is set, and
+   // false when the walk ends without meeting any.
+
+   const line_info<L>& l1 = lines(l1_);
+   const line_info<L>& l2 = lines(l2_);
+
+   // This test requires a separator image.
+   mln_precondition(lines.components().has_separators());
+
+   const box2d& l1_bbox = l1.bbox();
+   const box2d& l2_bbox = l2.bbox();
+   const mln_ch_value(L, bool)&
+     separators = lines.components().separators();
+
+   // The walk starts from the line whose center row is strictly the
+   // smaller one; on equality it starts from l2.
+   const unsigned l1_center_row = l1_bbox.pcenter().row();
+   const unsigned l2_center_row = l2_bbox.pcenter().row();
+   const bool start_from_l1 = l1_center_row < l2_center_row;
+
+   const box2d& from_box = start_from_l1 ? l1_bbox : l2_bbox;
+   const box2d& to_box = start_from_l1 ? l2_bbox : l1_bbox;
+
+   // Three columns of the starting box are probed: its center column
+   // and two columns located a quarter of its half width inside its
+   // min and max columns.
+   const unsigned quarter =
+     ((from_box.pcenter().col() - from_box.pmin().col()) >> 2);
+   const unsigned center_col = from_box.pcenter().col();
+   const unsigned left_col = from_box.pmin().col() + quarter;
+   const unsigned right_col = from_box.pmax().col() - quarter;
+
+   // Rows are probed from the one following the center row of the
+   // starting box up to the min row of the other box, included.
+   const unsigned last_row = to_box.pmin().row();
+   unsigned row = start_from_l1 ? l1_center_row : l2_center_row;
+
+   while (row < last_row)
+   {
+     ++row;
+
+     // Center column first, then the left and the right ones.
+     if (separators.at_(row, center_col))
+       return true;
+
+     if (separators.at_(row, left_col))
+       return true;
+
+     if (separators.at_(row, right_col))
+       return true;
+   }
+
+   // No separator pixel was met.
+   return false;
+ }
+
+
+
+ template <typename L>
+ bool may_have_another_left_link(const line_links<L>& right,
+                                 const line_id_t& index,
+                                 const line_id_t& current_line,
+                                 const line_set<L>& lines)
+ {
+   // True when a link i other than index has index as right link,
+   // starts at a smaller column than current_line and has a baseline
+   // closer to it than half of the smaller of the two x heights.
+   const line_info<L>& current = lines(current_line);
+   const point2d& current_pmin = current.bbox().pmin();
+   const unsigned current_xh = current.x_height();
+
+   for_all_links(i, right)
+   {
+     if (! (i != index && right(i) == index))
+       continue;
+     const line_info<L>& candidate = lines(i);
+     const unsigned candidate_xh = candidate.x_height();
+     const float delta_max = 0.5f * std::min(current_xh, candidate_xh);
+     if (candidate.bbox().pmin().col() < current_pmin.col())
+       if (std::abs(current.baseline() - candidate.baseline()) < delta_max)
+         return true;
+   }
+   return false;
+ }
+
+//---------------------------------------------------------------------
+// This method aims to cut the links between lines that do not fit the
+// different criteria
+//---------------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void paragraph_links(const line_links<L>& left,
+ line_links<L>& right,
+ line_links<L>& output,
+ const line_set<L>& lines)
+ {
+ output = left.duplicate();
+
+ // const unsigned nlines = lines.nelements();
+
+ // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
+ // for (unsigned l = 0; l < nlines; ++l)
+ // {
+ // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
+ // }
+ // mln::io::ppm::save(links, "out_links.ppm");
+
+ // For each line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Neighbors
+
+ line_id_t left_nbh = output(l);
+ line_id_t right_nbh = right(l);
+ line_id_t lol_nbh = output(left_nbh);
+
+ // const line_info<L>& left_line = lines(left_nbh);
+ // const line_info<L>& current_line = lines(l);
+ // const line_info<L>& right_line = lines(right_nbh);
+
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, right_nbh, l))
+ {
+ output(right_nbh) = right_nbh;
+ right_nbh = l;
+ }
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, l, left_nbh))
+ {
+ output(l) = l;
+ left_nbh = l;
+ lol_nbh = l;
+ }
+
+ // Line features
+ const float x_height = lines(l).x_height();
+ const float left_x_height = lines(left_nbh).x_height();
+ const float right_x_height = lines(right_nbh).x_height();
+
+ const box2d& left_line_bbox = lines(left_nbh).bbox();
+ const box2d& current_line_bbox = lines(l).bbox();
+ const box2d& right_line_bbox = lines(right_nbh).bbox();
+ const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
+
+ const int lline_col_min = left_line_bbox.pmin().col();
+ const int cline_col_min = current_line_bbox.pmin().col();
+ const int rline_col_min = right_line_bbox.pmin().col();
+ const int lolline_col_min = lol_line_bbox.pmin().col();
+
+ const int lline_col_max = left_line_bbox.pmax().col();
+ const int cline_col_max = current_line_bbox.pmax().col();
+ const int rline_col_max = right_line_bbox.pmax().col();
+
+ const int lline_cw = lines(left_nbh).char_width();
+ const int cline_cw = lines(l).char_width();
+ const int rline_cw = lines(right_nbh).char_width();
+ // Maximal x variation to consider two lines vertically aligned
+ const int delta_alignment = cline_cw;
+
+
+ // Checks the baseline distances of the two neighbors
+ {
+ // Current line baseline
+ const int c_baseline = lines(l).baseline();
+
+ // Baseline distance with the left and right neighbors
+ const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
+ const int rc_baseline = c_baseline -lines(right_nbh).baseline();
+
+ // Max baseline distance between the two neighbors
+ // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
+ // const float delta_baseline_min = std::min(lc_baseline,
+ // rc_baseline);
+
+ // Only two lines, meaning the current line has only one neighbor
+ bool two_lines = false;
+
+ // If the current line has no left neighbor
+ if (lc_baseline == 0)
+ {
+ // ror : right neighbor of the right neighbor
+ const line_id_t ror_nbh = right(right_nbh);
+ //const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+
+ // If the current line has a ror
+ if (ror_nbh != right_nbh
+ && output(ror_nbh) == right_nbh)
+ {
+ // Distance between the current line and the right neighbor
+ const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
+ // Distance between the right neighbor and the ror
+ const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
+ // ror x_height
+ const float ror_x_height = lines(ror_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its right neighbor
+ if (right_distance > 1.4f * ror_distance
+ && std::max(ror_x_height, right_x_height) <
+ 1.4f * std::min(ror_x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // We determine the distance between the two lines
+ const float distance = lines(l).meanline() - lines(right_nbh).baseline();
+ two_lines = true;
+
+ // If the distance between the two lines is greater than
+ // the minimum x height of the two lines then we cut the
+ // link between them
+ if (distance > 2.0f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float min_char_width = std::min(rline_cw, cline_cw);
+ const float max_char_width = std::max(rline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its right neighbor
+ if ((max_x_height > min_x_height * 1.4f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ if (output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // If the current line has no right neighbor
+ else if (rc_baseline == 0)
+ {
+ // lol : left neighbor of the left neighbor
+
+ // If the left neighbor of the current line has a left neighbor
+ if (lol_nbh != left_nbh)
+ {
+ // Distance between the current line and its left neighbor
+ const float left_distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+ // Distance between the left neighbor and the left
+ // neighbor of its left neighbor
+ const float lol_distance = lines(lol_nbh).meanline() -
+ lines(left_nbh).baseline();
+ // lol x height
+ const float lol_x_height = lines(lol_nbh).x_height();
+
+ // Conditions to cut the link between the current line
+ // and its left neighbor
+ if (left_distance > 1.4f * lol_distance
+ && std::max(lol_x_height, left_x_height) <
+ 1.4f * std::min(lol_x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ // Otherwise we only have a group of two lines
+ else
+ {
+ // Distance between the current line and it left neighbor
+ const float distance = lines(left_nbh).meanline() -
+ lines(l).baseline();
+
+ two_lines = true;
+
+ // If the distance is greater than the min x height
+ // between the two lines
+ if (distance > 2.0f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+
+ // Lines features
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float min_char_width = std::min(lline_cw, cline_cw);
+ const float max_char_width = std::max(lline_cw, cline_cw);
+
+ // Condition to cut the link between the current line and
+ // its left neighbor
+ if ((max_x_height > min_x_height * 1.4f) &&
+ !(max_char_width <= 1.2f * min_char_width))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ // If we only have two lines we stop the study
+ if (two_lines)
+ continue;
+ }
+ // The current line has at least one left and one right neighbor
+ else // if (delta_baseline_max >= 1.1 * delta_baseline_min)
+ {
+ // Distance between the left and the current line
+ const float
+ left_distance = left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
+ // Distance between the right and the current line
+ const float
+ right_distance = current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();;
+
+ // If the left line is too far compared to the right one
+ // we cut the link with it
+ if (left_distance > 1.5f * right_distance
+ && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
+ {
+ output(l) = l;
+ continue;
+ }
+ // If the right line is too far compared to the left one
+ // we cut the link with it
+ else if (right_distance > 1.5f * left_distance
+ && std::max(x_height, right_x_height) >= 1.2f * std::min(x_height, right_x_height)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ // If the distance between the baseline of the left
+ // neighbor and the baseline of the current line is
+ // greater than the one between the current line baseline
+ // and the right line baseline we have to study the texte
+ // features of the right and left lines
+ if (lc_baseline > rc_baseline)
+ {
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+
+ if ((max_x_height > min_x_height * 1.4f) &&
+ !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.4f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ else
+ {
+ const float cw_max = std::max(rline_cw, cline_cw);
+ const float cw_min = std::min(rline_cw, cline_cw);
+ const float min_x_height = std::min(x_height, right_x_height);
+ const float max_x_height = std::max(x_height, right_x_height);
+
+ if ((max_x_height > min_x_height * 1.4f)
+ && !(cw_max <= 1.2f * cw_min)
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+
+ {
+ const float min_x_height = std::min(x_height, left_x_height);
+ const float max_x_height = std::max(x_height, left_x_height);
+ const float cw_max = std::max(lline_cw, cline_cw);
+ const float cw_min = std::min(lline_cw, cline_cw);
+
+ if ((max_x_height > min_x_height * 1.4f)
+ && !(cw_max <= 1.2f * cw_min))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+ }
+ }
+
+ // If we arrive here, it means than the lines in the
+ // neighborhood of the current line are quite similar. We can
+ // then begin to study the indentations in order to determine
+ // the beginning of new paragraphs
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_aligned = false;
+ bool left_lol_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_llol = std::abs(lline_col_min - lolline_col_min);
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (dx_llol < delta_alignment)
+ left_lol_aligned = true;
+
+ if (left_right_aligned && left_lol_aligned)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ dx_lrc < 3.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ const line_id_t out_right_nbh = output(right_nbh);
+
+ if (out_right_nbh != l)
+ {
+ if (output(out_right_nbh) == l)
+ output(out_right_nbh) = out_right_nbh;
+ right(l) = l;
+ }
+ else
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________
+// |___________________| End of the paragraph - Current line
+// ________________________
+// |________________________| Beginning of a new one
+// ___________________________
+// |___________________________| Left of left of current line
+//
+// End of paragraph case : we try to find an end to the current paragraph
+//
+//-----------------------------------------------------------------------------
+
+ {
+ // Check if the current line neighbors are aligned
+ bool left_right_max_aligned = false;
+ bool left_current_min_aligned = false;
+ bool lol_current_min_aligned = false;
+ const bool lol_is_left = output(left_nbh) == left_nbh;
+ const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
+ const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
+ const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
+
+ if (dx_lr_max < delta_alignment)
+ left_right_max_aligned = true;
+
+ if (dx_lc_min < delta_alignment)
+ left_current_min_aligned = true;
+
+ if (dx_lolc_min < delta_alignment)
+ lol_current_min_aligned = true;
+
+ if (!left_current_min_aligned && left_right_max_aligned &&
+ (lol_current_min_aligned || lol_is_left))
+ {
+ const int dx_lrc = std::abs(lline_col_max - cline_col_max);
+ const int l_char_width = lines(l).char_width();
+ const int dx_indent = std::abs(std::max(lline_col_min,
+ rline_col_min) - cline_col_min);
+
+ if (dx_lrc > l_char_width &&
+ dx_indent < 4 * delta_alignment &&
+ cline_col_max < lline_col_max &&
+ cline_col_min < lline_col_min &&
+ (lline_col_min > lolline_col_min || lol_is_left))
+ {
+ output(l) = l;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ bool left_right_aligned = false;
+ const int dx_lr = std::abs(lline_col_min - rline_col_min);
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (dx_lr < delta_alignment)
+ left_right_aligned = true;
+
+ if (right_ror_min_aligned && left_right_aligned &&
+ ror_nbh != right_nbh)
+ {
+ const int left_right_col_min = std::min(lline_col_min, rline_col_min);
+ const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_lrc > l_char_width &&
+ !may_have_another_left_link(right, right_nbh, l, lines) &&
+ dx_lrc < 10.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ const line_id_t out_right_nbh = output(right_nbh);
+
+ if (out_right_nbh != l)
+ {
+ if (output(out_right_nbh) == l)
+ output(out_right_nbh) = out_right_nbh;
+ right(l) = l;
+ }
+ else
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________
+// |___________|
+// ________________________
+// |________________________|
+// ___________________________
+// |___________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above.
+//
+//-----------------------------------------------------------------------------
+
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool left_ror_aligned = false;
+ const int dx_lror = std::abs(lline_col_min - rorline_col_min);
+
+ if (dx_lror < delta_alignment)
+ left_ror_aligned = true;
+
+ if (left_ror_aligned)
+ {
+ const int left_ror_col_min = std::min(lline_col_min, rorline_col_min);
+ const int dx_lrorc = std::abs(left_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+ const int dx_lrorr = std::abs(left_ror_col_min - rline_col_min);
+ const int dx_crmax = std::abs(rline_col_max - cline_col_max);
+
+ if (dx_lrorc > l_char_width &&
+ dx_lrorr > 5 * l_char_width &&
+ dx_lrorr > dx_lrorc &&
+ dx_crmax > 5 * l_char_width &&
+ !may_have_another_left_link(right, right_nbh, l, lines) &&
+ dx_lrorc < 10.0f * l_char_width &&
+ cline_col_min > rorline_col_min &&
+ cline_col_min > lline_col_min)
+ {
+ right(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+
+
+// Strange case
+ {
+ if (rline_col_min > current_line_bbox.pcenter().col()
+ && !may_have_another_left_link(right, right_nbh, l, lines)
+ && cline_col_max < rline_col_max
+ && output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ }
+ }
+
+//-----------------------------------------------------------------------------
+// ___________________________
+// |___________________________|
+// ___________________________
+// |___________________________|
+// ________________________
+// |________________________|
+//
+// Simple case : paragraphs are justified on the left. We try to find any
+// indentation like above at the end of a column.
+//
+//-----------------------------------------------------------------------------
+
+ if (left_nbh == l)
+ {
+ const line_id_t ror_nbh = right(right_nbh);
+ const box2d& ror_line_bbox = lines(ror_nbh).bbox();
+ const int rorline_col_min = ror_line_bbox.pmin().col();
+
+ bool right_ror_min_aligned = false;
+ const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
+
+ if (dx_rror_min < delta_alignment)
+ right_ror_min_aligned = true;
+
+ if (right_ror_min_aligned)
+ {
+ const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
+ const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
+ const float l_char_width = 1.5f * lines(l).char_width();
+
+ if (dx_rrorc > l_char_width &&
+ dx_rrorc < 10.0f * l_char_width &&
+ cline_col_min > rline_col_min &&
+ cline_col_max >= rline_col_max)
+ {
+ const line_id_t out_right_nbh = output(right_nbh);
+
+ if (out_right_nbh != l)
+ {
+ if (output(out_right_nbh) == l)
+ output(out_right_nbh) = out_right_nbh;
+ right(l) = l;
+ }
+ else
+ output(right_nbh) = right_nbh;
+ continue;
+ }
+ }
+ }
+ }
+
+ // Only debug
+ // {
+ // image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
+
+ // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
+ // for_all_lines(l, lines)
+ // if (lines(l).is_textline())
+ // {
+ // // if (lines(i).is_textline())
+ // // {
+ // // mln::draw::box(debug, lines(i).bbox(), literal::red);
+ // nbbox(output(l)).take(lines(l).bbox());
+ // // }
+ // }
+
+ // for (unsigned i = 0; i < nbbox.nelements(); ++i)
+ // if (nbbox(i).is_valid())
+ // {
+ // box2d b = nbbox(i).to_result();
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // b.enlarge(1);
+ // mln::draw::box(debug, b, literal::orange);
+ // }
+
+ // mln::io::ppm::save(debug, "out_paragraph.ppm");
+ // }
+
+ }
+
+//-------------------------------------------------------------
+// Preparation of the lines before linking them.
+// For each text line l, the bounding box is rotated (geom::rotate with
+// -90 around the center of the domain) and stored in rbbox(l). The top
+// and the bottom of the line are then drawn in the block image: the
+// top with the value 2 * (l + 1), the bottom with 2 * (l + 1) + 1, each
+// in the first column where it overlaps no box drawn previously.
+//-------------------------------------------------------------
+
+ template <typename L>
+ inline
+ void prepare_lines(const box2d& domain,
+ const line_set<L>& lines,
+ L& blocks,
+ mln::util::array<box2d>& rbbox)
+ {
+ std::map< int, std::vector< const box2d* > > drawn_lines;
+ // drawn_lines maps a column to the rotated boxes already drawn in it
+
+ // For each text line
+ //for (unsigned l = 0; l < nlines; ++l)
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Rotation of the bounding box
+ box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
+// rbbox.append(b);
+ rbbox(l) = b;
+ // Marker values of the line: even for the top, odd for the bottom
+ const unsigned index = l + 1;
+ const unsigned even_index = 2 * index;
+ const unsigned odd_index = even_index + 1;
+
+ // Top of the line: tried at column pmax().col() first, then at increasing columns
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the top of the
+ // line
+
+ const int col = b.pmax().col() + col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second; // NB: shadows the 'lines' parameter
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+ // The row ranges of the two boxes intersect: this column is already used
+ if (min_row - max_row <= 0)
+ break;
+ }
+ // No conflict found in this column: draw here, otherwise try the next column
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), even_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+
+ // Bottom of the line: tried at column pmin().col() first, then at decreasing columns
+ {
+ bool not_finished = true;
+ int col_offset = 0;
+
+ while (not_finished)
+ {
+ // Looking for a column in the image to draw the bottom of
+ // the line
+
+ const int col = b.pmin().col() - col_offset;
+ std::map< int, std::vector< const box2d* > >::iterator it
+ = drawn_lines.find(col);
+
+ if (it != drawn_lines.end())
+ {
+ const std::vector< const box2d* >& lines = (*it).second; // NB: shadows the 'lines' parameter
+ const unsigned nb_lines = lines.size();
+ unsigned i = 0;
+
+ for (i = 0; i < nb_lines; ++i)
+ {
+ const box2d* box = lines[i];
+ const int min_row = std::max(b.pmin().row(), box->pmin().row());
+ const int max_row = std::min(b.pmax().row(), box->pmax().row());
+ // The row ranges of the two boxes intersect: this column is already used
+ if (min_row - max_row <= 0)
+ break;
+ }
+ // No conflict found in this column: draw here, otherwise try the next column
+ if (i == nb_lines)
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ else
+ ++col_offset;
+ }
+ else
+ {
+ mln::draw::line(blocks, point2d(b.pmin().row(), col),
+ point2d(b.pmax().row(), col), odd_index);
+ not_finished = false;
+ drawn_lines[col].push_back(&(rbbox[l]));
+ }
+ }
+ }
+ }
+ }
+
+ template <typename L>
+ inline
+ void
+ process_left_link(L& blocks,
+ const mln::util::array<box2d>& rbbox,
+ const line_set<L>& lines,
+ line_links<L>& left)
+ {
+ typedef scribo::def::lbl_type V;
+ // Computes left(l) for every text line l by scanning the markers drawn in 'blocks' (see prepare_lines) from the center of rbbox(l)
+ // At the beginning each text line is its own neighbor; the other lines get 0
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ left(l) = l;
+ else
+ left(l) = 0;
+
+ // const unsigned nlines = lines.nelements();
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box: the center c, and q in the same column, a quarter of the way from the min row to the center row
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Left
+ {
+ // Left margin: number of columns between the first column of the image and c
+ int
+ nleftima = c.col() - blocks.domain().pmin().col(),
+ // Left search distance, limited by the left margin
+ nleft = std::min(nleftima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p - nleft - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; --p, --p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 0) // Looking for the top of a line (even value)
+ && left((*p2 >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 0) // Looking for the top of a line (even value)
+ && left((*p >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the top of the
+ // line. We are then looking for the bottom of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ // _________________________ |
+ // |_________________________| => Current line | Search direction
+ // v
+ // => First encountered top line
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second top |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ //
+ //
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p + 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; --nbh_p)
+ {
+ if ((*nbh_p) != literal::zero) // Not the background
+ {
+ if ((*nbh_p) % 2 == 0)// We have found the top of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the bottom of a line. We are looking if
+ // we have already encountered the top of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ (*nbh_p) - 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p > pstop
+ // Avoid loops
+ && left(((*nbh_p - 1) >> 1) - 1) != l)
+ left(l) = ((*nbh_p - 1) >> 1) - 1;
+
+ // We have found a complete line so we stop the search
+ break;
+ }
+ }
+ }
+ }
+ // NOTE(review): the loop above stops only on the value end_p or on a complete line; nbh_p itself is not bounded - confirm end_p is always met
+
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+ if (*nbh_p == end_p)
+ left(l) = (*nbh_p_copy >> 1) - 1;
+ }
+ }
+ }
+ }
+
+
+ // Computes right(l) for every text line l by scanning the markers drawn in 'blocks' (see prepare_lines). We assume that the lines have been rotated
+ template <typename L>
+ inline
+ void
+ process_right_link(L& blocks,
+ const mln::util::array<box2d>& rbbox,
+ const line_set<L>& lines,
+ line_links<L>& right)
+ {
+ typedef scribo::def::lbl_type V;
+
+ // At the beginning each text line is its own neighbor; the other lines get 0
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ right(l) = l;
+ else
+ right(l) = 0;
+
+ // const unsigned nlines = lines.nelements();
+
+ // For each text line
+ for_all_lines(l, lines)
+ if (lines(l).is_textline())
+ {
+ // Max distance for the line search
+ int dmax = 1.5f * lines(l).x_height();
+
+ // Starting points in the current line box: the center c, and q in the same column, a quarter of the way from the max row to the center row
+ point2d c = rbbox(l).pcenter();
+ point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col());
+
+ int
+ midcol = (rbbox(l).pmax().col()
+ - rbbox(l).pmin().col()) / 2;
+
+ // Right
+ {
+ int
+ nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
+ nright = std::min(nrightima, midcol + dmax);
+
+ V
+ // Starting points in the box
+ *p = &blocks(c),
+ *p2 = &blocks(q),
+ // End of search
+ *pstop = p + nright - 1,
+ // Line neighbor
+ *nbh_p = 0;
+
+ // While we haven't found a neighbor or reached the limit
+ for (; p != pstop; ++p, ++p2)
+ {
+ if (*p2 != literal::zero // Not the background
+ && ((*p2 % 2) == 1) // Looking for the bottom of a line (odd value)
+ && right(((*p2 - 1) >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p2;
+ break;
+ }
+
+ if (*p != literal::zero // Not the background
+ && ((*p % 2) == 1) // Looking for the bottom of a line (odd value)
+ && right(((*p - 1) >> 1) - 1) != l) // No loops
+ {
+ // Neighbor found, we stop the search
+ nbh_p = p;
+ break;
+ }
+ }
+
+ // If a neighbor was found, then we have found the bottom of the
+ // line. We are then looking for the top of the encountered
+ // line. If during the search process we find a complete line
+ // included in the touched line, this line is considered as
+ // the neighbor under certain conditions (see below)
+
+ //---------------------------------------------------------------
+ //
+ //
+ // __________________________________________________ 2Q
+ // | Q |
+ // | _________________________ |2P
+ // | |_____________P___________| => Second bottom |2P + 1
+ // | line |
+ // |__________________________________________________|2Q + 1
+ // => First encountered bottom line
+ // _________________________ ^
+ // |_________________________| => Current line | Search direction
+ // |
+ //---------------------------------------------------------------
+
+ if (nbh_p)
+ {
+ std::vector<V> lines_nbh;
+ const V end_p = *nbh_p - 1;
+ const V* nbh_p_copy = nbh_p;
+
+ for (; *nbh_p != end_p; ++nbh_p)
+ {
+ if (*nbh_p != literal::zero) // Not the background
+ {
+ if (*nbh_p % 2 == 1) // We have found the bottom of
+ // another line
+ lines_nbh.push_back(*nbh_p);
+ else
+ {
+ // We have found the top of a line. We are looking if
+ // we have already encountered the bottom of this
+ // line. If so, we link the current line with this one
+ // under certain conditions:
+
+ if (std::find(lines_nbh.begin(), lines_nbh.end(),
+ *nbh_p + 1) != lines_nbh.end())
+ {
+ // If we can link the complete line with the current line
+ if (// It must be in the search range
+ nbh_p < pstop
+ // Avoid loops
+ && right((*nbh_p >> 1) - 1) != l)
+ right(l) = (*nbh_p >> 1) - 1;
+
+ // We have found a complete line, so we stop the search
+ break;
+ }
+ }
+ }
+ }
+ // NOTE(review): the loop above stops only on the value end_p or on a complete line; nbh_p itself is not bounded - confirm end_p is always met
+ // If we haven't found any included line in the first
+ // neighbor, then the line is considered as the neighbor of
+ // the current line
+
+ if (*nbh_p == end_p)
+ right(l) = ((*nbh_p_copy - 1) >> 1) - 1;
+ }
+ }
+ }
+ }
+
+//-----------------------------------------------------------------------
+// Finalization of the links: the left and right links are completed
+// with the information carried by each other. A link is overwritten
+// only when it still designates its own line, which is the value set
+// at the beginning of process_left_link and process_right_link.
+//-----------------------------------------------------------------------
+
+      template< typename L >
+      inline
+      void finalize_links(line_links<L>& left,
+                          line_links<L>& right,
+                          const line_set<L>& lines)
+      {
+        for_all_lines(l, lines)
+          if (lines(l).is_textline())
+          {
+            // Both links of the current line are read before any of
+            // the updates below is performed.
+            const unsigned prev_id = left(l);
+            const unsigned next_id = right(l);
+
+            // The left neighbor of the current line is still its own
+            // right neighbor: the current line becomes its right
+            // neighbor.
+            line_id_t& next_of_prev = right(prev_id);
+
+            if (next_of_prev == prev_id)
+              next_of_prev = l;
+
+            // The right neighbor of the current line is still its own
+            // left neighbor: the current line becomes its left
+            // neighbor.
+            line_id_t& prev_of_next = left(next_id);
+
+            if (prev_of_next == next_id)
+              prev_of_next = l;
+          }
+      }
+
+ } // end of namespace scribo::text::internal
+
+
+    template <typename L>
+    inline
+    paragraph_set<L>
+    extract_paragraphs_hdoc(line_set<L>& lines,
+                            const image2d<bool>& input)
+    {
+      typedef scribo::def::lbl_type V;
+
+      // Image receiving the line markers. Its domain is the input domain
+      // rotated with the parameters also used for the line bounding
+      // boxes in prepare_lines; it is initialized with 0.
+      image2d<V> blocks(geom::rotate(input.domain(),
+                                     -90,
+                                     input.domain().pcenter()));
+      data::fill(blocks, 0);
+
+      // Rotated bounding boxes, indexed by line id.
+      mln::util::array<box2d> rotated_bboxes;
+      rotated_bboxes.resize(lines.nelements() + 1);
+
+      // Left, right and paragraph links. The entry 0 of each of them
+      // is explicitly set to 0.
+      line_links<L> left_links(lines);
+      line_links<L> right_links(lines);
+      line_links<L> par_links(lines);
+      left_links(0) = 0;
+      right_links(0) = 0;
+      par_links(0) = 0;
+
+      // NOTE(review): the helpers below are called unqualified whereas
+      // the namespace closed just before this function is commented as
+      // scribo::text::internal - confirm they are visible from here.
+
+      // Draw the top and the bottom of each text line.
+      prepare_lines(input.domain(), lines, blocks, rotated_bboxes);
+
+      // Look for the left and right neighbors of each text line.
+      process_left_link(blocks, rotated_bboxes, lines, left_links);
+      process_right_link(blocks, rotated_bboxes, lines, right_links);
+
+      // Complete the left and right links with each other.
+      finalize_links(left_links, right_links, lines);
+
+      // Compute the paragraph links and build the paragraphs from them.
+      filter::paragraph_links(left_links, right_links, par_links, lines);
+      return make::paragraph(par_links, right_links);
+    }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::text
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_TEXT_EXTRACT_PARAGRAPHS_HDOC_HH
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index 31a5ed4..dd5762a 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -1,5 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -181,7 +180,10 @@ namespace scribo
l1 = my_find_root(parent, l1);
l2 = my_find_root(parent, l2);
if (l1 == l2)
- return l1;
+ {
+ std::cerr << "what! in'do_union': already merged!!!" << std::endl;
+ return l1;
+ }
swap_ordering(l1, l2);
parent[l2] = l1; // The smallest label value is root.
@@ -224,16 +226,16 @@ namespace scribo
template <typename L>
- bool between_separators(const scribo::line_set<L>& lines,
+ bool between_separators(const line_set<L>& lines,
const line_id_t& l1_,
const line_id_t& l2_)
{
- const scribo::line_info<L>& l1 = lines(l1_);
- const scribo::line_info<L>& l2 = lines(l2_);
-
// No separators found in image.
mln_precondition(lines.components().has_separators());
+ const scribo::line_info<L>& l1 = lines(l1_);
+ const scribo::line_info<L>& l2 = lines(l2_);
+
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -295,11 +297,11 @@ namespace scribo
*/
template <typename L>
bool lines_can_merge(scribo::line_set<L>& lines,
- const scribo::line_id_t& l1_,
- const scribo::line_id_t& l2_)
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
scribo::line_info<L>& l1 = lines(l1_);
- scribo::line_info<L>& l2 = lines(l2_);
+ const scribo::line_info<L>& l2 = lines(l2_);
// Parameters.
const float x_ratio_max = 1.7f;
@@ -432,12 +434,13 @@ namespace scribo
*/
template <typename L>
- bool non_text_and_text_can_merge(scribo::line_set<L>& lines,
- const scribo::line_id_t& l_cur_, // current
- const scribo::line_id_t l_ted_) // touched
+ bool non_text_and_text_can_merge(line_set<L>& lines,
+ const line_id_t& l_cur_, // current
+ const line_id_t& l_ted_) // touched
{
scribo::line_info<L>& l_cur = lines(l_cur_);
- scribo::line_info<L>& l_ted = lines(l_ted_);
+ const scribo::line_info<L>& l_ted = lines(l_ted_);
+
if (l_cur.type() == line::Text || l_ted.type() != line::Text)
return false;
@@ -470,8 +473,8 @@ namespace scribo
if (l_cur_height < l_ted_x_height
&& l_cur_height > 0.05f * l_ted_x_height
&& float(l_cur_width) / float(l_cur.card()) < l_ted.char_width()
- && dx < 2 * l_ted_cw
- && l_cur_pmin.row() < l_ted.baseline())
+ && dx < l_ted_cw
+ && l_cur_pmin.row() < l_ted_pmax.row())
{
l_cur.update_type(line::Punctuation);
return true;
@@ -634,10 +637,8 @@ namespace scribo
image2d<unsigned> billboard(domain);
data::fill(billboard, 0);
-# ifndef SCRIBO_NDEBUG
image2d<value::int_u8> log(domain);
data::fill(log, 0);
-# endif // ! SCRIBO_NDEBUG
const unsigned n = v.size();
unsigned l_;
@@ -669,7 +670,7 @@ namespace scribo
x---------------x
| |
| mc |
- ml x x x mr
+ ml x x x mr
| |
| |
x---------------x
@@ -752,12 +753,11 @@ namespace scribo
// vertically aligned
// Obviously no separators between the two lines
if ((l_info.card() <= 5 ||
- (std::abs(l_info.baseline() - mc_info.baseline())
- < 5 && std::abs(l_info.meanline() -
- mc_info.meanline()) < 5))
- && dx < l_ted_cw && dy < 0
- && not (lines.components().has_separators()
- && between_separators(lines, l, mc)))
+ (std::abs(l_info.baseline() - mc_info.baseline()) < 5
+ && std::abs(l_info.meanline() - mc_info.meanline()) < 5))
+ && dx < l_ted_cw && dy < 0
+ && not (lines.components().has_separators()
+ && between_separators(lines, l, mc)))
l = do_union(lines, l, mc, parent);
// }
@@ -776,10 +776,9 @@ namespace scribo
// l_ = do_union(lines, mc, l, parent);
// draw_box(billboard, lines(l_).ebbox(), l_);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 126);
-# endif // ! SCRIBO_NDEBUG
+
}
else // FIXME: Remove! since included in a non-text-line, so not drawn, so inclusion impossible!!!!!!!!!!
@@ -789,11 +788,8 @@ namespace scribo
// a non-text-line (probably a drawing or a frame) includes a text line
draw_box(billboard, lines(l).ebbox(), l);
-
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 100);
-# endif // ! SCRIBO_NDEBUG
}
}
@@ -832,10 +828,8 @@ namespace scribo
// it may change of label (take the one of the included line).
draw_box(billboard, lines(l_).ebbox(), l_);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 128);
-# endif // ! SCRIBO_NDEBUG
}
}
}
@@ -848,15 +842,11 @@ namespace scribo
{
++count_new_txtline;
draw_box(billboard, lines(l).ebbox(), l);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 127);
-# endif // ! SCRIBO_NDEBUG
}
-# ifndef SCRIBO_NDEBUG
else
- draw_box(log, b, 1);
-# endif // ! SCRIBO_NDEBUG
+ draw_box(log, b, 1);
}
}
else
@@ -873,8 +863,8 @@ namespace scribo
if (lcand == 0) // Skip background.
continue;
- // if (lines(lcand).type() != line::Text)
- // std::cerr << "again!" << std::endl;
+ if (lines(lcand).type() != line::Text)
+ std::cerr << "again!" << std::endl;
if (lines(l_).type() == line::Text)
@@ -886,22 +876,15 @@ namespace scribo
l_ = do_union(lines, l_, lcand, parent);
draw_box(billboard, lines(l_).ebbox(), l_);
-
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 151);
-# endif // ! SCRIBO_NDEBUG
-
continue;
}
else
{
++count_WTF;
-
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 255);
-# endif // ! SCRIBO_NDEBUG
// (*) SEE BELOW
draw_box(billboard, lines(l_).ebbox(), l_);
@@ -918,20 +901,15 @@ namespace scribo
l_ = do_union(lines, l_, lcand, parent);
draw_box(billboard, lines(l_).ebbox(), l_);
-# ifndef SCRIBO_NDEBUG
// Log:
draw_box(log, b, 169);
-# endif // ! SCRIBO_NDEBUG
-
continue;
}
-# ifndef SCRIBO_NDEBUG
else
{
// Log:
draw_box(log, b, 254);
}
-# endif // ! SCRIBO_NDEBUG
}
@@ -1081,6 +1059,7 @@ namespace scribo
// ts = t.stop();
// std::cout << "time " << ts << std::endl;
+
lines.force_stats_update();
return lines;
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging_hdoc.hh
similarity index 99%
copy from scribo/scribo/text/merging.hh
copy to scribo/scribo/text/merging_hdoc.hh
index 31a5ed4..6f89d2a 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging_hdoc.hh
@@ -24,8 +24,8 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_TEXT_MERGING_HH
-# define SCRIBO_TEXT_MERGING_HH
+#ifndef SCRIBO_TEXT_MERGING_HDOC_HH
+# define SCRIBO_TEXT_MERGING_HDOC_HH
/// \file
///
@@ -82,7 +82,7 @@ namespace scribo
//
template <typename L>
line_set<L>
- merging(const scribo::line_set<L>& lines);
+ merging_hdoc(const scribo::line_set<L>& lines);
# ifndef MLN_INCLUDE_ONLY
@@ -1094,7 +1094,7 @@ namespace scribo
template <typename L>
line_set<L>
- merging(const scribo::line_set<L>& lines)
+ merging_hdoc(const scribo::line_set<L>& lines)
{
using namespace mln;
@@ -1116,4 +1116,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_TEXT_MERGING_HH
+#endif // ! SCRIBO_TEXT_MERGING_HDOC_HH
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
deleted file mode 100644
index e37f610..0000000
--- a/scribo/scribo/text/paragraphs.hh
+++ /dev/null
@@ -1,1270 +0,0 @@
-#include <mln/util/array.hh>
-#include <mln/accu/shape/bbox.hh>
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-#include <mln/draw/box.hh>
-#include <mln/data/convert.hh>
-#include <mln/value/int_u16.hh>
-#include <mln/value/label_16.hh>
-#include <mln/value/int_u8.hh>
-#include <mln/value/rgb8.hh>
-#include <mln/io/ppm/save.hh>
-#include <mln/io/pgm/save.hh>
-#include <mln/geom/rotate.hh>
-#include <mln/literal/colors.hh>
-
-#include <scribo/core/macros.hh>
-#include <scribo/core/line_set.hh>
-#include <scribo/core/line_links.hh>
-#include <scribo/core/line_info.hh>
-#include <scribo/core/paragraph_set.hh>
-
-using namespace mln;
-
-namespace scribo
-{
- namespace filter
- {
-
- template <typename L> // Looks for a separator pixel on the rows lying between two lines.
- inline
- bool // true as soon as one of the three probed columns meets a separator, false otherwise.
- between_horizontal_separator(const line_set<L>& lines,
- const line_id_t& l1_,
- const line_id_t& l2_)
- {
- const line_info<L>& l1 = lines(l1_);
- const line_info<L>& l2 = lines(l2_);
-
- // Precondition: the component set must carry a separator image.
- mln_precondition(lines.components().has_separators());
-
- const box2d& l1_bbox = l1.bbox();
- const box2d& l2_bbox = l2.bbox();
-
- unsigned // Center rows of both boxes; their order selects the branch below.
- row1 = l1_bbox.pcenter().row(),
- row2 = l2_bbox.pcenter().row();
- const mln_ch_value(L, bool)&
- separators = lines.components().separators();
-
- unsigned row; // Row currently probed.
- unsigned col_ptr; // Center column of the box the scan starts from.
- unsigned left_col_ptr; // Min column of that box, moved inwards by 'quarter'.
- unsigned right_col_ptr; // Max column of that box, moved inwards by 'quarter'.
- unsigned end; // Last row to probe.
-
- if (row1 < row2) // l1 has the smaller center row: start from l1, stop at the min row of l2.
- {
- row1 = l1_bbox.pmax().row();
- row2 = l2_bbox.pmin().row();
-
- const unsigned quarter = // A quarter of the distance between the center and min columns.
- ((l1_bbox.pcenter().col() - l1_bbox.pmin().col()) >> 2);
-
- row = l1_bbox.pcenter().row();
- col_ptr = l1_bbox.pcenter().col();
- left_col_ptr = l1_bbox.pmin().col() + quarter;
- right_col_ptr = l1_bbox.pmax().col() - quarter;
- end = row2;
- }
- else // Otherwise start from the center row of l2 and stop at the min row of l1.
- {
- row2 = l2_bbox.pmax().row();
- row1 = l1_bbox.pmin().row();
-
- const unsigned quarter = // A quarter of the distance between the center and min columns.
- ((l2_bbox.pcenter().col() - l2_bbox.pmin().col()) >> 2);
-
- row = l2_bbox.pcenter().row();
- col_ptr = l2_bbox.pcenter().col();
- left_col_ptr = l2_bbox.pmin().col() + quarter;
- right_col_ptr = l2_bbox.pmax().col() - quarter;
- end = row1;
- }
-
- // Probe rows row + 1 .. end on the three columns; stop at the first separator pixel.
- while (row < end)
- {
- ++row;
- if (separators.at_(row, col_ptr)
- || separators.at_(row, left_col_ptr)
- || separators.at_(row, right_col_ptr))
- return true;
- }
-
- return false;
- }
-
-
- template <typename L> // Tells whether another line linked to 'index' in 'right' competes with 'current_line'.
- bool may_have_another_left_link(const line_links<L>& right, // Links scanned for entries pointing to 'index'.
- const line_id_t& index, // Link target the candidates must share.
- const line_id_t& current_line, // Line the candidates are compared with.
- const line_set<L>& lines)
- {
- const line_info<L>& l = lines(current_line);
- const point2d& pmin = l.bbox().pmin();
- const unsigned x1 = l.x_height();
-
- for_all_links(i, right)
- if (i != index && right(i) == index) // Only lines, other than 'index' itself, whose link is 'index'.
- {
- const line_info<L>& l_info = lines(i);
- const unsigned x2 = l_info.x_height();
-
- const float delta_max = 0.5f * std::min(x1, x2); // Baseline tolerance: half the smaller x height.
-
- if (l_info.bbox().pmin().col() < pmin.col() // Candidate starts left of the current line...
- && std::abs(l.baseline() - l_info.baseline()) < delta_max // ...and its baseline is within the tolerance.
- )
- return true;
- }
-
- return false;
- }
-
-//---------------------------------------------------------------------
-// Cuts the links between text lines that do not fit the criteria below.
-// 'output' starts from left.duplicate(); cutting a link x means output(x) = x. Some cases also reset entries of 'right'.
-//---------------------------------------------------------------------
-
- template <typename L>
- inline
- void paragraph_links(const line_links<L>& left,
- line_links<L>& right,
- line_links<L>& output,
- const line_set<L>& lines)
- {
- output = left.duplicate();
-
- // const unsigned nlines = lines.nelements();
-
- // image2d<value::rgb8> links = data::convert(value::rgb8(), input);
- // for (unsigned l = 0; l < nlines; ++l)
- // {
- // mln::draw::line(links, lines(l).bbox().pcenter(), lines(left(l)).bbox().pcenter(), literal::red);
- // }
- // mln::io::ppm::save(links, "out_links.ppm");
-
- // For each text line (other lines are skipped)
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Neighbors of l: left and right links, and the left link of the left neighbor
-
- line_id_t left_nbh = output(l);
- line_id_t right_nbh = right(l);
- line_id_t lol_nbh = output(left_nbh);
-
- // const line_info<L>& left_line = lines(left_nbh);
- // const line_info<L>& current_line = lines(l);
- // const line_info<L>& right_line = lines(right_nbh);
-
- if (lines.components().has_separators() &&
- between_horizontal_separator(lines, right_nbh, l))
- { // A separator was found: reset the right neighbor's link and treat l as its own right neighbor.
- output(right_nbh) = right_nbh;
- right_nbh = l;
- }
- if (lines.components().has_separators() &&
- between_horizontal_separator(lines, l, left_nbh))
- { // A separator was found: cut the left link of l and treat l as its own left neighbor.
- output(l) = l;
- left_nbh = l;
- lol_nbh = l;
- }
-
- // Line features
- const float x_height = lines(l).x_height();
- const float left_x_height = lines(left_nbh).x_height();
- const float right_x_height = lines(right_nbh).x_height();
-
- const box2d& left_line_bbox = lines(left_nbh).bbox();
- const box2d& current_line_bbox = lines(l).bbox();
- const box2d& right_line_bbox = lines(right_nbh).bbox();
- const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor
-
- const int lline_col_min = left_line_bbox.pmin().col();
- const int cline_col_min = current_line_bbox.pmin().col();
- const int rline_col_min = right_line_bbox.pmin().col();
- const int lolline_col_min = lol_line_bbox.pmin().col();
-
- const int lline_col_max = left_line_bbox.pmax().col();
- const int cline_col_max = current_line_bbox.pmax().col();
- const int rline_col_max = right_line_bbox.pmax().col();
-
- const int lline_cw = lines(left_nbh).char_width();
- const int cline_cw = lines(l).char_width();
- const int rline_cw = lines(right_nbh).char_width();
- // Maximal x variation to consider two lines vertically aligned
- const int delta_alignment = cline_cw;
-
-
- // Checks the baseline distances of the two neighbors
- {
- // Current line baseline
- const int c_baseline = lines(l).baseline();
-
- // Baseline distance with the left and right neighbors
- const int lc_baseline = lines(left_nbh).baseline() - c_baseline;
- const int rc_baseline = c_baseline -lines(right_nbh).baseline();
-
- // Max baseline distance between the two neighbors
- // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
- // const float delta_baseline_min = std::min(lc_baseline,
- // rc_baseline);
-
- // Only two lines, meaning the current line has only one neighbor
- bool two_lines = false;
-
- // If the current line has no left neighbor (left_nbh == l gives a zero baseline difference)
- if (lc_baseline == 0)
- {
- // ror : right neighbor of the right neighbor
- const line_id_t ror_nbh = right(right_nbh);
- //const box2d& ror_line_bbox = lines(ror_nbh).bbox();
-
- // If the current line has a ror
- if (ror_nbh != right_nbh
- && output(ror_nbh) == right_nbh)
- {
- // Distance between the current line and the right neighbor
- const float right_distance = lines(l).meanline() - lines(right_nbh).baseline();
- // Distance between the right neighbor and the ror
- const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline();
- // ror x_height
- const float ror_x_height = lines(ror_nbh).x_height();
-
- // Conditions to cut the link between the current line
- // and its right neighbor
- if (right_distance > 1.4f * ror_distance
- && std::max(ror_x_height, right_x_height) <
- 1.4f * std::min(ror_x_height, right_x_height)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- // Otherwise we only have a group of two lines
- else
- {
- // We determine the distance between the two lines
- const float distance = lines(l).meanline() - lines(right_nbh).baseline();
- two_lines = true;
-
- // If the distance between the two lines is greater than
- // twice the minimum x height of the two lines then we cut
- // the link between them
- if (distance > 2.0f * std::min(x_height, right_x_height)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
-
- // Lines features
- const float min_x_height = std::min(x_height, right_x_height);
- const float max_x_height = std::max(x_height, right_x_height);
- const float min_char_width = std::min(rline_cw, cline_cw);
- const float max_char_width = std::max(rline_cw, cline_cw);
-
- // Condition to cut the link between the current line and
- // its right neighbor
- if ((max_x_height > min_x_height * 1.4f) &&
- !(max_char_width <= 1.2f * min_char_width))
- {
- if (output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
-
- // If we only have two lines we stop the study
- if (two_lines)
- continue;
- }
- // If the current line has no right neighbor (right_nbh == l gives a zero baseline difference)
- else if (rc_baseline == 0)
- {
- // lol : left neighbor of the left neighbor
-
- // If the left neighbor of the current line has a left neighbor
- if (lol_nbh != left_nbh)
- {
- // Distance between the current line and its left neighbor
- const float left_distance = lines(left_nbh).meanline() -
- lines(l).baseline();
- // Distance between the left neighbor and the left
- // neighbor of its left neighbor
- const float lol_distance = lines(lol_nbh).meanline() -
- lines(left_nbh).baseline();
- // lol x height
- const float lol_x_height = lines(lol_nbh).x_height();
-
- // Conditions to cut the link between the current line
- // and its left neighbor
- if (left_distance > 1.4f * lol_distance
- && std::max(lol_x_height, left_x_height) <
- 1.4f * std::min(lol_x_height, left_x_height))
- {
- output(l) = l;
- continue;
- }
- }
- // Otherwise we only have a group of two lines
- else
- {
- // Distance between the current line and its left neighbor
- const float distance = lines(left_nbh).meanline() -
- lines(l).baseline();
-
- two_lines = true;
-
- // If the distance is greater than twice the min x height
- // of the two lines
- if (distance > 2.0f * std::min(x_height, left_x_height))
- {
- output(l) = l;
- continue;
- }
- }
-
- // Lines features
- const float min_x_height = std::min(x_height, left_x_height);
- const float max_x_height = std::max(x_height, left_x_height);
- const float min_char_width = std::min(lline_cw, cline_cw);
- const float max_char_width = std::max(lline_cw, cline_cw);
-
- // Condition to cut the link between the current line and
- // its left neighbor
- if ((max_x_height > min_x_height * 1.4f) &&
- !(max_char_width <= 1.2f * min_char_width))
- {
- output(l) = l;
- continue;
- }
-
- // If we only have two lines we stop the study
- if (two_lines)
- continue;
- }
- // The current line has at least one left and one right neighbor
- else // if (delta_baseline_max >= 1.1 * delta_baseline_min)
- {
- // Distance between the left and the current line
- const float
- left_distance = left_line_bbox.pcenter().row() - current_line_bbox.pcenter().row();
- // Distance between the right and the current line (NOTE(review): the statement below ends with a stray ';')
- const float
- right_distance = current_line_bbox.pcenter().row() - right_line_bbox.pcenter().row();;
-
- // If the left line is too far compared to the right one
- // we cut the link with it
- if (left_distance > 1.5f * right_distance
- && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
- {
- output(l) = l;
- continue;
- }
- // If the right line is too far compared to the left one
- // we cut the link with it
- else if (right_distance > 1.5f * left_distance
- && std::max(x_height, right_x_height) >= 1.2f * std::min(x_height, right_x_height)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
-
- // If the distance between the baseline of the left
- // neighbor and the baseline of the current line is
- // greater than the one between the current line baseline
- // and the right line baseline we have to study the text
- // features of the left line first, then of the right line
- if (lc_baseline > rc_baseline)
- {
- const float cw_max = std::max(lline_cw, cline_cw);
- const float cw_min = std::min(lline_cw, cline_cw);
- const float min_x_height = std::min(x_height, left_x_height);
- const float max_x_height = std::max(x_height, left_x_height);
-
- if ((max_x_height > min_x_height * 1.4f) &&
- !(cw_max <= 1.2f * cw_min))
- {
- output(l) = l;
- continue;
- }
-
- { // Same test against the right neighbor.
- const float min_x_height = std::min(x_height, right_x_height);
- const float max_x_height = std::max(x_height, right_x_height);
- const float cw_max = std::max(rline_cw, cline_cw);
- const float cw_min = std::min(rline_cw, cline_cw);
-
- if ((max_x_height > min_x_height * 1.4f)
- && !(cw_max <= 1.2f * cw_min)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
- else // Otherwise the right neighbor is tested first, then the left one.
- {
- const float cw_max = std::max(rline_cw, cline_cw);
- const float cw_min = std::min(rline_cw, cline_cw);
- const float min_x_height = std::min(x_height, right_x_height);
- const float max_x_height = std::max(x_height, right_x_height);
-
- if ((max_x_height > min_x_height * 1.4f)
- && !(cw_max <= 1.2f * cw_min)
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- continue;
- }
-
- { // Same test against the left neighbor.
- const float min_x_height = std::min(x_height, left_x_height);
- const float max_x_height = std::max(x_height, left_x_height);
- const float cw_max = std::max(lline_cw, cline_cw);
- const float cw_min = std::min(lline_cw, cline_cw);
-
- if ((max_x_height > min_x_height * 1.4f)
- && !(cw_max <= 1.2f * cw_min))
- {
- output(l) = l;
- continue;
- }
- }
- }
- }
- }
-
- // If we arrive here, it means that the lines in the
- // neighborhood of the current line are quite similar. We can
- // then begin to study the indentations in order to determine
- // the beginning of new paragraphs
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ________________________
-// |________________________|
-// ___________________________
-// |___________________________|
-// ___________________________
-// |___________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above.
-//
-//-----------------------------------------------------------------------------
-
- {
- // Check if the current line neighbors are aligned
- bool left_right_aligned = false;
- bool left_lol_aligned = false;
- const int dx_lr = std::abs(lline_col_min - rline_col_min);
- const int dx_llol = std::abs(lline_col_min - lolline_col_min);
-
- if (dx_lr < delta_alignment)
- left_right_aligned = true;
-
- if (dx_llol < delta_alignment)
- left_lol_aligned = true;
-
- if (left_right_aligned && left_lol_aligned)
- {
- const int left_right_col_min = std::min(lline_col_min, rline_col_min);
- const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
-
- if (dx_lrc > l_char_width &&
- dx_lrc < 3.0f * l_char_width &&
- cline_col_min > rline_col_min &&
- cline_col_min > lline_col_min)
- {
- const line_id_t out_right_nbh = output(right_nbh);
-
- if (out_right_nbh != l)
- {
- if (output(out_right_nbh) == l)
- output(out_right_nbh) = out_right_nbh;
- right(l) = l;
- }
- else
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________________
-// |___________________| End of the paragraph - Current line
-// ________________________
-// |________________________| Beginning of a new one
-// ___________________________
-// |___________________________| Left of left of current line
-//
-// End of paragraph case : we try to find an end to the current paragraph
-//
-//-----------------------------------------------------------------------------
-
- {
- // Check if the current line neighbors are aligned
- bool left_right_max_aligned = false;
- bool left_current_min_aligned = false;
- bool lol_current_min_aligned = false;
- const bool lol_is_left = output(left_nbh) == left_nbh;
- const int dx_lr_max = std::abs(lline_col_max - rline_col_max);
- const int dx_lc_min = std::abs(lline_col_min - cline_col_min);
- const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min);
-
- if (dx_lr_max < delta_alignment)
- left_right_max_aligned = true;
-
- if (dx_lc_min < delta_alignment)
- left_current_min_aligned = true;
-
- if (dx_lolc_min < delta_alignment)
- lol_current_min_aligned = true;
-
- if (!left_current_min_aligned && left_right_max_aligned &&
- (lol_current_min_aligned || lol_is_left))
- {
- const int dx_lrc = std::abs(lline_col_max - cline_col_max);
- const int l_char_width = lines(l).char_width();
- const int dx_indent = std::abs(std::max(lline_col_min,
- rline_col_min) - cline_col_min);
-
- if (dx_lrc > l_char_width &&
- dx_indent < 4 * delta_alignment &&
- cline_col_max < lline_col_max &&
- cline_col_min < lline_col_min &&
- (lline_col_min > lolline_col_min || lol_is_left))
- {
- output(l) = l;
- continue;
- }
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________________________
-// |___________________________|
-// ________________________
-// |________________________|
-// ___________________________
-// |___________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above.
-//
-//-----------------------------------------------------------------------------
-
- {
- const line_id_t ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
- const int rorline_col_min = ror_line_bbox.pmin().col();
-
- bool right_ror_min_aligned = false;
- bool left_right_aligned = false;
- const int dx_lr = std::abs(lline_col_min - rline_col_min);
- const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
-
- if (dx_rror_min < delta_alignment)
- right_ror_min_aligned = true;
-
- if (dx_lr < delta_alignment)
- left_right_aligned = true;
-
- if (right_ror_min_aligned && left_right_aligned &&
- ror_nbh != right_nbh)
- {
- const int left_right_col_min = std::min(lline_col_min, rline_col_min);
- const int dx_lrc = std::abs(left_right_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
-
- if (dx_lrc > l_char_width &&
- !may_have_another_left_link(right, right_nbh, l, lines) &&
- dx_lrc < 10.0f * l_char_width &&
- cline_col_min > rline_col_min &&
- cline_col_min > lline_col_min)
- {
- const line_id_t out_right_nbh = output(right_nbh);
-
- if (out_right_nbh != l)
- {
- if (output(out_right_nbh) == l)
- output(out_right_nbh) = out_right_nbh;
- right(l) = l;
- }
- else
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________
-// |___________|
-// ________________________
-// |________________________|
-// ___________________________
-// |___________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above.
-//
-//-----------------------------------------------------------------------------
-
- {
- const line_id_t ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
- const int rorline_col_min = ror_line_bbox.pmin().col();
-
- bool left_ror_aligned = false;
- const int dx_lror = std::abs(lline_col_min - rorline_col_min);
-
- if (dx_lror < delta_alignment)
- left_ror_aligned = true;
-
- if (left_ror_aligned)
- {
- const int left_ror_col_min = std::min(lline_col_min, rorline_col_min);
- const int dx_lrorc = std::abs(left_ror_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
- const int dx_lrorr = std::abs(left_ror_col_min - rline_col_min);
- const int dx_crmax = std::abs(rline_col_max - cline_col_max);
-
- if (dx_lrorc > l_char_width &&
- dx_lrorr > 5 * l_char_width &&
- dx_lrorr > dx_lrorc &&
- dx_crmax > 5 * l_char_width &&
- !may_have_another_left_link(right, right_nbh, l, lines) &&
- dx_lrorc < 10.0f * l_char_width &&
- cline_col_min > rorline_col_min &&
- cline_col_min > lline_col_min)
- {
- right(right_nbh) = right_nbh;
- continue;
- }
- }
- }
-
-
-// Strange case: the right neighbor starts after the center column of the current line
- {
- if (rline_col_min > current_line_bbox.pcenter().col()
- && !may_have_another_left_link(right, right_nbh, l, lines)
- && cline_col_max < rline_col_max
- && output(right_nbh) == l)
- {
- output(right_nbh) = right_nbh;
- }
- }
-
-//-----------------------------------------------------------------------------
-// ___________________________
-// |___________________________|
-// ___________________________
-// |___________________________|
-// ________________________
-// |________________________|
-//
-// Simple case : paragraphs are justified on the left. We try to find any
-// indentation like above at the end of a column.
-//
-//-----------------------------------------------------------------------------
-
- if (left_nbh == l)
- {
- const line_id_t ror_nbh = right(right_nbh);
- const box2d& ror_line_bbox = lines(ror_nbh).bbox();
- const int rorline_col_min = ror_line_bbox.pmin().col();
-
- bool right_ror_min_aligned = false;
- const int dx_rror_min = std::abs(rline_col_min - rorline_col_min);
-
- if (dx_rror_min < delta_alignment)
- right_ror_min_aligned = true;
-
- if (right_ror_min_aligned)
- {
- const int right_ror_col_min = std::min(rline_col_min, rorline_col_min);
- const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min);
- const float l_char_width = 1.5f * lines(l).char_width();
-
- if (dx_rrorc > l_char_width &&
- dx_rrorc < 10.0f * l_char_width &&
- cline_col_min > rline_col_min &&
- cline_col_max >= rline_col_max)
- {
- const line_id_t out_right_nbh = output(right_nbh);
-
- if (out_right_nbh != l)
- {
- if (output(out_right_nbh) == l)
- output(out_right_nbh) = out_right_nbh;
- right(l) = l;
- }
- else
- output(right_nbh) = right_nbh;
- continue;
- }
- }
- }
- }
-
- // Only debug
- // {
- // image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
-
- // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
- // for_all_lines(l, lines)
- // if (lines(l).is_textline())
- // {
- // // if (lines(i).is_textline())
- // // {
- // // mln::draw::box(debug, lines(i).bbox(), literal::red);
- // nbbox(output(l)).take(lines(l).bbox());
- // // }
- // }
-
- // for (unsigned i = 0; i < nbbox.nelements(); ++i)
- // if (nbbox(i).is_valid())
- // {
- // box2d b = nbbox(i).to_result();
- // mln::draw::box(debug, b, literal::orange);
- // b.enlarge(1);
- // mln::draw::box(debug, b, literal::orange);
- // b.enlarge(1);
- // mln::draw::box(debug, b, literal::orange);
- // }
-
- // mln::io::ppm::save(debug, "out_paragraph.ppm");
- // }
-
- }
- }
-
-//-------------------------------------------------------------
-// Preparation of the lines before linking them.
-// For each text line we draw its top and its bottom in the block image,
-// using its bounding box passed through geom::rotate(..., -90, domain.pcenter()).
-// With index = l + 1, where l is the id of the line, the top is drawn with
-// the value 2 * index and the bottom with the value 2 * index + 1.
-//
-//-------------------------------------------------------------
-
- template <typename L>
- inline
- void prepare_lines(const box2d& domain,
- const line_set<L>& lines,
- L& blocks,
- mln::util::array<box2d>& rbbox)
- {
- std::map< int, std::vector< const box2d* > > drawn_lines; // For each column already used: the boxes drawn there.
- // const unsigned nlines = lines.nelements();
-
- // For each text line
- //for (unsigned l = 0; l < nlines; ++l)
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Rotation of the bounding box
- box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter());
-// rbbox.append(b);
- rbbox(l) = b;
-
- const unsigned index = l + 1;
- const unsigned even_index = 2 * index;
- const unsigned odd_index = even_index + 1;
-
- // Top of the line: drawn with even_index, starting at the max column of the rotated box
- {
- bool not_finished = true;
- int col_offset = 0;
-
- while (not_finished)
- {
- // Looking for a column in the image to draw the top of the
- // line; col_offset grows until a free column is found
-
- const int col = b.pmax().col() + col_offset;
- std::map< int, std::vector< const box2d* > >::iterator it
- = drawn_lines.find(col);
-
- if (it != drawn_lines.end())
- {
- const std::vector< const box2d* >& lines = (*it).second; // NOTE(review): shadows the 'lines' parameter in this scope.
- const unsigned nb_lines = lines.size();
- unsigned i = 0;
-
- for (i = 0; i < nb_lines; ++i)
- {
- const box2d* box = lines[i];
- const int min_row = std::max(b.pmin().row(), box->pmin().row());
- const int max_row = std::min(b.pmax().row(), box->pmax().row());
-
- if (min_row - max_row <= 0) // Row ranges intersect: this column is already taken.
- break;
- }
-
- if (i == nb_lines) // No box drawn on this column intersects b: draw here.
- {
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), even_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l])); // NOTE(review): stores a pointer into rbbox; assumes rbbox is not reallocated meanwhile.
- }
- else
- ++col_offset;
- }
- else // Column never used so far: draw directly.
- {
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), even_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l]));
- }
- }
- }
-
- // Bottom of the line: drawn with odd_index, starting at the min column of the rotated box
- {
- bool not_finished = true;
- int col_offset = 0;
-
- while (not_finished)
- {
- // Looking for a column in the image to draw the bottom of
- // the line; here col_offset is subtracted
-
- const int col = b.pmin().col() - col_offset;
- std::map< int, std::vector< const box2d* > >::iterator it
- = drawn_lines.find(col);
-
- if (it != drawn_lines.end())
- {
- const std::vector< const box2d* >& lines = (*it).second; // NOTE(review): shadows the 'lines' parameter in this scope.
- const unsigned nb_lines = lines.size();
- unsigned i = 0;
-
- for (i = 0; i < nb_lines; ++i)
- {
- const box2d* box = lines[i];
- const int min_row = std::max(b.pmin().row(), box->pmin().row());
- const int max_row = std::min(b.pmax().row(), box->pmax().row());
-
- if (min_row - max_row <= 0) // Row ranges intersect: this column is already taken.
- break;
- }
-
- if (i == nb_lines) // No box drawn on this column intersects b: draw here.
- {
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), odd_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l]));
- }
- else
- ++col_offset;
- }
- else // Column never used so far: draw directly.
- {
- mln::draw::line(blocks, point2d(b.pmin().row(), col),
- point2d(b.pmax().row(), col), odd_index);
- not_finished = false;
- drawn_lines[col].push_back(&(rbbox[l]));
- }
- }
- }
- }
- }
-
- template <typename L> // Fills 'left' with, for each text line, the id of the line found by scanning 'blocks' backwards.
- inline
- void
- process_left_link(L& blocks,
- const mln::util::array<box2d>& rbbox,
- const line_set<L>& lines,
- line_links<L>& left)
- {
- typedef scribo::def::lbl_type V;
-
- // At the beginning each text line is its own neighbor; other lines get 0
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- left(l) = l;
- else
- left(l) = 0;
-
- // const unsigned nlines = lines.nelements();
-
- // For each text line
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Max distance for the line search
- int dmax = 1.5f * lines(l).x_height();
-
- // Starting points in the current line box: its center c, and q on the same
- // column, a quarter of the way from the min row to the center row
- point2d c = rbbox(l).pcenter();
- point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col());
-
- int
- midcol = (rbbox(l).pmax().col()
- - rbbox(l).pmin().col()) / 2;
-
- // Left
- {
- // Left margin: number of columns between c and the min column of the image
- int
- nleftima = c.col() - blocks.domain().pmin().col(),
- // Left distance: search length, capped by the left margin
- nleft = std::min(nleftima, midcol + dmax);
-
- V
- // Starting points in the box
- *p = &blocks(c),
- *p2 = &blocks(q),
- // End of search
- *pstop = p - nleft - 1,
- // Line neighbor
- *nbh_p = 0;
-
- // While we haven't found a neighbor or reached the limit
- // (presumably each decrement moves one column back on the same row -- TODO confirm the image memory layout)
- for (; p != pstop; --p, --p2)
- {
- if (*p2 != literal::zero // Not the background
- && ((*p2 % 2) == 0) // Even label: top of a line (see prepare_lines)
- && left((*p2 >> 1) - 1) != l) // No loops
- {
- // Neighbor found, we stop the search
- nbh_p = p2;
- break;
- }
-
- if (*p != literal::zero // Not the background
- && ((*p % 2) == 0) // Even label: top of a line (see prepare_lines)
- && left((*p >> 1) - 1) != l) // No loops
- {
- // Neighbor found, we stop the search
- nbh_p = p;
- break;
- }
- }
-
- // If a neighbor was found, then we have found the top of the
- // line. We are then looking for the bottom of the encountered
- // line. If during the search process we find a complete line
- // included in the touched line, this line is considered as
- // the neighbor under certain conditions (see below)
-
- //---------------------------------------------------------------
- // _________________________ |
- // |_________________________| => Current line | Search direction
- // v
- // => First encountered top line
- // __________________________________________________ 2Q
- // | Q |
- // | _________________________ |2P
- // | |_____________P___________| => Second top |2P + 1
- // | line |
- // |__________________________________________________|2Q + 1
- //
- //
- //---------------------------------------------------------------
-
- if (nbh_p)
- {
- std::vector<V> lines_nbh; // Even (top) labels met since the first hit.
- const V end_p = *nbh_p + 1; // Odd (bottom) label of the first encountered line.
- const V* nbh_p_copy = nbh_p;
-
- // NOTE(review): this scan stops only on end_p or on the break below; no bound check is visible here.
- for (; *nbh_p != end_p; --nbh_p)
- {
- if ((*nbh_p) != literal::zero) // Not the background
- {
- if ((*nbh_p) % 2 == 0)// We have found the top of
- // another line
- lines_nbh.push_back(*nbh_p);
- else
- {
- // We have found the bottom of a line. We are looking if
- // we have already encountered the top of this
- // line. If so, we link the current line with this one
- // under certain conditions:
-
- if (std::find(lines_nbh.begin(), lines_nbh.end(),
- (*nbh_p) - 1) != lines_nbh.end())
- {
- // If we can link the complete line with the current line
- if (// It must be in the search range
- nbh_p > pstop
- // Avoid loops
- && left(((*nbh_p - 1) >> 1) - 1) != l)
- left(l) = ((*nbh_p - 1) >> 1) - 1;
-
- // We have found a complete line so we stop the search
- break;
- }
- }
- }
- }
-
-
- // If we haven't found any included line in the first
- // neighbor, then the line is considered as the neighbor of
- // the current line
- if (*nbh_p == end_p)
- left(l) = (*nbh_p_copy >> 1) - 1;
- }
- }
- }
- }
-
-
- // We assume that the lines have been rotated
- template <typename L>
- inline
- void
- process_right_link(L& blocks,
- const mln::util::array<box2d>& rbbox,
- const line_set<L>& lines,
- line_links<L>& right)
- {
- typedef scribo::def::lbl_type V;
-
- // At the beginning each line is its own neighbor
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- right(l) = l;
- else
- right(l) = 0;
-
- // const unsigned nlines = lines.nelements();
-
- // For each line
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- // Max distance for the line search
- int dmax = 1.5f * lines(l).x_height();
-
- // Starting points in the current line box
- point2d c = rbbox(l).pcenter();
- point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col());
-
- int
- midcol = (rbbox(l).pmax().col()
- - rbbox(l).pmin().col()) / 2;
-
- // Right
- {
- int
- nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(),
- nright = std::min(nrightima, midcol + dmax);
-
- V
- // Starting points in the box
- *p = &blocks(c),
- *p2 = &blocks(q),
- // End of search
- *pstop = p + nright - 1,
- // Line neighbor
- *nbh_p = 0;
-
- // While we haven't found a neighbor or reached the limit
- for (; p != pstop; ++p, ++p2)
- {
- if (*p2 != literal::zero // Not the background
- && ((*p2 % 2) == 1) // Looking for the bottom of a line
- && right(((*p2 - 1) >> 1) - 1) != l) // No loops
- {
- // Neightbor found, we stop the research
- nbh_p = p2;
- break;
- }
-
- if (*p != literal::zero // Not the background
- && ((*p % 2) == 1) // Looking for the bottom of a line
- && right(((*p - 1) >> 1) - 1) != l) // No loops
- {
- // Neighbor found, we stop the search
- nbh_p = p;
- break;
- }
- }
-
- // If a neighbor was found, then we have found the bottom of the
- // line. We are then looking for the top of the encountered
- // line. If during the search process we find a complete line
- // included in the touched line, this line is considered as
- // the neighbor under certain conditions (see below)
-
- //---------------------------------------------------------------
- //
- //
- // __________________________________________________ 2Q
- // | Q |
- // | _________________________ |2P
- // | |_____________P___________| => Second bottom |2P + 1
- // | line |
- // |__________________________________________________|2Q + 1
- // => First encountered bottom line
- // _________________________ ^
- // |_________________________| => Current line | Search direction
- // |
- //---------------------------------------------------------------
-
- if (nbh_p)
- {
- std::vector<V> lines_nbh;
- const V end_p = *nbh_p - 1;
- const V* nbh_p_copy = nbh_p;
-
- for (; *nbh_p != end_p; ++nbh_p)
- {
- if (*nbh_p != literal::zero) // Not the background
- {
- if (*nbh_p % 2 == 1) // We have found the bottom of
- // another line
- lines_nbh.push_back(*nbh_p);
- else
- {
- // We have found the top of a line. We are looking if
- //we have already encountered the bottom of this
- // line. If so, we link the current line with this one
- // under certain conditions:
-
- if (std::find(lines_nbh.begin(), lines_nbh.end(),
- *nbh_p + 1) != lines_nbh.end())
- {
- // If we can link the complete line with the current line
- if (// It must be in the search range
- nbh_p < pstop
- // Avoid loops
- && right((*nbh_p >> 1) - 1) != l)
- right(l) = (*nbh_p >> 1) - 1;
-
- // We have found a complete line, so we stop the search
- break;
- }
- }
- }
- }
-
- // If we haven't found any included line in the first
- // neighbor, then the line is considered as the neighbor of
- // the current line
-
- if (*nbh_p == end_p)
- right(l) = ((*nbh_p_copy - 1) >> 1) - 1;
- }
- }
- }
- }
-
-//-----------------------------------------------------------------------
-// Finalizing the links by merging information extracted from the left
-// and right links
-//-----------------------------------------------------------------------
-
- template< typename L >
- inline
- void finalize_links(line_links<L>& left,
- line_links<L>& right,
- const line_set<L>& lines)
- {
- // const unsigned nlines = lines.nelements();
-
- for_all_lines(l, lines)
- if (lines(l).is_textline())
- {
- const unsigned left_value = left(l);
- const unsigned right_value = right(l);
-
- // If the right neighbor of my left neighbor is itself then its
- // right neighbor is me
- {
- line_id_t& v = right(left_value);
-
- if (v == left_value)
- v = l;
- }
-
- // If the left neighbor of my right neighbor is itself then its
- // left neighbor is me
- {
- line_id_t& v = left(right_value);
-
- if (v == right_value)
- v = l;
- }
- }
- }
-
- template <typename L>
- inline
- paragraph_set<L>
- extract_paragraphs(line_set<L>& lines,
- const image2d<bool>& input)
- {
- typedef scribo::def::lbl_type V;
-
- image2d<V> blocks(geom::rotate(input.domain(), -90, input.domain().pcenter()));
- data::fill(blocks, 0);
-
- // util::array< line_info<L> > lines_info;
-
- // for_all_lines(l, lines)
- // {
- // if (lines(l).is_textline())
- // lines_info.append(lines(l));
- // }
-
-/// const unsigned nlines = lines_info.nelemnts();
- mln::util::array<box2d> rbbox;
- line_links<L> left(lines);
- left(0) = 0;
- line_links<L> right(lines);
- right(0) = 0;
- line_links<L> output(lines);
- output(0) = 0;
-
- rbbox.resize(lines.nelements() + 1);
-
-// std::cout << "Preparing lines" << std::endl;
- prepare_lines(input.domain(), lines , blocks, rbbox);
-// io::pgm::save(blocks, "blocks.pgm");
-// std::cout << "Linking left" << std::endl;
- process_left_link(blocks, rbbox, lines , left);
-// std::cout << "Linking right" << std::endl;
- process_right_link(blocks, rbbox, lines , right);
-// std::cout << "Finalizing links" << std::endl;
- finalize_links(left, right, lines );
- // std::cout << "Finalizing merging" << std::endl;
- // finalize_line_merging(left, right, lines);
-// std::cout << "Extracting paragraphs" << std::endl;
- filter::paragraph_links(left, right, output, lines);
-
- paragraph_set<L> par_set = make::paragraph(output, right);
- return par_set;
- }
-}
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index 4f2c074..52ee5b0 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -63,7 +63,7 @@
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
-# include <scribo/text/paragraphs.hh>
+# include <scribo/text/extract_paragraphs.hh>
# include <scribo/make/debug_filename.hh>
@@ -541,7 +541,7 @@ namespace scribo
// scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
scribo::paragraph_set<L>
- parset = extract_paragraphs(lines, doc.binary_image());
+ parset = text::extract_paragraphs(lines, doc.binary_image());
doc.set_paragraphs(parset);
on_progress();
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index 5e98f3e..4a74aef 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -74,9 +74,9 @@
# include <scribo/text/recognition.hh>
# endif // ! SCRIBO_NOCR
-# include <scribo/text/merging.hh>
+# include <scribo/text/merging_hdoc.hh>
# include <scribo/text/link_lines.hh>
-# include <scribo/text/paragraphs.hh>
+# include <scribo/text/paragraphs_hdoc.hh>
# include <scribo/make/debug_filename.hh>
@@ -461,7 +461,7 @@ namespace scribo
on_new_progress_label("Merging segmented lines");
- lines = scribo::text::merging(lines);
+ lines = scribo::text::merging_hdoc(lines);
//===== DEBUG =====
@@ -525,7 +525,8 @@ namespace scribo
on_new_progress_label("Extracting paragraphs");
scribo::paragraph_set<L>
- parset = extract_paragraphs(lines, doc.binary_image());
+ parset = scribo::text::extract_paragraphs_hdoc(lines,
+ doc.binary_image());
on_progress();
--
1.5.6.5
1
0

branch fix-leak-remove_holder deleted: last-svn-commit-912-ga2c67cf
by lazzara@lrde.epita.fr 16 Jun '11
by lazzara@lrde.epita.fr 16 Jun '11
16 Jun '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch fix-leak-remove_holder has been deleted
was a2c67cf6ac3577535d74469cece6919ed1e67c2e
-----------------------------------------------------------------------
a2c67cf6ac3577535d74469cece6919ed1e67c2e Add file.
-----------------------------------------------------------------------
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0

15 Jun '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch unstable/scribo has been updated
via 3a86ff37896bf5b5788b25efa97a2c7a8c700a5d (commit)
from 3368692f6ecf9e857f8443caa3f8d60da470a1f9 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
3a86ff3 Adding horizontal separators check in paragraphs construction
-----------------------------------------------------------------------
Summary of changes:
scribo/scribo/text/paragraphs.hh | 116 ++++++++++++++++++++++++++++++++++----
1 files changed, 105 insertions(+), 11 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0

last-svn-commit-875-g3a86ff3 Adding horizontal separators check in paragraphs construction
by Julien Marquegnies 15 Jun '11
by Julien Marquegnies 15 Jun '11
15 Jun '11
---
scribo/scribo/text/paragraphs.hh | 116 ++++++++++++++++++++++++++++++++++----
1 files changed, 105 insertions(+), 11 deletions(-)
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index 6c9285b..44835cb 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -55,6 +55,76 @@ namespace scribo
{
//---------------------------------------------------------------------
+// Checks if there is a horizontal separator between the two lines
+//---------------------------------------------------------------------
+
+ template <typename L>
+ inline
+ bool
+ between_horizontal_separator(const scribo::line_info<L>& l1,
+ const scribo::line_info<L>& l2)
+ {
+ // The component set must provide a separator image.
+ mln_precondition(l1.holder().components().has_separators());
+
+ const box2d& l1_bbox = l1.bbox();
+ const box2d& l2_bbox = l2.bbox();
+
+ unsigned
+ row1 = l1_bbox.pcenter().row(),
+ row2 = l2_bbox.pcenter().row();
+ const mln_ch_value(L, bool)&
+ separators = l1.holder().components().separators();
+
+ unsigned row;
+ unsigned col_ptr;
+ unsigned left_col_ptr;
+ unsigned right_col_ptr;
+ unsigned end;
+
+ if (row1 < row2)
+ {
+ row1 = l1_bbox.pmax().row();
+ row2 = l2_bbox.pmin().row();
+
+ const unsigned quarter =
+ ((l1_bbox.pcenter().col() - l1_bbox.pmin().col()) >> 2);
+
+ row = l1_bbox.pcenter().row();
+ col_ptr = l1_bbox.pcenter().col();
+ left_col_ptr = l1_bbox.pmin().col() + quarter;
+ right_col_ptr = l1_bbox.pmax().col() - quarter;
+ end = row2;
+ }
+ else
+ {
+ row2 = l2_bbox.pmax().row();
+ row1 = l1_bbox.pmin().row();
+
+ const unsigned quarter =
+ ((l2_bbox.pcenter().col() - l2_bbox.pmin().col()) >> 2);
+
+ row = l2_bbox.pcenter().row();
+ col_ptr = l2_bbox.pcenter().col();
+ left_col_ptr = l2_bbox.pmin().col() + quarter;
+ right_col_ptr = l2_bbox.pmax().col() - quarter;
+ end = row1;
+ }
+
+ // Walk the rows up to 'end'; a separator is reached if any of the three probed columns is set.
+ while (row < end)
+ {
+ ++row;
+ if (separators.at_(row, col_ptr)
+ || separators.at_(row, left_col_ptr)
+ || separators.at_(row, right_col_ptr))
+ return true;
+ }
+
+ return false;
+ }
+
+//---------------------------------------------------------------------
// This method aims to cut the links between lines that do not fit the
// different criteria
//---------------------------------------------------------------------
@@ -82,10 +152,30 @@ namespace scribo
if (lines(l).is_textline())
{
// Neighbors
+ line_id_t left_nbh = output(l);
+ line_id_t right_nbh = right(l);
+ line_id_t lol_nbh = output(left_nbh);
- const line_id_t left_nbh = output(l);
- const line_id_t right_nbh = right(l);
- const line_id_t lol_nbh = output(left_nbh);
+ const line_info<L>& left_line = lines(left_nbh);
+ const line_info<L>& current_line = lines(l);
+ const line_info<L>& right_line = lines(right_nbh);
+
+ if (right_line.holder().components().has_separators() &&
+ between_horizontal_separator(right_line, current_line))
+ {
+ if (output(right_nbh) == l)
+ {
+ output(right_nbh) = right_nbh;
+ right_nbh = l;
+ }
+ }
+ if (current_line.holder().components().has_separators() &&
+ between_horizontal_separator(current_line, left_line))
+ {
+ output(l) = l;
+ left_nbh = l;
+ lol_nbh = l;
+ }
// Line features
const float x_height = lines(l).x_height();
@@ -122,9 +212,9 @@ namespace scribo
const int rc_baseline = c_baseline -lines(right_nbh).baseline();
// Max baseline distance between the two neighbors
- // const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
- // const float delta_baseline_min = std::min(lc_baseline,
- // rc_baseline);
+ const float delta_baseline_max = std::max(lc_baseline, rc_baseline);
+ const float delta_baseline_min = std::min(lc_baseline,
+ rc_baseline);
// Only two lines, meaning the current line has only one neighbor
bool two_lines = false;
@@ -264,7 +354,7 @@ namespace scribo
continue;
}
// The current line has at least one left and one right neighbor
- else // if (delta_baseline_max >= delta_baseline_min)
+ else if (delta_baseline_max >= 1.1f * delta_baseline_min)
{
// Distance between the left and the current line
const float left_distance =
@@ -275,16 +365,20 @@ namespace scribo
// If the left line is too far compared to the right one
// we cut the link with it
- if (left_distance > 1.2f * right_distance
- && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height))
+ if ((left_distance > 1.2f * right_distance
+ && std::max(x_height, left_x_height) > 1.2f *
+ std::min(x_height, left_x_height))
+ || (left_distance > 2.0 * right_distance))
{
output(l) = l;
continue;
}
// If the right line is too far compared to the left one
// we cut the link with it
- else if (right_distance > 1.2f * left_distance
- && std::max(x_height, right_x_height) > 1.2f * std::min(x_height, right_x_height)
+ else if (((right_distance > 1.2f * left_distance
+ && std::max(x_height, right_x_height) > 1.2f *
+ std::min(x_height, right_x_height))
+ || (right_distance > 2.0f * left_distance))
&& output(right_nbh) == l)
{
output(right_nbh) = right_nbh;
--
1.5.6.5
1
0
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch Sylvain has been updated
via a794c88fd084cd17f09ce4228c6b80f138f42f47 (commit)
from 9d2089bedcc47c6b7acbe24719b0f41e12d4c2da (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
a794c88 Modified weights in the choose function.
-----------------------------------------------------------------------
Summary of changes:
scribo/ChangeLog | 6 ++++++
scribo/scribo/inverse_video/choose.hh | 4 ++--
2 files changed, 8 insertions(+), 2 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0

12 Jun '11
---
scribo/ChangeLog | 6 ++++++
scribo/scribo/inverse_video/choose.hh | 4 ++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 3fbee9e..65cc887 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,11 @@
2011-05-27 Sylvain Lobry <lobry(a)lrde.epita.fr>
+ Modified weights in the choose function.
+
+ * scribo/inverse_video/choose.hh: Here.
+
+2011-05-27 Sylvain Lobry <lobry(a)lrde.epita.fr>
+
Added a choose function, to be used with handle_collisions.
* scribo/inverse_video/handle_collisions.hh: Prototype fixed.
diff --git a/scribo/scribo/inverse_video/choose.hh b/scribo/scribo/inverse_video/choose.hh
index 9b0d46e..7095116 100644
--- a/scribo/scribo/inverse_video/choose.hh
+++ b/scribo/scribo/inverse_video/choose.hh
@@ -101,8 +101,8 @@ namespace scribo
double score_ratio_area = scribo::inverse_video::ratio_area(line, line_inverse);
score =
- score_nb_comps * 0.5 +
- score_ratio_area * 0.5;
+ score_nb_comps * 0.4 +
+ score_ratio_area * 0.6;
scribo::inverse_video::internal::handle_score (score, input, line, line_inverse);
--
1.5.6.5
1
0

07 Jun '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch icdar/hdlac2011 has been updated
via 563c43fdb996c6891301d4ac83e8547f12a50e57 (commit)
via 1fc7f0c1905ff517dc8d1ead15dc15aa0268a668 (commit)
via 5facd2765b0bb038eea11262d03ced1592f3c863 (commit)
via 7014c9b07963a43711291b7b83ebb3e9033c709c (commit)
via be328942bdefdcaf635821f678661f188ae4e48e (commit)
via afb377800d77db694b1fa72d017d389d5ec6368d (commit)
via 3f75647a000c255d8dfa039ac0cd087401b08c84 (commit)
via 21c18879bb90b58b5434ac37f4a28431d8f541ee (commit)
via 3d6ca930d20a7d7c27b4b693e02962007d03100e (commit)
via 12481e43db4beb10032bc7e3593f5bb671855d97 (commit)
via 3aef9a5c1e0460bab291bfe6642e9bdc9d73e06b (commit)
via aea0fc02c80ece88f674cd99ce5c9322a3ed8884 (commit)
via a32b976cb7e346ef41c3ffdb7aa38464824413ef (commit)
via 18cb924161c41ac5a27be2ce714dae1f1877e409 (commit)
via 5a9f233bb109ecdcae534a93d87fed41d5f3d527 (commit)
via 3f5a6b2fff5e3361d282310419d1ea33a02e2040 (commit)
via 98f9fcbe78d871931c1aac49d33c24e73d3491b7 (commit)
via cd6039ae81bedd86c94e6a50ebf8f82405f5eb50 (commit)
via ed11b3b43159f8cea9f28edc0bc3c67297a9bc72 (commit)
via a80bcb5bdaff1619d039901aa278cb3c8e63bf36 (commit)
from 51383b97b33a33a046e3026d6b70fe33af775ccd (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
563c43f Add a new toolchain for ICDAR contests.
1fc7f0c Disable OCR in ICDAR toolchains.
5facd27 src/contest/DAE-2011/content_in_hdoc_dae.cc: Specify output dir.
7014c9b configure.ac: Configure scribo/src/util.
be32894 Add a new example.
afb3778 configure.ac: Configure ICDAR contests directories.
3f75647 Add binaries for ICDAR 2011 contests.
21c1887 Small fixes.
3d6ca93 Fix use of uninitialized values.
12481e4 Fix a serious memory leak.
3aef9a5 mln/draw/polygon.hh: New.
aea0fc0 Add a missing conversion routine for object ids.
a32b976 scribo/text/paragraphs_closing.hh: Revamp code.
18cb924 scribo/util/box_is_included.hh: New.
5a9f233 Fix Page XML output.
3f5a6b2 Handle drop capital components.
98f9fcb Save component outlines instead of bboxes.
cd6039a Add support for tags in paragraph_info.
ed11b3b Improve output cleanup for historical document toolchain.
a80bcb5 scribo/util/component_precise_outline.hh: Add support for labeled_image.
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 8 +
configure.ac | 3 +
milena/ChangeLog | 11 +
milena/mln/convert/from_to.hxx | 7 +-
milena/mln/draw/polygon.hh | 105 +++++++
milena/mln/util/object_id.hh | 41 +++-
scribo/ChangeLog | 157 +++++++++++
scribo/scribo/core/component_info.hh | 19 +--
scribo/scribo/core/component_set.hh | 6 +-
scribo/scribo/core/line_info.hh | 44 ++--
scribo/scribo/core/paragraph_info.hh | 52 +++-
scribo/scribo/core/tag/component.hh | 7 +-
scribo/scribo/core/tag/paragraph.hh | 14 +
scribo/scribo/draw/line_components.hh | 7 +-
scribo/scribo/filter/images_in_paragraph.hh | 8 +-
scribo/scribo/filter/paragraphs_bbox_overlap.hh | 175 +++++++++----
scribo/scribo/filter/paragraphs_in_borders.hh | 140 ++++++++++
scribo/scribo/filter/paragraphs_in_image.hh | 29 ++-
scribo/scribo/filter/separators_in_borders.hh | 206 ++++++++++++++
scribo/scribo/filter/separators_in_element.hh | 84 +++---
scribo/scribo/filter/separators_in_paragraph.hh | 92 ++++---
scribo/scribo/filter/separators_vert_in_borders.hh | 143 ++++++++++
scribo/scribo/io/img/internal/debug_img_visitor.hh | 102 +++-----
scribo/scribo/io/img/internal/full_img_visitor.hh | 71 +++--
scribo/scribo/io/img/save.hh | 6 +-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 60 +++--
.../scribo/io/xml/internal/print_image_coords.hh | 6 +-
.../scribo/io/xml/internal/print_page_preambule.hh | 13 +-
scribo/scribo/io/xml/internal/time_info.hh | 75 +++++
scribo/scribo/io/xml/save.hh | 2 +-
.../postprocessing/images_to_drop_capital.hh | 141 ++++++++++
.../primitive/extract/lines_h_thick_and_thin.hh | 1 +
scribo/scribo/primitive/remove/separators.hh | 2 +
scribo/scribo/text/merging.hh | 46 ++--
scribo/scribo/text/paragraphs.hh | 26 +-
scribo/scribo/text/paragraphs_closing.hh | 284 ++++++++++++--------
.../toolchain/internal/content_in_doc_functor.hh | 5 +
.../toolchain/internal/content_in_hdoc_functor.hh | 66 ++++-
scribo/scribo/util/box_is_included.hh | 74 +++++
scribo/scribo/util/component_precise_outline.hh | 79 ++++--
scribo/src/Makefile.am | 3 +-
scribo/src/content_in_hdoc.cc | 20 +-
scribo/src/contest/DAE-2011/Makefile.am | 46 ++++
scribo/src/contest/DAE-2011/content_in_doc_dae.cc | 122 +++++++++
scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc | 154 +++++++++++
scribo/src/contest/Makefile.am | 7 +-
scribo/src/contest/hdlac-2011/Makefile.am | 37 +++
.../contest/hdlac-2011/content_in_hdoc_hdlac.cc | 135 +++++++++
scribo/src/util/Makefile.am | 23 ++
scribo/src/util/component_precise_outline.cc | 88 ++++++
50 files changed, 2533 insertions(+), 519 deletions(-)
create mode 100644 milena/mln/draw/polygon.hh
create mode 100644 scribo/scribo/filter/paragraphs_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_vert_in_borders.hh
create mode 100644 scribo/scribo/io/xml/internal/time_info.hh
create mode 100644 scribo/scribo/postprocessing/images_to_drop_capital.hh
create mode 100644 scribo/scribo/util/box_is_included.hh
create mode 100644 scribo/src/contest/DAE-2011/Makefile.am
create mode 100644 scribo/src/contest/DAE-2011/content_in_doc_dae.cc
create mode 100644 scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
create mode 100644 scribo/src/contest/hdlac-2011/Makefile.am
create mode 100644 scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc
create mode 100644 scribo/src/util/Makefile.am
create mode 100644 scribo/src/util/component_precise_outline.cc
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0

last-svn-commit-923-g563c43f Add a new toolchain for ICDAR contests.
by Guillaume Lazzara 07 Jun '11
by Guillaume Lazzara 07 Jun '11
07 Jun '11
* src/contest/DAE-2011/Makefile.am: New target.
* src/contest/DAE-2011/content_in_doc_dae.cc: New.
---
scribo/ChangeLog | 8 +++
scribo/src/contest/DAE-2011/Makefile.am | 11 ++++-
...ontent_in_hdoc_dae.cc => content_in_doc_dae.cc} | 56 ++++---------------
3 files changed, 30 insertions(+), 45 deletions(-)
copy scribo/src/contest/DAE-2011/{content_in_hdoc_dae.cc => content_in_doc_dae.cc} (69%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 9ba701f..394c247 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,13 @@
2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Add a new toolchain for ICDAR contests.
+
+ * src/contest/DAE-2011/Makefile.am: New target.
+
+ * src/contest/DAE-2011/content_in_doc_dae.cc: New.
+
+2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Disable OCR in ICDAR toolchains.
* scribo/toolchain/internal/content_in_doc_functor.hh,
diff --git a/scribo/src/contest/DAE-2011/Makefile.am b/scribo/src/contest/DAE-2011/Makefile.am
index 09bccd7..d7d6b8d 100644
--- a/scribo/src/contest/DAE-2011/Makefile.am
+++ b/scribo/src/contest/DAE-2011/Makefile.am
@@ -24,7 +24,7 @@ noinst_PROGRAMS =
if HAVE_MAGICKXX
-noinst_PROGRAMS += content_in_hdoc_dae
+noinst_PROGRAMS += content_in_hdoc_dae content_in_doc_dae
content_in_hdoc_dae_SOURCES = content_in_hdoc_dae.cc
content_in_hdoc_dae_CPPFLAGS = $(AM_CPPFLAGS) \
@@ -34,4 +34,13 @@ content_in_hdoc_dae_LDFLAGS = $(AM_LDFLAGS) \
-lpthread \
$(MAGICKXX_LDFLAGS)
+content_in_doc_dae_SOURCES = content_in_doc_dae.cc
+content_in_doc_dae_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ -I$(top_builddir)
+content_in_doc_dae_LDFLAGS = $(AM_LDFLAGS) \
+ -lpthread \
+ $(MAGICKXX_LDFLAGS)
+
+
endif HAVE_MAGICKXX
\ No newline at end of file
diff --git a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc b/scribo/src/contest/DAE-2011/content_in_doc_dae.cc
similarity index 69%
copy from scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
copy to scribo/src/contest/DAE-2011/content_in_doc_dae.cc
index 5986142..d194bd1 100644
--- a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
+++ b/scribo/src/contest/DAE-2011/content_in_doc_dae.cc
@@ -29,13 +29,13 @@
#include <libgen.h>
#include <fstream>
#include <iostream>
-#include <sstream>
#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
-#include <scribo/toolchain/content_in_hdoc.hh>
+#include <scribo/toolchain/content_in_doc.hh>
#include <scribo/toolchain/text_in_doc_preprocess.hh>
#include <scribo/core/document.hh>
@@ -47,20 +47,13 @@
#include <scribo/preprocessing/crop.hh>
#include <scribo/io/xml/save.hh>
-#include <scribo/io/img/save.hh>
-
-
-#include <mln/core/alias/neighb2d.hh>
-#include <mln/labeling/compute.hh>
-#include <mln/labeling/foreground.hh>
-#include <mln/util/timer.hh>
-
const char *args_desc[][2] =
{
- { "input.tif", "An image." },
- { "output_dir", "Output directory." },
+ { "input.*", "An image." },
+ { "output_dir", "Output directory" },
+
{0, 0}
};
@@ -73,45 +66,23 @@ int main(int argc, char* argv[])
if (argc != 3)
return scribo::debug::usage(argv,
- "Document Image Analysis in Historical Documents",
- "input.tif output_dir",
+ "Find paragraph segmentation and produces images for each paragraph.",
+ "input.* output_dir",
args_desc);
trace::entering("main");
Magick::InitializeMagick(*argv);
- mln::util::timer t;
- t.start();
-
typedef image2d<scribo::def::lbl_type> L;
image2d<value::rgb8> input;
mln::io::magick::load(input, argv[1]);
// Preprocess document
image2d<bool> input_preproc;
- {
- input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+ input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
- // Cleanup components on borders
- {
- typedef scribo::def::lbl_type V;
- V nlabels;
- image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels);
- mln::util::array<box2d>
- bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels);
-
- const box2d& b = input.domain();
- for_all_ncomponents(e, nlabels)
- if (bbox(e).pmin().row() == b.pmin().row()
- || bbox(e).pmax().row() == b.pmax().row()
- || bbox(e).pmin().col() == b.pmin().col()
- || bbox(e).pmax().col() == b.pmax().col())
- data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false);
- }
- }
-
- bool denoise = 1;
+ bool denoise = true;
std::string language = "";
bool find_line_seps = true;
bool find_whitespace_seps = true;
@@ -121,11 +92,9 @@ int main(int argc, char* argv[])
// Text
std::cout << "Analysing document..." << std::endl;
document<L>
- doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise,
- find_line_seps, find_whitespace_seps,
- !language.empty(), language);
-
- doc.set_filename(basename(argv[1]));
+ doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
const paragraph_set<L>& par_set = doc.paragraphs();
image2d<bool> output(input.domain());
@@ -149,6 +118,5 @@ int main(int argc, char* argv[])
mln::io::pbm::save(output, ss.str());
}
-
trace::exiting("main");
}
--
1.5.6.5
1
0