* scribo/core/document.hh: Store binary image without separators.
* scribo/filter/object_links_bbox_overlap.hh: Fix use of
object_links structure.
* scribo/io/img/internal/debug_img_visitor.hh: Do not draw invalid
elements.
* scribo/make/text_components_image.hh: Add a precondition.
* scribo/primitive/extract/alignments.hh: Add debug guards.
* scribo/primitive/extract/lines_h_pattern.hh,
* scribo/primitive/extract/lines_v_pattern.hh: Fix structuring
element used for dilation.
* scribo/text/merging.hh: Reindent comments.
* src/Makefile.am: Add content_in_hdoc target.
---
scribo/ChangeLog | 24 ++++++++++++++++++++
scribo/scribo/core/document.hh | 21 +++++++++++++++++
scribo/scribo/filter/object_links_bbox_overlap.hh | 19 +++++++++------
scribo/scribo/io/img/internal/debug_img_visitor.hh | 13 ++++++++--
scribo/scribo/make/text_components_image.hh | 1 +
scribo/scribo/primitive/extract/alignments.hh | 24 +++++++++++++++++++-
scribo/scribo/primitive/extract/lines_h_pattern.hh | 10 +++-----
scribo/scribo/primitive/extract/lines_v_pattern.hh | 10 +++-----
scribo/scribo/text/merging.hh | 5 ++-
scribo/src/Makefile.am | 22 ++++++++++++++++-
10 files changed, 121 insertions(+), 28 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 8e3c903..df242f1 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,29 @@
2011-05-16 Guillaume Lazzara <lazzara(a)fidji.lrde.epita.fr>
+ Various small fixes in Scribo.
+
+ * scribo/core/document.hh: Store binary image without separators.
+
+ * scribo/filter/object_links_bbox_overlap.hh: Fix use of
+ object_links structure.
+
+ * scribo/io/img/internal/debug_img_visitor.hh: Do not draw invalid
+ elements.
+
+ * scribo/make/text_components_image.hh: Add a precondition.
+
+ * scribo/primitive/extract/alignments.hh: Add debug guards.
+
+ * scribo/primitive/extract/lines_h_pattern.hh,
+ * scribo/primitive/extract/lines_v_pattern.hh: Fix structuring
+ element used for dilation.
+
+ * scribo/text/merging.hh: Reindent comments.
+
+ * src/Makefile.am: Add content_in_hdoc target.
+
+2011-05-16 Guillaume Lazzara <lazzara(a)fidji.lrde.epita.fr>
+
Add holder information to component_info.
* scribo/core/component_info.hh,
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh
index 0fe2be3..98c438d 100644
--- a/scribo/scribo/core/document.hh
+++ b/scribo/scribo/core/document.hh
@@ -116,11 +116,15 @@ namespace scribo
const mln::image2d<bool>& binary_image() const;
void set_binary_image(const mln::image2d<bool>& binary_image);
+ const mln::image2d<bool>& binary_image_wo_seps() const;
+ void set_binary_image_wo_seps(const mln::image2d<bool>&
binary_image_wo_seps);
+
private:
std::string filename_;
mln::image2d<mln::value::rgb8> image_;
mln::image2d<bool> binary_image_;
+ mln::image2d<bool> binary_image_wo_seps_;
paragraph_set<L> parset_;
component_set<L> elements_;
@@ -437,6 +441,23 @@ namespace scribo
template <typename L>
+ const mln::image2d<bool>&
+ document<L>::binary_image_wo_seps() const
+ {
+ return binary_image_wo_seps_;
+ }
+
+
+ template <typename L>
+ void
+ document<L>::set_binary_image_wo_seps(
+ const mln::image2d<bool>& binary_image_wo_seps)
+ {
+ binary_image_wo_seps_ = binary_image_wo_seps;
+ }
+
+
+ template <typename L>
bool operator==(const document<L>& lhs, const document<L>& rhs)
{
diff --git a/scribo/scribo/filter/object_links_bbox_overlap.hh
b/scribo/scribo/filter/object_links_bbox_overlap.hh
index 3bf3c50..a93d849 100644
--- a/scribo/scribo/filter/object_links_bbox_overlap.hh
+++ b/scribo/scribo/filter/object_links_bbox_overlap.hh
@@ -78,13 +78,16 @@ namespace scribo
mln_precondition(links.is_valid());
const component_set<L>& components = links.components();
- object_links<L> output(links);
+ object_links<L> output = links.duplicate();
- for_all_comps(i, components)
- if (components(i).is_valid() && links(i) && links(i) != i)
+ bool has_intersection;
+ mln_site(L) pmin, pmax;
+ float ratio_i, ratio_link_i;
+
+ for_all_links(i, links)
+ if (links.is_linked(i))
{
- bool has_intersection = true;
- mln_site(L) pmin, pmax;
+ has_intersection = true;
for (unsigned dim = 0; dim < mln_site_(L)::dim; ++dim)
{
pmin[dim] = math::max(components(i).bbox().pmin()[dim],
@@ -103,9 +106,9 @@ namespace scribo
continue;
mln_box(L) interbbox(pmin, pmax);
- float
- ratio_i = interbbox.nsites() /(float)components(i).bbox().nsites(),
- ratio_link_i = interbbox.nsites() /(float)components(links(i)).bbox().nsites();
+
+ ratio_i = interbbox.nsites() /(float)components(i).bbox().nsites();
+ ratio_link_i = interbbox.nsites() /(float)components(links(i)).bbox().nsites();
if (ratio_i >= max_overlap_ratio
|| ratio_link_i >= max_overlap_ratio)
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh
b/scribo/scribo/io/img/internal/debug_img_visitor.hh
index 5ad1dd3..7d1d3d7 100644
--- a/scribo/scribo/io/img/internal/debug_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -130,11 +130,16 @@ namespace scribo
{
// Prepare element edges
+ L lbl = duplicate(doc.elements().labeled_image());
+ for_all_comps(c, doc.elements())
+ if (! doc.elements()(c).is_valid())
+ data::fill(((lbl | doc.elements()(c).bbox()).rw()
+ | (pw::value(lbl) == pw::cst(c))).rw(), 0);
+
// FIXME: UGLY! Too slow!
scribo::def::lbl_type nlabels;
component_set<L> elts = primitive::extract::components(
- data::convert(bool(),
mln::subsampling::antialiased(doc.elements().labeled_image(),
- output_ratio)),
+ data::convert(bool(), mln::subsampling::antialiased(lbl, output_ratio)),
c8(),
nlabels);
@@ -150,11 +155,13 @@ namespace scribo
}
else
for_all_comps(c, doc.elements())
+ {
elts(c).update_type(doc.elements()(c).type());
+ elts(c).update_tag(doc.elements()(c).tag());
+ }
elt_edge = morpho::elementary::gradient_external(elts.labeled_image(), c8());
-// const component_set<L>& elts = doc.elements();
for_all_comps(e, elts)
if (elts(e).is_valid())
elts(e).accept(*this);
diff --git a/scribo/scribo/make/text_components_image.hh
b/scribo/scribo/make/text_components_image.hh
index 522505e..1a30a04 100644
--- a/scribo/scribo/make/text_components_image.hh
+++ b/scribo/scribo/make/text_components_image.hh
@@ -65,6 +65,7 @@ namespace scribo
trace::entering("scribo::make::text_components_image");
mln_precondition(doc.is_open());
+ mln_precondition(doc.has_text());
mln_ch_value(L,bool) output;
initialize(output, doc.image());
diff --git a/scribo/scribo/primitive/extract/alignments.hh
b/scribo/scribo/primitive/extract/alignments.hh
index 1e3d835..7dbf683 100644
--- a/scribo/scribo/primitive/extract/alignments.hh
+++ b/scribo/scribo/primitive/extract/alignments.hh
@@ -50,6 +50,7 @@
# include <scribo/core/def/lbl_type.hh>
# include <scribo/primitive/extract/components.hh>
# include <scribo/filter/object_links_aligned.hh>
+# include <scribo/filter/object_links_bbox_overlap.hh>
# include <scribo/filter/object_groups_small.hh>
# include <scribo/preprocessing/denoise_fg.hh>
# include
<scribo/primitive/link/internal/link_single_dmax_ratio_aligned_delta_base.hh>
@@ -252,7 +253,9 @@ namespace scribo
: super_(components, dmax_f, delta, delta_direction),
bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup)
{
+# ifndef SCRIBO_NDEBUG
debug_ = data::convert(value::rgb8(), data::convert(bool(), bbox_ima));
+# endif // ! SCRIBO_NDEBUG
}
void compute_next_site_(P& p)
@@ -286,7 +289,9 @@ namespace scribo
for (; p.col() <= this->components_(nbh).bbox().pmax().col()
&& (bbox_ima_(p) == 0);)
{
+# ifndef SCRIBO_NDEBUG
debug_(p) = literal::violet;
+# endif // ! SCRIBO_NDEBUG
++p.col();
}
@@ -304,7 +309,9 @@ namespace scribo
for (; p.col() <= this->components_(nbh).bbox().pmax().col()
&& (bbox_ima_(p) == 0);)
{
+# ifndef SCRIBO_NDEBUG
debug_(p) = literal::violet;
+# endif // ! SCRIBO_NDEBUG
++p.col();
}
@@ -322,7 +329,9 @@ namespace scribo
L bbox_ima_;
unsigned delta_ws_lookup_;
+# ifndef SCRIBO_NDEBUG
image2d<value::rgb8> debug_;
+# endif // ! SCRIBO_NDEBUG
};
@@ -346,7 +355,9 @@ namespace scribo
: super_(components, dmax_f, delta, delta_direction),
bbox_ima_(bbox_ima), delta_ws_lookup_(delta_ws_lookup)
{
+# ifndef SCRIBO_NDEBUG
debug_ = data::convert(value::rgb8(), data::convert(bool(), bbox_ima));
+# endif // ! SCRIBO_NDEBUG
}
void compute_next_site_(P& p)
@@ -381,7 +392,9 @@ namespace scribo
for (; p.col() > this->components_(nbh).bbox().pmin().col()
&& (bbox_ima_(p) == 0);)
{
+# ifndef SCRIBO_NDEBUG
debug_(p) = literal::violet;
+# endif // ! SCRIBO_NDEBUG
--p.col();
}
@@ -399,7 +412,9 @@ namespace scribo
for (; p.col() > this->components_(nbh).bbox().pmin().col()
&& (bbox_ima_(p) == 0);)
{
+# ifndef SCRIBO_NDEBUG
debug_(p) = literal::violet;
+# endif // ! SCRIBO_NDEBUG
--p.col();
}
@@ -417,7 +432,9 @@ namespace scribo
L bbox_ima_;
unsigned delta_ws_lookup_;
+# ifndef SCRIBO_NDEBUG
image2d<value::rgb8> debug_;
+# endif // ! SCRIBO_NDEBUG
};
@@ -656,11 +673,13 @@ namespace scribo
top_links = primitive::link::merge_double_link_closest_aligned(left, right,
anchor::StrictTopCenter);
+ // Remove links if component bboxes overlap too much.
+ top_links = filter::object_links_bbox_overlap(top_links, 0.80f);
+
// Remove groups with not enough links.
top_groups = primitive::group::from_single_link(top_links);
top_groups = filter::object_groups_small(top_groups, min_card);
-
// Compute char_width and char_space statistics.
//
// Here, we also compute max_char_width, in case other
@@ -889,6 +908,9 @@ namespace scribo
bot_links = primitive::link::merge_double_link_closest_aligned(left, right,
anchor::StrictBottomCenter);
+ // Remove links if component bboxes overlap too much.
+ bot_links = filter::object_links_bbox_overlap(bot_links, 0.80f);
+
// Remove groups with not enough links.
bot_groups = primitive::group::from_single_link(bot_links);
diff --git a/scribo/scribo/primitive/extract/lines_h_pattern.hh
b/scribo/scribo/primitive/extract/lines_h_pattern.hh
index 6a1f7f0..3cedf53 100644
--- a/scribo/scribo/primitive/extract/lines_h_pattern.hh
+++ b/scribo/scribo/primitive/extract/lines_h_pattern.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -106,12 +106,10 @@ namespace scribo
mln_concrete(I) output = lines_pattern(input, length, 1, win);
- unsigned new_length = length / 2 + delta;
- new_length += 1 - (new_length % 2); // Guaranty that new_length is odd.
-
mln_concrete(I)
output_dil = morpho::dilation(output,
- win::rectangle2d(3, new_length));
+ win::rectangle2d(2 * delta + 1,
+ length + 2));
output = scribo::primitive::internal::rd(output, input * output_dil);
diff --git a/scribo/scribo/primitive/extract/lines_v_pattern.hh
b/scribo/scribo/primitive/extract/lines_v_pattern.hh
index 8a103ac..2908c8b 100644
--- a/scribo/scribo/primitive/extract/lines_v_pattern.hh
+++ b/scribo/scribo/primitive/extract/lines_v_pattern.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -88,12 +88,10 @@ namespace scribo
mln_concrete(I) output = lines_pattern(input, length, 0, win);
- unsigned new_length = length / 2 + delta;
- new_length += 1 - (new_length % 2); // Guaranty that new_length is odd.
-
mln_concrete(I)
output_dil = morpho::dilation(output,
- win::rectangle2d(new_length, 3));
+ win::rectangle2d(length + 2, // height: along the vertical line
+ 2 * delta + 1)); // width: +/- delta tolerance
output = scribo::primitive::internal::rd(output, input * output_dil);
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index c94f9f5..f691188 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -657,7 +658,7 @@ namespace scribo
x---------------x
| |
| mc |
- ml x x x mr
+ ml x x x mr
| |
| |
x---------------x
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index 6360a56..6ab5d7d 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -1,5 +1,5 @@
-# Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-# (LRDE).
+# Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+# Laboratory (LRDE).
#
# This file is part of Olena.
#
@@ -116,6 +116,24 @@ if HAVE_QT
content_in_doc_LDADD = $(LDADD) \
$(QT_LIBS)
+ utilexec_PROGRAMS += content_in_hdoc
+ content_in_hdoc_SOURCES = content_in_hdoc.cc
+ content_in_hdoc_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ content_in_hdoc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ content_in_hdoc_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ content_in_hdoc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
utilexec_PROGRAMS += non_text_components
non_text_components_SOURCES = non_text_components.cc
--
1.5.6.5