* scribo/primitive/extract/lines_h_thick_and_thin.hh: Fix structural element length.
* scribo/primitive/extract/non_text_hdoc.hh: Filter too small images.
* scribo/text/paragraphs.hh: Make it compile.
* scribo/toolchain/internal/content_in_hdoc_functor.hh: Denoise
larger objects. Fix vseparator image and binary_image_wo_seps.
* src/primitive/extract/lines_thick_and_thin.cc: Be less severe on
parameters.
---
scribo/ChangeLog | 16 +++++++
.../primitive/extract/lines_h_thick_and_thin.hh | 15 ++++++-
scribo/scribo/primitive/extract/non_text_hdoc.hh | 4 ++
scribo/scribo/text/paragraphs.hh | 44 ++++++++++---------
.../toolchain/internal/content_in_hdoc_functor.hh | 15 ++++---
scribo/src/content_in_hdoc.cc | 2 +-
.../src/primitive/extract/lines_thick_and_thin.cc | 2 +-
7 files changed, 67 insertions(+), 31 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 5e29749..bfe43b2 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,19 @@
+2011-05-19 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Improve results.
+
+ * scribo/primitive/extract/lines_h_thick_and_thin.hh: Fix structural element length.
+
+ * scribo/primitive/extract/non_text_hdoc.hh: Filter too small images.
+
+ * scribo/text/paragraphs.hh: Make it compile.
+
+ * scribo/toolchain/internal/content_in_hdoc_functor.hh: Denoise
+ larger objects. Fix vseparator image and binary_image_wo_seps.
+
+ * src/primitive/extract/lines_thick_and_thin.cc: Be less severe on
+ parameters.
+
2011-05-18 Julien Marquegnies <marquegnies(a)lrde.epita.fr>
Improve paragraph grouping for historical documents.
diff --git a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
index b9b44b4..511da9f 100644
--- a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
+++ b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
@@ -49,6 +49,7 @@
# include <mln/pw/all.hh>
# include <mln/core/routine/duplicate.hh>
# include <mln/win/rectangle2d.hh>
+# include <mln/win/hline2d.hh>
# include <mln/morpho/dilation.hh>
# include <mln/data/convert.hh>
@@ -63,6 +64,7 @@
# include <scribo/core/def/lbl_type.hh>
# include <scribo/primitive/internal/rd.hh>
+# include <scribo/debug/logger.hh>
namespace scribo
@@ -485,8 +487,15 @@ namespace scribo
mln_concrete(I) mask = internal::detect_thick(tags);
internal::add_thin(tags, mask);
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ mask, "lines_h_thick_and_thin_mask");
+
image2d<bool> output = internal::rd3_fast(mask, binary_image,
- length, delta);
+ 2 * length, 2 * delta);
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ output, "lines_h_thick_and_thin_output_before_filter");
// Remove invalid lines
typedef scribo::def::lbl_type V;
@@ -499,6 +508,10 @@ namespace scribo
if (bbox(e).width() < filter_factor * length || bbox(e).width() / bbox(e).height() < 3)
data::fill(((output | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false);
+ debug::logger().log_image(debug::Results,
+ output, "lines_h_thick_and_thin_output");
+
+
trace::exiting("scribo::primitive::extract::lines_h_thick_and_thin");
return output;
}
diff --git a/scribo/scribo/primitive/extract/non_text_hdoc.hh b/scribo/scribo/primitive/extract/non_text_hdoc.hh
index 4924189..97e1f0e 100644
--- a/scribo/scribo/primitive/extract/non_text_hdoc.hh
+++ b/scribo/scribo/primitive/extract/non_text_hdoc.hh
@@ -47,6 +47,8 @@
# include <scribo/filter/objects_small.hh>
# include <scribo/filter/objects_on_border.hh>
+# include <scribo/filter/objects_v_thin.hh>
+# include <scribo/filter/objects_h_thin.hh>
//DEBUG
#include <mln/util/timer.hh>
@@ -111,6 +113,8 @@ namespace scribo
elements = scribo::filter::components_small(elements, 200);
elements = scribo::filter::components_on_border(elements);
+ elements = scribo::filter::objects_v_thin(elements, 100);
+ elements = scribo::filter::objects_h_thin(elements, 100);
// Debug
{
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index 9a59e02..af01581 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -30,10 +30,10 @@ namespace scribo
//-------------------------------------
// Extracting root of links
//-------------------------------------
- template <typename T>
+ template <typename L>
inline
unsigned
- find_root(util::array<T>& parent, unsigned x)
+ find_root(line_links<L>& parent, unsigned x)
{
unsigned tmp_x = x;
@@ -50,10 +50,10 @@ namespace scribo
return x;
}
- template <typename T>
+ template <typename L>
inline
void
- set_root(util::array<T>& parent, unsigned x, const unsigned root)
+ set_root(line_links<L>& parent, unsigned x, const unsigned root)
{
while (parent(x) != x && parent(x) != root)
{
@@ -799,27 +799,29 @@ namespace scribo
}
}
- // Only debug
+ // Post link processing
+
+ const line_links<L> backup = output.duplicate();
+ for (unsigned i = 0; i < output.nelements(); ++i)
+ {
+ const line_id_t current_neighbor = backup(i);
+ output(i) = scribo::internal::find_root(output, i);
+ const line_id_t root_index = output(i);
+
+ for (unsigned j = 0; j < right.nelements(); ++j)
+ {
+ if (i != j &&
+ current_neighbor != i &&
+ right(j) == i)
+ scribo::internal::set_root(output, j, root_index);
+ }
+ }
+
+ // Only debug
// {
// image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
- // const util::array<value::int_u16> backup = output;
- // for (unsigned i = 0; i < output.nelements(); ++i)
- // {
- // const value::int_u16 current_neighbor = backup(i);
- // output(i) = internal::find_root(output, i);
- // const value::int_u16 root_index = output(i);
-
- // for (unsigned j = 0; j < right.nelements(); ++j)
- // {
- // if (i != j &&
- // current_neighbor != i &&
- // right(j) == i)
- // internal::set_root(output, j, root_index);
- // }
- // }
-
// mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements());
// for_all_lines(l, lines)
// if (lines(l).is_textline())
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index 92db8a7..e7d14ff 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -182,14 +182,14 @@ namespace scribo
mln_ch_value(I,bool)
vseparators = preprocessing::rotate_90(
primitive::extract::lines_h_thick_and_thin(
- preprocessing::rotate_90(processed_image), 101, 3, 0.05, 0.80, 2), false),
+ preprocessing::rotate_90(processed_image), 101, 3, 0.2, 0.6, 1), false),
hseparators = primitive::extract::lines_h_thick_and_thin(
processed_image, 101, 3);
doc.set_vline_separators(vseparators);
doc.set_hline_separators(hseparators);
- separators = vseparators;
+ separators = duplicate(vseparators);
separators += hseparators;
border::resize(processed_image, border::thickness);
@@ -202,7 +202,6 @@ namespace scribo
input_cleaned = primitive::remove::separators(processed_image,
separators);
- doc.set_binary_image_wo_seps(input_cleaned);
on_progress();
}
@@ -231,7 +230,7 @@ namespace scribo
{
on_new_progress_label("Denoise...");
- input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), 3);
+ input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), 10);
// Debug
# ifndef SCRIBO_NDEBUG
@@ -242,13 +241,15 @@ namespace scribo
on_progress();
}
+ doc.set_binary_image_wo_seps(input_cleaned);
+
/// Finding components.
on_new_progress_label("Finding components...");
+ // NOTE: Component features computation is disabled.
V ncomponents;
component_set<L>
- components = scribo::primitive::extract::components(original_image,
- input_cleaned,
+ components = scribo::primitive::extract::components(input_cleaned,
c8(),
ncomponents);
@@ -269,7 +270,7 @@ namespace scribo
on_new_progress_label("Filtering components");
- components = scribo::filter::components_small(components, 3);
+ components = scribo::filter::components_small(components, 10);
on_progress();
diff --git a/scribo/src/content_in_hdoc.cc b/scribo/src/content_in_hdoc.cc
index e0d2258..ed15693 100644
--- a/scribo/src/content_in_hdoc.cc
+++ b/scribo/src/content_in_hdoc.cc
@@ -200,7 +200,7 @@ int main(int argc, char* argv[])
// Saving results
std::cout << "Saving results..." << std::endl;
- scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page);
+// scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page);
std::cout << "End of process - " << t << std::endl;
diff --git a/scribo/src/primitive/extract/lines_thick_and_thin.cc b/scribo/src/primitive/extract/lines_thick_and_thin.cc
index f574b3d..fb78862 100644
--- a/scribo/src/primitive/extract/lines_thick_and_thin.cc
+++ b/scribo/src/primitive/extract/lines_thick_and_thin.cc
@@ -91,7 +91,7 @@ int main(int argc, char *argv[])
input, length, delta);
I vseparators = preprocessing::rotate_90(
primitive::extract::lines_h_thick_and_thin(
- preprocessing::rotate_90(input), length, delta, 0.05, 0.80, 2), false);
+ preprocessing::rotate_90(input), length, delta, 0.1, 0.80, 2), false);
I separators = duplicate(vseparators);
separators += hseparators;
--
1.5.6.5