* scribo/core/component_info.hh: Initialize type_ attribute.
* scribo/core/line_info.hh: Rename accumulators.
* scribo/debug/usage.hh: Update copyright.
* scribo/toolchain/internal/text_in_doc_functor.hh: Initialize
input_cleaned local variable.
* tests/toolchain/nepomuk/text_extraction.cc: Support both
Tesseract 2 and 3.
---
scribo/ChangeLog | 16 ++++++++++++
scribo/scribo/core/component_info.hh | 8 +++---
scribo/scribo/core/line_info.hh | 26 ++++++++++---------
scribo/scribo/debug/usage.hh | 6 ++--
.../toolchain/internal/text_in_doc_functor.hh | 8 ++++-
scribo/tests/toolchain/nepomuk/text_extraction.cc | 8 +++++-
6 files changed, 50 insertions(+), 22 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 7982a78..22ed824 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,21 @@
2011-01-18 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Various fixes in Scribo.
+
+ * scribo/core/component_info.hh: Initialize type_ attribute.
+
+ * scribo/core/line_info.hh: Rename accumulators.
+
+ * scribo/debug/usage.hh: Update copyright.
+
+ * scribo/toolchain/internal/text_in_doc_functor.hh: Initialize
+ input_cleaned local variable.
+
+ * tests/toolchain/nepomuk/text_extraction.cc: Support both
+ Tesseract 2 and 3.
+
+2011-01-18 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* demo/viewer/Makefile.am: Explicitly link to libtiff.
2011-01-13 Guillaume Lazzara <z(a)lrde.epita.fr>
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 8345266..4ed6db7 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -97,7 +97,7 @@ namespace scribo
inline
component_info::component_info()
- : id_(0), tag_(component::Ignored)
+ : id_(0), tag_(component::Ignored), type_(component::Undefined)
{
}
@@ -109,7 +109,7 @@ namespace scribo
const mln::point2d& mass_center,
unsigned card)
: id_(id), bbox_(bbox), mass_center_(mass_center), card_(card),
- tag_(component::None), line_id_(0)
+ tag_(component::None), type_(component::Undefined), line_id_(0)
{
}
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index ed31d17..54a5094 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -801,8 +801,8 @@ namespace scribo
typedef mln::value::int_u<12> median_data_t;
typedef mln::accu::stat::median_h<median_data_t> median_t;
median_t
- absolute_meanline,
- absolute_baseline,
+ meanline,
+ baseline,
char_space,
char_width;
@@ -871,13 +871,15 @@ namespace scribo
if (bb.width() <= 1000)
char_width.take(bb.width());
- // Meanline (compute an absolute value, from the top left
- // corner of the image).
- absolute_meanline.take(bb.pmin().row() - ref_line);
+ // Meanline (compute an absolute value, from the top left corner
+ // of the highest character bounding box, excluding
+ // punctuation).
+ meanline.take(bb.pmin().row() - ref_line);
- // Baseline (compute an absolute value, from the top left
- // corner of the image).
- absolute_baseline.take(bb.pmax().row() - ref_line);
+ // Baseline (compute an absolute value, from the top left corner
+ // of the highest character bounding box, excluding
+ // punctuation).
+ baseline.take(bb.pmax().row() - ref_line);
}
// Finalization
@@ -899,8 +901,8 @@ namespace scribo
char_width_ = char_width.to_result();
mln::def::coord
- absolute_baseline_r = absolute_baseline.to_result() + ref_line,
- absolute_meanline_r = absolute_meanline.to_result() + ref_line;
+ absolute_baseline_r = baseline.to_result() + ref_line,
+ absolute_meanline_r = meanline.to_result() + ref_line;
baseline_ = absolute_baseline_r;
meanline_ = absolute_meanline_r;
diff --git a/scribo/scribo/debug/usage.hh b/scribo/scribo/debug/usage.hh
index 7a284e3..28ab145 100644
--- a/scribo/scribo/debug/usage.hh
+++ b/scribo/scribo/debug/usage.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -68,7 +68,7 @@ namespace scribo
<< std::endl;
std::cout << "-----------" << std::endl;
- std::cout << "EPITA/LRDE - Scribo 2008-2010" << std::endl;
+ std::cout << "EPITA/LRDE - Scribo 2008-2011" << std::endl;
return 1;
}
diff --git a/scribo/scribo/toolchain/internal/text_in_doc_functor.hh b/scribo/scribo/toolchain/internal/text_in_doc_functor.hh
index fdbee90..16e981d 100644
--- a/scribo/scribo/toolchain/internal/text_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/text_in_doc_functor.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -132,7 +133,10 @@ namespace scribo
text_in_doc_functor<I>::operator()(const Image<I>& input)
{
// Remove separators
- mln_ch_value(I,bool) separators, input_cleaned;
+ mln_ch_value(I,bool)
+ separators,
+ input_cleaned = exact(input);
+
if (enable_line_seps)
{
on_new_progress_label("Find vertical separators...");
diff --git a/scribo/tests/toolchain/nepomuk/text_extraction.cc b/scribo/tests/toolchain/nepomuk/text_extraction.cc
index c475aa9..30051ae 100644
--- a/scribo/tests/toolchain/nepomuk/text_extraction.cc
+++ b/scribo/tests/toolchain/nepomuk/text_extraction.cc
@@ -1,4 +1,5 @@
-// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -43,6 +44,11 @@ int main()
QSet<QString> words = scribo::toolchain::nepomuk::text_extraction(ima);
mln_assertion(words.size() == 1);
+
+#ifndef HAVE_TESSERACT_3
mln_assertion(words.contains("Wildly"));
+#else // HAVE_TESSERACT_3
+ mln_assertion(words.contains("wildly"));
+#endif // ! HAVE_TESSERACT_3
return 0;
}
--
1.5.6.5