* src/extract_text_double_link.cc: Improve usage output and add new
parameters to be passed as arguments.
* src/recognition.cc: Add a new program argument.
* text/clean.hh: Disable cleaning for now.
* text/recognition.hh: Optionally store the recognized text in a file.
---
scribo/ChangeLog | 13 ++++++++++++
scribo/src/extract_text_double_link.cc | 33 +++++++++++++++++++++----------
scribo/src/recognition.cc | 9 ++++---
scribo/text/clean.hh | 28 +++++++++++++-------------
scribo/text/recognition.hh | 26 ++++++++++++++++++++----
5 files changed, 75 insertions(+), 34 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index f11cfa1..c508cbd 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2009-06-39 Guillaume Lazzara <lazzara(a)lrde.epita.fr>
+ Update Scribo.
+
+ * src/extract_text_double_link.cc: Improve usage output and add new
+ parameters to be passed as arguments.
+
+ * src/recognition.cc: Add a new program argument.
+
+ * text/clean.hh: Disable cleaning for now.
+
+ * text/recognition.hh: Optionally store the recognized text in a file.
+
+2009-06-39 Guillaume Lazzara <lazzara(a)lrde.epita.fr>
+
* tests/preprocessing/unskew.cc: fix test.
2009-06-39 Guillaume Lazzara <lazzara(a)lrde.epita.fr>
diff --git a/scribo/src/extract_text_double_link.cc
b/scribo/src/extract_text_double_link.cc
index 50706d8..aa47ac6 100644
--- a/scribo/src/extract_text_double_link.cc
+++ b/scribo/src/extract_text_double_link.cc
@@ -45,22 +45,33 @@
#include <scribo/debug/save_bboxes_image.hh>
#include <scribo/make/debug_filename.hh>
-int usage(const char *name)
-{
- std::cout << "Usage: " << name << " <input.pbm>
" << std::endl;
- return 1;
-}
+#include <scribo/debug/usage.hh>
-int main(int argc, char* argv[])
+const char *args_desc[][2] =
+{
+ { "input.pbm", "A binary image. 'True' for objects,
'False'\
+for the background." },
+ { "hlmax", "Maximum distance between two grouped objects while browsing
on the left." },
+ { "hrmax", "Maximum distance between two grouped objects while browsing
on the right." },
+ { "prefix", "Output names prefix" },
+ {0, 0}
+};
+
+int main(int argc, char *argv[])
{
using namespace scribo;
using namespace mln;
- if (argc < 1)
- return usage(argv[0]);
+ if (argc != 5)
+ return scribo::debug::usage(argv,
+ "Group potential text objects using a double validation link.",
+ "input.pbm hlmax hrmax prefix",
+ args_desc,
+ "Several images showing the process.");
+
- scribo::make::internal::debug_filename_prefix = "extract_text_double_link";
+ scribo::make::internal::debug_filename_prefix = argv[4];
image2d<bool> input;
io::pbm::load(input, argv[1]);
@@ -72,9 +83,9 @@ int main(int argc, char* argv[])
text = filter::small_objects(text, 4);
mln::util::array<unsigned> left_link
- = text::grouping::group_with_single_left_link(text, 30);
+ = text::grouping::group_with_single_left_link(text, atoi(argv[2]));
mln::util::array<unsigned> right_link
- = text::grouping::group_with_single_right_link(text, 30);
+ = text::grouping::group_with_single_right_link(text, atoi(argv[3]));
std::cout << "BEFORE - nbboxes = " << nbboxes <<
std::endl;
diff --git a/scribo/src/recognition.cc b/scribo/src/recognition.cc
index 87bbade..97f2880 100644
--- a/scribo/src/recognition.cc
+++ b/scribo/src/recognition.cc
@@ -51,6 +51,7 @@ const char *args_desc[][2] =
{
{ "input.pbm", "A binary image. 'True' for objects,
'False'\
for the background." },
+ { "out.txt", "OCR's output." },
{0, 0}
};
@@ -61,12 +62,12 @@ int main(int argc, char* argv[])
using namespace scribo;
using namespace mln;
- if (argc != 2)
+ if (argc != 3)
return scribo::debug::usage(argv,
"Text extraction and recognition",
- "input.pbm",
+ "input.pbm out.txt",
args_desc,
- "The text is printed on the standard output.");
+ "The text is printed on the standard output and stored in
'out.txt'");
trace::entering("main");
@@ -91,7 +92,7 @@ int main(int argc, char* argv[])
objects = text::grouping::group_from_double_link(objects, left_link, right_link);
/// Try to recognize text in grouped objects.
- scribo::text::recognition(objects, "fra");
+ scribo::text::recognition(objects, "fra", argv[2]);
trace::exiting("main");
}
diff --git a/scribo/text/clean.hh b/scribo/text/clean.hh
index 24973c5..0f87139 100644
--- a/scribo/text/clean.hh
+++ b/scribo/text/clean.hh
@@ -98,31 +98,31 @@ namespace scribo
mln_precondition(input.is_valid());
mln_precondition(dmap_win.is_valid());
- I input_large = world::binary_2d::enlarge(input, 2);
+// I input_large = world::binary_2d::enlarge(input, 2);
// image2d<bool> blur = linear::gaussian(input_large, 2);
// image2d<value::int_u8> blur =
linear::gaussian(level::convert(value::int_u8(), input_large), 2);
// image2d<bool> blur =
level::transform(linear::gaussian(level::convert(value::int_u8(), input_large), 2),
fun::v2b::threshold<value::int_u8>(100));
- mln_ch_value(I,unsigned)
- dmap = transform::distance_front(logical::not_(input_large), c8(),
- dmap_win,
- mln_max(unsigned));
+// mln_ch_value(I,unsigned)
+// dmap = transform::distance_front(logical::not_(input_large), c8(),
+// dmap_win,
+// mln_max(unsigned));
// io::pgm::save(labeling::wrap(dmap), mln::debug::filename("dmap.pgm"));
// I skeleton = topo::skeleton::crest(input_large, dmap, c8());
- I constraint = topo::skeleton::crest(input_large, dmap, c8());
- mln_postcondition(constraint.is_valid());
+// I constraint = topo::skeleton::crest(input_large, dmap, c8());
+// mln_postcondition(constraint.is_valid());
// io::pgm::save(labeling::wrap(constraint),
mln::debug::filename("constraint.pgm"));
- I skeleton =
- morpho::skeleton_constrained(input_large, c8(),
- topo::skeleton::is_simple_point<I,neighb2d>,
- extend(constraint, false), arith::revert(dmap));
+// I skeleton =
+// morpho::skeleton_constrained(input_large, c8(),
+//
topo::skeleton::is_simple_point<I,neighb2d>,
+// extend(constraint, false), arith::revert(dmap));
- win::octagon2d disk(7);
- I output = morpho::dilation(skeleton, disk);
+// win::octagon2d disk(7);
+// I output = morpho::dilation(skeleton, disk);
// if (plop > 20 && plop < 50)
{
@@ -134,7 +134,7 @@ namespace scribo
// ++plop;
trace::exiting("scribo::text::clean");
- return output;
+ return input;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/text/recognition.hh b/scribo/text/recognition.hh
index 75410e4..6293e28 100644
--- a/scribo/text/recognition.hh
+++ b/scribo/text/recognition.hh
@@ -33,6 +33,8 @@
/// \todo For each text bbox, we create a new image. We may like to avoid that.
/// \todo Do not store the result in an image?
+# include <ostream>
+
# include <mln/core/image/dmorph/image_if.hh>
# include <mln/core/concept/neighborhood.hh>
# include <mln/core/site_set/box.hh>
@@ -76,15 +78,19 @@ namespace scribo
/// Passes the text bboxes to Tesseract (OCR).
///
- /// \param[in] text The lines of text.
- /// \param[in] language the language which should be recognized by Tesseract.
- /// (fra, en, ...)
+ /// \param[in] text The lines of text.
+ /// \param[in] language The language which should be recognized by
+ /// Tesseract. (fra, en, ...)
+ /// \param[in] output_file If set, store the recognized text in
+ /// this file.
///
/// \return An image of characters.
+ //
template <typename L>
mln_ch_value(L,char)
recognition(const object_image(L)& objects,
- const char *language);
+ const char *language,
+ const char *output_file);
@@ -94,7 +100,8 @@ namespace scribo
template <typename L>
mln_ch_value(L,char)
recognition(const object_image(L)& objects,
- const char *language)
+ const char *language,
+ const char *output_file)
{
trace::entering("scribo::text::recognition");
@@ -114,6 +121,10 @@ namespace scribo
0, 9, 0, 9, 0 };
w_window2d_int dmap_win = mln::make::w_window2d_int(vals);
+ std::ofstream file;
+ if (output_file != 0)
+ file.open(output_file);
+
/// Use text bboxes with Tesseract
for_all_ncomponents(i, objects.nlabels())
{
@@ -147,6 +158,8 @@ namespace scribo
if (s != 0)
{
std::cout << s << std::endl;
+ if (output_file != 0)
+ file << s << std::endl;
mln::debug::put_word(txt, p, s);
}
@@ -154,6 +167,9 @@ namespace scribo
free(s);
}
+ if (output_file != 0)
+ file.close();
+
trace::exiting("scribo::text::recognition");
return txt;
}
--
1.5.6.5