* scribo/src/content_in_doc.cc,
* scribo/src/pbm_text_in_doc.cc: Here.
---
scribo/ChangeLog | 7 ++++++
scribo/src/content_in_doc.cc | 49 ++++++++++++++++++++++++++++++++++------
scribo/src/pbm_text_in_doc.cc | 24 ++++++++++++++++---
3 files changed, 68 insertions(+), 12 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 5042dc0..941fc35 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,12 @@
2010-12-10 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Add new options to command line tools.
+
+ * scribo/src/content_in_doc.cc,
+ * scribo/src/pbm_text_in_doc.cc: Here.
+
+2010-12-10 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Rewrite toolchains as functors.
* scribo/toolchain/internal/content_in_doc_functor.hh,
diff --git a/scribo/src/content_in_doc.cc b/scribo/src/content_in_doc.cc
index 6613e22..2c31d90 100644
--- a/scribo/src/content_in_doc.cc
+++ b/scribo/src/content_in_doc.cc
@@ -72,6 +72,9 @@ const char *args_desc[][2] =
{ "pmin_col", "Col index of the top left corner of the Region of
interest." },
{ "pmax_row", "Row index of the bottom right corner of the Region of
interest." },
{ "pmax_col", "Col index of the bottom right corner of the Region of
interest." },
+ { "find_lines", "Find vertical lines. (Default 1)" },
+ { "find_whitespaces", "Find whitespaces separators. (Default 1)"
},
+ { "K", "Sauvola's binarization threshold parameter. (Default:
0.34)" },
{ "debug_dir", "Output directory for debug image" },
{0, 0}
};
@@ -83,16 +86,16 @@ int main(int argc, char* argv[])
using namespace scribo;
using namespace mln;
- if (argc != 3 && argc != 4 && argc != 5 && argc != 8 &&
argc != 9)
+ if (argc < 3 || (argc > 8 && argc != 12))
return scribo::debug::usage(argv,
"Find text lines and elements in a document",
- "input.* out.xml <denoise_enabled> [<pmin_row> <pmin_col>
<pmax_row> <pmax_col>] <debug_dir>",
+ "input.* out.xml <denoise_enabled> [<pmin_row> <pmin_col>
<pmax_row> <pmax_col>] <find_lines> <find_whitespaces> <K>
<debug_dir>",
args_desc);
bool debug = false;
// Enable debug output.
- if (argc == 5 || argc == 9)
+ if (argc == 8 || argc == 12)
{
scribo::make::internal::debug_filename_prefix = argv[argc - 1];
debug = true;
@@ -104,15 +107,28 @@ int main(int argc, char* argv[])
typedef image2d<scribo::def::lbl_type> L;
scribo::document<L> doc(argv[1]);
+ doc.open();
// Preprocess document
- image2d<bool>
- input = toolchain::text_in_doc_preprocess(doc.image(), false);
-
+ image2d<bool> input;
+ {
+ double K = 0.34;
+ if (argc == 7 || argc == 8 || argc == 11)
+ {
+ if (argc == 7)
+ K = atof(argv[6]);
+ else
+ K = atof(argv[argc - 2]);
+ std::cout << "Using K = " << K << std::endl;
+ }
+
+ image2d<bool> tmp_fg;
+ input = toolchain::text_in_doc_preprocess(doc.image(), false, K);
+ }
// Optional Cropping
point2d crop_shift = literal::origin;
- if (argc >= 8)
+ if (argc >= 12)
{
mln::def::coord
minr = atoi(argv[4]),
@@ -120,6 +136,9 @@ int main(int argc, char* argv[])
maxr = atoi(argv[6]),
maxc = atoi(argv[7]);
+ std::cout << "> Image cropped from (" << minr <<
"," << minc << ")"
+ << " to (" << maxr << "," << maxc
<< ")" << std::endl;
+
box2d roi = mln::make::box2d(minr, minc, maxr, maxc);
input = preprocessing::crop_without_localization(input, roi);
crop_shift = point2d(minr, minc);
@@ -131,13 +150,27 @@ int main(int argc, char* argv[])
bool denoise = (argc > 3 && atoi(argv[3]) != 0);
+ bool find_line_seps = true; // Overridden by argv[4] when argc is in [5, 12).
+ if (argc >= 5 && argc < 12)
+ find_line_seps = (atoi(argv[4]) != 0);
+
+ bool find_whitespace_seps = true; // Overridden by argv[5] when argc is in [6, 12).
+ if (argc >= 6 && argc < 12)
+ find_whitespace_seps = (atoi(argv[5]) != 0);
+
+ std::cout << "Running with the following options :"
+ << "find_lines_seps = " << find_line_seps
+ << " | find_whitespace_seps = " << find_whitespace_seps
+ << " | debug = " << debug
+ << std::endl;
// Run document toolchain.
// Text
std::cout << "Extracting text" << std::endl;
line_set<L>
- lines = scribo::toolchain::text_in_doc(input, denoise, debug);
+ lines = scribo::toolchain::text_in_doc(input, denoise, find_line_seps,
+ find_whitespace_seps, debug);
doc.set_text(lines);
// Elements
diff --git a/scribo/src/pbm_text_in_doc.cc b/scribo/src/pbm_text_in_doc.cc
index 96cb92b..2240f42 100644
--- a/scribo/src/pbm_text_in_doc.cc
+++ b/scribo/src/pbm_text_in_doc.cc
@@ -65,6 +65,8 @@ for the background." },
{ "pmin_col", "Col index of the top left corner of the Region of
interest." },
{ "pmax_row", "Row index of the bottom right corner of the Region of
interest." },
{ "pmax_col", "Col index of the bottom right corner of the Region of
interest." },
+ { "find_lines", "Find vertical lines. (Default 1)" },
+ { "find_whitespaces", "Find whitespaces separators. (Default 1)"
},
{ "debug_dir", "Output directory for debug image" },
{0, 0}
};
@@ -78,13 +80,13 @@ int main(int argc, char* argv[])
if (argc != 3 && argc != 4 && argc != 5 && argc != 8 &&
argc != 9)
return scribo::debug::usage(argv,
"Find text lines using left/right validation and display x-height in a binarized
article.",
- "input.pbm out.txt <denoise_enabled> [<pmin_row> <pmin_col>
<pmax_row> <pmax_col>] <debug_dir>",
+ "input.pbm out.txt <denoise_enabled> [<pmin_row> <pmin_col>
<pmax_row> <pmax_col>] <find_lines> <find_whitespaces>
<debug_dir>",
args_desc);
bool debug = false;
// Enable debug output.
- if (argc == 5 || argc == 9)
+ if (argc == 7 || argc == 11)
{
scribo::make::internal::debug_filename_prefix = argv[argc - 1];
debug = true;
@@ -99,7 +101,7 @@ int main(int argc, char* argv[])
// Optional Cropping
point2d crop_shift = literal::origin;
- if (argc >= 8)
+ if (argc >= 11)
{
mln::def::coord
minr = atoi(argv[4]),
@@ -118,10 +120,24 @@ int main(int argc, char* argv[])
bool denoise = (argc > 3 && atoi(argv[3]) != 0);
+ bool find_line_seps = true; // Overridden by argv[4]; argv[3] is the denoise flag.
+ if (argc >= 5 && argc < 11)
+ find_line_seps = (atoi(argv[4]) != 0);
+
+ bool find_whitespace_seps = true; // Overridden by argv[5] when argc is in [6, 11).
+ if (argc >= 6 && argc < 11)
+ find_whitespace_seps = (atoi(argv[5]) != 0);
+
+ std::cout << "Running with the following options :"
+ << "find_lines_seps = " << find_line_seps
+ << " | find_whitespace_seps = " << find_whitespace_seps
+ << " | debug = " << debug
+ << std::endl;
// Run document toolchain.
line_set<L>
- lines = scribo::toolchain::text_in_doc(input, denoise, debug);
+ lines = scribo::toolchain::text_in_doc(input, denoise, find_line_seps,
+ find_whitespace_seps, debug);
scribo::document<L> doc;
doc.set_filename(argv[1]);
--
1.5.6.5