
* scribo/src/content_in_doc.cc, * scribo/src/pbm_text_in_doc.cc: Here. --- scribo/ChangeLog | 7 ++++++ scribo/src/content_in_doc.cc | 49 ++++++++++++++++++++++++++++++++++------ scribo/src/pbm_text_in_doc.cc | 24 ++++++++++++++++--- 3 files changed, 68 insertions(+), 12 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 5042dc0..941fc35 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,12 @@ 2010-12-10 Guillaume Lazzara <z@lrde.epita.fr> + Add new options to command-line tools. + + * scribo/src/content_in_doc.cc, + * scribo/src/pbm_text_in_doc.cc: Here. + +2010-12-10 Guillaume Lazzara <z@lrde.epita.fr> + Rewrite toolchains as functors. * scribo/toolchain/internal/content_in_doc_functor.hh, diff --git a/scribo/src/content_in_doc.cc b/scribo/src/content_in_doc.cc index 6613e22..2c31d90 100644 --- a/scribo/src/content_in_doc.cc +++ b/scribo/src/content_in_doc.cc @@ -72,6 +72,9 @@ const char *args_desc[][2] = { "pmin_col", "Col index of the top left corner of the Region of interest." }, { "pmax_row", "Row index of the bottom right corner of the Region of interest." }, { "pmax_col", "Col index of the bottom right corner of the Region of interest." }, + { "find_lines", "Find vertical lines. (Default 1)" }, + { "find_whitespaces", "Find whitespaces separators. (Default 1)" }, + { "K", "Sauvola's binarization threshold parameter. 
(Default: 0.34)" }, { "debug_dir", "Output directory for debug image" }, {0, 0} }; @@ -83,16 +86,16 @@ int main(int argc, char* argv[]) using namespace scribo; using namespace mln; - if (argc != 3 && argc != 4 && argc != 5 && argc != 8 && argc != 9) + if (argc < 3 || (argc > 8 && argc != 12)) return scribo::debug::usage(argv, "Find text lines and elements in a document", - "input.* out.xml <denoise_enabled> [<pmin_row> <pmin_col> <pmax_row> <pmax_col>] <debug_dir>", + "input.* out.xml <denoise_enabled> [<pmin_row> <pmin_col> <pmax_row> <pmax_col>] <find_lines> <find_whitespaces> <K> <debug_dir>", args_desc); bool debug = false; // Enable debug output. - if (argc == 5 || argc == 9) + if (argc == 8 || argc == 12) { scribo::make::internal::debug_filename_prefix = argv[argc - 1]; debug = true; @@ -104,15 +107,28 @@ int main(int argc, char* argv[]) typedef image2d<scribo::def::lbl_type> L; scribo::document<L> doc(argv[1]); + doc.open(); // Preprocess document - image2d<bool> - input = toolchain::text_in_doc_preprocess(doc.image(), false); - + image2d<bool> input; + { + double K = 0.34; + if (argc == 7 || argc == 8 || argc == 12) + { + if (argc == 7) + K = atof(argv[6]); + else + K = atof(argv[argc - 2]); + std::cout << "Using K = " << K << std::endl; + } + + image2d<bool> tmp_fg; + input = toolchain::text_in_doc_preprocess(doc.image(), false, K); + } // Optional Cropping point2d crop_shift = literal::origin; - if (argc >= 8) + if (argc >= 12) { mln::def::coord minr = atoi(argv[4]), @@ -120,6 +136,9 @@ int main(int argc, char* argv[]) maxr = atoi(argv[6]), maxc = atoi(argv[7]); + std::cout << "> Image cropped from (" << minr << "," << minc << ")" + << " to (" << maxr << "," << maxc << ")" << std::endl; + box2d roi = mln::make::box2d(minr, minc, maxr, maxc); input = preprocessing::crop_without_localization(input, roi); crop_shift = point2d(minr, minc); @@ -131,13 +150,27 @@ int main(int argc, char* argv[]) bool denoise = (argc > 3 && atoi(argv[3]) != 0); + bool 
find_line_seps = true; + if (argc >= 5 && argc < 12) + find_line_seps = (atoi(argv[4]) != 0); + + bool find_whitespace_seps = true; + if (argc >= 6 && argc < 12) + find_whitespace_seps = (atoi(argv[5]) != 0); + + std::cout << "Running with the following options :" + << "find_lines_seps = " << find_line_seps + << " | find_whitespace_seps = " << find_whitespace_seps + << " | debug = " << debug + << std::endl; // Run document toolchain. // Text std::cout << "Extracting text" << std::endl; line_set<L> - lines = scribo::toolchain::text_in_doc(input, denoise, debug); + lines = scribo::toolchain::text_in_doc(input, denoise, find_line_seps, + find_whitespace_seps, debug); doc.set_text(lines); // Elements diff --git a/scribo/src/pbm_text_in_doc.cc b/scribo/src/pbm_text_in_doc.cc index 96cb92b..2240f42 100644 --- a/scribo/src/pbm_text_in_doc.cc +++ b/scribo/src/pbm_text_in_doc.cc @@ -65,6 +65,8 @@ for the background." }, { "pmin_col", "Col index of the top left corner of the Region of interest." }, { "pmax_row", "Row index of the bottom right corner of the Region of interest." }, { "pmax_col", "Col index of the bottom right corner of the Region of interest." }, + { "find_lines", "Find vertical lines. (Default 1)" }, + { "find_whitespaces", "Find whitespaces separators. (Default 1)" }, { "debug_dir", "Output directory for debug image" }, {0, 0} }; @@ -78,13 +80,13 @@ int main(int argc, char* argv[]) if (argc != 3 && argc != 4 && argc != 5 && argc != 8 && argc != 9) return scribo::debug::usage(argv, "Find text lines using left/right validation and display x-height in a binarized article.", - "input.pbm out.txt <denoise_enabled> [<pmin_row> <pmin_col> <pmax_row> <pmax_col>] <debug_dir>", + "input.pbm out.txt <denoise_enabled> [<pmin_row> <pmin_col> <pmax_row> <pmax_col>] <find_lines> <find_whitespaces> <debug_dir>", args_desc); bool debug = false; // Enable debug output. 
- if (argc == 5 || argc == 9) + if (argc == 7 || argc == 11) { scribo::make::internal::debug_filename_prefix = argv[argc - 1]; debug = true; @@ -99,7 +101,7 @@ int main(int argc, char* argv[]) // Optional Cropping point2d crop_shift = literal::origin; - if (argc >= 8) + if (argc >= 11) { mln::def::coord minr = atoi(argv[4]), @@ -118,10 +120,24 @@ int main(int argc, char* argv[]) bool denoise = (argc > 3 && atoi(argv[3]) != 0); + bool find_line_seps = true; + if (argc >= 4 && argc < 11) + find_line_seps = (atoi(argv[3]) != 0); + + bool find_whitespace_seps = true; + if (argc >= 5 && argc < 11) + find_whitespace_seps = (atoi(argv[4]) != 0); + + std::cout << "Running with the following options :" + << "find_lines_seps = " << find_line_seps + << " | find_whitespace_seps = " << find_whitespace_seps + << " | debug = " << debug + << std::endl; // Run document toolchain. line_set<L> - lines = scribo::toolchain::text_in_doc(input, denoise, find_line_seps, + lines = scribo::toolchain::text_in_doc(input, denoise, debug); + lines = scribo::toolchain::text_in_doc(input, denoise, find_line_seps, + find_whitespace_seps, debug); scribo::document<L> doc; doc.set_filename(argv[1]); -- 1.5.6.5