last-svn-commit-909-g1dec414 Add binaries for ICDAR contests.

--- configure.ac | 3 + .../toolchain/internal/content_in_hdoc_functor.hh | 6 +- scribo/scribo/toolchain/text_in_doc_preprocess.hh | 1 + scribo/src/content_in_hdoc.cc | 20 ++-- .../estim => src/contest/DAE-2011}/Makefile.am | 22 +++-- .../DAE-2011/content_in_hdoc_dae.cc} | 111 ++++++++++++-------- scribo/src/contest/Makefile.am | 7 +- .../estim => src/contest/hdlac-2011}/Makefile.am | 22 +++-- .../hdlac-2011/content_in_hdoc_hdlac.cc} | 91 +++++++++-------- 9 files changed, 172 insertions(+), 111 deletions(-) copy scribo/{tests/estim => src/contest/DAE-2011}/Makefile.am (64%) copy scribo/src/{non_text_components.cc => contest/DAE-2011/content_in_hdoc_dae.cc} (51%) copy scribo/{tests/estim => src/contest/hdlac-2011}/Makefile.am (63%) copy scribo/src/{non_text_components.cc => contest/hdlac-2011/content_in_hdoc_hdlac.cc} (56%) diff --git a/configure.ac b/configure.ac index fdbddd9..66643a4 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,8 @@ AC_CONFIG_FILES([ scribo/src/binarization/Makefile scribo/src/contest/Makefile scribo/src/contest/hdibco-2010/Makefile + scribo/src/contest/hdlac-2011/Makefile + scribo/src/contest/DAE-2011/Makefile scribo/src/debug/Makefile scribo/src/filter/Makefile scribo/src/misc/Makefile @@ -337,6 +339,7 @@ AC_CONFIG_FILES([ scribo/src/text/Makefile scribo/src/toolchain/Makefile scribo/src/toolchain/nepomuk/Makefile + scribo/src/util/Makefile ]) AC_CONFIG_FILES([scribo/tests/data.hh]) diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh index adfcdb3..5e98f3e 100644 --- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh +++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh @@ -272,7 +272,8 @@ namespace scribo { on_new_progress_label("Denoise..."); - std::cout << ">> min_area = " << min_area << std::endl; + if (verbose) + std::cout << ">> min_area = " << min_area << std::endl; input_cleaned = 
preprocessing::denoise_fg(input_cleaned, c8(), min_area); @@ -547,7 +548,8 @@ namespace scribo if (!(closing_size % 2)) closing_size += 1; - std::cout << ">> CLosing size = " << closing_size << std::endl; + if (verbose) + std::cout << ">> CLosing size = " << closing_size << std::endl; component_set<L> elements = scribo::primitive::extract::non_text_hdoc(doc, closing_size); diff --git a/scribo/scribo/toolchain/text_in_doc_preprocess.hh b/scribo/scribo/toolchain/text_in_doc_preprocess.hh index 20fea02..dba5601 100644 --- a/scribo/scribo/toolchain/text_in_doc_preprocess.hh +++ b/scribo/scribo/toolchain/text_in_doc_preprocess.hh @@ -159,6 +159,7 @@ namespace scribo f.sauvola_K = K; f.enable_fg_extraction = enable_fg_bg; f.lambda = lambda; + f.verbose = false; // Get results. mln_ch_value(I,bool) output = f(input); diff --git a/scribo/src/content_in_hdoc.cc b/scribo/src/content_in_hdoc.cc index ed15693..737b1d3 100644 --- a/scribo/src/content_in_hdoc.cc +++ b/scribo/src/content_in_hdoc.cc @@ -31,7 +31,6 @@ #include <mln/core/image/image2d.hh> -#include <mln/io/pbm/save.hh> #include <mln/io/magick/load.hh> #include <scribo/toolchain/content_in_hdoc.hh> @@ -95,8 +94,6 @@ int main(int argc, char* argv[]) scribo::make::internal::debug_filename_prefix = argv[argc - 1]; } - scribo::debug::logger().set_level(scribo::debug::None); - trace::entering("main"); Magick::InitializeMagick(*argv); @@ -198,16 +195,23 @@ int main(int argc, char* argv[]) find_line_seps, find_whitespace_seps, !language.empty(), language); + doc.set_filename(basename(argv[1])); + // Saving results std::cout << "Saving results..." 
<< std::endl; -// scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page); + scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page); std::cout << "End of process - " << t << std::endl; - scribo::io::xml::save(doc, argv[2], scribo::io::xml::PageExtended); - scribo::io::img::save(doc, "debug.png", scribo::io::img::DebugWoImage); - scribo::io::img::save(doc, "full.png", scribo::io::img::Full); - // scribo::io::xml::save(doc, "full.xml", scribo::io::xml::Full); +// scribo::io::xml::save(doc, argv[2], scribo::io::xml::PageExtended); + + // scribo::io::img::save(doc, "debug.png", scribo::io::img::DebugWoImage); + // std::cout << "Debug image saved " << t << std::endl; + // scribo::io::img::save(doc, "full.png", scribo::io::img::DebugWithImage); + // std::cout << "Full Debug image saved " << t << std::endl; + // scribo::io::img::save(doc, "full_hd.png", scribo::io::img::Full); + +// sleep(10); trace::exiting("main"); } diff --git a/scribo/tests/estim/Makefile.am b/scribo/src/contest/DAE-2011/Makefile.am similarity index 64% copy from scribo/tests/estim/Makefile.am copy to scribo/src/contest/DAE-2011/Makefile.am index 1c39ab6..09bccd7 100644 --- a/scribo/tests/estim/Makefile.am +++ b/scribo/src/contest/DAE-2011/Makefile.am @@ -17,13 +17,21 @@ ## Process this file through Automake to create Makefile.in. 
-include $(top_srcdir)/scribo/tests/tests.mk +include $(top_srcdir)/scribo/scribo.mk -check_PROGRAMS = \ - font_color \ - font_boldness +noinst_PROGRAMS = -font_color_SOURCES = font_color.cc -font_boldness_SOURCES = font_boldness.cc -TESTS = $(check_PROGRAMS) +if HAVE_MAGICKXX + +noinst_PROGRAMS += content_in_hdoc_dae + +content_in_hdoc_dae_SOURCES = content_in_hdoc_dae.cc +content_in_hdoc_dae_CPPFLAGS = $(AM_CPPFLAGS) \ + $(TESSERACT_CPPFLAGS) \ + -I$(top_builddir) +content_in_hdoc_dae_LDFLAGS = $(AM_LDFLAGS) \ + -lpthread \ + $(MAGICKXX_LDFLAGS) + +endif HAVE_MAGICKXX \ No newline at end of file diff --git a/scribo/src/non_text_components.cc b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc similarity index 51% copy from scribo/src/non_text_components.cc copy to scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc index 0f4cce4..5309d49 100644 --- a/scribo/src/non_text_components.cc +++ b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2011 EPITA Research and Development Laboratory +// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory // (LRDE) // // This file is part of Olena. @@ -24,22 +24,24 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU General Public License. 
+# define SCRIBO_NOCR #include <libgen.h> #include <fstream> #include <iostream> +#include <sstream> #include <mln/core/image/image2d.hh> -#include <mln/io/pbm/save.hh> #include <mln/io/magick/load.hh> -#include <scribo/toolchain/content_in_doc.hh> +#include <scribo/toolchain/content_in_hdoc.hh> #include <scribo/toolchain/text_in_doc_preprocess.hh> #include <scribo/core/document.hh> #include <scribo/debug/usage.hh> +#include <scribo/debug/logger.hh> #include <scribo/preprocessing/crop_without_localization.hh> #include <scribo/preprocessing/crop.hh> @@ -48,12 +50,16 @@ #include <scribo/io/img/save.hh> +#include <mln/core/alias/neighb2d.hh> +#include <mln/labeling/compute.hh> +#include <mln/labeling/foreground.hh> +#include <mln/util/timer.hh> + + + const char *args_desc[][2] = { - { "input.*", "An image." }, - { "non_text_comps.pbm", "Non text components mask." }, - { "enable_debug", "Enable debug image output. Set to 1 or 0." }, - { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." }, + { "input.tif", "An image." 
}, {0, 0} }; @@ -64,65 +70,84 @@ int main(int argc, char* argv[]) using namespace scribo; using namespace mln; - if (argc != 4 && argc != 3 && argc != 5) + if (argc != 2) return scribo::debug::usage(argv, - "Extract non text components mask/", - "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]", + "Document Image Analysis in Historical Documents", + "input.tif", args_desc); - std::string out_img = basename(argv[1]); - out_img.erase(out_img.size() - 4); - - std::string filename_prefix = out_img + "_debug"; - scribo::debug::logger().set_filename_prefix(filename_prefix.c_str()); - if (argc > 3 && atoi(argv[3])) - scribo::debug::logger().set_level(scribo::debug::Special); - else - scribo::debug::logger().set_level(scribo::debug::None); - trace::entering("main"); Magick::InitializeMagick(*argv); + mln::util::timer t; + t.start(); + typedef image2d<scribo::def::lbl_type> L; image2d<value::rgb8> input; mln::io::magick::load(input, argv[1]); - util::timer t; - t.start(); - // Preprocess document - image2d<bool> + image2d<bool> input_preproc; + { input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34); - - bool denoise = true; + // Cleanup components on borders + { + typedef scribo::def::lbl_type V; + V nlabels; + image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels); + mln::util::array<box2d> + bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels); + + const box2d& b = input.domain(); + for_all_ncomponents(e, nlabels) + if (bbox(e).pmin().row() == b.pmin().row() + || bbox(e).pmax().row() == b.pmax().row() + || bbox(e).pmin().col() == b.pmin().col() + || bbox(e).pmax().col() == b.pmax().col()) + data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false); + } + } + + bool denoise = 1; std::string language = ""; bool find_line_seps = true; - bool find_whitespace_seps = (argc > 4 && atoi(argv[4])); - - std::cout << "Running with the following options :" - << " ocr_language = " << language - 
<< " | find_lines_seps = " << find_line_seps - << " | find_whitespace_seps = " << find_whitespace_seps - << " | debug = " << scribo::debug::logger().is_enabled() - << std::endl; + bool find_whitespace_seps = true; // Run document toolchain. // Text std::cout << "Analysing document..." << std::endl; document<L> - doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise, - find_line_seps, find_whitespace_seps, - !language.empty(), language); - t.stop(); - std::cout << t << std::endl; - - mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]); + doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise, + find_line_seps, find_whitespace_seps, + !language.empty(), language); + + doc.set_filename(basename(argv[1])); + + const paragraph_set<L>& par_set = doc.paragraphs(); + image2d<bool> output(input.domain()); + for_all_paragraphs(p, par_set) + { + data::fill(output, true); + const paragraph_info<L>& current_par = par_set(p); + const mln::util::array<line_id_t>& line_ids = current_par.line_ids(); + const unsigned nelements = line_ids.nelements(); + + for (unsigned i = 0; i < nelements; ++i) + { + const line_id_t& line_id = line_ids(i); + const line_info<L>& current_line = par_set.lines()(line_id); + + scribo::draw::line_components(output, par_set.lines(), current_line, false); + } + + std::stringstream ss; + ss << basename(argv[1]) << p << ".pbm"; + mln::io::pbm::save(output, ss.str()); + } - scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage); - scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage); trace::exiting("main"); } diff --git a/scribo/src/contest/Makefile.am b/scribo/src/contest/Makefile.am index b1d2dfb..ade408a 100644 --- a/scribo/src/contest/Makefile.am +++ b/scribo/src/contest/Makefile.am @@ -1,4 +1,5 @@ -# Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE). 
+# Copyright (C) 2010, 2011 EPITA Research and Development Laboratory +# (LRDE). # # This file is part of Olena. # @@ -20,4 +21,6 @@ include $(top_srcdir)/scribo/scribo.mk SUBDIRS = \ - hdibco-2010 \ No newline at end of file + hdibco-2010 \ + hdlac-2011 \ + DAE-2011 \ No newline at end of file diff --git a/scribo/tests/estim/Makefile.am b/scribo/src/contest/hdlac-2011/Makefile.am similarity index 63% copy from scribo/tests/estim/Makefile.am copy to scribo/src/contest/hdlac-2011/Makefile.am index 1c39ab6..f5ead52 100644 --- a/scribo/tests/estim/Makefile.am +++ b/scribo/src/contest/hdlac-2011/Makefile.am @@ -17,13 +17,21 @@ ## Process this file through Automake to create Makefile.in. -include $(top_srcdir)/scribo/tests/tests.mk +include $(top_srcdir)/scribo/scribo.mk -check_PROGRAMS = \ - font_color \ - font_boldness +noinst_PROGRAMS = -font_color_SOURCES = font_color.cc -font_boldness_SOURCES = font_boldness.cc -TESTS = $(check_PROGRAMS) +if HAVE_MAGICKXX + +noinst_PROGRAMS += content_in_hdoc_hdlac + +content_in_hdoc_hdlac_SOURCES = content_in_hdoc_hdlac.cc +content_in_hdoc_hdlac_CPPFLAGS = $(AM_CPPFLAGS) \ + $(TESSERACT_CPPFLAGS) \ + -I$(top_builddir) +content_in_hdoc_hdlac_LDFLAGS = $(AM_LDFLAGS) \ + -lpthread \ + $(MAGICKXX_LDFLAGS) + +endif HAVE_MAGICKXX \ No newline at end of file diff --git a/scribo/src/non_text_components.cc b/scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc similarity index 56% copy from scribo/src/non_text_components.cc copy to scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc index 0f4cce4..045803f 100644 --- a/scribo/src/non_text_components.cc +++ b/scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc @@ -1,4 +1,4 @@ -// Copyright (C) 2011 EPITA Research and Development Laboratory +// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory // (LRDE) // // This file is part of Olena. 
@@ -24,6 +24,7 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU General Public License. +# define SCRIBO_NOCR #include <libgen.h> #include <fstream> @@ -31,15 +32,15 @@ #include <mln/core/image/image2d.hh> -#include <mln/io/pbm/save.hh> #include <mln/io/magick/load.hh> -#include <scribo/toolchain/content_in_doc.hh> +#include <scribo/toolchain/content_in_hdoc.hh> #include <scribo/toolchain/text_in_doc_preprocess.hh> #include <scribo/core/document.hh> #include <scribo/debug/usage.hh> +#include <scribo/debug/logger.hh> #include <scribo/preprocessing/crop_without_localization.hh> #include <scribo/preprocessing/crop.hh> @@ -48,12 +49,17 @@ #include <scribo/io/img/save.hh> +#include <mln/core/alias/neighb2d.hh> +#include <mln/labeling/compute.hh> +#include <mln/labeling/foreground.hh> +#include <mln/util/timer.hh> + + + const char *args_desc[][2] = { - { "input.*", "An image." }, - { "non_text_comps.pbm", "Non text components mask." }, - { "enable_debug", "Enable debug image output. Set to 1 or 0." }, - { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." }, + { "input.tif", "An image." }, + { "out.xml", "Result of the document analysis." 
}, {0, 0} }; @@ -64,65 +70,66 @@ int main(int argc, char* argv[]) using namespace scribo; using namespace mln; - if (argc != 4 && argc != 3 && argc != 5) + if (argc != 3) return scribo::debug::usage(argv, - "Extract non text components mask/", - "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]", + "Document Image Analysis in Historical Documents", + "input.tif out.xml", args_desc); - std::string out_img = basename(argv[1]); - out_img.erase(out_img.size() - 4); - - std::string filename_prefix = out_img + "_debug"; - scribo::debug::logger().set_filename_prefix(filename_prefix.c_str()); - if (argc > 3 && atoi(argv[3])) - scribo::debug::logger().set_level(scribo::debug::Special); - else - scribo::debug::logger().set_level(scribo::debug::None); - trace::entering("main"); Magick::InitializeMagick(*argv); + mln::util::timer t; + t.start(); + typedef image2d<scribo::def::lbl_type> L; image2d<value::rgb8> input; mln::io::magick::load(input, argv[1]); - util::timer t; - t.start(); - // Preprocess document - image2d<bool> + image2d<bool> input_preproc; + { input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34); - - bool denoise = true; + // Cleanup components on borders + { + typedef scribo::def::lbl_type V; + V nlabels; + image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels); + mln::util::array<box2d> + bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels); + + const box2d& b = input.domain(); + for_all_ncomponents(e, nlabels) + if (bbox(e).pmin().row() == b.pmin().row() + || bbox(e).pmax().row() == b.pmax().row() + || bbox(e).pmin().col() == b.pmin().col() + || bbox(e).pmax().col() == b.pmax().col()) + data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false); + } + } + + bool denoise = 1; std::string language = ""; bool find_line_seps = true; - bool find_whitespace_seps = (argc > 4 && atoi(argv[4])); - - std::cout << "Running with the following options :" - << " ocr_language = " << 
language - << " | find_lines_seps = " << find_line_seps - << " | find_whitespace_seps = " << find_whitespace_seps - << " | debug = " << scribo::debug::logger().is_enabled() - << std::endl; + bool find_whitespace_seps = true; // Run document toolchain. // Text std::cout << "Analysing document..." << std::endl; document<L> - doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise, - find_line_seps, find_whitespace_seps, - !language.empty(), language); - t.stop(); - std::cout << t << std::endl; + doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise, + find_line_seps, find_whitespace_seps, + !language.empty(), language); - mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]); + doc.set_filename(basename(argv[1])); - scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage); - scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage); + // Saving results + std::cout << "Saving results..." << std::endl; + scribo::io::xml::save(doc, argv[2], scribo::io::xml::Page); + std::cout << "End of process - " << t << std::endl; trace::exiting("main"); } -- 1.5.6.5
participants (1)
- Guillaume Lazzara