last-svn-commit-923-g563c43f Add a new toolchain for ICDAR contests.

* src/contest/DAE-2011/Makefile.am: New target. * src/contest/DAE-2011/content_in_doc_dae.cc: New. --- scribo/ChangeLog | 8 +++ scribo/src/contest/DAE-2011/Makefile.am | 11 ++++- ...ontent_in_hdoc_dae.cc => content_in_doc_dae.cc} | 56 ++++--------------- 3 files changed, 30 insertions(+), 45 deletions(-) copy scribo/src/contest/DAE-2011/{content_in_hdoc_dae.cc => content_in_doc_dae.cc} (69%) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 9ba701f..394c247 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,13 @@ 2011-06-07 Guillaume Lazzara <z@lrde.epita.fr> + Add a new toolchain for ICDAR contests. + + * src/contest/DAE-2011/Makefile.am: New target. + + * src/contest/DAE-2011/content_in_doc_dae.cc: New. + +2011-06-07 Guillaume Lazzara <z@lrde.epita.fr> + Disable OCR in ICDAR toolchains. * scribo/toolchain/internal/content_in_doc_functor.hh, diff --git a/scribo/src/contest/DAE-2011/Makefile.am b/scribo/src/contest/DAE-2011/Makefile.am index 09bccd7..d7d6b8d 100644 --- a/scribo/src/contest/DAE-2011/Makefile.am +++ b/scribo/src/contest/DAE-2011/Makefile.am @@ -24,7 +24,7 @@ noinst_PROGRAMS = if HAVE_MAGICKXX -noinst_PROGRAMS += content_in_hdoc_dae +noinst_PROGRAMS += content_in_hdoc_dae content_in_doc_dae content_in_hdoc_dae_SOURCES = content_in_hdoc_dae.cc content_in_hdoc_dae_CPPFLAGS = $(AM_CPPFLAGS) \ @@ -34,4 +34,13 @@ content_in_hdoc_dae_LDFLAGS = $(AM_LDFLAGS) \ -lpthread \ $(MAGICKXX_LDFLAGS) +content_in_doc_dae_SOURCES = content_in_doc_dae.cc +content_in_doc_dae_CPPFLAGS = $(AM_CPPFLAGS) \ + $(TESSERACT_CPPFLAGS) \ + -I$(top_builddir) +content_in_doc_dae_LDFLAGS = $(AM_LDFLAGS) \ + -lpthread \ + $(MAGICKXX_LDFLAGS) + + endif HAVE_MAGICKXX \ No newline at end of file diff --git a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc b/scribo/src/contest/DAE-2011/content_in_doc_dae.cc similarity index 69% copy from scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc copy to scribo/src/contest/DAE-2011/content_in_doc_dae.cc index 5986142..d194bd1 
100644 --- a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc +++ b/scribo/src/contest/DAE-2011/content_in_doc_dae.cc @@ -29,13 +29,13 @@ #include <libgen.h> #include <fstream> #include <iostream> -#include <sstream> #include <mln/core/image/image2d.hh> +#include <mln/io/pbm/save.hh> #include <mln/io/magick/load.hh> -#include <scribo/toolchain/content_in_hdoc.hh> +#include <scribo/toolchain/content_in_doc.hh> #include <scribo/toolchain/text_in_doc_preprocess.hh> #include <scribo/core/document.hh> @@ -47,20 +47,13 @@ #include <scribo/preprocessing/crop.hh> #include <scribo/io/xml/save.hh> -#include <scribo/io/img/save.hh> - - -#include <mln/core/alias/neighb2d.hh> -#include <mln/labeling/compute.hh> -#include <mln/labeling/foreground.hh> -#include <mln/util/timer.hh> - const char *args_desc[][2] = { - { "input.tif", "An image." }, - { "output_dir", "Output directory." }, + { "input.*", "An image." }, + { "output_dir", "Output directory" }, + {0, 0} }; @@ -73,45 +66,23 @@ int main(int argc, char* argv[]) if (argc != 3) return scribo::debug::usage(argv, - "Document Image Analysis in Historical Documents", - "input.tif output_dir", + "Find paragraph segmentation and produces images for each paragraph.", + "input.* output_dir", args_desc); trace::entering("main"); Magick::InitializeMagick(*argv); - mln::util::timer t; - t.start(); - typedef image2d<scribo::def::lbl_type> L; image2d<value::rgb8> input; mln::io::magick::load(input, argv[1]); // Preprocess document image2d<bool> input_preproc; - { - input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34); + input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34); - // Cleanup components on borders - { - typedef scribo::def::lbl_type V; - V nlabels; - image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels); - mln::util::array<box2d> - bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels); - - const box2d& b = input.domain(); - for_all_ncomponents(e, nlabels) - if 
(bbox(e).pmin().row() == b.pmin().row() - || bbox(e).pmax().row() == b.pmax().row() - || bbox(e).pmin().col() == b.pmin().col() - || bbox(e).pmax().col() == b.pmax().col()) - data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false); - } - } - - bool denoise = 1; + bool denoise = true; std::string language = ""; bool find_line_seps = true; bool find_whitespace_seps = true; @@ -121,11 +92,9 @@ int main(int argc, char* argv[]) // Text std::cout << "Analysing document..." << std::endl; document<L> - doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise, - find_line_seps, find_whitespace_seps, - !language.empty(), language); - - doc.set_filename(basename(argv[1])); + doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise, + find_line_seps, find_whitespace_seps, + !language.empty(), language); const paragraph_set<L>& par_set = doc.paragraphs(); image2d<bool> output(input.domain()); @@ -149,6 +118,5 @@ int main(int argc, char* argv[]) mln::io::pbm::save(output, ss.str()); } - trace::exiting("main"); } -- 1.5.6.5
participants (1)
- Guillaume Lazzara