---
configure.ac | 3 +
.../toolchain/internal/content_in_hdoc_functor.hh | 6 +-
scribo/scribo/toolchain/text_in_doc_preprocess.hh | 1 +
scribo/src/content_in_hdoc.cc | 20 ++--
.../estim => src/contest/DAE-2011}/Makefile.am | 22 +++--
.../DAE-2011/content_in_hdoc_dae.cc} | 111 ++++++++++++--------
scribo/src/contest/Makefile.am | 7 +-
.../estim => src/contest/hdlac-2011}/Makefile.am | 22 +++--
.../hdlac-2011/content_in_hdoc_hdlac.cc} | 91 +++++++++--------
9 files changed, 172 insertions(+), 111 deletions(-)
copy scribo/{tests/estim => src/contest/DAE-2011}/Makefile.am (64%)
copy scribo/src/{non_text_components.cc => contest/DAE-2011/content_in_hdoc_dae.cc} (51%)
copy scribo/{tests/estim => src/contest/hdlac-2011}/Makefile.am (63%)
copy scribo/src/{non_text_components.cc => contest/hdlac-2011/content_in_hdoc_hdlac.cc} (56%)
diff --git a/configure.ac b/configure.ac
index fdbddd9..66643a4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -323,6 +323,8 @@ AC_CONFIG_FILES([
scribo/src/binarization/Makefile
scribo/src/contest/Makefile
scribo/src/contest/hdibco-2010/Makefile
+ scribo/src/contest/hdlac-2011/Makefile
+ scribo/src/contest/DAE-2011/Makefile
scribo/src/debug/Makefile
scribo/src/filter/Makefile
scribo/src/misc/Makefile
@@ -337,6 +339,7 @@ AC_CONFIG_FILES([
scribo/src/text/Makefile
scribo/src/toolchain/Makefile
scribo/src/toolchain/nepomuk/Makefile
+ scribo/src/util/Makefile
])
AC_CONFIG_FILES([scribo/tests/data.hh])
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index adfcdb3..5e98f3e 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -272,7 +272,8 @@ namespace scribo
{
on_new_progress_label("Denoise...");
- std::cout << ">> min_area = " << min_area <<
std::endl;
+ if (verbose)
+ std::cout << ">> min_area = " << min_area <<
std::endl;
input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), min_area);
@@ -547,7 +548,8 @@ namespace scribo
if (!(closing_size % 2))
closing_size += 1;
- std::cout << ">> CLosing size = " << closing_size <<
std::endl;
+ if (verbose)
+ std::cout << ">> CLosing size = " << closing_size <<
std::endl;
component_set<L>
elements = scribo::primitive::extract::non_text_hdoc(doc, closing_size);
diff --git a/scribo/scribo/toolchain/text_in_doc_preprocess.hh b/scribo/scribo/toolchain/text_in_doc_preprocess.hh
index 20fea02..dba5601 100644
--- a/scribo/scribo/toolchain/text_in_doc_preprocess.hh
+++ b/scribo/scribo/toolchain/text_in_doc_preprocess.hh
@@ -159,6 +159,7 @@ namespace scribo
f.sauvola_K = K;
f.enable_fg_extraction = enable_fg_bg;
f.lambda = lambda;
+ f.verbose = false;
// Get results.
mln_ch_value(I,bool) output = f(input);
diff --git a/scribo/src/content_in_hdoc.cc b/scribo/src/content_in_hdoc.cc
index ed15693..737b1d3 100644
--- a/scribo/src/content_in_hdoc.cc
+++ b/scribo/src/content_in_hdoc.cc
@@ -31,7 +31,6 @@
#include <mln/core/image/image2d.hh>
-#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
#include <scribo/toolchain/content_in_hdoc.hh>
@@ -95,8 +94,6 @@ int main(int argc, char* argv[])
scribo::make::internal::debug_filename_prefix = argv[argc - 1];
}
- scribo::debug::logger().set_level(scribo::debug::None);
-
trace::entering("main");
Magick::InitializeMagick(*argv);
@@ -198,16 +195,23 @@ int main(int argc, char* argv[])
find_line_seps, find_whitespace_seps,
!language.empty(), language);
+ doc.set_filename(basename(argv[1]));
+
// Saving results
std::cout << "Saving results..." << std::endl;
-// scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page);
+ scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page);
std::cout << "End of process - " << t << std::endl;
- scribo::io::xml::save(doc, argv[2], scribo::io::xml::PageExtended);
- scribo::io::img::save(doc, "debug.png", scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, "full.png", scribo::io::img::Full);
- // scribo::io::xml::save(doc, "full.xml", scribo::io::xml::Full);
+// scribo::io::xml::save(doc, argv[2], scribo::io::xml::PageExtended);
+
+ // scribo::io::img::save(doc, "debug.png", scribo::io::img::DebugWoImage);
+ // std::cout << "Debug image saved " << t << std::endl;
+ // scribo::io::img::save(doc, "full.png", scribo::io::img::DebugWithImage);
+ // std::cout << "Full Debug image saved " << t <<
std::endl;
+ // scribo::io::img::save(doc, "full_hd.png", scribo::io::img::Full);
+
+// sleep(10);
trace::exiting("main");
}
diff --git a/scribo/tests/estim/Makefile.am b/scribo/src/contest/DAE-2011/Makefile.am
similarity index 64%
copy from scribo/tests/estim/Makefile.am
copy to scribo/src/contest/DAE-2011/Makefile.am
index 1c39ab6..09bccd7 100644
--- a/scribo/tests/estim/Makefile.am
+++ b/scribo/src/contest/DAE-2011/Makefile.am
@@ -17,13 +17,21 @@
## Process this file through Automake to create Makefile.in.
-include $(top_srcdir)/scribo/tests/tests.mk
+include $(top_srcdir)/scribo/scribo.mk
-check_PROGRAMS = \
- font_color \
- font_boldness
+noinst_PROGRAMS =
-font_color_SOURCES = font_color.cc
-font_boldness_SOURCES = font_boldness.cc
-TESTS = $(check_PROGRAMS)
+if HAVE_MAGICKXX
+
+noinst_PROGRAMS += content_in_hdoc_dae
+
+content_in_hdoc_dae_SOURCES = content_in_hdoc_dae.cc
+content_in_hdoc_dae_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ -I$(top_builddir)
+content_in_hdoc_dae_LDFLAGS = $(AM_LDFLAGS) \
+ -lpthread \
+ $(MAGICKXX_LDFLAGS)
+
+endif HAVE_MAGICKXX
\ No newline at end of file
diff --git a/scribo/src/non_text_components.cc b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
similarity index 51%
copy from scribo/src/non_text_components.cc
copy to scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
index 0f4cce4..5309d49 100644
--- a/scribo/src/non_text_components.cc
+++ b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -24,22 +24,24 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+# define SCRIBO_NOCR
#include <libgen.h>
#include <fstream>
#include <iostream>
+#include <sstream>
#include <mln/core/image/image2d.hh>
-#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
-#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/content_in_hdoc.hh>
#include <scribo/toolchain/text_in_doc_preprocess.hh>
#include <scribo/core/document.hh>
#include <scribo/debug/usage.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/preprocessing/crop_without_localization.hh>
#include <scribo/preprocessing/crop.hh>
@@ -48,12 +50,16 @@
#include <scribo/io/img/save.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/labeling/compute.hh>
+#include <mln/labeling/foreground.hh>
+#include <mln/util/timer.hh>
+
+
+
const char *args_desc[][2] =
{
- { "input.*", "An image." },
- { "non_text_comps.pbm", "Non text components mask." },
- { "enable_debug", "Enable debug image output. Set to 1 or 0." },
- { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0."
},
+ { "input.tif", "An image." },
{0, 0}
};
@@ -64,65 +70,84 @@ int main(int argc, char* argv[])
using namespace scribo;
using namespace mln;
- if (argc != 4 && argc != 3 && argc != 5)
+ if (argc != 2)
return scribo::debug::usage(argv,
- "Extract non text components mask/",
- "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ "Document Image Analysis in Historical Documents",
+ "input.tif",
args_desc);
- std::string out_img = basename(argv[1]);
- out_img.erase(out_img.size() - 4);
-
- std::string filename_prefix = out_img + "_debug";
- scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
- if (argc > 3 && atoi(argv[3]))
- scribo::debug::logger().set_level(scribo::debug::Special);
- else
- scribo::debug::logger().set_level(scribo::debug::None);
-
trace::entering("main");
Magick::InitializeMagick(*argv);
+ mln::util::timer t;
+ t.start();
+
typedef image2d<scribo::def::lbl_type> L;
image2d<value::rgb8> input;
mln::io::magick::load(input, argv[1]);
- util::timer t;
- t.start();
-
// Preprocess document
- image2d<bool>
+ image2d<bool> input_preproc;
+ {
input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
-
- bool denoise = true;
+ // Cleanup components on borders
+ {
+ typedef scribo::def::lbl_type V;
+ V nlabels;
+ image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels);
+ mln::util::array<box2d>
+ bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels);
+
+ const box2d& b = input.domain();
+ for_all_ncomponents(e, nlabels)
+ if (bbox(e).pmin().row() == b.pmin().row()
+ || bbox(e).pmax().row() == b.pmax().row()
+ || bbox(e).pmin().col() == b.pmin().col()
+ || bbox(e).pmax().col() == b.pmax().col())
+ data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(),
false);
+ }
+ }
+
+ bool denoise = 1;
std::string language = "";
bool find_line_seps = true;
- bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
-
- std::cout << "Running with the following options :"
- << " ocr_language = " << language
- << " | find_lines_seps = " << find_line_seps
- << " | find_whitespace_seps = " << find_whitespace_seps
- << " | debug = " << scribo::debug::logger().is_enabled()
- << std::endl;
+ bool find_whitespace_seps = true;
// Run document toolchain.
// Text
std::cout << "Analysing document..." << std::endl;
document<L>
- doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
- find_line_seps, find_whitespace_seps,
- !language.empty(), language);
- t.stop();
- std::cout << t << std::endl;
-
- mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+ doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
+
+ doc.set_filename(basename(argv[1]));
+
+ const paragraph_set<L>& par_set = doc.paragraphs();
+ image2d<bool> output(input.domain());
+ for_all_paragraphs(p, par_set)
+ {
+ data::fill(output, true);
+ const paragraph_info<L>& current_par = par_set(p);
+ const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
+ const unsigned nelements = line_ids.nelements();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const line_id_t& line_id = line_ids(i);
+ const line_info<L>& current_line = par_set.lines()(line_id);
+
+ scribo::draw::line_components(output, par_set.lines(), current_line, false);
+ }
+
+ std::stringstream ss;
+ ss << basename(argv[1]) << p << ".pbm";
+ mln::io::pbm::save(output, ss.str());
+ }
- scribo::io::img::save(doc, out_img + "_debug_wo_image.png",
scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, out_img + "_debug_with_image.png",
scribo::io::img::DebugWithImage);
trace::exiting("main");
}
diff --git a/scribo/src/contest/Makefile.am b/scribo/src/contest/Makefile.am
index b1d2dfb..ade408a 100644
--- a/scribo/src/contest/Makefile.am
+++ b/scribo/src/contest/Makefile.am
@@ -1,4 +1,5 @@
-# Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE).
+# Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+# (LRDE).
#
# This file is part of Olena.
#
@@ -20,4 +21,6 @@
include $(top_srcdir)/scribo/scribo.mk
SUBDIRS = \
- hdibco-2010
\ No newline at end of file
+ hdibco-2010 \
+ hdlac-2011 \
+ DAE-2011
\ No newline at end of file
diff --git a/scribo/tests/estim/Makefile.am b/scribo/src/contest/hdlac-2011/Makefile.am
similarity index 63%
copy from scribo/tests/estim/Makefile.am
copy to scribo/src/contest/hdlac-2011/Makefile.am
index 1c39ab6..f5ead52 100644
--- a/scribo/tests/estim/Makefile.am
+++ b/scribo/src/contest/hdlac-2011/Makefile.am
@@ -17,13 +17,21 @@
## Process this file through Automake to create Makefile.in.
-include $(top_srcdir)/scribo/tests/tests.mk
+include $(top_srcdir)/scribo/scribo.mk
-check_PROGRAMS = \
- font_color \
- font_boldness
+noinst_PROGRAMS =
-font_color_SOURCES = font_color.cc
-font_boldness_SOURCES = font_boldness.cc
-TESTS = $(check_PROGRAMS)
+if HAVE_MAGICKXX
+
+noinst_PROGRAMS += content_in_hdoc_hdlac
+
+content_in_hdoc_hdlac_SOURCES = content_in_hdoc_hdlac.cc
+content_in_hdoc_hdlac_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ -I$(top_builddir)
+content_in_hdoc_hdlac_LDFLAGS = $(AM_LDFLAGS) \
+ -lpthread \
+ $(MAGICKXX_LDFLAGS)
+
+endif HAVE_MAGICKXX
\ No newline at end of file
diff --git a/scribo/src/non_text_components.cc b/scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc
similarity index 56%
copy from scribo/src/non_text_components.cc
copy to scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc
index 0f4cce4..045803f 100644
--- a/scribo/src/non_text_components.cc
+++ b/scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -24,6 +24,7 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+# define SCRIBO_NOCR
#include <libgen.h>
#include <fstream>
@@ -31,15 +32,15 @@
#include <mln/core/image/image2d.hh>
-#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
-#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/content_in_hdoc.hh>
#include <scribo/toolchain/text_in_doc_preprocess.hh>
#include <scribo/core/document.hh>
#include <scribo/debug/usage.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/preprocessing/crop_without_localization.hh>
#include <scribo/preprocessing/crop.hh>
@@ -48,12 +49,17 @@
#include <scribo/io/img/save.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/labeling/compute.hh>
+#include <mln/labeling/foreground.hh>
+#include <mln/util/timer.hh>
+
+
+
const char *args_desc[][2] =
{
- { "input.*", "An image." },
- { "non_text_comps.pbm", "Non text components mask." },
- { "enable_debug", "Enable debug image output. Set to 1 or 0." },
- { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0."
},
+ { "input.tif", "An image." },
+ { "out.xml", "Result of the document analysis." },
{0, 0}
};
@@ -64,65 +70,66 @@ int main(int argc, char* argv[])
using namespace scribo;
using namespace mln;
- if (argc != 4 && argc != 3 && argc != 5)
+ if (argc != 3)
return scribo::debug::usage(argv,
- "Extract non text components mask/",
- "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ "Document Image Analysis in Historical Documents",
+ "input.tif out.xml",
args_desc);
- std::string out_img = basename(argv[1]);
- out_img.erase(out_img.size() - 4);
-
- std::string filename_prefix = out_img + "_debug";
- scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
- if (argc > 3 && atoi(argv[3]))
- scribo::debug::logger().set_level(scribo::debug::Special);
- else
- scribo::debug::logger().set_level(scribo::debug::None);
-
trace::entering("main");
Magick::InitializeMagick(*argv);
+ mln::util::timer t;
+ t.start();
+
typedef image2d<scribo::def::lbl_type> L;
image2d<value::rgb8> input;
mln::io::magick::load(input, argv[1]);
- util::timer t;
- t.start();
-
// Preprocess document
- image2d<bool>
+ image2d<bool> input_preproc;
+ {
input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
-
- bool denoise = true;
+ // Cleanup components on borders
+ {
+ typedef scribo::def::lbl_type V;
+ V nlabels;
+ image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels);
+ mln::util::array<box2d>
+ bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels);
+
+ const box2d& b = input.domain();
+ for_all_ncomponents(e, nlabels)
+ if (bbox(e).pmin().row() == b.pmin().row()
+ || bbox(e).pmax().row() == b.pmax().row()
+ || bbox(e).pmin().col() == b.pmin().col()
+ || bbox(e).pmax().col() == b.pmax().col())
+ data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(),
false);
+ }
+ }
+
+ bool denoise = 1;
std::string language = "";
bool find_line_seps = true;
- bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
-
- std::cout << "Running with the following options :"
- << " ocr_language = " << language
- << " | find_lines_seps = " << find_line_seps
- << " | find_whitespace_seps = " << find_whitespace_seps
- << " | debug = " << scribo::debug::logger().is_enabled()
- << std::endl;
+ bool find_whitespace_seps = true;
// Run document toolchain.
// Text
std::cout << "Analysing document..." << std::endl;
document<L>
- doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
- find_line_seps, find_whitespace_seps,
- !language.empty(), language);
- t.stop();
- std::cout << t << std::endl;
+ doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
- mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+ doc.set_filename(basename(argv[1]));
- scribo::io::img::save(doc, out_img + "_debug_wo_image.png",
scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, out_img + "_debug_with_image.png",
scribo::io::img::DebugWithImage);
+ // Saving results
+ std::cout << "Saving results..." << std::endl;
+ scribo::io::xml::save(doc, argv[2], scribo::io::xml::Page);
+ std::cout << "End of process - " << t << std::endl;
trace::exiting("main");
}
--
1.5.6.5