* scribo/toolchain/internal/content_in_doc_functor.hh,
* scribo/toolchain/internal/text_in_doc_functor.hh,
* scribo/toolchain/internal/text_in_doc_preprocess_functor.hh,
* scribo/toolchain/internal/toolchain_functor.hh: New.
* scribo/toolchain/text_in_doc.hh,
* scribo/toolchain/text_in_doc_preprocess.hh: Make use of the
functors.
---
scribo/ChangeLog | 13 +
.../toolchain/internal/content_in_doc_functor.hh | 415 ++++++++++++++++++++
.../toolchain/internal/text_in_doc_functor.hh | 401 +++++++++++++++++++
.../internal/text_in_doc_preprocess_functor.hh | 304 ++++++++++++++
.../internal/toolchain_functor.hh} | 74 ++--
scribo/scribo/toolchain/text_in_doc.hh | 251 +-----------
scribo/scribo/toolchain/text_in_doc_preprocess.hh | 64 ++--
7 files changed, 1225 insertions(+), 297 deletions(-)
create mode 100644 scribo/scribo/toolchain/internal/content_in_doc_functor.hh
create mode 100644 scribo/scribo/toolchain/internal/text_in_doc_functor.hh
create mode 100644 scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh
copy scribo/scribo/{primitive/link/internal/dmax_default.hh =>
toolchain/internal/toolchain_functor.hh} (59%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 8a7132a..5042dc0 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2010-12-10 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Rewrite toolchains as functors.
+
+ * scribo/toolchain/internal/content_in_doc_functor.hh,
+ * scribo/toolchain/internal/text_in_doc_functor.hh,
+ * scribo/toolchain/internal/text_in_doc_preprocess_functor.hh,
+ * scribo/toolchain/internal/toolchain_functor.hh: New.
+
+ * scribo/toolchain/text_in_doc.hh,
+ * scribo/toolchain/text_in_doc_preprocess.hh: Make use of the
+ functors.
+
+2010-12-10 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* scribo/core/document.hh: Add new methods.
2010-12-10 Guillaume Lazzara <z(a)lrde.epita.fr>
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
new file mode 100644
index 0000000..5459487
--- /dev/null
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -0,0 +1,415 @@
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_TOOLCHAIN_INTERNAL_CONTENT_IN_DOC_FUNCTOR_HH
+# define SCRIBO_TOOLCHAIN_INTERNAL_CONTENT_IN_DOC_FUNCTOR_HH
+
+# include <mln/io/ppm/save.hh>
+
+# include <scribo/core/def/lbl_type.hh>
+# include <scribo/core/document.hh>
+
+# include <scribo/primitive/extract/elements.hh>
+# include <scribo/primitive/extract/components.hh>
+# include <scribo/primitive/extract/vertical_separators.hh>
+# include <scribo/primitive/extract/separators_nonvisible.hh>
+
+# include <scribo/primitive/remove/separators.hh>
+
+# include <scribo/filter/object_links_bbox_h_ratio.hh>
+# include <scribo/filter/objects_small.hh>
+
+# include <scribo/primitive/group/from_single_link.hh>
+
+# include <scribo/primitive/link/merge_double_link.hh>
+# include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+# include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+# include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+
+# include <scribo/preprocessing/denoise_fg.hh>
+
+# include <scribo/text/recognition.hh>
+# include <scribo/text/merging.hh>
+
+# include <scribo/make/debug_filename.hh>
+
+# include <scribo/debug/save_bboxes_image.hh>
+# include <scribo/debug/save_linked_bboxes_image.hh>
+# include <scribo/debug/bboxes_enlarged_image.hh>
+# include <scribo/debug/mean_and_base_lines_image.hh>
+# include <scribo/debug/looks_like_a_text_line_image.hh>
+
+# include <scribo/toolchain/internal/toolchain_functor.hh>
+
+# include <scribo/io/xml/save.hh>
+
+
+namespace scribo
+{
+
+ namespace toolchain
+ {
+
+ namespace internal
+ {
+
+
+ template <typename I>
+ struct content_in_doc_functor
+ : public Toolchain_Functor
+ {
+ typedef value::label<30> V;
+ typedef mln_ch_value(I,V) L;
+
+ content_in_doc_functor(const char *doc_filename);
+
+ virtual int nsteps() const;
+
+ virtual void on_xml_saved();
+
+ //===============
+ // Core function
+ //===============
+
+ template <typename J>
+ scribo::document<L> operator()(const Image<J>& original_image,
+ const Image<I>& processed_image);
+
+
+ //=========
+ // Options
+ //=========
+ bool enable_denoising;
+ bool enable_line_seps;
+ bool enable_whitespace_seps;
+ bool enable_debug;
+ bool save_doc_as_xml;
+ bool allow_xml_extensions;
+
+ //============
+ // Parameters
+ //============
+
+ std::string ocr_language;
+ std::string output_file;
+
+ //=========
+ // Results
+ //=========
+ document<L> doc;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename I>
+ content_in_doc_functor<I>::content_in_doc_functor(const char *doc_filename)
+ : enable_denoising(true),
+ enable_line_seps(true),
+ enable_whitespace_seps(true),
+ enable_debug(false),
+ save_doc_as_xml(false),
+ allow_xml_extensions(true),
+ ocr_language("eng"),
+ output_file("/tmp/foo.xml"),
+ doc(doc_filename)
+ {
+ }
+
+
+ //===============
+ // Core function
+ //===============
+
+ template <typename I>
+ template <typename J>
+ scribo::document<typename content_in_doc_functor<I>::L>
+ content_in_doc_functor<I>::operator()(const Image<J>&
original_image,
+ const Image<I>& processed_image)
+ {
+ mln_precondition(exact(original_image).is_valid());
+ mln_precondition(exact(processed_image).is_valid());
+
+ doc.set_image(exact(original_image));
+
+ // Remove separators (input_cleaned defaults to the input when disabled).
+ mln_ch_value(I,bool) separators, input_cleaned = exact(processed_image);
+ if (enable_line_seps)
+ {
+ on_new_progress_label("Find vertical separators...");
+
+ // Vertical separators
+ separators = primitive::extract::vertical_separators(processed_image, 81);
+
+ on_progress();
+
+ on_new_progress_label("Remove separators...");
+
+ input_cleaned = primitive::remove::separators(processed_image, separators);
+
+ on_progress();
+ }
+
+ mln_ch_value(I,bool) whitespaces;
+ if (enable_whitespace_seps)
+ {
+ // Whitespace separators
+ on_new_progress_label("Find whitespace separators...");
+
+ whitespaces = primitive::extract::separators_nonvisible(processed_image);
+
+ on_progress();
+ }
+
+ if (enable_debug)
+ {
+ if (enable_whitespace_seps)
+ mln::io::pbm::save(whitespaces,
+ scribo::make::debug_filename("whitespaces.pbm"));
+
+ if (enable_line_seps)
+ {
+ mln::io::pbm::save(separators,
+ scribo::make::debug_filename("vseparators.pbm"));
+
+ mln::io::pbm::save(input_cleaned,
+ scribo::make::debug_filename("input_wo_vseparators.pbm"));
+ }
+ }
+
+ // Denoise
+ if (enable_denoising)
+ {
+ on_new_progress_label("Denoise...");
+
+ input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), 3);
+
+ if (enable_debug)
+ mln::io::pbm::save(input_cleaned,
+ scribo::make::debug_filename("denoised.pbm"));
+
+ on_progress();
+ }
+
+ /// Finding components.
+ on_new_progress_label("Finding components...");
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input_cleaned, c8(),
+ ncomponents);
+
+ on_progress();
+
+ /// Set separator components.
+ if (enable_line_seps)
+ components.add_separators(separators);
+ if (enable_whitespace_seps)
+ components.add_separators(whitespaces);
+
+ if (enable_debug)
+ mln::io::pbm::save(components.separators(),
+ scribo::make::debug_filename("all_separators.pbm"));
+
+
+ on_new_progress_label("Filtering components");
+
+ components = scribo::filter::components_small(components, 3);
+
+ on_progress();
+
+
+ /// Linking potential objects
+ on_new_progress_label("Linking objects...");
+
+ object_links<L> left_link
+ = primitive::link::with_single_left_link_dmax_ratio(components,
+ primitive::link::internal::dmax_width_and_height(1),
+ anchor::MassCenter);
+ object_links<L> right_link
+ = primitive::link::with_single_right_link_dmax_ratio(components,
+ primitive::link::internal::dmax_width_and_height(1),
+ anchor::MassCenter);
+
+ if (enable_debug)
+ {
+ debug::save_linked_bboxes_image(processed_image, left_link, right_link,
+ literal::blue,
+ literal::cyan,
+ literal::yellow,
+ literal::green,
+ anchor::MassCenter,
+ scribo::make::debug_filename("object_links.ppm"));
+ }
+
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+ on_progress();
+
+
+ on_new_progress_label("Filtering objects");
+
+ // Remove links if bboxes have too different sizes.
+ object_links<L> hratio_filtered_links
+ = filter::object_links_bbox_h_ratio(merged_links, 2.5f);
+
+
+ if (enable_debug)
+ {
+ mln_ch_value(I,value::rgb8)
+ hratio_decision_image = scribo::debug::decision_image(processed_image,
+ merged_links,
+ hratio_filtered_links,
+ anchor::MassCenter);
+ mln::io::ppm::save(hratio_decision_image,
+ scribo::make::debug_filename("hratio_links_decision_image.ppm"));
+ }
+
+ on_progress();
+
+
+ on_new_progress_label("Rebuilding lines");
+
+ object_groups<L>
+ groups = primitive::group::from_single_link(hratio_filtered_links);
+
+
+
+ // Construct a line set.
+ line_set<L>
+ lines = scribo::make::line_set(groups);
+
+
+ //===== DEBUG =====
+
+ if (enable_debug)
+ {
+
+ // Bboxes image.
+ scribo::debug::save_bboxes_image(processed_image, lines,
+ scribo::make::debug_filename("step1_bboxes.ppm"));
+
+ // Bboxes enlarged
+ mln::io::ppm::save(scribo::debug::bboxes_enlarged_image(processed_image, lines),
+ scribo::make::debug_filename("step1_bboxes_enlarged.ppm"));
+
+ // Looks like a text line
+ mln::io::ppm::save(scribo::debug::looks_like_a_text_line_image(processed_image,
lines),
+ scribo::make::debug_filename("step1_looks_like_a_text_line.ppm"));
+
+
+ // mean and base lines.
+ mln::io::ppm::save(scribo::debug::mean_and_base_lines_image(processed_image, lines),
+ scribo::make::debug_filename("step1_x_height.ppm"));
+
+ }
+ //===== END OF DEBUG =====
+
+
+
+ lines = scribo::text::merging(lines);
+
+
+ if (enable_debug)
+ {
+
+ // mean and base lines.
+ mln::io::ppm::save(scribo::debug::mean_and_base_lines_image(processed_image, lines),
+ scribo::make::debug_filename("step2_x_height.ppm"));
+
+ // Looks like a text line
+ mln::io::ppm::save(scribo::debug::looks_like_a_text_line_image(processed_image,
lines),
+ scribo::make::debug_filename("step2_looks_like_a_text_line.ppm"));
+
+ // Bboxes image.
+ scribo::debug::save_bboxes_image(processed_image, lines,
+ scribo::make::debug_filename("step2_bboxes.ppm"));
+
+
+ }
+
+ on_progress();
+
+ on_new_progress_label("Recognizing text");
+
+ scribo::text::recognition(lines, ocr_language.c_str());
+ doc.set_text(lines);
+
+ on_progress();
+
+ // Extract other Elements
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::elements(doc, original_image);
+ doc.set_elements(elements);
+
+ on_progress();
+
+ // Saving results
+ if (save_doc_as_xml)
+ {
+ on_new_progress_label("Saving results");
+
+ scribo::io::xml::save(doc, output_file, allow_xml_extensions);
+ on_xml_saved();
+
+ on_progress();
+ }
+
+ return doc;
+ }
+
+ // Number of on_progress() notifications emitted by operator():
+ // 7 unconditional steps plus one per optional step (two for line seps).
+ template<typename I>
+ int
+ content_in_doc_functor<I>::nsteps() const
+ {
+ return 7 + enable_denoising + 2 * enable_line_seps
+ + enable_whitespace_seps + save_doc_as_xml;
+ }
+
+
+ template<typename I>
+ void
+ content_in_doc_functor<I>::on_xml_saved()
+ {
+ // Nothing
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
+ } // end of namespace scribo::toolchain::internal
+
+ } // end of namespace scribo::toolchain
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_TOOLCHAIN_INTERNAL_CONTENT_IN_DOC_FUNCTOR_HH
diff --git a/scribo/scribo/toolchain/internal/text_in_doc_functor.hh
b/scribo/scribo/toolchain/internal/text_in_doc_functor.hh
new file mode 100644
index 0000000..701f1e6
--- /dev/null
+++ b/scribo/scribo/toolchain/internal/text_in_doc_functor.hh
@@ -0,0 +1,401 @@
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_FUNCTOR_HH
+# define SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_FUNCTOR_HH
+
+# include <mln/io/ppm/save.hh>
+
+# include <scribo/core/def/lbl_type.hh>
+
+# include <scribo/primitive/extract/components.hh>
+# include <scribo/primitive/extract/vertical_separators.hh>
+# include <scribo/primitive/extract/separators_nonvisible.hh>
+
+# include <scribo/primitive/remove/separators.hh>
+
+# include <scribo/filter/object_links_bbox_h_ratio.hh>
+# include <scribo/filter/objects_small.hh>
+
+# include <scribo/primitive/group/from_single_link.hh>
+
+# include <scribo/primitive/link/merge_double_link.hh>
+# include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+# include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+# include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+
+# include <scribo/preprocessing/denoise_fg.hh>
+
+# include <scribo/text/recognition.hh>
+# include <scribo/text/merging.hh>
+
+# include <scribo/make/debug_filename.hh>
+
+# include <scribo/debug/save_bboxes_image.hh>
+# include <scribo/debug/save_linked_bboxes_image.hh>
+# include <scribo/debug/bboxes_enlarged_image.hh>
+# include <scribo/debug/mean_and_base_lines_image.hh>
+# include <scribo/debug/looks_like_a_text_line_image.hh>
+
+# include <scribo/toolchain/internal/toolchain_functor.hh>
+
+
+namespace scribo
+{
+
+ namespace toolchain
+ {
+
+ namespace internal
+ {
+
+
+ template <typename I>
+ struct text_in_doc_functor
+ : public Toolchain_Functor
+ {
+ typedef value::label<30> V;
+ typedef mln_ch_value(I,V) L;
+
+ text_in_doc_functor();
+
+ virtual int nsteps() const;
+
+ //===============
+ // Core function
+ //===============
+
+ line_set<L> operator()(const Image<I>& input_);
+
+
+ //=========
+ // Options
+ //=========
+ bool enable_denoising;
+ bool enable_line_seps;
+ bool enable_whitespace_seps;
+ bool enable_debug;
+
+ //============
+ // Parameters
+ //============
+
+ std::string ocr_language;
+
+
+ // Results
+ line_set<L> output;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename I>
+ text_in_doc_functor<I>::text_in_doc_functor()
+ : enable_denoising(true),
+ enable_line_seps(true),
+ enable_whitespace_seps(true),
+ enable_debug(false),
+ ocr_language("eng")
+ {
+ }
+
+
+ //===============
+ // Core function
+ //===============
+
+ template <typename I>
+ line_set<typename text_in_doc_functor<I>::L>
+ text_in_doc_functor<I>::operator()(const Image<I>& input)
+ {
+ // Remove separators (input_cleaned defaults to the input when disabled).
+ mln_ch_value(I,bool) separators, input_cleaned = exact(input);
+ if (enable_line_seps)
+ {
+ on_new_progress_label("Find vertical separators...");
+
+ // Vertical separators
+ separators = primitive::extract::vertical_separators(input, 81);
+
+ on_progress();
+
+ on_new_progress_label("Remove separators...");
+
+ input_cleaned = primitive::remove::separators(input, separators);
+
+ on_progress();
+ }
+
+ mln_ch_value(I,bool) whitespaces;
+ if (enable_whitespace_seps)
+ {
+ // Whitespace separators
+ on_new_progress_label("Find whitespace separators...");
+
+ whitespaces = primitive::extract::separators_nonvisible(input);
+
+ on_progress();
+ }
+
+ if (enable_debug)
+ {
+ if (enable_whitespace_seps)
+ mln::io::pbm::save(whitespaces,
+ scribo::make::debug_filename("whitespaces.pbm"));
+
+ if (enable_line_seps)
+ {
+ mln::io::pbm::save(separators,
+ scribo::make::debug_filename("vseparators.pbm"));
+
+ mln::io::pbm::save(input_cleaned,
+ scribo::make::debug_filename("input_wo_vseparators.pbm"));
+ }
+ }
+
+ // Denoise
+ if (enable_denoising)
+ {
+ on_new_progress_label("Denoise...");
+
+ input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), 3);
+
+ if (enable_debug)
+ mln::io::pbm::save(input_cleaned,
+ scribo::make::debug_filename("denoised.pbm"));
+
+ on_progress();
+ }
+
+ /// Finding components.
+ on_new_progress_label("Finding components...");
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input_cleaned, c8(),
+ ncomponents);
+
+ on_progress();
+
+ /// Set separator components.
+ if (enable_line_seps)
+ components.add_separators(separators);
+ if (enable_whitespace_seps)
+ components.add_separators(whitespaces);
+
+ if (enable_debug)
+ mln::io::pbm::save(components.separators(),
+ scribo::make::debug_filename("all_separators.pbm"));
+
+
+ on_new_progress_label("Filtering components");
+
+ components = scribo::filter::components_small(components, 3);
+
+ on_progress();
+
+
+ /// Linking potential objects
+ on_new_progress_label("Linking objects...");
+
+ object_links<L> left_link
+ = primitive::link::with_single_left_link_dmax_ratio(components,
+ primitive::link::internal::dmax_width_and_height(1),
+ anchor::MassCenter);
+ object_links<L> right_link
+ = primitive::link::with_single_right_link_dmax_ratio(components,
+ primitive::link::internal::dmax_width_and_height(1),
+ anchor::MassCenter);
+
+ if (enable_debug)
+ {
+ debug::save_linked_bboxes_image(input, left_link, right_link,
+ literal::blue,
+ literal::cyan,
+ literal::yellow,
+ literal::green,
+ anchor::MassCenter,
+ scribo::make::debug_filename("object_links.ppm"));
+ }
+
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+ on_progress();
+
+
+ on_new_progress_label("Filtering objects");
+
+ // Remove links if bboxes have too different sizes.
+ object_links<L> hratio_filtered_links
+ = filter::object_links_bbox_h_ratio(merged_links, 2.5f);
+
+
+ if (enable_debug)
+ {
+ mln_ch_value(I,value::rgb8)
+ hratio_decision_image = scribo::debug::decision_image(input,
+ merged_links,
+ hratio_filtered_links,
+ anchor::MassCenter);
+ io::ppm::save(hratio_decision_image,
+ scribo::make::debug_filename("hratio_links_decision_image.ppm"));
+ }
+
+ on_progress();
+
+
+ on_new_progress_label("Rebuilding lines");
+
+ object_groups<L>
+ groups = primitive::group::from_single_link(hratio_filtered_links);
+
+
+
+ // Construct a line set.
+ line_set<L>
+ lines = scribo::make::line_set(groups);
+
+
+ //===== DEBUG =====
+
+ if (enable_debug)
+ {
+
+ // Bboxes image.
+ scribo::debug::save_bboxes_image(input, lines,
+ scribo::make::debug_filename("step1_bboxes.ppm"));
+
+ // Bboxes enlarged
+ mln::io::ppm::save(scribo::debug::bboxes_enlarged_image(input, lines),
+ scribo::make::debug_filename("step1_bboxes_enlarged.ppm"));
+
+ // Looks like a text line
+ mln::io::ppm::save(scribo::debug::looks_like_a_text_line_image(input, lines),
+ scribo::make::debug_filename("step1_looks_like_a_text_line.ppm"));
+
+
+ // mean and base lines.
+ mln::io::ppm::save(scribo::debug::mean_and_base_lines_image(input, lines),
+ scribo::make::debug_filename("step1_x_height.ppm"));
+
+ }
+ //===== END OF DEBUG =====
+
+
+
+ lines = scribo::text::merging(lines);
+
+
+
+ //===== DEBUG =====
+
+ if (enable_debug)
+ {
+
+ // mean and base lines.
+ mln::io::ppm::save(scribo::debug::mean_and_base_lines_image(input, lines),
+ scribo::make::debug_filename("step2_x_height.ppm"));
+
+ // Looks like a text line
+ mln::io::ppm::save(scribo::debug::looks_like_a_text_line_image(input, lines),
+ scribo::make::debug_filename("step2_looks_like_a_text_line.ppm"));
+
+ // Bboxes image.
+ scribo::debug::save_bboxes_image(input, lines,
+ scribo::make::debug_filename("step2_bboxes.ppm"));
+
+
+ }
+
+
+
+ if (enable_debug)
+ {
+ std::ofstream
file(scribo::make::debug_filename("step2_bboxes_100p.txt").c_str());
+
+ for_all_lines(l, lines)
+ if (lines(l).tag() != line::Merged
+ && lines(l).tag() != line::Ignored
+ && lines(l).tag() != line::Pathological)
+ {
+ file << lines(l).bbox().pmin().row() << " "
+ << lines(l).bbox().pmin().col() << " "
+ << lines(l).bbox().pmax().row() << " "
+ << lines(l).bbox().pmax().col() << " "
+ << lines(l).card() << " "
+ << lines(l).baseline() << " "
+ << lines(l).x_height() << " "
+ << lines(l).meanline() << " "
+ << lines(l).d_height() << " "
+ << lines(l).a_height() << " "
+ << lines(l).char_space() << " "
+ << lines(l).char_width() << std::endl;
+
+ }
+
+ file.close();
+ }
+
+
+ //===== END OF DEBUG =====
+
+ on_progress();
+
+ on_new_progress_label("Recognizing text");
+
+ scribo::text::recognition(lines, ocr_language.c_str());
+
+ on_progress();
+
+ output = lines;
+ return output;
+ }
+
+ // Number of on_progress() notifications emitted by operator():
+ // 6 unconditional steps plus one per optional step (two for line seps).
+ template<typename I>
+ int
+ text_in_doc_functor<I>::nsteps() const
+ {
+ return 6 + enable_denoising + 2 * enable_line_seps
+ + enable_whitespace_seps;
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
+ } // end of namespace scribo::toolchain::internal
+
+ } // end of namespace scribo::toolchain
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_FUNCTOR_HH
diff --git a/scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh
b/scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh
new file mode 100644
index 0000000..6a9506b
--- /dev/null
+++ b/scribo/scribo/toolchain/internal/text_in_doc_preprocess_functor.hh
@@ -0,0 +1,304 @@
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH
+# define SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH
+
+#include <mln/core/concept/image.hh>
+#include <mln/data/transform.hh>
+#include <mln/data/convert.hh>
+#include <mln/fun/v2v/rgb_to_int_u.hh>
+
+#include <mln/subsampling/antialiased.hh>
+#include <scribo/binarization/sauvola.hh>
+#include <scribo/binarization/sauvola_ms.hh>
+
+#include <scribo/preprocessing/split_bg_fg.hh>
+#include <scribo/preprocessing/deskew.hh>
+#include <scribo/preprocessing/denoise.hh>
+
+#include <scribo/toolchain/internal/toolchain_functor.hh>
+
+namespace scribo
+{
+
+ namespace toolchain
+ {
+
+ namespace internal
+ {
+
+ /*! \brief Enum of the binarization algorithms available.
+
+ Convert: Violently convert from color to binary.
+ Sauvola: Local threshold binarization algorithm.
+ SauvolaMs: Multi-scale local threshold binarization algorithm.
+ */
+ enum Binarization_Algo
+ {
+ Convert,
+ Sauvola,
+ SauvolaMs
+ };
+
+
+ /*! \brief Functor performing custom preprocessing algorithms on
+ * documents.
+
+ Whatever the options selected, this functor converts an input
+ image into a graylevel image and tries to binarize the latter.
+
+ Optional algorithms can be performed too:
+ - Subsampling (enable_subsample), disabled by default.
+ - Extract Background/Foreground (enable_fg_extraction), split
+ background and foreground objects, disabled by default.
+ - Deskew (enable_deskew), disabled by default.
+ - Denoise (enable_denoising), disabled by default.
+
+ The binarization algorithm can be chosen through
+ binarization_algo option. It can be one of the algorithms
+ detailed in enum Binarization_Algo.
+
+ Few parameters can be set for algorithms:
+
+ - Sauvola:
+ * sauvola_win, the window size used in Sauvola based
+ algorithms (default 101).
+ * sauvola_K, a user parameter for Sauvola's threshold formula
+ (default 0.34).
+
+ - Extract background/foreground
+ * lambda, the maximum area of the possible foreground objects
+ (default 0, auto).
+
+ */
+ template <typename I>
+ struct text_in_doc_preprocess_functor
+ : public Toolchain_Functor
+ {
+
+ text_in_doc_preprocess_functor();
+
+ virtual int nsteps() const;
+
+ //===============
+ // Core function
+ //===============
+
+ mln_ch_value(I,bool) operator()(const Image<I>& input_);
+
+
+ //=========
+ // Options
+ //=========
+
+ // Settings
+ bool enable_subsample;
+ bool enable_fg_extraction;
+ bool enable_deskew;
+ bool enable_denoising;
+
+ Binarization_Algo binarization_algo;
+
+ unsigned sauvola_win;
+ double sauvola_K;
+ unsigned lambda;
+
+ // Results
+ mln_concrete(I) fg;
+ mln_concrete(I) bg;
+ image2d<bool> output;
+
+ private: // Methods
+ unsigned find_best_scale(const Image<I>& ima_);
+
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename I>
+ text_in_doc_preprocess_functor<I>::text_in_doc_preprocess_functor()
+ : enable_subsample(false),
+ enable_fg_extraction(false),
+ enable_deskew(false),
+ enable_denoising(false),
+ binarization_algo(SauvolaMs),
+ sauvola_win(101),
+ sauvola_K(0.34),
+ lambda(0)
+ {
+ }
+
+
+ //===============
+ // Core function
+ //===============
+
+ template <typename I>
+ mln_ch_value(I,bool)
+ text_in_doc_preprocess_functor<I>::operator()(const Image<I>&
input_)
+ {
+ trace::entering("scribo::toolchain::text_in_doc_preprocess");
+
+ const I& input = exact(input_);
+ mln_precondition(input.is_valid());
+
+ mln_concrete(I) input_rgb = input;
+
+ // Subsample
+ //----------
+ if (enable_subsample)
+ {
+ on_new_progress_label("Subsample");
+
+ input_rgb = mln::subsampling::antialiased(input_rgb,
+ find_best_scale(input_rgb));
+
+ on_progress();
+ }
+
+
+ // Remove background
+ //------------------
+ if (enable_fg_extraction)
+ {
+ on_new_progress_label("Foreground Extraction");
+
+ typedef mln::util::couple<mln_concrete(I), mln_concrete(I)> res_t;
+
+ unsigned rlambda = lambda;
+ if (!lambda)
+ rlambda = 1.2 * (input.nrows() + input.ncols());
+
+ res_t res = scribo::preprocessing::split_bg_fg(input_rgb, rlambda, 32);
+
+ bg = res.first();
+ fg = res.second();
+ input_rgb = res.second();
+
+ on_progress();
+ }
+
+
+ // Convert to grayscale image (always happens).
+ //---------------------------------------------
+ on_new_progress_label("Convert to gray-scale image");
+ image2d<value::int_u8>
+ intensity_ima = mln::data::transform(input_rgb,
+ mln::fun::v2v::rgb_to_int_u<8>());
+ on_progress();
+
+
+ // Deskew
+ //-------
+ if (enable_deskew)
+ {
+ on_new_progress_label("Deskew");
+
+ intensity_ima = scribo::preprocessing::deskew(intensity_ima);
+
+ on_progress();
+ }
+
+
+ // Binarization (always happens)
+ //------------------------------
+ on_new_progress_label("Binarization");
+
+ if (binarization_algo == Sauvola)
+ {
+ on_new_progress_label("Binarization (Sauvola)");
+ output = scribo::binarization::sauvola(intensity_ima);
+ }
+ else if (binarization_algo == SauvolaMs)
+ {
+ on_new_progress_label("Binarization (Sauvola Multi-scale)");
+ output = scribo::binarization::sauvola_ms(intensity_ima,
+ sauvola_win, 3, sauvola_K);
+ }
+ else // binarization_algo == Convert
+ {
+ on_new_progress_label("Binary conversion");
+ output = mln::data::convert(bool(), intensity_ima);
+ }
+
+ on_progress();
+
+
+ // Denoise
+ //--------
+ if (enable_denoising)
+ {
+ on_new_progress_label("Remove noise");
+
+ output = scribo::preprocessing::denoise(output, c8(), 2, 2);
+
+ on_progress();
+ }
+
+ return output;
+ }
+
+
+
+ template<typename I>
+ int
+ text_in_doc_preprocess_functor<I>::nsteps() const
+ {
+ return 2 + enable_denoising + enable_deskew
+ + enable_fg_extraction + enable_subsample;
+ }
+
+ // Subsampling factor chosen from the smallest image dimension:
+ // >= 5000 gives 3, in ]2500,5000[ gives 2, anything else gives 1.
+ template <typename I>
+ unsigned
+ text_in_doc_preprocess_functor<I>::find_best_scale(const Image<I>& ima_)
+ {
+ const I& ima = exact(ima_);
+ const unsigned smallest
+ = ima.nrows() < ima.ncols() ? ima.nrows() : ima.ncols();
+
+ if (smallest >= 5000) // Both dimensions are at least 5000.
+ return 3;
+
+ if (smallest > 2500) // Both dimensions exceed 2500.
+ return 2;
+
+ return 1;
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
+ } // end of namespace scribo::toolchain::internal
+
+ } // end of namespace scribo::toolchain
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_TOOLCHAIN_INTERNAL_TEXT_IN_DOC_PREPROCESS_FUNCTOR_HH
diff --git a/scribo/scribo/primitive/link/internal/dmax_default.hh
b/scribo/scribo/toolchain/internal/toolchain_functor.hh
similarity index 59%
copy from scribo/scribo/primitive/link/internal/dmax_default.hh
copy to scribo/scribo/toolchain/internal/toolchain_functor.hh
index b4106a9..d10ea28 100644
--- a/scribo/scribo/primitive/link/internal/dmax_default.hh
+++ b/scribo/scribo/toolchain/internal/toolchain_functor.hh
@@ -23,62 +23,72 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_PRIMITIVE_LINK_INTERNAL_DMAX_DEFAULT_HH
-# define SCRIBO_PRIMITIVE_LINK_INTERNAL_DMAX_DEFAULT_HH
-
-/// \file
-///
-/// Default class for dmax functors.
-
-# include <scribo/primitive/link/internal/dmax_functor_base.hh>
-
+#ifndef SCRIBO_TOOLCHAIN_INTERNAL_TOOLCHAIN_FUNCTOR_HH
+# define SCRIBO_TOOLCHAIN_INTERNAL_TOOLCHAIN_FUNCTOR_HH
namespace scribo
{
- namespace primitive
+ namespace toolchain
{
- namespace link
+ namespace internal
{
- namespace internal
+ /// Abstract base of the toolchain functors: declares the step count
+ /// and the progress triggers, with default trigger implementations.
+ class Toolchain_Functor
{
+ public:
+ Toolchain_Functor();
+
+ /// Virtual destructor: the class has virtual members, so destroying
+ /// a derived functor through a Toolchain_Functor pointer must be
+ /// well defined.
+ virtual ~Toolchain_Functor() {}
+
- /// \brief Base class for dmax functors.
- class dmax_default : public dmax_functor_base<dmax_default>
- {
- typedef dmax_functor_base<dmax_default> super_;
+ /// Number of steps of the toolchain; must be provided by subclasses.
+ virtual int nsteps() const = 0;
- public:
- dmax_default(const float& dmax_factor);
- protected:
- using super_::dmax_factor_;
- };
+ //==========
+ // Triggers
+ //==========
+
+ /// Progress trigger; the default implementation does nothing.
+ virtual void on_progress();
+ /// Progress-label trigger; the default implementation prints
+ /// \p label on std::cout when \c verbose is set.
+ virtual void on_new_progress_label(const char *label);
+
+ // Attributes
+ /// Read by the default on_new_progress_label(); set to true by the
+ /// constructor.
+ bool verbose;
+ };
# ifndef MLN_INCLUDE_ONLY
+ // Default constructor: starts with verbose set to true.
+ inline Toolchain_Functor::Toolchain_Functor()
+   : verbose(true)
+ {}
+
- inline
- dmax_default::dmax_default(const float& dmax_factor)
- : super_(dmax_factor)
- {
- }
+ //==========
+ // Triggers
+ //==========
+ // Default progress trigger: deliberately empty.
+ inline void
+ Toolchain_Functor::on_progress()
+ {}
-# endif // ! MLN_INCLUDE_ONLY
+ // Default label trigger: prints label on its own line on std::cout when
+ // verbose is set.  A null label is ignored, because streaming a null
+ // const char* is undefined behaviour.  std::endl is kept on purpose so
+ // that each label is flushed as soon as it is announced.
+ inline
+ void Toolchain_Functor::on_new_progress_label(const char *label)
+ {
+   if (verbose && label)
+     std::cout << label << std::endl;
+ }
+# endif // ! MLN_INCLUDE_ONLY
- } // end of namespace scribo::primitive::link::internal
- } // end of namespace scribo::primitive::link
+ } // end of namespace scribo::toolchain::internal
- } // end of namespace scribo::primitive
+ } // end of namespace scribo::toolchain
} // end of namespace scribo
-
-#endif // ! SCRIBO_PRIMITIVE_LINK_INTERNAL_DMAX_DEFAULT_HH
+#endif // ! SCRIBO_TOOLCHAIN_INTERNAL_TOOLCHAIN_FUNCTOR_HH
diff --git a/scribo/scribo/toolchain/text_in_doc.hh
b/scribo/scribo/toolchain/text_in_doc.hh
index 82094d4..0ad6cf3 100644
--- a/scribo/scribo/toolchain/text_in_doc.hh
+++ b/scribo/scribo/toolchain/text_in_doc.hh
@@ -31,38 +31,7 @@
///
/// Extract text from a document.
-# include <mln/io/ppm/save.hh>
-
-# include <scribo/core/def/lbl_type.hh>
-
-# include <scribo/primitive/extract/components.hh>
-# include <scribo/primitive/extract/vertical_separators.hh>
-# include <scribo/primitive/extract/separators_nonvisible.hh>
-
-# include <scribo/primitive/remove/separators.hh>
-
-# include <scribo/filter/object_links_bbox_h_ratio.hh>
-# include <scribo/filter/objects_small.hh>
-
-# include <scribo/primitive/group/from_single_link.hh>
-
-# include <scribo/primitive/link/merge_double_link.hh>
-# include <scribo/primitive/link/internal/dmax_width_and_height.hh>
-# include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
-# include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
-
-# include <scribo/preprocessing/denoise_fg.hh>
-
-# include <scribo/text/recognition.hh>
-# include <scribo/text/merging.hh>
-
-# include <scribo/make/debug_filename.hh>
-
-# include <scribo/debug/save_bboxes_image.hh>
-# include <scribo/debug/bboxes_enlarged_image.hh>
-# include <scribo/debug/mean_and_base_lines_image.hh>
-# include <scribo/debug/looks_like_a_text_line_image.hh>
-
+# include <scribo/toolchain/internal/text_in_doc_functor.hh>
namespace scribo
{
@@ -75,7 +44,10 @@ namespace scribo
template <typename I>
line_set<mln_ch_value(I, def::lbl_type)>
- text_in_doc(const Image<I>& input, bool denoise, bool debug = false);
+ text_in_doc(const Image<I>& input, bool denoise,
+ bool find_line_seps = true,
+ bool find_whitespace_seps = true,
+ bool debug = false);
# ifndef MLN_INCLUDE_ONLY
@@ -83,211 +55,18 @@ namespace scribo
+ // Thin wrapper around internal::text_in_doc_functor<I>: copies the
+ // arguments into the functor's enable_* members and returns the lines
+ // obtained by applying it to input.  The default values are given on the
+ // declaration only: a default argument may not be specified again on a
+ // later declaration of the same function, not even with the same value.
template <typename I>
line_set<mln_ch_value(I, def::lbl_type)>
- text_in_doc(const Image<I>& input, bool denoise, bool debug = false)
+ text_in_doc(const Image<I>& input, bool denoise,
+ bool find_line_seps,
+ bool find_whitespace_seps,
+ bool debug)
{
- typedef value::label<30> V;
- typedef image2d<V> L;
-
- // Remove separators
- if (debug)
- std::cout << "Find vertical separators..." << std::endl;
-
- // Vertical separators
- image2d<bool>
- separators = primitive::extract::vertical_separators(input, 81);
-
- if (debug)
- std::cout << "Remove separators..." << std::endl;
-
- image2d<bool> input_cleaned = primitive::remove::separators(input,
- separators);
-
- // Whitespace separators
- if (debug)
- std::cout << "Find whitespace separators..." << std::endl;
-
- image2d<bool>
- whitespaces = primitive::extract::separators_nonvisible(input);
-
- if (debug)
- {
- mln::io::pbm::save(whitespaces,
- scribo::make::debug_filename("whitespaces.pbm"));
-
- mln::io::pbm::save(separators,
- scribo::make::debug_filename("vseparators.pbm"));
-
- mln::io::pbm::save(input_cleaned,
- scribo::make::debug_filename("input_wo_vseparators.pbm"));
- }
-
- // Denoise
- if (denoise)
- {
- if (debug)
- std::cout << "Denoise..." << std::endl;
-
- input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), 3);
-
- if (debug)
- mln::io::pbm::save(input_cleaned,
- scribo::make::debug_filename("denoised.pbm"));
- }
-
- /// Finding components.
- if (debug)
- std::cout << "Finding components..." << std::endl;
-
- V ncomponents;
- component_set<L>
- components = scribo::primitive::extract::components(input_cleaned, c8(),
- ncomponents);
-
- /// Set separator components.
- components.add_separators(separators);
- components.add_separators(whitespaces);
-
- components = scribo::filter::components_small(components, 3);
-
- if (debug)
- mln::io::pbm::save(components.separators(),
- scribo::make::debug_filename("all_separators.pbm"));
-
- /// Linking potential objects
- if (debug)
- std::cout << "Linking objects..." << std::endl;
-
- object_links<L> left_link
- = primitive::link::with_single_left_link_dmax_ratio(components,
- primitive::link::internal::dmax_width_and_height(1),
- anchor::MassCenter);
- object_links<L> right_link
- = primitive::link::with_single_right_link_dmax_ratio(components,
- primitive::link::internal::dmax_width_and_height(1),
- anchor::MassCenter);
-
- // Validating left and right links.
- object_links<L>
- merged_links = primitive::link::merge_double_link(left_link,
- right_link);
-
-
- // Remove links if bboxes have too different sizes.
- object_links<L> hratio_filtered_links
- = filter::object_links_bbox_h_ratio(merged_links, 2.5f);
-
-
-// #ifndef NOUT
-// if (argc == 4)
-// {
-// image2d<value::rgb8>
-// hratio_decision_image = scribo::debug::decision_image(input,
-// merged_links,
-// hratio_filtered_links);
-// io::ppm::save(hratio_decision_image,
-// scribo::make::debug_filename("hratio_links_decision_image.ppm"));
-// }
-// #endif
-
-
- object_groups<L>
- groups = primitive::group::from_single_link(hratio_filtered_links);
-
-
- // Construct a line set.
- line_set<L>
- lines = scribo::make::line_set(groups);
-
-
-
- //===== DEBUG =====
-
- if (debug)
- {
-
- // Bboxes image.
- scribo::debug::save_bboxes_image(input, lines,
- scribo::make::debug_filename("step1_bboxes.ppm"));
-
- // Bboxes enlarged
- mln::io::ppm::save(scribo::debug::bboxes_enlarged_image(input, lines),
- scribo::make::debug_filename("step1_bboxes_enlarged.ppm"));
-
- // Looks like a text line
- mln::io::ppm::save(scribo::debug::looks_like_a_text_line_image(input, lines),
- scribo::make::debug_filename("step1_looks_like_a_text_line.ppm"));
-
-
- // mean and base lines.
- mln::io::ppm::save(scribo::debug::mean_and_base_lines_image(input, lines),
- scribo::make::debug_filename("step1_x_height.ppm"));
-
- }
- //===== END OF DEBUG =====
-
-
-
- if (debug)
- std::cout << "Merging lines..." << std::endl;
- lines = scribo::text::merging(lines);
-
-
-
- //===== DEBUG =====
-
- if (debug)
- {
-
- // mean and base lines.
- mln::io::ppm::save(scribo::debug::mean_and_base_lines_image(input, lines),
- scribo::make::debug_filename("step2_x_height.ppm"));
-
- // Looks like a text line
- mln::io::ppm::save(scribo::debug::looks_like_a_text_line_image(input, lines),
- scribo::make::debug_filename("step2_looks_like_a_text_line.ppm"));
-
- // Bboxes image.
- scribo::debug::save_bboxes_image(input, lines,
- scribo::make::debug_filename("step2_bboxes.ppm"));
-
-
- }
-
-
-
- if (debug)
- {
- std::ofstream
file(scribo::make::debug_filename("step2_bboxes_100p.txt").c_str());
-
- for_all_lines(l, lines)
- if (lines(l).tag() != line::Merged
- && lines(l).tag() != line::Ignored
- && lines(l).tag() != line::Pathological)
- {
- file << lines(l).bbox().pmin().row() << " "
- << lines(l).bbox().pmin().col() << " "
- << lines(l).bbox().pmax().row() << " "
- << lines(l).bbox().pmax().col() << " "
- << lines(l).card() << " "
- << lines(l).baseline() << " "
- << lines(l).x_height() << " "
- << lines(l).meanline() << " "
- << lines(l).d_height() << " "
- << lines(l).a_height() << " "
- << lines(l).char_space() << " "
- << lines(l).char_width() << std::endl;
-
- }
-
- file.close();
- }
-
-
- //===== END OF DEBUG =====
-
-
+ // Configure the functor from the arguments.
+ internal::text_in_doc_functor<I> f;
+ f.enable_denoising = denoise;
+ f.enable_line_seps = find_line_seps;
+ f.enable_whitespace_seps = find_whitespace_seps;
+ f.enable_debug = debug;
- scribo::text::recognition(lines, "eng");
+ // Run the toolchain on the input image.
+ line_set<mln_ch_value(I, def::lbl_type)> lines = f(input);
return lines;
}
diff --git a/scribo/scribo/toolchain/text_in_doc_preprocess.hh
b/scribo/scribo/toolchain/text_in_doc_preprocess.hh
index 00a626e..20fea02 100644
--- a/scribo/scribo/toolchain/text_in_doc_preprocess.hh
+++ b/scribo/scribo/toolchain/text_in_doc_preprocess.hh
@@ -40,6 +40,7 @@
#include <scribo/preprocessing/split_bg_fg.hh>
#include <scribo/preprocessing/deskew.hh>
+#include <scribo/toolchain/internal/text_in_doc_preprocess_functor.hh>
namespace scribo
{
@@ -53,6 +54,7 @@ namespace scribo
\param[in] input An image.
\param[in] enable_fg_bg Enable/Disable background removal.
+ \param[in] K Binarization threshold parameter. (Default 0.34)
If \p enable_fg_bg is set to 'True' then a background removal is
performed. Its parameter lambda is automatically set according
@@ -61,8 +63,16 @@ namespace scribo
*/
template <typename I>
mln_ch_value(I,bool)
+ text_in_doc_preprocess(const Image<I>& input, bool enable_fg_bg, double
K);
+
+ /*! \overload
+ K is set to 0.34.
+ */
+ template <typename I>
+ mln_ch_value(I,bool)
text_in_doc_preprocess(const Image<I>& input, bool enable_fg_bg);
+
/*! \brief Preprocess a document before looking for its content.
\param[in] input An image.
@@ -81,6 +91,7 @@ namespace scribo
\param[in] input An image.
\param[in] lambda Parameter to the background removal.
+ \param[in] K Binarization threshold parameter. (Default 0.34)
+ \param[in] enable_fg_bg Enable/Disable background removal.
\param[in,out] fg The foreground layer of \p input.
If lambda is set to '0' no background removal is
@@ -91,7 +102,7 @@ namespace scribo
template <typename I>
mln_ch_value(I,bool)
text_in_doc_preprocess(const Image<I>& input, unsigned lambda,
- Image<I>& fg);
+ double K, bool enable_fg_bg, Image<I>& fg);
# ifndef MLN_INCLUDE_ONLY
@@ -99,7 +110,15 @@ namespace scribo
+ // Convenience overload: same as the (input, enable_fg_bg, K) form with
+ // K fixed to 0.34.
template <typename I>
mln_ch_value(I,bool)
- text_in_doc_preprocess(const Image<I>& input_, bool enable_fg_bg)
+ text_in_doc_preprocess(const Image<I>& input, bool enable_fg_bg)
+ {
+   const double default_K = 0.34;
+   return text_in_doc_preprocess(input, enable_fg_bg, default_K);
+ }
+
+
+ // Preprocess input with binarization parameter K.  When enable_fg_bg is
+ // set, lambda is computed from the image size as 1.2 * (nrows + ncols).
+ // The foreground layer produced by the full overload is received in a
+ // local image and not returned to the caller.
+ template <typename I>
+ mln_ch_value(I,bool)
+ text_in_doc_preprocess(const Image<I>& input_, bool enable_fg_bg, double K)
{
const I& input = exact(input_);
mln_precondition(input.is_valid());
@@ -108,7 +127,9 @@ namespace scribo
if (enable_fg_bg)
lambda = 1.2 * (input.nrows() + input.ncols());
- mln_ch_value(I,bool) output = text_in_doc_preprocess(input, lambda);
+ // Declared as I, like the scratch image of the (input, lambda) overload:
+ // the callee takes Image<I>&, so an image of any other type would make
+ // the deduction of I inconsistent between the two image arguments.
+ I tmp_fg;
+ mln_ch_value(I,bool)
+ output = text_in_doc_preprocess(input, lambda, K, enable_fg_bg, tmp_fg);
return output;
}
@@ -118,48 +139,33 @@ namespace scribo
text_in_doc_preprocess(const Image<I>& input, unsigned lambda)
{
I tmp;
- return text_in_doc_preprocess(input, lambda, tmp);
+ // Foreground extraction is requested only for a non-zero lambda, as in
+ // the previous implementation, which skipped it when lambda == 0.
+ // K is fixed to 0.34; the foreground layer received in tmp is dropped.
+ return text_in_doc_preprocess(input, lambda, 0.34, lambda != 0, tmp);
}
template <typename I>
mln_ch_value(I,bool)
text_in_doc_preprocess(const Image<I>& input_, unsigned lambda,
- Image<I>& fg_)
+ double K, bool enable_fg_bg, Image<I>& fg)
{
trace::entering("scribo::toolchain::text_in_doc_preprocess");
const I& input = exact(input_);
- I& fg = exact(fg_);
mln_precondition(input.is_valid());
- mln_concrete(I) input_rgb = input;
-
- // Extract foreground
- if (lambda != 0)
- {
- std::cout << "Extracting foreground..." << std::endl;
- input_rgb = preprocessing::split_bg_fg(input, lambda, 32).second();
- fg = input_rgb;
- }
+ internal::text_in_doc_preprocess_functor<I> f;
- // Convert to Gray level image.
- mln_ch_value(I,value::int_u8)
- input_gl = data::transform(input_rgb,
- mln::fun::v2v::rgb_to_int_u<8>());
+ // Setup functor.
+ f.sauvola_K = K;
+ f.enable_fg_extraction = enable_fg_bg;
+ f.lambda = lambda;
-
- // Deskewing
- std::cout << "Deskew if needed..." << std::endl;
- input_gl = preprocessing::deskew(input_gl);
-
- // Binarize foreground to use it in the processing chain.
- std::cout << "Binarizing foreground..." << std::endl;
- mln_ch_value(I,bool)
- input_bin = scribo::binarization::sauvola_ms(input_gl, 101, 3);
+ // Get results.
+ mln_ch_value(I,bool) output = f(input);
+ exact(fg) = f.fg;
trace::exiting("scribo::toolchain::text_in_doc_preprocess");
- return input_bin;
+ return output;
}
--
1.5.6.5