last-svn-commit-876-g7f93eff scribo/primitive/extract/non_text_hdoc.hh: New routine for historical documents.

--- scribo/ChangeLog | 5 + scribo/scribo/primitive/extract/non_text_hdoc.hh | 135 ++++++++++++++++++++++ 2 files changed, 140 insertions(+), 0 deletions(-) create mode 100644 scribo/scribo/primitive/extract/non_text_hdoc.hh diff --git a/scribo/ChangeLog b/scribo/ChangeLog index f53ea42..83797f6 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,10 @@ 2011-05-16 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + * scribo/primitive/extract/non_text_hdoc.hh: New routine for + historical documents. + +2011-05-16 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + New object filter. * scribo/fun/v2b/objects_on_border_filter.hh, diff --git a/scribo/scribo/primitive/extract/non_text_hdoc.hh b/scribo/scribo/primitive/extract/non_text_hdoc.hh new file mode 100644 index 0000000..4924189 --- /dev/null +++ b/scribo/scribo/primitive/extract/non_text_hdoc.hh @@ -0,0 +1,135 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory +// (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +/// \file +/// +/// \brief Find in a document the components which are not text. +/// +/// \fixme To be optimized! + +#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HDOC_HH +# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HDOC_HH + +# include <mln/morpho/elementary/dilation.hh> + +# include <mln/draw/box_plain.hh> +# include <mln/morpho/closing/structural.hh> +# include <mln/win/rectangle2d.hh> + +# include <scribo/make/text_components_image.hh> +# include <scribo/make/text_blocks_image.hh> + +# include <scribo/primitive/extract/internal/union.hh> +# include <scribo/debug/logger.hh> + +# include <scribo/filter/objects_small.hh> +# include <scribo/filter/objects_on_border.hh> + +//DEBUG +#include <mln/util/timer.hh> +#include <mln/io/pbm/save.hh> + + +namespace scribo +{ + + namespace primitive + { + + namespace extract + { + + using namespace mln; + + /*! \brief Extract non text components. + + Variant adapted for historical documents. 
+ */ + template <typename L> + component_set<L> + non_text_hdoc(const document<L>& doc, unsigned closing_size); + + +# ifndef MLN_INCLUDE_ONLY + + namespace internal + { + + } // end of namespace scribo::primitive::extract::internal + + + + // FACADE + + template <typename L> + component_set<L> + non_text_hdoc(const document<L>& doc, unsigned closing_size) + { + trace::entering("scribo::primitive::extract::non_text_hdoc"); + + mln_precondition(doc.is_valid()); + mln_precondition(doc.has_text()); + + mln_ch_value(L,bool) + element_image = duplicate(doc.binary_image_wo_seps()); + + for_all_lines(l, doc.lines()) + if (doc.lines()(l).is_textline()) + mln::draw::box_plain(element_image, doc.lines()(l).bbox(), false); + + element_image = morpho::closing::structural(element_image, + win::rectangle2d(closing_size, + closing_size)); + + mln_value(L) ncomps; + component_set<L> + elements = primitive::extract::components(element_image, + c8(), ncomps); + + elements = scribo::filter::components_small(elements, 200); + elements = scribo::filter::components_on_border(elements); + + // Debug + { + debug::logger().log_image(debug::Special, + elements.labeled_image(), + "non_text_hdoc_components"); + } + + trace::exiting("scribo::primitive::extract::non_text_hdoc"); + return elements; + } + +# endif // ! MLN_INCLUDE_ONLY + + + } // end of namespace scribo::primitive::extract + + } // end of namespace scribo::primitive + +} // end of namespace scribo + +#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HDOC_HH -- 1.5.6.5
participants (1)
-
Guillaume Lazzara