last-svn-commit-898-gd777dff Paragraphs closing

--- scribo/scribo/core/line_info.hh | 2 - scribo/scribo/core/paragraph_info.hh | 18 +++ scribo/scribo/core/paragraph_set.hh | 84 +++++++++++- scribo/scribo/core/stats.hh | 40 ++++-- scribo/scribo/draw/line_components.hh | 113 ++++++++++++++++ scribo/scribo/text/paragraphs.hh | 65 +--------- scribo/scribo/text/paragraphs_closing.hh | 208 ++++++++++++++++++++++++++++++ 7 files changed, 443 insertions(+), 87 deletions(-) create mode 100644 scribo/scribo/draw/line_components.hh create mode 100644 scribo/scribo/text/paragraphs_closing.hh diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh index 9017174..9320416 100644 --- a/scribo/scribo/core/line_info.hh +++ b/scribo/scribo/core/line_info.hh @@ -61,8 +61,6 @@ # include <scribo/core/internal/sort_comp_ids.hh> # include <scribo/core/concept/serializable.hh> -// DEBUG - # include <scribo/core/stats.hh> namespace scribo diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh index af619dd..90db7da 100644 --- a/scribo/scribo/core/paragraph_info.hh +++ b/scribo/scribo/core/paragraph_info.hh @@ -76,6 +76,9 @@ namespace scribo bool needs_stats_update() const; void force_stats_update(); + void set_delta_baseline(const int delta_baseline); + int delta_baseline() const; + private: mln::util::array<line_id_t> line_ids_; mln::accu::shape::bbox<mln_site(L)> bbox_; @@ -84,6 +87,7 @@ namespace scribo mln::value::rgb8 color_; float color_reliability_; + int delta_baseline_; bool needs_stats_update_; bool is_valid_; }; @@ -259,6 +263,20 @@ namespace scribo } template <typename L> + void + paragraph_info<L>::set_delta_baseline(const int delta_baseline) + { + delta_baseline_ = delta_baseline; + } + + template <typename L> + int + paragraph_info<L>::delta_baseline() const + { + return delta_baseline_; + } + + template <typename L> bool operator==(const paragraph_info<L>& lhs, const paragraph_info<L>& rhs) { diff --git a/scribo/scribo/core/paragraph_set.hh 
b/scribo/scribo/core/paragraph_set.hh index 65e17c6..9214f20 100644 --- a/scribo/scribo/core/paragraph_set.hh +++ b/scribo/scribo/core/paragraph_set.hh @@ -33,6 +33,7 @@ # include <scribo/core/line_links.hh> # include <scribo/core/line_set.hh> # include <scribo/core/paragraph_info.hh> +# include <scribo/core/stats.hh> # include <scribo/core/concept/serializable.hh> # include <scribo/core/tag/paragraph.hh> @@ -106,7 +107,8 @@ namespace scribo /// \brief Construct a paragraph set from line links information. template <typename L> scribo::paragraph_set<L> - paragraph(const line_links<L>& llinks); + paragraph(const line_links<L>& llinks, + const line_links<L>& rlinks); /// \brief Construct a paragraph set from line set information. template <typename L> @@ -267,24 +269,62 @@ namespace scribo unsigned find_root(line_links<L>& parent, unsigned x) { - if (parent(x) == x) - return x; - else - return parent(x) = find_root(parent, parent(x)); + unsigned tmp_x = x; + + while (parent(tmp_x) != tmp_x) + tmp_x = parent(tmp_x); + + while (parent(x) != x) + { + const unsigned tmp = parent(x); + x = parent(x); + parent(tmp) = tmp_x; + } + + return x; + } + + template <typename L> + inline + void + set_root(line_links<L>& parent, unsigned x, const unsigned root) + { + while (parent(x) != x && parent(x) != root) + { + const unsigned tmp = parent(x); + x = parent(x); + parent(tmp) = root; + } + + parent(x) = root; } + } // end of namespace scribo::make::internal // FIXME: move that code into paragraph_set constructor? 
template <typename L> scribo::paragraph_set<L> - paragraph(const line_links<L>& llinks) + paragraph(const line_links<L>& llinks, + const line_links<L>& rlinks) { line_links<L> links = llinks.duplicate(); for_all_links(l, links) + { + const line_id_t current_neighbor = llinks(l); links(l) = internal::find_root(links, l); + const line_id_t root_index = links(l); + + for (unsigned j = 0; j < rlinks.nelements(); ++j) + { + if (l != j && + current_neighbor != l && + rlinks(j) == l) + internal::set_root(links, j, root_index); + } + } unsigned npars; mln::fun::i2v::array<unsigned> @@ -301,7 +341,37 @@ namespace scribo } for_all_paragraphs(p, parset) - parset(p).force_stats_update(); + { + paragraph_info<L>& current_par = parset(p); + stats< float > delta(current_par.nlines()); + + // Update stats + current_par.force_stats_update(); + + // Compute paragraph's delta baseline + const mln::util::array<line_id_t>& line_ids = current_par.line_ids(); + const unsigned nelements = line_ids.nelements(); + + for (unsigned i = 0; i < nelements; ++i) + { + const line_id_t& current_id = line_ids(i); + + if (llinks(current_id) != current_id) + { + const line_info<L>& current_line = lines(current_id); + const line_info<L>& left_line = lines(llinks(current_id)); + + delta.take(left_line.baseline() - current_line.baseline()); + } + } + + int median = delta.median(); + + if (!median) + median = lines(current_par.line_ids()(0)).x_height(); + + current_par.set_delta_baseline(median); + } return parset; } diff --git a/scribo/scribo/core/stats.hh b/scribo/scribo/core/stats.hh index ea00da9..0fcb983 100644 --- a/scribo/scribo/core/stats.hh +++ b/scribo/scribo/core/stats.hh @@ -79,10 +79,13 @@ public: { mean_ = 0; - for (unsigned i = 0; i < size_; ++i) - mean_ += data_[i]; + if (size_ > 0) + { + for (unsigned i = 0; i < size_; ++i) + mean_ += data_[i]; - mean_ /= size_; + mean_ /= size_; + } mean_needs_update_ = false; } @@ -93,10 +96,16 @@ public: { if (median_needs_update_) { - std::vector< T 
>& data = data_.hook_std_vector_(); - std::sort(data.begin(), data.end(), compare_values< T >()); + median_ = 0; + + if (size_ > 0) + { + std::vector< T >& data = data_.hook_std_vector_(); + std::sort(data.begin(), data.end(), compare_values< T >()); + + median_ = data[(size_ - 1) >> 1]; + } - median_ = data[(size_ - 1) >> 1]; median_needs_update_ = false; } @@ -110,15 +119,18 @@ public: mean(); variance_ = 0; - for (unsigned i = 0; i < size_; ++i) + if (size_ > 0) { - const T tmp = mean_ - data_[i]; + for (unsigned i = 0; i < size_; ++i) + { + const T tmp = mean_ - data_[i]; - variance_ += (tmp * tmp); - } + variance_ += (tmp * tmp); + } - variance_ /= size_; - std_ = sqrt(variance_); + variance_ /= size_; + std_ = sqrt(variance_); + } variance_needs_update_ = false; std_needs_update_ = false; @@ -135,8 +147,8 @@ public: return std_; } - T min() { return min_; } - T max() { return max_; } + T min() { return (size_ > 0) ? min_ : 0; } + T max() { return (size_ > 0) ? max_ : 0; } void sort() { diff --git a/scribo/scribo/draw/line_components.hh b/scribo/scribo/draw/line_components.hh new file mode 100644 index 0000000..878b2c1 --- /dev/null +++ b/scribo/scribo/draw/line_components.hh @@ -0,0 +1,113 @@ +// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory +// (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. 
+// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef SCRIBO_DRAW_LINE_COMPONENTS_HH +# define SCRIBO_DRAW_LINE_COMPONENTS_HH + +/// \file +/// +/// Draw the components of a line +# include <mln/util/object_id.hh> + +# include <scribo/core/line_set.hh> +# include <scribo/core/component_set.hh> + + +namespace scribo +{ + namespace draw + { + + using namespace mln; + + template <typename L, typename I> + void + line_components(Image<I>& input_, + const line_info<L>& line, + const mln_value(I)& value); + + +# ifndef MLN_INCLUDE_ONLY + + + template <typename L, typename I> + void + line_components(Image<I>& input_, + const line_info<L>& line, + const mln_value(I)& value) + { + trace::entering("scribo::draw::line_components"); + + I& input = exact(input_); + + mln_precondition(input.is_valid()); + + const line_set<L>& holder = line.holder(); + const component_set<L>& comp_set = holder.components(); + const L& labeled_image = holder.components().labeled_image(); + + const mln::util::array<component_id_t>& component_ids = line.component_ids(); + + for_all_elements(i, component_ids) + { + const unsigned c = component_ids(i); + const mln_value(L) v = c; + const box2d& bbox = comp_set(c).bbox(); + + const unsigned index = labeled_image.index_of_point(bbox.pmin()); + const unsigned index2 = input.index_of_point(bbox.pmin()); + const mln_value(L)* ptr_in = &(labeled_image.element(index)); + mln_value(I)* ptr_out = &(input.element(index2)); + const unsigned nrows = 
bbox.height(); + const unsigned ncols = bbox.width(); + const int delta = labeled_image.delta_index(dpoint2d(+1, -ncols)); + const int delta2 = input.delta_index(dpoint2d(+1, -ncols)); + + for (unsigned k = 0; k < nrows; ++k) + { + for (unsigned j = 0; j < ncols; ++j) + { + if (*ptr_in == v) + *ptr_out = value; + ++ptr_out; + ++ptr_in; + } + + ptr_out += delta2; + ptr_in += delta; + } + } + + trace::exiting("scribo::draw::line_components"); + } + + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace scribo::draw +} // end of namespace scribo + +#endif // ! SCRIBO_DRAW_LINE_COMPONENTS_HH diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh index 37817af..620faa7 100644 --- a/scribo/scribo/text/paragraphs.hh +++ b/scribo/scribo/text/paragraphs.hh @@ -23,50 +23,6 @@ using namespace mln; namespace scribo { - - namespace internal - { - -//------------------------------------- -// Extracting root of links -//------------------------------------- - template <typename L> - inline - unsigned - find_root(line_links<L>& parent, unsigned x) - { - unsigned tmp_x = x; - - while (parent(tmp_x) != tmp_x) - tmp_x = parent(tmp_x); - - while (parent(x) != x) - { - const unsigned tmp = parent(x); - x = parent(x); - parent(tmp) = tmp_x; - } - - return x; - } - - template <typename L> - inline - void - set_root(line_links<L>& parent, unsigned x, const unsigned root) - { - while (parent(x) != x && parent(x) != root) - { - const unsigned tmp = parent(x); - x = parent(x); - parent(tmp) = root; - } - - parent(x) = root; - } - - } - namespace filter { @@ -799,25 +755,6 @@ namespace scribo } } - // Post link processing - - const line_links<L> backup = output.duplicate(); - for (unsigned i = 0; i < output.nelements(); ++i) - { - const line_id_t current_neighbor = backup(i); - output(i) = scribo::internal::find_root(output, i); - const line_id_t root_index = output(i); - - for (unsigned j = 0; j < right.nelements(); ++j) - { - if (i != j && - current_neighbor != 
i && - right(j) == i) - scribo::internal::set_root(output, j, root_index); - } - } - - // Only debug // { // image2d<value::rgb8> debug = data::convert(value::rgb8(), input); @@ -1352,7 +1289,7 @@ namespace scribo // std::cout << "Extracting paragraphs" << std::endl; filter::paragraph_links(left, right, output, lines); - paragraph_set<L> par_set = make::paragraph(output); + paragraph_set<L> par_set = make::paragraph(output, right); return par_set; } } diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh new file mode 100644 index 0000000..ec1d5c8 --- /dev/null +++ b/scribo/scribo/text/paragraphs_closing.hh @@ -0,0 +1,208 @@ +// Copyright (C) 2009, 2010, 2011 EPITA Research and Development +// Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. 
+ +#ifndef SCRIBO_TEXT_PARAGRAPHS_CLOSING_HH +# define SCRIBO_TEXT_PARAGRAPHS_CLOSING_HH + +/// \file +/// +/// Paragraphs closing using CRLA. + +namespace scribo +{ + + namespace text + { + + using namespace mln; + + template< typename L, typename V > + void + paragraphs_closing(image2d<V>& output, + const paragraph_set<L>& par_set, + const box2d& domain); + +# ifndef MLN_INCLUDE_ONLY + + template< typename V > + inline + void horizontal_CRLA(const image2d<V>& input, + image2d<V>& output, + const mln::util::array<int>& deltas) + { + mln_piter(image2d<V>) p(input.domain()); + int count = 0; + unsigned nrows = input.nrows(); + unsigned ncols = input.ncols(); + V last_pixel_value = 0; + + for (unsigned i = 0; i < nrows; ++i) + { + for (unsigned j = 0; j < ncols; ++j) + { + const V& current_pixel = input.at_(i, j); + + if (!current_pixel) + { + if (last_pixel_value) + { + unsigned k = j + 1; + for (; !input.at_(i, k) && (k < ncols); ++k); + + count = k - j; + const int threshold = deltas(last_pixel_value - 1); + + if (last_pixel_value == input.at_(i, k) && count < threshold) + for (unsigned l = j; l <= k; ++l) + output.at_(i, l) = last_pixel_value; + + j = k; + last_pixel_value = 0; + } + } + else + { + output.at_(i, j) = current_pixel; + last_pixel_value = current_pixel; + } + } + } + } + + template< typename V > + inline + void vertical_CRLA(const image2d<V>& input, + image2d<V>& output, + const mln::util::array<int>& deltas) + { + mln_piter(image2d<V>) p(input.domain()); + int count = 0; + unsigned nrows = input.nrows(); + unsigned ncols = input.ncols(); + V last_pixel_value = 0; + + for (unsigned j = 0; j < ncols; ++j) + { + for (unsigned i = 0; i < nrows; ++i) + { + const V& current_pixel = input.at_(i, j); + + if (!current_pixel) + { + if (last_pixel_value) + { + unsigned k = i + 1; + for (; !input.at_(k, j) && (k < nrows); ++k); + + count = k - i; + const int threshold = deltas(last_pixel_value - 1); + + if (last_pixel_value == input.at_(k, j) + && count < 
threshold) + for (unsigned l = i; l <= k; ++l) + output.at_(l, j) = last_pixel_value; + + i = k; + last_pixel_value = 0; + } + } + else + { + output.at_(i, j) = current_pixel; + last_pixel_value = current_pixel; + } + } + } + } + + template< typename V > + inline + void CRLA(const image2d<V>& input, + image2d<V>& output, + const mln::util::array<int>& deltas, + const mln::util::array<int>& deltas_factor) + { + horizontal_CRLA(input, output, deltas_factor); + vertical_CRLA(output, output, deltas); + horizontal_CRLA(output, output, deltas_factor); + } + + template< typename L, typename V > + void + paragraphs_closing(image2d<V>& output, + const paragraph_set<L>& par_set, + const box2d& domain) + { + trace::entering("scribo::text::paragraphs_closing"); + + image2d<V> debug(domain); + + mln::util::array<int> deltas; + deltas.reserve(par_set.nelements()); + mln::util::array<int> deltas_factor; + deltas_factor.reserve(par_set.nelements()); + + data::fill(debug, 0); + data::fill(output, 0); + + const line_set<L>& lines = par_set.lines(); + + for_all_paragraphs(p, par_set) + { + const paragraph_info<L>& current_par = par_set(p); + const mln::util::array<line_id_t>& line_ids = current_par.line_ids(); + const unsigned nelements = line_ids.nelements(); + + for (unsigned i = 0; i < nelements; ++i) + { + const line_id_t& line_id = line_ids(i); + const line_info<L>& current_line = lines(line_id); + + draw::line_components(debug, current_line, p); + } + + int delta_baseline = current_par.delta_baseline(); + + if (delta_baseline % 2 == 0) + --delta_baseline; + + deltas.append(delta_baseline); + deltas_factor.append(3 * delta_baseline); + } + CRLA(debug, output, deltas, deltas_factor); + + trace::exiting("scribo::draw::line_components"); + } + +# endif + + } // end of namespace scribo::text + +} // end of namespace scribo + + +#endif // ! SCRIBO_TEXT_PARAGRAPHS_CLOSING_HH + -- 1.5.6.5
participants (1)
- Julien Marquegnies