---
scribo/scribo/core/line_info.hh | 2 -
scribo/scribo/core/paragraph_info.hh | 18 +++
scribo/scribo/core/paragraph_set.hh | 84 +++++++++++-
scribo/scribo/core/stats.hh | 40 ++++--
scribo/scribo/draw/line_components.hh | 113 ++++++++++++++++
scribo/scribo/text/paragraphs.hh | 65 +---------
scribo/scribo/text/paragraphs_closing.hh | 208 ++++++++++++++++++++++++++++++
7 files changed, 443 insertions(+), 87 deletions(-)
create mode 100644 scribo/scribo/draw/line_components.hh
create mode 100644 scribo/scribo/text/paragraphs_closing.hh
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index 9017174..9320416 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -61,8 +61,6 @@
# include <scribo/core/internal/sort_comp_ids.hh>
# include <scribo/core/concept/serializable.hh>
-// DEBUG
-
# include <scribo/core/stats.hh>
namespace scribo
diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh
index af619dd..90db7da 100644
--- a/scribo/scribo/core/paragraph_info.hh
+++ b/scribo/scribo/core/paragraph_info.hh
@@ -76,6 +76,9 @@ namespace scribo
bool needs_stats_update() const;
void force_stats_update();
+ void set_delta_baseline(const int delta_baseline);
+ int delta_baseline() const;
+
private:
mln::util::array<line_id_t> line_ids_;
mln::accu::shape::bbox<mln_site(L)> bbox_;
@@ -84,6 +87,7 @@ namespace scribo
mln::value::rgb8 color_;
float color_reliability_;
+ int delta_baseline_;
bool needs_stats_update_;
bool is_valid_;
};
@@ -259,6 +263,20 @@ namespace scribo
}
template <typename L>
+  void
+  paragraph_info<L>::set_delta_baseline(const int delta_baseline)
+  { // Setter: stores the given value in the delta_baseline_ member.
+    this->delta_baseline_ = delta_baseline;
+  }
+
+  template <typename L>
+  int
+  paragraph_info<L>::delta_baseline() const
+  { // Getter: returns the value last stored by set_delta_baseline().
+    return this->delta_baseline_;
+  }
+
+ template <typename L>
bool
operator==(const paragraph_info<L>& lhs, const paragraph_info<L>&
rhs)
{
diff --git a/scribo/scribo/core/paragraph_set.hh b/scribo/scribo/core/paragraph_set.hh
index 65e17c6..9214f20 100644
--- a/scribo/scribo/core/paragraph_set.hh
+++ b/scribo/scribo/core/paragraph_set.hh
@@ -33,6 +33,7 @@
# include <scribo/core/line_links.hh>
# include <scribo/core/line_set.hh>
# include <scribo/core/paragraph_info.hh>
+# include <scribo/core/stats.hh>
# include <scribo/core/concept/serializable.hh>
# include <scribo/core/tag/paragraph.hh>
@@ -106,7 +107,8 @@ namespace scribo
/// \brief Construct a paragraph set from line links information.
template <typename L>
scribo::paragraph_set<L>
- paragraph(const line_links<L>& llinks);
+ paragraph(const line_links<L>& llinks,
+ const line_links<L>& rlinks);
/// \brief Construct a paragraph set from line set information.
template <typename L>
@@ -267,24 +269,62 @@ namespace scribo
unsigned
find_root(line_links<L>& parent, unsigned x)
{
- if (parent(x) == x)
- return x;
- else
- return parent(x) = find_root(parent, parent(x));
+ unsigned tmp_x = x;
+
+ while (parent(tmp_x) != tmp_x)
+ tmp_x = parent(tmp_x);
+
+ while (parent(x) != x)
+ {
+ const unsigned tmp = parent(x);
+ x = parent(x);
+ parent(tmp) = tmp_x;
+ }
+
+ return x;
+ }
+
+ template <typename L>
+ inline
+ void
+ set_root(line_links<L>& parent, unsigned x, const unsigned root) // Re-link x's chain in `parent` so that it ends on `root`.
+ {
+ while (parent(x) != x && parent(x) != root) // stop on a self-linked node or on one already linked to root
+ {
+ const unsigned tmp = parent(x);
+ x = parent(x); // x advances to its parent before anything is rewritten
+ parent(tmp) = root; // so the parent is re-linked; NOTE(review): the starting node keeps its old link - confirm this is intended rather than `parent(x) = root; x = tmp;`
+ }
+
+ parent(x) = root; // the node the walk stopped on is linked to root as well
}
+
} // end of namespace scribo::make::internal
// FIXME: move that code into paragraph_set constructor?
template <typename L>
scribo::paragraph_set<L>
- paragraph(const line_links<L>& llinks)
+ paragraph(const line_links<L>& llinks,
+ const line_links<L>& rlinks)
{
line_links<L> links = llinks.duplicate();
for_all_links(l, links)
+ {
+ const line_id_t current_neighbor = llinks(l);
links(l) = internal::find_root(links, l);
+ const line_id_t root_index = links(l);
+
+ for (unsigned j = 0; j < rlinks.nelements(); ++j)
+ {
+ if (l != j &&
+ current_neighbor != l &&
+ rlinks(j) == l)
+ internal::set_root(links, j, root_index);
+ }
+ }
unsigned npars;
mln::fun::i2v::array<unsigned>
@@ -301,7 +341,37 @@ namespace scribo
}
for_all_paragraphs(p, parset)
- parset(p).force_stats_update();
+ {
+ paragraph_info<L>& current_par = parset(p);
+ stats< float > delta(current_par.nlines());
+
+ // Update stats
+ current_par.force_stats_update();
+
+ // Compute paragraph's delta baseline
+ const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
+ const unsigned nelements = line_ids.nelements();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const line_id_t& current_id = line_ids(i);
+
+ if (llinks(current_id) != current_id)
+ {
+ const line_info<L>& current_line = lines(current_id);
+ const line_info<L>& left_line = lines(llinks(current_id));
+
+ delta.take(left_line.baseline() - current_line.baseline());
+ }
+ }
+
+ int median = delta.median();
+
+ if (!median)
+ median = lines(current_par.line_ids()(0)).x_height();
+
+ current_par.set_delta_baseline(median);
+ }
return parset;
}
diff --git a/scribo/scribo/core/stats.hh b/scribo/scribo/core/stats.hh
index ea00da9..0fcb983 100644
--- a/scribo/scribo/core/stats.hh
+++ b/scribo/scribo/core/stats.hh
@@ -79,10 +79,13 @@ public:
{
mean_ = 0;
- for (unsigned i = 0; i < size_; ++i)
- mean_ += data_[i];
+ if (size_ > 0)
+ {
+ for (unsigned i = 0; i < size_; ++i)
+ mean_ += data_[i];
- mean_ /= size_;
+ mean_ /= size_;
+ }
mean_needs_update_ = false;
}
@@ -93,10 +96,16 @@ public:
{
if (median_needs_update_)
{
- std::vector< T >& data = data_.hook_std_vector_();
- std::sort(data.begin(), data.end(), compare_values< T >());
+ median_ = 0;
+
+ if (size_ > 0)
+ {
+ std::vector< T >& data = data_.hook_std_vector_();
+ std::sort(data.begin(), data.end(), compare_values< T >());
+
+ median_ = data[(size_ - 1) >> 1];
+ }
- median_ = data[(size_ - 1) >> 1];
median_needs_update_ = false;
}
@@ -110,15 +119,18 @@ public:
mean();
variance_ = 0;
- for (unsigned i = 0; i < size_; ++i)
+ if (size_ > 0)
{
- const T tmp = mean_ - data_[i];
+ for (unsigned i = 0; i < size_; ++i)
+ {
+ const T tmp = mean_ - data_[i];
- variance_ += (tmp * tmp);
- }
+ variance_ += (tmp * tmp);
+ }
- variance_ /= size_;
- std_ = sqrt(variance_);
+ variance_ /= size_;
+ std_ = sqrt(variance_);
+ }
variance_needs_update_ = false;
std_needs_update_ = false;
@@ -135,8 +147,8 @@ public:
return std_;
}
- T min() { return min_; }
- T max() { return max_; }
+  T min() { if (size_ > 0) return min_; return 0; } // yields 0 while size_ is 0
+  T max() { if (size_ > 0) return max_; return 0; } // yields 0 while size_ is 0
void sort()
{
diff --git a/scribo/scribo/draw/line_components.hh b/scribo/scribo/draw/line_components.hh
new file mode 100644
index 0000000..878b2c1
--- /dev/null
+++ b/scribo/scribo/draw/line_components.hh
@@ -0,0 +1,113 @@
+// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_DRAW_LINE_COMPONENTS_HH
+# define SCRIBO_DRAW_LINE_COMPONENTS_HH
+
+/// \file
+///
+/// Draw the components of a line
+# include <mln/util/object_id.hh>
+
+# include <scribo/core/line_set.hh>
+# include <scribo/core/component_set.hh>
+
+
+namespace scribo
+{
+ namespace draw
+ {
+
+ using namespace mln;
+
+ template <typename L, typename I>
+ void
+ line_components(Image<I>& input_,
+ const line_info<L>& line,
+ const mln_value(I)& value);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+    template <typename L, typename I>
+    void
+    line_components(Image<I>& input_,
+                    const line_info<L>& line,
+                    const mln_value(I)& value)
+    {
+      trace::entering("scribo::draw::line_components");
+      // Write `value` into input_ on every pixel labeled with a component of `line`.
+      I& input = exact(input_);
+      mln_precondition(input.is_valid());
+
+      // Component data comes from the line set that holds `line`.
+      const line_set<L>& holder = line.holder();
+      const component_set<L>& comp_set = holder.components();
+      const L& labeled_image = comp_set.labeled_image();
+
+      const mln::util::array<component_id_t>& component_ids = line.component_ids();
+
+      for_all_elements(i, component_ids)
+      {
+        const unsigned c = component_ids(i);
+        const mln_value(L) v = c;
+        const box2d& bbox = comp_set(c).bbox();
+        const unsigned nrows = bbox.height();
+        const unsigned ncols = bbox.width();
+        // Both images are walked with raw pointers from the bbox pmin() corner.
+        const unsigned index = labeled_image.index_of_point(bbox.pmin());
+        const unsigned index2 = input.index_of_point(bbox.pmin());
+        const mln_value(L)* ptr_in = &(labeled_image.element(index));
+        mln_value(I)* ptr_out = &(input.element(index2));
+        // Offset from a bbox row end to the next row start; cast before negating.
+        const int row_back = -static_cast<int>(ncols);
+        const int delta = labeled_image.delta_index(dpoint2d(+1, row_back));
+        const int delta2 = input.delta_index(dpoint2d(+1, row_back));
+
+        for (unsigned k = 0; k < nrows; ++k)
+        {
+          for (unsigned j = 0; j < ncols; ++j)
+          {
+            if (*ptr_in == v)
+              *ptr_out = value;
+            ++ptr_out;
+            ++ptr_in;
+          }
+          ptr_out += delta2;
+          ptr_in += delta;
+        }
+      }
+      trace::exiting("scribo::draw::line_components");
+    }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::draw
+} // end of namespace scribo
+
+#endif // ! SCRIBO_DRAW_LINE_COMPONENTS_HH
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index 37817af..620faa7 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -23,50 +23,6 @@ using namespace mln;
namespace scribo
{
-
- namespace internal
- {
-
-//-------------------------------------
-// Extracting root of links
-//-------------------------------------
- template <typename L>
- inline
- unsigned
- find_root(line_links<L>& parent, unsigned x)
- {
- unsigned tmp_x = x;
-
- while (parent(tmp_x) != tmp_x)
- tmp_x = parent(tmp_x);
-
- while (parent(x) != x)
- {
- const unsigned tmp = parent(x);
- x = parent(x);
- parent(tmp) = tmp_x;
- }
-
- return x;
- }
-
- template <typename L>
- inline
- void
- set_root(line_links<L>& parent, unsigned x, const unsigned root)
- {
- while (parent(x) != x && parent(x) != root)
- {
- const unsigned tmp = parent(x);
- x = parent(x);
- parent(tmp) = root;
- }
-
- parent(x) = root;
- }
-
- }
-
namespace filter
{
@@ -799,25 +755,6 @@ namespace scribo
}
}
- // Post link processing
-
- const line_links<L> backup = output.duplicate();
- for (unsigned i = 0; i < output.nelements(); ++i)
- {
- const line_id_t current_neighbor = backup(i);
- output(i) = scribo::internal::find_root(output, i);
- const line_id_t root_index = output(i);
-
- for (unsigned j = 0; j < right.nelements(); ++j)
- {
- if (i != j &&
- current_neighbor != i &&
- right(j) == i)
- scribo::internal::set_root(output, j, root_index);
- }
- }
-
-
// Only debug
// {
// image2d<value::rgb8> debug = data::convert(value::rgb8(), input);
@@ -1352,7 +1289,7 @@ namespace scribo
// std::cout << "Extracting paragraphs" << std::endl;
filter::paragraph_links(left, right, output, lines);
- paragraph_set<L> par_set = make::paragraph(output);
+ paragraph_set<L> par_set = make::paragraph(output, right);
return par_set;
}
}
diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh
new file mode 100644
index 0000000..ec1d5c8
--- /dev/null
+++ b/scribo/scribo/text/paragraphs_closing.hh
@@ -0,0 +1,208 @@
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_TEXT_PARAGRAPHS_CLOSING_HH
+# define SCRIBO_TEXT_PARAGRAPHS_CLOSING_HH
+
+/// \file
+///
+/// Paragraphs closing using CRLA.
+
+namespace scribo
+{
+
+ namespace text
+ {
+
+ using namespace mln;
+
+ template< typename L, typename V >
+ void
+ paragraphs_closing(image2d<V>& output,
+ const paragraph_set<L>& par_set,
+ const box2d& domain);
+
+# ifndef MLN_INCLUDE_ONLY
+
+    template< typename V >
+    inline
+    void horizontal_CRLA(const image2d<V>& input,
+                         image2d<V>& output,
+                         const mln::util::array<int>& deltas)
+    {
+      // Row-wise constrained closing: a background gap is filled only when the
+      // same label bounds it on both sides and it is narrower than
+      // deltas(label - 1).  Foreground pixels are copied to output unchanged.
+      const unsigned nrows = input.nrows();
+      const unsigned ncols = input.ncols();
+      for (unsigned i = 0; i < nrows; ++i)
+      {
+        // Reset per row so a label ending one row cannot open a gap in the next.
+        V last_pixel_value = 0;
+        for (unsigned j = 0; j < ncols; ++j)
+        {
+          const V current_pixel = input.at_(i, j);
+          if (current_pixel)
+          {
+            output.at_(i, j) = current_pixel;
+            last_pixel_value = current_pixel;
+            continue;
+          }
+          if (!last_pixel_value)
+            continue;
+
+          // Find the end of the gap; test the bound before reading the pixel.
+          unsigned k = j + 1;
+          while (k < ncols && !input.at_(i, k))
+            ++k;
+          const int count = k - j;
+          const int threshold = deltas(last_pixel_value - 1);
+
+          // A gap running off the row has no right neighbor and stays open.
+          if (k < ncols && last_pixel_value == input.at_(i, k) && count < threshold)
+            for (unsigned l = j; l < k; ++l)
+              output.at_(i, l) = last_pixel_value;
+          // Resume on pixel k itself (++j follows) so it gets copied as well.
+          j = k - 1;
+          last_pixel_value = 0;
+        }
+      }
+    }
+
+    template< typename V >
+    inline
+    void vertical_CRLA(const image2d<V>& input,
+                       image2d<V>& output,
+                       const mln::util::array<int>& deltas)
+    {
+      // Column-wise constrained closing: a background gap is filled only when
+      // the same label bounds it above and below and it is shorter than
+      // deltas(label - 1).  Foreground pixels are copied to output unchanged.
+      const unsigned nrows = input.nrows();
+      const unsigned ncols = input.ncols();
+
+      for (unsigned j = 0; j < ncols; ++j)
+      {
+        // Reset per column so a label ending one column cannot open a gap in the next.
+        V last_pixel_value = 0;
+        for (unsigned i = 0; i < nrows; ++i)
+        {
+          const V current_pixel = input.at_(i, j);
+          if (current_pixel)
+          {
+            output.at_(i, j) = current_pixel;
+            last_pixel_value = current_pixel;
+            continue;
+          }
+          if (!last_pixel_value)
+            continue;
+
+          // Find the end of the gap; test the bound before reading the pixel.
+          unsigned k = i + 1;
+          while (k < nrows && !input.at_(k, j))
+            ++k;
+          const int count = k - i;
+          const int threshold = deltas(last_pixel_value - 1);
+
+          // A gap running off the column has no lower neighbor and stays open.
+          if (k < nrows && last_pixel_value == input.at_(k, j) && count < threshold)
+            for (unsigned l = i; l < k; ++l)
+              output.at_(l, j) = last_pixel_value;
+          // Resume on pixel k itself (++i follows) so it gets copied as well.
+          i = k - 1;
+          last_pixel_value = 0;
+        }
+      }
+    }
+
+ template< typename V >
+ inline
+ void CRLA(const image2d<V>& input,
+ image2d<V>& output,
+ const mln::util::array<int>& deltas,
+ const mln::util::array<int>& deltas_factor)
+ { // Chains three closing passes; the last two read and write `output` in place.
+ horizontal_CRLA(input, output, deltas_factor); // horizontal pass from input into output, thresholds from deltas_factor
+ vertical_CRLA(output, output, deltas); // vertical pass over that result, thresholds from deltas
+ horizontal_CRLA(output, output, deltas_factor); // second horizontal pass, again with deltas_factor
+ }
+
+    template< typename L, typename V >
+    void
+    paragraphs_closing(image2d<V>& output,
+                       const paragraph_set<L>& par_set,
+                       const box2d& domain)
+    {
+      trace::entering("scribo::text::paragraphs_closing");
+
+      // Label image: each paragraph's components are drawn with its id p.
+      image2d<V> labels(domain);
+      data::fill(labels, 0);
+      data::fill(output, 0);
+
+      // Closing thresholds, appended in paragraph iteration order; the CRLA passes
+      // read them at (label - 1) - assumes ids are contiguous from 1, TODO confirm.
+      mln::util::array<int> deltas;
+      deltas.reserve(par_set.nelements());
+      mln::util::array<int> deltas_factor;
+      deltas_factor.reserve(par_set.nelements());
+
+      const line_set<L>& lines = par_set.lines();
+
+      for_all_paragraphs(p, par_set)
+      {
+        const paragraph_info<L>& current_par = par_set(p);
+        const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
+        const unsigned nelements = line_ids.nelements();
+
+        for (unsigned i = 0; i < nelements; ++i)
+          draw::line_components(labels, lines(line_ids(i)), p);
+
+        // Even spacings are lowered by one, giving an odd threshold.
+        // NOTE(review): a non-positive delta disables closing for that paragraph.
+        int delta_baseline = current_par.delta_baseline();
+        if (delta_baseline % 2 == 0)
+          --delta_baseline;
+
+        deltas.append(delta_baseline);
+        // Horizontal passes use three times the vertical threshold.
+        deltas_factor.append(3 * delta_baseline);
+      }
+
+      CRLA(labels, output, deltas, deltas_factor);
+
+      trace::exiting("scribo::text::paragraphs_closing");
+    }
+
+# endif
+
+ } // end of namespace scribo::text
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_TEXT_PARAGRAPHS_CLOSING_HH
+
--
1.5.6.5