* core/component_info.hh,
* core/component_set.hh: Add new methods and an output operator.
* core/line_info.hh: Add new methods and improve stats computation.
* core/line_set.hh: Add new methods.
* core/macros.hh: Add more specific macros.
* core/tag/component.hh,
* core/tag/line.hh: Add output operators.
---
scribo/ChangeLog | 16 ++
scribo/core/component_info.hh | 43 ++++
scribo/core/component_set.hh | 152 +++++++++++-
scribo/core/line_info.hh | 517 ++++++++++++++++++++++++++++++++++-------
scribo/core/line_set.hh | 213 ++++++++---------
scribo/core/macros.hh | 12 +-
scribo/core/tag/component.hh | 63 +++++-
scribo/core/tag/line.hh | 29 ++-
8 files changed, 823 insertions(+), 222 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 2c2f0c2..a24406b 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,19 @@
+2010-03-11 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Improve core classes in Scribo.
+
+ * core/component_info.hh,
+ * core/component_set.hh: Add new methods and an output operator.
+
+ * core/line_info.hh: Add new methods and improve stats computation.
+
+ * core/line_set.hh: Add new methods.
+
+ * core/macros.hh: Add more specific macros.
+
+ * core/tag/component.hh,
+ * core/tag/line.hh: Add output operators.
+
2010-03-09 Guillaume Lazzara <z(a)lrde.epita.fr>
Share internal data in groups and links structures.
diff --git a/scribo/core/component_info.hh b/scribo/core/component_info.hh
index 1e4aaf5..4ee438c 100644
--- a/scribo/core/component_info.hh
+++ b/scribo/core/component_info.hh
@@ -58,10 +58,17 @@ namespace scribo
component_id_t id() const;
const mln::box2d& bbox() const;
const mln::point2d& mass_center() const;
+
+ // The number of pixels in this component.
unsigned card() const;
+
component::Tag tag() const;
void update_tag(component::Tag tag);
+ component::Type type() const;
+ void update_type(component::Type type);
+
+ // The line it is attached to. 0 means an invalid line.
line_id_t line_id() const;
bool is_valid() const;
@@ -71,12 +78,19 @@ namespace scribo
mln::box2d bbox_;
mln::point2d mass_center_;
unsigned card_;
+
component::Tag tag_;
+ component::Type type_;
line_id_t line_id_;
};
+
+ std::ostream&
+ operator<<(std::ostream& ostr, const component_info& info);
+
+
# ifndef MLN_INCLUDE_ONLY
@@ -138,6 +152,21 @@ namespace scribo
tag_ = tag;
}
+
+  /// Return the type currently stored for this component.
+  //
+  // Declared inline: this is a non-template definition living in a
+  // header, so without it every translation unit including the header
+  // would emit its own definition.
+  inline
+  component::Type
+  component_info::type() const
+  {
+    return type_;
+  }
+
+
+  /// Replace the type stored for this component with \p type.
+  //
+  // Declared inline: this is a non-template definition living in a
+  // header, so without it every translation unit including the header
+  // would emit its own definition.
+  inline
+  void
+  component_info::update_type(component::Type type)
+  {
+    type_ = type;
+  }
+
+
component_info::line_id_t
component_info::line_id() const
{
@@ -151,6 +180,20 @@ namespace scribo
}
+  /// Print every attribute of \p info on \p ostr, followed by a
+  /// newline.
+  //
+  // Declared inline: this is a non-template definition living in a
+  // header, so without it every translation unit including the header
+  // would emit its own definition.
+  inline
+  std::ostream&
+  operator<<(std::ostream& ostr, const component_info& info)
+  {
+    return ostr << "component_info("
+                << "id=" << info.id()
+                << ", bbox=" << info.bbox()
+                << ", mass_center=" << info.mass_center()
+                << ", card=" << info.card()
+                << ", tag=" << info.tag()
+                << ", type=" << info.type()
+                << ", line_id=" << info.line_id()
+                << ")" << std::endl;
+  }
+
+
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/core/component_set.hh b/scribo/core/component_set.hh
index 2f9d10e..14cdc4c 100644
--- a/scribo/core/component_set.hh
+++ b/scribo/core/component_set.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -30,6 +31,11 @@
///
/// \brief Definition of a component set.
+# include <cstddef>
+
+# include <mln/core/concept/site_set.hh>
+# include <mln/core/concept/function.hh>
+
+# include <mln/data/fill.hh>
+
# include <mln/util/array.hh>
# include <mln/accu/pair.hh>
@@ -40,6 +46,10 @@
# include <mln/convert/from_to.hh>
+# include <mln/core/image/dmorph/image_if.hh>
+# include <mln/pw/all.hh>
+
+# include <mln/core/routine/duplicate.hh>
# include <scribo/core/macros.hh>
# include <scribo/core/component_info.hh>
@@ -86,6 +96,7 @@ namespace scribo
mln_value(L) ncomps_;
mln::util::array<scribo::component_info> infos_;
+ mln_ch_value(L, bool) separators_;
};
} // end of namespace scribo::internal
@@ -131,13 +142,6 @@ namespace scribo
/// Return component information for a given component id \p id.
component_info& info(const mln_value(L)& id);
-// /// Return component information for a given component id \p id.
-// component_info& operator()(const mln_value(L)& id);
-
-// /// Return component information for a given component id \p id.
-// const component_info& operator()(const mln_value(L)& id) const;
-
-
/// Return component information for a given component id \p id.
component_info& operator()(const component_id_t& id);
@@ -155,12 +159,38 @@ namespace scribo
/// Return the underlying labeled image
const L& labeled_image() const;
+ /// Is this component set valid?
+ bool is_valid() const;
+
+
+ /// Separators components related routines.
+ /// @{
+
+ /// Return true if an image of separator exists.
+ bool has_separators() const;
+
+ /// Add separators in the underlying labeled image.
+ void add_separators(const mln_ch_value(L, bool)& ima);
+
+ /// Return the Boolean image of separators.
+ const mln_ch_value(L, bool)& separators() const;
+
+ /// Remove any existing separators.
+ void clear_separators();
+
+ /// @}
+
+
+
/// Internal methods
/// @{
/// Return all the component infos.
const mln::util::array<scribo::component_info>& infos_() const;
+ /// Unique set Id.
+ unsigned id_() const;
+
/// @}
private:
@@ -171,6 +201,18 @@ namespace scribo
};
+ template <typename L>
+ bool
+ operator==(const component_set<L>& lhs, const component_set<L>&
rhs);
+
+
+ template <typename L>
+ std::ostream&
+ operator<<(std::ostream& ostr, const component_set<L>& info);
+
+
+
+
# ifndef MLN_INCLUDE_ONLY
@@ -195,6 +237,9 @@ namespace scribo
const mln_value(L)& ncomps)
: ima_(ima), ncomps_(ncomps)
{
+ initialize(separators_, ima); // FIXME: do we really want that?
+ mln::data::fill(separators_, false);
+
typedef mln::accu::shape::bbox<mln_site(L)> bbox_accu_t;
typedef mln::accu::center<mln_site(L)> center_accu_t;
typedef mln::accu::pair<bbox_accu_t, center_accu_t> pair_accu_t;
@@ -214,6 +259,9 @@ namespace scribo
const mln::util::array<pair_accu_t>& attribs)
: ima_(ima), ncomps_(ncomps)
{
+ initialize(separators_, ima); // FIXME: do we really want that?
+ mln::data::fill(separators_, false);
+
fill_infos(attribs);
}
@@ -224,6 +272,9 @@ namespace scribo
const mln::util::array<pair_data_t>& attribs)
: ima_(ima), ncomps_(ncomps)
{
+ initialize(separators_, ima); // FIXME: do we really want that?
+ mln::data::fill(separators_, false);
+
fill_infos(attribs);
}
@@ -234,6 +285,8 @@ namespace scribo
const mln::util::array<scribo::component_info>& infos)
: ima_(ima), ncomps_(ncomps), infos_(infos)
{
+ initialize(separators_, ima); // FIXME: do we really want that?
+ mln::data::fill(separators_, false);
}
@@ -247,7 +300,7 @@ namespace scribo
infos_.reserve(static_cast<unsigned>(ncomps_) + 1);
infos_.append(component_info()); // Component 0, i.e. the background.
- for_all_components(i, attribs)
+ for_all_comp_data(i, attribs)
{
component_info info(i, attribs[i].first(),
attribs[i].second(), attribs[i].second_accu().nsites());
@@ -265,7 +318,7 @@ namespace scribo
infos_.reserve(static_cast<unsigned>(ncomps_) + 1);
infos_.append(component_info()); // Component 0, i.e. the background.
- for_all_components(i, attribs)
+ for_all_comp_data(i, attribs)
{
component_info info(i, attribs[i].first,
attribs[i].second.first, attribs[i].second.second);
@@ -379,7 +432,7 @@ namespace scribo
{
const F& f = exact(f_);
- for_all_components(i, data_->infos_)
+ for_all_comp_data(i, data_->infos_)
if (!f(i))
data_->infos_[i].update_tag(tag);
}
@@ -405,6 +458,61 @@ namespace scribo
return this->data_->ima_;
}
+
+  // A component set is valid exactly when its labeled image is.
+  template <typename L>
+  inline
+  bool
+  component_set<L>::is_valid() const
+  {
+    const bool image_ok = data_->ima_.is_valid();
+    return image_ok;
+  }
+
+
+  // Identifier of this set, derived from the address of its data
+  // block.
+  template <typename L>
+  inline
+  unsigned
+  component_set<L>::id_() const
+  {
+    // Convert through std::size_t first: casting a pointer directly
+    // to 'unsigned' is ill-formed where pointers are wider than
+    // 'unsigned' (e.g. LP64 targets).  The final narrowing may drop
+    // high-order bits of the address.
+    const std::size_t address = reinterpret_cast<std::size_t>(data_.ptr_);
+    return static_cast<unsigned>(address);
+  }
+
+
+  // True when the stored separator image is valid.
+  template <typename L>
+  inline
+  bool
+  component_set<L>::has_separators() const
+  {
+    const bool present = data_->separators_.is_valid();
+    return present;
+  }
+
+
+  // Store \p ima as the separator image, replacing the one
+  // previously held.
+  template <typename L>
+  inline
+  void
+  component_set<L>::add_separators(const mln_ch_value(L, bool)& ima)
+  {
+    mln_ch_value(L, bool)& target = data_->separators_;
+    target = ima;
+  }
+
+
+  // Read-only access to the stored separator image.
+  template <typename L>
+  inline
+  const mln_ch_value(L, bool)&
+  component_set<L>::separators() const
+  {
+    const mln_ch_value(L, bool)& stored = data_->separators_;
+    return stored;
+  }
+
+
+  // Drop the separator image by calling destroy() on it.
+  template <typename L>
+  inline
+  void
+  component_set<L>::clear_separators()
+  {
+    mln_ch_value(L, bool)& stored = data_->separators_;
+    stored.destroy();
+  }
+
+
template <typename L>
inline
const mln::util::array<scribo::component_info>&
@@ -419,12 +527,32 @@ namespace scribo
component_set<L>::init_(const component_set<L>& set)
{
data_ = new internal::component_set_data<L>();
- data_->ima_ = set.labeled_image();
+ data_->ima_ = mln::duplicate(set.labeled_image());
data_->ncomps_ = set.nelements();
data_->infos_ = set.infos_();
+ data_->separators_ = set.separators();
}
+  // Two component sets compare equal when their id_() values match.
+  template <typename L>
+  bool
+  operator==(const component_set<L>& lhs, const component_set<L>& rhs)
+  {
+    const unsigned left_id = lhs.id_();
+    const unsigned right_id = rhs.id_();
+    return left_id == right_id;
+  }
+
+  // Print the header line, then each component info visited by
+  // for_all_comps, then the closing bracket.
+  template <typename L>
+  std::ostream&
+  operator<<(std::ostream& ostr, const component_set<L>& info)
+  {
+    ostr << "component_set[" << std::endl;
+
+    for_all_comps(i, info)
+    {
+      ostr << info(i);
+    }
+
+    return ostr << "]" << std::endl;
+  }
+
+
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace scribo
diff --git a/scribo/core/line_info.hh b/scribo/core/line_info.hh
index 42438b0..0445f85 100644
--- a/scribo/core/line_info.hh
+++ b/scribo/core/line_info.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -29,6 +30,9 @@
/// \file
///
/// \brief Line information data structure.
+///
+/// \fixme The meanline should not be stored! The user can deduce it
+/// from the x_height and the baseline.
# include <mln/core/alias/box2d.hh>
@@ -40,13 +44,15 @@
# include <scribo/core/tag/component.hh>
# include <scribo/core/tag/line.hh>
-// # include <scribo/filter/object_links_bottom_aligned.hh>
-// # include <scribo/filter/object_links_top_aligned.hh>
-
+# include <scribo/core/line_set.hh>
+# include <scribo/core/component_set.hh>
namespace scribo
{
+ // Forward declarations.
+ template <typename L> class line_set;
+
typedef mln::util::object_id<scribo::LineId, unsigned> line_id_t;
template <typename L>
@@ -56,18 +62,26 @@ namespace scribo
typedef mln::util::object_id<scribo::LineId, unsigned> line_id_t;
public:
+
+ /// Constructors
+ /// @{
+
line_info();
- line_info(const line_id_t& id,
- const mln::box2d& bbox,
- const mln::util::array<component_id_t>& comps,
- unsigned absolute_median,
- unsigned absolute_baseline,
- unsigned char_space,
- unsigned char_width);
- line_info(const object_links<L>& links,
+
+ line_info(const line_set<L>& holder,
const line_id_t& id,
const mln::util::array<component_id_t>& comps);
+ /// The line id of the target instance is preserved if it is valid.
+ line_info(const line_info<L>& other);
+ /// @}
+
+ /// The line id of the target instance is preserved if it is valid.
+ line_info<L>& operator=(const line_info<L>& other);
+
+ /// If the line info is valid, the line id never changes for a
+ /// given instance.
+ //
line_id_t id() const;
line::Tag tag() const;
@@ -75,12 +89,21 @@ namespace scribo
const mln::box2d& bbox() const;
+ /// Extended bounding box.
+ /// The width is extended with char_width() + 2 * char_space() on each side.
+ /// The height is adjusted to max(a_height, - d_height) on each side.
+ //
+ const mln::box2d& ebbox() const;
+
const mln::util::array<component_id_t>& components() const;
unsigned card() const;
- unsigned baseline() const;
- unsigned median() const;
- int x_height() const;
+ int baseline() const;
+ int meanline() const;
+ int ascent() const;
+ int descent() const;
+
+ unsigned x_height() const;
int d_height() const;
int a_height() const;
@@ -90,7 +113,10 @@ namespace scribo
unsigned word_space() const;
line::ReadingDirection reading_direction() const;
+
line::Type type() const;
+ void update_type(line::Type type);
+
bool reverse_video() const;
float orientation() const;
@@ -101,6 +127,15 @@ namespace scribo
bool is_valid() const;
+ /// Hidden status.
+ ///
+ /// When a line is hidden, it should not be used in routines
+ /// computing data over lines.
+ ///
+ /// @{
+ bool hidden() const;
+ void set_hidden(bool b);
+ /// @}
/// Merge related routines.
/// @{
@@ -109,15 +144,18 @@ namespace scribo
///
/// After this merge, the line is tagged with
/// line::Needs_Precise_Stats_Update.
+ ///
+ /// The \p other line is tagged with line::Merged and if \p hide
+ /// is set to 'True', it is set as hidden as well.
//
- void fast_merge(line_info<L>& other);
+ void fast_merge(line_info<L>& other, bool hide = true);
/// This merge updates the component list and recompute from
/// scratch statistics, bounding box and other line attributes.
///
/// After this merge, the line is tagged with line::None.
//
- void precise_merge(line_info<L>& other);
+ void precise_merge(line_info<L>& other, bool hide = true);
/// @}
@@ -126,21 +164,49 @@ namespace scribo
void force_stats_update();
- private:
+ /// Returns the line set holding this element.
+ const line_set<L>& holder() const;
+
+ /// Returns the delta used to compute the extended bbox.
+ int delta_of_line() const;
+
+ private: // Members
+ void copy_data(const line_info<L>& other);
+
+ /// Enlarge the width of a given bbox \p b with a \p delta.
+ mln::box2d enlarge(const mln::box2d& b, int delta) const;
+
+ /// Update bbox and ebbox_ attributes.
+ void update_bbox_and_ebox(line_info<L>& other);
+
+ /// Update the extended bbox.
+ void update_ebbox();
+
+ mln::box2d merged_ebbox(const scribo::line_info<L>& info_l,
+ const scribo::line_info<L>& info);
+
+ void update_components_type(component::Type type);
+
+ private: // Attributes
+ // WARNING: NEVER FORGET TO UPDATE COPY CONSTRUCTOR REDEFINITION!!!!
+
line_id_t id_;
+ bool hidden_;
line::Tag tag_;
mln::box2d bbox_;
+ mln::box2d ebbox_;
mln::util::array<component_id_t> components_;
- // Value relative to the line bbox.
- unsigned baseline_;
- unsigned median_;
+ // Values relative to the line bbox.
+ int baseline_;
+ int meanline_;
// Values relative to the baseline.
- int x_height_;
+ unsigned x_height_;
int d_height_;
int a_height_;
+ // WARNING: NEVER FORGET TO UPDATE COPY CONSTRUCTOR REDEFINITION!!!!
// Character related stats.
unsigned char_space_;
@@ -166,8 +232,10 @@ namespace scribo
bool indented_;
- // Related object links information.
- const object_links<L>* links_;
+ // Line set holding this element.
+ line_set<L> holder_;
+
+ // WARNING: NEVER FORGET TO UPDATE COPY CONSTRUCTOR REDEFINITION!!!!
};
@@ -181,66 +249,111 @@ namespace scribo
template <typename L>
line_info<L>::line_info()
- : id_(0)
+ : id_(0), hidden_(false)
{
}
+  // Copy the attributes of \p other into this instance.
+  //
+  // The id (and the hidden status that goes with it) is taken from
+  // \p other only when this instance is not valid yet; an instance
+  // which already has an id keeps it.
+  template <typename L>
+  inline
+  void
+  line_info<L>::copy_data(const line_info<L>& other)
+  {
+    const bool adopt_identity = ! is_valid();
+    if (adopt_identity)
+    {
+      id_ = other.id();
+      hidden_ = other.hidden_;
+    }
+
+    // Tag, boxes and component list.
+    tag_ = other.tag();
+    bbox_ = other.bbox();
+    ebbox_ = other.ebbox();
+    components_ = other.components();
+
+    // Vertical metrics.
+    baseline_ = other.baseline();
+    meanline_ = other.meanline();
+    x_height_ = other.x_height();
+    d_height_ = other.d_height();
+    a_height_ = other.a_height();
+
+    // Character and word statistics.
+    char_space_ = other.char_space();
+    char_width_ = other.char_width();
+    word_space_ = other.word_space();
+
+    // Layout attributes.
+    reading_direction_ = other.reading_direction();
+    type_ = other.type();
+    reverse_video_ = other.reverse_video();
+    orientation_ = other.orientation();
+    reading_orientation_ = other.reading_orientation();
+    indented_ = other.indented();
+
+    // Line set holding this element.
+    holder_ = other.holder();
+  }
+
+
+  // Copy constructor: start without id and not hidden, then let
+  // copy_data() take every attribute from \p other.
+  template <typename L>
+  inline
+  line_info<L>::line_info(const line_info<L>& other)
+    : id_(0),
+      hidden_(false)
+  {
+    this->copy_data(other);
+  }
+
+
/*!
Exemple:
- ---------------------
- | | | |x| | | | | | | ----> a_height = 4
- ---------------------
- | | | |x| | | | | | |
- ---------------------
- | |x|x|x| | |x|x|x| | ----> Median = 2 ^
- --------------------- |
- | |x| |x| | |x| |x| | | x_height = 3
- --------------------- |
- | |x|x|x| | |x|x|x| | ----> Baseline = 4 v
- ---------------------
- | | | | | | |x| | | |
- ---------------------
- | | | | | | |x| | | | ----> d_height = -2
- ---------------------
-
- All the metrics are computed relatively to the Baseline.
-
- The baseline is defined as an absolute row index.
+ \verbatim
+
+ 0 1 2 3 4 5 6 7 8 9
+ ---------------------
+ 0 | | | |x| | | | | | | ----> a_height = 4
+ ---------------------
+ 1 | | | |x| | | | | | |
+ ---------------------
+ 2 | |x|x|x| | |x|x|x| | ----> Meanline = 2 ^
+ --------------------- |
+ 3 | |x| |x| | |x| |x| | | x_height = 3
+ --------------------- |
+ 4 | |x|x|x| | |x|x|x| | ----> Baseline = 4 v
+ ---------------------
+ 5 | | | | | | |x| | | |
+ ---------------------
+ 6 | | | | | | |x| | | | ----> d_height = -2
+ ---------------------
+
+ \endverbatim
+
+ The baseline, the meanline, the ascent and the descent are defined
+ as an absolute row index.
+
+ All other metrics, such as x_height, are computed relatively to
+ the Baseline.
*/
template <typename L>
- line_info<L>::line_info(const object_links<L>& links,
+ line_info<L>::line_info(const line_set<L>& holder,
const line_id_t& id,
const mln::util::array<component_id_t>& comps)
- : id_(id), tag_(line::None), components_(comps), links_(&links)
+ : id_(id), hidden_(false), tag_(line::None), components_(comps),
+ type_(line::Undefined), holder_(holder)
{
- force_stats_update();
-
-
-// typedef mln_site(L) P;
-// const component_set<L>& comp_set = links_->component_set_();
-// mln::accu::shape::bbox<P> bbox;
-// for_all_elements(i, components_)
-// {
-// unsigned c = components_(i);
-// const box2d& bb = comp_set(c).bbox();
-// // Bounding box.
-// bbox.take(bb);
-// }
-// bbox_ = bbox.to_result();
-
-
+ force_stats_update();
- // FIXME: set valid information for these attributes.
+ // FIXME: set valid information for these attributes in
+ // force_stats_update.
word_space_ = 0;
reading_direction_ = line::LeftToRight;
- type_ = line::Paragraph;
reverse_video_ = false;
orientation_ = 0.;
@@ -251,12 +364,22 @@ namespace scribo
template <typename L>
+ inline
+ line_info<L>&
+ line_info<L>::operator=(const line_info<L>& other)
+ {
+ copy_data(other);
+ return *this;
+ }
+
+ template <typename L>
typename line_info<L>::line_id_t
line_info<L>::id() const
{
return id_;
}
+
template <typename L>
line::Tag
line_info<L>::tag() const
@@ -280,6 +403,13 @@ namespace scribo
return bbox_;
}
+  // Read-only access to the extended bounding box.
+  template <typename L>
+  const mln::box2d&
+  line_info<L>::ebbox() const
+  {
+    const mln::box2d& extended = ebbox_;
+    return extended;
+  }
+
template <typename L>
const mln::util::array<typename line_info<L>::component_id_t>&
@@ -297,7 +427,7 @@ namespace scribo
template <typename L>
- unsigned
+ int
line_info<L>::baseline() const
{
return baseline_;
@@ -305,14 +435,31 @@ namespace scribo
template <typename L>
- unsigned
- line_info<L>::median() const
+ int
+ line_info<L>::meanline() const
{
- return median_;
+ return meanline_;
}
+
template <typename L>
int
+ line_info<L>::ascent() const
+ {
+ return baseline_ - a_height() + 1;
+ }
+
+
+  // Descent, deduced from the baseline and d_height().
+  template <typename L>
+  int
+  line_info<L>::descent() const
+  {
+    const int below = d_height();
+    return baseline_ - below + 1;
+  }
+
+
+ template <typename L>
+ unsigned
line_info<L>::x_height() const
{
return x_height_;
@@ -375,6 +522,32 @@ namespace scribo
template <typename L>
+ void
+ line_info<L>::update_components_type(component::Type type)
+ {
+ for_all_elements(i, components_)
+ {
+ unsigned c = components_[i];
+ holder_.components_()(c).update_type(type);
+ }
+ }
+
+
+  // Set the line type to \p type.  Punctuation and Text lines also
+  // propagate a matching type to their components; other line types
+  // leave the components untouched.
+  template <typename L>
+  void
+  line_info<L>::update_type(line::Type type)
+  {
+    type_ = type;
+
+    switch (type)
+    {
+      case line::Punctuation:
+        update_components_type(component::Punctuation);
+        break;
+
+      case line::Text:
+        update_components_type(component::Character);
+        break;
+
+      default:
+        break;
+    }
+  }
+
+
+ template <typename L>
bool
line_info<L>::reverse_video() const
{
@@ -415,22 +588,170 @@ namespace scribo
template <typename L>
+ bool
+ line_info<L>::hidden() const
+ {
+ return hidden_;
+ }
+
+
+ template <typename L>
void
- line_info<L>::fast_merge(line_info<L>& other)
+ line_info<L>::set_hidden(bool b)
+ {
+ hidden_ = b;
+ }
+
+
+  // Horizontal margin used to build the extended bbox.
+  //
+  // FIXME: choose between:
+  //   not enough:  char_width + char_space
+  //   too much:    2 * char_width
+  //   looks good:  char_width + 2 * char_space
+  template <typename L>
+  inline
+  int
+  line_info<L>::delta_of_line() const
+  {
+    const int margin = char_width() + 2 * char_space();
+    return margin;
+  }
+
+
+  // Return a copy of \p b whose columns are widened by \p delta on
+  // both sides; rows are left unchanged.
+  template <typename L>
+  mln::box2d
+  line_info<L>::enlarge(const mln::box2d& b, int delta) const
+  {
+    const mln::point2d top_left(b.pmin().row(), b.pmin().col() - delta);
+    const mln::point2d bottom_right(b.pmax().row(), b.pmax().col() + delta);
+
+    mln::box2d widened(top_left, bottom_right);
+    return widened;
+  }
+
+
+  // Recompute the extended bbox from the current metrics and crop it
+  // to the domain of the labeled image.
+  template <typename L>
+  void
+  line_info<L>::update_ebbox()
+  {
+    // Margins above the meanline and below the baseline.
+    int above = a_height_ - x_height_;
+    int below = - d_height_;
+
+    // When only one of the two margins is significant (> 2), use it
+    // on both sides.
+    if (above <= 2 && below > 2)
+      above = below;
+    else if (below <= 2 && above > 2)
+      below = above;
+
+    const int margin = delta_of_line();
+
+    ebbox_ = mln::make::box2d(meanline_ - above,
+                              bbox().pmin().col() - margin,
+                              baseline_ + below,
+                              bbox().pmax().col() + margin);
+
+    ebbox_.crop_wrt(holder_.components().labeled_image().domain());
+  }
+
+
+  // Build an extended box for the merge of \p info_l with \p info.
+  //
+  // The rows come from the metrics of \p info_l only (meanline,
+  // baseline, a/d/x heights).  The columns span both bboxes, widened
+  // by the larger of the two delta_of_line() values.  The result is
+  // not cropped to the image domain here.
+  template <typename L>
+  mln::box2d
+  line_info<L>::merged_ebbox(const scribo::line_info<L>& info_l,
+                             const scribo::line_info<L>& info)
+  {
+    // Line data.
+    const int baseline_l = info_l.baseline();
+    const int meanline_l = info_l.meanline();
+
+    int A_l = info_l.a_height() - static_cast<int>(info_l.x_height());
+    int D_l = - info_l.d_height();
+    if (A_l <= 2 && D_l > 2)
+      A_l = D_l;
+    else if (D_l <= 2 && A_l > 2)
+      D_l = A_l;
+
+    // Keep everything signed: subtracting an unsigned delta from a
+    // column close to the left border would wrap around.
+    const int delta = std::max(info_l.delta_of_line(),
+                               info.delta_of_line());
+
+    const int min_col = std::min(info_l.bbox().pmin().col(),
+                                 info.bbox().pmin().col()) - delta;
+    const int max_col = std::max(info_l.bbox().pmax().col(),
+                                 info.bbox().pmax().col()) + delta;
+
+    return mln::make::box2d(meanline_l - A_l, min_col,
+                            baseline_l + D_l, max_col);
+  }
+
+
+  // Merge the bbox and the extended bbox of \p other into this line.
+  //
+  // How the extended boxes are combined depends on the line types:
+  //  - text + text:     both ebboxes are merged, the one with the
+  //                     smaller delta being widened first;
+  //  - text + non-text: the vertical extent is driven by this line;
+  //  - non-text + text: this line becomes a text line and the
+  //                     vertical extent is driven by \p other;
+  //  - non-text + non-text is not supported and aborts.
+  template <typename L>
+  void
+  line_info<L>::update_bbox_and_ebox(line_info<L>& other)
+  {
+    if (type() == line::Text) // /this/ IS a text line
+    {
+      if (other.type() == line::Text) // /other/ IS a text line.
+      {
+        // Adjusting ebboxes with the highest delta and merging ebboxes.
+        int d_delta = other.delta_of_line() - this->delta_of_line();
+        if (d_delta < 0) // other.delta_of_line() < this->delta_of_line()
+          ebbox_.merge(enlarge(other.ebbox(), - d_delta));
+        else
+        {
+          // Symmetric case: start from the *extended* box of /other/
+          // (not its plain bbox, which would drop its margins) and
+          // merge our own widened ebbox into it.
+          const mln::box2d b = ebbox_;
+          ebbox_ = other.ebbox();
+          ebbox_.merge(enlarge(b, d_delta));
+        }
+      }
+      else // /other/ IS NOT a text line.
+      {
+        ebbox_.merge(other.ebbox());
+        ebbox_.merge(merged_ebbox(*this, other));
+      }
+    }
+    else // /this/ is NOT a text line
+    {
+      if (other.type() != line::Text)
+      {
+        std::cerr << "error in 'line_info::update_bbox_and_ebox':"
+                  << "Merging two non text lines." << std::endl;
+        std::abort();
+      }
+
+      update_type(line::Text);
+      ebbox_.merge(other.ebbox());
+      ebbox_.merge(merged_ebbox(other, *this));
+    }
+
+    // Merging bboxes.
+    bbox_.merge(other.bbox());
+
+    // Make sure the ebbox is included in the image domain (done once
+    // here for every branch above).
+    ebbox_.crop_wrt(holder_.components().labeled_image().domain());
+  }
+
+
+ template <typename L>
+ void
+ line_info<L>::fast_merge(line_info<L>& other, bool hide)
{
tag_ = line::Needs_Precise_Stats_Update;
other.update_tag(line::Merged);
+ other.set_hidden(hide);
+
+ // Update bbox and ebbox
+ update_bbox_and_ebox(other);
- bbox_.merge(other.bbox());
components_.append(other.components());
}
template <typename L>
void
- line_info<L>::precise_merge(line_info<L>& other)
+ line_info<L>::precise_merge(line_info<L>& other, bool hide)
{
- fast_merge(other);
+ fast_merge(other, hide);
force_stats_update();
}
@@ -439,7 +760,7 @@ namespace scribo
line_info<L>::force_stats_update()
{
typedef mln_site(L) P;
- const component_set<L>& comp_set = links_->component_set_();
+ const component_set<L>& comp_set = holder_.components();
// FIXME: int_u<12> may not be enought but we can't use unsigned
// or any other larger types since there is no median
@@ -449,7 +770,7 @@ namespace scribo
typedef mln::value::int_u<12> median_data_t;
typedef mln::accu::stat::median_h<median_data_t> median_t;
median_t
- absolute_median,
+ absolute_meanline,
absolute_baseline,
char_space,
char_width;
@@ -460,11 +781,22 @@ namespace scribo
{
unsigned c = components_(i);
- const box2d& bb = comp_set(c).bbox();
+ const mln::box2d& bb = comp_set(c).bbox();
+
+ // Bounding box.
+ bbox.take(bb);
+
+ // Ignore punctuation for stats computation but not for bbox
+ // computation.
+ if (holder_.components()(c).type() == component::Punctuation)
+ continue;
+
+
// Space between characters.
int space = bb.pmin().col()
- - comp_set((*links_)[c]).bbox().pmax().col();
+ - comp_set(holder_.links()(c)).bbox().pmax().col();
+
// -- Ignore overlapped characters.
if (space > 0)
char_space.take(space);
@@ -474,16 +806,13 @@ namespace scribo
if (bb.width() <= 1000)
char_width.take(bb.width());
- // Median (compute an absolute value, from the top left
+ // Meanline (compute an absolute value, from the top left
// corner of the image).
- absolute_median.take(bb.pmin().row());
+ absolute_meanline.take(bb.pmin().row());
// Baseline (compute an absolute value, from the top left
// corner of the image).
absolute_baseline.take(bb.pmax().row());
-
- // Bounding box.
- bbox.take(bb);
}
// Finalization
@@ -492,7 +821,7 @@ namespace scribo
bbox_ = bbox.to_result();
// Char space
- if (card() == 1)
+ if (char_space.card() < 2)
char_space_ = 0;
else
char_space_ = char_space.to_result();
@@ -506,21 +835,31 @@ namespace scribo
baseline_ = absolute_baseline.to_result();
- median_ = absolute_baseline - absolute_median;
- x_height_ = absolute_baseline - absolute_median + 1;
- d_height_ = absolute_baseline - bbox.to_result().pmax().row();
+ meanline_ = absolute_meanline.to_result();
+ x_height_ = absolute_baseline - absolute_meanline + 1;
+ d_height_ = absolute_baseline - bbox.to_result().pmax().row() + 1;
a_height_ = absolute_baseline - bbox.to_result().pmin().row() + 1;
//FIXME
//
//word_space_ = ...;
//reading_direction_ = ...;
- //type_ = ...;
//reverse_video_ = ...;
//orientation_ = ...;
//reading_orientation_ = ...;
//indented_ = ...;
+
+ update_ebbox();
}
+
+ }
+
+
+  // Read-only access to the line set holding this element.
+  template <typename L>
+  const line_set<L>&
+  line_info<L>::holder() const
+  {
+    const line_set<L>& owner = holder_;
+    return owner;
+  }
@@ -531,10 +870,14 @@ namespace scribo
return ostr << "line_info("
<< "id=" << info.id()
<< ", tag=" << info.tag()
+ << ", type=" << info.type()
<< ", bbox=" << info.bbox()
+ << ", ebbox=" << info.ebbox()
<< ", components=" << info.components()
<< ", baseline=" << info.baseline()
- << ", median=" << info.median()
+ << ", meanline=" << info.meanline()
+ << ", ascent=" << info.ascent()
+ << ", descent=" << info.descent()
<< ", x_height=" << info.x_height()
<< ", d_height=" << info.d_height()
<< ", a_height=" << info.a_height()
diff --git a/scribo/core/line_set.hh b/scribo/core/line_set.hh
index 2d846f0..80a79b3 100644
--- a/scribo/core/line_set.hh
+++ b/scribo/core/line_set.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -46,7 +47,9 @@
# include <scribo/core/macros.hh>
# include <scribo/core/line_info.hh>
-# include <scribo/core/line_stats_extra.hh>
+
+# include <scribo/core/object_links.hh>
+# include <scribo/core/object_groups.hh>
namespace scribo
@@ -63,17 +66,24 @@ namespace scribo
struct line_set_data
{
line_set_data();
+ line_set_data(const object_groups<L>& comp_set);
line_set_data(const mln::util::array<scribo::line_info<L> >&
infos,
- const component_set<L>& comp_set);
+ const object_groups<L>& comp_set);
mln::util::array<scribo::line_info<L> > infos_;
- component_set<L> comp_set_;
+ component_set<L> components_;
+ object_links<L> links_;
+ object_groups<L> groups_;
};
} // end of namespace scribo::internal
+ /*! \brief Lines container.
+
+ Line ids start from 1.
+ */
template <typename L>
class line_set
{
@@ -85,12 +95,11 @@ namespace scribo
line_set();
/// Constructor from object groups.
- line_set(const object_links<L>& links, const object_groups<L>&
groups);
+ line_set(const object_groups<L>& groups);
/// @}
/// Compute line stats and fill the underlying information.
- void compute_lines(const object_links<L>& links,
- const object_groups<L>& groups);
+ void compute_lines(const object_groups<L>& groups);
/// Return the line count.
mln_value(L) nelements() const;
@@ -115,7 +124,24 @@ namespace scribo
line_set<L> duplicate() const;
/// Return the underlying component set.
- const component_set<L>& component_set_() const;
+ const component_set<L>& components() const;
+
+ /// Return the underlying component set (non-const version).
+ component_set<L>& components_();
+
+ /// Return the underlying component group.
+ const object_groups<L>& groups() const;
+
+ /// Return the underlying links.
+ const object_links<L>& links() const;
+
+
+ /// Massive line computation
+ /// @{
+
+ void force_stats_update();
+
+ /// @}
/// Internal methods
/// @{
@@ -138,8 +164,7 @@ namespace scribo
template <typename L>
scribo::line_set<L>
- line_set(const object_links<L>& links,
- const object_groups<L>& groups);
+ line_set(const object_groups<L>& groups);
} // End of namespace scribo::make
@@ -165,9 +190,19 @@ namespace scribo
template <typename L>
inline
+ line_set_data<L>::line_set_data(const object_groups<L>& groups)
+ : components_(groups.components()), links_(groups.links()),
+ groups_(groups)
+ {
+ }
+
+
+ template <typename L>
+ inline
line_set_data<L>::line_set_data(const
mln::util::array<scribo::line_info<L> >& infos,
- const component_set<L>& comp_set)
- : infos_(infos), comp_set_(comp_set)
+ const object_groups<L>& groups)
+ : infos_(infos), components_(groups.components()),
+ links_(groups.links()), groups_(groups)
{
}
@@ -182,28 +217,22 @@ namespace scribo
{
}
+
template <typename L>
inline
- line_set<L>::line_set(const object_links<L>& links,
- const object_groups<L>& groups)
+ line_set<L>::line_set(const object_groups<L>& groups)
{
- compute_lines(links, groups);
+ compute_lines(groups);
}
- // FIXME: groups should have a reference to the links data and we
- // should only required groups as argument.
template <typename L>
-// util::array<line_stats_extra>
void
- line_set<L>::compute_lines(const object_links<L>& links,
- const object_groups<L>& groups)
+ line_set<L>::compute_lines(const object_groups<L>& groups)
{
- data_ = new internal::line_set_data<L>();
+ data_ = new internal::line_set_data<L>(groups);
typedef mln_site(L) P;
- data_->comp_set_ = groups.component_set_();
- const component_set<L>& comp_set = groups.component_set_();
mln_value(L) n_groups = groups.nelements() - 1;
mln::fun::i2v::array<mln_value(L)>
@@ -211,93 +240,32 @@ namespace scribo
n_groups, n_groups);
// FIXME: object_groups should store the relation 'group -> comp'.
- // it would avoid the use of accumulator arrays.
-
- // FIXME: int_u<11> may not be enought but we can't use unsigned
- // or any other larger types since there is no median
- // implementation for high quantification types...
-
- // Init.
-// typedef mln::value::int_u<12> median_data_t;
-// typedef mln::accu::stat::median_h<median_data_t> median_t;
-// util::array<median_t>
-// absolute_median(static_cast<unsigned>(n_groups) + 1),
-// absolute_baseline(static_cast<unsigned>(n_groups) + 1),
-// char_space(static_cast<unsigned>(n_groups) + 1),
-// char_width(static_cast<unsigned>(n_groups) + 1);
-
-// util::array<mln::accu::shape::bbox<P> >
-// bbox(static_cast<unsigned>(n_groups) + 1);
+ mln::util::array< mln::util::array<component_id_t> >
+ group_to_comps(unsigned(n_groups) + 1);
- util::array< util::array<component_id_t> >
- comps(static_cast<unsigned>(n_groups) + 1);
// 1st pass - Compute data.
- for (unsigned i = 1; i < packed_groups.size(); ++i)
- if (comp_set(i).is_valid())
+ for_all_comps(i, data_->components_)
+ if (data_->components_(i).is_valid())
{
unsigned group_id = packed_groups(i);
if (group_id != 0) // Is this component part of a group?
{
-// const box2d& bb = comp_set(i).bbox();
-
-// // Space between characters.
-// int space = bb.pmin().col()
-// - comp_set(links[i]).bbox().pmax().col();
-// // -- Ignore overlapped characters.
-// if (space > 0)
-// char_space(group_id).take(space);
-
-// // Character width
-// // -- Ignore too large components.
-// if (bb.width() <= 1000)
-// char_width(group_id).take(bb.width());
-
-// // Median (compute an absolute value, from the top left
-// // corner of the image).
-// absolute_median(group_id).take(bb.pmin().row());
-
-// // Baseline (compute an absolute value, from the top left
-// // corner of the image).
-// absolute_baseline(group_id).take(bb.pmax().row());
-
-// // Bounding box.
-// bbox(group_id).take(bb);
-
// Component id.
- comps(group_id).append(i);
+ group_to_comps(group_id).append(i);
}
}
// 2nd pass - Store data.
- data_->infos_.reserve(groups.nelements());
+ data_->infos_.reserve(group_to_comps.size());
data_->infos_.append(line_info<L>()); // line with id 0 is invalid.
-// util::array<line_stats_extra> stats_extra;
-// stats_extra.reserve(static_cast<unsigned>(n_groups) + 1);
-// stats_extra.append(line_stats_extra());
-
- for (unsigned i = 1; i <= n_groups; ++i)
+ for_all_groups(i, group_to_comps)
{
-
// Add line info.
- line_info<L> info(links, i, comps(i));
+ line_info<L> info(*this, i, group_to_comps(i));
data_->infos_.append(info);
-
-
- // Prepare extra stats to be returned.
-// line_stats_extra stats(absolute_median(i) * absolute_median(i).card(),
-// absolute_median(i).card(),
-// absolute_baseline(i) * absolute_baseline(i).card(),
-// absolute_baseline(i).card(),
-// char_space(i) * char_space(i).card(),
-// char_space(i).card(),
-// char_width(i) * char_width(i).card(),
-// char_width(i).card());
-// stats_extra.append(stats);
}
-
-// return stats_extra;
}
@@ -351,7 +319,7 @@ namespace scribo
{
const F& f = exact(f_);
- for_all_elements(i, data_->infos_)
+ for_all_lines_info(i, data_->infos_)
if (!f(i))
data_->infos_[i].update_tag(tag);
}
@@ -370,11 +338,44 @@ namespace scribo
template <typename L>
inline
const component_set<L>&
- line_set<L>::component_set_() const
+ line_set<L>::components() const
+ {
+ return data_->components_;
+ }
+
+ template <typename L>
+ inline
+ component_set<L>&
+ line_set<L>::components_()
{
- return data_->comp_set_;
+ return data_->components_;
}
+ template <typename L>
+ inline
+ const object_groups<L>&
+ line_set<L>::groups() const
+ {
+ return data_->groups_; // Read-only access to the object groups stored in this set's data.
+ }
+ // Read-only access to the object links stored in this set's data.
+ template <typename L>
+ inline
+ const object_links<L>&
+ line_set<L>::links() const
+ {
+ return data_->links_;
+ }
+ // Call force_stats_update() on each line info (index 0 is skipped) tagged line::Needs_Precise_Stats_Update.
+ template <typename L>
+ inline
+ void
+ line_set<L>::force_stats_update()
+ {
+ for_all_lines_info(i, data_->infos_)
+ if (data_->infos_(i).tag() == line::Needs_Precise_Stats_Update)
+ data_->infos_(i).force_stats_update();
+ }
template <typename L>
inline
@@ -389,7 +390,7 @@ namespace scribo
void
line_set<L>::init_(const line_set<L>& set)
{
- data_ = new internal::line_set_data<L>(set.infos_(), set.component_set_());
+ data_ = new internal::line_set_data<L>(set.infos_(), set.groups());
}
@@ -400,27 +401,13 @@ namespace scribo
template <typename L>
scribo::line_set<L>
- line_set(const object_links<L>& links,
- const object_groups<L>& groups)
+ line_set(const object_groups<L>& groups)
{
- mln_precondition(exact(ima).is_valid());
- scribo::line_set<L> tmp(links, groups);
+ mln_precondition(groups.is_valid());
+ scribo::line_set<L> tmp(groups);
return tmp;
}
-
-// template <typename L>
-// scribo::line_set<L>
-// line_set(const object_links<L>& links,
-// const object_groups<L>& groups,
-// util::array<line_stats_extra>& line_stats)
-// {
-// mln_precondition(exact(ima).is_valid());
-// scribo::line_set<L> tmp;
-// line_stats = tmp.compute_lines(links, groups);
-// return tmp;
-// }
-
} // end of namespace scribo::make
diff --git a/scribo/core/macros.hh b/scribo/core/macros.hh
index bf2afde..f644db3 100644
--- a/scribo/core/macros.hh
+++ b/scribo/core/macros.hh
@@ -29,8 +29,8 @@
# define for_all_ncomponents(C, NCOMP) \
for (unsigned C = 1; C <= NCOMP; ++C)
-# define for_all_components(C, S) \
- for (unsigned C = 1; C < S.nelements(); ++C)
+// # define for_all_components(C, S)
+// for (unsigned C = 1; C <= S.nelements(); ++C)
# define for_all_elements(E, S) \
for (unsigned E = 0; E < S.nelements(); ++E)
@@ -43,10 +43,16 @@
# define for_all_comp_data(E, S) \
for (unsigned E = 1; E < S.nelements(); ++E)
+# define for_all_links(E, S) \
+ for_all_comp_data(E, S)
+
+# define for_all_groups(E, S) \
+ for_all_comp_data(E, S)
+
# define for_all_lines(E, S) \
for_all_comps(E, S)
-# define for_all_groups(E, S) \
+# define for_all_lines_info(E, S) \
for_all_comp_data(E, S)
#endif // ! SCRIBO_CORE_MACROS_HH
diff --git a/scribo/core/tag/component.hh b/scribo/core/tag/component.hh
index 7998f08..3c061b3 100644
--- a/scribo/core/tag/component.hh
+++ b/scribo/core/tag/component.hh
@@ -44,11 +44,72 @@ namespace scribo
enum Tag
{
None = 0,
- Separator,
Ignored
};
+ enum Type
+ {
+ Undefined = 0,
+ Character,
+ Separator,
+ Noise,
+ Punctuation
+ };
+
+# ifndef MLN_INCLUDE_ONLY
+
+ // Print a component tag by name (unknown values print "None"); inline because it is defined in a header.
+ inline std::ostream&
+ operator<<(std::ostream& ostr, const Tag& tag)
+ {
+ std::string str;
+ switch(tag)
+ {
+ default:
+ case None:
+ str = "None";
+ break;
+ case Ignored:
+ str = "Ignored";
+ break;
+ }
+
+ return ostr << str;
+ }
+
+ // Print a component type by name (unknown values print "Undefined"); inline because it is defined in a header.
+ inline std::ostream&
+ operator<<(std::ostream& ostr, const Type& type)
+ {
+ std::string str;
+ switch(type)
+ {
+ default:
+ case Undefined:
+ str = "Undefined";
+ break;
+ case Character:
+ str = "Character";
+ break;
+ case Separator:
+ str = "Separator";
+ break;
+ case Noise:
+ str = "Noise";
+ break;
+ case Punctuation:
+ str = "Punctuation";
+ break;
+ }
+
+ return ostr << str;
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
} // end of namespace scribo::component
} // end of namespace scribo
diff --git a/scribo/core/tag/line.hh b/scribo/core/tag/line.hh
index 383e331..f571188 100644
--- a/scribo/core/tag/line.hh
+++ b/scribo/core/tag/line.hh
@@ -43,7 +43,6 @@ namespace scribo
enum Tag
{
None = 0,
- Separator,
Ignored,
Needs_Precise_Stats_Update,
Merged,
@@ -77,7 +76,12 @@ namespace scribo
Header,
Heading,
PageNumber,
- Paragraph
+ Paragraph,
+
+ // These types are not supported by the XSD.
+ Punctuation,
+ Text,
+ Undefined
};
@@ -104,9 +108,6 @@ namespace scribo
case None:
str = "None";
break;
- case Separator:
- str = "Separator";
- break;
case Ignored:
str = "Ignored";
break;
@@ -160,7 +161,6 @@ namespace scribo
case Caption:
str = "caption";
break;
- default:
case Credit:
str = "credit";
break;
@@ -169,17 +169,34 @@ namespace scribo
break;
case Floating:
str = "floating";
+ break;
case Footer:
str = "footer";
+ break;
case Header:
str = "header";
+ break;
case Heading:
str = "heading";
+ break;
case PageNumber:
str = "page-number";
+ break;
case Paragraph:
str = "paragraph";
break;
+
+ // Values unsupported by the XSD
+ case Punctuation:
+ str = "punctuation";
+ break;
+ case Text:
+ str = "text";
+ break;
+ default:
+ case Undefined:
+ str = "undefined";
+ break;
}
return ostr << str;
--
1.5.6.5