Olena-patches
Threads by month
- ----- 2025 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- 9625 discussions
* scribo/core/component_info.hh,
* scribo/core/component_set.hh,
* scribo/core/line_info.hh,
* scribo/draw/line_components.hh: Remove holder data, which
prevented the containers from being freed.
* scribo/io/img/internal/debug_img_visitor.hh,
* scribo/io/img/internal/full_img_visitor.hh,
* scribo/io/img/save.hh,
* scribo/io/xml/internal/page_xml_visitor.hh,
* scribo/io/xml/save.hh,
* scribo/primitive/extract/lines_h_thick_and_thin.hh,
* scribo/primitive/remove/separators.hh,
* scribo/text/merging.hh,
* scribo/text/paragraphs.hh,
* scribo/text/paragraphs_closing.hh,
* scribo/toolchain/internal/content_in_hdoc_functor.hh: Update
code so that holder data is no longer needed.
---
scribo/ChangeLog | 23 ++++++++++
scribo/scribo/core/component_info.hh | 19 +-------
scribo/scribo/core/component_set.hh | 6 +-
scribo/scribo/core/line_info.hh | 44 +++++++------------
scribo/scribo/draw/line_components.hh | 7 ++-
scribo/scribo/io/img/internal/debug_img_visitor.hh | 37 +++++++++------
scribo/scribo/io/img/internal/full_img_visitor.hh | 34 +++++++++------
scribo/scribo/io/img/save.hh | 6 +-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 28 ++++++------
scribo/scribo/io/xml/save.hh | 2 +-
scribo/scribo/text/merging.hh | 46 ++++++++++++-------
scribo/scribo/text/paragraphs.hh | 26 ++++++-----
scribo/scribo/text/paragraphs_closing.hh | 2 +-
13 files changed, 155 insertions(+), 125 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 5cf6f94..16fe8c8 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,26 @@
+2011-06-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Fix a serious memory leak.
+
+ * scribo/core/component_info.hh,
+ * scribo/core/component_set.hh,
+ * scribo/core/line_info.hh,
+ * scribo/draw/line_components.hh: Remove holder data. Prevented
+ the containers from being freed.
+
+ * scribo/io/img/internal/debug_img_visitor.hh,
+ * scribo/io/img/internal/full_img_visitor.hh,
+ * scribo/io/img/save.hh,
+ * scribo/io/xml/internal/page_xml_visitor.hh,
+ * scribo/io/xml/save.hh,
+ * scribo/primitive/extract/lines_h_thick_and_thin.hh,
+ * scribo/primitive/remove/separators.hh,
+ * scribo/text/merging.hh,
+ * scribo/text/paragraphs.hh,
+ * scribo/text/paragraphs_closing.hh,
+ * scribo/toolchain/internal/content_in_hdoc_functor.hh: Update
+ code in order to make holder data useless.
+
2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
* scribo/text/paragraphs_closing.hh: Revamp code.
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 2fa2ad1..b42787f 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -57,8 +57,7 @@ namespace scribo
public:
component_info();
- component_info(const component_set<L>& holder,
- const component_id_t& id,
+ component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
unsigned card,
@@ -86,8 +85,6 @@ namespace scribo
bool is_valid() const;
- const component_set<L>& holder() const;
-
protected:
component_id_t id_;
mln::box2d bbox_;
@@ -98,8 +95,6 @@ namespace scribo
component::Tag tag_;
component::Type type_;
-
- component_set<L> holder_;
};
@@ -125,14 +120,13 @@ namespace scribo
template <typename L>
- component_info<L>::component_info(const component_set<L>& holder,
- const component_id_t& id,
+ component_info<L>::component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
unsigned card,
component::Type type)
: id_(id), bbox_(bbox), mass_center_(mass_center), card_(card),
- type_(type), holder_(holder)
+ type_(type)
{
if (!bbox.is_valid())
tag_ = component::Ignored;
@@ -232,13 +226,6 @@ namespace scribo
}
- template <typename L>
- const component_set<L>&
- component_info<L>::holder() const
- {
- return holder_;
- }
-
template <typename L>
std::ostream&
diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh
index 4f4cd61..ded64ae 100644
--- a/scribo/scribo/core/component_set.hh
+++ b/scribo/scribo/core/component_set.hh
@@ -337,7 +337,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first(),
+ component_info<L> info(i, attribs[i].first(),
attribs[i].second(), attribs[i].second_accu().nsites());
data_->infos_.append(info);
}
@@ -362,7 +362,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first(),
+ component_info<L> info(i, attribs[i].first(),
attribs[i].second(), attribs[i].second_accu().nsites(),
type);
data_->infos_.append(info);
@@ -389,7 +389,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first,
+ component_info<L> info(i, attribs[i].first,
attribs[i].second.first, attribs[i].second.second,
type);
data_->infos_.append(info);
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index 9320416..2913f81 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -140,13 +140,13 @@ namespace scribo
std::string text_;
std::string html_text_;
- // Line set holding this element.
- line_set<L> holder_;
-
// DEBUG
stats< float > meanline_clusters_;
stats< float > baseline_clusters_;
+ component_set<L> components_;
+ object_links<L> links_;
+
private:
void init_();
};
@@ -282,10 +282,6 @@ namespace scribo
/// Force a new computation of statistics.
void force_stats_update();
-
- /// Returns the line set holding this element.
- const line_set<L>& holder() const;
-
/// Returns the delta used to compute the extended bbox.
int delta_of_line() const;
@@ -376,7 +372,7 @@ namespace scribo
line_info_data<L>::line_info_data(const line_set<L>& holder,
const group_info& group)
: hidden_(false), tag_(line::None), component_ids_(group.component_ids()),
- type_(line::Undefined), holder_(holder)
+ type_(line::Undefined), components_(holder.components()), links_(holder.links())
{
init_();
}
@@ -385,7 +381,7 @@ namespace scribo
line_info_data<L>::line_info_data(const line_set<L>& holder,
const mln::util::array<component_id_t>& component_ids)
: hidden_(false), tag_(line::None), component_ids_(component_ids),
- type_(line::Undefined), holder_(holder)
+ type_(line::Undefined), components_(holder.components()), links_(holder.links())
{
init_();
}
@@ -696,7 +692,7 @@ namespace scribo
for_all_elements(i, data_->component_ids_)
{
unsigned c = data_->component_ids_[i];
- data_->holder_.components_()(c).update_type(type);
+ data_->components_(c).update_type(type);
}
}
@@ -855,7 +851,7 @@ namespace scribo
data_->baseline_ + D,
bbox().pmax().col() + delta);
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
@@ -915,7 +911,7 @@ namespace scribo
data_->ebbox_.merge(enlarge(b, d_delta));
}
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
else // /other/ IS NOT a text line.
{
@@ -941,7 +937,7 @@ namespace scribo
data_->bbox_.merge(other.bbox());
// Make sure the ebbox is included in the image domain.
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
@@ -975,13 +971,13 @@ namespace scribo
// Only for the case of two-character words
if (card() == 2)
{
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
const unsigned c1 = data_->component_ids_(0);
const unsigned c2 = data_->component_ids_(1);
- if (data_->holder_.components()(c1).type() == component::Punctuation
- || data_->holder_.components()(c2).type() == component::Punctuation)
+ if (data_->components_(c1).type() == component::Punctuation
+ || data_->components_(c2).type() == component::Punctuation)
return false;
const mln::box2d& bb1 = comp_set(c1).bbox();
@@ -1026,7 +1022,7 @@ namespace scribo
unsigned
line_info<L>::get_first_char_height() const
{
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
const unsigned c1 = data_->components_(0);
const mln::box2d& bb1 = comp_set(c1).bbox();
@@ -1120,7 +1116,7 @@ namespace scribo
line_info<L>::force_stats_update()
{
typedef mln_site(L) P;
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
// Init.
typedef mln::value::int_u<12> median_data_t;
@@ -1222,11 +1218,11 @@ namespace scribo
// (right link) (left link)
// Space between characters.
- if (data_->holder_.links()(c) != c)
+ if (data_->links_(c) != c)
{
int
space = bb.pmin().col()
- - comp_set(data_->holder_.links()(c)).bbox().pmax().col() - 1;
+ - comp_set(data_->links_(c)).bbox().pmax().col() - 1;
// -- Ignore overlapped characters.
if (space > 0)
@@ -1330,14 +1326,6 @@ namespace scribo
template <typename L>
- const line_set<L>&
- line_info<L>::holder() const
- {
- return data_->holder_;
- }
-
-
- template <typename L>
std::ostream&
operator<<(std::ostream& ostr, const line_info<L>& info)
{
diff --git a/scribo/scribo/draw/line_components.hh b/scribo/scribo/draw/line_components.hh
index 878b2c1..12e7489 100644
--- a/scribo/scribo/draw/line_components.hh
+++ b/scribo/scribo/draw/line_components.hh
@@ -46,6 +46,7 @@ namespace scribo
template <typename L, typename I>
void
line_components(Image<I>& input_,
+ const line_set<L>& lines,
const line_info<L>& line,
const mln_value(I)& value);
@@ -56,6 +57,7 @@ namespace scribo
template <typename L, typename I>
void
line_components(Image<I>& input_,
+ const line_set<L>& lines,
const line_info<L>& line,
const mln_value(I)& value)
{
@@ -65,9 +67,8 @@ namespace scribo
mln_precondition(input.is_valid());
- const line_set<L>& holder = line.holder();
- const component_set<L>& comp_set = holder.components();
- const L& labeled_image = holder.components().labeled_image();
+ const component_set<L>& comp_set = lines.components();
+ const L& labeled_image = lines.components().labeled_image();
const mln::util::array<component_id_t>& component_ids = line.component_ids();
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh
index 520a743..cde16ed 100644
--- a/scribo/scribo/io/img/internal/debug_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -61,7 +61,8 @@ namespace scribo
{
- class debug_img_visitor : public doc_serializer<debug_img_visitor>
+ template <typename L>
+ class debug_img_visitor : public doc_serializer<debug_img_visitor<L> >
{
public:
// Constructor
@@ -69,21 +70,18 @@ namespace scribo
unsigned output_ratio);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
- template <typename L>
void visit(const line_info<L>& line) const;
private: // Attributes
mln::image2d<value::rgb8>& output;
unsigned output_ratio;
+ mutable L lbl_;
private: // Methods
box2d compute_bbox(const box2d& b) const;
@@ -94,9 +92,9 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
+ template <typename L>
box2d
- debug_img_visitor::compute_bbox(const box2d& b) const
+ debug_img_visitor<L>::compute_bbox(const box2d& b) const
{
point2d
pmin = b.pmin() / output_ratio,
@@ -106,8 +104,8 @@ namespace scribo
}
- inline
- debug_img_visitor::debug_img_visitor(mln::image2d<value::rgb8>& out,
+ template <typename L>
+ debug_img_visitor<L>::debug_img_visitor(mln::image2d<value::rgb8>& out,
unsigned output_ratio)
: output(out), output_ratio(output_ratio)
{
@@ -119,7 +117,7 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const document<L>& doc) const
+ debug_img_visitor<L>::visit(const document<L>& doc) const
{
// Text
if (doc.has_text())
@@ -129,19 +127,28 @@ namespace scribo
if (doc.has_elements())
{
for_all_comps(e, doc.elements())
+ {
+ lbl_ = doc.elements().labeled_image();
if (doc.elements()(e).is_valid())
doc.elements()(e).accept(*this);
+ }
}
// line seraparators
if (doc.has_vline_seps())
+ {
+ lbl_ = doc.vline_seps_comps().labeled_image();
for_all_comps(c, doc.vline_seps_comps())
if (doc.vline_seps_comps()(c).is_valid())
doc.vline_seps_comps()(c).accept(*this);
+ }
if (doc.has_hline_seps())
+ {
+ lbl_ = doc.hline_seps_comps().labeled_image();
for_all_comps(c, doc.hline_seps_comps())
if (doc.hline_seps_comps()(c).is_valid())
doc.hline_seps_comps()(c).accept(*this);
+ }
}
@@ -150,13 +157,13 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const component_info<L>& info) const
+ debug_img_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -187,7 +194,7 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const paragraph_set<L>& parset) const
+ debug_img_visitor<L>::visit(const paragraph_set<L>& parset) const
{
const line_set<L>& lines = parset.lines();
@@ -216,7 +223,7 @@ namespace scribo
template <typename L>
void
- debug_img_visitor::visit(const line_info<L>& line) const
+ debug_img_visitor<L>::visit(const line_info<L>& line) const
{
point2d
pmin = line.bbox().pmin(),
diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh
index 7b20970..f31eec1 100644
--- a/scribo/scribo/io/img/internal/full_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/full_img_visitor.hh
@@ -58,27 +58,26 @@ namespace scribo
{
- class full_img_visitor : public doc_serializer<full_img_visitor>
+ template <typename L>
+ class full_img_visitor : public doc_serializer<full_img_visitor<L> >
{
public:
// Constructor
full_img_visitor(mln::image2d<value::rgb8>& out);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
- template <typename L>
void visit(const line_info<L>& line) const;
private: // Attributes
mln::image2d<value::rgb8>& output;
+
+ mutable L lbl_;
};
@@ -86,8 +85,8 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
- full_img_visitor::full_img_visitor(mln::image2d<value::rgb8>& out)
+ template <typename L>
+ full_img_visitor<L>::full_img_visitor(mln::image2d<value::rgb8>& out)
: output(out)
{
mln_assertion(output.is_valid());
@@ -98,7 +97,7 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const document<L>& doc) const
+ full_img_visitor<L>::visit(const document<L>& doc) const
{
// Text
if (doc.has_text())
@@ -109,20 +108,29 @@ namespace scribo
{
const component_set<L>& elts = doc.elements();
for_all_comps(e, elts)
+ {
+ lbl_ = elts.labeled_image();
if (elts(e).is_valid())
elts(e).accept(*this);
+ }
}
// line seraparators
if (doc.has_vline_seps())
+ {
+ lbl_ = doc.vline_seps_comps().labeled_image();
for_all_comps(c, doc.vline_seps_comps())
if (doc.vline_seps_comps()(c).is_valid())
doc.vline_seps_comps()(c).accept(*this);
+ }
if (doc.has_hline_seps())
+ {
+ lbl_ = doc.hline_seps_comps().labeled_image();
for_all_comps(c, doc.hline_seps_comps())
if (doc.hline_seps_comps()(c).is_valid())
doc.hline_seps_comps()(c).accept(*this);
+ }
}
@@ -131,13 +139,13 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const component_info<L>& info) const
+ full_img_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -167,7 +175,7 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const paragraph_set<L>& parset) const
+ full_img_visitor<L>::visit(const paragraph_set<L>& parset) const
{
// const line_set<L>& lines = parset.lines();
@@ -187,7 +195,7 @@ namespace scribo
template <typename L>
void
- full_img_visitor::visit(const line_info<L>& line) const
+ full_img_visitor<L>::visit(const line_info<L>& line) const
{
// mln::draw::box(output, line.bbox(), literal::red);
diff --git a/scribo/scribo/io/img/save.hh b/scribo/scribo/io/img/save.hh
index 04f0a3c..a985d07 100644
--- a/scribo/scribo/io/img/save.hh
+++ b/scribo/scribo/io/img/save.hh
@@ -150,7 +150,7 @@ namespace scribo
{
mln_precondition(doc.is_valid());
mln::image2d<value::rgb8> output = duplicate(doc.image());
- scribo::io::img::internal::full_img_visitor f(output);
+ scribo::io::img::internal::full_img_visitor<L> f(output);
doc.accept(f);
return output;
}
@@ -164,7 +164,7 @@ namespace scribo
output(box2d(doc.image().domain().pmin() / 4,
doc.image().domain().pmax() / 4));
data::fill(output, literal::black);
- scribo::io::img::internal::debug_img_visitor f(output, 4);
+ scribo::io::img::internal::debug_img_visitor<L> f(output, 4);
doc.accept(f);
return output;
}
@@ -178,7 +178,7 @@ namespace scribo
output = mln::subsampling::antialiased(doc.image(), 4);
internal::highlight_mask highlight(0.5f);
data::transform_inplace(output, highlight);
- scribo::io::img::internal::debug_img_visitor f(output, 4);
+ scribo::io::img::internal::debug_img_visitor<L> f(output, 4);
doc.accept(f);
return output;
}
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index bbdd3e2..8373b02 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -66,28 +66,27 @@ namespace scribo
Its XSD file is located here:
http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent…
*/
- class page_xml_visitor : public doc_serializer<page_xml_visitor>
+ template <typename L>
+ class page_xml_visitor : public doc_serializer<page_xml_visitor<L> >
{
public:
// Constructor
- page_xml_visitor(std::ofstream& out);
+ page_xml_visitor<L>(std::ofstream& out);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_set<L>& comp_set) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
private: // Attributes
std::ofstream& output;
mutable int base_vertical_line_id_;
+
+ mutable L lbl_;
};
@@ -95,8 +94,8 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
- page_xml_visitor::page_xml_visitor(std::ofstream& out)
+ template <typename L>
+ page_xml_visitor<L>::page_xml_visitor(std::ofstream& out)
: output(out)
{
}
@@ -107,7 +106,7 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const document<L>& doc) const
+ page_xml_visitor<L>::visit(const document<L>& doc) const
{
// Make sure there are no duplicate ids for line separators.
// Vertical and horizontal lines are indexed separately from
@@ -143,8 +142,9 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const component_set<L>& comp_set) const
+ page_xml_visitor<L>::visit(const component_set<L>& comp_set) const
{
+ lbl_ = comp_set.labeled_image();
for_all_comps(c, comp_set)
if (comp_set(c).is_valid())
comp_set(c).accept(*this);
@@ -155,13 +155,13 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const component_info<L>& info) const
+ page_xml_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -223,7 +223,7 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const paragraph_set<L>& parset) const
+ page_xml_visitor<L>::visit(const paragraph_set<L>& parset) const
{
const line_set<L>& lines = parset.lines();
diff --git a/scribo/scribo/io/xml/save.hh b/scribo/scribo/io/xml/save.hh
index 54afa79..cc6905b 100644
--- a/scribo/scribo/io/xml/save.hh
+++ b/scribo/scribo/io/xml/save.hh
@@ -96,7 +96,7 @@ namespace scribo
template <typename L>
void save_page(const document<L>& doc, std::ofstream& output)
{
- scribo::io::xml::internal::page_xml_visitor f(output);
+ scribo::io::xml::internal::page_xml_visitor<L> f(output);
doc.accept(f);
}
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index f1135ed..31a5ed4 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -224,11 +224,15 @@ namespace scribo
template <typename L>
- bool between_separators(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ bool between_separators(const scribo::line_set<L>& lines,
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
+ const scribo::line_info<L>& l1 = lines(l1_);
+ const scribo::line_info<L>& l2 = lines(l2_);
+
// No separators found in image.
- mln_precondition(l1.holder().components().has_separators());
+ mln_precondition(lines.components().has_separators());
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -237,7 +241,7 @@ namespace scribo
col1 = l1_bbox.pcenter().col(),
col2 = l2_bbox.pcenter().col();
const mln_ch_value(L, bool)&
- separators = l1.holder().components().separators();
+ separators = lines.components().separators();
// Checking for separators starting from 1 / 4, 3/ 4 and the
// center of the box
@@ -290,9 +294,13 @@ namespace scribo
*/
template <typename L>
- bool lines_can_merge(scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ bool lines_can_merge(scribo::line_set<L>& lines,
+ const scribo::line_id_t& l1_,
+ const scribo::line_id_t& l2_)
{
+ scribo::line_info<L>& l1 = lines(l1_);
+ scribo::line_info<L>& l2 = lines(l2_);
+
// Parameters.
const float x_ratio_max = 1.7f;
const float baseline_delta_max =
@@ -306,9 +314,9 @@ namespace scribo
const point2d& l1_pmax = l1_bbox.pmax();
const point2d& l2_pmax = l2_bbox.pmax();
- const bool l1_has_separators = l1.holder().components().has_separators();
+ const bool l1_has_separators = lines.components().has_separators();
const bool l1_l2_between_separators = (l1_has_separators) ?
- between_separators(l1, l2) : false;
+ between_separators(lines, l1_, l2_) : false;
const float l_ted_cw = l2.char_width();
const float dx = std::max(l1_pmin.col(), l2_pmin.col())
@@ -424,9 +432,13 @@ namespace scribo
*/
template <typename L>
- bool non_text_and_text_can_merge(scribo::line_info<L>& l_cur, // current
- const scribo::line_info<L>& l_ted) // touched
+ bool non_text_and_text_can_merge(scribo::line_set<L>& lines,
+ const scribo::line_id_t& l_cur_, // current
+ const scribo::line_id_t l_ted_) // touched
{
+ scribo::line_info<L>& l_cur = lines(l_cur_);
+ scribo::line_info<L>& l_ted = lines(l_ted_);
+
if (l_cur.type() == line::Text || l_ted.type() != line::Text)
return false;
// the current object is a NON-textline
@@ -434,8 +446,8 @@ namespace scribo
// Check that there is no separator in between.
- if (l_cur.holder().components().has_separators()
- && between_separators(l_cur, l_ted))
+ if (lines.components().has_separators()
+ && between_separators(lines, l_cur_, l_ted_))
return false;
const box2d& l_cur_bbox = l_cur.bbox();
@@ -744,8 +756,8 @@ namespace scribo
< 5 && std::abs(l_info.meanline() -
mc_info.meanline()) < 5))
&& dx < l_ted_cw && dy < 0
- && not (l_info.holder().components().has_separators()
- && between_separators(l_info, mc_info)))
+ && not (lines.components().has_separators()
+ && between_separators(lines, l, mc)))
l = do_union(lines, l, mc, parent);
// }
@@ -801,7 +813,7 @@ namespace scribo
// could be noise or garbage... So adding new
// criterions could fix this issue.
//
- if (!non_text_and_text_can_merge(lines(l), lines(mc)))
+ if (!non_text_and_text_can_merge(lines, l, mc))
continue;
// Avoid the case when a large title ebbox overlap
@@ -868,7 +880,7 @@ namespace scribo
if (lines(l_).type() == line::Text)
{
// l_ and lcand look like text line chunks.
- if (lines_can_merge(lines(l_), lines(lcand)))
+ if (lines_can_merge(lines, l_, lcand))
{
++count_two_lines_merge;
l_ = do_union(lines, l_, lcand, parent);
@@ -899,7 +911,7 @@ namespace scribo
{
// l_ does NOT looks like a text line chunk.
++count_comp_HITS_txtline;
- if (non_text_and_text_can_merge(lines(l_), lines(lcand)))
+ if (non_text_and_text_can_merge(lines, l_, lcand))
// a petouille merges with a text line?
{
++count_comp_HITS_txtline;
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index 8fd89be..e37f610 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -29,11 +29,15 @@ namespace scribo
template <typename L>
inline
bool
- between_horizontal_separator(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ between_horizontal_separator(const line_set<L>& lines,
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
+ const line_info<L>& l1 = lines(l1_);
+ const line_info<L>& l2 = lines(l2_);
+
// No separators found in image.
- mln_precondition(l1.holder().components().has_separators());
+ mln_precondition(lines.components().has_separators());
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -42,7 +46,7 @@ namespace scribo
row1 = l1_bbox.pcenter().row(),
row2 = l2_bbox.pcenter().row();
const mln_ch_value(L, bool)&
- separators = l1.holder().components().separators();
+ separators = lines.components().separators();
unsigned row;
unsigned col_ptr;
@@ -153,18 +157,18 @@ namespace scribo
line_id_t right_nbh = right(l);
line_id_t lol_nbh = output(left_nbh);
- const line_info<L>& left_line = lines(left_nbh);
- const line_info<L>& current_line = lines(l);
- const line_info<L>& right_line = lines(right_nbh);
+ // const line_info<L>& left_line = lines(left_nbh);
+ // const line_info<L>& current_line = lines(l);
+ // const line_info<L>& right_line = lines(right_nbh);
- if (right_line.holder().components().has_separators() &&
- between_horizontal_separator(right_line, current_line))
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, right_nbh, l))
{
output(right_nbh) = right_nbh;
right_nbh = l;
}
- if (current_line.holder().components().has_separators() &&
- between_horizontal_separator(current_line, left_line))
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, l, left_nbh))
{
output(l) = l;
left_nbh = l;
diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh
index efc5259..2b685df 100644
--- a/scribo/scribo/text/paragraphs_closing.hh
+++ b/scribo/scribo/text/paragraphs_closing.hh
@@ -210,7 +210,7 @@ namespace scribo
const line_id_t& line_id = line_ids(i);
const line_info<L>& current_line = lines(line_id);
- scribo::draw::line_components(debug, current_line, p);
+ scribo::draw::line_components(debug, lines, current_line, p);
// HACK DISCLAIMER : this line is drawn in order to be
// sure that every line will be reduced to a single
--
1.5.6.5
1
0
---
milena/mln/convert/from_to.hxx | 7 +-
milena/mln/draw/polygon.hh | 105 +++++++
milena/mln/util/object_id.hh | 41 +++-
scribo/scribo/core/paragraph_info.hh | 52 +++-
scribo/scribo/core/tag/component.hh | 7 +-
scribo/scribo/core/tag/paragraph.hh | 14 +
scribo/scribo/filter/images_in_paragraph.hh | 8 +-
scribo/scribo/filter/paragraphs_bbox_overlap.hh | 145 +++++++----
scribo/scribo/filter/paragraphs_in_borders.hh | 140 ++++++++++
scribo/scribo/filter/paragraphs_in_image.hh | 29 ++-
scribo/scribo/filter/separators_in_borders.hh | 206 ++++++++++++++
scribo/scribo/filter/separators_in_element.hh | 84 +++---
scribo/scribo/filter/separators_in_paragraph.hh | 92 ++++---
scribo/scribo/filter/separators_vert_in_borders.hh | 143 ++++++++++
scribo/scribo/io/img/internal/debug_img_visitor.hh | 69 ++----
scribo/scribo/io/img/internal/full_img_visitor.hh | 39 ++-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 23 ++-
.../scribo/io/xml/internal/print_image_coords.hh | 6 +-
.../scribo/io/xml/internal/print_page_preambule.hh | 13 +-
scribo/scribo/io/xml/internal/time_info.hh | 75 +++++
.../postprocessing/images_to_drop_capital.hh | 141 ++++++++++
scribo/scribo/text/paragraphs_closing.hh | 284 ++++++++++++--------
.../toolchain/internal/content_in_hdoc_functor.hh | 58 ++++-
scribo/scribo/util/box_is_included.hh | 74 +++++
scribo/scribo/util/component_precise_outline.hh | 83 ++++--
25 files changed, 1549 insertions(+), 389 deletions(-)
create mode 100644 milena/mln/draw/polygon.hh
create mode 100644 scribo/scribo/filter/paragraphs_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_vert_in_borders.hh
create mode 100644 scribo/scribo/io/xml/internal/time_info.hh
create mode 100644 scribo/scribo/postprocessing/images_to_drop_capital.hh
create mode 100644 scribo/scribo/util/box_is_included.hh
diff --git a/milena/mln/convert/from_to.hxx b/milena/mln/convert/from_to.hxx
index cc7cc15..7891e9a 100644
--- a/milena/mln/convert/from_to.hxx
+++ b/milena/mln/convert/from_to.hxx
@@ -1,4 +1,4 @@
-// Copyright (C) 2008, 2009, 2010 EPITA Research and Development
+// Copyright (C) 2008, 2009, 2010, 2011 EPITA Research and Development
// Laboratory (LRDE)
//
// This file is part of Olena.
@@ -84,6 +84,7 @@ namespace mln
namespace util {
template <typename T> class array;
+ template <typename Tag, typename V> class object_id;
}
namespace value {
@@ -473,6 +474,10 @@ namespace mln
from_to(from.second(), to.second());
}
+ // util::object_id<Tag,V> -> V.
+ template <typename Tag, typename V>
+ void from_to_(const util::object_id<Tag,V>& from, V& to_);
+
} // end of namespace mln::convert::over_load
} // end of namespace mln::convert
diff --git a/milena/mln/draw/polygon.hh b/milena/mln/draw/polygon.hh
new file mode 100644
index 0000000..5c6c917
--- /dev/null
+++ b/milena/mln/draw/polygon.hh
@@ -0,0 +1,105 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef MLN_DRAW_POLYGON_HH
+# define MLN_DRAW_POLYGON_HH
+
+/// \file
+///
+/// Draw a polygon in an image.
+/// \fixme Add specializations for horizontal polygon edges (use pointers/memset).
+
+# include <mln/core/concept/image.hh>
+# include <mln/core/site_set/p_array.hh>
+# include <mln/draw/line.hh>
+
+
+namespace mln
+{
+
+ namespace draw
+ {
+
+ /*! Draw a polygon at level \p v in image \p ima.
+ *
+ * \param[in,out] ima The image to be drawn.
+ * \param[in] par The polygon site set.
+ * \param[in] v The value to assign to all drawn pixels.
+ * \param[in] output_ratio Divisor applied to each site of \p par before drawing.
+ * \pre \p ima has to be initialized and \p par must hold more than one site.
+ *
+ */
+ template <typename I>
+ void polygon(Image<I>& ima,
+ const p_array<mln_site(I)>& par,
+ const mln_value(I)& v,
+ unsigned output_ratio);
+
+ // \overload Same as above, called with an output_ratio of 1.
+ template <typename I>
+ void polygon(Image<I>& ima,
+ const p_array<mln_site(I)>& par,
+ const mln_value(I)& v);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename I> // Draw one line per pair of consecutive sites of par, each site divided by output_ratio.
+ void polygon(Image<I>& ima_,
+ const p_array<mln_site(I)>& par,
+ const mln_value(I)& v,
+ unsigned output_ratio)
+ {
+ I& ima = exact(ima_);
+ mln_precondition(ima.is_valid());
+ mln_precondition(par.nelements() > 1); // More than one site is required.
+
+ mln_site(I) p_last, tmp;
+ mln_piter(p_array<mln_site(I)>) p(par);
+ p_last = par[0] / output_ratio; // NOTE(review): output_ratio == 0 is not guarded against.
+ for_all(p)
+ {
+ tmp = p / output_ratio; // Current site divided by output_ratio.
+ draw::line(ima, p_last, tmp, v); // Edge from the previous site to the current one.
+ p_last = tmp;
+ }
+ } // NOTE(review): no edge is drawn from the last site back to par[0] -- confirm callers repeat the first site to close the polygon.
+
+ template <typename I> // Overload that forwards to the four-argument polygon().
+ void polygon(Image<I>& ima,
+ const p_array<mln_site(I)>& par,
+ const mln_value(I)& v)
+ {
+ polygon(ima, par, v, 1); // Forward with output_ratio = 1.
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace mln::draw
+
+} // end of namespace mln
+
+
+#endif // ! MLN_DRAW_POLYGON_HH
diff --git a/milena/mln/util/object_id.hh b/milena/mln/util/object_id.hh
index d7db929..794abc4 100644
--- a/milena/mln/util/object_id.hh
+++ b/milena/mln/util/object_id.hh
@@ -38,6 +38,24 @@
namespace mln
{
+ // Forward declaration
+ namespace util { template <typename Tag, typename V> class object_id; }
+
+ namespace convert
+ {
+
+ namespace over_load
+ {
+
+ // object_id<Tag,V> -> V.
+ template <typename Tag, typename V>
+ void from_to_(const util::object_id<Tag,V>& from, V& to_);
+
+ } // end of namespace mln::convert::over_load
+
+ } // end of namespace mln::convert
+
+
namespace util
{
@@ -90,9 +108,12 @@ namespace mln
bool
operator<(const object_id<Tag,V>& lhs, const object_id<Tag,V>& rhs);
+ } // end of namespace mln::util
# ifndef MLN_INCLUDE_ONLY
+ namespace util
+ {
template <typename Tag, typename V>
inline
@@ -205,10 +226,26 @@ namespace mln
return lhs.value() < rhs.value();
}
+ } // end of namespace mln::util
-# endif // ! MLN_INCLUDE_ONLY
+ namespace convert
+ {
- } // end of namespace mln::util
+ namespace over_load
+ {
+
+ // object_id<Tag,V> -> V: assign from.value() to to_.
+ template <typename Tag, typename V>
+ void from_to_(const util::object_id<Tag,V>& from, V& to_)
+ {
+ to_ = from.value();
+ }
+
+ } // end of namespace mln::convert::over_load
+
+ } // end of namespace mln::convert
+
+# endif // ! MLN_INCLUDE_ONLY
} // end of namespace mln
diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh
index 90db7da..1029913 100644
--- a/scribo/scribo/core/paragraph_info.hh
+++ b/scribo/scribo/core/paragraph_info.hh
@@ -28,6 +28,7 @@
# include <scribo/core/line_info.hh>
# include <scribo/core/line_links.hh>
+# include <scribo/core/tag/paragraph.hh>
# include <mln/util/array.hh>
# include <mln/accu/shape/bbox.hh>
@@ -79,6 +80,11 @@ namespace scribo
void set_delta_baseline(const int delta_baseline);
int delta_baseline() const;
+ void fast_merge(paragraph_info<L>& info);
+
+ void update_tag(paragraph::Tag tag);
+ paragraph::Tag tag() const;
+
private:
mln::util::array<line_id_t> line_ids_;
mln::accu::shape::bbox<mln_site(L)> bbox_;
@@ -88,7 +94,7 @@ namespace scribo
float color_reliability_;
int delta_baseline_;
- bool needs_stats_update_;
+ paragraph::Tag tag_;
bool is_valid_;
};
@@ -103,13 +109,13 @@ namespace scribo
template <typename L>
paragraph_info<L>::paragraph_info()
- : needs_stats_update_(false), is_valid_(false)
+ : tag_(paragraph::None), is_valid_(false)
{
}
template <typename L>
paragraph_info<L>::paragraph_info(const line_links<L>& llinks)
- : llinks_(llinks), needs_stats_update_(false), is_valid_(true)
+ : llinks_(llinks), tag_(paragraph::None), is_valid_(true)
{
}
@@ -121,7 +127,7 @@ namespace scribo
bbox_.take(line.bbox());
// More data may need to be updated!
- needs_stats_update_ = true;
+ tag_ = paragraph::Needs_Precise_Stats_Update;
}
template <typename L>
@@ -206,14 +212,14 @@ namespace scribo
bool
paragraph_info<L>::needs_stats_update() const
{
- return needs_stats_update_;
+ return tag_ == paragraph::Needs_Precise_Stats_Update;
}
template <typename L>
void
paragraph_info<L>::force_stats_update()
{
- if (!needs_stats_update_)
+ if (!needs_stats_update())
return;
const line_set<L>& lines = llinks_.lines();
@@ -259,7 +265,7 @@ namespace scribo
// FIXME: Update paragraph stats
- needs_stats_update_ = false;
+ tag_ = paragraph::None;
}
template <typename L>
@@ -277,6 +283,38 @@ namespace scribo
}
template <typename L> // fast_merge: absorb other's bbox, delta_baseline and line ids into this paragraph.
+ void
+ paragraph_info<L>::fast_merge(paragraph_info<L>& other)
+ {
+ tag_ = paragraph::Needs_Precise_Stats_Update; // Makes needs_stats_update() return true for this paragraph.
+ other.update_tag(paragraph::Merged);
+ other.invalidate(); // NOTE(review): other is still read below (bbox, line ids) -- confirm invalidate() leaves them intact.
+
+ // Extend this bbox with the bbox of the other paragraph.
+ bbox_.take(other.bbox());
+
+ // Keep the largest of the two delta_baseline values.
+ // FIXME: delta base line should be updated correctly!!
+ set_delta_baseline(std::max(other.delta_baseline_, delta_baseline_));
+
+ line_ids_.append(other.line_ids()); // Line ids of other are appended after the existing ones.
+ }
+
+ template <typename L> // update_tag: replace the tag of this paragraph.
+ void
+ paragraph_info<L>::update_tag(paragraph::Tag tag)
+ {
+ tag_ = tag; // Overwrites the previous tag unconditionally.
+ }
+
+ template <typename L> // tag: read accessor for the tag of this paragraph.
+ paragraph::Tag
+ paragraph_info<L>::tag() const
+ {
+ return tag_;
+ }
+
+ template <typename L>
bool
operator==(const paragraph_info<L>& lhs, const paragraph_info<L>& rhs)
{
diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh
index dc9db90..d5afb36 100644
--- a/scribo/scribo/core/tag/component.hh
+++ b/scribo/scribo/core/tag/component.hh
@@ -60,7 +60,8 @@ namespace scribo
WhitespaceSeparator,
Noise,
Punctuation,
- Image
+ Image,
+ DropCapital
};
@@ -135,6 +136,9 @@ namespace scribo
break;
case Image:
str = "Image";
+ break; // Required: without it, Image falls through and is reported as "DropCapital".
+ case DropCapital:
+ str = "DropCapital";
break;
}
@@ -159,6 +162,8 @@ namespace scribo
return Punctuation;
else if (str == "Image")
return Image;
+ else if (str == "DropCapital")
+ return DropCapital;
return Undefined;
}
diff --git a/scribo/scribo/core/tag/paragraph.hh b/scribo/scribo/core/tag/paragraph.hh
index 14dd579..9a11a45 100644
--- a/scribo/scribo/core/tag/paragraph.hh
+++ b/scribo/scribo/core/tag/paragraph.hh
@@ -36,6 +36,20 @@ namespace scribo
// Paragraph id tag.
struct ParagraphId;
+ namespace paragraph
+ {
+
+
+ enum Tag // State stored in paragraph_info::tag_.
+ {
+ None = 0, // Value set by the paragraph_info constructors and restored by force_stats_update().
+ Needs_Precise_Stats_Update, // Tested by paragraph_info::needs_stats_update().
+ Merged // Set by fast_merge() on the paragraph that has been absorbed.
+ };
+
+
+ } // end of namespace scribo::paragraph
+
} // end of namespace scribo
diff --git a/scribo/scribo/filter/images_in_paragraph.hh b/scribo/scribo/filter/images_in_paragraph.hh
index e05b202..3cf64e1 100644
--- a/scribo/scribo/filter/images_in_paragraph.hh
+++ b/scribo/scribo/filter/images_in_paragraph.hh
@@ -101,12 +101,12 @@ namespace scribo
// => Ignore it.
if (tl && tr && ml && mc && mr && bl && br)
elts(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_elements(elts);
}
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_elements(elts);
+
trace::exiting("scribo::filter::images_in_paragraph");
}
diff --git a/scribo/scribo/filter/paragraphs_bbox_overlap.hh b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
index d40d42f..188a77e 100644
--- a/scribo/scribo/filter/paragraphs_bbox_overlap.hh
+++ b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
@@ -126,70 +126,113 @@ namespace scribo
const box2d& b_ = parset(cur_id).bbox();
- if (parset(cur_id).nlines() > 1)
+ if (parset(cur_id).nlines() > 3)
{
mln::draw::box_plain(billboard, b_, cur_id);
continue;
}
- const unsigned tl = billboard(b_.pmin());
- const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const unsigned br = billboard(b_.pmax());
-
- typedef std::set<unsigned> set_t;
- set_t labels;
- labels.insert(tl);
- labels.insert(tl);
- labels.insert(tr);
- labels.insert(ml);
- labels.insert(mc);
- labels.insert(mr);
- labels.insert(bl);
- labels.insert(br);
-
- for (set_t::const_iterator it = labels.begin();
- it != labels.end();
- ++it)
- if (not_to_ignore(*it))
- {
- box2d b2 = output(*it).bbox();
- box2d b_i = scribo::util::box_intersection(b_, b2);
- volatile float
- b_ratio = b_i.nsites() / (float)b_.nsites(),
- b2_ratio = b_i.nsites() / (float)b2.nsites();
+ // Box is mostly in the background => do nothing.
+ if (mc == 0)
+ {
+ mln::draw::box_plain(billboard, b_, cur_id);
+ continue;
+ }
+ else // Bbox center is inside another box. Check if we can
+ // merge the current box with it.
+ {
+ // Consider other potential overlapping bboxes.
+ const unsigned tl = billboard(b_.pmin());
+ const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const unsigned br = billboard(b_.pmax());
+
+ typedef std::set<unsigned> set_t;
+ set_t labels;
+ labels.insert(tl);
+ labels.insert(tr);
+ labels.insert(mc);
+ labels.insert(bl);
+ labels.insert(br);
+
+ // FIXME: check that there are at least 3 points (including
+ // the center) in another paragraph.
+
+ // The potential merged bbox is already ignored or the
+ // current bbox overlaps with several bboxes.
+ // => Ignore current bbox .
+ //
+ if (!not_to_ignore(mc)
+ || (labels.size() > 1 && labels.find(0) == labels.end()))
+ {
+ mln::draw::box_plain(billboard, b_, cur_id); // Really?
+ not_to_ignore(cur_id) = false;
+ continue;
+ }
- if (b2_ratio == 1)
+ for (set_t::const_iterator it = labels.begin();
+ it != labels.end(); ++it)
+ if (*it)
{
- // Merge paragraphs and redraw the new bbox.
- output(cur_id).fast_merge(output(*it));
- mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id);
+ mln_assertion(*it != mc);
+
+ box2d b2 = output(*it).bbox();
+ box2d b_i = scribo::util::box_intersection(b_, b2);
+ volatile float
+ b_ratio = b_i.nsites() / (float)b_.nsites();
+
+ // If the bbox is widely included in another box.
+ if (b_ratio > 0.8)
+ {
+ output(mc).fast_merge(output(cur_id));
+ mln::draw::box_plain(billboard, parset(mc).bbox(), mc);
+ }
+ else
+ mln::draw::box_plain(billboard, parset(cur_id).bbox(), cur_id);
+ break;
}
- else if (b_ratio == 1)
- {
- // Merge paragraphs and redraw the new bbox.
- output(*it).fast_merge(output(cur_id));
- mln::draw::box_plain(billboard, output(*it).bbox(), *it);
- }
- else if ((b_ratio > 0.4 || b2_ratio > 0.9))
- {
- // si b_ est inclus dans une boite dont le nombre de
- // comp > 4 => invalid juste b_ sinon => invalid b_ et
- // b2
- not_to_ignore(cur_id) = false;
-
- if (parset(*it).nlines() < 4)
- not_to_ignore(*it) = false;
- }
- }
- mln::draw::box_plain(billboard, b_, cur_id);
+ }
}
+ // if (not_to_ignore(*it))
+ // {
+ // box2d b2 = output(*it).bbox();
+ // box2d b_i = scribo::util::box_intersection(b_, b2);
+
+ // volatile float
+ // b_ratio = b_i.nsites() / (float)b_.nsites(),
+ // b2_ratio = b_i.nsites() / (float)b2.nsites();
+
+ // if (b2_ratio == 1)
+ // {
+ // // Merge paragraphs and redraw the new bbox.
+ // output(cur_id).fast_merge(output(*it));
+ // mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id);
+ // }
+ // else if (b_ratio == 1)
+ // {
+ // // Merge paragraphs and redraw the new bbox.
+ // output(*it).fast_merge(output(cur_id));
+ // mln::draw::box_plain(billboard, output(*it).bbox(), *it);
+ // }
+ // else if ((b_ratio > 0.4 || b2_ratio > 0.9))
+ // {
+ // // si b_ est inclus dans une boite dont le nombre de
+ // // comp > 4 => invalid juste b_ sinon => invalid b_ et
+ // // b2
+ // not_to_ignore(cur_id) = false;
+
+ // if (parset(*it).nlines() < 4)
+ // not_to_ignore(*it) = false;
+ // }
+ // }
+
+ // mln::draw::box_plain(billboard, b_, cur_id);
+ // }
+
output.invalidate(not_to_ignore);
for_all_paragraphs(p, output)
diff --git a/scribo/scribo/filter/paragraphs_in_borders.hh b/scribo/scribo/filter/paragraphs_in_borders.hh
new file mode 100644
index 0000000..8953282
--- /dev/null
+++ b/scribo/scribo/filter/paragraphs_in_borders.hh
@@ -0,0 +1,140 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
+# define SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive paragraphs.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate paragraphs located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Warning: it only invalidates the paragraphs whose bounding
+ /// box is included in a border area, then stores the updated
+ /// paragraph set back into \p doc.
+ ///
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Paragraphs located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ paragraphs_in_borders(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L> // Invalidate the paragraphs whose bbox is included in one of four border boxes.
+ void
+ paragraphs_in_borders(document<L>& doc)
+ {
+ trace::entering("scribo::filter::paragraphs_in_borders");
+
+ mln_precondition(doc.is_valid());
+
+ const mln::image2d<mln::value::rgb8>& ima = doc.image();
+
+ unsigned border_size = std::min(43., 0.02 * ima.domain().width()); // 2% of the image width, capped at 43, truncated to unsigned.
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(border_size, geom::max_col(ima)), // NOTE(review): border_size is used as an absolute row -- assumes the domain starts at row 0.
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), border_size), // NOTE(review): border_size is used as an absolute column -- assumes the domain starts at column 0.
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+ box2d
+ bt(ptl, ptr), // Top box.
+ br(pt, pbr), // Right box.
+ bb(pbl, pbr), // Bottom box.
+ bl(ptl, pb); // Left box.
+
+ // Invalidate every valid paragraph whose bbox is included in a border box.
+ if (doc.has_text())
+ {
+ paragraph_set<L> parset = doc.paragraphs();
+ for_all_paragraphs(p, parset)
+ if (parset(p).is_valid())
+ if (util::box_is_included(parset(p).bbox(), bt)
+ || util::box_is_included(parset(p).bbox(), br)
+ || util::box_is_included(parset(p).bbox(), bb)
+ || util::box_is_included(parset(p).bbox(), bl))
+ {
+ parset(p).invalidate();
+ }
+
+ doc.set_paragraphs(parset); // Store the updated set back into the document.
+ }
+
+ trace::exiting("scribo::filter::paragraphs_in_borders");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
diff --git a/scribo/scribo/filter/paragraphs_in_image.hh b/scribo/scribo/filter/paragraphs_in_image.hh
index 1029430..f67b863 100644
--- a/scribo/scribo/filter/paragraphs_in_image.hh
+++ b/scribo/scribo/filter/paragraphs_in_image.hh
@@ -89,8 +89,6 @@ namespace scribo
&& doc.elements()(e).type() == component::Image)
mln::draw::box_plain(billboard, doc.elements()(e).bbox(), true);
- mln::io::pbm::save(billboard, "billboard_parimage.pbm");
-
const paragraph_set<L>& parset = doc.paragraphs();
mln::util::array<bool> not_to_ignore(parset.nelements() + 1, true);
not_to_ignore(0) = false;
@@ -101,15 +99,34 @@ namespace scribo
const bool
tl = billboard(b_.pmin()),
tr = billboard.at_(b_.pmin().row(), b_.pmax().col()),
- ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()),
mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()),
- mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()),
bl = billboard.at_(b_.pmax().row(), b_.pmin().col()),
br = billboard(b_.pmax());
+ typedef mln::util::set<int> set_t;
+ set_t s;
+ s.insert(tl);
+ s.insert(tr);
+ s.insert(mc);
+ s.insert(bl);
+ s.insert(br);
+
+ if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0)))
+ continue;
+
// The paragraph is fully included in an image.
- if (tl && tr && ml && mc && mr && bl && br)
- not_to_ignore(cur_id) = false;
+ for_all_elements(e, s)
+ if (s[e] != 0
+ && (mc != 0 && mc == s[e]
+ && ((tl == mc && bl == mc)
+ || (tr == mc && br == mc)
+ || (tl == mc && tr == mc)
+ || (bl == mc && br == mc))))
+ {
+// if (tl && tr && ml && mc && mr && bl && br)
+ not_to_ignore(cur_id) = false;
+ break;
+ }
}
paragraph_set<L> output = parset.duplicate();
diff --git a/scribo/scribo/filter/separators_in_borders.hh b/scribo/scribo/filter/separators_in_borders.hh
new file mode 100644
index 0000000..8ccb6b1
--- /dev/null
+++ b/scribo/scribo/filter/separators_in_borders.hh
@@ -0,0 +1,206 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
+# define SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive separators.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate separators located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ /// \param[in] vratio Border size used for vertical separators, as a ratio of min(width, height).
+ /// \param[in] hratio Border size used for horizontal separators, as a ratio of min(width, height).
+ ///
+ /// Warning: separators are not removed from the separator image; their
+ /// components are only tagged component::Ignored in their component_set.
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Separators located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ separators_in_borders(document<L>& doc, float vratio, float hratio);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L> // Tag as Ignored the separator components whose bbox is included in one of four border boxes.
+ void
+ separators_in_borders(document<L>& doc, float vratio, float hratio)
+ {
+ trace::entering("scribo::filter::separators_in_borders");
+
+ mln_precondition(doc.is_valid());
+
+ const mln::image2d<mln::value::rgb8>& ima = doc.image();
+
+ // Horizontal separators
+ if (doc.has_hline_seps())
+ {
+ unsigned border_size = hratio * std::min(ima.domain().width(), ima.domain().height()); // hratio times the smallest image dimension.
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(border_size, geom::max_col(ima)),
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), border_size),
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+ box2d
+ bt(ptl, ptr), // Top box.
+ br(pt, pbr), // Right box.
+ bb(pbl, pbr), // Bottom box.
+ bl(ptl, pb); // Left box.
+
+
+ component_set<L> hline = doc.hline_seps_comps().duplicate();
+ for_all_comps(c, hline)
+ if (hline(c).is_valid())
+ if (util::box_is_included(hline(c).bbox(), bt)
+ || util::box_is_included(hline(c).bbox(), br)
+ || util::box_is_included(hline(c).bbox(), bb)
+ || util::box_is_included(hline(c).bbox(), bl))
+ {
+ hline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
+ }
+
+
+ // Vertical separators
+ if (doc.has_vline_seps())
+ {
+ unsigned border_size = vratio * std::min(ima.domain().width(), ima.domain().height()); // vratio times the smallest image dimension.
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(border_size, geom::max_col(ima)),
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), border_size),
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+ box2d
+ bt(ptl, ptr), // Top box.
+ br(pt, pbr), // Right box.
+ bb(pbl, pbr), // Bottom box.
+ bl(ptl, pb); // Left box.
+
+
+ component_set<L> vline = doc.vline_seps_comps().duplicate();
+ for_all_comps(c, vline)
+ if (vline(c).is_valid())
+ {
+ if (util::box_is_included(vline(c).bbox(), bt)
+ || util::box_is_included(vline(c).bbox(), br)
+ || util::box_is_included(vline(c).bbox(), bb)
+ || util::box_is_included(vline(c).bbox(), bl))
+ {
+ // std::cout << vline(c).bbox() << " is included in ";
+ // if (util::box_is_included(vline(c).bbox(), bt))
+ // std::cout << bt << std::endl;
+ // if (util::box_is_included(vline(c).bbox(), br))
+ // std::cout << br << std::endl;
+ // if (util::box_is_included(vline(c).bbox(), bb))
+ // std::cout << bb << std::endl;
+ // if (util::box_is_included(vline(c).bbox(), bl))
+ // std::cout << bl << std::endl;
+
+ vline(c).update_tag(component::Ignored);
+ }
+ // else
+ // {
+ // std::cout << vline(c).bbox() << " is not included in " << bt << " - " << br << " - " << bb << " - " << bl << std::endl;
+ // }
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
+ }
+
+ trace::exiting("scribo::filter::separators_in_borders");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
diff --git a/scribo/scribo/filter/separators_in_element.hh b/scribo/scribo/filter/separators_in_element.hh
index 228d82f..a8b0ebb 100644
--- a/scribo/scribo/filter/separators_in_element.hh
+++ b/scribo/scribo/filter/separators_in_element.hh
@@ -90,26 +90,26 @@ namespace scribo
{
component_set<L> hline = doc.hline_seps_comps().duplicate();
for_all_comps(c, hline)
- {
- const mln_box(L)& b_ = hline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- hline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_hline_separators(doc.hline_seps(), hline);
- }
+ if (hline(c).is_valid())
+ {
+ const mln_box(L)& b_ = hline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br)
+ hline(c).update_tag(component::Ignored);
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
}
// Vertical separators
@@ -117,29 +117,29 @@ namespace scribo
{
component_set<L> vline = doc.vline_seps_comps().duplicate();
for_all_comps(c, vline)
- {
- const mln_box(L)& b_ = vline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- vline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_vline_separators(doc.vline_seps(), vline);
- }
-
- trace::exiting("scribo::filter::separators_in_element");
+ if (vline(c).is_valid())
+ {
+ const mln_box(L)& b_ = vline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br)
+ vline(c).update_tag(component::Ignored);
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
}
+
+ trace::exiting("scribo::filter::separators_in_element");
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/filter/separators_in_paragraph.hh b/scribo/scribo/filter/separators_in_paragraph.hh
index 3e7a150..7c157be 100644
--- a/scribo/scribo/filter/separators_in_paragraph.hh
+++ b/scribo/scribo/filter/separators_in_paragraph.hh
@@ -58,14 +58,14 @@ namespace scribo
///
template <typename L>
void
- separators_in_paragraph(document<L>& doc);
+ separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned vmax_size);
# ifndef MLN_INCLUDE_ONLY
template <typename L>
void
- separators_in_paragraph(document<L>& doc)
+ separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned vmax_size)
{
trace::entering("scribo::filter::separators_in_paragraph");
@@ -90,26 +90,28 @@ namespace scribo
{
component_set<L> hline = doc.hline_seps_comps().duplicate();
for_all_comps(c, hline)
- {
- const mln_box(L)& b_ = hline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- hline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_hline_separators(doc.hline_seps(), hline);
- }
+ if (hline(c).is_valid())
+ {
+ const mln_box(L)& b_ = hline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br
+ && hline(c).bbox().width() < hmax_size)
+ hline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
}
// Vertical separators
@@ -117,29 +119,31 @@ namespace scribo
{
component_set<L> vline = doc.vline_seps_comps().duplicate();
for_all_comps(c, vline)
- {
- const mln_box(L)& b_ = vline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- vline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_vline_separators(doc.vline_seps(), vline);
- }
-
- trace::exiting("scribo::filter::separators_in_paragraph");
+ if (vline(c).is_valid())
+ {
+ const mln_box(L)& b_ = vline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br
+ && vline(c).bbox().height() < vmax_size)
+ vline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
}
+
+ trace::exiting("scribo::filter::separators_in_paragraph");
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/filter/separators_vert_in_borders.hh b/scribo/scribo/filter/separators_vert_in_borders.hh
new file mode 100644
index 0000000..4a9e806
--- /dev/null
+++ b/scribo/scribo/filter/separators_vert_in_borders.hh
@@ -0,0 +1,143 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
+# define SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive separators.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate separators located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Warning: it does not remove separators from the separator
+ /// image. It only invalidates separator components in their
+ /// respective component_set.
+ ///
+ /// \verbatim
+ ///
+ ///   -----------
+ ///   |_!____!__|
+ ///   | !    ! <--------- Separators located in this area are
+ ///   | !    !  |         invalidated.
+ ///   | !    !  |
+ ///   |_!____!__|
+ ///   | !    !  |
+ ///   -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ separators_vert_in_borders(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ // Tag as Ignored the valid vertical separators whose bounding box
+ // fits in one of the four border bands of the document image.
+ template <typename L>
+ void
+ separators_vert_in_borders(document<L>& doc)
+ {
+   trace::entering("scribo::filter::separators_vert_in_borders");
+   mln_precondition(doc.is_valid());
+
+   const mln::image2d<mln::value::rgb8>& ima = doc.image();
+   // Band thickness: 5% of the image width, capped at 43 pixels.
+   float border_size = std::min(43., 0.05 * ima.domain().width());
+
+   ///            pt
+   ///  ptl X------X---
+   ///      |_!____!__X ptr
+   ///      | !    !  |
+   ///      | !    !  |
+   ///      | !    !  |
+   ///  pbl X_!____!__|
+   ///      | !    !  |
+   ///      --X-------X
+   ///        pb      pbr
+   ///
+   // Offsets are relative to the domain bounds, so a non-(0,0) origin works.
+   point2d
+     ptl = ima.domain().pmin(),
+     pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+     ptr(geom::min_row(ima) + border_size, geom::max_col(ima)),
+     pbr = ima.domain().pmax(),
+     pb(geom::max_row(ima), geom::min_col(ima) + border_size),
+     pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+   box2d
+     bt(ptl, ptr),  // top band
+     br(pt, pbr),   // right band
+     bb(pbl, pbr),  // bottom band
+     bl(ptl, pb);   // left band
+
+   if (doc.has_vline_seps())
+   {
+     component_set<L> vline = doc.vline_seps_comps().duplicate();
+     for_all_comps(c, vline)
+       if (vline(c).is_valid())
+         if (util::box_is_included(vline(c).bbox(), bt)
+             || util::box_is_included(vline(c).bbox(), br)
+             || util::box_is_included(vline(c).bbox(), bb)
+             || util::box_is_included(vline(c).bbox(), bl))
+         {
+           vline(c).update_tag(component::Ignored);
+         }
+
+     // FIXME: warning this call may produce inconsistent data
+     // Ignored components are still in the separator image...
+     doc.set_vline_separators(doc.vline_seps(), vline);
+   }
+   trace::exiting("scribo::filter::separators_vert_in_borders");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh
index a4715f5..520a743 100644
--- a/scribo/scribo/io/img/internal/debug_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -34,7 +34,7 @@
# include <mln/core/image/image2d.hh>
# include <mln/value/rgb8.hh>
-# include <mln/draw/site_set.hh>
+# include <mln/draw/polygon.hh>
# include <mln/subsampling/antialiased.hh>
# include <mln/morpho/elementary/gradient_external.hh>
@@ -45,7 +45,7 @@
# include <scribo/util/component_precise_outline.hh>
# include <scribo/io/img/internal/draw_edges.hh>
-
+# include <scribo/text/paragraphs_closing.hh>
namespace scribo
@@ -85,9 +85,6 @@ namespace scribo
mln::image2d<value::rgb8>& output;
unsigned output_ratio;
- // FIXME: we would like its type to be L.
- mutable image2d<scribo::def::lbl_type> lbl_sub;
-
private: // Methods
box2d compute_bbox(const box2d& b) const;
};
@@ -97,7 +94,7 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
+ inline
box2d
debug_img_visitor::compute_bbox(const box2d& b) const
{
@@ -131,50 +128,11 @@ namespace scribo
// Page elements (Pictures, ...)
if (doc.has_elements())
{
- // Prepare element edges
-
- // L lbl = duplicate(doc.elements().labeled_image());
- // for_all_comps(c, doc.elements())
- // if (! doc.elements()(c).is_valid())
- // data::fill(((lbl | doc.elements()(c).bbox()).rw()
- // | (pw::value(lbl) == pw::cst(c))).rw(), 0);
-
- // const L& lbl = doc.lines().components().labeled_image();
- // lbl_sub = mln::subsampling::antialiased(lbl, output_ratio);
-
- // mln::io::pgm::save(data::wrap(value::int_u8(), lbl), "lbl.pgm");
- // mln::io::pgm::save(data::wrap(value::int_u8(), lbl_sub), "lbl_sub.pgm");
-
- // // FIXME: UGLY! Too slow!
- // scribo::def::lbl_type nlabels;
- // component_set<L> elts = primitive::extract::components(
- // data::convert(bool(), lbl_sub),
- // c8(),
- // nlabels);
-
- // Preserving elements tags
- // if (doc.elements().nelements() != elts.nelements())
- // {
- // std::cerr << "Warnig: could not preserve element type in "
- // << "img debug output." << std::endl;
- // std::cerr << "The number of non text element has changed while "
- // << "subsampling images : "
- // << doc.elements().nelements() << " vs "
- // << elts.nelements() << std::endl;
- // }
- // else
- // for_all_comps(c, doc.elements())
- // {
- // elts(c).update_type(doc.elements()(c).type());
- // elts(c).update_tag(doc.elements()(c).tag());
- // }
-
for_all_comps(e, doc.elements())
if (doc.elements()(e).is_valid())
doc.elements()(e).accept(*this);
}
-
// line seraparators
if (doc.has_vline_seps())
for_all_comps(c, doc.vline_seps_comps())
@@ -198,23 +156,28 @@ namespace scribo
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(
- extend((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id)), 0));
+ par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
switch (info.type())
{
case component::HorizontalLineSeparator:
case component::VerticalLineSeparator:
{
- mln::draw::site_set(output, par, literal::cyan, output_ratio);
+ mln::draw::polygon(output, par, literal::cyan, output_ratio);
}
break;
+ case component::DropCapital:
+ {
+ mln::draw::polygon(output, par, literal::violet, output_ratio);
+ }
+ break;
+
default:
case component::Image:
{
- mln::draw::site_set(output, par, literal::orange, output_ratio);
+ mln::draw::polygon(output, par, literal::orange, output_ratio);
}
break;
}
@@ -228,6 +191,9 @@ namespace scribo
{
const line_set<L>& lines = parset.lines();
+ // Prepare paragraph outlines.
+ L par_clo = text::paragraphs_closing(parset);
+
for_all_paragraphs(p, parset)
if (parset(p).is_valid())
{
@@ -235,10 +201,11 @@ namespace scribo
for_all_paragraph_lines(lid, line_ids)
{
- line_id_t l = line_ids(lid);
- lines(l).accept(*this);
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
}
+ // Adjust bbox to output image size.
box2d b = compute_bbox(parset(p).bbox());
b.enlarge(1);
b.crop_wrt(output.domain());
diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh
index f2c0f5c..7b20970 100644
--- a/scribo/scribo/io/img/internal/full_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/full_img_visitor.hh
@@ -34,7 +34,7 @@
# include <mln/core/image/image2d.hh>
# include <mln/value/rgb8.hh>
-# include <mln/draw/site_set.hh>
+# include <mln/draw/polygon.hh>
# include <mln/draw/box.hh>
# include <scribo/core/internal/doc_serializer.hh>
@@ -137,22 +137,27 @@ namespace scribo
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id)));
+ par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
switch (info.type())
{
case component::HorizontalLineSeparator:
case component::VerticalLineSeparator:
{
- mln::draw::site_set(output, par, literal::cyan);
+ mln::draw::polygon(output, par, literal::cyan);
}
break;
+ case component::DropCapital:
+ {
+ mln::draw::polygon(output, par, literal::violet);
+ }
+ break;
default:
case component::Image:
{
- mln::draw::site_set(output, par, literal::orange);
+ mln::draw::polygon(output, par, literal::orange);
}
break;
}
@@ -164,20 +169,18 @@ namespace scribo
void
full_img_visitor::visit(const paragraph_set<L>& parset) const
{
- const line_set<L>& lines = parset.lines();
+ // const line_set<L>& lines = parset.lines();
+
+ // Prepare paragraph outlines.
+ L par_clo = text::paragraphs_closing(parset);
for_all_paragraphs(p, parset)
if (parset(p).is_valid())
{
- const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+ p_array<point2d> par = scribo::util::component_precise_outline(par_clo
+ | parset(p).bbox(), p);
- for_all_paragraph_lines(lid, line_ids)
- {
- line_id_t l = line_ids(lid);
- lines(l).accept(*this);
- }
-
- mln::draw::box(output, parset(p).bbox(), literal::blue);
+ mln::draw::polygon(output, par, literal::blue);
}
}
@@ -186,7 +189,15 @@ namespace scribo
void
full_img_visitor::visit(const line_info<L>& line) const
{
- mln::draw::box(output, line.bbox(), literal::red);
+// mln::draw::box(output, line.bbox(), literal::red);
+
+ point2d
+ pmin = line.bbox().pmin(),
+ pmax = line.bbox().pmax();
+ pmax.row() = line.baseline();
+ pmin.row() = line.baseline();
+
+ mln::draw::line(output, pmin, pmax, literal::red);
}
#endif // MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 1659a85..bbdd3e2 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -40,6 +40,7 @@
# include <scribo/io/xml/internal/print_box_coords.hh>
# include <scribo/io/xml/internal/print_page_preambule.hh>
# include <scribo/io/xml/internal/compute_text_colour.hh>
+# include <scribo/text/paragraphs_closing.hh>
namespace scribo
@@ -160,8 +161,7 @@ namespace scribo
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(
- extend((lbl | info.bbox()) | (pw::value(lbl) == pw::cst(id)), 0));
+ par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
switch (info.type())
{
@@ -189,6 +189,17 @@ namespace scribo
break;
}
+ case component::DropCapital:
+ {
+ output << " <TextRegion id=\"r" << id << "\" "
+ << " Type=\"Drop_Capital\">"
+ << std::endl;
+
+ internal::print_image_coords(output, par, " ");
+
+ output << " </TextRegion>" << std::endl;
+ break;
+ }
default:
case component::Image:
@@ -216,9 +227,15 @@ namespace scribo
{
const line_set<L>& lines = parset.lines();
+ // Prepare paragraph outlines.
+ L par_clo = text::paragraphs_closing(parset);
+
for_all_paragraphs(p, parset)
if (parset(p).is_valid())
{
+ p_array<mln_site(L)> par = scribo::util::component_precise_outline(par_clo
+ | parset(p).bbox(), p);
+
const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
// FIXME: compute that information on the whole paragraph
@@ -245,7 +262,7 @@ namespace scribo
// <Unicode></Unicode>
// </TextEquiv>
- internal::print_box_coords(output, parset(p).bbox(), " ");
+ internal::print_image_coords(output, par, " ");
output << " </TextRegion>" << std::endl;
}
diff --git a/scribo/scribo/io/xml/internal/print_image_coords.hh b/scribo/scribo/io/xml/internal/print_image_coords.hh
index ebfe402..41c4e30 100644
--- a/scribo/scribo/io/xml/internal/print_image_coords.hh
+++ b/scribo/scribo/io/xml/internal/print_image_coords.hh
@@ -69,15 +69,15 @@ namespace scribo
const S& b = exact(b_);
mln_precondition(b.is_valid());
- ostr << sc << "<coords>" << std::endl;
+ ostr << sc << "<Coords>" << std::endl;
mln_piter(S) p(b);
for_all(p)
- ostr << sp << "<point x=\"" << p.col()
+ ostr << sp << "<Point x=\"" << p.col()
<< "\" y=\"" << p.row() << "\"/>"
<< std::endl;
- ostr << sc << "</coords>" << std::endl;
+ ostr << sc << "</Coords>" << std::endl;
}
diff --git a/scribo/scribo/io/xml/internal/print_page_preambule.hh b/scribo/scribo/io/xml/internal/print_page_preambule.hh
index bcb6b33..9f00c60 100644
--- a/scribo/scribo/io/xml/internal/print_page_preambule.hh
+++ b/scribo/scribo/io/xml/internal/print_page_preambule.hh
@@ -30,10 +30,10 @@
///
/// \brief Print PAGE XML format preambule.
-# include <ctime>
# include <fstream>
# include <mln/core/alias/box2d.hh>
# include <scribo/core/document.hh>
+# include <scribo/io/xml/internal/time_info.hh>
namespace scribo
{
@@ -75,17 +75,10 @@ namespace scribo
else
output << "<PcGts>" << std::endl;
-
- time_t cur_time = time(NULL);
- tm * time_struct;
- time_struct = localtime(&cur_time);
- char time_info[55];
- strftime(time_info, 55, "%Y-%m-%dT%H:%M:%S", time_struct);
-
output << " <Metadata>" << std::endl;
output << " <Creator>LRDE</Creator>" << std::endl;
- output << " <Created>" << time_info << "</Created>" << std::endl;
- output << " <LastChange>" << time_info << "</LastChange>" << std::endl;
+ output << " <Created>" << time_info() << "</Created>" << std::endl;
+ output << " <LastChange>" << time_info() << "</LastChange>" << std::endl;
output << " <Comments>Generated by Scribo from Olena.</Comments>" << std::endl;
output << " </Metadata>" << std::endl;
diff --git a/scribo/scribo/io/xml/internal/time_info.hh b/scribo/scribo/io/xml/internal/time_info.hh
new file mode 100644
index 0000000..6adc49a
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/time_info.hh
@@ -0,0 +1,75 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH
+# define SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH
+
+/// \file
+///
+ /// Get formatted time info for PAGE XML format.
+
+# include <ctime>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+# ifndef MLN_INCLUDE_ONLY
+
+ /// Return the current local time formatted as "YYYY-MM-DDTHH:MM:SS".
+ /// Must be inline: a header-defined function would otherwise break the ODR.
+ inline std::string time_info()
+ {
+   const time_t cur_time = time(NULL);
+   const tm* time_struct = localtime(&cur_time);
+   char time_info_[55] = "";
+   if (time_struct) // localtime() may fail and return NULL: keep "" then.
+     strftime(time_info_, sizeof time_info_, "%Y-%m-%dT%H:%M:%S", time_struct);
+   return std::string(time_info_);
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_IO_XML_INTERNAL_TIME_INFO_HH
diff --git a/scribo/scribo/postprocessing/images_to_drop_capital.hh b/scribo/scribo/postprocessing/images_to_drop_capital.hh
new file mode 100644
index 0000000..ca76609
--- /dev/null
+++ b/scribo/scribo/postprocessing/images_to_drop_capital.hh
@@ -0,0 +1,141 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
+# define SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
+
+/// \file
+///
+/// Set type for specific images to Drop Capital component.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+
+
+namespace scribo
+{
+
+ namespace postprocessing
+ {
+
+ using namespace mln;
+
+
+ /// Set type for specific images to Drop Capital component.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Images recognized as drop capitals have their component type
+ /// updated in \p doc; nothing is returned.
+ //
+ template <typename L>
+ void
+ images_to_drop_capital(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ // Retype as DropCapital the small Image elements lying over paragraph boxes.
+ template <typename L>
+ void
+ images_to_drop_capital(document<L>& doc)
+ {
+   trace::entering("scribo::postprocessing::images_to_drop_capital");
+   mln_precondition(doc.is_valid());
+
+   if (! doc.has_elements())
+   {
+     // Keep trace::entering/exiting balanced on this early exit.
+     trace::exiting("scribo::postprocessing::images_to_drop_capital");
+     return;
+   }
+
+   // Mask set to true inside the bounding box of every valid paragraph.
+   mln_ch_value(L,bool) billboard;
+   initialize(billboard, doc.image());
+   data::fill(billboard, false);
+   for_all_comps(p, doc.paragraphs())
+     if (doc.paragraphs()(p).is_valid())
+       mln::draw::box_plain(billboard, doc.paragraphs()(p).bbox(), true);
+
+   // Elements must be smaller than 20% of (image width + image height).
+   float min_img_size = 0.2 * (doc.image().domain().width()
+                               + doc.image().domain().height());
+
+   component_set<L> elts = doc.elements();
+   for_all_comps(c, elts)
+     if (elts(c).is_valid() && elts(c).type() == component::Image)
+     {
+       const mln_box(L)& b_ = elts(c).bbox();
+       const bool tl = billboard(b_.pmin());
+       const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+       const bool mb = billboard.at_(b_.pmax().row(), b_.pcenter().col());
+       const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+       const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+       const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+       const bool br = billboard(b_.pmax());
+       typedef mln::util::set<int> set_t;
+       set_t s;
+       s.insert(tl);
+       s.insert(tr);
+       s.insert(mb);
+       s.insert(mc);
+       s.insert(mr);
+       s.insert(bl);
+       s.insert(br);
+       // NOTE(review): samples are bool, so s is within {0, 1}: dead test? Confirm.
+       if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0)))
+         continue;
+
+       float elt_size = elts(c).bbox().width() + elts(c).bbox().height();
+       for_all_elements(e, s)
+         if (s[e] != 0 && mc != 0 && mc == s[e]
+             && ((tl == mc && bl == mc)
+                 || (tr == mc && br == mc)
+                 || (bl == mc && br == mc)
+                 || (tl == mc && tr == mc)
+                 || (br == mc && mr == mc && mb == mc))
+             && elt_size < min_img_size)
+         {
+           elts(c).update_type(component::DropCapital);
+           break;
+         }
+     }
+
+   // FIXME: warning this call may produce inconsistent data
+   doc.set_elements(elts);
+   trace::exiting("scribo::postprocessing::images_to_drop_capital");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::postprocessing
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_POSTPROCESSING_IMAGES_TO_DROP_CAPITAL_HH
diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh
index ec1d5c8..efc5259 100644
--- a/scribo/scribo/text/paragraphs_closing.hh
+++ b/scribo/scribo/text/paragraphs_closing.hh
@@ -31,6 +31,9 @@
///
/// Paragraphs closing using CRLA.
+# include <mln/draw/line.hh>
+# include <scribo/draw/line_components.hh>
+
namespace scribo
{
@@ -39,162 +42,217 @@ namespace scribo
using namespace mln;
- template< typename L, typename V >
- void
- paragraphs_closing(image2d<V>& output,
- const paragraph_set<L>& par_set,
- const box2d& domain);
+ template< typename L>
+ mln_concrete(L)
+ paragraphs_closing(const paragraph_set<L>& parset);
+
# ifndef MLN_INCLUDE_ONLY
- template< typename V >
- inline
- void horizontal_CRLA(const image2d<V>& input,
- image2d<V>& output,
- const mln::util::array<int>& deltas)
+ namespace internal
{
- mln_piter(image2d<V>) p(input.domain());
- int count = 0;
- unsigned nrows = input.nrows();
- unsigned ncols = input.ncols();
- V last_pixel_value = 0;
- for (unsigned i = 0; i < nrows; ++i)
+ template<typename L>
+ inline
+ void horizontal_CRLA(const Image<L>& input_,
+ Image<L>& output_,
+ const mln::util::array<int>& deltas)
{
- for (unsigned j = 0; j < ncols; ++j)
- {
- const V& current_pixel = input.at_(i, j);
+ const L& input = exact(input_);
+ L& output = exact(output_);
+ mln_precondition(input.is_valid());
+ mln_precondition(output.is_valid());
+
+ mln_piter(L) p(input.domain());
+ int count = 0;
+ unsigned nrows = input.nrows();
+ unsigned ncols = input.ncols();
+ mln_value(L) last_pixel_value = 0;
- if (!current_pixel)
+ for (unsigned i = 0; i < nrows; ++i)
+ {
+ for (unsigned j = 0; j < ncols; ++j)
{
- if (last_pixel_value)
+ const mln_value(L)& current_pixel = input.at_(i, j);
+
+ if (!current_pixel)
{
- unsigned k = j + 1;
- for (; !input.at_(i, k) && (k < ncols); ++k);
+ if (last_pixel_value)
+ {
+ unsigned k = j + 1;
+ for (; !(input.at_(i, k)) && (k < ncols); ++k);
- count = k - j;
- const int threshold = deltas(last_pixel_value - 1);
+ count = k - j;
+ const int threshold = deltas(last_pixel_value);
- if (last_pixel_value == input.at_(i, k) && count < threshold)
- for (unsigned l = j; l <= k; ++l)
- output.at_(i, l) = last_pixel_value;
+ if (last_pixel_value == input.at_(i, k) && count < threshold)
+ for (unsigned l = j; l <= k; ++l)
+ output.at_(i, l) = last_pixel_value;
- j = k;
- last_pixel_value = 0;
+ j = k;
+ last_pixel_value = 0;
+ }
+ }
+ else
+ {
+ output.at_(i, j) = current_pixel;
+ last_pixel_value = current_pixel;
}
- }
- else
- {
- output.at_(i, j) = current_pixel;
- last_pixel_value = current_pixel;
}
}
}
- }
- template< typename V >
- inline
- void vertical_CRLA(const image2d<V>& input,
- image2d<V>& output,
- const mln::util::array<int>& deltas)
- {
- mln_piter(image2d<V>) p(input.domain());
- int count = 0;
- unsigned nrows = input.nrows();
- unsigned ncols = input.ncols();
- V last_pixel_value = 0;
-
- for (unsigned j = 0; j < ncols; ++j)
+ template<typename L>
+ inline
+ void vertical_CRLA(const Image<L>& input_,
+ Image<L>& output_,
+ const mln::util::array<int>& deltas)
{
- for (unsigned i = 0; i < nrows; ++i)
- {
- const V& current_pixel = input.at_(i, j);
-
- if (!current_pixel)
- {
- if (last_pixel_value)
- {
- unsigned k = i + 1;
- for (; !input.at_(k, j) && (k < nrows); ++k);
+ const L& input = exact(input_);
+ L& output = exact(output_);
+ mln_precondition(input.is_valid());
+ mln_precondition(output.is_valid());
- count = k - i;
- const int threshold = deltas(last_pixel_value - 1);
+ mln_piter(L) p(input.domain());
+ int count = 0;
+ unsigned nrows = input.nrows();
+ unsigned ncols = input.ncols();
+ mln_value(L) last_pixel_value = 0;
- if (last_pixel_value == input.at_(k, j)
- && count < threshold)
- for (unsigned l = i; l <= k; ++l)
- output.at_(l, j) = last_pixel_value;
+ for (unsigned j = 0; j < ncols; ++j)
+ {
+ for (unsigned i = 0; i < nrows; ++i)
+ {
+ const mln_value(L)& current_pixel = input.at_(i, j);
- i = k;
- last_pixel_value = 0;
+ if (!current_pixel)
+ {
+ if (last_pixel_value)
+ {
+ unsigned k = i + 1;
+ for (; !(input.at_(k, j)) && (k < nrows); ++k);
+
+ count = k - i;
+ const int threshold = deltas(last_pixel_value);
+
+ if (last_pixel_value == input.at_(k, j)
+ && count < threshold)
+ for (unsigned l = i; l <= k; ++l)
+ output.at_(l, j) = last_pixel_value;
+
+ i = k;
+ last_pixel_value = 0;
+ }
+ }
+ else
+ {
+ output.at_(i, j) = current_pixel;
+ last_pixel_value = current_pixel;
}
- }
- else
- {
- output.at_(i, j) = current_pixel;
- last_pixel_value = current_pixel;
}
}
}
- }
- template< typename V >
- inline
- void CRLA(const image2d<V>& input,
- image2d<V>& output,
- const mln::util::array<int>& deltas,
- const mln::util::array<int>& deltas_factor)
- {
- horizontal_CRLA(input, output, deltas_factor);
- vertical_CRLA(output, output, deltas);
- horizontal_CRLA(output, output, deltas_factor);
- }
+ template<typename L>
+ inline
+ void CRLA(const Image<L>& input,
+ Image<L>& output,
+ const mln::util::array<int>& deltas,
+ const mln::util::array<int>& deltas_factor)
+ {
+ horizontal_CRLA(input, output, deltas_factor);
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ output,
+ "paragraph_closing_horizontal_CRLA");
+
+
+ vertical_CRLA(output, output, deltas);
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ output,
+ "paragraph_closing_vertical_CRLA");
+
+ horizontal_CRLA(output, output, deltas_factor);
+ }
+
+ } // end of namespace scribo::text::internal
+
- template< typename L, typename V >
- void
- paragraphs_closing(image2d<V>& output,
- const paragraph_set<L>& par_set,
- const box2d& domain)
+ template<typename L>
+ mln_concrete(L)
+ paragraphs_closing(const paragraph_set<L>& parset)
{
trace::entering("scribo::text::paragraphs_closing");
- image2d<V> debug(domain);
+ // FIXME: 'debug' may be useless.
+ mln_concrete(L) output, debug;
+ initialize(output, parset.lines().components().labeled_image());
+ initialize(debug, output);
- mln::util::array<int> deltas;
- deltas.reserve(par_set.nelements());
- mln::util::array<int> deltas_factor;
- deltas_factor.reserve(par_set.nelements());
+ mln::util::array<int> deltas(parset.nelements() + 1, 0);
+ mln::util::array<int> deltas_factor(parset.nelements() + 1, 0);
data::fill(debug, 0);
data::fill(output, 0);
- const line_set<L>& lines = par_set.lines();
+ const line_set<L>& lines = parset.lines();
- for_all_paragraphs(p, par_set)
- {
- const paragraph_info<L>& current_par = par_set(p);
- const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
- const unsigned nelements = line_ids.nelements();
-
- for (unsigned i = 0; i < nelements; ++i)
+ for_all_paragraphs(p, parset)
+ if (parset(p).is_valid())
{
- const line_id_t& line_id = line_ids(i);
- const line_info<L>& current_line = lines(line_id);
+ const paragraph_info<L>& current_par = parset(p);
+ const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
- draw::line_components(debug, current_line, p);
- }
+ line_id_t last_id = line_ids[0];
+ for_all_elements(i, line_ids)
+ {
+ const line_id_t& line_id = line_ids(i);
+ const line_info<L>& current_line = lines(line_id);
+
+ scribo::draw::line_components(debug, current_line, p);
+
+ // HACK DISCLAIMER : this line is drawn in order to be
+ // sure that every line will be reduced to a single
+ // component after closing. It is necessary to reduce a
+ // paragraph to one component in order to extract its
+ // outline correctly for xml/debug output.
+ component_id_t last_comp = lines(line_id).component_ids()(0);
+ for_all_elements(i, lines(line_id).component_ids())
+ {
+ const unsigned c = lines(line_id).component_ids()(i);
+ mln::draw::line(debug,
+ lines.components()(c).mass_center(),
+ lines.components()(last_comp).mass_center(),
+ p);
+ last_comp = c;
+ }
- int delta_baseline = current_par.delta_baseline();
+ // mln::draw::line(debug, current_line.bbox().pcenter(), lines(last_id).bbox().pcenter(), p);
+ // last_id = line_id;
+ }
- if (delta_baseline % 2 == 0)
+ int delta_baseline = current_par.delta_baseline();
+
+ if (delta_baseline % 2 == 0)
--delta_baseline;
- deltas.append(delta_baseline);
- deltas_factor.append(3 * delta_baseline);
- }
- CRLA(debug, output, deltas, deltas_factor);
+ deltas(p) = 2 * delta_baseline; // Vertical
+ deltas_factor(p) = 3 * delta_baseline; // Horizontal
+ }
+
+ debug::logger().log_image(debug::AuxiliaryResults,
+ debug,
+ "paragraph_closing_input_CRLA");
+
+ internal::CRLA(debug, output, deltas, deltas_factor);
+
+ debug::logger().log_image(debug::Results,
+ output,
+ "paragraph_closing");
- trace::exiting("scribo::draw::line_components");
+ trace::exiting("scribo::text::paragraphs_closing");
+ return output;
}
# endif
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index e0c5b50..24d24a3 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -53,8 +53,10 @@
# include <scribo/filter/objects_small.hh>
# include <scribo/filter/paragraphs_bbox_overlap.hh>
# include <scribo/filter/paragraphs_in_image.hh>
+# include <scribo/filter/paragraphs_in_borders.hh>
# include <scribo/filter/separators_in_element.hh>
# include <scribo/filter/separators_in_paragraph.hh>
+# include <scribo/filter/separators_in_borders.hh>
# include <scribo/filter/images_in_paragraph.hh>
# include <scribo/primitive/group/from_single_link.hh>
@@ -66,6 +68,8 @@
# include <scribo/preprocessing/denoise_fg.hh>
+# include <scribo/postprocessing/images_to_drop_capital.hh>
+
# include <scribo/text/recognition.hh>
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
@@ -84,6 +88,7 @@
# include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
namespace scribo
{
@@ -201,12 +206,22 @@ namespace scribo
// Vertical and horizontal separators
{
+ unsigned closing_size = std::min(0.01 * doc.image().domain().width(),
+ 0.01 * doc.image().domain().height());
+ win::hline2d hl(closing_size);
+
+ // Apply a closing::structural in order to reconnect the
+ // disconnected parts of a single separator.
mln_ch_value(I,bool)
vseparators = preprocessing::rotate_90(
- primitive::extract::lines_h_thick_and_thin(
- preprocessing::rotate_90(processed_image), 101, 3, 0.2, 0.6, 10), false),
- hseparators = primitive::extract::lines_h_thick_and_thin(
- processed_image, 101, 3);
+ morpho::closing::structural(
+ primitive::extract::lines_h_thick_and_thin(
+ preprocessing::rotate_90(processed_image),
+ 101, 3, 0.2, 0.6, 10), hl), false),
+
+ hseparators = morpho::closing::structural(
+ primitive::extract::lines_h_thick_and_thin(
+ processed_image, 101, 3), hl);
doc.set_vline_separators(vseparators);
doc.set_hline_separators(hseparators);
@@ -509,9 +524,11 @@ namespace scribo
on_new_progress_label("Filtering paragraphs");
- parset = filter::paragraphs_bbox_overlap(parset);
+ paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset);
+ doc.set_paragraphs(parset_f);
- doc.set_paragraphs(parset);
+ // parset = filter::paragraphs_bbox_overlap(parset);
+ // doc.set_paragraphs(parset);
on_progress();
@@ -540,16 +557,38 @@ namespace scribo
on_progress();
+// TEMPORARY DEBUG
+ on_new_progress_label("Saving debug data");
+ doc.set_paragraphs(parset);
+ scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage);
+ scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage);
+ doc.set_paragraphs(parset_f);
+ on_progress();
+// END OF TEMPORARY DEBUG
+
on_new_progress_label("Cleanup miscellaneous false positive");
filter::separators_in_element(doc);
- filter::separators_in_paragraph(doc);
+ filter::separators_in_paragraph(doc, 81, 121);
+ filter::separators_in_borders(doc, 0.05, 0.02);
+
filter::paragraphs_in_image(doc);
- filter::images_in_paragraph(doc);
+ filter::paragraphs_in_borders(doc);
on_progress();
+ on_new_progress_label("Rebuild extracted images");
+ elements = scribo::primitive::extract::non_text_hdoc(doc, closing_size);
+ doc.set_elements(elements);
+
+ on_progress();
+
+ on_new_progress_label("Tag images as drop capital");
+
+ postprocessing::images_to_drop_capital(doc);
+
+ on_progress();
// Saving results
if (save_doc_as_xml)
@@ -564,6 +603,9 @@ namespace scribo
on_end();
+
+ sleep(10);
+
return doc;
}
diff --git a/scribo/scribo/util/box_is_included.hh b/scribo/scribo/util/box_is_included.hh
new file mode 100644
index 0000000..dc3f791
--- /dev/null
+++ b/scribo/scribo/util/box_is_included.hh
@@ -0,0 +1,74 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_UTIL_BOX_IS_INCLUDED_HH
+# define SCRIBO_UTIL_BOX_IS_INCLUDED_HH
+
+/// \file
+///
+/// Check whether a box is included in another one.
+
+
+#include <mln/core/site_set/box.hh>
+
+namespace scribo
+{
+
+ namespace util
+ {
+ using namespace mln;
+
+ /// \brief Check whether a box is included in another one.
+ ///
+ /// \return true if \p lhs is included in \p rhs.
+ //
+ template <typename P>
+ bool
+ box_is_included(const box<P>& lhs, const box<P>& rhs);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename P>
+ bool
+ box_is_included(const box<P>& lhs, const box<P>& rhs)
+ {
+ trace::entering("scribo::util::box_is_included");
+
+ for (unsigned i = 0; i < P::dim; ++i)
+ if (!(lhs.pmin()[i] >= rhs.pmin()[i] && lhs.pmax()[i] <= rhs.pmax()[i]))
+ return false;
+
+ trace::exiting("scribo::util::box_is_included");
+ return true;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::util
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_UTIL_BOX_IS_INCLUDED_HH
diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh
index 490b814..70fc995 100644
--- a/scribo/scribo/util/component_precise_outline.hh
+++ b/scribo/scribo/util/component_precise_outline.hh
@@ -40,9 +40,15 @@
# include <mln/io/ppm/save.hh>
# include <mln/data/convert.hh>
# include <mln/opt/at.hh>
+# include <mln/extension/fill.hh>
# include <iostream>
+#include <mln/io/pgm/save.hh>
+#include <mln/data/wrap.hh>
+#include <mln/data/convert.hh>
+
+
namespace scribo
{
@@ -79,7 +85,8 @@ namespace scribo
template <typename I>
void
find_first_point(const I& input,
- point2d& p)
+ point2d& p,
+ const mln_value(I)& id)
{
const mln::def::coord
mid_row = geom::min_row(input) + (geom::nrows(input) >> 1);
@@ -87,7 +94,7 @@ namespace scribo
for (mln::def::coord i = geom::min_col(input);
i <= geom::max_col(input); ++i)
{
- if (opt::at(input, mid_row, i))
+ if (opt::at(input, mid_row, i) == id)
{
p.row() = mid_row;
p.col() = i;
@@ -100,14 +107,15 @@ namespace scribo
void
left_up(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p2(cur_pt.row() + offset[direction][5][1],
cur_pt.col() + offset[direction][5][0]);
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && input(p3))
+ if ((input(p2) != id) && (input(p3) == id))
{
direction = 3;
return;
@@ -130,7 +138,8 @@ namespace scribo
void
right_up(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p1(cur_pt.row() + offset[direction][0][1],
cur_pt.col() + offset[direction][0][0]);
@@ -139,7 +148,7 @@ namespace scribo
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && (input(p1) || input(p3)))
+ if ((input(p2) != id) && ((input(p1) == id) || (input(p3) == id)))
{
direction = 0;
return;
@@ -162,14 +171,15 @@ namespace scribo
void
right_down(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p2(cur_pt.row() + offset[direction][5][1],
cur_pt.col() + offset[direction][5][0]);
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && input(p3))
+ if ((input(p2) != id) && (input(p3) == id))
{
direction = 1;
return;
@@ -192,7 +202,8 @@ namespace scribo
void
left_down(int& direction,
const I& input,
- const point2d& cur_pt)
+ const point2d& cur_pt,
+ const mln_value(I)& id)
{
const point2d p1(cur_pt.row() + offset[direction][0][1],
cur_pt.col() + offset[direction][0][0]);
@@ -201,7 +212,7 @@ namespace scribo
const point2d p3(cur_pt.row() + offset[direction][7][1],
cur_pt.col() + offset[direction][7][0]);
- if (!input(p2) && (input(p1) || input(p3)))
+ if ((input(p2) != id) && ((input(p1) == id) || (input(p3) == id)))
{
direction = 2;
return;
@@ -225,17 +236,18 @@ namespace scribo
void
find_next_point(const I& input,
point2d& cur_pt,
- int& direction)
+ int& direction,
+ const mln_value(I)& id)
{
unsigned i = 0;
point2d tmp;
switch (direction)
{
- case 0: left_up(direction, input, cur_pt); break;
- case 1: right_up(direction , input, cur_pt); break;
- case 2: right_down(direction, input, cur_pt); break;
- case 3: left_down(direction, input, cur_pt); break;
+ case 0: left_up(direction, input, cur_pt, id); break;
+ case 1: right_up(direction , input, cur_pt, id); break;
+ case 2: right_down(direction, input, cur_pt, id); break;
+ case 3: left_down(direction, input, cur_pt, id); break;
}
for (; i < 8; ++i)
@@ -243,7 +255,7 @@ namespace scribo
tmp = point2d(cur_pt.row() + offset[direction][i][1],
cur_pt.col() + offset[direction][i][0]);
- if (input.domain().has(tmp) && input(tmp))
+ if (input(tmp) == id)
break;
}
@@ -263,7 +275,7 @@ namespace scribo
}
void
- filter_points(mln::p_array<point2d>& points,
+ filter_points(const mln::p_array<point2d>& points,
mln::p_array<point2d>& waypoints)
{
const unsigned nelements = points.nsites();
@@ -330,33 +342,35 @@ namespace scribo
template <typename I>
mln::p_array<point2d>
- component_precise_outline(const Image<I>& input_)
+ component_precise_outline(const Image<I>& input_, const mln_value(I)& id)
{
trace::entering("scribo::util::component_precise_outline");
const I& input = exact(input_);
typedef mln_site(I) P;
- point2d start_pt;
- int direction = 0;
+ extension::fill(input, 0);
+
mln::p_array<P> points;
points.reserve(std::max(geom::ncols(input), geom::nrows(input)));
- internal::find_first_point(input, start_pt);
+ point2d start_pt;
+ int direction = 0;
+
+ internal::find_first_point(input, start_pt, id);
P cur_pt = start_pt;
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
points.append(cur_pt);
while (cur_pt != start_pt)
{
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
points.append(cur_pt);
}
-
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
const std::vector<point2d>& vec_points = points.hook_std_vector_();
@@ -367,16 +381,27 @@ namespace scribo
while (cur_pt != start_pt)
{
- internal::find_next_point(input, cur_pt, direction);
+ internal::find_next_point(input, cur_pt, direction, id);
points.append(cur_pt);
}
}
- // mln::p_array<P> waypoints;
- // internal::filter_points(points, waypoints);
+ std::cout << "Before filter points - " << points.nsites() << std::endl;
+
+ mln::p_array<P> waypoints;
+ internal::filter_points(points, waypoints);
+
+ std::cout << "After filter points - " << waypoints.nsites() << std::endl;
trace::exiting("scribo::util::component_precise_outline");
- return points;
+ return waypoints;
+ }
+
+ template <typename I>
+ mln::p_array<point2d>
+ component_precise_outline(const Image<I>& input)
+ {
+ return component_precise_outline(input, true);
}
# endif // ! MLN_INCLUDE_ONLY
--
1.5.6.5
1
0
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch Sylvain has been created
at 9d2089bedcc47c6b7acbe24719b0f41e12d4c2da (commit)
- Log -----------------------------------------------------------------
9d2089b Added a choose function, to be used with handle_collisions.
-----------------------------------------------------------------------
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0
30 May '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch icdar/hdlac2011 has been updated
via 51383b97b33a33a046e3026d6b70fe33af775ccd (commit)
from bde8638454bdf7a7426ed977c74956eb0e4fca3b (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
51383b9 Correction in points to segments transform
-----------------------------------------------------------------------
Summary of changes:
scribo/scribo/util/component_precise_outline.hh | 16 ++++++++++++++++
1 files changed, 16 insertions(+), 0 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0
last-svn-commit-903-g51383b9 Correction in points to segments transform
by Julien Marquegnies 30 May '11
by Julien Marquegnies 30 May '11
30 May '11
---
scribo/scribo/util/component_precise_outline.hh | 16 ++++++++++++++++
1 files changed, 16 insertions(+), 0 deletions(-)
diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh
index b618cb7..490b814 100644
--- a/scribo/scribo/util/component_precise_outline.hh
+++ b/scribo/scribo/util/component_precise_outline.hh
@@ -275,12 +275,16 @@ namespace scribo
waypoints.append(*first_pt);
unsigned i = 1;
const point2d* cur_pt = & points[i];
+ bool has_changed = false;
while (i < nelements)
{
+ has_changed = false;
+
while (cur_pt->row() == first_pt->row()
|| cur_pt->col() == first_pt->col())
{
+ has_changed = true;
++i;
if (i == nelements)
break;
@@ -301,6 +305,13 @@ namespace scribo
}
}
+ if (!has_changed)
+ {
+ ++i;
+ last_pt = cur_pt;
+ if (i < nelements)
+ cur_pt = & points[i];
+ }
waypoints.append(*last_pt);
first_pt = last_pt;
last_pt = first_pt;
@@ -308,6 +319,11 @@ namespace scribo
waypoints.append(*cur_pt);
}
+ else
+ {
+ for (unsigned i = 0; i < nelements; ++i)
+ waypoints.append(points[i]);
+ }
}
} // end of namespace scribo::util::internal
--
1.5.6.5
1
0
30 May '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch icdar/hdlac2011 has been updated
via bde8638454bdf7a7426ed977c74956eb0e4fca3b (commit)
from f53b7d461ebcefbb55a59381c1806d61b3cf0284 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
bde8638 Precise outline correction
-----------------------------------------------------------------------
Summary of changes:
scribo/scribo/util/component_precise_outline.hh | 45 +++++++++++++++++------
1 files changed, 33 insertions(+), 12 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0
---
scribo/scribo/util/component_precise_outline.hh | 45 +++++++++++++++++------
1 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh
index 1e447a3..b618cb7 100644
--- a/scribo/scribo/util/component_precise_outline.hh
+++ b/scribo/scribo/util/component_precise_outline.hh
@@ -99,7 +99,6 @@ namespace scribo
template <typename I>
void
left_up(int& direction,
- const unsigned i,
const I& input,
const point2d& cur_pt)
{
@@ -113,7 +112,12 @@ namespace scribo
direction = 3;
return;
}
+ }
+ void
+ left_up_after(int& direction,
+ const unsigned i)
+ {
if (i == 3 || i == 4)
direction = 1;
else if (i == 5 || i == 6)
@@ -122,11 +126,9 @@ namespace scribo
direction = 3;
}
-
template <typename I>
void
right_up(int& direction,
- const unsigned i,
const I& input,
const point2d& cur_pt)
{
@@ -142,8 +144,12 @@ namespace scribo
direction = 0;
return;
}
+ }
-
+ void
+ right_up_after(int& direction,
+ const unsigned i)
+ {
if (i == 3 || i == 4)
direction = 2;
else if (i == 5 || i == 6)
@@ -152,11 +158,9 @@ namespace scribo
direction = 0;
}
-
template <typename I>
void
right_down(int& direction,
- const unsigned i,
const I& input,
const point2d& cur_pt)
{
@@ -170,7 +174,12 @@ namespace scribo
direction = 1;
return;
}
+ }
+ void
+ right_down_after(int& direction,
+ const unsigned i)
+ {
if (i == 3 || i == 4)
direction = 3;
else if (i == 5 || i == 6)
@@ -179,11 +188,9 @@ namespace scribo
direction = 1;
}
-
template <typename I>
void
left_down(int& direction,
- const unsigned i,
const I& input,
const point2d& cur_pt)
{
@@ -199,7 +206,12 @@ namespace scribo
direction = 2;
return;
}
+ }
+ void
+ left_down_after(int& direction,
+ const unsigned i)
+ {
if (i == 3 || i == 4)
direction = 0;
else if (i == 5 || i == 6)
@@ -208,6 +220,7 @@ namespace scribo
direction = 2;
}
+
template <typename I>
void
find_next_point(const I& input,
@@ -217,6 +230,14 @@ namespace scribo
unsigned i = 0;
point2d tmp;
+ switch (direction)
+ {
+ case 0: left_up(direction, input, cur_pt); break;
+ case 1: right_up(direction , input, cur_pt); break;
+ case 2: right_down(direction, input, cur_pt); break;
+ case 3: left_down(direction, input, cur_pt); break;
+ }
+
for (; i < 8; ++i)
{
tmp = point2d(cur_pt.row() + offset[direction][i][1],
@@ -232,10 +253,10 @@ namespace scribo
switch (direction)
{
- case 0: left_up(direction, i, input, cur_pt); break;
- case 1: right_up(direction , i, input, cur_pt); break;
- case 2: right_down(direction, i, input, cur_pt); break;
- case 3: left_down(direction, i, input, cur_pt); break;
+ case 0: left_up_after(direction, i); break;
+ case 1: right_up_after(direction , i); break;
+ case 2: right_down_after(direction, i); break;
+ case 3: left_down_after(direction, i); break;
}
cur_pt = tmp;
--
1.5.6.5
1
0
---
scribo/scribo/filter/paragraphs_bbox_overlap.hh | 76 ++++++++++++++++------
1 files changed, 55 insertions(+), 21 deletions(-)
diff --git a/scribo/scribo/filter/paragraphs_bbox_overlap.hh b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
index aa1c8ac..d40d42f 100644
--- a/scribo/scribo/filter/paragraphs_bbox_overlap.hh
+++ b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
@@ -41,6 +41,7 @@
# include <scribo/core/paragraph_set.hh>
+#include <mln/labeling/colorize.hh>
namespace scribo
{
@@ -59,7 +60,7 @@ namespace scribo
/// Paragraph::Ignored.
template <typename L>
paragraph_set<L>
- paragraphs_bbox_overlap(const paragraph_set<L>& paragraphs);
+ paragraphs_bbox_overlap(const paragraph_set<L>& parset);
# ifndef MLN_INCLUDE_ONLY
@@ -70,23 +71,23 @@ namespace scribo
template <typename L>
struct order_paragraphs_id
{
- order_paragraphs_id(const scribo::paragraph_set<L>& paragraphs)
- : paragraphs_(paragraphs)
+ order_paragraphs_id(const scribo::paragraph_set<L>& parset)
+ : parset_(parset)
{
}
bool operator()(const scribo::paragraph_id_t& l1,
const scribo::paragraph_id_t& l2) const
{
- const unsigned l1_nsites = paragraphs_(l1).bbox().nsites();
- const unsigned l2_nsites = paragraphs_(l2).bbox().nsites();
+ const unsigned l1_nsites = parset_(l1).bbox().nsites();
+ const unsigned l2_nsites = parset_(l2).bbox().nsites();
if (l1_nsites == l2_nsites)
return l1 > l2;
return l1_nsites > l2_nsites;
}
- scribo::paragraph_set<L> paragraphs_;
+ scribo::paragraph_set<L> parset_;
};
} // end of namespace scribo::filter::internal
@@ -94,24 +95,38 @@ namespace scribo
template <typename L>
paragraph_set<L>
- paragraphs_bbox_overlap(const paragraph_set<L>& paragraphs)
+ paragraphs_bbox_overlap(const paragraph_set<L>& parset)
{
trace::entering("scribo::filter::paragraphs_bbox_overlap");
- mln_precondition(paragraphs.is_valid());
+ mln_precondition(parset.is_valid());
- L billboard;
- initialize(billboard, paragraphs.lines().components().labeled_image());
+ mln_ch_value(L, paragraph_id_t) billboard;
+ initialize(billboard, parset.lines().components().labeled_image());
data::fill(billboard, 0);
- mln::util::array<bool> not_to_ignore(paragraphs.nelements() + 1, true);
+ mln::util::array<bool> not_to_ignore(parset.nelements() + 1, true);
not_to_ignore(0) = false;
- for_all_paragraphs(cur_id, paragraphs)
+ paragraph_set<L> output = parset.duplicate();
+
+ mln::util::array<paragraph_id_t> candidate;
+ candidate.reserve(parset.nelements());
+ for_all_paragraphs(cur_id, parset)
+ if (parset(cur_id).is_valid())
+ candidate.append(cur_id);
+
+ std::sort(candidate.hook_std_vector_().begin(),
+ candidate.hook_std_vector_().end(),
+ internal::order_paragraphs_id<L>(parset));
+
+ for_all_elements(e, candidate)
{
- const box2d& b_ = paragraphs(cur_id).bbox();
+ paragraph_id_t cur_id = candidate(e);
- if (paragraphs(cur_id).nlines() > 1)
+ const box2d& b_ = parset(cur_id).bbox();
+
+ if (parset(cur_id).nlines() > 1)
{
mln::draw::box_plain(billboard, b_, cur_id);
continue;
@@ -141,17 +156,33 @@ namespace scribo
++it)
if (not_to_ignore(*it))
{
- box2d b2 = paragraphs(*it).bbox();
+ box2d b2 = output(*it).bbox();
box2d b_i = scribo::util::box_intersection(b_, b2);
- // si b_ est inclus dans une boite donc le nombre de comp > 1 => invalid juste b_
- // sinon => invalid b_ et b2
- if ((b_i.nsites() / (float)b_.nsites() > 0.4
- || (b_i.nsites() / (float)b2.nsites()) > 0.9))
+ volatile float
+ b_ratio = b_i.nsites() / (float)b_.nsites(),
+ b2_ratio = b_i.nsites() / (float)b2.nsites();
+
+ if (b2_ratio == 1)
{
+ // Merge paragraphs and redraw the new bbox.
+ output(cur_id).fast_merge(output(*it));
+ mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id);
+ }
+ else if (b_ratio == 1)
+ {
+ // Merge paragraphs and redraw the new bbox.
+ output(*it).fast_merge(output(cur_id));
+ mln::draw::box_plain(billboard, output(*it).bbox(), *it);
+ }
+ else if ((b_ratio > 0.4 || b2_ratio > 0.9))
+ {
+ // If b_ is included in a box whose number of
+ // comp > 4 => invalidate only b_, otherwise => invalidate
+ // both b_ and b2.
not_to_ignore(cur_id) = false;
- if (paragraphs(*it).nlines() < 4)
+ if (parset(*it).nlines() < 4)
not_to_ignore(*it) = false;
}
}
@@ -159,9 +190,12 @@ namespace scribo
mln::draw::box_plain(billboard, b_, cur_id);
}
- paragraph_set<L> output = paragraphs.duplicate();
output.invalidate(not_to_ignore);
+ for_all_paragraphs(p, output)
+ if (output(p).is_valid())
+ output(p).force_stats_update();
+
trace::exiting("scribo::filter::paragraphs_bbox_overlap");
return output;
}
--
1.5.6.5
1
0
last-svn-commit-905-ged11b3b Improve output cleanup for historical document toolchain.
by Guillaume Lazzara 30 May '11
by Guillaume Lazzara 30 May '11
30 May '11
* scribo/filter/paragraphs_in_borders.hh,
* scribo/filter/separators_in_borders.hh,
* scribo/filter/separators_vert_in_borders.hh: New.
* scribo/filter/images_in_paragraph.hh,
* scribo/filter/paragraphs_bbox_overlap.hh,
* scribo/filter/paragraphs_in_image.hh,
* scribo/filter/separators_in_element.hh,
* scribo/filter/separators_in_paragraph.hh: Improve filtering.
* scribo/toolchain/internal/content_in_hdoc_functor.hh: Make use
of new filters.
---
scribo/ChangeLog | 17 ++
scribo/scribo/filter/images_in_paragraph.hh | 8 +-
scribo/scribo/filter/paragraphs_bbox_overlap.hh | 175 ++++++++++++-----
scribo/scribo/filter/paragraphs_in_borders.hh | 140 +++++++++++++
scribo/scribo/filter/paragraphs_in_image.hh | 29 +++-
scribo/scribo/filter/separators_in_borders.hh | 206 ++++++++++++++++++++
scribo/scribo/filter/separators_in_element.hh | 84 ++++----
scribo/scribo/filter/separators_in_paragraph.hh | 92 +++++----
scribo/scribo/filter/separators_vert_in_borders.hh | 143 ++++++++++++++
.../toolchain/internal/content_in_hdoc_functor.hh | 58 +++++-
10 files changed, 799 insertions(+), 153 deletions(-)
create mode 100644 scribo/scribo/filter/paragraphs_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_in_borders.hh
create mode 100644 scribo/scribo/filter/separators_vert_in_borders.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 84564da..450c4d5 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Improve output cleanup for historical document toolchain.
+
+ * scribo/filter/paragraphs_in_borders.hh,
+ * scribo/filter/separators_in_borders.hh,
+ * scribo/filter/separators_vert_in_borders.hh: New.
+
+ * scribo/filter/images_in_paragraph.hh,
+ * scribo/filter/paragraphs_bbox_overlap.hh,
+ * scribo/filter/paragraphs_in_image.hh,
+ * scribo/filter/separators_in_element.hh,
+ * scribo/filter/separators_in_paragraph.hh: Improve filtering.
+
+ * scribo/toolchain/internal/content_in_hdoc_functor.hh: Make use
+ of new filters.
+
+2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* scribo/util/component_precise_outline.hh: Add support for
labeled_image.
diff --git a/scribo/scribo/filter/images_in_paragraph.hh b/scribo/scribo/filter/images_in_paragraph.hh
index e05b202..3cf64e1 100644
--- a/scribo/scribo/filter/images_in_paragraph.hh
+++ b/scribo/scribo/filter/images_in_paragraph.hh
@@ -101,12 +101,12 @@ namespace scribo
// => Ignore it.
if (tl && tr && ml && mc && mr && bl && br)
elts(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_elements(elts);
}
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_elements(elts);
+
trace::exiting("scribo::filter::images_in_paragraph");
}
diff --git a/scribo/scribo/filter/paragraphs_bbox_overlap.hh b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
index aa1c8ac..188a77e 100644
--- a/scribo/scribo/filter/paragraphs_bbox_overlap.hh
+++ b/scribo/scribo/filter/paragraphs_bbox_overlap.hh
@@ -41,6 +41,7 @@
# include <scribo/core/paragraph_set.hh>
+#include <mln/labeling/colorize.hh>
namespace scribo
{
@@ -59,7 +60,7 @@ namespace scribo
/// Paragraph::Ignored.
template <typename L>
paragraph_set<L>
- paragraphs_bbox_overlap(const paragraph_set<L>& paragraphs);
+ paragraphs_bbox_overlap(const paragraph_set<L>& parset);
# ifndef MLN_INCLUDE_ONLY
@@ -70,23 +71,23 @@ namespace scribo
template <typename L>
struct order_paragraphs_id
{
- order_paragraphs_id(const scribo::paragraph_set<L>& paragraphs)
- : paragraphs_(paragraphs)
+ order_paragraphs_id(const scribo::paragraph_set<L>& parset)
+ : parset_(parset)
{
}
bool operator()(const scribo::paragraph_id_t& l1,
const scribo::paragraph_id_t& l2) const
{
- const unsigned l1_nsites = paragraphs_(l1).bbox().nsites();
- const unsigned l2_nsites = paragraphs_(l2).bbox().nsites();
+ const unsigned l1_nsites = parset_(l1).bbox().nsites();
+ const unsigned l2_nsites = parset_(l2).bbox().nsites();
if (l1_nsites == l2_nsites)
return l1 > l2;
return l1_nsites > l2_nsites;
}
- scribo::paragraph_set<L> paragraphs_;
+ scribo::paragraph_set<L> parset_;
};
} // end of namespace scribo::filter::internal
@@ -94,74 +95,150 @@ namespace scribo
template <typename L>
paragraph_set<L>
- paragraphs_bbox_overlap(const paragraph_set<L>& paragraphs)
+ paragraphs_bbox_overlap(const paragraph_set<L>& parset)
{
trace::entering("scribo::filter::paragraphs_bbox_overlap");
- mln_precondition(paragraphs.is_valid());
+ mln_precondition(parset.is_valid());
- L billboard;
- initialize(billboard, paragraphs.lines().components().labeled_image());
+ mln_ch_value(L, paragraph_id_t) billboard;
+ initialize(billboard, parset.lines().components().labeled_image());
data::fill(billboard, 0);
- mln::util::array<bool> not_to_ignore(paragraphs.nelements() + 1, true);
+ mln::util::array<bool> not_to_ignore(parset.nelements() + 1, true);
not_to_ignore(0) = false;
- for_all_paragraphs(cur_id, paragraphs)
+ paragraph_set<L> output = parset.duplicate();
+
+ mln::util::array<paragraph_id_t> candidate;
+ candidate.reserve(parset.nelements());
+ for_all_paragraphs(cur_id, parset)
+ if (parset(cur_id).is_valid())
+ candidate.append(cur_id);
+
+ std::sort(candidate.hook_std_vector_().begin(),
+ candidate.hook_std_vector_().end(),
+ internal::order_paragraphs_id<L>(parset));
+
+ for_all_elements(e, candidate)
{
- const box2d& b_ = paragraphs(cur_id).bbox();
+ paragraph_id_t cur_id = candidate(e);
+
+ const box2d& b_ = parset(cur_id).bbox();
- if (paragraphs(cur_id).nlines() > 1)
+ if (parset(cur_id).nlines() > 3)
{
mln::draw::box_plain(billboard, b_, cur_id);
continue;
}
- const unsigned tl = billboard(b_.pmin());
- const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const unsigned ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
const unsigned mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const unsigned mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const unsigned br = billboard(b_.pmax());
-
- typedef std::set<unsigned> set_t;
- set_t labels;
- labels.insert(tl);
- labels.insert(tl);
- labels.insert(tr);
- labels.insert(ml);
- labels.insert(mc);
- labels.insert(mr);
- labels.insert(bl);
- labels.insert(br);
-
- for (set_t::const_iterator it = labels.begin();
- it != labels.end();
- ++it)
- if (not_to_ignore(*it))
+
+ // Box is mostly in the background => do nothing.
+ if (mc == 0)
+ {
+ mln::draw::box_plain(billboard, b_, cur_id);
+ continue;
+ }
+ else // Bbox center is inside another box. Check if we can
+ // merge the current box with it.
+ {
+ // Consider other potential overlapping bboxes.
+ const unsigned tl = billboard(b_.pmin());
+ const unsigned tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const unsigned bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const unsigned br = billboard(b_.pmax());
+
+ typedef std::set<unsigned> set_t;
+ set_t labels;
+ labels.insert(tl);
+ labels.insert(tr);
+ labels.insert(mc);
+ labels.insert(bl);
+ labels.insert(br);
+
+ // FIXME: check that there are at least 3 points (including
+ // the center) in another paragraph.
+
+ // The potential merged bbox is already ignored or the
+ // current bbox overlaps with several bboxes.
+ // => Ignore current bbox .
+ //
+ if (!not_to_ignore(mc)
+ || (labels.size() > 1 && labels.find(0) == labels.end()))
{
- box2d b2 = paragraphs(*it).bbox();
- box2d b_i = scribo::util::box_intersection(b_, b2);
+ mln::draw::box_plain(billboard, b_, cur_id); // Really?
+ not_to_ignore(cur_id) = false;
+ continue;
+ }
- // si b_ est inclus dans une boite donc le nombre de comp > 1 => invalid juste b_
- // sinon => invalid b_ et b2
- if ((b_i.nsites() / (float)b_.nsites() > 0.4
- || (b_i.nsites() / (float)b2.nsites()) > 0.9))
+ for (set_t::const_iterator it = labels.begin();
+ it != labels.end(); ++it)
+ if (*it)
{
- not_to_ignore(cur_id) = false;
-
- if (paragraphs(*it).nlines() < 4)
- not_to_ignore(*it) = false;
+ mln_assertion(*it != mc);
+
+ box2d b2 = output(*it).bbox();
+ box2d b_i = scribo::util::box_intersection(b_, b2);
+ volatile float
+ b_ratio = b_i.nsites() / (float)b_.nsites();
+
+ // If the bbox is widely included in another box.
+ if (b_ratio > 0.8)
+ {
+ output(mc).fast_merge(output(cur_id));
+ mln::draw::box_plain(billboard, parset(mc).bbox(), mc);
+ }
+ else
+ mln::draw::box_plain(billboard, parset(cur_id).bbox(), cur_id);
+ break;
}
- }
- mln::draw::box_plain(billboard, b_, cur_id);
+ }
}
- paragraph_set<L> output = paragraphs.duplicate();
+ // if (not_to_ignore(*it))
+ // {
+ // box2d b2 = output(*it).bbox();
+ // box2d b_i = scribo::util::box_intersection(b_, b2);
+
+ // volatile float
+ // b_ratio = b_i.nsites() / (float)b_.nsites(),
+ // b2_ratio = b_i.nsites() / (float)b2.nsites();
+
+ // if (b2_ratio == 1)
+ // {
+ // // Merge paragraphs and redraw the new bbox.
+ // output(cur_id).fast_merge(output(*it));
+ // mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id);
+ // }
+ // else if (b_ratio == 1)
+ // {
+ // // Merge paragraphs and redraw the new bbox.
+ // output(*it).fast_merge(output(cur_id));
+ // mln::draw::box_plain(billboard, output(*it).bbox(), *it);
+ // }
+ // else if ((b_ratio > 0.4 || b2_ratio > 0.9))
+ // {
+ // // si b_ est inclus dans une boite dont le nombre de
+ // // comp > 4 => invalid juste b_ sinon => invalid b_ et
+ // // b2
+ // not_to_ignore(cur_id) = false;
+
+ // if (parset(*it).nlines() < 4)
+ // not_to_ignore(*it) = false;
+ // }
+ // }
+
+ // mln::draw::box_plain(billboard, b_, cur_id);
+ // }
+
output.invalidate(not_to_ignore);
+ for_all_paragraphs(p, output)
+ if (output(p).is_valid())
+ output(p).force_stats_update();
+
trace::exiting("scribo::filter::paragraphs_bbox_overlap");
return output;
}
diff --git a/scribo/scribo/filter/paragraphs_in_borders.hh b/scribo/scribo/filter/paragraphs_in_borders.hh
new file mode 100644
index 0000000..8953282
--- /dev/null
+++ b/scribo/scribo/filter/paragraphs_in_borders.hh
@@ -0,0 +1,140 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
+# define SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive paragraphs.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate paragraphs located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Warning: it does not modify any image. It only invalidates
+ /// the matching paragraphs in the paragraph_set of the
+ /// document.
+ ///
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Paragraphs located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ paragraphs_in_borders(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ void
+ paragraphs_in_borders(document<L>& doc)
+ {
+ trace::entering("scribo::filter::paragraphs_in_borders");
+
+ mln_precondition(doc.is_valid());
+
+ const mln::image2d<mln::value::rgb8>& ima = doc.image();
+
+ unsigned border_size = std::min(43., 0.02 * ima.domain().width());
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(border_size, geom::max_col(ima)),
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), border_size),
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+ box2d
+ bt(ptl, ptr),
+ br(pt, pbr),
+ bb(pbl, pbr),
+ bl(ptl, pb);
+
+ // Paragraphs
+ if (doc.has_text())
+ {
+ paragraph_set<L> parset = doc.paragraphs();
+ for_all_paragraphs(p, parset)
+ if (parset(p).is_valid())
+ if (util::box_is_included(parset(p).bbox(), bt)
+ || util::box_is_included(parset(p).bbox(), br)
+ || util::box_is_included(parset(p).bbox(), bb)
+ || util::box_is_included(parset(p).bbox(), bl))
+ {
+ parset(p).invalidate();
+ }
+
+ doc.set_paragraphs(parset);
+ }
+
+ trace::exiting("scribo::filter::paragraphs_in_borders");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_PARAGRAPHS_IN_BORDERS_HH
diff --git a/scribo/scribo/filter/paragraphs_in_image.hh b/scribo/scribo/filter/paragraphs_in_image.hh
index 1029430..f67b863 100644
--- a/scribo/scribo/filter/paragraphs_in_image.hh
+++ b/scribo/scribo/filter/paragraphs_in_image.hh
@@ -89,8 +89,6 @@ namespace scribo
&& doc.elements()(e).type() == component::Image)
mln::draw::box_plain(billboard, doc.elements()(e).bbox(), true);
- mln::io::pbm::save(billboard, "billboard_parimage.pbm");
-
const paragraph_set<L>& parset = doc.paragraphs();
mln::util::array<bool> not_to_ignore(parset.nelements() + 1, true);
not_to_ignore(0) = false;
@@ -101,15 +99,34 @@ namespace scribo
const bool
tl = billboard(b_.pmin()),
tr = billboard.at_(b_.pmin().row(), b_.pmax().col()),
- ml = billboard.at_(b_.pcenter().row(), b_.pmin().col()),
mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col()),
- mr = billboard.at_(b_.pcenter().row(), b_.pmax().col()),
bl = billboard.at_(b_.pmax().row(), b_.pmin().col()),
br = billboard(b_.pmax());
+ typedef mln::util::set<int> set_t;
+ set_t s;
+ s.insert(tl);
+ s.insert(tr);
+ s.insert(mc);
+ s.insert(bl);
+ s.insert(br);
+
+ if (s.nelements() > 2 || (s.nelements() == 2 && !s.has(0)))
+ continue;
+
// The paragraph is fully included in an image.
- if (tl && tr && ml && mc && mr && bl && br)
- not_to_ignore(cur_id) = false;
+ for_all_elements(e, s)
+ if (s[e] != 0
+ && (mc != 0 && mc == s[e]
+ && ((tl == mc && bl == mc)
+ || (tr == mc && br == mc)
+ || (tl == mc && tr == mc)
+ || (bl == mc && br == mc))))
+ {
+// if (tl && tr && ml && mc && mr && bl && br)
+ not_to_ignore(cur_id) = false;
+ break;
+ }
}
paragraph_set<L> output = parset.duplicate();
diff --git a/scribo/scribo/filter/separators_in_borders.hh b/scribo/scribo/filter/separators_in_borders.hh
new file mode 100644
index 0000000..8ccb6b1
--- /dev/null
+++ b/scribo/scribo/filter/separators_in_borders.hh
@@ -0,0 +1,206 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
+# define SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive separators.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate separators located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Warning: it does not remove separators from the separator
+ /// image. It only invalidates separator components in their
+ /// respective component_set.
+ ///
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Separators located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ separators_in_borders(document<L>& doc, float vratio, float hratio);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ void
+ separators_in_borders(document<L>& doc, float vratio, float hratio)
+ {
+ trace::entering("scribo::filter::separators_in_borders");
+
+ mln_precondition(doc.is_valid());
+
+ const mln::image2d<mln::value::rgb8>& ima = doc.image();
+
+ // Horizontal separators
+ if (doc.has_hline_seps())
+ {
+ unsigned border_size = hratio * std::min(ima.domain().width(), ima.domain().height());
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(border_size, geom::max_col(ima)),
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), border_size),
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+ box2d
+ bt(ptl, ptr),
+ br(pt, pbr),
+ bb(pbl, pbr),
+ bl(ptl, pb);
+
+
+ component_set<L> hline = doc.hline_seps_comps().duplicate();
+ for_all_comps(c, hline)
+ if (hline(c).is_valid())
+ if (util::box_is_included(hline(c).bbox(), bt)
+ || util::box_is_included(hline(c).bbox(), br)
+ || util::box_is_included(hline(c).bbox(), bb)
+ || util::box_is_included(hline(c).bbox(), bl))
+ {
+ hline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
+ }
+
+
+ // Vertical separators
+ if (doc.has_vline_seps())
+ {
+ unsigned border_size = vratio * std::min(ima.domain().width(), ima.domain().height());
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(border_size, geom::max_col(ima)),
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), border_size),
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+ box2d
+ bt(ptl, ptr),
+ br(pt, pbr),
+ bb(pbl, pbr),
+ bl(ptl, pb);
+
+
+ component_set<L> vline = doc.vline_seps_comps().duplicate();
+ for_all_comps(c, vline)
+ if (vline(c).is_valid())
+ {
+ if (util::box_is_included(vline(c).bbox(), bt)
+ || util::box_is_included(vline(c).bbox(), br)
+ || util::box_is_included(vline(c).bbox(), bb)
+ || util::box_is_included(vline(c).bbox(), bl))
+ {
+ // std::cout << vline(c).bbox() << " is included in ";
+ // if (util::box_is_included(vline(c).bbox(), bt))
+ // std::cout << bt << std::endl;
+ // if (util::box_is_included(vline(c).bbox(), br))
+ // std::cout << br << std::endl;
+ // if (util::box_is_included(vline(c).bbox(), bb))
+ // std::cout << bb << std::endl;
+ // if (util::box_is_included(vline(c).bbox(), bl))
+ // std::cout << bl << std::endl;
+
+ vline(c).update_tag(component::Ignored);
+ }
+ // else
+ // {
+ // std::cout << vline(c).bbox() << " is not included in " << bt << " - " << br << " - " << bb << " - " << bl << std::endl;
+ // }
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
+ }
+
+ trace::exiting("scribo::filter::separators_in_borders");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_SEPARATORS_IN_BORDERS_HH
diff --git a/scribo/scribo/filter/separators_in_element.hh b/scribo/scribo/filter/separators_in_element.hh
index 228d82f..a8b0ebb 100644
--- a/scribo/scribo/filter/separators_in_element.hh
+++ b/scribo/scribo/filter/separators_in_element.hh
@@ -90,26 +90,26 @@ namespace scribo
{
component_set<L> hline = doc.hline_seps_comps().duplicate();
for_all_comps(c, hline)
- {
- const mln_box(L)& b_ = hline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- hline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_hline_separators(doc.hline_seps(), hline);
- }
+ if (hline(c).is_valid())
+ {
+ const mln_box(L)& b_ = hline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br)
+ hline(c).update_tag(component::Ignored);
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
}
// Vertical separators
@@ -117,29 +117,29 @@ namespace scribo
{
component_set<L> vline = doc.vline_seps_comps().duplicate();
for_all_comps(c, vline)
- {
- const mln_box(L)& b_ = vline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- vline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_vline_separators(doc.vline_seps(), vline);
- }
-
- trace::exiting("scribo::filter::separators_in_element");
+ if (vline(c).is_valid())
+ {
+ const mln_box(L)& b_ = vline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br)
+ vline(c).update_tag(component::Ignored);
+ }
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
}
+
+ trace::exiting("scribo::filter::separators_in_element");
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/filter/separators_in_paragraph.hh b/scribo/scribo/filter/separators_in_paragraph.hh
index 3e7a150..7c157be 100644
--- a/scribo/scribo/filter/separators_in_paragraph.hh
+++ b/scribo/scribo/filter/separators_in_paragraph.hh
@@ -58,14 +58,14 @@ namespace scribo
///
template <typename L>
void
- separators_in_paragraph(document<L>& doc);
+ separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned vmax_size);
# ifndef MLN_INCLUDE_ONLY
template <typename L>
void
- separators_in_paragraph(document<L>& doc)
+ separators_in_paragraph(document<L>& doc, unsigned hmax_size, unsigned vmax_size)
{
trace::entering("scribo::filter::separators_in_paragraph");
@@ -90,26 +90,28 @@ namespace scribo
{
component_set<L> hline = doc.hline_seps_comps().duplicate();
for_all_comps(c, hline)
- {
- const mln_box(L)& b_ = hline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- hline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_hline_separators(doc.hline_seps(), hline);
- }
+ if (hline(c).is_valid())
+ {
+ const mln_box(L)& b_ = hline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br
+ && hline(c).bbox().width() < hmax_size)
+ hline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_hline_separators(doc.hline_seps(), hline);
}
// Vertical separators
@@ -117,29 +119,31 @@ namespace scribo
{
component_set<L> vline = doc.vline_seps_comps().duplicate();
for_all_comps(c, vline)
- {
- const mln_box(L)& b_ = vline(c).bbox();
-
- const bool tl = billboard(b_.pmin());
- const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
- const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
- const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
- const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
- const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
- const bool br = billboard(b_.pmax());
-
- // This separator is included in an element (picture, drawing...)
- // => Ignore it.
- if (tl && tr && ml && mc && mr && bl && br)
- vline(c).update_tag(component::Ignored);
-
- // FIXME: warning this call may produce inconsistent data
- // Ignored components are still in the separator image...
- doc.set_vline_separators(doc.vline_seps(), vline);
- }
-
- trace::exiting("scribo::filter::separators_in_paragraph");
+ if (vline(c).is_valid())
+ {
+ const mln_box(L)& b_ = vline(c).bbox();
+
+ const bool tl = billboard(b_.pmin());
+ const bool tr = billboard.at_(b_.pmin().row(), b_.pmax().col());
+ const bool ml = billboard.at_(b_.pcenter().row(), b_.pmin().col());
+ const bool mc = billboard.at_(b_.pcenter().row(), b_.pcenter().col());
+ const bool mr = billboard.at_(b_.pcenter().row(), b_.pmax().col());
+ const bool bl = billboard.at_(b_.pmax().row(), b_.pmin().col());
+ const bool br = billboard(b_.pmax());
+
+ // This separator is included in an element (picture, drawing...)
+ // => Ignore it.
+ if (tl && tr && ml && mc && mr && bl && br
+ && vline(c).bbox().height() < vmax_size)
+ vline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
}
+
+ trace::exiting("scribo::filter::separators_in_paragraph");
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/filter/separators_vert_in_borders.hh b/scribo/scribo/filter/separators_vert_in_borders.hh
new file mode 100644
index 0000000..4a9e806
--- /dev/null
+++ b/scribo/scribo/filter/separators_vert_in_borders.hh
@@ -0,0 +1,143 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
+# define SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
+
+/// \file
+///
+/// Invalidate false positive separators.
+/// \fixme Share same test canvas as text::merging.
+
+
+# include <mln/core/concept/image.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/document.hh>
+# include <scribo/util/box_is_included.hh>
+
+
+namespace scribo
+{
+
+ namespace filter
+ {
+
+ using namespace mln;
+
+
+ /// Invalidate separators located close to the image borders.
+ ///
+ /// \param[in,out] doc A document structure.
+ ///
+ /// Warning: it does not remove separators from the separator
+ /// image. It only invalidates separator components in their
+ /// respective component_set.
+ ///
+ /// \verbatim
+ ///
+ /// -----------
+ /// |_!____!__|
+ /// | ! ! <--------- Separators located in this area are
+ /// | ! ! | invalidated.
+ /// | ! ! |
+ /// |_!____!__|
+ /// | ! ! |
+ /// -----------
+ ///
+ /// \endverbatim
+ //
+ template <typename L>
+ void
+ separators_vert_in_borders(document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ void
+ separators_vert_in_borders(document<L>& doc)
+ {
+ trace::entering("scribo::filter::separators_vert_in_borders");
+
+ mln_precondition(doc.is_valid());
+
+ const mln::image2d<mln::value::rgb8>& ima = doc.image();
+
+ float border_size = std::min(43., 0.05 * ima.domain().width());
+
+ /// pt
+ /// ptl X------X---
+ /// |_!____!__X ptr
+ /// | ! ! |
+ /// | ! ! |
+ /// | ! ! |
+ /// pbl X_!____!__|
+ /// | ! ! |
+ /// --X-------X
+ /// pb pbr
+ ///
+ point2d
+ ptl = ima.domain().pmin(),
+ pt(geom::min_row(ima), geom::max_col(ima) - border_size),
+ ptr(border_size, geom::max_col(ima)),
+ pbr = ima.domain().pmax(),
+ pb(geom::max_row(ima), border_size),
+ pbl(geom::max_row(ima) - border_size, geom::min_col(ima));
+
+ box2d
+ bt(ptl, ptr),
+ br(pt, pbr),
+ bb(pbl, pbr),
+ bl(ptl, pb);
+
+ // Vertical separators
+ if (doc.has_vline_seps())
+ {
+ component_set<L> vline = doc.vline_seps_comps().duplicate();
+ for_all_comps(c, vline)
+ if (vline(c).is_valid())
+ if (util::box_is_included(vline(c).bbox(), bt)
+ || util::box_is_included(vline(c).bbox(), br)
+ || util::box_is_included(vline(c).bbox(), bb)
+ || util::box_is_included(vline(c).bbox(), bl))
+ {
+ vline(c).update_tag(component::Ignored);
+ }
+
+ // FIXME: warning this call may produce inconsistent data
+ // Ignored components are still in the separator image...
+ doc.set_vline_separators(doc.vline_seps(), vline);
+ }
+
+ trace::exiting("scribo::filter::separators_vert_in_borders");
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::filter
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_FILTER_SEPARATORS_VERT_IN_BORDERS_HH
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index e0c5b50..24d24a3 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -53,8 +53,10 @@
# include <scribo/filter/objects_small.hh>
# include <scribo/filter/paragraphs_bbox_overlap.hh>
# include <scribo/filter/paragraphs_in_image.hh>
+# include <scribo/filter/paragraphs_in_borders.hh>
# include <scribo/filter/separators_in_element.hh>
# include <scribo/filter/separators_in_paragraph.hh>
+# include <scribo/filter/separators_in_borders.hh>
# include <scribo/filter/images_in_paragraph.hh>
# include <scribo/primitive/group/from_single_link.hh>
@@ -66,6 +68,8 @@
# include <scribo/preprocessing/denoise_fg.hh>
+# include <scribo/postprocessing/images_to_drop_capital.hh>
+
# include <scribo/text/recognition.hh>
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
@@ -84,6 +88,7 @@
# include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
namespace scribo
{
@@ -201,12 +206,22 @@ namespace scribo
// Vertical and horizontal separators
{
+ unsigned closing_size = std::min(0.01 * doc.image().domain().width(),
+ 0.01 * doc.image().domain().height());
+ win::hline2d hl(closing_size);
+
+ // Apply a closing::structural in order to connect the
+ // disconnected parts of a single separator.
mln_ch_value(I,bool)
vseparators = preprocessing::rotate_90(
- primitive::extract::lines_h_thick_and_thin(
- preprocessing::rotate_90(processed_image), 101, 3, 0.2, 0.6, 10), false),
- hseparators = primitive::extract::lines_h_thick_and_thin(
- processed_image, 101, 3);
+ morpho::closing::structural(
+ primitive::extract::lines_h_thick_and_thin(
+ preprocessing::rotate_90(processed_image),
+ 101, 3, 0.2, 0.6, 10), hl), false),
+
+ hseparators = morpho::closing::structural(
+ primitive::extract::lines_h_thick_and_thin(
+ processed_image, 101, 3), hl);
doc.set_vline_separators(vseparators);
doc.set_hline_separators(hseparators);
@@ -509,9 +524,11 @@ namespace scribo
on_new_progress_label("Filtering paragraphs");
- parset = filter::paragraphs_bbox_overlap(parset);
+ paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset);
+ doc.set_paragraphs(parset_f);
- doc.set_paragraphs(parset);
+ // parset = filter::paragraphs_bbox_overlap(parset);
+ // doc.set_paragraphs(parset);
on_progress();
@@ -540,16 +557,38 @@ namespace scribo
on_progress();
+// TEMPORARY DEBUG
+ on_new_progress_label("Saving debug data");
+ doc.set_paragraphs(parset);
+ scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage);
+ scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage);
+ doc.set_paragraphs(parset_f);
+ on_progress();
+// END OF TEMPORARY DEBUG
+
on_new_progress_label("Cleanup miscellaneous false positive");
filter::separators_in_element(doc);
- filter::separators_in_paragraph(doc);
+ filter::separators_in_paragraph(doc, 81, 121);
+ filter::separators_in_borders(doc, 0.05, 0.02);
+
filter::paragraphs_in_image(doc);
- filter::images_in_paragraph(doc);
+ filter::paragraphs_in_borders(doc);
on_progress();
+ on_new_progress_label("Rebuild extracted images");
+ elements = scribo::primitive::extract::non_text_hdoc(doc, closing_size);
+ doc.set_elements(elements);
+
+ on_progress();
+
+ on_new_progress_label("Tag images as drop capital");
+
+ postprocessing::images_to_drop_capital(doc);
+
+ on_progress();
// Saving results
if (save_doc_as_xml)
@@ -564,6 +603,9 @@ namespace scribo
on_end();
+
+ sleep(10);
+
return doc;
}
--
1.5.6.5
1
0
30 May '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch icdar/hdlac2011 has been updated
via f53b7d461ebcefbb55a59381c1806d61b3cf0284 (commit)
via b39473e5ef5fe12c6e5cf56e5d6c513f944969f4 (commit)
from cb2e9b223ae8f832b885889d92429cf16c7e1219 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
f53b7d4 Precise outline correction
b39473e Paragraphs correction
-----------------------------------------------------------------------
Summary of changes:
scribo/scribo/text/paragraphs.hh | 77 ++++++-------------
scribo/scribo/util/component_precise_outline.hh | 91 ++++++++++-------------
2 files changed, 64 insertions(+), 104 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0