Olena-patches
Threads by month
- ----- 2025 -----
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- 9625 discussions

28 Mar '11
---
milena/ChangeLog | 4 ++++
milena/mln/util/array.hh | 25 +++++++++++++++++++++++--
2 files changed, 27 insertions(+), 2 deletions(-)
diff --git a/milena/ChangeLog b/milena/ChangeLog
index e03d35c..c4bed2c 100644
--- a/milena/ChangeLog
+++ b/milena/ChangeLog
@@ -1,5 +1,9 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ * mln/util/array.hh: Add last() method.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Add an optional base index for debug::iota.
* mln/debug/iota.hh,
diff --git a/milena/mln/util/array.hh b/milena/mln/util/array.hh
index 0fe34e7..c7febfe 100644
--- a/milena/mln/util/array.hh
+++ b/milena/mln/util/array.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2008, 2009 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2008, 2009, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -187,6 +187,11 @@ namespace mln
/// \pre i < nelements()
mutable_result operator[](unsigned i);
+ /// \brief Return the last element.
+ ro_result last() const;
+
+ /// \brief Return the last element.
+ mutable_result last();
/// Empty the array. All elements contained in the array are
/// destroyed. \post is_empty() == true
@@ -553,6 +558,22 @@ namespace mln
template <typename T>
inline
+ typename array<T>::ro_result
+ array<T>::last() const
+ {
+ return v_[nelements() - 1];
+ }
+
+ template <typename T>
+ inline
+ typename array<T>::mutable_result
+ array<T>::last()
+ {
+ return v_[nelements() - 1];
+ }
+
+ template <typename T>
+ inline
bool
array<T>::is_empty() const
{
--
1.5.6.5
1
0
* scribo/primitive/extract/elements.hh,
* src/primitive/extract/extract_discontinued_hlines.cc,
* src/primitive/extract/extract_discontinued_lines.cc,
* src/primitive/extract/extract_discontinued_vlines.cc,
* src/primitive/extract/extract_thick_hlines.cc,
* src/primitive/extract/extract_thick_vlines.cc: Rename to...
* scribo/primitive/extract/non_text.hh,
* src/primitive/extract/discontinued_hlines.cc,
* src/primitive/extract/discontinued_lines.cc,
* src/primitive/extract/discontinued_vlines.cc,
* src/primitive/extract/thick_hlines.cc,
* src/primitive/extract/thick_vlines.cc: ... this.
* scribo/toolchain/internal/content_in_doc_functor.hh: Rename
function call from extract::elements to extract::non_text.
* src/primitive/extract/Makefile.am: Update target names.
---
scribo/ChangeLog | 23 +++++++
.../primitive/extract/{elements.hh => non_text.hh} | 64 +++++++++++---------
.../toolchain/internal/content_in_doc_functor.hh | 2 +-
scribo/src/primitive/extract/Makefile.am | 26 ++++----
...scontinued_hlines.cc => discontinued_hlines.cc} | 0
...discontinued_lines.cc => discontinued_lines.cc} | 0
...scontinued_vlines.cc => discontinued_vlines.cc} | 0
.../{extract_thick_hlines.cc => thick_hlines.cc} | 0
.../{extract_thick_vlines.cc => thick_vlines.cc} | 0
9 files changed, 73 insertions(+), 42 deletions(-)
rename scribo/scribo/primitive/extract/{elements.hh => non_text.hh} (82%)
rename scribo/src/primitive/extract/{extract_discontinued_hlines.cc => discontinued_hlines.cc} (100%)
rename scribo/src/primitive/extract/{extract_discontinued_lines.cc => discontinued_lines.cc} (100%)
rename scribo/src/primitive/extract/{extract_discontinued_vlines.cc => discontinued_vlines.cc} (100%)
rename scribo/src/primitive/extract/{extract_thick_hlines.cc => thick_hlines.cc} (100%)
rename scribo/src/primitive/extract/{extract_thick_vlines.cc => thick_vlines.cc} (100%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 9cbfca1..e08f2fd 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,28 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Rename files in Scribo.
+
+ * scribo/primitive/extract/elements.hh,
+ * src/primitive/extract/extract_discontinued_hlines.cc,
+ * src/primitive/extract/extract_discontinued_lines.cc,
+ * src/primitive/extract/extract_discontinued_vlines.cc,
+ * src/primitive/extract/extract_thick_hlines.cc,
+ * src/primitive/extract/extract_thick_vlines.cc: Rename to...
+
+ * scribo/primitive/extract/non_text.hh,
+ * src/primitive/extract/discontinued_hlines.cc,
+ * src/primitive/extract/discontinued_lines.cc,
+ * src/primitive/extract/discontinued_vlines.cc,
+ * src/primitive/extract/thick_hlines.cc,
+ * src/primitive/extract/thick_vlines.cc: ... this.
+
+ * scribo/toolchain/internal/content_in_doc_functor.hh: Rename
+ function call from extract::elements to extract::non_text.
+
+ * src/primitive/extract/Makefile.am: Update target names.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Add Base64 conversion routines.
* scribo/scribo/convert/from_base64.hh,
diff --git a/scribo/scribo/primitive/extract/elements.hh b/scribo/scribo/primitive/extract/non_text.hh
similarity index 82%
rename from scribo/scribo/primitive/extract/elements.hh
rename to scribo/scribo/primitive/extract/non_text.hh
index ddf2c92..a017f1f 100644
--- a/scribo/scribo/primitive/extract/elements.hh
+++ b/scribo/scribo/primitive/extract/non_text.hh
@@ -26,12 +26,12 @@
/// \file
///
-/// \brief Find in a document elements which are not text.
+/// \brief Find in a document elements which are not text.
///
/// \fixme To be optimized!
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH
-# define SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
# include <mln/core/image/image2d.hh>
# include <mln/core/alias/neighb2d.hh>
@@ -58,6 +58,10 @@
# include <mln/clustering/kmean_rgb.hh>
# include <mln/fun/v2v/rgb8_to_rgbn.hh>
+# include <mln/util/timer.hh>
+
+# include <mln/io/pbm/save.hh>
+
namespace scribo
{
@@ -72,7 +76,7 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- elements(const document<L>& doc, const Image<I>& input);
+ non_text(const document<L>& doc, const Image<I>& input);
# ifndef MLN_INCLUDE_ONLY
@@ -107,9 +111,9 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- elements(const document<L>& doc, const Image<I>& input_)
+ non_text(const document<L>& doc, const Image<I>& input_)
{
- trace::entering("scribo::primitive::extract::elements");
+ trace::entering("scribo::primitive::extract::non_text");
const I& input = exact(input_);
mln_precondition(doc.is_valid());
@@ -135,9 +139,16 @@ namespace scribo
image2d<t_rgb5>
img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
+ // DEBUG
+ io::pbm::save(content, "text_area.pbm");
+
+ mln::util::timer t;
+ t.start();
img_lbl8 =
mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+ t.stop();
+ std::cout << t << std::endl;
mln::util::array<unsigned>
card = mln::labeling::compute(accu::math::count<value::label_8>(),
@@ -162,15 +173,25 @@ namespace scribo
std::cout << "Removing small elements" << std::endl;
{
- image2d<bool> elts;
- initialize(elts, img_lbl8);
- data::fill(elts, false);
- data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
+ image2d<bool> elts;
+ initialize(elts, img_lbl8);
+ data::fill(elts, false);
+ data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
+
+ // DEBUG
+ data::fill((elts | (pw::value(doc.line_seps()) != pw::cst(0))).rw(),
+ false);
+ // END OF DEBUG
+
- scribo::def::lbl_type nlabels;
- elts = filter::components_small(elts, c8(), nlabels, 40);
+ scribo::def::lbl_type nlabels;
+ elts = filter::components_small(elts, c8(), nlabels, 40);
- output = primitive::extract::components(elts, c8(), nlabels);
+ // DEBUG
+ io::pbm::save(elts, "elements.pbm");
+ // END OF DEBUG
+
+ output = primitive::extract::components(elts, c8(), nlabels);
}
@@ -207,20 +228,7 @@ namespace scribo
}
}
-// mln::io::pbm::save(merged_elts, "merged_elts.pbm");
-
-// mln::util::array<image2d<value::rgb8> > elt_ima;
-// unsigned i = 0;
-// for_all_comps(c, elt_comp)
-// if (elt_comp(c).is_valid())
-// {
-// elt_ima.append(preprocessing::crop(doc.image(), elt_comp(c).bbox()));
-// mln::io::ppm::save(elt_ima(i), mln::debug::filename("elt.ppm", i));
-// ++i;
-// }
-
-
- trace::exiting("scribo::primitive::extract::elements");
+ trace::exiting("scribo::primitive::extract::non_text");
return output;
}
@@ -233,4 +241,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index b8aa56d..ed691e8 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -463,7 +463,7 @@ namespace scribo
// Extract other Elements
on_new_progress_label("Extracting Elements");
component_set<L>
- elements = scribo::primitive::extract::elements(doc, original_image);
+ elements = scribo::primitive::extract::non_text(doc, original_image);
on_progress();
diff --git a/scribo/src/primitive/extract/Makefile.am b/scribo/src/primitive/extract/Makefile.am
index 2c8188e..a46cd68 100644
--- a/scribo/src/primitive/extract/Makefile.am
+++ b/scribo/src/primitive/extract/Makefile.am
@@ -1,5 +1,5 @@
-# Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-# (LRDE).
+# Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+# Laboratory (LRDE).
#
# This file is part of Olena.
#
@@ -18,17 +18,17 @@
include $(top_srcdir)/scribo/scribo.mk
-noinst_PROGRAMS = \
- extract_discontinued_lines \
- extract_discontinued_vlines \
- extract_discontinued_hlines \
- extract_thick_vlines \
- extract_thick_hlines \
+noinst_PROGRAMS = \
+ discontinued_lines \
+ discontinued_vlines \
+ discontinued_hlines \
+ thick_vlines \
+ thick_hlines \
lines_pattern
-extract_discontinued_lines_SOURCES = extract_discontinued_lines.cc
-extract_discontinued_vlines_SOURCES = extract_discontinued_vlines.cc
-extract_discontinued_hlines_SOURCES = extract_discontinued_hlines.cc
-extract_thick_vlines_SOURCES = extract_thick_vlines.cc
-extract_thick_hlines_SOURCES = extract_thick_hlines.cc
+discontinued_lines_SOURCES = discontinued_lines.cc
+discontinued_vlines_SOURCES = discontinued_vlines.cc
+discontinued_hlines_SOURCES = discontinued_hlines.cc
+thick_vlines_SOURCES = thick_vlines.cc
+thick_hlines_SOURCES = thick_hlines.cc
lines_pattern_SOURCES = lines_pattern.cc
diff --git a/scribo/src/primitive/extract/extract_discontinued_hlines.cc b/scribo/src/primitive/extract/discontinued_hlines.cc
similarity index 100%
rename from scribo/src/primitive/extract/extract_discontinued_hlines.cc
rename to scribo/src/primitive/extract/discontinued_hlines.cc
diff --git a/scribo/src/primitive/extract/extract_discontinued_lines.cc b/scribo/src/primitive/extract/discontinued_lines.cc
similarity index 100%
rename from scribo/src/primitive/extract/extract_discontinued_lines.cc
rename to scribo/src/primitive/extract/discontinued_lines.cc
diff --git a/scribo/src/primitive/extract/extract_discontinued_vlines.cc b/scribo/src/primitive/extract/discontinued_vlines.cc
similarity index 100%
rename from scribo/src/primitive/extract/extract_discontinued_vlines.cc
rename to scribo/src/primitive/extract/discontinued_vlines.cc
diff --git a/scribo/src/primitive/extract/extract_thick_hlines.cc b/scribo/src/primitive/extract/thick_hlines.cc
similarity index 100%
rename from scribo/src/primitive/extract/extract_thick_hlines.cc
rename to scribo/src/primitive/extract/thick_hlines.cc
diff --git a/scribo/src/primitive/extract/extract_thick_vlines.cc b/scribo/src/primitive/extract/thick_vlines.cc
similarity index 100%
rename from scribo/src/primitive/extract/extract_thick_vlines.cc
rename to scribo/src/primitive/extract/thick_vlines.cc
--
1.5.6.5
1
0

last-svn-commit-770-g2a17f6d Various changes in scribo core structures.
by Guillaume Lazzara 28 Mar '11
by Guillaume Lazzara 28 Mar '11
28 Mar '11
* scribo/scribo/core/component_info.hh: Remove useless line_id.
* scribo/scribo/core/line_set.hh,
* scribo/scribo/core/component_set.hh: Add new constructors.
* scribo/scribo/core/document.hh: Store line separators.
* scribo/scribo/core/line_info.hh: Share attributes and add new
constructors.
(is_textline): New method.
* scribo/scribo/core/line_links.hh: Do not allocate useless data.
* scribo/scribo/core/paragraph_info.hh (operator<<): New.
* scribo/scribo/core/paragraph_set.hh: Share attributes and add new
constructors.
---
scribo/ChangeLog | 22 ++
scribo/scribo/core/component_info.hh | 17 +--
scribo/scribo/core/component_set.hh | 65 +++++--
scribo/scribo/core/document.hh | 48 +++++-
scribo/scribo/core/line_info.hh | 367 ++++++++++++++++++----------------
scribo/scribo/core/line_links.hh | 2 +-
scribo/scribo/core/line_set.hh | 29 +++-
scribo/scribo/core/paragraph_info.hh | 10 +
scribo/scribo/core/paragraph_set.hh | 84 +++++++-
9 files changed, 425 insertions(+), 219 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index e08f2fd..ab028c3 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,27 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Various changes in scribo core structures.
+
+ * scribo/scribo/core/component_info.hh: Remove useless line_id.
+
+ * scribo/scribo/core/line_set.hh,
+ * scribo/scribo/core/component_set.hh: Add new constructors.
+
+ * scribo/scribo/core/document.hh: Store line separators.
+
+ * scribo/scribo/core/line_info.hh: Share attributes and add new
+ constructors.
+ (is_textline): New method.
+
+ * scribo/scribo/core/line_links.hh: Do not allocate useless data.
+
+ * scribo/scribo/core/paragraph_info.hh (operator<<): New.
+
+ * scribo/scribo/core/paragraph_set.hh: Share attributes and add new
+ constructors.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Rename files in Scribo.
* scribo/primitive/extract/elements.hh,
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 4ed6db7..1b03318 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -47,7 +47,6 @@ namespace scribo
class component_info
{
typedef mln::util::object_id<scribo::ComponentId, unsigned> component_id_t;
- typedef mln::util::object_id<scribo::LineId, unsigned> line_id_t;
public:
component_info();
@@ -69,9 +68,6 @@ namespace scribo
component::Type type() const;
void update_type(component::Type type);
- // The line it is rattached to. 0 means an invalid line.
- line_id_t line_id() const;
-
bool is_valid() const;
private:
@@ -82,8 +78,6 @@ namespace scribo
component::Tag tag_;
component::Type type_;
-
- line_id_t line_id_;
};
@@ -109,7 +103,7 @@ namespace scribo
const mln::point2d& mass_center,
unsigned card)
: id_(id), bbox_(bbox), mass_center_(mass_center), card_(card),
- tag_(component::None), type_(component::Undefined), line_id_(0)
+ tag_(component::None), type_(component::Undefined)
{
}
@@ -179,14 +173,6 @@ namespace scribo
inline
- component_info::line_id_t
- component_info::line_id() const
- {
- return line_id_;
- }
-
-
- inline
bool
component_info::is_valid() const
{
@@ -204,7 +190,6 @@ namespace scribo
<< ", mass_center=" << info.mass_center()
<< ", card=" << info.card()
<< ", tag=" << info.tag()
- << ", line_id=" << info.line_id()
<< ")" << std::endl;
}
diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh
index 103826f..7ddcf16 100644
--- a/scribo/scribo/core/component_set.hh
+++ b/scribo/scribo/core/component_set.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -55,6 +55,8 @@
# include <mln/core/routine/duplicate.hh>
+# include <mln/value/next.hh>
+
# include <scribo/core/macros.hh>
# include <scribo/core/component_info.hh>
@@ -95,6 +97,8 @@ namespace scribo
void fill_infos(const mln::util::array<pair_data_t>& attribs);
+ // Useful while constructing incrementally (XML loading).
+ void soft_init(const mln_value(L) ncomps);
L ima_;
mln_value(L) ncomps_;
@@ -128,6 +132,9 @@ namespace scribo
/// Constructor without argument.
component_set();
+ // Constructor from internal data.
+ component_set(const mln::util::tracked_ptr<data_t>& data);
+
/// Constructor from an image \p ima and the number of labels \p ncomps.
component_set(const L& ima, const mln_value(L)& ncomps);
@@ -348,6 +355,19 @@ namespace scribo
}
+ template <typename L>
+ inline
+ void
+ component_set_data<L>::soft_init(const mln_value(L) ncomps)
+ {
+ mln_precondition(infos_.nelements() == 0);
+
+ ncomps_ = ncomps;
+ infos_.reserve(ncomps_);
+ infos_.append(component_info()); // Component 0, i.e. the background.
+ }
+
+
} // end of namespace mln::internal
@@ -361,6 +381,13 @@ namespace scribo
template <typename L>
inline
+ component_set<L>::component_set(const mln::util::tracked_ptr<data_t>& data)
+ {
+ data_ = data;
+ }
+
+ template <typename L>
+ inline
component_set<L>::component_set(const L& ima, const mln_value(L)& ncomps)
{
data_ = new internal::component_set_data<L>(ima, ncomps);
@@ -400,7 +427,7 @@ namespace scribo
const component_info&
component_set<L>::info(const mln_value(L)& id) const
{
- return this->data_->infos_[id];
+ return data_->infos_[id];
}
template <typename L>
@@ -408,7 +435,7 @@ namespace scribo
component_info&
component_set<L>::info(const mln_value(L)& id)
{
- return this->data_->infos_[id];
+ return data_->infos_[id];
}
// template <typename L>
@@ -416,7 +443,7 @@ namespace scribo
// const component_info&
// component_set<L>::operator()(const mln_value(L)& id) const
// {
-// return this->data_->infos_[id];
+// return data_->infos_[id];
// }
// template <typename L>
@@ -424,7 +451,7 @@ namespace scribo
// component_info&
// component_set<L>::operator()(const mln_value(L)& id)
// {
-// return this->data_->infos_[id];
+// return data_->infos_[id];
// }
template <typename L>
@@ -432,7 +459,7 @@ namespace scribo
const component_info&
component_set<L>::operator()(const component_id_t& id) const
{
- return this->data_->infos_[id];
+ return data_->infos_[id];
}
template <typename L>
@@ -440,7 +467,7 @@ namespace scribo
component_info&
component_set<L>::operator()(const component_id_t& id)
{
- return this->data_->infos_[id];
+ return data_->infos_[id];
}
@@ -476,7 +503,7 @@ namespace scribo
const L&
component_set<L>::labeled_image() const
{
- return this->data_->ima_;
+ return data_->ima_;
}
@@ -485,7 +512,7 @@ namespace scribo
bool
component_set<L>::is_valid() const
{
- return this->data_->ima_.is_valid();
+ return data_ != 0 && data_->ima_.is_valid();
}
@@ -503,7 +530,7 @@ namespace scribo
L&
component_set<L>::labeled_image_()
{
- return this->data_->ima_;
+ return data_->ima_;
}
@@ -512,7 +539,7 @@ namespace scribo
mln_concrete(L)
component_set<L>::valid_comps_image_() const
{
- mln::util::array<bool> f(mln::value::next(this->data_->ncomps_));
+ mln::util::array<bool> f(mln::value::next(data_->ncomps_));
f(0) = true;
for_all_comps(c, (*this))
@@ -520,8 +547,8 @@ namespace scribo
mln_value(L) new_ncomps;
mln_concrete(L)
- output = mln::labeling::relabel(this->data_->ima_,
- this->data_->ncomps_,
+ output = mln::labeling::relabel(data_->ima_,
+ data_->ncomps_,
new_ncomps,
f);
@@ -534,7 +561,7 @@ namespace scribo
bool
component_set<L>::has_separators() const
{
- return this->data_->separators_.is_valid();
+ return data_->separators_.is_valid();
}
@@ -544,9 +571,9 @@ namespace scribo
component_set<L>::add_separators(const mln_ch_value(L, bool)& ima)
{
if (! has_separators())
- this->data_->separators_ = ima;
+ data_->separators_ = ima;
else
- mln::logical::or_inplace(this->data_->separators_, ima);
+ mln::logical::or_inplace(data_->separators_, ima);
}
@@ -555,7 +582,7 @@ namespace scribo
const mln_ch_value(L, bool)&
component_set<L>::separators() const
{
- return this->data_->separators_;
+ return data_->separators_;
}
@@ -564,7 +591,7 @@ namespace scribo
void
component_set<L>::clear_separators()
{
- this->data_->separators_.destroy();
+ data_->separators_.destroy();
}
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh
index f112410..e5ac825 100644
--- a/scribo/scribo/core/document.hh
+++ b/scribo/scribo/core/document.hh
@@ -91,16 +91,24 @@ namespace scribo
const component_set<L>& whitespace_seps_comps() const;
void set_whitespace_separators(const image2d<bool>& whitespace_seps);
+ bool has_line_seps() const;
+ const mln::image2d<bool>& line_seps() const;
+ const component_set<L>& line_seps_comps() const;
+ void set_line_separators(const image2d<bool>& line_seps);
+
+
private:
const char *filename_;
mln::image2d<mln::value::rgb8> image_;
- line_set<L> lines_;
paragraph_set<L> parset_;
component_set<L> elements_;
mln::image2d<bool> whitespace_seps_;
component_set<L> whitespace_seps_comps_;
+
+ mln::image2d<bool> line_seps_;
+ component_set<L> line_seps_comps_;
};
@@ -201,7 +209,6 @@ namespace scribo
return parset_.lines();
}
-
template <typename L>
const paragraph_set<L>&
document<L>::paragraphs() const
@@ -294,6 +301,43 @@ namespace scribo
}
+ template <typename L>
+ bool
+ document<L>::has_line_seps() const
+ {
+ return line_seps_.is_valid();
+ }
+
+
+ template <typename L>
+ const mln::image2d<bool>&
+ document<L>::line_seps() const
+ {
+ return line_seps_;
+ }
+
+
+ template <typename L>
+ const component_set<L>&
+ document<L>::line_seps_comps() const
+ {
+ return line_seps_comps_;
+ }
+
+
+ template <typename L>
+ void
+ document<L>::set_line_separators(const image2d<bool>& line_seps)
+ {
+ line_seps_ = line_seps;
+
+ mln_value(L) ncomps;
+ line_seps_comps_ = primitive::extract::components(line_seps,
+ mln::c8(), ncomps);
+ }
+
+
+
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index 54a5094..c82160a 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -58,12 +58,76 @@ namespace scribo
// Forward declarations.
template <typename L> class line_set;
+ template <typename L> class line_info;
typedef mln::util::object_id<scribo::LineId, unsigned> line_id_t;
+
+ namespace internal
+ {
+ /// Data structure for \c scribo::line_info<L>.
+ template <typename L>
+ struct line_info_data
+ {
+ line_info_data();
+ line_info_data(const line_set<L>& holder,
+ const mln::util::array<component_id_t>& comps);
+
+
+ bool hidden_;
+ line::Tag tag_;
+ mln::box2d bbox_;
+ mln::box2d ebbox_;
+ mln::util::array<component_id_t> components_;
+
+ // Values relative to the line bbox.
+ int baseline_;
+ int meanline_;
+
+ // Values relative to the baseline.
+ unsigned x_height_;
+ int d_height_;
+ int a_height_;
+
+ // Character related stats.
+ unsigned char_space_;
+ unsigned char_width_;
+
+ // Words related stats.
+ unsigned word_space_;
+
+ // Reading direction
+ line::ReadingDirection reading_direction_;
+
+ // Line type
+ line::Type type_;
+
+ // Is this line in reverse video?
+ bool reverse_video_;
+
+ // Text orientation
+ float orientation_;
+
+ // Text reading orientation
+ float reading_orientation_;
+
+ bool indented_;
+
+ std::string text_;
+
+ // Line set holding this element.
+ line_set<L> holder_;
+
+ };
+
+ } // end of namespace scribo::internal
+
+
+
template <typename L>
class line_info
{
+ typedef internal::line_info_data<L> data_t;
typedef mln::util::object_id<scribo::ComponentId, unsigned> component_id_t;
typedef mln::util::object_id<scribo::LineId, unsigned> line_id_t;
@@ -74,6 +138,8 @@ namespace scribo
line_info();
+ line_info(const line_id_t& id, data_t* data);
+
line_info(const line_set<L>& holder,
const line_id_t& id,
const mln::util::array<component_id_t>& comps);
@@ -134,9 +200,10 @@ namespace scribo
const std::string& text() const;
void update_text(const std::string& str);
-
bool is_valid() const;
+ bool is_textline() const;
+
/// Hidden status.
///
/// When a line is hidden, it should not be used in routines
@@ -180,6 +247,9 @@ namespace scribo
/// Returns the delta used to compute the extended bbox.
int delta_of_line() const;
+ /// Update the extended bbox.
+ void update_ebbox();
+
private: // Members
void copy_data(const line_info<L>& other);
@@ -189,81 +259,59 @@ namespace scribo
/// Update bbox and ebbox_ attributes.
void update_bbox_and_ebox(line_info<L>& other);
- /// Update the extended bbox.
- void update_ebbox();
-
mln::box2d merged_ebbox(const scribo::line_info<L>& info_l,
const scribo::line_info<L>& info);
void update_components_type(component::Type type);
private: // Attributes
- // WARNING: NEVER FORGET TO UPDATE COPY CONSTRUCTOR REDEFINITION!!!!
-
line_id_t id_;
- bool hidden_;
- line::Tag tag_;
- mln::box2d bbox_;
- mln::box2d ebbox_;
- mln::util::array<component_id_t> components_;
-
- // Values relative to the line bbox.
- int baseline_;
- int meanline_;
-
- // Values relative to the baseline.
- unsigned x_height_;
- int d_height_;
- int a_height_;
-
- // WARNING: NEVER FORGET TO UPDATE COPY CONSTRUCTOR REDEFINITION!!!!
-
- // Character related stats.
- unsigned char_space_;
- unsigned char_width_;
-
- // Words related stats.
- unsigned word_space_;
-
- // Reading direction
- line::ReadingDirection reading_direction_;
-
- // Line type
- line::Type type_;
+ mln::util::tracked_ptr<data_t> data_;
+ };
- // Is this line in reverse video?
- bool reverse_video_;
- // Text orientation
- float orientation_;
+ template <typename L>
+ std::ostream&
+ operator<<(std::ostream& ostr, const line_info<L>& info);
- // Text reading orientation
- float reading_orientation_;
- bool indented_;
+# ifndef MLN_INCLUDE_ONLY
- std::string text_;
+ namespace internal
+ {
- // Line set holding this element.
- line_set<L> holder_;
+ template <typename L>
+ line_info_data<L>::line_info_data()
+ {
+ hidden_ = false;
+ }
- // WARNING: NEVER FORGET TO UPDATE COPY CONSTRUCTOR REDEFINITION!!!!
- };
+ template <typename L>
+ line_info_data<L>::line_info_data(const line_set<L>& holder,
+ const mln::util::array<component_id_t>& comps)
+ : hidden_(false), tag_(line::None), components_(comps),
+ type_(line::Undefined), holder_(holder)
+ {
+ // FIXME: set valid information for these attributes in
+ // force_stats_update.
+ word_space_ = 0;
+ reading_direction_ = line::LeftToRight;
+ reverse_video_ = false;
- template <typename L>
- std::ostream&
- operator<<(std::ostream& ostr, const line_info<L>& info);
+ orientation_ = 0.;
+ reading_orientation_ = 0.;
+ indented_ = false;
+ }
-# ifndef MLN_INCLUDE_ONLY
+ } // end of namespace scribo::internal
template <typename L>
line_info<L>::line_info()
- : id_(0), hidden_(false)
+ : id_(0)
{
-
}
template <typename L>
@@ -275,47 +323,24 @@ namespace scribo
if (! is_valid())
id_ = other.id();
- hidden_ = other.hidden_;
-
- tag_ = other.tag();
- bbox_ = other.bbox();
- ebbox_ = other.ebbox();
- components_ = other.components();
-
- baseline_ = other.baseline();
- meanline_ = other.meanline();
-
- x_height_ = other.x_height();
- d_height_ = other.d_height();
- a_height_ = other.a_height();
-
- char_space_ = other.char_space();
- char_width_ = other.char_width();
-
- word_space_ = other.word_space();
-
- reading_direction_ = other.reading_direction();
-
- type_ = other.type();
-
- reverse_video_ = other.reverse_video();
-
- orientation_ = other.orientation();
-
- reading_orientation_ = other.reading_orientation();
-
- indented_ = other.indented();
+ data_ = other.data_;
+ }
- text_ = other.text();
- holder_ = other.holder();
+ template <typename L>
+ inline
+ line_info<L>::line_info(const line_id_t& id, data_t *data)
+ : id_(id), data_(data)
+ {
}
+
template <typename L>
inline
line_info<L>::line_info(const line_info<L>& other)
- : id_(0), hidden_(false)
+ : id_(0)
{
+ //data_->hidden_ = false;
copy_data(other);
}
@@ -357,22 +382,10 @@ namespace scribo
line_info<L>::line_info(const line_set<L>& holder,
const line_id_t& id,
const mln::util::array<component_id_t>& comps)
- : id_(id), hidden_(false), tag_(line::None), components_(comps),
- type_(line::Undefined), holder_(holder)
+ : id_(id)
{
-
+ data_ = new data_t(holder, comps);
force_stats_update();
-
- // FIXME: set valid information for these attributes in
- // force_stats_update.
- word_space_ = 0;
- reading_direction_ = line::LeftToRight;
- reverse_video_ = false;
-
- orientation_ = 0.;
- reading_orientation_ = 0.;
-
- indented_ = false;
}
@@ -397,7 +410,7 @@ namespace scribo
line::Tag
line_info<L>::tag() const
{
- return tag_;
+ return data_->tag_;
}
@@ -405,7 +418,7 @@ namespace scribo
void
line_info<L>::update_tag(line::Tag tag)
{
- tag_ = tag;
+ data_->tag_ = tag;
}
@@ -413,14 +426,14 @@ namespace scribo
const mln::box2d&
line_info<L>::bbox() const
{
- return bbox_;
+ return data_->bbox_;
}
template <typename L>
const mln::box2d&
line_info<L>::ebbox() const
{
- return ebbox_;
+ return data_->ebbox_;
}
@@ -428,14 +441,14 @@ namespace scribo
const mln::util::array<typename line_info<L>::component_id_t>&
line_info<L>::components() const
{
- return components_;
+ return data_->components_;
}
template <typename L>
unsigned
line_info<L>::card() const
{
- return components_.size();
+ return data_->components_.size();
}
@@ -443,7 +456,7 @@ namespace scribo
int
line_info<L>::baseline() const
{
- return baseline_;
+ return data_->baseline_;
}
@@ -451,7 +464,7 @@ namespace scribo
int
line_info<L>::meanline() const
{
- return meanline_;
+ return data_->meanline_;
}
@@ -459,7 +472,7 @@ namespace scribo
int
line_info<L>::ascent() const
{
- return baseline_ - a_height() + 1;
+ return data_->baseline_ - a_height() + 1;
}
@@ -467,7 +480,7 @@ namespace scribo
int
line_info<L>::descent() const
{
- return baseline_ - d_height();
+ return data_->baseline_ - d_height();
}
@@ -475,7 +488,7 @@ namespace scribo
unsigned
line_info<L>::x_height() const
{
- return x_height_;
+ return data_->x_height_;
}
@@ -483,7 +496,7 @@ namespace scribo
int
line_info<L>::d_height() const
{
- return d_height_;
+ return data_->d_height_;
}
@@ -491,7 +504,7 @@ namespace scribo
int
line_info<L>::a_height() const
{
- return a_height_;
+ return data_->a_height_;
}
@@ -499,7 +512,7 @@ namespace scribo
unsigned
line_info<L>::char_space() const
{
- return char_space_;
+ return data_->char_space_;
}
@@ -507,7 +520,7 @@ namespace scribo
unsigned
line_info<L>::char_width() const
{
- return char_width_;
+ return data_->char_width_;
}
@@ -515,7 +528,7 @@ namespace scribo
unsigned
line_info<L>::word_space() const
{
- return word_space_;
+ return data_->word_space_;
}
@@ -523,14 +536,14 @@ namespace scribo
line::ReadingDirection
line_info<L>::reading_direction() const
{
- return reading_direction_;
+ return data_->reading_direction_;
}
template <typename L>
line::Type
line_info<L>::type() const
{
- return type_;
+ return data_->type_;
}
@@ -538,10 +551,10 @@ namespace scribo
void
line_info<L>::update_components_type(component::Type type)
{
- for_all_elements(i, components_)
+ for_all_elements(i, data_->components_)
{
- unsigned c = components_[i];
- holder_.components_()(c).update_type(type);
+ unsigned c = data_->components_[i];
+ data_->holder_.components_()(c).update_type(type);
}
}
@@ -550,7 +563,7 @@ namespace scribo
void
line_info<L>::update_type(line::Type type)
{
- type_ = type;
+ data_->type_ = type;
// Some line types may involve updating components type as well.
if (type == line::Punctuation)
@@ -564,7 +577,7 @@ namespace scribo
bool
line_info<L>::reverse_video() const
{
- return reverse_video_;
+ return data_->reverse_video_;
}
@@ -572,7 +585,7 @@ namespace scribo
float
line_info<L>::orientation() const
{
- return orientation_;
+ return data_->orientation_;
}
@@ -580,7 +593,7 @@ namespace scribo
float
line_info<L>::reading_orientation() const
{
- return reading_orientation_;
+ return data_->reading_orientation_;
}
@@ -588,21 +601,21 @@ namespace scribo
bool
line_info<L>::indented() const
{
- return indented_;
+ return data_->indented_;
}
template <typename L>
bool
line_info<L>::has_text() const
{
- return !text_.empty();
+ return !data_->text_.empty();
}
template <typename L>
const std::string&
line_info<L>::text() const
{
- return text_;
+ return data_->text_;
}
@@ -610,7 +623,7 @@ namespace scribo
void
line_info<L>::update_text(const std::string& str)
{
- text_ = str;
+ data_->text_ = str;
}
@@ -624,9 +637,19 @@ namespace scribo
template <typename L>
bool
+ line_info<L>::is_textline() const
+ {
+ return is_valid()
+ && !is_hidden()
+ && type() == line::Text;
+ }
+
+
+ template <typename L>
+ bool
line_info<L>::is_hidden() const
{
- return hidden_;
+ return data_->hidden_;
}
@@ -634,7 +657,7 @@ namespace scribo
void
line_info<L>::set_hidden(bool b)
{
- hidden_ = b;
+ data_->hidden_ = b;
}
@@ -665,8 +688,8 @@ namespace scribo
void
line_info<L>::update_ebbox()
{
- int A = a_height_ - x_height_;
- int D = - d_height_;
+ int A = data_->a_height_ - data_->x_height_;
+ int D = - data_->d_height_;
if (A <= 2 && D > 2)
A = D;
if (D <= 2 && A > 2)
@@ -674,10 +697,12 @@ namespace scribo
int delta = delta_of_line();
- ebbox_ = mln::make::box2d(meanline_ - A, bbox().pmin().col() - delta,
- baseline_ + D, bbox().pmax().col() + delta);
+ data_->ebbox_ = mln::make::box2d(data_->meanline_ - A,
+ bbox().pmin().col() - delta,
+ data_->baseline_ + D,
+ bbox().pmax().col() + delta);
- ebbox_.crop_wrt(holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
}
@@ -729,20 +754,20 @@ namespace scribo
// Adjusting ebboxes with the highest delta and merging ebboxes.
int d_delta = other.delta_of_line() - this->delta_of_line();
if (d_delta < 0) // other.delta_of_line() < this->delta_of_line()
- ebbox_.merge(enlarge(other.ebbox(), - d_delta));
+ data_->ebbox_.merge(enlarge(other.ebbox(), - d_delta));
else
{
- mln::box2d b = ebbox_;
- ebbox_ = other.bbox();
- ebbox_.merge(enlarge(b, d_delta));
+ mln::box2d b = data_->ebbox_;
+ data_->ebbox_ = other.bbox();
+ data_->ebbox_.merge(enlarge(b, d_delta));
}
- ebbox_.crop_wrt(holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
}
else // /other/ IS NOT a text line.
{
- ebbox_.merge(other.ebbox());
- ebbox_.merge(merged_ebbox(*this, other));
+ data_->ebbox_.merge(other.ebbox());
+ data_->ebbox_.merge(merged_ebbox(*this, other));
}
}
else // /this/ is NOT a text line
@@ -755,15 +780,15 @@ namespace scribo
}
update_type(line::Text);
- ebbox_.merge(other.ebbox());
- ebbox_.merge(merged_ebbox(other, *this));
+ data_->ebbox_.merge(other.ebbox());
+ data_->ebbox_.merge(merged_ebbox(other, *this));
}
// Merging bboxes.
- bbox_.merge(other.bbox());
+ data_->bbox_.merge(other.bbox());
// Make sure the ebbox is included in the image domain.
- ebbox_.crop_wrt(holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
}
@@ -771,14 +796,14 @@ namespace scribo
void
line_info<L>::fast_merge(line_info<L>& other, bool hide)
{
- tag_ = line::Needs_Precise_Stats_Update;
+ data_->tag_ = line::Needs_Precise_Stats_Update;
other.update_tag(line::Merged);
other.set_hidden(hide);
// Update bbox and ebbox
update_bbox_and_ebox(other);
- components_.append(other.components());
+ data_->components_.append(other.components());
}
@@ -795,7 +820,7 @@ namespace scribo
line_info<L>::force_stats_update()
{
typedef mln_site(L) P;
- const component_set<L>& comp_set = holder_.components();
+ const component_set<L>& comp_set = data_->holder_.components();
// Init.
typedef mln::value::int_u<12> median_data_t;
@@ -814,21 +839,21 @@ namespace scribo
// Workaround to avoid overflow with int_u<12> in median accumulators.
//
// FIXME: not optimal...
- for_all_elements(i, components_)
+ for_all_elements(i, data_->components_)
{
- unsigned c = components_(i);
+ unsigned c = data_->components_(i);
// Ignore punctuation for stats computation but not for bbox
// computation.
- if (holder_.components()(c).type() == component::Punctuation)
+ if (data_->holder_.components()(c).type() == component::Punctuation)
continue;
ref_line = mln::math::min(comp_set(c).bbox().pmin().row(), ref_line);
}
- for_all_elements(i, components_)
+ for_all_elements(i, data_->components_)
{
- unsigned c = components_(i);
+ unsigned c = data_->components_(i);
const mln::box2d& bb = comp_set(c).bbox();
@@ -837,7 +862,7 @@ namespace scribo
// Ignore punctuation for stats computation but not for bbox
// computation.
- if (holder_.components()(c).type() == component::Punctuation)
+ if (data_->holder_.components()(c).type() == component::Punctuation)
continue;
@@ -853,11 +878,11 @@ namespace scribo
// (right link) (left link)
// Space between characters.
- if (holder_.links()(c) != c)
+ if (data_->holder_.links()(c) != c)
{
int
space = bb.pmin().col()
- - comp_set(holder_.links()(c)).bbox().pmax().col() - 1;
+ - comp_set(data_->holder_.links()(c)).bbox().pmax().col() - 1;
// -- Ignore overlapped characters.
if (space > 0)
@@ -884,31 +909,31 @@ namespace scribo
// Finalization
{
- tag_ = line::None;
- bbox_ = bbox.to_result();
+ data_->tag_ = line::None;
+ data_->bbox_ = bbox.to_result();
// Char space
if (char_space.card() < 2)
- char_space_ = 0;
+ data_->char_space_ = 0;
else
- char_space_ = char_space.to_result();
+ data_->char_space_ = char_space.to_result();
// Char width
if (card() == 2)
- char_width_ = (comp_set(components_[0]).bbox().width()
- + comp_set(components_[1]).bbox().width()) / 2;
+ data_->char_width_ = (comp_set(data_->components_[0]).bbox().width()
+ + comp_set(data_->components_[1]).bbox().width()) / 2;
else
- char_width_ = char_width.to_result();
+ data_->char_width_ = char_width.to_result();
mln::def::coord
absolute_baseline_r = baseline.to_result() + ref_line,
absolute_meanline_r = meanline.to_result() + ref_line;
- baseline_ = absolute_baseline_r;
- meanline_ = absolute_meanline_r;
- x_height_ = baseline_ - meanline_ + 1;
- d_height_ = baseline_ - bbox.to_result().pmax().row();
- a_height_ = baseline_ - bbox.to_result().pmin().row() + 1;
+ data_->baseline_ = absolute_baseline_r;
+ data_->meanline_ = absolute_meanline_r;
+ data_->x_height_ = data_->baseline_ - data_->meanline_ + 1;
+ data_->d_height_ = data_->baseline_ - bbox.to_result().pmax().row();
+ data_->a_height_ = data_->baseline_ - bbox.to_result().pmin().row() + 1;
//FIXME
//
@@ -929,7 +954,7 @@ namespace scribo
const line_set<L>&
line_info<L>::holder() const
{
- return holder_;
+ return data_->holder_;
}
diff --git a/scribo/scribo/core/line_links.hh b/scribo/scribo/core/line_links.hh
index 85c45e8..de62158 100644
--- a/scribo/scribo/core/line_links.hh
+++ b/scribo/scribo/core/line_links.hh
@@ -139,8 +139,8 @@ namespace scribo
template <typename L>
line_links<L>::line_links()
+ : data_(0)
{
- data_ = new data_t();
}
diff --git a/scribo/scribo/core/line_set.hh b/scribo/scribo/core/line_set.hh
index 29795b2..bfa9240 100644
--- a/scribo/scribo/core/line_set.hh
+++ b/scribo/scribo/core/line_set.hh
@@ -99,6 +99,10 @@ namespace scribo
/// Constructor from object groups.
line_set(const object_groups<L>& groups);
+
+ /// Constructor useful for delayed construction (loading from file).
+ line_set(const object_groups<L>& groups,
+ const mln::util::array<line_info<L> >& line_data);
/// @}
/// Compute line stats and fill the underlying information.
@@ -155,8 +159,14 @@ namespace scribo
/// @}
+ /// Return false if it is not initialized (built with the default
+ /// constructor).
bool is_valid() const;
+
+ void update_line_data_(const mln::util::array<line_info<L> >& line_data);
+
+
private:
/// Duplicate the underlying image and create a new line_set.
void init_(const line_set<L>& model);
@@ -233,6 +243,15 @@ namespace scribo
template <typename L>
+ inline
+ line_set<L>::line_set(const object_groups<L>& groups,
+ const mln::util::array<line_info<L> >& line_data)
+ {
+ data_ = new internal::line_set_data<L>(line_data, groups);
+ }
+
+
+ template <typename L>
void
line_set<L>::compute_lines(const object_groups<L>& groups)
{
@@ -396,7 +415,15 @@ namespace scribo
bool
line_set<L>::is_valid() const
{
- return data_->links_.is_valid() && data_->groups_.is_valid();
+ return data_ && data_->groups_.is_valid();
+ }
+
+ template <typename L>
+ inline
+ void
+ line_set<L>::update_line_data_(const mln::util::array<line_info<L> >& line_data)
+ {
+ data_->infos_ = line_data;
}
template <typename L>
diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh
index a8c623a..17f847f 100644
--- a/scribo/scribo/core/paragraph_info.hh
+++ b/scribo/scribo/core/paragraph_info.hh
@@ -113,6 +113,16 @@ namespace scribo
}
+ template <typename L>
+ std::ostream&
+ operator<<(std::ostream& ostr, const paragraph_info<L>& info)
+ {
+ return ostr << "paragraph_info("
+ << "line_ids=" << info.line_ids()
+ << ", bbox=" << info.bbox()
+ << ")" << std::endl;
+ }
+
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace scribo
diff --git a/scribo/scribo/core/paragraph_set.hh b/scribo/scribo/core/paragraph_set.hh
index 355eaa9..6597189 100644
--- a/scribo/scribo/core/paragraph_set.hh
+++ b/scribo/scribo/core/paragraph_set.hh
@@ -36,6 +36,25 @@
namespace scribo
{
+ namespace internal
+ {
+
+ /// Data structure for \c scribo::paragraph_set<L>.
+ template <typename L>
+ struct paragraph_set_data
+ {
+ paragraph_set_data();
+ paragraph_set_data(const line_links<L>& llines, unsigned npars);
+
+ mln::util::array<paragraph_info<L> > pars_;
+ line_set<L> lines_;
+ line_links<L> links_;
+ };
+
+ } // end of namespace scribo::internal
+
+
+
/*! \brief Paragraph container.
Paragraph ids start from 1.
@@ -46,6 +65,7 @@ namespace scribo
{
public:
paragraph_set();
+ paragraph_set(internal::paragraph_set_data<L>* data);
paragraph_set(const line_links<L>& llinks, unsigned npars);
unsigned nelements() const;
@@ -57,9 +77,10 @@ namespace scribo
const line_set<L>& lines() const;
+ const line_links<L>& links() const;
+
private:
- mln::util::array<paragraph_info<L> > pars_;
- line_set<L> lines_;
+ mln::util::tracked_ptr< internal::paragraph_set_data<L> > data_;
};
@@ -77,37 +98,72 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
+ // paragraph_set_data<L>
+
+ namespace internal
+ {
+
+ // data< paragraph_set<L> >
+
+
+ template <typename L>
+ inline
+ paragraph_set_data<L>::paragraph_set_data()
+ {
+ }
+
+
+ template <typename L>
+ inline
+ paragraph_set_data<L>::paragraph_set_data(const line_links<L>& llinks, unsigned npars)
+ : pars_(npars + 1, paragraph_info<L>(llinks)), links_(llinks)
+ {
+ lines_ = llinks.lines();
+ }
+
+ } // end of namespace scribo::internal
+
+
template <typename L>
paragraph_set<L>::paragraph_set()
+ : data_(0)
{
}
template <typename L>
+ paragraph_set<L>::paragraph_set(internal::paragraph_set_data<L>* data)
+ {
+ data_ = data;
+ }
+
+ template <typename L>
paragraph_set<L>::paragraph_set(const line_links<L>& llinks, unsigned npars)
- : pars_(npars + 1, paragraph_info<L>(llinks))
{
- lines_ = llinks.lines();
+ data_ = new internal::paragraph_set_data<L>(llinks, npars);
}
template <typename L>
unsigned
paragraph_set<L>::nelements() const
{
- return pars_.nelements() - 1;
+ mln_precondition(data_ != 0);
+ return data_->pars_.nelements() - 1;
}
template <typename L>
paragraph_info<L>&
paragraph_set<L>::operator()(unsigned i)
{
- return pars_[i];
+ mln_precondition(data_ != 0);
+ return data_->pars_[i];
}
template <typename L>
const paragraph_info<L>&
paragraph_set<L>::operator()(unsigned i) const
{
- return pars_[i];
+ mln_precondition(data_ != 0);
+ return data_->pars_[i];
}
@@ -115,7 +171,7 @@ namespace scribo
bool
paragraph_set<L>::is_valid() const
{
- return !pars_.is_empty();
+ return data_ && !data_->pars_.is_empty();
}
@@ -123,7 +179,17 @@ namespace scribo
const line_set<L>&
paragraph_set<L>::lines() const
{
- return lines_;
+ mln_precondition(data_ != 0);
+ return data_->lines_;
+ }
+
+
+ template <typename L>
+ const line_links<L>&
+ paragraph_set<L>::links() const
+ {
+ mln_precondition(data_ != 0);
+ return data_->links_;
}
--
1.5.6.5
1
0

last-svn-commit-771-gf67cec4 Add new conversion routines from string to tag.
by Guillaume Lazzara 28 Mar '11
by Guillaume Lazzara 28 Mar '11
28 Mar '11
* scribo/core/tag/component.hh,
* scribo/core/tag/line.hh: Here.
---
scribo/ChangeLog | 7 +++++++
scribo/scribo/core/tag/component.hh | 35 +++++++++++++++++++++++++++++++++++
scribo/scribo/core/tag/line.hh | 34 +++++++++++++++++++++++++++++++++-
3 files changed, 75 insertions(+), 1 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index ab028c3..ab498cc 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,12 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Add new conversion routines from string to tag.
+
+ * scribo/core/tag/component.hh,
+ * scribo/core/tag/line.hh: Here.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Various changes in scribo core structures.
* scribo/scribo/core/component_info.hh: Remove useless line_id.
diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh
index f773932..10b86a6 100644
--- a/scribo/scribo/core/tag/component.hh
+++ b/scribo/scribo/core/tag/component.hh
@@ -61,6 +61,14 @@ namespace scribo
Image
};
+
+ std::ostream& operator<<(std::ostream& ostr, const Tag& tag);
+ Tag str2tag(const std::string& str);
+
+ std::ostream& operator<<(std::ostream& ostr, const Type& type);
+ Type str2type(const std::string& str);
+
+
# ifndef MLN_INCLUDE_ONLY
@@ -85,6 +93,16 @@ namespace scribo
inline
+ Tag str2tag(const std::string& str)
+ {
+ if (str == "Ignored")
+ return Ignored;
+
+ return None;
+ }
+
+
+ inline
std::ostream&
operator<<(std::ostream& ostr, const Type& type)
{
@@ -116,6 +134,23 @@ namespace scribo
}
+ inline
+ Type str2type(const std::string& str)
+ {
+ if (str == "Character")
+ return Character;
+ else if (str == "Separator")
+ return Separator;
+ else if (str == "Noise")
+ return Noise;
+ else if (str == "Punctuation")
+ return Punctuation;
+ else if (str == "Image")
+ return Image;
+
+ return Undefined;
+ }
+
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/core/tag/line.hh b/scribo/scribo/core/tag/line.hh
index 0444a11..3fb1fdf 100644
--- a/scribo/scribo/core/tag/line.hh
+++ b/scribo/scribo/core/tag/line.hh
@@ -96,7 +96,7 @@ namespace scribo
std::ostream&
operator<<(std::ostream& ostr, const Type& type);
-
+ Type str2type(const std::string& str);
# ifndef MLN_INCLUDE_ONLY
@@ -208,6 +208,38 @@ namespace scribo
return ostr << str;
}
+ inline
+ Type str2type(const std::string& str)
+ {
+ if (str == "caption")
+ return Caption;
+ else if (str == "credit")
+ return Credit;
+ else if (str == "drop-capital")
+ return DropCapital;
+ else if (str == "floating")
+ return Floating;
+ else if (str == "footer")
+ return Footer;
+ else if (str == "header")
+ return Header;
+ else if (str == "heading")
+ return Heading;
+ else if (str == "page-number")
+ return PageNumber;
+ else if (str == "paragraph")
+ return Paragraph;
+
+ // Values unsupported by the XSD
+ else if(str == "punctuation")
+ return Punctuation;
+ else if (str == "text")
+ return Text;
+
+ return Undefined;
+ }
+
+
# endif // ! MLN_INCLUDE_ONLY
--
1.5.6.5
1
0

last-svn-commit-772-g4756e17 Improve and cleanup whitespace separator detection.
by Guillaume Lazzara 28 Mar '11
by Guillaume Lazzara 28 Mar '11
28 Mar '11
* scribo/core/tag/anchor.hh: Add new anchors.
* scribo/filter/internal/alignment_angle.hh,
* scribo/primitive/link/internal/compute_anchor.hh: Support new
anchors.
* scribo/primitive/extract/separators_nonvisible.hh: Remove dead
code and comment debug code.
* scribo/toolchain/internal/content_in_doc_functor.hh: Detect
horizontal whitespace.
---
scribo/ChangeLog | 16 +
scribo/scribo/core/tag/anchor.hh | 5 +-
scribo/scribo/filter/internal/alignment_angle.hh | 144 +++--
.../primitive/extract/separators_nonvisible.hh | 673 +++++++++-----------
.../primitive/link/internal/compute_anchor.hh | 26 +-
.../toolchain/internal/content_in_doc_functor.hh | 13 +-
6 files changed, 447 insertions(+), 430 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index ab498cc..d783c5c 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,21 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Improve and cleanup whitespace separator detection.
+
+ * scribo/core/tag/anchor.hh: Add new anchors.
+
+ * scribo/filter/internal/alignment_angle.hh,
+ * scribo/primitive/link/internal/compute_anchor.hh: Support new
+ anchors.
+
+ * scribo/primitive/extract/separators_nonvisible.hh: Remove dead
+ code and comment debug code.
+
+ * scribo/toolchain/internal/content_in_doc_functor.hh: Detect
+ horizontal whitespace.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Add new conversion routines from string to tag.
* scribo/core/tag/component.hh,
diff --git a/scribo/scribo/core/tag/anchor.hh b/scribo/scribo/core/tag/anchor.hh
index 262a32d..bb6780c 100644
--- a/scribo/scribo/core/tag/anchor.hh
+++ b/scribo/scribo/core/tag/anchor.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -43,9 +44,11 @@ namespace scribo
ActualRight,
Right,
TopLeft,
+ TopStrictLeft,
TopRight,
BottomLeft,
BottomRight,
+ BottomStrictRight,
Invalid
};
diff --git a/scribo/scribo/filter/internal/alignment_angle.hh b/scribo/scribo/filter/internal/alignment_angle.hh
index 5b88012..33855a8 100644
--- a/scribo/scribo/filter/internal/alignment_angle.hh
+++ b/scribo/scribo/filter/internal/alignment_angle.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010 EPITA Research and Development Laboratory
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -73,76 +73,98 @@ namespace scribo
unsigned current_object, unsigned nbh_object,
anchor::Type anchor)
{
- trace::entering("scribo::filter::internal::alignment_angle_rad");
+ trace::entering("scribo::filter::internal::alignment_angle");
mln_precondition(comps.is_valid());
- float dr, dc;
+ float dr, dc, result = 0;
if (nbh_object == current_object)
return 0;
- // Center
- if (anchor == anchor::Center)
+ switch(anchor)
{
- dr = math::abs(comps(current_object).bbox().pcenter().row()
- - comps(nbh_object).bbox().pcenter().row());
- dc = math::abs(comps(current_object).bbox().pcenter().col()
- - comps(nbh_object).bbox().pcenter().col());
-
- return std::atan(dr / dc);
- }
-
- // Top
- else if (anchor == anchor::Top)
- {
- dr = math::abs(comps(current_object).bbox().pmin().row()
- - comps(nbh_object).bbox().pmin().row());
- dc = math::abs(comps(current_object).bbox().pcenter().col()
- - comps(nbh_object).bbox().pcenter().col());
-
- return std::atan(dr / dc);
+ // Center
+ case anchor::Center:
+ {
+ dr = math::abs(comps(current_object).bbox().pcenter().row()
+ - comps(nbh_object).bbox().pcenter().row());
+ dc = math::abs(comps(current_object).bbox().pcenter().col()
+ - comps(nbh_object).bbox().pcenter().col());
+
+ result = std::atan(dr / dc);
+ }
+ break;
+
+ // Mass Center
+ case anchor::MassCenter:
+ {
+ dr = math::abs(comps(current_object).mass_center().row()
+ - comps(nbh_object).mass_center().row());
+ dc = math::abs(comps(current_object).mass_center().col()
+ - comps(nbh_object).mass_center().col());
+
+ result = std::atan(dr / dc);
+ }
+ break;
+
+ // Top
+ case anchor::TopStrictLeft:
+ case anchor::Top:
+ {
+ dr = math::abs(comps(current_object).bbox().pmin().row()
+ - comps(nbh_object).bbox().pmin().row());
+ dc = math::abs(comps(current_object).bbox().pcenter().col()
+ - comps(nbh_object).bbox().pcenter().col());
+
+ result = std::atan(dr / dc);
+ }
+ break;
+
+ // Bottom
+ case anchor::BottomStrictRight:
+ case anchor::Bottom:
+ {
+ dr = math::abs(comps(current_object).bbox().pmax().row()
+ - comps(nbh_object).bbox().pmax().row());
+ dc = math::abs(comps(current_object).bbox().pcenter().col()
+ - comps(nbh_object).bbox().pcenter().col());
+
+ result = std::atan(dr / dc);
+ }
+ break;
+
+ // Left
+ case anchor::Left:
+ {
+ dr = math::abs(comps(current_object).bbox().pcenter().row()
+ - comps(nbh_object).bbox().pcenter().row());
+ dc = math::abs(comps(current_object).bbox().pmin().col()
+ - comps(nbh_object).bbox().pmin().col());
+
+ result = std::atan(dc / dr);
+ }
+ break;
+
+ // Right
+ case anchor::Right:
+ {
+ dr = math::abs(comps(current_object).bbox().pcenter().row()
+ - comps(nbh_object).bbox().pcenter().row());
+ dc = math::abs(comps(current_object).bbox().pmax().col()
+ - comps(nbh_object).bbox().pmax().col());
+
+ result = std::atan(dc / dr);
+ }
+ break;
+
+ default:
+ trace::warning("scribo::filter::internal::alignment_angle,"
+ " Invalid anchor value... Aborting computation.");
}
- // Bottom
- else if (anchor == anchor::Bottom)
- {
- dr = math::abs(comps(current_object).bbox().pmax().row()
- - comps(nbh_object).bbox().pmax().row());
- dc = math::abs(comps(current_object).bbox().pcenter().col()
- - comps(nbh_object).bbox().pcenter().col());
-
- return std::atan(dr / dc);
- }
-
- // Left
- else if (anchor == anchor::Left)
- {
- dr = math::abs(comps(current_object).bbox().pcenter().row()
- - comps(nbh_object).bbox().pcenter().row());
- dc = math::abs(comps(current_object).bbox().pmin().col()
- - comps(nbh_object).bbox().pmin().col());
-
- return std::atan(dc / dr);
- }
-
- // Right
- else if (anchor == anchor::Right)
- {
- dr = math::abs(comps(current_object).bbox().pcenter().row()
- - comps(nbh_object).bbox().pcenter().row());
- dc = math::abs(comps(current_object).bbox().pmax().col()
- - comps(nbh_object).bbox().pmax().col());
-
- return std::atan(dc / dr);
- }
-
- else
- trace::warning("Invalid anchor value... Aborting computation.");
-
- trace::exiting("scribo::filter::internal::alignment_angle_rad");
- return 0;
-
+ trace::exiting("scribo::filter::internal::alignment_angle");
+ return result;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/primitive/extract/separators_nonvisible.hh b/scribo/scribo/primitive/extract/separators_nonvisible.hh
index 81ebd64..4e31650 100644
--- a/scribo/scribo/primitive/extract/separators_nonvisible.hh
+++ b/scribo/scribo/primitive/extract/separators_nonvisible.hh
@@ -86,6 +86,8 @@
#include <scribo/preprocessing/denoise_fg.hh>
#include <scribo/preprocessing/rotate_90.hh>
+#include <scribo/primitive/link/internal/compute_anchor.hh>
+
#include <scribo/primitive/link/internal/dmax_default.hh>
#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
@@ -128,80 +130,6 @@ namespace scribo
namespace internal
{
- template <typename L>
- void filter_bad_groups(object_groups<L>& top_groups,
- object_groups<L>& bot_groups)
- {
- const component_set<L>& comps = top_groups.components();
- const L& lbl = comps.labeled_image();
-
- for_all_groups(c, top_groups)
- {
- box2d b = comps(c).bbox();
- b.enlarge(0, comps(c).bbox().height());
- b.crop_wrt(lbl.domain());
-
- typedef mln_value(L) V;
-
- const V* top_ptr = & lbl(b.pmin());
- const V* bot_ptr = & lbl(point2d(b.pmax().row(), b.pmin().col()));
-
- unsigned ntop = 0, nbot = 0;
- for (unsigned n = 0; n < b.width(); ++n)
- {
- if (*top_ptr)
- ++ntop;
- if (*bot_ptr)
- ++nbot;
- }
-
- if (ntop / b.width() > 0.50f)
- top_groups(c) = c;
-
- if (nbot / b.width() > 0.50f)
- bot_groups(c) = c;
- }
-
- }
-
-
- template <typename L>
- mln_site(L)
- my_anchors(const component_set<L>& comps,
- unsigned current_object,
- anchor::Type anchor)
- {
- mln_site(L) sp;// = comps(current_object).bbox().pcenter();
-
- unsigned h = comps(current_object).bbox().height();
-
- switch (anchor)
- {
- default:
- return sp;
-
-
- // Bounding box top center
- case anchor::Top: // FIXME: rename as TopLeft
- sp.col() = comps(current_object).bbox().pmin().col();
- sp.row() = comps(current_object).bbox().pmin().row()
- + math::min(2u, (h + 1) / 2 - 1);
- break;
-
-
- // Bounding box bottom center
- case anchor::Bottom: // FIXME: rename as BottomLeft
- sp.col() = comps(current_object).bbox().pmax().col();
- sp.row() = comps(current_object).bbox().pmax().row()
- - math::min(2u, (h + 1) / 2 - 1);
- break;
- }
-
- return sp;
- }
-
-
-
using namespace primitive::link::internal;
template <typename L, typename E>
@@ -225,12 +153,17 @@ namespace scribo
anchor::Horizontal,
dmax_default(dmax)),
anchor(anchor_),
- debug_(data::convert(value::rgb8(), input)),
- debug_angle_(data::convert(value::rgb8(), input)),
_debug_(debug)
{
+ (void) input; // FIXME : remove this argument
min_alpha_rad = (min_angle / 180.0f) * math::pi;
max_alpha_rad = (max_angle / 180.0f) * math::pi;
+
+ // if (_debug_)
+ // {
+ // debug_ = data::convert(value::rgb8(), input);
+ // debug_angle_ = data::convert(value::rgb8(), input);
+ // }
}
void compute_next_site_(P& p)
@@ -247,7 +180,8 @@ namespace scribo
mln_site(L)
start_point_(unsigned current_object, anchor::Type anchor)
{
- return my_anchors(this->components_, current_object, anchor);
+ return link::internal::compute_anchor(this->components_,
+ current_object, anchor);
}
@@ -303,27 +237,29 @@ namespace scribo
{
super_::validate_link_(current_object, start_point, p, anchor);
- if (_debug_)
- {
- mln_site(L)
- p1 = my_anchors(this->components_, current_object, anchor),
- p2 = my_anchors(this->components_, this->labeled_image_(p),
- anchor);
- mln::draw::line(debug_, p1, p2, literal::green);
-
-
- float
- angle = filter::internal::alignment_angle(this->components_,
- current_object,
- this->labeled_image_(p),
- anchor);
- angle = (angle * 180.0f) / math::pi;
- angle = angle * 20.0f + 1.0f;
- mln::draw::line(debug_angle_, p1, p2,
- value::rgb8(unsigned(angle),
- unsigned(angle),
- unsigned(angle)));
- }
+ // if (_debug_)
+ // {
+ // mln_site(L)
+ // p1 = link::internal::compute_anchor(this->components_,
+ // current_object, anchor),
+ // p2 = link::internal::compute_anchor(this->components_,
+ // this->labeled_image_(p),
+ // anchor);
+ // mln::draw::line(debug_, p1, p2, literal::green);
+
+
+ // float
+ // angle = filter::internal::alignment_angle(this->components_,
+ // current_object,
+ // this->labeled_image_(p),
+ // anchor);
+ // angle = (angle * 180.0f) / math::pi;
+ // angle = angle * 20.0f + 1.0f;
+ // mln::draw::line(debug_angle_, p1, p2,
+ // value::rgb8(unsigned(angle),
+ // unsigned(angle),
+ // unsigned(angle)));
+ // }
}
void invalidate_link_(unsigned current_object,
@@ -333,33 +269,36 @@ namespace scribo
{
super_::invalidate_link_(current_object, start_point, p, anchor);
- if (_debug_)
- {
- if (this->labeled_image_.domain().has(p) && this->labeled_image_(p) != 0)
- {
- mln_site(L)
- p1 = my_anchors(this->components_, current_object, anchor),
- p2 = my_anchors(this->components_, this->labeled_image_(p),
- anchor);
- if (this->labeled_image_.domain().has(p2) && norm::l1_distance(p1.to_vec(), p2.to_vec()) < 300)
- {
- mln::draw::line(debug_, p1, p2, literal::red);
- }
-
-
- float
- angle = filter::internal::alignment_angle(this->components_,
- current_object,
- this->labeled_image_(p),
- anchor);
- angle = (angle * 180.0f) / math::pi;
- angle = angle * 20.0f + 1.0f;
- mln::draw::line(debug_angle_, p1, p2,
- value::rgb8(unsigned(angle),
- unsigned(angle),
- unsigned(angle)));
- }
- }
+ // if (_debug_)
+ // {
+ // if (this->labeled_image_.domain().has(p) && this->labeled_image_(p) != 0)
+ // {
+ // mln_site(L)
+ // p1 = link::internal::compute_anchor(this->components_,
+ // current_object, anchor),
+ // p2 = link::internal::compute_anchor(this->components_,
+ // this->labeled_image_(p),
+ // anchor);
+ // if (this->labeled_image_.domain().has(p2)
+ // && norm::l1_distance(p1.to_vec(), p2.to_vec()) < 300)
+ // {
+ // mln::draw::line(debug_, p1, p2, literal::red);
+ // }
+
+
+ // float
+ // angle = filter::internal::alignment_angle(this->components_,
+ // current_object,
+ // this->labeled_image_(p),
+ // anchor);
+ // angle = (angle * 180.0f) / math::pi;
+ // angle = angle * 20.0f + 1.0f;
+ // mln::draw::line(debug_angle_, p1, p2,
+ // value::rgb8(unsigned(angle),
+ // unsigned(angle),
+ // unsigned(angle)));
+ // }
+ // }
}
@@ -368,8 +307,8 @@ namespace scribo
anchor::Type anchor;
- mln_ch_value(L, value::rgb8) debug_;
- mln_ch_value(L, value::rgb8) debug_angle_;
+ // mln_ch_value(L, value::rgb8) debug_;
+ // mln_ch_value(L, value::rgb8) debug_angle_;
bool _debug_;
};
@@ -392,7 +331,8 @@ namespace scribo
float max_angle,
anchor::Type anchor,
bool debug)
- : super_(input, components, dmax, min_angle, max_angle, anchor, debug)
+ : super_(input, components, dmax, min_angle,
+ max_angle, anchor, debug)
{
}
@@ -427,7 +367,8 @@ namespace scribo
float max_angle,
anchor::Type anchor,
bool debug)
- : super_(input, components, dmax, min_angle, max_angle, anchor, debug)
+ : super_(input, components, dmax, min_angle,
+ max_angle, anchor, debug)
{
}
@@ -455,6 +396,8 @@ namespace scribo
mln_concrete(I)
separators_nonvisible(const Image<I>& in_)
{
+ trace::entering("scribo::primitive::extract::separators_nonvisible");
+
const I& in = exact(in_);
mln_precondition(in.is_valid());
typedef mln_value(I) Vi;
@@ -469,42 +412,35 @@ namespace scribo
util::timer t;
util::timer gt;
- // Load (OK)
- t.start();
- float t_ = t;
- std::cout << "Image loaded - " << t_ << std::endl;
-
gt.start();
- // Remove horizontal lines.
- t.restart();
+ // // Remove horizontal lines.
+ // t.restart();
- mln_concrete(I) hlines = primitive::extract::lines_h_pattern(in, 50, 3);
- mln_concrete(I) input = primitive::remove::separators(in, hlines);
+ // mln_concrete(I) hlines = primitive::extract::lines_h_pattern(in, 50, 3);
+ // mln_concrete(I) input = primitive::remove::separators(in, hlines);
- t_ = t;
- std::cout << "Horizontal lines removed - " << t_ << std::endl;
+ // t_ = t;
+ // std::cout << "Horizontal lines removed - " << t_ << std::endl;
// Closing structural - Connect characters.
- t.restart();
+ t.start();
win::hline2d vl(17);
- mln_concrete(I) input_clo = morpho::closing::structural(input, vl);
+ mln_concrete(I) input_clo = morpho::closing::structural(in, vl);
-// input_clo = scribo::preprocessing::rotate_90(input_clo, true);
-
- t_ = t;
+ float t_ = t;
std::cout << "closing_structural - " << t_ << std::endl;
- if (_debug_)
- {
- // Restore input orientation.
- input = scribo::preprocessing::rotate_90(input, false);
+ // if (_debug_)
+ // {
+ // // Restore input orientation.
+ // input = scribo::preprocessing::rotate_90(input, false);
- io::pbm::save(input_clo, "input_clo.pbm");
- }
+ // io::pbm::save(input_clo, "input_clo.pbm");
+ // }
// Rotate (OK)
t.restart();
@@ -526,9 +462,9 @@ namespace scribo
t_ = t;
std::cout << "extract::components - " << t_ << std::endl;
- if (_debug_)
- io::pgm::save(data::convert(value::int_u8(), components.labeled_image()),
- "lbl.pgm");
+ // if (_debug_)
+ // io::pgm::save(data::convert(value::int_u8(), components.labeled_image()),
+ // "lbl.pgm");
unsigned dmax = 5;
@@ -543,39 +479,42 @@ namespace scribo
{
// Right
internal::single_right_dmax_ratio_aligned_functor<L>
- functor(input_clo, components, dmax, min_angle, max_angle, anchor::Top, _debug_);
-// top_right = primitive::link::impl::compute_fastest(functor, anchor::Top);
- top_right = primitive::link::compute(functor, anchor::Top);
+ functor(input_clo, components, dmax, min_angle, max_angle,
+ anchor::TopStrictLeft, _debug_);
+// top_right = primitive::link::impl::compute_fastest(functor, anchor::TopStrictLeft);
+ top_right = primitive::link::compute(functor, anchor::TopStrictLeft);
t.stop();
- if (_debug_)
- {
- io::ppm::save(functor.debug_, "right_top.ppm");
- io::ppm::save(functor.debug_angle_, "right_top_angle.ppm");
- }
+ // if (_debug_)
+ // {
+ // io::ppm::save(functor.debug_, "right_top.ppm");
+ // io::ppm::save(functor.debug_angle_, "right_top_angle.ppm");
+ // }
t.resume();
// Left
internal::single_left_dmax_ratio_aligned_functor<L>
- lfunctor(input_clo, components, dmax, min_angle, max_angle, anchor::Top, _debug_);
- top_left = primitive::link::compute(lfunctor, anchor::Top);
+ lfunctor(input_clo, components, dmax, min_angle, max_angle,
+ anchor::TopStrictLeft, _debug_);
+ top_left = primitive::link::compute(lfunctor, anchor::TopStrictLeft);
t.stop();
- if (_debug_)
- {
- io::ppm::save(lfunctor.debug_, "left_top.ppm");
- io::ppm::save(lfunctor.debug_angle_, "left_top_angle.ppm");
+ // if (_debug_)
+ // {
+ // io::ppm::save(lfunctor.debug_, "left_top.ppm");
+ // io::ppm::save(lfunctor.debug_angle_, "left_top_angle.ppm");
- mln_ch_value(I, value::rgb8) output = duplicate(functor.debug_);
- data::paste((lfunctor.debug_ | (pw::value(lfunctor.debug_) != pw::cst(literal::black))) | (pw::value(lfunctor.debug_) != pw::cst(literal::white)), output);
+ // mln_ch_value(I, value::rgb8) output = duplicate(functor.debug_);
+ // data::paste((lfunctor.debug_ | (pw::value(lfunctor.debug_) != pw::cst(literal::black)))
+ // | (pw::value(lfunctor.debug_) != pw::cst(literal::white)), output);
- io::ppm::save(output, "left_right_top.ppm");
- }
+ // io::ppm::save(output, "left_right_top.ppm");
+ // }
t.resume();
}
@@ -585,105 +524,53 @@ namespace scribo
{
// Right
internal::single_right_dmax_ratio_aligned_functor<L>
- functor(input_clo, components, dmax, min_angle, max_angle, anchor::Bottom, _debug_);
- bot_right = primitive::link::compute(functor, anchor::Bottom);
+ functor(input_clo, components, dmax, min_angle, max_angle,
+ anchor::BottomStrictRight, _debug_);
+ bot_right = primitive::link::compute(functor, anchor::BottomStrictRight);
t.stop();
- if (_debug_)
- {
- io::ppm::save(functor.debug_, "right_bot.ppm");
- io::ppm::save(functor.debug_angle_, "right_bot_angle.ppm");
- }
+ // if (_debug_)
+ // {
+ // io::ppm::save(functor.debug_, "right_bot.ppm");
+ // io::ppm::save(functor.debug_angle_, "right_bot_angle.ppm");
+ // }
t.resume();
// Left
internal::single_left_dmax_ratio_aligned_functor<L>
- lfunctor(input_clo, components, dmax, min_angle, max_angle, anchor::Bottom, _debug_);
- bot_left = primitive::link::compute(lfunctor, anchor::Bottom);
+ lfunctor(input_clo, components, dmax, min_angle, max_angle,
+ anchor::BottomStrictRight, _debug_);
+ bot_left = primitive::link::compute(lfunctor, anchor::BottomStrictRight);
t.stop();
- if (_debug_)
- {
- io::ppm::save(lfunctor.debug_, "left_bot.ppm");
- io::ppm::save(lfunctor.debug_angle_, "left_bot_angle.ppm");
- }
+ // if (_debug_)
+ // {
+ // io::ppm::save(lfunctor.debug_, "left_bot.ppm");
+ // io::ppm::save(lfunctor.debug_angle_, "left_bot_angle.ppm");
- if (_debug_)
- {
- mln_ch_value(I, value::rgb8) output = duplicate(functor.debug_);
- data::paste((lfunctor.debug_ | (pw::value(lfunctor.debug_) != pw::cst(literal::black))) | (pw::value(lfunctor.debug_) != pw::cst(literal::white)), output);
+ // mln_ch_value(I, value::rgb8) output = duplicate(functor.debug_);
+ // data::paste((lfunctor.debug_ | (pw::value(lfunctor.debug_) != pw::cst(literal::black)))
+ // | (pw::value(lfunctor.debug_) != pw::cst(literal::white)), output);
- io::ppm::save(output, "left_right_bot.ppm");
- }
+ // io::ppm::save(output, "left_right_bot.ppm");
+ // }
}
t_ = t;
std::cout << "links - " << t_ << std::endl;
-
+ // Merge links and build CC groups
t.restart();
- std::cout << "group - top" << std::endl;
object_groups<L>
top_groups = primitive::group::from_double_link_any(top_left, top_right);
- std::cout << "group - bot" << std::endl;
object_groups<L>
bot_groups = primitive::group::from_double_link_any(bot_left, bot_right);
t_ = t;
std::cout << "group - " << t_ << std::endl;
- t.restart();
- util::array<accu::shape::bbox<point2d> >
- btop_accu(top_groups.nelements()),
- bbot_accu(bot_groups.nelements());
-
-
- for_all_groups(c, top_groups)
- {
- btop_accu(top_groups(c)).take(components(c).bbox());
- bbot_accu(bot_groups(c)).take(components(c).bbox());
- }
- t_ = t;
- std::cout << "groups to group bboxes - " << t_ << std::endl;
-
-
-
- if (_debug_)
- {
-
- mln_ch_value(I, value::rgb8)
- wo_filtering = data::convert(value::rgb8(), input);
-
- for_all_comp_data(d, btop_accu)
- {
- if (btop_accu(d).is_valid())
- {
- mln::draw::line(wo_filtering,
- btop_accu(d).to_result().pmin(),
- point2d(btop_accu(d).to_result().pmin().row(),
- btop_accu(d).to_result().pmax().col()),
- literal::green);
-
- }
- }
-
- for_all_comp_data(d, bbot_accu)
- {
- if (bbot_accu(d).is_valid())
- {
- mln::draw::line(wo_filtering,
- point2d(bbot_accu(d).to_result().pmax().row(),
- bbot_accu(d).to_result().pmin().col()),
- bbot_accu(d).to_result().pmax(),
- literal::green);
- }
-
- }
- io::ppm::save(wo_filtering, "wo_filtering.ppm");
- }
-
-
+ // Filter CC groups
t.restart();
top_groups = filter::object_groups_small(top_groups, min_card);
bot_groups = filter::object_groups_small(bot_groups, min_card);
@@ -691,7 +578,7 @@ namespace scribo
std::cout << "small groups - " << t_ << std::endl;
-
+ // Compute group bboxes
t.restart();
util::array<accu::shape::bbox<point2d> >
top_accu(top_groups.nelements()),
@@ -708,78 +595,35 @@ namespace scribo
-
-
-
-
-
t.restart();
mln_concrete(I) separators;
initialize(separators, input_clo);
+
+ // FIXME: any way to fill border AND data at the same time?
data::fill(separators, false);
+ extension::fill(separators, false);
+
t_ = t;
std::cout << "Initialize separators image - " << t_ << std::endl;
- mln_ch_value(I, value::rgb8) both;
-
- if (_debug_)
- both = data::convert(value::rgb8(), input);
-
-
-
t.restart();
for_all_comp_data(d, top_accu)
{
- if (top_accu(d).is_valid() || btop_accu(d).is_valid())
+ if (top_accu(d).is_valid())
{
- if (top_accu(d).is_valid())
- {
- if (_debug_)
- mln::draw::line(both,
- top_accu(d).to_result().pmin(),
- point2d(top_accu(d).to_result().pmin().row(),
- top_accu(d).to_result().pmax().col()),
- literal::green);
-
- mln::draw::line(separators,
- top_accu(d).to_result().pmin(),
- point2d(top_accu(d).to_result().pmin().row(),
- top_accu(d).to_result().pmax().col()),
- true);
- }
- else
- if (_debug_ && btop_accu(d).is_valid())
- mln::draw::line(both,
- btop_accu(d).to_result().pmin(),
- point2d(btop_accu(d).to_result().pmin().row(),
- btop_accu(d).to_result().pmax().col()),
- literal::yellow);
-
+ mln::draw::line(separators,
+ top_accu(d).to_result().pmin(),
+ point2d(top_accu(d).to_result().pmin().row(),
+ top_accu(d).to_result().pmax().col()),
+ true);
}
- if (bot_accu(d).is_valid() || bbot_accu(d).is_valid())
+ if (bot_accu(d).is_valid())
{
- if (bot_accu(d).is_valid())
- {
- if (_debug_)
- mln::draw::line(both,
- point2d(bot_accu(d).to_result().pmax().row(),
- bot_accu(d).to_result().pmin().col()),
- bot_accu(d).to_result().pmax(),
- literal::green);
-
- mln::draw::line(separators,
- point2d(bot_accu(d).to_result().pmax().row(),
- bot_accu(d).to_result().pmin().col()),
- bot_accu(d).to_result().pmax(),
- true);
- }
- else
- if (_debug_ && bbot_accu(d).is_valid())
- mln::draw::line(both,
- point2d(bbot_accu(d).to_result().pmax().row(),
- bbot_accu(d).to_result().pmin().col()),
- bbot_accu(d).to_result().pmax(),
- literal::yellow);
+ mln::draw::line(separators,
+ point2d(bot_accu(d).to_result().pmax().row(),
+ bot_accu(d).to_result().pmin().col()),
+ bot_accu(d).to_result().pmax(),
+ true);
}
}
@@ -787,22 +631,112 @@ namespace scribo
std::cout << "Drawing output image - " << t_ << std::endl;
- if (_debug_)
- {
- io::ppm::save(both, "both.ppm");
- io::pbm::save(separators, "separators.pbm");
- }
+ // if (_debug_)
+ // {
+ // // Restore input orientation.
+ // mln_concrete(I) input = scribo::preprocessing::rotate_90(in, false);
+
+
+ // // Debug group bboxes (includes all bboxes before filtering)
+ // util::array<accu::shape::bbox<point2d> >
+ // btop_accu(top_groups.nelements()),
+ // bbot_accu(bot_groups.nelements());
+
+
+ // for_all_groups(c, top_groups)
+ // {
+ // btop_accu(top_groups(c)).take(components(c).bbox());
+ // bbot_accu(bot_groups(c)).take(components(c).bbox());
+ // }
+
+ // mln_ch_value(I, value::rgb8)
+ // wo_filtering = data::convert(value::rgb8(), input);
+
+ // for_all_comp_data(d, btop_accu)
+ // {
+ // if (btop_accu(d).is_valid())
+ // {
+ // mln::draw::line(wo_filtering,
+ // btop_accu(d).to_result().pmin(),
+ // point2d(btop_accu(d).to_result().pmin().row(),
+ // btop_accu(d).to_result().pmax().col()),
+ // literal::green);
+
+ // }
+ // }
+
+ // for_all_comp_data(d, bbot_accu)
+ // {
+ // if (bbot_accu(d).is_valid())
+ // {
+ // mln::draw::line(wo_filtering,
+ // point2d(bbot_accu(d).to_result().pmax().row(),
+ // bbot_accu(d).to_result().pmin().col()),
+ // bbot_accu(d).to_result().pmax(),
+ // literal::green);
+ // }
+
+ // }
+ // io::ppm::save(wo_filtering, "wo_filtering.ppm");
+
+ // mln_ch_value(I, value::rgb8) both = data::convert(value::rgb8(), input);
+
+ // for_all_comp_data(d, top_accu)
+ // {
+ // if (top_accu(d).is_valid() || btop_accu(d).is_valid())
+ // {
+ // if (top_accu(d).is_valid())
+ // {
+ // mln::draw::line(both,
+ // top_accu(d).to_result().pmin(),
+ // point2d(top_accu(d).to_result().pmin().row(),
+ // top_accu(d).to_result().pmax().col()),
+ // literal::green);
+ // }
+ // else
+ // if (btop_accu(d).is_valid())
+ // mln::draw::line(both,
+ // btop_accu(d).to_result().pmin(),
+ // point2d(btop_accu(d).to_result().pmin().row(),
+ // btop_accu(d).to_result().pmax().col()),
+ // literal::yellow);
+
+ // }
+ // if (bot_accu(d).is_valid() || bbot_accu(d).is_valid())
+ // {
+ // if (bot_accu(d).is_valid())
+ // {
+ // mln::draw::line(both,
+ // point2d(bot_accu(d).to_result().pmax().row(),
+ // bot_accu(d).to_result().pmin().col()),
+ // bot_accu(d).to_result().pmax(),
+ // literal::green);
+ // }
+ // else
+ // if (bbot_accu(d).is_valid())
+ // mln::draw::line(both,
+ // point2d(bbot_accu(d).to_result().pmax().row(),
+ // bbot_accu(d).to_result().pmin().col()),
+ // bbot_accu(d).to_result().pmax(),
+ // literal::yellow);
+ // }
+
+ // }
+
+ // io::ppm::save(both, "both.ppm");
+ // io::pbm::save(separators, "separators.pbm");
+ // }
// Hit or miss
{
- if (_debug_)
- {
- mln_concrete(I) input_with_seps = duplicate(input_clo);
- data::paste(separators | pw::value(separators), input_with_seps);
+ // if (_debug_)
+ // {
+ // mln_concrete(I) input_with_seps = duplicate(input_clo);
+ // data::paste(separators | pw::value(separators), input_with_seps);
- io::pbm::save(input_with_seps, "input_with_seps.pbm");
- }
+ // io::pbm::save(input_with_seps, "input_with_seps.pbm");
+ // }
t.restart();
unsigned length = 25;
@@ -819,11 +753,12 @@ namespace scribo
typedef mln_ch_value(I,unsigned) J;
J tmp = accu::transform_line(accu, input_clo, length, 1);
+
t_ = t;
std::cout << "* accu::transform_line - " << t_ << std::endl;
- if (_debug_)
- io::pgm::save(data::convert(value::int_u8(), tmp), "tmp.pgm");
+ // if (_debug_)
+ // io::pgm::save(data::convert(value::int_u8(), tmp), "tmp.pgm");
t.restart();
@@ -841,25 +776,53 @@ namespace scribo
unsigned invalid_ratio = unsigned(length * 0.30f);
- mln_piter(I) p(separators.domain());
- for_all(p)
- if (separators(p))
- {
- unsigned lbl = sep_lbl(p);
+ extension::adjust_fill(tmp, 21, 0);
+
+ value::int_u8 *sep_lbl_ptr = sep_lbl.buffer() + sep_lbl.index_of_point(sep_lbl.domain().pmin());
+ bool *separators_ptr = separators.buffer() + separators.index_of_point(separators.domain().pmin());
+ unsigned *tmp_ptr = tmp.buffer() + tmp.index_of_point(tmp.domain().pmin());;
+ int idx1 = tmp.delta_index(dp1);
+ int idx2 = tmp.delta_index(dp2);
+
+ unsigned nrows = separators.nrows();
+ unsigned ncols = separators.ncols();
- unsigned
- top_count = tmp(p + dp1),
- bot_count = tmp(p + dp2);
+ unsigned row_idx_sep_lbl = sep_lbl.delta_index(dpoint2d(+1, - ncols));
+ unsigned row_idx_separators = separators.delta_index(dpoint2d(+1, - ncols));
+ unsigned row_idx_tmp = tmp.delta_index(dpoint2d(+1, - ncols));
- // This site is wrapped between two lines of text so we don't
- // want it.
- if (top_count >= invalid_ratio + 1
- && bot_count >= invalid_ratio + 1)
+ for (unsigned row = 0; row < nrows; ++row)
+ {
+ for (unsigned col = 0; col < ncols; ++col)
+ {
+ if (*separators_ptr)
{
- relbl(lbl) = false;
+ unsigned lbl = *sep_lbl_ptr;
+
+ unsigned
+ top_count = *(tmp_ptr + idx1),
+ bot_count = *(tmp_ptr + idx2);
+
+ // This site is wrapped between two lines of text so we don't
+ // want it.
+ if (top_count >= invalid_ratio + 1
+ && bot_count >= invalid_ratio + 1)
+ {
+ relbl(lbl) = false;
+ }
}
+
+ ++tmp_ptr;
+ ++sep_lbl_ptr;
+ ++separators_ptr;
}
+ tmp_ptr += row_idx_tmp;
+ sep_lbl_ptr += row_idx_sep_lbl;
+ separators_ptr += row_idx_separators;
+ }
+
+
t_ = t;
std::cout << "* reading data - " << t_ << std::endl;
@@ -870,37 +833,35 @@ namespace scribo
mln_concrete(I) output = data::convert(bool(), sep_lbl);
- if (_debug_)
- {
- io::pbm::save(output, "separators_hom.pbm");
- io::pbm::save(separators, "separators_filtered.pbm");
- }
+ // if (_debug_)
+ // {
+ // io::pbm::save(output, "separators_hom.pbm");
+ // io::pbm::save(separators, "separators_filtered.pbm");
- t.restart();
- value::int_u16 ncomps;
- component_set<L> comps = primitive::extract::components(output, c8(), ncomps);
- mln_ch_value(I, value::rgb8) both;
+ // // value::int_u16 ncomps;
+ // // component_set<L> comps = primitive::extract::components(output, c8(), ncomps);
+ // // mln_ch_value(I, value::rgb8) both;
- both = data::convert(value::rgb8(), input);
+ // // both = data::convert(value::rgb8(), input);
- // Needed since the rotated image origin is (0,0). Rotation does
- // not preserve rotated coordinates.
- dpoint2d dp(input.domain().pcenter() - input_clo.domain().pcenter());
+ // // // Needed since the rotated image origin is (0,0).
+ // // dpoint2d dp(input.domain().pcenter() - input_clo.domain().pcenter());
+
+ // // for_all_comps(c, comps)
+ // // {
+ // // box2d b = geom::rotate(comps(c).bbox(), -90, input_clo.domain().pcenter());
+ // // mln::draw::line(both,
+ // // b.pmin() + dp,
+ // // b.pmax() + dp,
+ // // literal::green);
+ // // }
+ // }
- for_all_comps(c, comps)
- {
- box2d b = geom::rotate(comps(c).bbox(), -90, input_clo.domain().pcenter());
- mln::draw::line(both,
- b.pmin() + dp,
- b.pmax() + dp,
- literal::green);
- }
- t_ = t;
- std::cout << "Output image - " << t_ << std::endl;
gt.stop();
t_ = gt;
- std::cout << "Total time: " << t_ << std::endl;
+ std::cout << "Non visible separators: " << t_ << std::endl;
+ trace::exiting("scribo::primitive::extract::separators_nonvisible");
return scribo::preprocessing::rotate_90(output, true);
}
}
diff --git a/scribo/scribo/primitive/link/internal/compute_anchor.hh b/scribo/scribo/primitive/link/internal/compute_anchor.hh
index 1c52b91..c1a9106 100644
--- a/scribo/scribo/primitive/link/internal/compute_anchor.hh
+++ b/scribo/scribo/primitive/link/internal/compute_anchor.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -88,10 +88,8 @@ namespace scribo
{
typedef mln_site(L) P;
- unsigned h = components(current_object).bbox().pmax().row()
- - components(current_object).bbox().pmin().row();
- unsigned w = components(current_object).bbox().pmax().col()
- - components(current_object).bbox().pmin().col();
+ unsigned h = components(current_object).bbox().height();
+ unsigned w = components(current_object).bbox().width();
mln_site(L) sp = components(current_object).bbox().pcenter();
@@ -113,6 +111,22 @@ namespace scribo
break;
+ // Bounding box top left
+ case anchor::TopStrictLeft:
+ sp.col() = components(current_object).bbox().pmin().col();
+ sp.row() = components(current_object).bbox().pmin().row()
+ + math::min(2u, (h + 1) / 2 - 1);
+ break;
+
+
+ // Bounding box bottom right
+ case anchor::BottomStrictRight:
+ sp.col() = components(current_object).bbox().pmax().col();
+ sp.row() = components(current_object).bbox().pmax().row()
+ - math::min(2u, (h + 1) / 2 - 1);
+ break;
+
+
// Bounding box bottom center
case anchor::Bottom:
if (h < 30)
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index ed691e8..48098ba 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -34,11 +34,11 @@
# include <scribo/core/line_set.hh>
# include <scribo/core/paragraph_set.hh>
-# include <scribo/primitive/extract/elements.hh>
+# include <scribo/primitive/extract/non_text.hh>
# include <scribo/primitive/extract/components.hh>
-# include <scribo/primitive/extract/vertical_separators.hh>
+//# include <scribo/primitive/extract/vertical_separators.hh>
+# include <scribo/primitive/extract/separators.hh>
# include <scribo/primitive/extract/separators_nonvisible.hh>
-# include <scribo/primitive/extract/elements.hh>
# include <scribo/primitive/identify.hh>
@@ -168,16 +168,17 @@ namespace scribo
input_cleaned = exact(processed_image);
if (enable_line_seps)
{
- on_new_progress_label("Find vertical separators...");
+ on_new_progress_label("Find vertical and horizontal separators...");
- // Vertical separators
- separators = primitive::extract::vertical_separators(processed_image, 81);
+ // Vertical and horizontal separators
+ separators = primitive::extract::separators(processed_image, 81);
on_progress();
on_new_progress_label("Remove separators...");
input_cleaned = primitive::remove::separators(processed_image, separators);
+ doc.set_line_separators(separators);
on_progress();
}
--
1.5.6.5
1
0
* scribo/core/macros.hh: Update comments.
* scribo/text/merging.hh: Add comments and fix line data swap.
* scribo/text/recognition.hh: Make use of is_textline.
* src/text_in_picture.cc: Initialize ImageMagick.
---
scribo/ChangeLog | 12 ++++++++++++
scribo/scribo/core/macros.hh | 7 +++----
scribo/scribo/text/merging.hh | 6 +++---
scribo/scribo/text/recognition.hh | 2 +-
4 files changed, 19 insertions(+), 8 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index d783c5c..0aa4ba2 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Small fixes in Scribo.
+
+ * scribo/core/macros.hh: Update comments.
+
+ * scribo/text/merging.hh: Add comments and fix line data swap.
+
+ * scribo/text/recognition.hh: Make use of is_textline.
+
+ * src/text_in_picture.cc: Initialize ImageMagick.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Improve and cleanup whitespace separator detection.
* scribo/core/tag/anchor.hh: Add new anchors.
diff --git a/scribo/scribo/core/macros.hh b/scribo/scribo/core/macros.hh
index 887539f..c6de1ff 100644
--- a/scribo/scribo/core/macros.hh
+++ b/scribo/scribo/core/macros.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -33,8 +33,6 @@
# define for_all_elements(E, S) \
for (unsigned E = 0; E < S.nelements(); ++E)
-
-// FIXME: we want to replace previous macros by these ones.
# define for_all_comps(C, S) \
for (unsigned C = 1; C <= S.nelements(); ++C)
@@ -56,6 +54,7 @@
# define for_all_line_comps(E, S) \
for_all_elements(E, S)
+// Internal use only.
# define for_all_lines_info(E, S) \
for_all_comp_data(E, S)
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index 3087465..f433e51 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -192,7 +192,7 @@ namespace scribo
{
// we transfer data from the largest item to the root one.
scribo::line_info<L> tmp = lines(l1);
- lines(l1) = lines(l2);
+ std::swap(lines(l1), lines(l2));
lines(l1).fast_merge(tmp);
// We must set manually the tag for lines(l2) since it is
@@ -504,8 +504,8 @@ namespace scribo
void
one_merge_pass(unsigned ith_pass,
const box2d& domain,
- std::vector<scribo::line_id_t>& v,
- scribo::line_set<L>& lines,
+ std::vector<scribo::line_id_t>& v, // Ids sorted by bbox size.
+ scribo::line_set<L>& lines, // Tagged Lines (looks_like_a_text_line?)
mln::util::array<unsigned>& parent)
{
image2d<unsigned> billboard(domain);
diff --git a/scribo/scribo/text/recognition.hh b/scribo/scribo/text/recognition.hh
index 59f269e..3a9742b 100644
--- a/scribo/scribo/text/recognition.hh
+++ b/scribo/scribo/text/recognition.hh
@@ -127,7 +127,7 @@ namespace scribo
/// Use text bboxes with Tesseract
for_all_lines(i, lines)
{
- if (! lines(i).is_valid() || lines(i).is_hidden() || lines(i).type() != line::Text)
+ if (! lines(i).is_textline())
continue;
mln_domain(I) box = lines(i).bbox();
--
1.5.6.5
1
0
* src/primitive/extract/Makefile.am,
* src/primitive/remove/Makefile.am: Add new targets.
* src/primitive/extract/separators_nonvisible.cc,
* src/primitive/remove/separators.cc: New.
---
scribo/ChangeLog | 10 ++++++++++
scribo/src/primitive/extract/Makefile.am | 2 ++
.../extract/separators_nonvisible.cc} | 19 ++++++++++---------
scribo/src/primitive/{ => remove}/Makefile.am | 11 ++++++-----
.../negate.cc => primitive/remove/separators.cc} | 20 ++++++++++++--------
5 files changed, 40 insertions(+), 22 deletions(-)
copy scribo/src/{misc/negate.cc => primitive/extract/separators_nonvisible.cc} (79%)
copy scribo/src/primitive/{ => remove}/Makefile.am (82%)
copy scribo/src/{misc/negate.cc => primitive/remove/separators.cc} (76%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 0aa4ba2..8b7ad7f 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,15 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Add new tools in Scribo.
+
+ * src/primitive/extract/Makefile.am,
+ * src/primitive/remove/Makefile.am: Add new targets.
+
+ * src/primitive/extract/separators_nonvisible.cc,
+ * src/primitive/remove/separators.cc: New.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Small fixes in Scribo.
* scribo/core/macros.hh: Update comments.
diff --git a/scribo/src/primitive/extract/Makefile.am b/scribo/src/primitive/extract/Makefile.am
index a46cd68..22d6bfd 100644
--- a/scribo/src/primitive/extract/Makefile.am
+++ b/scribo/src/primitive/extract/Makefile.am
@@ -22,6 +22,7 @@ noinst_PROGRAMS = \
discontinued_lines \
discontinued_vlines \
discontinued_hlines \
+ separators_nonvisible \
thick_vlines \
thick_hlines \
lines_pattern
@@ -29,6 +30,7 @@ noinst_PROGRAMS = \
discontinued_lines_SOURCES = discontinued_lines.cc
discontinued_vlines_SOURCES = discontinued_vlines.cc
discontinued_hlines_SOURCES = discontinued_hlines.cc
+separators_nonvisible_SOURCES = separators_nonvisible.cc
thick_vlines_SOURCES = thick_vlines.cc
thick_hlines_SOURCES = thick_hlines.cc
lines_pattern_SOURCES = lines_pattern.cc
diff --git a/scribo/src/misc/negate.cc b/scribo/src/primitive/extract/separators_nonvisible.cc
similarity index 79%
copy from scribo/src/misc/negate.cc
copy to scribo/src/primitive/extract/separators_nonvisible.cc
index da6fad6..82d4787 100644
--- a/scribo/src/misc/negate.cc
+++ b/scribo/src/primitive/extract/separators_nonvisible.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -25,35 +24,37 @@
// executable file might be covered by the GNU General Public License.
#include <mln/core/image/image2d.hh>
-#include <mln/logical/not.hh>
#include <mln/io/pbm/all.hh>
+#include <mln/data/convert.hh>
+#include <scribo/primitive/extract/separators_nonvisible2.hh>
#include <scribo/debug/usage.hh>
-
const char *args_desc[][2] =
{
{ "input.pbm", "A binary image." },
+ { "output.pbm", "Output image." },
{0, 0}
};
int main(int argc, char *argv[])
{
- mln::trace::entering("main");
using namespace mln;
+ using namespace scribo;
if (argc != 3)
return scribo::debug::usage(argv,
- "Negate a binary image",
+ "Extract non visible separators (whitespaces)",
"input.pbm output.pbm",
args_desc);
+ trace::entering("main");
+
image2d<bool> input;
io::pbm::load(input, argv[1]);
- io::pbm::save(logical::not_(input), argv[2]);
-
- mln::trace::exiting("main");
+ io::pbm::save(primitive::extract::separators_nonvisible(input), argv[2]);
+ trace::exiting("main");
}
diff --git a/scribo/src/primitive/Makefile.am b/scribo/src/primitive/remove/Makefile.am
similarity index 82%
copy from scribo/src/primitive/Makefile.am
copy to scribo/src/primitive/remove/Makefile.am
index 7e46a66..a673886 100644
--- a/scribo/src/primitive/Makefile.am
+++ b/scribo/src/primitive/remove/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE).
+# Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE).
#
# This file is part of Olena.
#
@@ -16,7 +16,8 @@
include $(top_srcdir)/scribo/scribo.mk
-SUBDIRS = \
- extract \
- find \
- group
+
+noinst_PROGRAMS = \
+ separators
+
+separators_SOURCES = separators.cc
diff --git a/scribo/src/misc/negate.cc b/scribo/src/primitive/remove/separators.cc
similarity index 76%
copy from scribo/src/misc/negate.cc
copy to scribo/src/primitive/remove/separators.cc
index da6fad6..46e977f 100644
--- a/scribo/src/misc/negate.cc
+++ b/scribo/src/primitive/remove/separators.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -25,35 +24,40 @@
// executable file might be covered by the GNU General Public License.
#include <mln/core/image/image2d.hh>
-#include <mln/logical/not.hh>
#include <mln/io/pbm/all.hh>
+#include <mln/data/convert.hh>
+#include <scribo/primitive/extract/separators.hh>
+#include <scribo/primitive/remove/separators.hh>
#include <scribo/debug/usage.hh>
-
const char *args_desc[][2] =
{
{ "input.pbm", "A binary image." },
+ { "output.pbm", "Output image." },
{0, 0}
};
int main(int argc, char *argv[])
{
- mln::trace::entering("main");
using namespace mln;
+ using namespace scribo;
if (argc != 3)
return scribo::debug::usage(argv,
- "Negate a binary image",
+ "Remove visible separators",
"input.pbm output.pbm",
args_desc);
+ trace::entering("main");
+
image2d<bool> input;
io::pbm::load(input, argv[1]);
- io::pbm::save(logical::not_(input), argv[2]);
+ image2d<bool> seps = primitive::extract::separators(input, 81);
- mln::trace::exiting("main");
+ io::pbm::save(primitive::remove::separators(input, seps), argv[2]);
+ trace::exiting("main");
}
--
1.5.6.5
1
0

last-svn-commit-775-gfd62480 configure.ac: scribo/src/primitive/remove.
by Guillaume Lazzara, 28 Mar '11
---
ChangeLog | 4 ++++
configure.ac | 1 +
2 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 9fedefa..30f8b74 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+ * configure.ac: scribo/src/primitive/remove.
+
+2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* configure.ac: configure scribo/tests/convert.
2011-03-14 Thierry GERAUD <thierry.geraud(a)lrde.epita.fr>
diff --git a/configure.ac b/configure.ac
index e30f010..44f359b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -332,6 +332,7 @@ AC_CONFIG_FILES([
scribo/src/primitive/extract/Makefile
scribo/src/primitive/find/Makefile
scribo/src/primitive/group/Makefile
+ scribo/src/primitive/remove/Makefile
scribo/src/table/Makefile
scribo/src/text/Makefile
scribo/src/toolchain/Makefile
--
1.5.6.5
1
0

last-svn-commit-776-g8c287df Set component type during component extraction.
by Guillaume Lazzara, 28 Mar '11
* scribo/core/component_info.hh,
* scribo/core/component_set.hh,
* scribo/core/document.hh,
* scribo/core/tag/component.hh,
* scribo/primitive/extract/components.hh,
* scribo/primitive/identify.hh: Explicitly set component type to
Separator when extracting separator components.
---
scribo/ChangeLog | 12 ++++++
scribo/scribo/core/component_info.hh | 8 ++-
scribo/scribo/core/component_set.hh | 50 ++++++++++++++++---------
scribo/scribo/core/document.hh | 6 ++-
scribo/scribo/core/tag/component.hh | 20 ++++++---
scribo/scribo/primitive/extract/components.hh | 18 ++++++---
scribo/scribo/primitive/identify.hh | 2 +-
7 files changed, 79 insertions(+), 37 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 8b7ad7f..330338a 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,15 @@
+2011-03-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Set component type during component extraction.
+
+ * scribo/core/component_info.hh,
+ * scribo/core/component_set.hh,
+ * scribo/core/document.hh,
+ * scribo/core/tag/component.hh,
+ * scribo/primitive/extract/components.hh,
+ * scribo/primitive/identify.hh: Explicitly set component type to
+ Separator when extracting separator components.
+
2011-02-17 Guillaume Lazzara <z(a)lrde.epita.fr>
Add new tools in Scribo.
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 1b03318..6fc73f8 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -53,7 +53,8 @@ namespace scribo
component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
- unsigned card);
+ unsigned card,
+ component::Type type = component::Undefined);
component_id_t id() const;
const mln::box2d& bbox() const;
@@ -101,9 +102,10 @@ namespace scribo
component_info::component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
- unsigned card)
+ unsigned card,
+ component::Type type)
: id_(id), bbox_(bbox), mass_center_(mass_center), card_(card),
- tag_(component::None), type_(component::Undefined)
+ tag_(component::None), type_(type)
{
}
diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh
index 7ddcf16..442e8d6 100644
--- a/scribo/scribo/core/component_set.hh
+++ b/scribo/scribo/core/component_set.hh
@@ -86,16 +86,20 @@ namespace scribo
component_set_data();
component_set_data(const L& ima, const mln_value(L)& ncomps);
component_set_data(const L& ima, const mln_value(L)& ncomps,
- const mln::util::array<pair_accu_t>& attribs);
+ const mln::util::array<pair_accu_t>& attribs,
+ component::Type type = component::Undefined);
component_set_data(const L& ima, const mln_value(L)& ncomps,
- const mln::util::array<pair_data_t>& attribs);
+ const mln::util::array<pair_data_t>& attribs,
+ component::Type type = component::Undefined);
component_set_data(const L& ima, const mln_value(L)& ncomps,
const mln::util::array<scribo::component_info>& infos);
- void fill_infos(const mln::util::array<pair_accu_t>& attribs);
+ void fill_infos(const mln::util::array<pair_accu_t>& attribs,
+ component::Type type = component::Undefined);
- void fill_infos(const mln::util::array<pair_data_t>& attribs);
+ void fill_infos(const mln::util::array<pair_data_t>& attribs,
+ component::Type type = component::Undefined);
// Useful while constructing incrementaly (XML loading).
void soft_init(const mln_value(L) ncomps);
@@ -141,10 +145,12 @@ namespace scribo
/// Constructor from an image \p ima, the number of labels \p ncomps and
/// attributes values (bounding box and mass center).
component_set(const L& ima, const mln_value(L)& ncomps,
- const mln::util::array<pair_accu_t>& attribs);
+ const mln::util::array<pair_accu_t>& attribs,
+ component::Type type = component::Undefined);
component_set(const L& ima, const mln_value(L)& ncomps,
- const mln::util::array<pair_data_t>& attribs);
+ const mln::util::array<pair_data_t>& attribs,
+ component::Type type = component::Undefined);
/// @}
/// Return the component count.
@@ -284,26 +290,28 @@ namespace scribo
inline
component_set_data<L>::component_set_data(const L& ima,
const mln_value(L)& ncomps,
- const mln::util::array<pair_accu_t>& attribs)
+ const mln::util::array<pair_accu_t>& attribs,
+ component::Type type)
: ima_(ima), ncomps_(ncomps)
{
initialize(separators_, ima); // FIXME: do we really want that?
mln::data::fill(separators_, false);
- fill_infos(attribs);
+ fill_infos(attribs, type);
}
template <typename L>
inline
component_set_data<L>::component_set_data(const L& ima,
const mln_value(L)& ncomps,
- const mln::util::array<pair_data_t>& attribs)
+ const mln::util::array<pair_data_t>& attribs,
+ component::Type type)
: ima_(ima), ncomps_(ncomps)
{
initialize(separators_, ima); // FIXME: do we really want that?
mln::data::fill(separators_, false);
- fill_infos(attribs);
+ fill_infos(attribs, type);
}
template <typename L>
@@ -321,7 +329,8 @@ namespace scribo
template <typename L>
inline
void
- component_set_data<L>::fill_infos(const mln::util::array<pair_accu_t>& attribs)
+ component_set_data<L>::fill_infos(const mln::util::array<pair_accu_t>& attribs,
+ component::Type type)
{
typedef mln_site(L) P;
@@ -331,7 +340,8 @@ namespace scribo
for_all_comp_data(i, attribs)
{
component_info info(i, attribs[i].first(),
- attribs[i].second(), attribs[i].second_accu().nsites());
+ attribs[i].second(), attribs[i].second_accu().nsites(),
+ type);
infos_.append(info);
}
}
@@ -339,7 +349,8 @@ namespace scribo
template <typename L>
inline
void
- component_set_data<L>::fill_infos(const mln::util::array<pair_data_t>& attribs)
+ component_set_data<L>::fill_infos(const mln::util::array<pair_data_t>& attribs,
+ component::Type type)
{
typedef mln_site(L) P;
@@ -349,7 +360,8 @@ namespace scribo
for_all_comp_data(i, attribs)
{
component_info info(i, attribs[i].first,
- attribs[i].second.first, attribs[i].second.second);
+ attribs[i].second.first, attribs[i].second.second,
+ type);
infos_.append(info);
}
}
@@ -397,9 +409,10 @@ namespace scribo
template <typename L>
inline
component_set<L>::component_set(const L& ima, const mln_value(L)& ncomps,
- const mln::util::array<pair_accu_t>& attribs)
+ const mln::util::array<pair_accu_t>& attribs,
+ component::Type type)
{
- data_ = new internal::component_set_data<L>(ima, ncomps, attribs);
+ data_ = new internal::component_set_data<L>(ima, ncomps, attribs, type);
}
@@ -407,9 +420,10 @@ namespace scribo
inline
component_set<L>::component_set(const L& ima, const mln_value(L)& ncomps,
- const mln::util::array<pair_data_t>& attribs)
+ const mln::util::array<pair_data_t>& attribs,
+ component::Type type)
{
- data_ = new internal::component_set_data<L>(ima, ncomps, attribs);
+ data_ = new internal::component_set_data<L>(ima, ncomps, attribs, type);
}
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh
index e5ac825..ef0869e 100644
--- a/scribo/scribo/core/document.hh
+++ b/scribo/scribo/core/document.hh
@@ -297,7 +297,8 @@ namespace scribo
mln_value(L) ncomps;
whitespace_seps_comps_ = primitive::extract::components(whitespace_seps,
- mln::c8(), ncomps);
+ mln::c8(), ncomps,
+ component::WhitespaceSeparator);
}
@@ -333,7 +334,8 @@ namespace scribo
mln_value(L) ncomps;
line_seps_comps_ = primitive::extract::components(line_seps,
- mln::c8(), ncomps);
+ mln::c8(), ncomps,
+ component::LineSeparator);
}
diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh
index 10b86a6..7cd2ede 100644
--- a/scribo/scribo/core/tag/component.hh
+++ b/scribo/scribo/core/tag/component.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -55,7 +55,8 @@ namespace scribo
{
Undefined = 0,
Character,
- Separator,
+ LineSeparator,
+ WhitespaceSeparator,
Noise,
Punctuation,
Image
@@ -116,8 +117,11 @@ namespace scribo
case Character:
str = "Character";
break;
- case Separator:
- str = "Separator";
+ case LineSeparator:
+ str = "LineSeparator";
+ break;
+ case WhitespaceSeparator:
+ str = "WhitespaceSeparator";
break;
case Noise:
str = "Noise";
@@ -139,8 +143,10 @@ namespace scribo
{
if (str == "Character")
return Character;
- else if (str == "Separator")
- return Separator;
+ else if (str == "LineSeparator")
+ return LineSeparator;
+ else if (str == "WhitespaceSeparator")
+ return WhitespaceSeparator;
else if (str == "Noise")
return Noise;
else if (str == "Punctuation")
diff --git a/scribo/scribo/primitive/extract/components.hh b/scribo/scribo/primitive/extract/components.hh
index 4994d4b..849dd7b 100644
--- a/scribo/scribo/primitive/extract/components.hh
+++ b/scribo/scribo/primitive/extract/components.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -68,6 +69,7 @@ namespace scribo
/// and background to 'false'.
/// \param[in] nbh A neighborhood to be used for labeling.
/// \param[in,out] ncomponents Will store the numbers of components found.
+ /// \param[in] type The component type assigned to every extracted component.
///
/// \return An image of labeled components.
//
@@ -75,7 +77,8 @@ namespace scribo
inline
component_set<mln_ch_value(I,V)>
components(const Image<I>& input,
- const Neighborhood<N>& nbh, V& ncomponents);
+ const Neighborhood<N>& nbh, V& ncomponents,
+ component::Type type = component::Undefined);
# ifndef MLN_INCLUDE_ONLY
@@ -88,7 +91,8 @@ namespace scribo
inline
void
components_tests(const Image<I>& input,
- const Neighborhood<N>& nbh, V& ncomponents)
+ const Neighborhood<N>& nbh, V& ncomponents,
+ component::Type type)
{
mlc_equal(mln_value(I),bool)::check();
// mlc_is_a(V, mln::value::Symbolic)::check();
@@ -97,6 +101,7 @@ namespace scribo
(void) input;
(void) nbh;
(void) ncomponents;
+ (void) type;
}
@@ -107,11 +112,12 @@ namespace scribo
inline
component_set<mln_ch_value(I,V)>
components(const Image<I>& input,
- const Neighborhood<N>& nbh, V& ncomponents)
+ const Neighborhood<N>& nbh, V& ncomponents,
+ component::Type type = component::Undefined)
{
trace::entering("scribo::components");
- internal::components_tests(input, nbh, ncomponents);
+ internal::components_tests(input, nbh, ncomponents, type);
typedef mln_ch_value(I,V) L;
typedef mln::accu::shape::bbox<mln_site(L)> bbox_accu_t;
@@ -129,7 +135,7 @@ namespace scribo
pair_accu_t());
component_set<L>
- output(results.first(), ncomponents, results.second().second());
+ output(results.first(), ncomponents, results.second().second(), type);
trace::exiting("scribo::components");
return output;
diff --git a/scribo/scribo/primitive/identify.hh b/scribo/scribo/primitive/identify.hh
index 81a7d16..1bed712 100644
--- a/scribo/scribo/primitive/identify.hh
+++ b/scribo/scribo/primitive/identify.hh
@@ -61,7 +61,7 @@ namespace scribo
std::swap(min, max);
if (max/min > 10)
- output(c).update_type(component::Separator);
+ output(c).update_type(component::LineSeparator);
}
mln::trace::exiting("scribo::primitive::identify");
--
1.5.6.5
1
0

28 Mar '11
* scribo/core/component_info.hh,
* scribo/core/component_set.hh,
* scribo/core/document.hh,
* scribo/core/line_info.hh,
* scribo/core/line_links.hh,
* scribo/core/object_groups.hh,
* scribo/core/object_links.hh,
* scribo/core/paragraph_set.hh: Make these classes serializable.
* scribo/core/concept/serializable.hh,
* scribo/core/concept/serialize_visitor.hh: New concepts.
* scribo/core/internal/doc_xml_serializer.hh: New. Base
implementation.
* scribo/io/xml/internal/extended_page_xml_visitor.hh,
* scribo/io/xml/internal/full_xml_visitor.hh,
* scribo/io/xml/internal/page_xml_visitor.hh: New. Visitors
producing different XML outputs.
* scribo/io/xml/internal/html_markups_replace.hh,
* scribo/io/xml/internal/print_box_coords.hh,
* scribo/io/xml/internal/print_page_preambule.hh: New. Tools for
XML output.
* scribo/io/xml/save.hh: Make use of visitors.
* scribo/toolchain/internal/content_in_doc_functor.hh: Set default
XML output type.
* src/content_in_doc.cc: Produce several XML outputs.
---
scribo/ChangeLog | 38 ++
scribo/demo/viewer/runner.cc | 5 +-
scribo/scribo/core/component_info.hh | 3 +-
scribo/scribo/core/component_set.hh | 7 +-
scribo/scribo/core/concept/serializable.hh | 64 +++
scribo/scribo/core/concept/serialize_visitor.hh | 49 +++
scribo/scribo/core/document.hh | 8 +-
scribo/scribo/core/internal/doc_xml_serializer.hh | 140 ++++++
scribo/scribo/core/line_info.hh | 21 +-
scribo/scribo/core/line_links.hh | 3 +-
scribo/scribo/core/object_groups.hh | 4 +-
scribo/scribo/core/object_links.hh | 8 +-
scribo/scribo/core/paragraph_set.hh | 4 +-
.../io/xml/internal/extended_page_xml_visitor.hh | 283 ++++++++++++
scribo/scribo/io/xml/internal/full_xml_visitor.hh | 456 ++++++++++++++++++++
.../scribo/io/xml/internal/html_markups_replace.hh | 97 +++++
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 222 ++++++++++
scribo/scribo/io/xml/internal/print_box_coords.hh | 92 ++++
.../scribo/io/xml/internal/print_page_preambule.hh | 95 ++++
scribo/scribo/io/xml/save.hh | 388 +++--------------
.../toolchain/internal/content_in_doc_functor.hh | 9 +-
scribo/src/content_in_doc.cc | 4 +-
22 files changed, 1660 insertions(+), 340 deletions(-)
create mode 100644 scribo/scribo/core/concept/serializable.hh
create mode 100644 scribo/scribo/core/concept/serialize_visitor.hh
create mode 100644 scribo/scribo/core/internal/doc_xml_serializer.hh
create mode 100644 scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
create mode 100644 scribo/scribo/io/xml/internal/full_xml_visitor.hh
create mode 100644 scribo/scribo/io/xml/internal/html_markups_replace.hh
create mode 100644 scribo/scribo/io/xml/internal/page_xml_visitor.hh
create mode 100644 scribo/scribo/io/xml/internal/print_box_coords.hh
create mode 100644 scribo/scribo/io/xml/internal/print_page_preambule.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 330338a..6571137 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,43 @@
2011-03-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Make XML output more flexible.
+
+ * scribo/core/component_info.hh,
+ * scribo/core/component_set.hh,
+ * scribo/core/document.hh,
+ * scribo/core/line_info.hh,
+ * scribo/core/line_links.hh,
+ * scribo/core/object_groups.hh,
+ * scribo/core/object_links.hh,
+ * scribo/core/paragraph_set.hh: Make these classes serializable.
+
+ * scribo/core/concept/serializable.hh,
+ * scribo/core/concept/serialize_visitor.hh: New concepts.
+
+ * scribo/core/internal/doc_xml_serializer.hh: New. Base
+ implementation.
+
+ * scribo/io/xml/internal/extended_page_xml_visitor.hh,
+ * scribo/io/xml/internal/full_xml_visitor.hh,
+ * scribo/io/xml/internal/page_xml_visitor.hh: New. Visitors
+ producing different XML outputs.
+
+ * scribo/io/xml/internal/html_markups_replace.hh,
+ * scribo/io/xml/internal/print_box_coords.hh,
+ * scribo/io/xml/internal/print_page_preambule.hh: New. Tools for
+ XML output.
+
+ * scribo/io/xml/save.hh: Make use of visitors.
+
+ * scribo/toolchain/internal/content_in_doc_functor.hh: Set default
+ XML output type.
+
+ * src/content_in_doc.cc: Produce several XML outputs.
+
+ * demo/viewer/runner.cc: Update call to io::xml::save.
+
+2011-03-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Set component type during component extraction.
* scribo/core/component_info.hh,
diff --git a/scribo/demo/viewer/runner.cc b/scribo/demo/viewer/runner.cc
index 86ff5dc..a3cc883 100644
--- a/scribo/demo/viewer/runner.cc
+++ b/scribo/demo/viewer/runner.cc
@@ -1,4 +1,5 @@
-// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -156,7 +157,7 @@ void runner::process(const image2d<value::rgb8>& original_ima,
f.enable_whitespace_seps = (find_seps == defs::Whitespaces
|| find_seps == defs::LinesAndWhitespaces);
- f.allow_xml_extensions = true;
+ f.xml_format = scribo::io::xml::PageExtended;
f.save_doc_as_xml = true;
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 6fc73f8..f825aee 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -36,6 +36,7 @@
# include <mln/core/alias/point2d.hh>
# include <mln/util/object_id.hh>
+# include <scribo/core/concept/serializable.hh>
# include <scribo/core/tag/component.hh>
# include <scribo/core/tag/line.hh>
@@ -44,7 +45,7 @@ namespace scribo
typedef mln::util::object_id<scribo::ComponentId, unsigned> component_id_t;
- class component_info
+ class component_info : public Serializable<component_info>
{
typedef mln::util::object_id<scribo::ComponentId, unsigned> component_id_t;
diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh
index 442e8d6..a63ed6c 100644
--- a/scribo/scribo/core/component_set.hh
+++ b/scribo/scribo/core/component_set.hh
@@ -30,6 +30,10 @@
/// \file
///
/// \brief Definition of a component set.
+///
+/// \fixme component_set should always set a component type in order
+/// to be fully supported by visitors.
+
# include <mln/core/concept/site_set.hh>
# include <mln/core/concept/function.hh>
@@ -59,6 +63,7 @@
# include <scribo/core/macros.hh>
# include <scribo/core/component_info.hh>
+# include <scribo/core/concept/serializable.hh>
namespace scribo
@@ -115,7 +120,7 @@ namespace scribo
template <typename L>
- class component_set
+ class component_set : public Serializable<component_set<L> >
{
typedef mln::accu::shape::bbox<mln_site(L)> bbox_accu_t;
typedef mln::accu::center<mln_site(L)> center_accu_t;
diff --git a/scribo/scribo/core/concept/serializable.hh b/scribo/scribo/core/concept/serializable.hh
new file mode 100644
index 0000000..6e661a6
--- /dev/null
+++ b/scribo/scribo/core/concept/serializable.hh
@@ -0,0 +1,64 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
+# define SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
+
+/// \file
+///
+/// Concept for serializable objects.
+
+# include <mln/core/concept/object.hh>
+# include <scribo/core/concept/serialize_visitor.hh>
+
+namespace scribo
+{
+
+ /// \brief Concept for objects accepting a serialize visitor.
+ template <typename E>
+ class Serializable : public mln::Object<E>
+ {
+ public:
+ template <typename E2>
+ void accept(const SerializeVisitor<E2>& visitor) const;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename E>
+ template <typename E2>
+ void
+ Serializable<E>::accept(const SerializeVisitor<E2>& visitor) const
+ {
+ exact(visitor).visit(exact(*this));
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
+} // end of namespace scribo
+
+#endif // SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
diff --git a/scribo/scribo/core/concept/serialize_visitor.hh b/scribo/scribo/core/concept/serialize_visitor.hh
new file mode 100644
index 0000000..e5e598f
--- /dev/null
+++ b/scribo/scribo/core/concept/serialize_visitor.hh
@@ -0,0 +1,49 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_CORE_CONCEPT_SERIALIZE_VISITOR_HH
+# define SCRIBO_CORE_CONCEPT_SERIALIZE_VISITOR_HH
+
+/// \file
+///
+/// Concept for serializer visitors.
+
+# include <mln/core/concept/object.hh>
+
+namespace scribo
+{
+
+ /// \brief Serialize visitor concept.
+ template <typename E>
+ class SerializeVisitor : public mln::Object<E>
+ {
+ public:
+ // void visit(..);
+ };
+
+
+} // end of namespace scribo
+
+#endif // SCRIBO_CORE_CONCEPT_SERIALIZE_VISITOR_HH
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh
index ef0869e..372f0a4 100644
--- a/scribo/scribo/core/document.hh
+++ b/scribo/scribo/core/document.hh
@@ -40,13 +40,15 @@
# include <scribo/core/line_set.hh>
# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/concept/serializable.hh>
+
# include <scribo/primitive/extract/components.hh>
namespace scribo
{
template <typename L>
- struct document
+ struct document : public Serializable<document<L> >
{
public:
@@ -98,7 +100,7 @@ namespace scribo
private:
- const char *filename_;
+ std::string filename_;
mln::image2d<mln::value::rgb8> image_;
paragraph_set<L> parset_;
@@ -142,7 +144,7 @@ namespace scribo
const char *
document<L>::filename() const
{
- return filename_;
+ return filename_.c_str();
}
diff --git a/scribo/scribo/core/internal/doc_xml_serializer.hh b/scribo/scribo/core/internal/doc_xml_serializer.hh
new file mode 100644
index 0000000..b64c9d4
--- /dev/null
+++ b/scribo/scribo/core/internal/doc_xml_serializer.hh
@@ -0,0 +1,140 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_CORE_INTERNAL_DOC_XML_SERIALIZER_HH
+# define SCRIBO_CORE_INTERNAL_DOC_XML_SERIALIZER_HH
+
+/// \file
+///
+/// Base implementation of document XML serializer visitors.
+
+# include <scribo/core/concept/serialize_visitor.hh>
+
+# include <scribo/core/document.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/component_info.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/object_groups.hh>
+# include <scribo/core/object_links.hh>
+# include <scribo/core/line_links.hh>
+# include <scribo/core/line_info.hh>
+
+namespace scribo
+{
+
+ /// \brief Base class for document XML serializer visitors.
+ template <typename E>
+ class doc_xml_serializer : public SerializeVisitor<E>
+ {
+ public:
+ // Visit overloads
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ template <typename L>
+ void visit(const line_links<L>& llinks) const;
+
+ template <typename L>
+ void visit(const object_groups<L>& groups) const;
+
+ template <typename L>
+ void visit(const object_links<L>& links) const;
+
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+
+ void visit(const component_info& info) const;
+
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+ };
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename E>
+ template <typename L>
+ void
+ doc_xml_serializer<E>::visit(const document<L>& doc) const
+ {
+ }
+
+ template <typename E>
+ template <typename L>
+ void
+ doc_xml_serializer<E>::visit(const line_links<L>& llinks) const
+ {
+ }
+
+ template <typename E>
+ template <typename L>
+ void
+ doc_xml_serializer<E>::visit(const object_groups<L>& groups) const
+ {
+ }
+
+ template <typename E>
+ template <typename L>
+ void
+ doc_xml_serializer<E>::visit(const object_links<L>& links) const
+ {
+ }
+
+ template <typename E>
+ template <typename L>
+ void
+ doc_xml_serializer<E>::visit(const component_set<L>& comp_set) const
+ {
+ }
+
+ template <typename E>
+ void
+ doc_xml_serializer<E>::visit(const component_info& info) const
+ {
+ }
+
+ template <typename E>
+ template <typename L>
+ void
+ doc_xml_serializer<E>::visit(const paragraph_set<L>& parset) const
+ {
+ }
+
+ template <typename E>
+ template <typename L>
+ void
+ doc_xml_serializer<E>::visit(const line_info<L>& line) const
+ {
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
+} // end of namespace scribo
+
+#endif // SCRIBO_CORE_INTERNAL_DOC_XML_SERIALIZER_HH
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index c82160a..33a1529 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -53,6 +53,11 @@
# include <scribo/core/line_set.hh>
# include <scribo/core/component_set.hh>
+# include <scribo/io/xml/internal/html_markups_replace.hh>
+
+# include <scribo/core/concept/serializable.hh>
+
+
namespace scribo
{
@@ -114,6 +119,7 @@ namespace scribo
bool indented_;
std::string text_;
+ std::string html_text_;
// Line set holding this element.
line_set<L> holder_;
@@ -125,7 +131,7 @@ namespace scribo
template <typename L>
- class line_info
+ class line_info : public Serializable<line_info<L> >
{
typedef internal::line_info_data<L> data_t;
typedef mln::util::object_id<scribo::ComponentId, unsigned> component_id_t;
@@ -198,6 +204,7 @@ namespace scribo
bool has_text() const;
const std::string& text() const;
+ const std::string& html_text() const;
void update_text(const std::string& str);
bool is_valid() const;
@@ -604,6 +611,7 @@ namespace scribo
return data_->indented_;
}
+
template <typename L>
bool
line_info<L>::has_text() const
@@ -611,6 +619,7 @@ namespace scribo
return !data_->text_.empty();
}
+
template <typename L>
const std::string&
line_info<L>::text() const
@@ -620,10 +629,19 @@ namespace scribo
template <typename L>
+ const std::string&
+ line_info<L>::html_text() const
+ {
+ return data_->html_text_;
+ }
+
+
+ template <typename L>
void
line_info<L>::update_text(const std::string& str)
{
data_->text_ = str;
+ data_->html_text_ = scribo::io::xml::internal::html_markups_replace(str);
}
@@ -987,6 +1005,7 @@ namespace scribo
<< ", indented=" << info.indented()
<< ", hidden=" << info.is_hidden()
<< ", text=" << info.text()
+ << ", html_text=" << info.html_text()
<< ")" << std::endl;
}
diff --git a/scribo/scribo/core/line_links.hh b/scribo/scribo/core/line_links.hh
index de62158..fdd09a5 100644
--- a/scribo/scribo/core/line_links.hh
+++ b/scribo/scribo/core/line_links.hh
@@ -34,6 +34,7 @@
# include <mln/util/array.hh>
# include <mln/util/tracked_ptr.hh>
+# include <scribo/core/concept/serializable.hh>
# include <scribo/core/line_set.hh>
@@ -69,7 +70,7 @@ namespace scribo
/// \brief Line group representation.
//
template <typename L>
- class line_links
+ class line_links : public Serializable<line_links<L> >
{
typedef internal::line_links_data<L> data_t;
diff --git a/scribo/scribo/core/object_groups.hh b/scribo/scribo/core/object_groups.hh
index 9d9fb25..bbfaf6e 100644
--- a/scribo/scribo/core/object_groups.hh
+++ b/scribo/scribo/core/object_groups.hh
@@ -36,6 +36,8 @@
# include <scribo/core/object_links.hh>
# include <scribo/core/component_set.hh>
+# include <scribo/core/concept/serializable.hh>
+
namespace scribo
{
@@ -69,7 +71,7 @@ namespace scribo
/// \brief Object group representation.
//
template <typename L>
- class object_groups
+ class object_groups : public Serializable<object_groups<L> >
{
typedef internal::object_groups_data<L> data_t;
diff --git a/scribo/scribo/core/object_links.hh b/scribo/scribo/core/object_links.hh
index af7dc38..2c2eea1 100644
--- a/scribo/scribo/core/object_links.hh
+++ b/scribo/scribo/core/object_links.hh
@@ -1,5 +1,5 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -37,6 +37,8 @@
# include <scribo/core/component_set.hh>
+# include <scribo/core/concept/serializable.hh>
+
namespace scribo
{
@@ -70,7 +72,7 @@ namespace scribo
/// \brief Object group representation.
//
template <typename L>
- class object_links
+ class object_links : public Serializable<object_links<L> >
{
typedef internal::object_links_data<L> data_t;
diff --git a/scribo/scribo/core/paragraph_set.hh b/scribo/scribo/core/paragraph_set.hh
index 6597189..5451069 100644
--- a/scribo/scribo/core/paragraph_set.hh
+++ b/scribo/scribo/core/paragraph_set.hh
@@ -33,6 +33,8 @@
# include <scribo/core/line_set.hh>
# include <scribo/core/paragraph_info.hh>
+# include <scribo/core/concept/serializable.hh>
+
namespace scribo
{
@@ -61,7 +63,7 @@ namespace scribo
*/
template <typename L>
- class paragraph_set
+ class paragraph_set : public Serializable<paragraph_set<L> >
{
public:
paragraph_set();
diff --git a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
new file mode 100644
index 0000000..5d8a672
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
@@ -0,0 +1,283 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH
+# define SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH
+
+/// \file
+///
+/// Extended XML PAGE format serializer Visitor.
+
+# include <fstream>
+# include <scribo/core/internal/doc_xml_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/object_groups.hh>
+# include <scribo/core/object_links.hh>
+# include <scribo/core/line_links.hh>
+# include <scribo/core/line_info.hh>
+
+# include <scribo/convert/to_base64.hh>
+
+# include <scribo/io/xml/internal/print_box_coords.hh>
+# include <scribo/io/xml/internal/print_page_preambule.hh>
+
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ /// Visitor writing a document to a stream in the extended XML PAGE format (see the \file comment).
+ class extended_page_xml_visitor : public doc_xml_serializer<extended_page_xml_visitor>
+ {
+ public:
+ // Constructor. Keeps a reference to \p out (no copy), so the stream must outlive the visitor.
+ extended_page_xml_visitor(std::ofstream& out);
+
+ // Visit overloads. Document: preamble, then text, elements and whitespace separators when present.
+ template <typename L>
+ void visit(const document<L>& doc) const;
+ // Component set: calls accept() on every component for which is_valid() holds.
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+ // Single component: writes the region element selected by info.type(), with its box coordinates.
+ void visit(const component_info& info) const;
+ // Paragraph set: one <text_region> per paragraph, followed by the lines of that paragraph.
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+ // Line: one <line> element with its attributes and box coordinates.
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ std::ofstream& output; // Stream every visit() overload writes to.
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ /// Bind the visitor to \p out, the stream all visit() overloads
+ /// write to.  Only a reference is kept.
+ inline
+ extended_page_xml_visitor::extended_page_xml_visitor(std::ofstream& out)
+   : output(out)
+ {}
+
+
+
+ /// Document
+ ///
+ /// Write the PAGE preamble, then every kind of content the document
+ /// reports having, then the closing tags.
+ //
+ template <typename L>
+ void
+ extended_page_xml_visitor::visit(const document<L>& doc) const
+ {
+   // Preamble, requested with with_validation set to false.
+   print_PAGE_preambule(output, doc, false);
+
+   // Text
+   if (doc.has_text())
+   {
+     doc.paragraphs().accept(*this);
+   }
+
+   // Page elements (Pictures, ...)
+   if (doc.has_elements())
+   {
+     doc.elements().accept(*this);
+   }
+
+   // Whitespace separators
+   if (doc.has_whitespace_seps())
+   {
+     doc.whitespace_seps_comps().accept(*this);
+   }
+
+   // Closing tags.
+   output << " </page>" << std::endl
+          << "</pcGts>" << std::endl;
+ }
+
+ /// Component Set
+ ///
+ /// Hand every valid component of \p comp_set over to this visitor;
+ /// components for which is_valid() is false are ignored.
+ //
+ template <typename L>
+ void
+ extended_page_xml_visitor::visit(const component_set<L>& comp_set) const
+ {
+   for_all_comps(c, comp_set)
+   {
+     if (comp_set(c).is_valid())
+     {
+       comp_set(c).accept(*this);
+     }
+   }
+ }
+
+
+ /// Component_info
+ ///
+ /// Write the region element matching the component type: opening
+ /// tag, bounding box coordinates, closing tag.  Any type other than
+ /// the two separator kinds is written as an image region.
+ //
+ inline
+ void
+ extended_page_xml_visitor::visit(const component_info& info) const
+ {
+   // Closing tag of the element opened in the switch below.
+   const char *closing = 0;
+
+   switch (info.type())
+   {
+     case component::WhitespaceSeparator:
+       output << " <whitespace_separator_region id=\"wss";
+       output << info.id();
+       output << "\">" << std::endl;
+       closing = " </whitespace_separator_region>";
+       break;
+
+     case component::LineSeparator:
+       output << " <separator_region id=\"sr";
+       output << info.id();
+       output << "\" sep_orientation=\"0.000000\" ";
+       output << " sep_colour=\"Black\" ";
+       output << " sep_bgcolour=\"White\">" << std::endl;
+       closing = " </separator_region>";
+       break;
+
+     default:
+     case component::Image:
+       output << " <image_region id=\"ir";
+       output << info.id();
+       output << "\" img_colour_type=\"24_Bit_Colour\"";
+       output << " img_orientation=\"0.000000\" ";
+       output << " img_emb_text=\"No\" ";
+       output << " img_bgcolour=\"White\">" << std::endl;
+       closing = " </image_region>";
+       break;
+   }
+
+   // Shared tail: every region carries its box, then gets closed.
+   internal::print_box_coords(output, info.bbox(), " ");
+   output << closing << std::endl;
+ }
+
+
+ /// Paragraph Set
+ ///
+ /// Write one <text_region> per paragraph: attributes read from the
+ /// first line of the paragraph, the paragraph bounding box, then
+ /// each line of the paragraph.
+ //
+ template <typename L>
+ void
+ extended_page_xml_visitor::visit(const paragraph_set<L>& parset) const
+ {
+   const line_set<L>& lines = parset.lines();
+
+   for_all_paragraphs(p, parset)
+   {
+     const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+     // FIXME: compute that information on the whole paragraph
+     // and use them here.  Until then the first line of the
+     // paragraph provides every attribute.
+     const line_info<L>& first = lines(line_ids(0));
+
+     output << " <text_region id=\"" << p;
+     output << "\" txt_orientation=\"" << first.orientation();
+     output << "\" txt_reading_orientation=\"" << first.reading_orientation();
+     output << "\" txt_reading_direction=\"" << first.reading_direction();
+     output << "\" txt_text_type=\"" << first.type();
+     output << "\" txt_reverse_video=\"" << (first.reverse_video() ? "true" : "false");
+     output << "\" txt_indented=\"" << (first.indented() ? "true" : "false");
+     output << "\" kerning=\"" << first.char_space();
+
+     // EXTENSIONS - Not officially supported
+     output << "\" baseline=\"" << first.baseline();
+     output << "\" meanline=\"" << first.meanline();
+     output << "\" x_height=\"" << first.x_height();
+     output << "\" d_height=\"" << first.d_height();
+     output << "\" a_height=\"" << first.a_height();
+     output << "\" char_width=\"" << first.char_width();
+     // End of EXTENSIONS
+
+     output << "\">" << std::endl;
+
+     internal::print_box_coords(output, parset(p).bbox(), " ");
+
+     // EXTENSIONS - Not officially supported
+     for_all_paragraph_lines(lid, line_ids)
+     {
+       lines(line_ids(lid)).accept(*this);
+     }
+     // End of EXTENSIONS
+
+     output << " </text_region>" << std::endl;
+   }
+ }
+
+
+ /// Line
+ ///
+ /// Write one <line> element: its attributes, then its bounding box
+ /// coordinates.  The text attribute is only written when the line
+ /// has text.
+ //
+ template <typename L>
+ void
+ extended_page_xml_visitor::visit(const line_info<L>& line) const
+ {
+   // Start of the opening tag.
+   if (! line.has_text())
+     output << " <line " << std::endl;
+   else
+     output << " <line text=\"" << line.html_text() << "\" ";
+
+   // Attributes shared by both cases.
+   output << "id=\"" << line.id();
+   output << "\" txt_orientation=\"" << line.orientation();
+   output << "\" txt_reading_orientation=\"" << line.reading_orientation();
+   output << "\" txt_reading_direction=\"" << line.reading_direction();
+   output << "\" txt_text_type=\"" << line.type();
+   output << "\" txt_reverse_video=\"" << (line.reverse_video() ? "true" : "false");
+   output << "\" txt_indented=\"" << (line.indented() ? "true" : "false");
+   output << "\" kerning=\"" << line.char_space();
+   output << "\" baseline=\"" << line.baseline();
+   output << "\" meanline=\"" << line.meanline();
+   output << "\" x_height=\"" << line.x_height();
+   output << "\" d_height=\"" << line.d_height();
+   output << "\" a_height=\"" << line.a_height();
+   output << "\" char_width=\"" << line.char_width();
+   output << "\">" << std::endl;
+
+   internal::print_box_coords(output, line.bbox(), " ");
+
+   output << " </line>" << std::endl;
+ }
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_XML_INTERNAL_EXTENDED_PAGE_XML_VISITOR_HH
diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
new file mode 100644
index 0000000..9c5bd1d
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
@@ -0,0 +1,456 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
+# define SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
+
+/// \file
+///
+/// XML serializer Visitor.
+
+# include <fstream>
+# include <scribo/core/internal/doc_xml_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/object_groups.hh>
+# include <scribo/core/object_links.hh>
+# include <scribo/core/line_links.hh>
+# include <scribo/core/line_info.hh>
+
+# include <scribo/convert/to_base64.hh>
+
+# include <scribo/io/xml/internal/print_box_coords.hh>
+# include <scribo/io/xml/internal/print_page_preambule.hh>
+
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ /// Visitor writing a document to a stream as XML, including the link, group and component extensions.
+ class full_xml_visitor : public doc_xml_serializer<full_xml_visitor>
+ {
+ public:
+ // Constructor. Keeps a reference to \p out (no copy), so the stream must outlive the visitor.
+ full_xml_visitor(std::ofstream& out);
+
+ // Visit overloads. Document: preamble, text data, elements and separators when present.
+ template <typename L>
+ void visit(const document<L>& doc) const;
+ // Line links: one <line_link> per link inside a <line_links> element.
+ template <typename L>
+ void visit(const line_links<L>& llinks) const;
+ // Object groups: one <group> per entry inside an <object_groups> element.
+ template <typename L>
+ void visit(const object_groups<L>& groups) const;
+ // Object links: one <link> per entry inside an <object_links> element.
+ template <typename L>
+ void visit(const object_links<L>& links) const;
+ // Component set: per-component attributes, then the labeled and separators images passed through convert::to_base64.
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+ // Single component: writes the region element selected by info.type(), with its box coordinates.
+ void visit(const component_info& info) const;
+ // Paragraph set: one <text_region> per paragraph, followed by the lines of that paragraph.
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+ // Line: one <line> element with its attributes, box coordinates and component id list.
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ std::ofstream& output; // Stream every visit() overload writes to.
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ /// Bind the visitor to \p out, the stream all visit() overloads
+ /// write to.  Only a reference is kept.
+ inline
+ full_xml_visitor::full_xml_visitor(std::ofstream& out)
+   : output(out)
+ {}
+
+
+
+ /// Document
+ ///
+ /// Write the PAGE preamble, the text data (components, links,
+ /// groups, line links, paragraphs), the page elements, both kinds
+ /// of separators, then the closing tags.  Each part is only
+ /// written when the document reports having it.
+ //
+ template <typename L>
+ void
+ full_xml_visitor::visit(const document<L>& doc) const
+ {
+   print_PAGE_preambule(output, doc, false);
+
+   // Text
+   if (doc.has_text())
+   {
+     const line_set<L>& lines = doc.lines();
+
+     // Save component/link/group information (Extension)
+     lines.components().accept(*this);   // Component set
+     lines.links().accept(*this);        // Object link
+     lines.groups().accept(*this);       // Object group
+     // End of EXTENSIONS
+
+     const paragraph_set<L>& parset = doc.paragraphs();
+
+     // Save paragraphs related information (Extension)
+
+     // General text information
+     output << " <text_data nlines=\"" << lines.nelements() << "\" ";
+     output << " nparagraphs=\"" << parset.nelements() << "\" />" << std::endl;
+
+     // line_links
+     parset.links().accept(*this);
+
+     // Paragraph and lines
+     parset.accept(*this);
+   }
+
+   // Page elements (Pictures, ...): only the valid ones.
+   if (doc.has_elements())
+   {
+     const component_set<L>& elements = doc.elements();
+
+     for_all_comps(e, elements)
+     {
+       if (elements(e).is_valid())
+       {
+         elements(e).accept(*this);
+       }
+     }
+   }
+
+   // Line separators
+   if (doc.has_line_seps())
+   {
+     const component_set<L>& line_seps = doc.line_seps_comps();
+
+     for_all_comps(c, line_seps)
+     {
+       line_seps(c).accept(*this);
+     }
+   }
+
+   // Whitespace separators
+   if (doc.has_whitespace_seps())
+   {
+     const component_set<L>& whitespace_seps = doc.whitespace_seps_comps();
+
+     for_all_comps(c, whitespace_seps)
+     {
+       whitespace_seps(c).accept(*this);
+     }
+   }
+
+   // Closing tags.
+   output << " </page>" << std::endl
+          << "</pcGts>" << std::endl;
+ }
+
+
+ /// Line Links
+ ///
+ /// Write a <line_links> element holding one <line_link> per link,
+ /// whose attributes are the link index and the value stored at
+ /// that index.
+ //
+ template <typename L>
+ void
+ full_xml_visitor::visit(const line_links<L>& llinks) const
+ {
+   output << " <line_links>" << std::endl;
+
+   for_all_links(l, llinks)
+   {
+     output << " <line_link";
+     output << " from=\"" << l;
+     output << "\" to=\"" << llinks(l);
+     output << "\"/>" << std::endl;
+   }
+
+   output << " </line_links>" << std::endl;
+ }
+
+
+ /// Object Groups
+ ///
+ /// Write an <object_groups> element holding one <group> per entry,
+ /// whose attributes are the entry index and the value stored at
+ /// that index.
+ //
+ template <typename L>
+ void
+ full_xml_visitor::visit(const object_groups<L>& groups) const
+ {
+   output << " <object_groups>" << std::endl;
+
+   for_all_groups(g, groups)
+   {
+     output << " <group ";
+     output << " object_id=\"" << g;
+     output << "\" group_id=\"" << groups(g);
+     output << "\"/>" << std::endl;
+   }
+
+   output << " </object_groups>" << std::endl;
+ }
+
+
+ /// Object Links
+ ///
+ /// Write an <object_links> element holding one <link> per entry,
+ /// whose attributes are the entry index and the value stored at
+ /// that index.
+ //
+ template <typename L>
+ void
+ full_xml_visitor::visit(const object_links<L>& links) const
+ {
+   output << " <object_links>" << std::endl;
+
+   for_all_links(l, links)
+   {
+     output << " <link";
+     output << " from=\"" << l;
+     output << "\" to=\"" << links(l);
+     output << "\"/>" << std::endl;
+   }
+
+   output << " </object_links>" << std::endl;
+ }
+
+
+ /// Component Set
+ ///
+ /// Write a <component_set> element: one <component_info> per
+ /// component, then the labeled image and the separators image,
+ /// each passed through convert::to_base64 and stored in a CDATA
+ /// section.
+ //
+ template <typename L>
+ void
+ full_xml_visitor::visit(const component_set<L>& comp_set) const
+ {
+   output << " <component_set nelements=\"" << comp_set.nelements();
+   output << "\">" << std::endl;
+
+   // Per-component attributes.
+   for_all_comps(c, comp_set)
+   {
+     output << " <component_info";
+     output << " id=\"" << comp_set(c).id();
+     output << "\" mass_center_x=\"" << comp_set(c).mass_center().col();
+     output << "\" mass_center_y=\"" << comp_set(c).mass_center().row();
+     output << "\" card=\"" << comp_set(c).card();
+     output << "\" tag=\"" << comp_set(c).tag();
+     output << "\" type=\"" << comp_set(c).type();
+     output << "\" pmin_x=\"" << comp_set(c).bbox().pmin().col();
+     output << "\" pmin_y=\"" << comp_set(c).bbox().pmin().row();
+     output << "\" pmax_x=\"" << comp_set(c).bbox().pmax().col();
+     output << "\" pmax_y=\"" << comp_set(c).bbox().pmax().row();
+     output << "\"/>" << std::endl;
+   }
+
+   // Save labeled image
+   {
+     const L& lbl = comp_set.labeled_image();
+
+     output << "<labeled_image ";
+     output << " height=\"" << lbl.domain().height();
+     output << "\" width=\"" << lbl.domain().width() << "\">";
+     output << "<![CDATA[";
+
+     util::array<unsigned char> lbl64;
+     convert::to_base64(lbl, lbl64);
+     // The buffer holds unsigned chars; write() wants plain chars.
+     output.write(reinterpret_cast<const char *>(lbl64.std_vector().data()),
+                  lbl64.nelements());
+
+     output << "]]></labeled_image>" << std::endl;
+   }
+
+   // Save separators image
+   {
+     const mln_ch_value(L,bool)& seps = comp_set.separators();
+
+     output << "<separators_image ";
+     output << " height=\"" << seps.domain().height();
+     output << "\" width=\"" << seps.domain().width() << "\">";
+     output << "<![CDATA[";
+
+     util::array<unsigned char> seps64;
+     convert::to_base64(seps, seps64);
+     output.write(reinterpret_cast<const char *>(seps64.std_vector().data()),
+                  seps64.nelements());
+
+     output << "]]></separators_image>" << std::endl;
+   }
+
+   output << "</component_set>" << std::endl;
+ }
+
+
+ /// Component_info
+ ///
+ /// Write the region element matching the component type: opening
+ /// tag, bounding box coordinates, closing tag.  Any type other than
+ /// the two separator kinds is written as an image region.
+ //
+ inline
+ void
+ full_xml_visitor::visit(const component_info& info) const
+ {
+   // Closing tag of the element opened in the switch below.
+   const char *closing = 0;
+
+   switch (info.type())
+   {
+     case component::WhitespaceSeparator:
+       output << " <whitespace_separator_region id=\"wss";
+       output << info.id();
+       output << "\">" << std::endl;
+       closing = " </whitespace_separator_region>";
+       break;
+
+     case component::LineSeparator:
+       output << " <separator_region id=\"sr";
+       output << info.id();
+       output << "\" sep_orientation=\"0.000000\" ";
+       output << " sep_colour=\"Black\" ";
+       output << " sep_bgcolour=\"White\">" << std::endl;
+       closing = " </separator_region>";
+       break;
+
+     default:
+     case component::Image:
+       output << " <image_region id=\"ir";
+       output << info.id();
+       output << "\" img_colour_type=\"24_Bit_Colour\"";
+       output << " img_orientation=\"0.000000\" ";
+       output << " img_emb_text=\"No\" ";
+       output << " img_bgcolour=\"White\">" << std::endl;
+       closing = " </image_region>";
+       break;
+   }
+
+   // Shared tail: every region carries its box, then gets closed.
+   internal::print_box_coords(output, info.bbox(), " ");
+   output << closing << std::endl;
+ }
+
+ /// Paragraph Set
+ ///
+ /// Write one <text_region> per paragraph: attributes read from the
+ /// first line of the paragraph, the paragraph bounding box, then
+ /// each line of the paragraph.
+ //
+ template <typename L>
+ void
+ full_xml_visitor::visit(const paragraph_set<L>& parset) const
+ {
+   const line_set<L>& lines = parset.lines();
+
+   for_all_paragraphs(p, parset)
+   {
+     const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+     // FIXME: compute that information on the whole paragraph
+     // and use them here.  Until then the first line of the
+     // paragraph provides every attribute.
+     const line_info<L>& first = lines(line_ids(0));
+
+     output << " <text_region id=\"" << p;
+     output << "\" txt_orientation=\"" << first.orientation();
+     output << "\" txt_reading_orientation=\"" << first.reading_orientation();
+     output << "\" txt_reading_direction=\"" << first.reading_direction();
+     output << "\" txt_text_type=\"" << first.type();
+     output << "\" txt_reverse_video=\"" << (first.reverse_video() ? "true" : "false");
+     output << "\" txt_indented=\"" << (first.indented() ? "true" : "false");
+     output << "\" kerning=\"" << first.char_space();
+
+     // EXTENSIONS - Not officially supported
+     output << "\" baseline=\"" << first.baseline();
+     output << "\" meanline=\"" << first.meanline();
+     output << "\" x_height=\"" << first.x_height();
+     output << "\" d_height=\"" << first.d_height();
+     output << "\" a_height=\"" << first.a_height();
+     output << "\" char_width=\"" << first.char_width();
+     // End of EXTENSIONS
+
+     output << "\">" << std::endl;
+
+     internal::print_box_coords(output, parset(p).bbox(), " ");
+
+     // EXTENSIONS - Not officially supported
+     for_all_paragraph_lines(lid, line_ids)
+     {
+       lines(line_ids(lid)).accept(*this);
+     }
+
+     output << " </text_region>" << std::endl;
+   }
+ }
+
+
+ /// Line
+ ///
+ /// Write one <line> element: its attributes, its bounding box
+ /// coordinates, then the list of its component ids.  The text
+ /// attribute is only written when the line has text.
+ //
+ template <typename L>
+ void
+ full_xml_visitor::visit(const line_info<L>& line) const
+ {
+   // Start of the opening tag.
+   if (! line.has_text())
+     output << " <line " << std::endl;
+   else
+     output << " <line text=\"" << line.html_text() << "\" ";
+
+   // Attributes shared by both cases.
+   output << "id=\"" << line.id();
+   output << "\" txt_orientation=\"" << line.orientation();
+   output << "\" txt_reading_orientation=\"" << line.reading_orientation();
+   output << "\" txt_reading_direction=\"" << line.reading_direction();
+   output << "\" txt_text_type=\"" << line.type();
+   output << "\" txt_reverse_video=\"" << (line.reverse_video() ? "true" : "false");
+   output << "\" txt_indented=\"" << (line.indented() ? "true" : "false");
+   output << "\" kerning=\"" << line.char_space();
+   output << "\" baseline=\"" << line.baseline();
+   output << "\" meanline=\"" << line.meanline();
+   output << "\" x_height=\"" << line.x_height();
+   output << "\" d_height=\"" << line.d_height();
+   output << "\" a_height=\"" << line.a_height();
+   output << "\" char_width=\"" << line.char_width();
+   output << "\">" << std::endl;
+
+   internal::print_box_coords(output, line.bbox(), " ");
+
+   // Ids of the components of this line.
+   output << " <compid_list>" << std::endl;
+
+   for_all_line_comps(c, line.components())
+   {
+     output << " <compid value=\"";
+     output << line.components()(c);
+     output << "\" />" << std::endl;
+   }
+
+   output << " </compid_list>" << std::endl;
+
+   output << " </line>" << std::endl;
+ }
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
diff --git a/scribo/scribo/io/xml/internal/html_markups_replace.hh b/scribo/scribo/io/xml/internal/html_markups_replace.hh
new file mode 100644
index 0000000..76f8107
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/html_markups_replace.hh
@@ -0,0 +1,97 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_HTML_MARKUPS_REPLACE_HH
+# define SCRIBO_IO_XML_INTERNAL_HTML_MARKUPS_REPLACE_HH
+
+/// \file
+///
+/// \brief Replace HTML markups characters by their corresponding
+/// markups.
+
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ /*! \brief Replace HTML markup characters by their corresponding
+ entities.
+
+ The characters '"', '<', '>' and '&' are each replaced by the
+ matching entity; every other character is copied unchanged.
+
+ \param[in] input The text to escape.  It is not modified.
+
+ \return An escaped copy of \p input.
+ */
+ inline
+ std::string
+ html_markups_replace(const std::string& input);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ /// Build the table mapping each character that must be escaped to
+ /// the entity that replaces it.
+ static inline std::map<char, std::string> init_map()
+ {
+   std::map<char, std::string> html_map;
+   html_map['\"'] = "&quot;";
+   html_map['<'] = "&lt;";
+   html_map['>'] = "&gt;";
+   html_map['&'] = "&amp;";
+   return html_map;
+ }
+
+
+ /// Return a copy of \p input in which every '"', '<', '>' and '&'
+ /// has been replaced by its entity.  \p input is left untouched.
+ inline
+ std::string
+ html_markups_replace(const std::string& input)
+ {
+   // Built once, on the first call, and only read afterwards.
+   static const std::map<char, std::string> map = init_map();
+
+   std::string output = input;
+
+   // The bound is output.size(), not input.size(): every
+   // replacement lengthens output, so a bound taken from the input
+   // would stop the scan before the end of the text.
+   for (std::string::size_type i = 0; i < output.size(); ++i)
+   {
+     const std::map<char, std::string>::const_iterator it = map.find(output[i]);
+     if (it != map.end())
+     {
+       output.replace(i, 1, it->second);
+       // Step over the entity just written so that its own '&'
+       // is not escaped a second time.
+       i += it->second.size() - 1;
+     }
+   }
+
+   return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_IO_XML_INTERNAL_HTML_MARKUPS_REPLACE_HH
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
new file mode 100644
index 0000000..52d8f12
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -0,0 +1,222 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH
+# define SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH
+
+/// \file
+///
+/// PAGE format XML serializer Visitor.
+
+# include <fstream>
+
+# include <scribo/core/internal/doc_xml_serializer.hh>
+# include <scribo/convert/to_base64.hh>
+
+# include <scribo/io/xml/internal/print_box_coords.hh>
+# include <scribo/io/xml/internal/print_page_preambule.hh>
+
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ /*! \brief Save document information as XML.
+
+ We use an XML Schema that is part of the PAGE (Page Analysis and
+ Ground truth Elements) image representation framework.
+
+ This schema was used in the Page Segmentation COMPetition
+ (PSCOMP) for ICDAR 2009.
+
+ Its XSD file is located here:
+ http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent…
+
+ The visitor writes text regions, page elements and line
+ separators; it has no visit() overload for single lines.
+
+ */
+ class page_xml_visitor : public doc_xml_serializer<page_xml_visitor>
+ {
+ public:
+ // Constructor. Keeps a reference to \p out (no copy), so the stream must outlive the visitor.
+ page_xml_visitor(std::ofstream& out);
+
+ // Visit overloads. Document: preamble, then text, elements and line separators when present.
+ template <typename L>
+ void visit(const document<L>& doc) const;
+ // Component set: calls accept() on every component for which is_valid() holds.
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+ // Single component: a separator region for line separators, an image region for anything else.
+ void visit(const component_info& info) const;
+ // Paragraph set: one <text_region> per paragraph with its box coordinates.
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ private: // Attributes
+ std::ofstream& output; // Stream every visit() overload writes to.
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ /// Bind the visitor to \p out, the stream all visit() overloads
+ /// write to.  Only a reference is kept.
+ inline
+ page_xml_visitor::page_xml_visitor(std::ofstream& out)
+   : output(out)
+ {}
+
+
+
+ /// Document
+ ///
+ /// Write the PAGE preamble, then every kind of content the document
+ /// reports having, then the closing tags.
+ //
+ template <typename L>
+ void
+ page_xml_visitor::visit(const document<L>& doc) const
+ {
+   // Preamble, requested with with_validation set to true.
+   print_PAGE_preambule(output, doc, true);
+
+   // Text
+   if (doc.has_text())
+   {
+     doc.paragraphs().accept(*this);
+   }
+
+   // Page elements (Pictures, ...)
+   if (doc.has_elements())
+   {
+     doc.elements().accept(*this);
+   }
+
+   // Line separators
+   if (doc.has_line_seps())
+   {
+     doc.line_seps_comps().accept(*this);
+   }
+
+   // Closing tags.
+   output << " </page>" << std::endl
+          << "</pcGts>" << std::endl;
+ }
+
+
+ /// Component Set
+ ///
+ /// Hand every valid component of \p comp_set over to this visitor;
+ /// components for which is_valid() is false are ignored.
+ //
+ template <typename L>
+ void
+ page_xml_visitor::visit(const component_set<L>& comp_set) const
+ {
+   for_all_comps(c, comp_set)
+   {
+     if (comp_set(c).is_valid())
+     {
+       comp_set(c).accept(*this);
+     }
+   }
+ }
+
+
+ /// Component_info
+ ///
+ /// Write the region element matching the component type: opening
+ /// tag, bounding box coordinates, closing tag.  Line separators
+ /// get a separator region; any other type is written as an image
+ /// region.
+ //
+ inline
+ void
+ page_xml_visitor::visit(const component_info& info) const
+ {
+   // Closing tag of the element opened in the switch below.
+   const char *closing = 0;
+
+   switch (info.type())
+   {
+     case component::LineSeparator:
+       output << " <separator_region id=\"sr";
+       output << info.id();
+       output << "\" sep_orientation=\"0.000000\" ";
+       output << " sep_colour=\"Black\" ";
+       output << " sep_bgcolour=\"White\">" << std::endl;
+       closing = " </separator_region>";
+       break;
+
+     default:
+     case component::Image:
+       output << " <image_region id=\"ir";
+       output << info.id();
+       output << "\" img_colour_type=\"24_Bit_Colour\"";
+       output << " img_orientation=\"0.000000\" ";
+       output << " img_emb_text=\"No\" ";
+       output << " img_bgcolour=\"White\">" << std::endl;
+       closing = " </image_region>";
+       break;
+   }
+
+   // Shared tail: every region carries its box, then gets closed.
+   internal::print_box_coords(output, info.bbox(), " ");
+   output << closing << std::endl;
+ }
+
+
+ /// Paragraph Set
+ ///
+ /// Write one <text_region> per paragraph: attributes read from the
+ /// first line of the paragraph, then the paragraph bounding box.
+ /// The lines themselves are not written.
+ //
+ template <typename L>
+ void
+ page_xml_visitor::visit(const paragraph_set<L>& parset) const
+ {
+   const line_set<L>& lines = parset.lines();
+
+   for_all_paragraphs(p, parset)
+   {
+     const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+     // FIXME: compute that information on the whole paragraph
+     // and use them here.  Until then the first line of the
+     // paragraph provides every attribute.
+     const line_info<L>& first = lines(line_ids(0));
+
+     output << " <text_region id=\"" << p;
+     output << "\" txt_orientation=\"" << first.orientation();
+     output << "\" txt_reading_orientation=\"" << first.reading_orientation();
+     output << "\" txt_reading_direction=\"" << first.reading_direction();
+     output << "\" txt_text_type=\"" << first.type();
+     output << "\" txt_reverse_video=\"" << (first.reverse_video() ? "true" : "false");
+     output << "\" txt_indented=\"" << (first.indented() ? "true" : "false");
+     output << "\" kerning=\"" << first.char_space();
+     output << "\">" << std::endl;
+
+     internal::print_box_coords(output, parset(p).bbox(), " ");
+
+     output << " </text_region>" << std::endl;
+   }
+ }
+
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_XML_INTERNAL_PAGE_XML_VISITOR_HH
diff --git a/scribo/scribo/io/xml/internal/print_box_coords.hh b/scribo/scribo/io/xml/internal/print_box_coords.hh
new file mode 100644
index 0000000..d3aeedf
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/print_box_coords.hh
@@ -0,0 +1,92 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
+# define SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
+
+/// \file
+///
+/// \brief Prints box2d coordinates to XML data.
+
+# include <mln/core/alias/box2d.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ /*! \brief Prints box2d coordinates to XML data.
+
+ Writes a <coords> element holding four <point> children, one per
+ corner of \p b, in this order: (pmin col, pmin row), (pmax col,
+ pmin row), (pmax col, pmax row), (pmin col, pmax row).
+
+ \param[in,out] ostr  The stream written to.
+ \param[in]     b     The box whose corners are written.
+ \param[in]     space Text written in front of the <coords> and
+                      </coords> tags; the <point> lines get it
+                      followed by one more space string.
+ */
+ void
+ print_box_coords(std::ofstream& ostr, const box2d& b,
+ const char *space);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ /// Write the four corners of \p b as <point> elements wrapped in a
+ /// <coords> element.  \p space prefixes the wrapper tags; the
+ /// points are prefixed by \p space plus one more space string.
+ inline
+ void
+ print_box_coords(std::ofstream& ostr, const box2d& b,
+                  const char *space)
+ {
+   const std::string outer = space;
+   const std::string inner = outer + " ";
+
+   ostr << outer << "<coords>" << std::endl;
+
+   // Corner (pmin col, pmin row).
+   ostr << inner << "<point x=\"" << b.pmin().col();
+   ostr << "\" y=\"" << b.pmin().row() << "\"/>" << std::endl;
+
+   // Corner (pmax col, pmin row).
+   ostr << inner << "<point x=\"" << b.pmax().col();
+   ostr << "\" y=\"" << b.pmin().row() << "\"/>" << std::endl;
+
+   // Corner (pmax col, pmax row).
+   ostr << inner << "<point x=\"" << b.pmax().col();
+   ostr << "\" y=\"" << b.pmax().row() << "\"/>" << std::endl;
+
+   // Corner (pmin col, pmax row).
+   ostr << inner << "<point x=\"" << b.pmin().col();
+   ostr << "\" y=\"" << b.pmax().row() << "\"/>" << std::endl;
+
+   ostr << outer << "</coords>" << std::endl;
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
diff --git a/scribo/scribo/io/xml/internal/print_page_preambule.hh b/scribo/scribo/io/xml/internal/print_page_preambule.hh
new file mode 100644
index 0000000..b5ae891
--- /dev/null
+++ b/scribo/scribo/io/xml/internal/print_page_preambule.hh
@@ -0,0 +1,112 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_XML_INTERNAL_PRINT_PAGE_PREAMBULE_HH
+# define SCRIBO_IO_XML_INTERNAL_PRINT_PAGE_PREAMBULE_HH
+
+/// \file
+///
+/// \brief Print PAGE XML format preamble.
+
+# include <fstream>
+
+# include <mln/core/alias/box2d.hh>
+# include <scribo/core/document.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace xml
+ {
+
+ namespace internal
+ {
+
+ /// \brief Print PAGE XML format preamble.
+ ///
+ /// Writes the XML declaration, the opening <pcGts> tag, the
+ /// <pcMetadata> element and the opening <page> tag to \p output.
+ /// Neither <pcGts> nor <page> is closed here.
+ ///
+ /// \param[in,out] output Stream receiving the XML.
+ /// \param[in] doc Document whose filename(), width() and height()
+ /// are written out.
+ /// \param[in] with_validation If true, <pcGts> carries the PAGE
+ /// 2009-03-16 namespace, its schema location and a pcGtsId
+ /// attribute; otherwise a bare <pcGts> tag is written.
+ template <typename L>
+ void print_PAGE_preambule(std::ofstream& output,
+ const document<L>& doc,
+ bool with_validation);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ void print_PAGE_preambule(std::ofstream& output,
+ const document<L>& doc,
+ bool with_validation)
+ {
+ output << "<?xml version=\"1.0\"?>" << std::endl;
+
+ // NOTE(review): doc.filename() is streamed verbatim into XML
+ // attributes below; no escaping is applied in this function.
+ if (with_validation)
+ output << "<pcGts xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16\" "
+ << "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" "
+ << "xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16 "
+ << "http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd\" "
+ << "pcGtsId=\"" << doc.filename() << "\">" << std::endl;
+ else
+ output << "<pcGts>" << std::endl;
+
+ output << " <pcMetadata>" << std::endl;
+ output << " <pcCreator>LRDE</pcCreator>" << std::endl;
+ output << " <pcCreated/>" << std::endl;
+ output << " <pcLastChange/>" << std::endl;
+ output << " <pcComments>Generated by Scribo from Olena.</pcComments>" << std::endl;
+ output << " </pcMetadata>" << std::endl;
+
+ // The <page> element is opened but not closed in this function.
+ output << " <page image_filename=\"" << doc.filename()
+ << "\" image_width=\"" << doc.width()
+ << "\" image_height=\"" << doc.height()
+ << "\">" << std::endl;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::xml::internal
+
+ } // end of namespace scribo::io::xml
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // ! SCRIBO_IO_XML_INTERNAL_PRINT_PAGE_PREAMBULE_HH
diff --git a/scribo/scribo/io/xml/save.hh b/scribo/scribo/io/xml/save.hh
index 7011e87..30579d0 100644
--- a/scribo/scribo/io/xml/save.hh
+++ b/scribo/scribo/io/xml/save.hh
@@ -38,7 +38,11 @@
# include <map>
# include <scribo/core/document.hh>
-# include <scribo/core/line_set.hh>
+
+# include <scribo/io/xml/internal/full_xml_visitor.hh>
+# include <scribo/io/xml/internal/extended_page_xml_visitor.hh>
+# include <scribo/io/xml/internal/page_xml_visitor.hh>
+
namespace scribo
{
@@ -49,360 +53,104 @@ namespace scribo
namespace xml
{
- /*! \brief Save document information as XML.
+ /*! \brief Supported XML output formats (see save()).
+
+ Page : PRImA PAGE format. Used in ICDAR 2009.
- We use a XML Schema part of the PAGE (Page Analysis and Ground
- truth Elements) image representation framework.
+ PageExtended : Enriched PRImA PAGE format with scribo data.
+
+ Full : Enriched PRImA PAGE format with scribo data. This
+ format can be reloaded in Scribo.
+ */
+ enum Format
+ {
+ Page,
+ PageExtended,
+ Full
+ //Hocr -- commented out: not a selectable format
+ };
- This schema was used in the Page Segmentation COMPetition
- (PSCOMP) for ICDAR 2009.
- Its XSD file is located here:
- http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd
+ /*! \brief Save document \p doc as XML in file \p output_name, using \p format.
 */
 template <typename L>
 void
- save(const document<L>& doc,
- const std::string& output_name,
- bool allow_extensions);
+ save(const document<L>& doc, const std::string& output_name,
+ Format format);
# ifndef MLN_INCLUDE_ONLY
+
namespace internal
{
- inline
- std::string&
- html_markups_replace(std::string& input,
- std::map<char, std::string>& map)
- {
- for (unsigned i = 0; i < input.size(); ++i)
- {
- std::map<char, std::string>::iterator it = map.find(input.at(i));
- if (it != map.end())
- {
- input.replace(i, 1, it->second);
- i += it->second.size() - 1;
- }
- }
- return input;
- }
-
-
- inline
- void print_box_coords(std::ofstream& ostr, const box2d& b,
- const char *space)
+ template <typename L>
+ void save_page(const document<L>& doc, std::ofstream& output)
{
- std::string sc = space;
- std::string sp = sc + " ";
- ostr << sc << "<coords>" << std::endl
- << sp << "<point x=\"" << b.pmin().col()
- << "\" y=\"" << b.pmin().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmax().col()
- << "\" y=\"" << b.pmin().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmax().col()
- << "\" y=\"" << b.pmax().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmin().col()
- << "\" y=\"" << b.pmax().row() << "\"/>"
- << std::endl
- << sc << "</coords>" << std::endl;
-
+ scribo::io::xml::internal::page_xml_visitor f(output);
+ doc.accept(f);
}
-
-
template <typename L>
- void
- save(const document<L>& doc,
- const std::string& output_name)
+ void save_page_extended(const document<L>& doc, std::ofstream& output)
{
- trace::entering("scribo::io::xml:save_text_lines");
-
- std::ofstream file(output_name.c_str());
- if (! file)
- {
- std::cerr << "error: cannot open file '" << doc.filename() << "'!";
- abort();
- }
-
- std::map<char, std::string> html_map;
- html_map['\"'] = "&quot;";
- html_map['<'] = "&lt;";
- html_map['>'] = "&gt;";
- html_map['&'] = "&amp;";
-
- file << "<?xml version=\"1.0\"?>" << std::endl;
- file << "<pcGts xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16 http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd\" pcGtsId=\"" << doc.filename() << "\">" << std::endl;
-
- file << " <pcMetadata>" << std::endl;
- file << " <pcCreator>LRDE</pcCreator>" << std::endl;
- file << " <pcCreated/>" << std::endl;
- file << " <pcLastChange/>" << std::endl;
- file << " <pcComments>Generated by Scribo from Olena.</pcComments>" << std::endl;
- file << " </pcMetadata>" << std::endl;
-
- file << " <page image_filename=\"" << doc.filename()
- << "\" image_width=\"" << doc.width()
- << "\" image_height=\"" << doc.height()
- << "\">" << std::endl;
-
- // Text
- if (doc.has_text())
- {
- const line_set<L>& lines = doc.lines();
- const paragraph_set<L>& parset = doc.paragraphs();
-
- for_all_paragraphs(p, parset)
- {
- const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
-
- // FIXME: compute that information on the whole paragraph
- // and use them here.
- line_id_t fid = line_ids(0);
- file << " <text_region id=\"" << p
- << "\" txt_orientation=\"" << lines(fid).orientation()
- << "\" txt_reading_orientation=\"" << lines(fid).reading_orientation()
- << "\" txt_reading_direction=\"" << lines(fid).reading_direction()
- << "\" txt_text_type=\"" << lines(fid).type()
- << "\" txt_reverse_video=\"" << (lines(fid).reverse_video() ? "true" : "false")
- << "\" txt_indented=\"" << (lines(fid).indented() ? "true" : "false")
- << "\" kerning=\"" << lines(fid).char_space()
- << "\">"
- << std::endl;
-
- internal::print_box_coords(file, parset(p).bbox(), " ");
-
- file << " </text_region>" << std::endl;
- }
- }
-
- // Page elements (Pictures, ...)
- if (doc.has_elements())
- {
- const component_set<L>& elts = doc.elements();
- for_all_comps(e, elts)
- if (elts(e).is_valid())
- {
- file << " <image_region id=\"ir" << elts(e).id()
- << "\" img_colour_type=\"24_Bit_Colour\""
- << " img_orientation=\"0.000000\" "
- << " img_emb_text=\"No\" "
- << " img_bgcolour=\"White\">" << std::endl;
-
- internal::print_box_coords(file, elts(e).bbox(), " ");
-
- file << " </image_region>" << std::endl;
- }
- }
-
-
- file << " </page>" << std::endl;
- file << "</pcGts>" << std::endl;
-
- trace::exiting("scribo::io::xml::save_text_lines");
+ scribo::io::xml::internal::extended_page_xml_visitor f(output);
+ doc.accept(f);
}
-
-
-
template <typename L>
- void
- save_extended(const document<L>& doc,
- const std::string& output_name)
+ void save_full(const document<L>& doc, std::ofstream& output)
{
- trace::entering("scribo::io::xml:save_text_lines");
-
- std::ofstream file(output_name.c_str());
- if (! file)
- {
- std::cerr << "error: cannot open file '" << doc.filename() << "'!";
- abort();
- }
-
- std::map<char, std::string> html_map;
- html_map['\"'] = "&quot;";
- html_map['<'] = "&lt;";
- html_map['>'] = "&gt;";
- html_map['&'] = "&amp;";
-
- file << "<?xml version=\"1.0\"?>" << std::endl;
- file << "<pcGts>" << std::endl;
-
- file << " <pcMetadata>" << std::endl;
- file << " <pcCreator>LRDE</pcCreator>" << std::endl;
- file << " <pcCreated/>" << std::endl;
- file << " <pcLastChange/>" << std::endl;
- file << " <pcComments>Generated by Scribo from Olena.</pcComments>" << std::endl;
- file << " </pcMetadata>" << std::endl;
-
- file << " <page image_filename=\"" << doc.filename()
- << "\" image_width=\"" << doc.width()
- << "\" image_height=\"" << doc.height()
- << "\">" << std::endl;
-
- // Text
- if (doc.has_text())
- {
- const line_set<L>& lines = doc.lines();
- const paragraph_set<L>& parset = doc.paragraphs();
-
- for_all_paragraphs(p, parset)
- {
- const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
-
- // FIXME: compute that information on the whole paragraph
- // and use them here.
- line_id_t fid = line_ids(0);
- file << " <text_region id=\"" << p
- << "\" txt_orientation=\"" << lines(fid).orientation()
- << "\" txt_reading_orientation=\"" << lines(fid).reading_orientation()
- << "\" txt_reading_direction=\"" << lines(fid).reading_direction()
- << "\" txt_text_type=\"" << lines(fid).type()
- << "\" txt_reverse_video=\"" << (lines(fid).reverse_video() ? "true" : "false")
- << "\" txt_indented=\"" << (lines(fid).indented() ? "true" : "false")
- << "\" kerning=\"" << lines(fid).char_space();
-
- // EXTENSIONS - Not officially supported
- file << "\" baseline=\"" << lines(fid).baseline()
- << "\" meanline=\"" << lines(fid).meanline()
- << "\" x_height=\"" << lines(fid).x_height()
- << "\" d_height=\"" << lines(fid).d_height()
- << "\" a_height=\"" << lines(fid).a_height()
- << "\" char_width=\"" << lines(fid).char_width();
- // End of EXTENSIONS
- file << "\">"
- << std::endl;
-
- internal::print_box_coords(file, parset(p).bbox(), " ");
-
-
- // EXTENSIONS - Not officially supported
- for_all_paragraph_lines(lid, line_ids)
- {
- line_id_t l = line_ids(lid);
-
- if (lines(l).has_text())
- {
- std::string tmp = lines(l).text();
- tmp = internal::html_markups_replace(tmp, html_map);
-
- file << " <line text=\"" << tmp << "\" ";
- }
- else
- file << " <line " << std::endl;
-
- file << "id=\"" << lines(l).id()
- << "\" txt_orientation=\"" << lines(l).orientation()
- << "\" txt_reading_orientation=\"" << lines(l).reading_orientation()
- << "\" txt_reading_direction=\"" << lines(l).reading_direction()
- << "\" txt_text_type=\"" << lines(l).type()
- << "\" txt_reverse_video=\"" << (lines(l).reverse_video() ? "true" : "false")
- << "\" txt_indented=\"" << (lines(l).indented() ? "true" : "false")
- << "\" kerning=\"" << lines(l).char_space()
- << "\" baseline=\"" << lines(l).baseline()
- << "\" meanline=\"" << lines(l).meanline()
- << "\" x_height=\"" << lines(l).x_height()
- << "\" d_height=\"" << lines(l).d_height()
- << "\" a_height=\"" << lines(l).a_height()
- << "\" char_width=\"" << lines(l).char_width()
- << "\">" << std::endl;
-
- internal::print_box_coords(file, lines(l).bbox(), " ");
-
- file << " </line>" << std::endl;
- }
-
- file << " </text_region>" << std::endl;
- }
- }
- // End of EXTENSIONS
-
- // Page elements (Pictures, ...)
- if (doc.has_elements())
- {
- const component_set<L>& elts = doc.elements();
- for_all_comps(e, elts)
- if (elts(e).is_valid())
- {
- switch (elts(e).type())
- {
- case component::Separator:
- {
- file << " <separator_region id=\"sr" << elts(e).id()
- << "\" sep_orientation=\"0.000000\" "
- << " sep_colour=\"Black\" "
- << " sep_bgcolour=\"White\">" << std::endl;
-
- internal::print_box_coords(file, elts(e).bbox(), " ");
-
- file << " </separator_region>" << std::endl;
- break;
- break;
- }
-
- default:
- case component::Image:
- {
- file << " <image_region id=\"ir" << elts(e).id()
- << "\" img_colour_type=\"24_Bit_Colour\""
- << " img_orientation=\"0.000000\" "
- << " img_emb_text=\"No\" "
- << " img_bgcolour=\"White\">" << std::endl;
-
- internal::print_box_coords(file, elts(e).bbox(), " ");
-
- file << " </image_region>" << std::endl;
- break;
- }
- }
- }
- }
-
-
- // Whitespace seraparators
- if (doc.has_whitespace_seps())
- {
- const component_set<L>&
- whitespace_seps_comps = doc.whitespace_seps_comps();
-
- for_all_comps(c, whitespace_seps_comps)
- {
- file << " <whitespace_separator_region id=\"wss"
- << whitespace_seps_comps(c).id()
- << "\">" << std::endl;
-
- internal::print_box_coords(file, whitespace_seps_comps(c).bbox(), " ");
-
- file << " </whitespace_separator_region>" << std::endl;
- }
- }
-
- file << " </page>" << std::endl;
- file << "</pcGts>" << std::endl;
-
- trace::exiting("scribo::io::xml::save_text_lines");
+ scribo::io::xml::internal::full_xml_visitor f(output);
+ doc.accept(f);
}
} // end of namespace scribo::io::xml::internal
+
// FACADE
template <typename L>
void
save(const document<L>& doc,
const std::string& output_name,
- bool allow_extensions)
+ Format format)
{
- if (allow_extensions)
- internal::save_extended(doc, output_name);
- else
- internal::save(doc, output_name);
+ trace::entering("scribo::io::xml::save");
+
+ // Open file
+ std::ofstream output(output_name.c_str());
+ if (! output)
+ {
+ std::cerr << "scribo::io::xml::save - ERROR: cannot open file '"
+ << doc.filename() << "'!";
+ return;
+ }
+
+ // Choose saving method.
+ switch (format)
+ {
+ case Page:
+ internal::save_page(doc, output);
+ break;
+
+ case PageExtended:
+ internal::save_page_extended(doc, output);
+ break;
+
+ case Full:
+ internal::save_full(doc, output);
+ break;
+
+ default:
+ trace::warning("scribo::io::xml::save - Invalid XML format! Skip saving...");
+ }
+
+ output.close();
+ trace::exiting("scribo::io::xml::save");
}
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index 48098ba..dcbb4f7 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -36,7 +36,6 @@
# include <scribo/primitive/extract/non_text.hh>
# include <scribo/primitive/extract/components.hh>
-//# include <scribo/primitive/extract/vertical_separators.hh>
# include <scribo/primitive/extract/separators.hh>
# include <scribo/primitive/extract/separators_nonvisible.hh>
@@ -114,7 +113,7 @@ namespace scribo
bool enable_whitespace_seps;
bool enable_debug;
bool save_doc_as_xml;
- bool allow_xml_extensions;
+ scribo::io::xml::Format xml_format;
//============
// Parameters
@@ -139,7 +138,7 @@ namespace scribo
enable_whitespace_seps(true),
enable_debug(false),
save_doc_as_xml(false),
- allow_xml_extensions(true),
+ xml_format(scribo::io::xml::PageExtended),
ocr_language("eng"),
output_file("/tmp/foo.xml"),
doc(doc_filename)
@@ -189,7 +188,7 @@ namespace scribo
// Whitespace separators
on_new_progress_label("Find whitespace separators...");
- whitespaces = primitive::extract::separators_nonvisible(processed_image);
+ whitespaces = primitive::extract::separators_nonvisible(input_cleaned);
on_progress();
}
@@ -483,7 +482,7 @@ namespace scribo
{
on_new_progress_label("Saving results");
- scribo::io::xml::save(doc, output_file, allow_xml_extensions);
+ scribo::io::xml::save(doc, output_file, xml_format);
on_xml_saved();
on_progress();
diff --git a/scribo/src/content_in_doc.cc b/scribo/src/content_in_doc.cc
index 9748b28..d8d4e52 100644
--- a/scribo/src/content_in_doc.cc
+++ b/scribo/src/content_in_doc.cc
@@ -172,7 +172,9 @@ int main(int argc, char* argv[])
debug);
// Saving results
- scribo::io::xml::save(doc, argv[2], true);
+ scribo::io::xml::save(doc, argv[2], scribo::io::xml::PageExtended);
+ scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page);
+ scribo::io::xml::save(doc, "full.xml", scribo::io::xml::Full);
trace::exiting("main");
}
--
1.5.6.5
1
0