* scribo/core/document.hh: Make a distinction between horizontal
and vertical lines. Store the binary image.
* scribo/io/xml/internal/extended_page_xml_visitor.hh,
* scribo/io/xml/internal/full_xml_visitor.hh,
* scribo/io/xml/internal/page_xml_visitor.hh,
* scribo/toolchain/internal/content_in_doc_functor.hh: Make use of
these new methods and information.
---
scribo/ChangeLog | 13 ++
scribo/scribo/core/document.hh | 137 ++++++++++++++-----
.../io/xml/internal/extended_page_xml_visitor.hh | 6 +-
scribo/scribo/io/xml/internal/full_xml_visitor.hh | 14 +-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 6 +-
.../toolchain/internal/content_in_doc_functor.hh | 20 +++-
6 files changed, 145 insertions(+), 51 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 1928592..33b036a 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Store more information in scribo::document.
+
+ * scribo/core/document.hh: Make a distinction between horizontal
+ and vertical lines. Store the binary image.
+
+ * scribo/io/xml/internal/extended_page_xml_visitor.hh,
+ * scribo/io/xml/internal/full_xml_visitor.hh,
+ * scribo/io/xml/internal/page_xml_visitor.hh,
+ * scribo/toolchain/internal/content_in_doc_functor.hh: Make use of
+ these new methods and information.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Rename line_info::components() to line_info::component_ids.
* scribo/core/line_info.hh: Here.
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh
index 689d0e3..f38b20b 100644
--- a/scribo/scribo/core/document.hh
+++ b/scribo/scribo/core/document.hh
@@ -85,23 +85,34 @@ namespace scribo
const component_set<L>& elements() const;
void set_elements(const component_set<L>& elements);
- const mln::image2d<value::rgb8>& image() const;
- void set_image(const mln::image2d<value::rgb8>& image);
-
bool has_whitespace_seps() const;
const mln::image2d<bool>& whitespace_seps() const;
const component_set<L>& whitespace_seps_comps() const;
void set_whitespace_separators(const image2d<bool>& whitespace_seps);
- bool has_line_seps() const;
- const mln::image2d<bool>& line_seps() const;
- const component_set<L>& line_seps_comps() const;
- void set_line_separators(const image2d<bool>& line_seps);
+ // Horizontal separators
+ bool has_hline_seps() const;
+ const mln::image2d<bool>& hline_seps() const;
+ const component_set<L>& hline_seps_comps() const;
+ void set_hline_separators(const image2d<bool>& line_seps);
+
+ // Vertical separators
+ bool has_vline_seps() const;
+ const mln::image2d<bool>& vline_seps() const;
+ const component_set<L>& vline_seps_comps() const;
+ void set_vline_separators(const image2d<bool>& line_seps);
+
+ const mln::image2d<value::rgb8>& image() const;
+ void set_image(const mln::image2d<value::rgb8>& image);
+
+ const mln::image2d<bool>& binary_image() const;
+ void set_binary_image(const mln::image2d<bool>& binary_image);
private:
std::string filename_;
mln::image2d<mln::value::rgb8> image_;
+ mln::image2d<bool> binary_image_;
paragraph_set<L> parset_;
component_set<L> elements_;
@@ -109,8 +120,11 @@ namespace scribo
mln::image2d<bool> whitespace_seps_;
component_set<L> whitespace_seps_comps_;
- mln::image2d<bool> line_seps_;
- component_set<L> line_seps_comps_;
+ mln::image2d<bool> hline_seps_;
+ component_set<L> hline_seps_comps_;
+
+ mln::image2d<bool> vline_seps_;
+ component_set<L> vline_seps_comps_;
};
@@ -251,97 +265,148 @@ namespace scribo
elements_ = elements;
}
+ template <typename L>
+ bool
+ document<L>::has_whitespace_seps() const
+ {
+ return whitespace_seps_.is_valid();
+ }
+
template <typename L>
- const mln::image2d<value::rgb8>&
- document<L>::image() const
+ const mln::image2d<bool>&
+ document<L>::whitespace_seps() const
{
- return image_;
+ return whitespace_seps_;
+ }
+
+
+ template <typename L>
+ const component_set<L>&
+ document<L>::whitespace_seps_comps() const
+ {
+ return whitespace_seps_comps_;
}
template <typename L>
void
- document<L>::set_image(const mln::image2d<value::rgb8>& image)
+ document<L>::set_whitespace_separators(const image2d<bool>&
whitespace_seps)
{
- image_ = image;
+ whitespace_seps_ = whitespace_seps;
+
+ mln_value(L) ncomps;
+ whitespace_seps_comps_ = primitive::extract::components(whitespace_seps,
+ mln::c8(), ncomps,
+ component::WhitespaceSeparator);
}
template <typename L>
bool
- document<L>::has_whitespace_seps() const
+ document<L>::has_hline_seps() const
{
- return whitespace_seps_.is_valid();
+ return hline_seps_.is_valid();
}
template <typename L>
const mln::image2d<bool>&
- document<L>::whitespace_seps() const
+ document<L>::hline_seps() const
{
- return whitespace_seps_;
+ return hline_seps_;
}
template <typename L>
const component_set<L>&
- document<L>::whitespace_seps_comps() const
+ document<L>::hline_seps_comps() const
{
- return whitespace_seps_comps_;
+ return hline_seps_comps_;
}
template <typename L>
void
- document<L>::set_whitespace_separators(const image2d<bool>&
whitespace_seps)
+ document<L>::set_hline_separators(const image2d<bool>& hline_seps)
{
- whitespace_seps_ = whitespace_seps;
+ hline_seps_ = hline_seps;
mln_value(L) ncomps;
- whitespace_seps_comps_ = primitive::extract::components(whitespace_seps,
- mln::c8(), ncomps,
- component::WhitespaceSeparator);
+ hline_seps_comps_ = primitive::extract::components(hline_seps,
+ mln::c8(), ncomps,
+ component::LineSeparator);
}
template <typename L>
bool
- document<L>::has_line_seps() const
+ document<L>::has_vline_seps() const
{
- return line_seps_.is_valid();
+ return vline_seps_.is_valid();
}
template <typename L>
const mln::image2d<bool>&
- document<L>::line_seps() const
+ document<L>::vline_seps() const
{
- return line_seps_;
+ return vline_seps_;
}
template <typename L>
const component_set<L>&
- document<L>::line_seps_comps() const
+ document<L>::vline_seps_comps() const
{
- return line_seps_comps_;
+ return vline_seps_comps_;
}
template <typename L>
void
- document<L>::set_line_separators(const image2d<bool>& line_seps)
+ document<L>::set_vline_separators(const image2d<bool>& vline_seps)
{
- line_seps_ = line_seps;
+ vline_seps_ = vline_seps;
mln_value(L) ncomps;
- line_seps_comps_ = primitive::extract::components(line_seps,
- mln::c8(), ncomps,
- component::LineSeparator);
+ vline_seps_comps_ = primitive::extract::components(vline_seps,
+ mln::c8(), ncomps,
+ component::LineSeparator);
+ }
+
+
+ template <typename L>
+ const mln::image2d<value::rgb8>&
+ document<L>::image() const
+ {
+ return image_;
}
+ template <typename L>
+ void
+ document<L>::set_image(const mln::image2d<value::rgb8>& image)
+ {
+ image_ = image;
+ }
+
+
+ template <typename L>
+ const mln::image2d<bool>&
+ document<L>::binary_image() const
+ {
+ return binary_image_;
+ }
+
+
+ template <typename L>
+ void
+ document<L>::set_binary_image(const mln::image2d<bool>& binary_image)
+ {
+ binary_image_ = binary_image;
+ }
+
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
index f573d88..51e7ad3 100644
--- a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
@@ -114,8 +114,10 @@ namespace scribo
doc.elements().accept(*this);
// line seraparators
- if (doc.has_line_seps())
- doc.line_seps_comps().accept(*this);
+ if (doc.has_vline_seps())
+ doc.vline_seps_comps().accept(*this);
+ if (doc.has_hline_seps())
+ doc.hline_seps_comps().accept(*this);
// Whitespace seraparators
if (doc.has_whitespace_seps())
diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh
b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
index 701c1b7..d466b34 100644
--- a/scribo/scribo/io/xml/internal/full_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
@@ -164,14 +164,12 @@ namespace scribo
// line seraparators
- if (doc.has_line_seps())
- {
- const component_set<L>&
- line_seps_comps = doc.line_seps_comps();
-
- for_all_comps(c, line_seps_comps)
- line_seps_comps(c).accept(*this);
- }
+ if (doc.has_hline_seps())
+ for_all_comps(c, doc.hline_seps_comps())
+ doc.hline_seps_comps()(c).accept(*this);
+ if (doc.has_vline_seps())
+ for_all_comps(c, doc.vline_seps_comps())
+ doc.vline_seps_comps()(c).accept(*this);
// Whitespace seraparators
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 52d8f12..9dec1e7 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -116,8 +116,10 @@ namespace scribo
doc.elements().accept(*this);
// line seraparators
- if (doc.has_line_seps())
- doc.line_seps_comps().accept(*this);
+ if (doc.has_vline_seps())
+ doc.vline_seps_comps().accept(*this);
+ if (doc.has_hline_seps())
+ doc.hline_seps_comps().accept(*this);
output << " </page>" << std::endl;
output << "</pcGts>" << std::endl;
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index 40f013c..3e6668f 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -162,6 +162,7 @@ namespace scribo
mln_precondition(exact(processed_image).is_valid());
doc.set_image(exact(original_image));
+ doc.set_binary_image(exact(processed_image));
// Remove separators
mln_ch_value(I,bool)
@@ -169,17 +170,30 @@ namespace scribo
input_cleaned = exact(processed_image);
if (enable_line_seps)
{
+ // FIXME: SLOW
on_new_progress_label("Find vertical and horizontal separators...");
// Vertical and horizontal separators
- separators = primitive::extract::separators(processed_image, 81);
+ {
+ mln_ch_value(I,bool)
+ vseparators = primitive::extract::vertical_separators(processed_image, 81),
+ hseparators = primitive::extract::horizontal_separators(processed_image, 81);
+
+ doc.set_vline_separators(vseparators);
+ doc.set_hline_separators(hseparators);
+
+ separators = vseparators;
+ separators += hseparators;
+
+ border::resize(processed_image, border::thickness);
+ }
on_progress();
on_new_progress_label("Remove separators...");
- input_cleaned = primitive::remove::separators(processed_image, separators);
- doc.set_line_separators(separators);
+ input_cleaned = primitive::remove::separators(processed_image,
+ separators);
on_progress();
}
--
1.5.6.5