
* scribo/core/document.hh: Make a distinction between horizontal and vertical lines. Store the binary image. * scribo/io/xml/internal/extended_page_xml_visitor.hh, * scribo/io/xml/internal/full_xml_visitor.hh, * scribo/io/xml/internal/page_xml_visitor.hh, * scribo/toolchain/internal/content_in_doc_functor.hh: Make use of these new methods and information. --- scribo/ChangeLog | 13 ++ scribo/scribo/core/document.hh | 137 ++++++++++++++----- .../io/xml/internal/extended_page_xml_visitor.hh | 6 +- scribo/scribo/io/xml/internal/full_xml_visitor.hh | 14 +- scribo/scribo/io/xml/internal/page_xml_visitor.hh | 6 +- .../toolchain/internal/content_in_doc_functor.hh | 20 +++- 6 files changed, 145 insertions(+), 51 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 1928592..33b036a 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,18 @@ 2011-03-14 Guillaume Lazzara <z@lrde.epita.fr> + Store more information in scribo::document. + + * scribo/core/document.hh: Make a distinction between horizontal + and vertical lines. Store the binary image. + + * scribo/io/xml/internal/extended_page_xml_visitor.hh, + * scribo/io/xml/internal/full_xml_visitor.hh, + * scribo/io/xml/internal/page_xml_visitor.hh, + * scribo/toolchain/internal/content_in_doc_functor.hh: Make use of + that new methods and information. + +2011-03-14 Guillaume Lazzara <z@lrde.epita.fr> + Rename line_info::components() to line_info::component_ids. * scribo/core/line_info.hh: Here. 
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh index 689d0e3..f38b20b 100644 --- a/scribo/scribo/core/document.hh +++ b/scribo/scribo/core/document.hh @@ -85,23 +85,34 @@ namespace scribo const component_set<L>& elements() const; void set_elements(const component_set<L>& elements); - const mln::image2d<value::rgb8>& image() const; - void set_image(const mln::image2d<value::rgb8>& image); - bool has_whitespace_seps() const; const mln::image2d<bool>& whitespace_seps() const; const component_set<L>& whitespace_seps_comps() const; void set_whitespace_separators(const image2d<bool>& whitespace_seps); - bool has_line_seps() const; - const mln::image2d<bool>& line_seps() const; - const component_set<L>& line_seps_comps() const; - void set_line_separators(const image2d<bool>& line_seps); + // Horizontal separators + bool has_hline_seps() const; + const mln::image2d<bool>& hline_seps() const; + const component_set<L>& hline_seps_comps() const; + void set_hline_separators(const image2d<bool>& line_seps); + + // Vertical separators + bool has_vline_seps() const; + const mln::image2d<bool>& vline_seps() const; + const component_set<L>& vline_seps_comps() const; + void set_vline_separators(const image2d<bool>& line_seps); + + const mln::image2d<value::rgb8>& image() const; + void set_image(const mln::image2d<value::rgb8>& image); + + const mln::image2d<bool>& binary_image() const; + void set_binary_image(const mln::image2d<bool>& binary_image); private: std::string filename_; mln::image2d<mln::value::rgb8> image_; + mln::image2d<bool> binary_image_; paragraph_set<L> parset_; component_set<L> elements_; @@ -109,8 +120,11 @@ namespace scribo mln::image2d<bool> whitespace_seps_; component_set<L> whitespace_seps_comps_; - mln::image2d<bool> line_seps_; - component_set<L> line_seps_comps_; + mln::image2d<bool> hline_seps_; + component_set<L> hline_seps_comps_; + + mln::image2d<bool> vline_seps_; + component_set<L> vline_seps_comps_; }; @@ -251,97 
+265,148 @@ namespace scribo elements_ = elements; } + template <typename L> + bool + document<L>::has_whitespace_seps() const + { + return whitespace_seps_.is_valid(); + } + template <typename L> - const mln::image2d<value::rgb8>& - document<L>::image() const + const mln::image2d<bool>& + document<L>::whitespace_seps() const { - return image_; + return whitespace_seps_; + } + + + template <typename L> + const component_set<L>& + document<L>::whitespace_seps_comps() const + { + return whitespace_seps_comps_; } template <typename L> void - document<L>::set_image(const mln::image2d<value::rgb8>& image) + document<L>::set_whitespace_separators(const image2d<bool>& whitespace_seps) { - image_ = image; + whitespace_seps_ = whitespace_seps; + + mln_value(L) ncomps; + whitespace_seps_comps_ = primitive::extract::components(whitespace_seps, + mln::c8(), ncomps, + component::WhitespaceSeparator); } template <typename L> bool - document<L>::has_whitespace_seps() const + document<L>::has_hline_seps() const { - return whitespace_seps_.is_valid(); + return hline_seps_.is_valid(); } template <typename L> const mln::image2d<bool>& - document<L>::whitespace_seps() const + document<L>::hline_seps() const { - return whitespace_seps_; + return hline_seps_; } template <typename L> const component_set<L>& - document<L>::whitespace_seps_comps() const + document<L>::hline_seps_comps() const { - return whitespace_seps_comps_; + return hline_seps_comps_; } template <typename L> void - document<L>::set_whitespace_separators(const image2d<bool>& whitespace_seps) + document<L>::set_hline_separators(const image2d<bool>& hline_seps) { - whitespace_seps_ = whitespace_seps; + hline_seps_ = hline_seps; mln_value(L) ncomps; - whitespace_seps_comps_ = primitive::extract::components(whitespace_seps, - mln::c8(), ncomps, - component::WhitespaceSeparator); + hline_seps_comps_ = primitive::extract::components(hline_seps, + mln::c8(), ncomps, + component::LineSeparator); } template <typename L> bool - 
document<L>::has_line_seps() const + document<L>::has_vline_seps() const { - return line_seps_.is_valid(); + return vline_seps_.is_valid(); } template <typename L> const mln::image2d<bool>& - document<L>::line_seps() const + document<L>::vline_seps() const { - return line_seps_; + return vline_seps_; } template <typename L> const component_set<L>& - document<L>::line_seps_comps() const + document<L>::vline_seps_comps() const { - return line_seps_comps_; + return vline_seps_comps_; } template <typename L> void - document<L>::set_line_separators(const image2d<bool>& line_seps) + document<L>::set_vline_separators(const image2d<bool>& vline_seps) { - line_seps_ = line_seps; + vline_seps_ = vline_seps; mln_value(L) ncomps; - line_seps_comps_ = primitive::extract::components(line_seps, - mln::c8(), ncomps, - component::LineSeparator); + vline_seps_comps_ = primitive::extract::components(vline_seps, + mln::c8(), ncomps, + component::LineSeparator); + } + + + template <typename L> + const mln::image2d<value::rgb8>& + document<L>::image() const + { + return image_; } + template <typename L> + void + document<L>::set_image(const mln::image2d<value::rgb8>& image) + { + image_ = image; + } + + + template <typename L> + const mln::image2d<bool>& + document<L>::binary_image() const + { + return binary_image_; + } + + + template <typename L> + void + document<L>::set_binary_image(const mln::image2d<bool>& binary_image) + { + binary_image_ = binary_image; + } + # endif // ! 
MLN_INCLUDE_ONLY diff --git a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh index f573d88..51e7ad3 100644 --- a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh @@ -114,8 +114,10 @@ namespace scribo doc.elements().accept(*this); // line seraparators - if (doc.has_line_seps()) - doc.line_seps_comps().accept(*this); + if (doc.has_vline_seps()) + doc.vline_seps_comps().accept(*this); + if (doc.has_hline_seps()) + doc.hline_seps_comps().accept(*this); // Whitespace seraparators if (doc.has_whitespace_seps()) diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh b/scribo/scribo/io/xml/internal/full_xml_visitor.hh index 701c1b7..d466b34 100644 --- a/scribo/scribo/io/xml/internal/full_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh @@ -164,14 +164,12 @@ namespace scribo // line seraparators - if (doc.has_line_seps()) - { - const component_set<L>& - line_seps_comps = doc.line_seps_comps(); - - for_all_comps(c, line_seps_comps) - line_seps_comps(c).accept(*this); - } + if (doc.has_hline_seps()) + for_all_comps(c, doc.hline_seps_comps()) + doc.hline_seps_comps()(c).accept(*this); + if (doc.has_vline_seps()) + for_all_comps(c, doc.vline_seps_comps()) + doc.vline_seps_comps()(c).accept(*this); // Whitespace seraparators diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh index 52d8f12..9dec1e7 100644 --- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh @@ -116,8 +116,10 @@ namespace scribo doc.elements().accept(*this); // line seraparators - if (doc.has_line_seps()) - doc.line_seps_comps().accept(*this); + if (doc.has_vline_seps()) + doc.vline_seps_comps().accept(*this); + if (doc.has_hline_seps()) + doc.hline_seps_comps().accept(*this); output << " </page>" << std::endl; output 
<< "</pcGts>" << std::endl; diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh index 40f013c..3e6668f 100644 --- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh +++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh @@ -162,6 +162,7 @@ namespace scribo mln_precondition(exact(processed_image).is_valid()); doc.set_image(exact(original_image)); + doc.set_binary_image(exact(processed_image)); // Remove separators mln_ch_value(I,bool) @@ -169,17 +170,30 @@ namespace scribo input_cleaned = exact(processed_image); if (enable_line_seps) { + // FIXME: SLOW on_new_progress_label("Find vertical and horizontal separators..."); // Vertical and horizontal separators - separators = primitive::extract::separators(processed_image, 81); + { + mln_ch_value(I,bool) + vseparators = primitive::extract::vertical_separators(processed_image, 81), + hseparators = primitive::extract::horizontal_separators(processed_image, 81); + + doc.set_vline_separators(vseparators); + doc.set_hline_separators(hseparators); + + separators = vseparators; + separators += hseparators; + + border::resize(processed_image, border::thickness); + } on_progress(); on_new_progress_label("Remove separators..."); - input_cleaned = primitive::remove::separators(processed_image, separators); - doc.set_line_separators(separators); + input_cleaned = primitive::remove::separators(processed_image, + separators); on_progress(); } -- 1.5.6.5