
* scribo/core/component_features_data.hh, * scribo/core/component_info.hh, * scribo/core/component_set.hh, * scribo/core/document.hh, * scribo/core/line_links.hh, * scribo/core/paragraph_info.hh, * scribo/core/paragraph_set.hh: Add operator==(). * scribo/io/xml/internal/full_xml_visitor.hh: Save more data for groups, separators and paragraphs. * scribo/io/xml/internal/save_image_to_xml.hh: New. * scribo/io/xml/load.hh: Load new saved data. * tests/unit_test/cond_tests_qt: Add save_image_to_xml.hh. --- scribo/ChangeLog | 22 +++ scribo/scribo/core/component_features_data.hh | 16 ++ scribo/scribo/core/component_info.hh | 17 ++ scribo/scribo/core/component_set.hh | 36 +++-- scribo/scribo/core/document.hh | 68 +++++++- scribo/scribo/core/line_links.hh | 12 ++ scribo/scribo/core/paragraph_info.hh | 19 +++ scribo/scribo/core/paragraph_set.hh | 21 +++ scribo/scribo/io/xml/internal/full_xml_visitor.hh | 118 +++++++++------ ...{print_image_coords.hh => save_image_to_xml.hh} | 70 +++++---- scribo/scribo/io/xml/load.hh | 164 ++++++++++++++++++-- scribo/tests/unit_test/cond_tests_qt | 1 + 12 files changed, 456 insertions(+), 108 deletions(-) copy scribo/scribo/io/xml/internal/{print_image_coords.hh => save_image_to_xml.hh} (54%) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 637fca8..350b536 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,27 @@ 2011-05-03 Guillaume Lazzara <lazzara@lrde.epita.fr> + Several improvements related to low-level data structures in XML + output. + + * scribo/core/component_features_data.hh, + * scribo/core/component_info.hh, + * scribo/core/component_set.hh, + * scribo/core/document.hh, + * scribo/core/line_links.hh, + * scribo/core/paragraph_info.hh, + * scribo/core/paragraph_set.hh: Add operator==(). + + * scribo/io/xml/internal/full_xml_visitor.hh: Save more data for + groups, separators and paragraphs. + + * scribo/io/xml/internal/save_image_to_xml.hh: New. + + * scribo/io/xml/load.hh: Load new saved data. + + * tests/unit_test/cond_tests_qt: Add save_image_to_xml.hh. + +2011-05-03 Guillaume Lazzara <lazzara@lrde.epita.fr> + * scribo/toolchain/internal/content_in_doc_functor.hh: Make use of component::extract::alignments. diff --git a/scribo/scribo/core/component_features_data.hh b/scribo/scribo/core/component_features_data.hh index 07b3e4a..b0a4e47 100644 --- a/scribo/scribo/core/component_features_data.hh +++ b/scribo/scribo/core/component_features_data.hh @@ -49,9 +49,14 @@ namespace scribo std::ostream& operator<<(std::ostream& ostr, const component_features_data& data); + bool + operator==(const component_features_data& lhs, + const component_features_data& rhs); + # ifndef MLN_INCLUDE_ONLY + inline component_features_data::component_features_data() : valid(false) { @@ -69,6 +74,17 @@ namespace scribo << "]" << std::endl; } + + bool + operator==(const component_features_data& lhs, + const component_features_data& rhs) + { + return + lhs.valid == rhs.valid + && lhs.color == rhs.color + && lhs.boldness == rhs.boldness; + } + # endif // ! MLN_INCLUDE_ONLY } // end of namespace scribo diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh index 164a242..1f94076 100644 --- a/scribo/scribo/core/component_info.hh +++ b/scribo/scribo/core/component_info.hh @@ -97,6 +97,9 @@ namespace scribo std::ostream& operator<<(std::ostream& ostr, const component_info& info); + bool + operator==(const component_info& lhs, const component_info& rhs); + # ifndef MLN_INCLUDE_ONLY @@ -230,6 +233,20 @@ namespace scribo << ")" << std::endl; } + inline + bool + operator==(const component_info& lhs, const component_info& rhs) + { + + return + lhs.id() == rhs.id() + && lhs.bbox() == rhs.bbox() + && lhs.mass_center() == rhs.mass_center() + && lhs.card() == rhs.card() + && lhs.features() == rhs.features() + && lhs.tag() == rhs.tag() + && lhs.type() == rhs.type(); + } # endif // ! MLN_INCLUDE_ONLY diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh index d729802..3587302 100644 --- a/scribo/scribo/core/component_set.hh +++ b/scribo/scribo/core/component_set.hh @@ -459,22 +459,6 @@ namespace scribo return data_->infos_[id]; } -// template <typename L> -// inline -// const component_info& -// component_set<L>::operator()(const mln_value(L)& id) const -// { -// return data_->infos_[id]; -// } - -// template <typename L> -// inline -// component_info& -// component_set<L>::operator()(const mln_value(L)& id) -// { -// return data_->infos_[id]; -// } - template <typename L> inline const component_info& @@ -650,7 +634,25 @@ namespace scribo bool operator==(const component_set<L>& lhs, const component_set<L>& rhs) { - return lhs.id_() == rhs.id_(); + if (! (lhs.labeled_image() == rhs.labeled_image())) + std::cout << "comp.lbl" << std::endl; + + if (! (lhs.separators() == rhs.separators())) + std::cout << "comp.seps" << std::endl; + + if (! (lhs.nelements() == rhs.nelements() + && lhs.labeled_image() == rhs.labeled_image() + && lhs.separators() == rhs.separators())) + return false; + + for_all_comps(c, lhs) + if (! (lhs(c) == rhs(c))) + { + std::cout << "comp.info" << std::endl; + return false; + } + + return true; } template <typename L> diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh index f38b20b..e287c1d 100644 --- a/scribo/scribo/core/document.hh +++ b/scribo/scribo/core/document.hh @@ -88,19 +88,27 @@ namespace scribo bool has_whitespace_seps() const; const mln::image2d<bool>& whitespace_seps() const; const component_set<L>& whitespace_seps_comps() const; - void set_whitespace_separators(const image2d<bool>& whitespace_seps); + void set_whitespace_separators(const image2d<bool>& whitespace_seps, + const component_set<L>& whitespace_seps_comps); // Horizontal separators bool has_hline_seps() const; const mln::image2d<bool>& hline_seps() const; const component_set<L>& hline_seps_comps() const; + // Set vline separators image. The component is automatically computed. void set_hline_separators(const image2d<bool>& line_seps); + void set_hline_separators(const image2d<bool>& line_seps, + const component_set<L>& hline_seps_comps); // Vertical separators bool has_vline_seps() const; const mln::image2d<bool>& vline_seps() const; const component_set<L>& vline_seps_comps() const; - void set_vline_separators(const image2d<bool>& line_seps); + + // Set vline separators image. The component is automatically computed. + void set_vline_separators(const image2d<bool>& vline_seps); + void set_vline_separators(const image2d<bool>& vline_seps, + const component_set<L>& vline_seps_comps); const mln::image2d<value::rgb8>& image() const; void set_image(const mln::image2d<value::rgb8>& image); @@ -128,6 +136,9 @@ namespace scribo }; + template <typename L> + bool operator==(const document<L>& lhs, const document<L>& rhs); + # ifndef MLN_INCLUDE_ONLY @@ -291,14 +302,11 @@ namespace scribo template <typename L> void - document<L>::set_whitespace_separators(const image2d<bool>& whitespace_seps) + document<L>::set_whitespace_separators(const image2d<bool>& whitespace_seps, + const component_set<L>& whitespace_seps_comps) { whitespace_seps_ = whitespace_seps; - - mln_value(L) ncomps; - whitespace_seps_comps_ = primitive::extract::components(whitespace_seps, - mln::c8(), ncomps, - component::WhitespaceSeparator); + whitespace_seps_comps_ = whitespace_seps_comps; } @@ -340,6 +348,16 @@ namespace scribo template <typename L> + void + document<L>::set_hline_separators(const image2d<bool>& hline_seps, + const component_set<L>& hline_seps_comps) + { + hline_seps_ = hline_seps; + hline_seps_comps_ = hline_seps_comps; + } + + + template <typename L> bool document<L>::has_vline_seps() const { @@ -377,6 +395,16 @@ namespace scribo template <typename L> + void + document<L>::set_vline_separators(const image2d<bool>& vline_seps, + const component_set<L>& vline_seps_comps) + { + vline_seps_ = vline_seps; + vline_seps_comps_ = vline_seps_comps; + } + + + template <typename L> const mln::image2d<value::rgb8>& document<L>::image() const { @@ -408,6 +436,30 @@ namespace scribo } + template <typename L> + bool operator==(const document<L>& lhs, const document<L>& rhs) + { + + + return + lhs.filename() == rhs.filename() + && lhs.image() == rhs.image() + && lhs.binary_image() == rhs.binary_image() + && lhs.has_text() == rhs.has_text() + && lhs.paragraphs() == rhs.paragraphs() + && lhs.has_elements() == rhs.has_elements() + && lhs.elements() == rhs.elements() + && lhs.has_whitespace_seps() == rhs.has_whitespace_seps() + && lhs.whitespace_seps() == rhs.whitespace_seps() + && lhs.whitespace_seps_comps() == rhs.whitespace_seps_comps() + && lhs.has_hline_seps() == rhs.has_hline_seps() + && lhs.hline_seps() == rhs.hline_seps() + && lhs.hline_seps_comps() == rhs.hline_seps_comps() + && lhs.has_vline_seps() == rhs.has_vline_seps() + && lhs.vline_seps() == rhs.vline_seps() + && lhs.vline_seps_comps() == rhs.vline_seps_comps(); + } + # endif // ! MLN_INCLUDE_ONLY diff --git a/scribo/scribo/core/line_links.hh b/scribo/scribo/core/line_links.hh index ab36a73..b7b438c 100644 --- a/scribo/scribo/core/line_links.hh +++ b/scribo/scribo/core/line_links.hh @@ -103,6 +103,10 @@ namespace scribo std::ostream& operator<<(std::ostream& ostr, const line_links<L>& links); + template <typename L> + bool + operator==(const line_links<L>& lhs, const line_links<L>& rhs); + # ifndef MLN_INCLUDE_ONLY @@ -254,6 +258,14 @@ namespace scribo } + template <typename L> + bool + operator==(const line_links<L>& lhs, const line_links<L>& rhs) + { + return lhs.lines() == rhs.lines() + && lhs.line_to_link() == rhs.line_to_link(); + } + # endif // ! MLN_INCLUDE_ONLY diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh index 557ded7..52068a7 100644 --- a/scribo/scribo/core/paragraph_info.hh +++ b/scribo/scribo/core/paragraph_info.hh @@ -89,6 +89,8 @@ namespace scribo template <typename L> std::ostream& operator<<(std::ostream& ostr, const paragraph_info<L>& info); + template <typename L> + bool operator==(const paragraph_info<L>& lhs, const paragraph_info<L>& rhs); # ifndef MLN_INCLUDE_ONLY @@ -248,6 +250,22 @@ namespace scribo } template <typename L> + bool + operator==(const paragraph_info<L>& lhs, const paragraph_info<L>& rhs) + { + + + + return + lhs.line_ids() == rhs.line_ids() + && lhs.bbox() == rhs.bbox() + && lhs.llinks() == rhs.llinks() + && lhs.color() == rhs.color() + && lhs.color_reliability() == rhs.color_reliability() + && lhs.needs_stats_update() == rhs.needs_stats_update(); + } + + template <typename L> std::ostream& operator<<(std::ostream& ostr, const paragraph_info<L>& info) { @@ -259,6 +277,7 @@ namespace scribo << ")" << std::endl; } + # endif // ! MLN_INCLUDE_ONLY } // end of namespace scribo diff --git a/scribo/scribo/core/paragraph_set.hh b/scribo/scribo/core/paragraph_set.hh index ec9f51b..242501d 100644 --- a/scribo/scribo/core/paragraph_set.hh +++ b/scribo/scribo/core/paragraph_set.hh @@ -86,6 +86,8 @@ namespace scribo }; + template <typename L> + bool operator==(const paragraph_set<L>& lhs, const paragraph_set<L>& rhs); namespace make { @@ -200,6 +202,25 @@ namespace scribo } + template <typename L> + bool operator==(const paragraph_set<L>& lhs, const paragraph_set<L>& rhs) + { + if (! (lhs.lines() == rhs.lines() && lhs.nelements() == rhs.nelements())) + { + return false; + } + + for_all_paragraphs(p, lhs) + if (!(lhs(p) == rhs(p))) + { + std::cout << "paragraph.info" << std::endl; + return false; + } + + return true; + } + + namespace make { diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh b/scribo/scribo/io/xml/internal/full_xml_visitor.hh index bba7691..c294bbc 100644 --- a/scribo/scribo/io/xml/internal/full_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh @@ -43,13 +43,10 @@ # include <scribo/core/line_links.hh> # include <scribo/core/line_info.hh> +# include <scribo/io/xml/internal/save_image_to_xml.hh> # include <scribo/io/xml/internal/print_box_coords.hh> # include <scribo/io/xml/internal/print_page_preambule.hh> -// Compression level 0-9. 9 is the best but is slow. -// 5 seems to be a good compromise. -# define COMPRESSION_LEVEL 5 - namespace scribo { @@ -157,20 +154,46 @@ namespace scribo if (doc.has_elements()) { const component_set<L>& elts = doc.elements(); + + output << " <elements>" << std::endl; + elts.accept(*this); + for_all_comps(e, elts) if (elts(e).is_valid()) elts(e).accept(*this); + + output << " </elements>" << std::endl; } // line seraparators if (doc.has_hline_seps()) + { + output << " <hlines_separators>" << std::endl; + doc.hline_seps_comps().accept(*this); + for_all_comps(c, doc.hline_seps_comps()) doc.hline_seps_comps()(c).accept(*this); + + save_image_to_xml(output, doc.hline_seps(), + "hlines_separators_image"); + + output << " </hlines_separators>" << std::endl; + } if (doc.has_vline_seps()) + { + output << " <vlines_separators>" << std::endl; + doc.vline_seps_comps().accept(*this); + for_all_comps(c, doc.vline_seps_comps()) doc.vline_seps_comps()(c).accept(*this); + save_image_to_xml(output, doc.vline_seps(), + "vlines_separators_image"); + + output << " </vlines_separators>" << std::endl; + } + // Whitespace seraparators if (doc.has_whitespace_seps()) @@ -178,8 +201,16 @@ namespace scribo const component_set<L>& whitespace_seps_comps = doc.whitespace_seps_comps(); + output << " <whitespaces_delimitors>" << std::endl; + whitespace_seps_comps.accept(*this); + for_all_comps(c, whitespace_seps_comps) whitespace_seps_comps(c).accept(*this); + + save_image_to_xml(output, doc.whitespace_seps(), + "whitespaces_delimitors_image"); + + output << " </whitespaces_delimitors>" << std::endl; } output << " </page>" << std::endl; @@ -212,13 +243,26 @@ namespace scribo void full_xml_visitor::visit(const object_groups<L>& groups) const { - output << " <object_groups>" << std::endl; + output << " <object_groups ngroups=\"" << groups.nelements() + << "\">" << std::endl; + for_all_groups(g, groups) { - output << " <group " - << " object_id=\"" << g - << "\" group_id=\"" << groups(g) - << "\"/>" << std::endl; + output << " <group id=\"" << groups(g).id() + << "\" valid=\"" << groups(g).is_valid() + << "\" pixel_area=\"" << groups(g).pixel_area() + << "\" pmin_x=\"" << groups(g).bbox().pmin().row() + << "\" pmin_y=\"" << groups(g).bbox().pmin().col() + << "\" pmax_x=\"" << groups(g).bbox().pmax().row() + << "\" pmax_y=\"" << groups(g).bbox().pmax().col() + << "\">" << std::endl; + + for_all_elements(e, groups(g).component_ids()) + output << " <group_member comp_id=\"" + << groups(g).component_ids()(e) + << "\"/>" << std::endl; + + output << " </group>" << std::endl; } output << " </object_groups>" << std::endl; } @@ -263,51 +307,36 @@ namespace scribo << "\" pmin_x=\"" << comp_set(c).bbox().pmin().col() << "\" pmin_y=\"" << comp_set(c).bbox().pmin().row() << "\" pmax_x=\"" << comp_set(c).bbox().pmax().col() - << "\" pmax_y=\"" << comp_set(c).bbox().pmax().row() - << "\"/>" << std::endl; + << "\" pmax_y=\"" << comp_set(c).bbox().pmax().row(); + + if (comp_set(c).has_features()) + { + output << "\">" << std::endl; + + output << " <component_features" + << " valid=\"" << comp_set(c).features().valid + << "\" color=\"" << comp_set(c).features().color + << "\" boldness=\"" << comp_set(c).features().boldness + << "\"/>" << std::endl; + + output << " </component_info>" << std::endl; + } + else + output << "\"/>" << std::endl; } // Save labeled image { const L& lbl = comp_set.labeled_image(); - output << "<labeled_image " - << " height=\"" << lbl.domain().height() - << "\" width=\"" << lbl.domain().width() << "\">" - << "<![CDATA["; - - // FIXME: Try to avoid that! - border::resize(lbl, 0); - QByteArray - lbl64 = QByteArray::fromRawData((const char *)lbl.buffer(), - lbl.nelements() * sizeof(mln_value(L))); - lbl64 = qCompress(lbl64, COMPRESSION_LEVEL); - lbl64 = lbl64.toBase64(); - - output.write(lbl64.data(), lbl64.size()); - - output << "]]></labeled_image>" << std::endl; + save_image_to_xml(output, lbl, "labeled_image"); } // Save separators image if (comp_set.has_separators()) { const mln_ch_value(L,bool)& seps = comp_set.separators(); - output << "<separators_image " - << " height=\"" << seps.domain().height() - << "\" width=\"" << seps.domain().width() << "\">" - << "<![CDATA["; - - border::resize(seps, 0); - QByteArray - seps64 = QByteArray::fromRawData((const char *)seps.buffer(), - seps.nelements() * sizeof(bool)); - seps64 = qCompress(seps64, COMPRESSION_LEVEL); - seps64 = seps64.toBase64(); - - output.write(seps64.data(), seps64.size()); - - output << "]]></separators_image>" << std::endl; + save_image_to_xml(output, seps, "separators_image"); } output << "</component_set>" << std::endl; @@ -394,7 +423,9 @@ namespace scribo << "\" x_height=\"" << lines(fid).x_height() << "\" d_height=\"" << lines(fid).d_height() << "\" a_height=\"" << lines(fid).a_height() - << "\" char_width=\"" << lines(fid).char_width(); + << "\" char_width=\"" << lines(fid).char_width() + << "\" color=\"" << parset(p).color() + << "\" color_reliability=\"" << parset(p).color_reliability(); // End of EXTENSIONS output << "\">" << std::endl; @@ -469,6 +500,5 @@ namespace scribo } // end of namespace scribo -# undef COMPRESSION_LEVEL #endif // SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH diff --git a/scribo/scribo/io/xml/internal/print_image_coords.hh b/scribo/scribo/io/xml/internal/save_image_to_xml.hh similarity index 54% copy from scribo/scribo/io/xml/internal/print_image_coords.hh copy to scribo/scribo/io/xml/internal/save_image_to_xml.hh index ebfe402..3f38337 100644 --- a/scribo/scribo/io/xml/internal/print_image_coords.hh +++ b/scribo/scribo/io/xml/internal/save_image_to_xml.hh @@ -23,15 +23,24 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU General Public License. -#ifndef SCRIBO_IO_XML_INTERNAL_PRINT_IMAGE_COORDS_HH -# define SCRIBO_IO_XML_INTERNAL_PRINT_IMAGE_COORDS_HH +#ifndef SCRIBO_IO_XML_INTERNAL_SAVE_IMAGE_TO_XML_HH +# define SCRIBO_IO_XML_INTERNAL_SAVE_IMAGE_TO_XML_HH /// \file /// -/// \brief Prints box2d coordinates to XML data. +/// Save an image as XML data. -# include <fstream> -# include <mln/core/concept/site_set.hh> +# include <iostream> + +# include <QtXml> + +# include <mln/core/concept/image.hh> +# include <mln/border/resize.hh> + + +// Compression level 0-9. 9 is the best but is slow. +// 5 seems to be a good compromise. +# define COMPRESSION_LEVEL 5 namespace scribo { @@ -44,42 +53,43 @@ namespace scribo namespace internal { - using namespace mln; - /*! \brief Prints box2d coordinates to XML data. - */ - template <typename S> + template <typename I> void - print_image_coords(std::ofstream& ostr, const mln::Site_Set<S>& b, - const char *space); - + save_image_to_xml(std::ostream& output, const Image<I>& ima, + const char *qname); # ifndef MLN_INCLUDE_ONLY - - template <typename S> + template <typename I> void - print_image_coords(std::ofstream& ostr, const mln::Site_Set<S>& b_, - const char *space) + save_image_to_xml(std::ostream& output, const Image<I>& ima_, + const char *qname) { - std::string sc = space; - std::string sp = sc + " "; + trace::entering("scribo::io::xml::internal::save_image"); - const S& b = exact(b_); - mln_precondition(b.is_valid()); + mln_precondition(exact(ima_).is_valid()); + const I& ima = exact(ima_); - ostr << sc << "<coords>" << std::endl; + output << "<" << qname + << " height=\"" << ima.domain().height() + << "\" width=\"" << ima.domain().width() << "\">" + << "<![CDATA["; - mln_piter(S) p(b); - for_all(p) - ostr << sp << "<point x=\"" << p.col() - << "\" y=\"" << p.row() << "\"/>" - << std::endl; + mln::border::resize(ima, 0); + QByteArray + seps64 = QByteArray::fromRawData((const char *)ima.buffer(), + ima.nelements() * sizeof(mln_value(I))); + seps64 = qCompress(seps64, COMPRESSION_LEVEL); + seps64 = seps64.toBase64(); - ostr << sc << "</coords>" << std::endl; - } + output.write(seps64.data(), seps64.size()); + output << "]]></" << qname << ">" << std::endl; + + trace::exiting("scribo::io::xml::internal::save_image"); + } # endif // ! MLN_INCLUDE_ONLY @@ -91,4 +101,6 @@ namespace scribo } // end of namespace scribo -#endif // ! SCRIBO_IO_XML_INTERNAL_PRINT_IMAGE_COORDS_HH +# undef COMPRESSION_LEVEL + +#endif // ! SCRIBO_IO_XML_INTERNAL_SAVE_IMAGE_TO_XML_HH diff --git a/scribo/scribo/io/xml/load.hh b/scribo/scribo/io/xml/load.hh index 8042c75..8d89085 100644 --- a/scribo/scribo/io/xml/load.hh +++ b/scribo/scribo/io/xml/load.hh @@ -88,6 +88,8 @@ namespace scribo None, ComponentSet, ComponentInfo, + ComponentFeatures, + Elements, LabeledImage, SeparatorsImage, ObjectLinks, @@ -95,6 +97,7 @@ namespace scribo Point, Link, Group, + GroupMember, Line, LineLinks, LineLink, @@ -102,7 +105,13 @@ namespace scribo TextRegion, CompIdList, CompId, - Page + Page, + WhitespacesDelimitors, + HLineSeparators, + VLineSeparators, + WhitespacesDelimitorsImage, + HLineSeparatorsImage, + VLineSeparatorsImage, }; @@ -116,6 +125,8 @@ namespace scribo static const ModeData mode_data[] = { { "component_set", ComponentSet }, { "component_info", ComponentInfo }, + { "component_features", ComponentFeatures }, + { "elements", Elements }, { "labeled_image", LabeledImage }, { "separators_image", SeparatorsImage }, { "object_links", ObjectLinks }, @@ -123,6 +134,7 @@ namespace scribo { "point", Point }, { "link", Link }, { "group", Group }, + { "group_member", GroupMember }, { "line", Line }, { "line_links", LineLinks }, { "line_link", LineLink }, @@ -131,10 +143,34 @@ namespace scribo { "compid_list", CompIdList }, { "compid", CompId }, { "page", Page }, + { "whitespaces_delimitors", WhitespacesDelimitors }, + { "hlines_separators", HLineSeparators }, + { "vlines_separators", VLineSeparators }, + { "whitespaces_delimitors_image", WhitespacesDelimitorsImage }, + { "hlines_separators_image", HLineSeparatorsImage }, + { "vlines_separators_image", VLineSeparatorsImage }, { 0, None } }; + namespace internal + { + + value::rgb8 parse_color(const QString& color_str) + { + QString color = color_str; + color.chop(1); + color = color.remove(0, 1); + QStringList rgb = color.split(','); + + return + value::rgb8(rgb.at(0).toInt(), + rgb.at(1).toInt(), + rgb.at(2).toInt()); + } + + } + template <typename L> class xml_handler : public QXmlDefaultHandler { @@ -142,7 +178,8 @@ namespace scribo typedef mln_ch_value(L,bool) B; public: - xml_handler() : current_paragraph_id(1) { lines_data.append(line_info<L>()); } // line info id starts from 1. + xml_handler(document<L>& doc_) : current_paragraph_id(1), doc(doc_) + { lines_data.append(line_info<L>()); } // line info id starts from 1. virtual bool @@ -185,6 +222,20 @@ namespace scribo break; + case ComponentFeatures: + { + if (atts.value("valid").toInt()) + { + component_features_data comp_features; + comp_features.valid = true; + comp_features.color = internal::parse_color(atts.value("color")); + comp_features.boldness = atts.value("boldness").toFloat(); + + comp_set_data->infos_.last().update_features(comp_features); + } + } + break; + // Object links case ObjectLinks: { @@ -197,8 +248,9 @@ namespace scribo // Object groups case ObjectGroups: { - // qDebug() << "object_groups created"; - groups = object_groups<L>(links); + //qDebug() << "Processing object_groups"; + group_info_.reserve(atts.value("ngroups").toInt()); + group_info_.resize(1); } break; @@ -225,6 +277,8 @@ namespace scribo // qDebug() << "TextRegion"; current_paragraph = paragraph_info<L>(llinks); + current_paragraph.set_color_(internal::parse_color(atts.value("color"))); + current_paragraph.set_color_reliability_(atts.value("color_reliability").toFloat()); } break; @@ -257,6 +311,7 @@ namespace scribo line_data->a_height_ = atts.value("a_height").toInt(); line_data->char_space_ = atts.value("kerning").toInt(); line_data->char_width_ = atts.value("char_width").toInt(); + line_data->char_width_ = atts.value("char_width").toInt(); line_data->word_space_ = 0; line_data->reading_direction_ = line::LeftToRight; @@ -266,6 +321,13 @@ namespace scribo line_data->reading_orientation_ = atts.value("txt_reading_orientation").toInt(); line_data->indented_ = (atts.value("txt_indented") == "false" ? false : true); + + line_data->boldness_ = atts.value("boldness").toFloat(); + line_data->boldness_reliability_ = atts.value("boldness_reliability").toFloat(); + line_data->color_ = internal::parse_color(atts.value("color")); + + line_data->color_reliability_ = atts.value("color_reliability").toFloat(); + bbox.init(); } break; @@ -282,7 +344,7 @@ namespace scribo // CompId case CompId: { - line_data->components_.append(atts.value("value").toInt()); + line_data->component_ids_.append(atts.value("value").toInt()); } break; @@ -316,10 +378,21 @@ namespace scribo break; + // Separators/delimitor images + case WhitespacesDelimitorsImage: + case HLineSeparatorsImage: + case VLineSeparatorsImage: + { + width = atts.value("width").toInt(); + height = atts.value("height").toInt(); + seps = B(mln::make::box2d(height, width), 0); // No border + } + break; + // Link case Link: { - links(atts.value("from").toInt()) = atts.value("to").toInt(); + links.update(atts.value("from").toInt(), atts.value("to").toInt()); } break; @@ -327,7 +400,20 @@ namespace scribo // Group case Group: { - groups(atts.value("object_id").toInt()) = atts.value("group_id").toInt(); + group_info_.append(group_info(atts.value("id").toInt(), + atts.value("pixel_area").toInt(), + mln::make::box2d(atts.value("pmin_x").toInt(), + atts.value("pmin_y").toInt(), + atts.value("pmax_x").toInt(), + atts.value("pmax_y").toInt()), + atts.value("valid").toInt())); + } + break; + + // GroupMember + case GroupMember: + { + component_ids.append(atts.value("comp_id").toInt()); } break; @@ -352,6 +438,7 @@ namespace scribo { // qDebug() << "Component set done"; components = component_set<L>(comp_set_data); + } break; @@ -382,6 +469,46 @@ namespace scribo // qDebug() << "Page done"; lines.update_line_data_(lines_data); parset = paragraph_set<L>(par_data); + doc.set_paragraphs(parset); + } + break; + + // ObjectGroups + case ObjectGroups: + { + groups = object_groups<L>(links, group_info_); + } + break; + + // Group + case Group: + { + group_info_.last().component_ids_() = component_ids; + component_ids.clear(); + } + break; + + case Elements: + { + doc.set_elements(components); + } + break; + + case WhitespacesDelimitors: + { + doc.set_whitespace_separators(seps, components); + } + break; + + case HLineSeparators: + { + doc.set_hline_separators(seps, components); + } + break; + + case VLineSeparators: + { + doc.set_vline_separators(seps, components); } break; @@ -419,6 +546,17 @@ namespace scribo } break; + case WhitespacesDelimitorsImage: + case HLineSeparatorsImage: + case VLineSeparatorsImage: + { + QByteArray data = ch.toAscii(); + data = QByteArray::fromBase64(data); + data = qUncompress(data); + memcpy((char *) seps.buffer(), data.data(), data.size()); + } + break; + default: ; } @@ -453,7 +591,10 @@ namespace scribo component_set<L> components; object_links<L> links; + object_groups<L> groups; + mln::util::array<component_id_t> component_ids; + mln::util::array<group_info> group_info_; // Lines unsigned current_line_id; @@ -468,6 +609,11 @@ namespace scribo mln::util::array<line_info<L> > lines_data; line_set<L> lines; + + // Delimitors/separators + B seps; // Temporary image. + + document<L>& doc; }; @@ -484,7 +630,7 @@ namespace scribo load_extended(document<L>& doc, const std::string& output_name) { - xml_handler<L> handler; + xml_handler<L> handler(doc); QXmlSimpleReader reader; reader.setContentHandler(&handler); @@ -498,8 +644,6 @@ namespace scribo QXmlInputSource xmlInputSource(&file); if (reader.parse(xmlInputSource)) qDebug() << "Loaded successfuly"; - - doc.set_paragraphs(handler.parset); } } // end of namespace scribo::io::xml::internal diff --git a/scribo/tests/unit_test/cond_tests_qt b/scribo/tests/unit_test/cond_tests_qt index f7bc42e..4f4b667 100644 --- a/scribo/tests/unit_test/cond_tests_qt +++ b/scribo/tests/unit_test/cond_tests_qt @@ -1,4 +1,5 @@ scribo/convert/from_base64.hh scribo/convert/from_qimage.hh scribo/io/xml/internal/full_xml_visitor.hh +scribo/io/xml/internal/save_image_to_xml.hh scribo/io/xml/load.hh -- 1.5.6.5
participants (1)
-
Guillaume Lazzara