* scribo/core/component_features_data.hh,
* scribo/core/component_info.hh,
* scribo/core/component_set.hh,
* scribo/core/document.hh,
* scribo/core/line_links.hh,
* scribo/core/paragraph_info.hh,
* scribo/core/paragraph_set.hh: Add operator==().
* scribo/io/xml/internal/full_xml_visitor.hh: Save more data for
groups, separators and paragraphs.
* scribo/io/xml/internal/save_image_to_xml.hh: New.
* scribo/io/xml/load.hh: Load new saved data.
* tests/unit_test/cond_tests_qt: Add save_image_to_xml.hh.
---
scribo/ChangeLog | 22 +++
scribo/scribo/core/component_features_data.hh | 16 ++
scribo/scribo/core/component_info.hh | 17 ++
scribo/scribo/core/component_set.hh | 36 +++--
scribo/scribo/core/document.hh | 68 +++++++-
scribo/scribo/core/line_links.hh | 12 ++
scribo/scribo/core/paragraph_info.hh | 19 +++
scribo/scribo/core/paragraph_set.hh | 21 +++
scribo/scribo/io/xml/internal/full_xml_visitor.hh | 118 +++++++++------
...{print_image_coords.hh => save_image_to_xml.hh} | 70 +++++----
scribo/scribo/io/xml/load.hh | 164 ++++++++++++++++++--
scribo/tests/unit_test/cond_tests_qt | 1 +
12 files changed, 456 insertions(+), 108 deletions(-)
copy scribo/scribo/io/xml/internal/{print_image_coords.hh => save_image_to_xml.hh}
(54%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 637fca8..350b536 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,27 @@
2011-05-03 Guillaume Lazzara <lazzara(a)lrde.epita.fr>
+ Several improvements related to low-level data structures in XML
+ output.
+
+ * scribo/core/component_features_data.hh,
+ * scribo/core/component_info.hh,
+ * scribo/core/component_set.hh,
+ * scribo/core/document.hh,
+ * scribo/core/line_links.hh,
+ * scribo/core/paragraph_info.hh,
+ * scribo/core/paragraph_set.hh: Add operator==().
+
+ * scribo/io/xml/internal/full_xml_visitor.hh: Save more data for
+ groups, separators and paragraphs.
+
+ * scribo/io/xml/internal/save_image_to_xml.hh: New.
+
+ * scribo/io/xml/load.hh: Load new saved data.
+
+ * tests/unit_test/cond_tests_qt: Add save_image_to_xml.hh.
+
+2011-05-03 Guillaume Lazzara <lazzara(a)lrde.epita.fr>
+
* scribo/toolchain/internal/content_in_doc_functor.hh: Make use of
component::extract::alignments.
diff --git a/scribo/scribo/core/component_features_data.hh
b/scribo/scribo/core/component_features_data.hh
index 07b3e4a..b0a4e47 100644
--- a/scribo/scribo/core/component_features_data.hh
+++ b/scribo/scribo/core/component_features_data.hh
@@ -49,9 +49,14 @@ namespace scribo
std::ostream&
operator<<(std::ostream& ostr, const component_features_data& data);
+ bool
+ operator==(const component_features_data& lhs,
+ const component_features_data& rhs);
+
# ifndef MLN_INCLUDE_ONLY
+ inline
component_features_data::component_features_data()
: valid(false)
{
@@ -69,6 +74,17 @@ namespace scribo
<< "]" << std::endl;
}
+
+ // Member-wise equality of two component_features_data: compares the
+ // `valid' flag, the color and the boldness, in that order.
+ //
+ // This is a non-template free function defined in a header, so it
+ // must be `inline'; otherwise every translation unit including this
+ // file emits its own definition and linking fails.
+ //
+ // NOTE(review): boldness is compared with an exact `=='; if it is a
+ // floating point value, a round trip through text may not preserve
+ // it bit for bit -- confirm this is the intended semantics.
+ inline
+ bool
+ operator==(const component_features_data& lhs,
+ const component_features_data& rhs)
+ {
+ return
+ lhs.valid == rhs.valid
+ && lhs.color == rhs.color
+ && lhs.boldness == rhs.boldness;
+ }
+
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace scribo
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 164a242..1f94076 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -97,6 +97,9 @@ namespace scribo
std::ostream&
operator<<(std::ostream& ostr, const component_info& info);
+ bool
+ operator==(const component_info& lhs, const component_info& rhs);
+
# ifndef MLN_INCLUDE_ONLY
@@ -230,6 +233,20 @@ namespace scribo
<< ")" << std::endl;
}
+ // Member-wise equality of two component_info. The fields are checked
+ // in the order id, bbox, mass_center, card, features, tag, type and
+ // the comparison stops at the first field that differs.
+ inline
+ bool
+ operator==(const component_info& lhs, const component_info& rhs)
+ {
+ if (! (lhs.id() == rhs.id()))
+ return false;
+ if (! (lhs.bbox() == rhs.bbox()))
+ return false;
+ if (! (lhs.mass_center() == rhs.mass_center()))
+ return false;
+ if (! (lhs.card() == rhs.card()))
+ return false;
+ if (! (lhs.features() == rhs.features()))
+ return false;
+ if (! (lhs.tag() == rhs.tag()))
+ return false;
+
+ return lhs.type() == rhs.type();
+ }
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh
index d729802..3587302 100644
--- a/scribo/scribo/core/component_set.hh
+++ b/scribo/scribo/core/component_set.hh
@@ -459,22 +459,6 @@ namespace scribo
return data_->infos_[id];
}
-// template <typename L>
-// inline
-// const component_info&
-// component_set<L>::operator()(const mln_value(L)& id) const
-// {
-// return data_->infos_[id];
-// }
-
-// template <typename L>
-// inline
-// component_info&
-// component_set<L>::operator()(const mln_value(L)& id)
-// {
-// return data_->infos_[id];
-// }
-
template <typename L>
inline
const component_info&
@@ -650,7 +634,25 @@ namespace scribo
bool
operator==(const component_set<L>& lhs, const component_set<L>&
rhs)
{
- return lhs.id_() == rhs.id_();
+ // Deep comparison of two component sets: number of elements,
+ // labeled image, separator image, then every component_info.
+ // A short tag is printed on std::cout for each part that differs.
+ //
+ // Each image comparison is evaluated exactly once and its result
+ // reused below, instead of comparing the same images a second time
+ // in the combined test.
+ const bool same_lbl = (lhs.labeled_image() == rhs.labeled_image());
+ if (! same_lbl)
+ std::cout << "comp.lbl" << std::endl;
+
+ const bool same_seps = (lhs.separators() == rhs.separators());
+ if (! same_seps)
+ std::cout << "comp.seps" << std::endl;
+
+ if (! (lhs.nelements() == rhs.nelements() && same_lbl && same_seps))
+ return false;
+
+ // Both sets have the same number of elements here, so indexing rhs
+ // with ids taken from lhs stays in range.
+ for_all_comps(c, lhs)
+ if (! (lhs(c) == rhs(c)))
+ {
+ std::cout << "comp.info" << std::endl;
+ return false;
+ }
+
+ return true;
}
template <typename L>
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh
index f38b20b..e287c1d 100644
--- a/scribo/scribo/core/document.hh
+++ b/scribo/scribo/core/document.hh
@@ -88,19 +88,27 @@ namespace scribo
bool has_whitespace_seps() const;
const mln::image2d<bool>& whitespace_seps() const;
const component_set<L>& whitespace_seps_comps() const;
- void set_whitespace_separators(const image2d<bool>& whitespace_seps);
+ void set_whitespace_separators(const image2d<bool>& whitespace_seps,
+ const component_set<L>& whitespace_seps_comps);
// Horizontal separators
bool has_hline_seps() const;
const mln::image2d<bool>& hline_seps() const;
const component_set<L>& hline_seps_comps() const;
+ // Set hline separators image. The component is automatically computed.
void set_hline_separators(const image2d<bool>& line_seps);
+ void set_hline_separators(const image2d<bool>& line_seps,
+ const component_set<L>& hline_seps_comps);
// Vertical separators
bool has_vline_seps() const;
const mln::image2d<bool>& vline_seps() const;
const component_set<L>& vline_seps_comps() const;
- void set_vline_separators(const image2d<bool>& line_seps);
+
+ // Set vline separators image. The component is automatically computed.
+ void set_vline_separators(const image2d<bool>& vline_seps);
+ void set_vline_separators(const image2d<bool>& vline_seps,
+ const component_set<L>& vline_seps_comps);
const mln::image2d<value::rgb8>& image() const;
void set_image(const mln::image2d<value::rgb8>& image);
@@ -128,6 +136,9 @@ namespace scribo
};
+ template <typename L>
+ bool operator==(const document<L>& lhs, const document<L>& rhs);
+
# ifndef MLN_INCLUDE_ONLY
@@ -291,14 +302,11 @@ namespace scribo
template <typename L>
void
- document<L>::set_whitespace_separators(const image2d<bool>&
whitespace_seps)
+ document<L>::set_whitespace_separators(const image2d<bool>&
whitespace_seps,
+ const component_set<L>& whitespace_seps_comps)
{
whitespace_seps_ = whitespace_seps;
-
- mln_value(L) ncomps;
- whitespace_seps_comps_ = primitive::extract::components(whitespace_seps,
- mln::c8(), ncomps,
- component::WhitespaceSeparator);
+ whitespace_seps_comps_ = whitespace_seps_comps;
}
@@ -340,6 +348,16 @@ namespace scribo
+ // Store a horizontal separator image together with the component
+ // set supplied by the caller. Both arguments are copied as given;
+ // nothing is recomputed in this overload.
template <typename L>
+ void
+ document<L>::set_hline_separators(const image2d<bool>& hline_seps,
+ const component_set<L>& hline_seps_comps)
+ {
+ hline_seps_comps_ = hline_seps_comps;
+ hline_seps_ = hline_seps;
+ }
+
+
+ template <typename L>
bool
document<L>::has_vline_seps() const
{
@@ -377,6 +395,16 @@ namespace scribo
+ // Store a vertical separator image together with the component
+ // set supplied by the caller. Both arguments are copied as given;
+ // nothing is recomputed in this overload.
template <typename L>
+ void
+ document<L>::set_vline_separators(const image2d<bool>& vline_seps,
+ const component_set<L>& vline_seps_comps)
+ {
+ vline_seps_comps_ = vline_seps_comps;
+ vline_seps_ = vline_seps;
+ }
+
+
+ template <typename L>
const mln::image2d<value::rgb8>&
document<L>::image() const
{
@@ -408,6 +436,30 @@ namespace scribo
}
+ // Equality of two documents. The following are compared in order,
+ // stopping at the first mismatch: filename, image, binary image,
+ // has_text flag and paragraphs, has_elements flag and elements, then
+ // for the whitespace, horizontal and vertical separators: the
+ // presence flag, the separator image and its component set.
+ template <typename L>
+ bool operator==(const document<L>& lhs, const document<L>& rhs)
+ {
+ // General data.
+ if (! (lhs.filename() == rhs.filename()))
+ return false;
+ if (! (lhs.image() == rhs.image()))
+ return false;
+ if (! (lhs.binary_image() == rhs.binary_image()))
+ return false;
+
+ // Text and elements.
+ if (! (lhs.has_text() == rhs.has_text()))
+ return false;
+ if (! (lhs.paragraphs() == rhs.paragraphs()))
+ return false;
+ if (! (lhs.has_elements() == rhs.has_elements()))
+ return false;
+ if (! (lhs.elements() == rhs.elements()))
+ return false;
+
+ // Whitespace separators.
+ if (! (lhs.has_whitespace_seps() == rhs.has_whitespace_seps()))
+ return false;
+ if (! (lhs.whitespace_seps() == rhs.whitespace_seps()))
+ return false;
+ if (! (lhs.whitespace_seps_comps() == rhs.whitespace_seps_comps()))
+ return false;
+
+ // Horizontal separators.
+ if (! (lhs.has_hline_seps() == rhs.has_hline_seps()))
+ return false;
+ if (! (lhs.hline_seps() == rhs.hline_seps()))
+ return false;
+ if (! (lhs.hline_seps_comps() == rhs.hline_seps_comps()))
+ return false;
+
+ // Vertical separators.
+ if (! (lhs.has_vline_seps() == rhs.has_vline_seps()))
+ return false;
+ if (! (lhs.vline_seps() == rhs.vline_seps()))
+ return false;
+
+ return lhs.vline_seps_comps() == rhs.vline_seps_comps();
+ }
+
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/core/line_links.hh b/scribo/scribo/core/line_links.hh
index ab36a73..b7b438c 100644
--- a/scribo/scribo/core/line_links.hh
+++ b/scribo/scribo/core/line_links.hh
@@ -103,6 +103,10 @@ namespace scribo
std::ostream&
operator<<(std::ostream& ostr, const line_links<L>& links);
+ template <typename L>
+ bool
+ operator==(const line_links<L>& lhs, const line_links<L>& rhs);
+
# ifndef MLN_INCLUDE_ONLY
@@ -254,6 +258,14 @@ namespace scribo
}
+ // Two line_links are equal when they refer to equal line sets and
+ // hold equal line-to-link data; the line sets are compared first.
+ template <typename L>
+ bool
+ operator==(const line_links<L>& lhs, const line_links<L>& rhs)
+ {
+ if (! (lhs.lines() == rhs.lines()))
+ return false;
+
+ return lhs.line_to_link() == rhs.line_to_link();
+ }
+
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/core/paragraph_info.hh b/scribo/scribo/core/paragraph_info.hh
index 557ded7..52068a7 100644
--- a/scribo/scribo/core/paragraph_info.hh
+++ b/scribo/scribo/core/paragraph_info.hh
@@ -89,6 +89,8 @@ namespace scribo
template <typename L>
std::ostream& operator<<(std::ostream& ostr, const
paragraph_info<L>& info);
+ template <typename L>
+ bool operator==(const paragraph_info<L>& lhs, const
paragraph_info<L>& rhs);
# ifndef MLN_INCLUDE_ONLY
@@ -248,6 +250,22 @@ namespace scribo
}
+ // Member-wise equality of two paragraph_info. The fields are checked
+ // in the order line_ids, bbox, llinks, color, color_reliability,
+ // needs_stats_update, stopping at the first one that differs.
template <typename L>
+ bool
+ operator==(const paragraph_info<L>& lhs, const paragraph_info<L>& rhs)
+ {
+ if (! (lhs.line_ids() == rhs.line_ids()))
+ return false;
+ if (! (lhs.bbox() == rhs.bbox()))
+ return false;
+ if (! (lhs.llinks() == rhs.llinks()))
+ return false;
+ if (! (lhs.color() == rhs.color()))
+ return false;
+ if (! (lhs.color_reliability() == rhs.color_reliability()))
+ return false;
+
+ return lhs.needs_stats_update() == rhs.needs_stats_update();
+ }
+
+ template <typename L>
std::ostream&
operator<<(std::ostream& ostr, const paragraph_info<L>& info)
{
@@ -259,6 +277,7 @@ namespace scribo
<< ")" << std::endl;
}
+
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace scribo
diff --git a/scribo/scribo/core/paragraph_set.hh b/scribo/scribo/core/paragraph_set.hh
index ec9f51b..242501d 100644
--- a/scribo/scribo/core/paragraph_set.hh
+++ b/scribo/scribo/core/paragraph_set.hh
@@ -86,6 +86,8 @@ namespace scribo
};
+ template <typename L>
+ bool operator==(const paragraph_set<L>& lhs, const
paragraph_set<L>& rhs);
namespace make
{
@@ -200,6 +202,25 @@ namespace scribo
}
+ // Equality of two paragraph sets: same underlying lines, same number
+ // of elements, and pairwise equal paragraph_info. When a paragraph
+ // differs, the tag "paragraph.info" is printed on std::cout before
+ // returning false.
+ template <typename L>
+ bool operator==(const paragraph_set<L>& lhs,
+ const paragraph_set<L>& rhs)
+ {
+ if (! (lhs.lines() == rhs.lines()))
+ return false;
+ if (! (lhs.nelements() == rhs.nelements()))
+ return false;
+
+ for_all_paragraphs(p, lhs)
+ {
+ if (! (lhs(p) == rhs(p)))
+ {
+ std::cout << "paragraph.info" << std::endl;
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+
namespace make
{
diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh
b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
index bba7691..c294bbc 100644
--- a/scribo/scribo/io/xml/internal/full_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
@@ -43,13 +43,10 @@
# include <scribo/core/line_links.hh>
# include <scribo/core/line_info.hh>
+# include <scribo/io/xml/internal/save_image_to_xml.hh>
# include <scribo/io/xml/internal/print_box_coords.hh>
# include <scribo/io/xml/internal/print_page_preambule.hh>
-// Compression level 0-9. 9 is the best but is slow.
-// 5 seems to be a good compromise.
-# define COMPRESSION_LEVEL 5
-
namespace scribo
{
@@ -157,20 +154,46 @@ namespace scribo
if (doc.has_elements())
{
const component_set<L>& elts = doc.elements();
+
+ output << " <elements>" << std::endl;
+ elts.accept(*this);
+
for_all_comps(e, elts)
if (elts(e).is_valid())
elts(e).accept(*this);
+
+ output << " </elements>" << std::endl;
}
// line seraparators
if (doc.has_hline_seps())
+ {
+ output << " <hlines_separators>" << std::endl;
+ doc.hline_seps_comps().accept(*this);
+
for_all_comps(c, doc.hline_seps_comps())
doc.hline_seps_comps()(c).accept(*this);
+
+ save_image_to_xml(output, doc.hline_seps(),
+ "hlines_separators_image");
+
+ output << " </hlines_separators>" << std::endl;
+ }
if (doc.has_vline_seps())
+ {
+ output << " <vlines_separators>" << std::endl;
+ doc.vline_seps_comps().accept(*this);
+
for_all_comps(c, doc.vline_seps_comps())
doc.vline_seps_comps()(c).accept(*this);
+ save_image_to_xml(output, doc.vline_seps(),
+ "vlines_separators_image");
+
+ output << " </vlines_separators>" << std::endl;
+ }
+
// Whitespace seraparators
if (doc.has_whitespace_seps())
@@ -178,8 +201,16 @@ namespace scribo
const component_set<L>&
whitespace_seps_comps = doc.whitespace_seps_comps();
+ output << " <whitespaces_delimitors>" << std::endl;
+ whitespace_seps_comps.accept(*this);
+
for_all_comps(c, whitespace_seps_comps)
whitespace_seps_comps(c).accept(*this);
+
+ save_image_to_xml(output, doc.whitespace_seps(),
+ "whitespaces_delimitors_image");
+
+ output << " </whitespaces_delimitors>" <<
std::endl;
}
output << " </page>" << std::endl;
@@ -212,13 +243,26 @@ namespace scribo
void
full_xml_visitor::visit(const object_groups<L>& groups) const
{
- output << " <object_groups>" << std::endl;
+ output << " <object_groups ngroups=\"" <<
groups.nelements()
+ << "\">" << std::endl;
+
for_all_groups(g, groups)
{
- output << " <group "
- << " object_id=\"" << g
- << "\" group_id=\"" << groups(g)
- << "\"/>" << std::endl;
+ output << " <group id=\"" << groups(g).id()
+ << "\" valid=\"" << groups(g).is_valid()
+ << "\" pixel_area=\"" << groups(g).pixel_area()
+ << "\" pmin_x=\"" << groups(g).bbox().pmin().row()
+ << "\" pmin_y=\"" << groups(g).bbox().pmin().col()
+ << "\" pmax_x=\"" << groups(g).bbox().pmax().row()
+ << "\" pmax_y=\"" << groups(g).bbox().pmax().col()
+ << "\">" << std::endl;
+
+ for_all_elements(e, groups(g).component_ids())
+ output << " <group_member comp_id=\""
+ << groups(g).component_ids()(e)
+ << "\"/>" << std::endl;
+
+ output << " </group>" << std::endl;
}
output << " </object_groups>" << std::endl;
}
@@ -263,51 +307,36 @@ namespace scribo
<< "\" pmin_x=\"" <<
comp_set(c).bbox().pmin().col()
<< "\" pmin_y=\"" <<
comp_set(c).bbox().pmin().row()
<< "\" pmax_x=\"" <<
comp_set(c).bbox().pmax().col()
- << "\" pmax_y=\"" <<
comp_set(c).bbox().pmax().row()
- << "\"/>" << std::endl;
+ << "\" pmax_y=\"" <<
comp_set(c).bbox().pmax().row();
+
+ if (comp_set(c).has_features())
+ {
+ output << "\">" << std::endl;
+
+ output << " <component_features"
+ << " valid=\"" << comp_set(c).features().valid
+ << "\" color=\"" <<
comp_set(c).features().color
+ << "\" boldness=\"" <<
comp_set(c).features().boldness
+ << "\"/>" << std::endl;
+
+ output << " </component_info>" << std::endl;
+ }
+ else
+ output << "\"/>" << std::endl;
}
// Save labeled image
{
const L& lbl = comp_set.labeled_image();
- output << "<labeled_image "
- << " height=\"" << lbl.domain().height()
- << "\" width=\"" << lbl.domain().width() <<
"\">"
- << "<![CDATA[";
-
- // FIXME: Try to avoid that!
- border::resize(lbl, 0);
- QByteArray
- lbl64 = QByteArray::fromRawData((const char *)lbl.buffer(),
- lbl.nelements() * sizeof(mln_value(L)));
- lbl64 = qCompress(lbl64, COMPRESSION_LEVEL);
- lbl64 = lbl64.toBase64();
-
- output.write(lbl64.data(), lbl64.size());
-
- output << "]]></labeled_image>" << std::endl;
+ save_image_to_xml(output, lbl, "labeled_image");
}
// Save separators image
if (comp_set.has_separators())
{
const mln_ch_value(L,bool)& seps = comp_set.separators();
- output << "<separators_image "
- << " height=\"" << seps.domain().height()
- << "\" width=\"" << seps.domain().width() <<
"\">"
- << "<![CDATA[";
-
- border::resize(seps, 0);
- QByteArray
- seps64 = QByteArray::fromRawData((const char *)seps.buffer(),
- seps.nelements() * sizeof(bool));
- seps64 = qCompress(seps64, COMPRESSION_LEVEL);
- seps64 = seps64.toBase64();
-
- output.write(seps64.data(), seps64.size());
-
- output << "]]></separators_image>" << std::endl;
+ save_image_to_xml(output, seps, "separators_image");
}
output << "</component_set>" << std::endl;
@@ -394,7 +423,9 @@ namespace scribo
<< "\" x_height=\"" << lines(fid).x_height()
<< "\" d_height=\"" << lines(fid).d_height()
<< "\" a_height=\"" << lines(fid).a_height()
- << "\" char_width=\"" << lines(fid).char_width();
+ << "\" char_width=\"" << lines(fid).char_width()
+ << "\" color=\"" << parset(p).color()
+ << "\" color_reliability=\"" <<
parset(p).color_reliability();
// End of EXTENSIONS
output << "\">"
<< std::endl;
@@ -469,6 +500,5 @@ namespace scribo
} // end of namespace scribo
-# undef COMPRESSION_LEVEL
#endif // SCRIBO_IO_XML_INTERNAL_FULL_XML_VISITOR_HH
diff --git a/scribo/scribo/io/xml/internal/print_image_coords.hh
b/scribo/scribo/io/xml/internal/save_image_to_xml.hh
similarity index 54%
copy from scribo/scribo/io/xml/internal/print_image_coords.hh
copy to scribo/scribo/io/xml/internal/save_image_to_xml.hh
index ebfe402..3f38337 100644
--- a/scribo/scribo/io/xml/internal/print_image_coords.hh
+++ b/scribo/scribo/io/xml/internal/save_image_to_xml.hh
@@ -23,15 +23,24 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_IO_XML_INTERNAL_PRINT_IMAGE_COORDS_HH
-# define SCRIBO_IO_XML_INTERNAL_PRINT_IMAGE_COORDS_HH
+#ifndef SCRIBO_IO_XML_INTERNAL_SAVE_IMAGE_TO_XML_HH
+# define SCRIBO_IO_XML_INTERNAL_SAVE_IMAGE_TO_XML_HH
/// \file
///
-/// \brief Prints box2d coordinates to XML data.
+/// Save an image as XML data.
-# include <fstream>
-# include <mln/core/concept/site_set.hh>
+# include <iostream>
+
+# include <QtXml>
+
+# include <mln/core/concept/image.hh>
+# include <mln/border/resize.hh>
+
+
+// Compression level 0-9. 9 is the best but is slow.
+// 5 seems to be a good compromise.
+# define COMPRESSION_LEVEL 5
namespace scribo
{
@@ -44,42 +53,43 @@ namespace scribo
namespace internal
{
-
using namespace mln;
- /*! \brief Prints box2d coordinates to XML data.
- */
- template <typename S>
+ template <typename I>
void
- print_image_coords(std::ofstream& ostr, const mln::Site_Set<S>& b,
- const char *space);
-
+ save_image_to_xml(std::ostream& output, const Image<I>& ima,
+ const char *qname);
# ifndef MLN_INCLUDE_ONLY
-
- template <typename S>
+ // Write image `ima_' to `output' as a single XML element named
+ // `qname'. The element carries `height' and `width' attributes taken
+ // from the image domain, and a CDATA section holding the image
+ // buffer compressed with qCompress() at COMPRESSION_LEVEL and then
+ // Base64-encoded.
+ //
+ // NOTE(review): the trace labels below read "save_image" whereas
+ // the function is named save_image_to_xml.
+ template <typename I>
void
- print_image_coords(std::ofstream& ostr, const mln::Site_Set<S>& b_,
- const char *space)
+ save_image_to_xml(std::ostream& output, const Image<I>& ima_,
+ const char *qname)
{
- std::string sc = space;
- std::string sp = sc + " ";
+ trace::entering("scribo::io::xml::internal::save_image");
- const S& b = exact(b_);
- mln_precondition(b.is_valid());
+ mln_precondition(exact(ima_).is_valid());
+ const I& ima = exact(ima_);
- ostr << sc << "<coords>" << std::endl;
+ // Opening tag with the image dimensions, then start of the payload.
+ output << "<" << qname
+ << " height=\"" << ima.domain().height()
+ << "\" width=\"" << ima.domain().width() <<
"\">"
+ << "<![CDATA[";
- mln_piter(S) p(b);
- for_all(p)
- ostr << sp << "<point x=\"" << p.col()
- << "\" y=\"" << p.row() <<
"\"/>"
- << std::endl;
+ // The border is set to 0 before the raw buffer is read --
+ // presumably so that buffer()/nelements() cover only the pixels
+ // of the domain; TODO confirm.
+ mln::border::resize(ima, 0);
+ QByteArray
+ seps64 = QByteArray::fromRawData((const char *)ima.buffer(),
+ ima.nelements() * sizeof(mln_value(I)));
+ seps64 = qCompress(seps64, COMPRESSION_LEVEL);
+ seps64 = seps64.toBase64();
- ostr << sc << "</coords>" << std::endl;
- }
+ output.write(seps64.data(), seps64.size());
+ output << "]]></" << qname << ">"
<< std::endl;
+
+ trace::exiting("scribo::io::xml::internal::save_image");
+ }
# endif // ! MLN_INCLUDE_ONLY
@@ -91,4 +101,6 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_IO_XML_INTERNAL_PRINT_IMAGE_COORDS_HH
+# undef COMPRESSION_LEVEL
+
+#endif // ! SCRIBO_IO_XML_INTERNAL_SAVE_IMAGE_TO_XML_HH
diff --git a/scribo/scribo/io/xml/load.hh b/scribo/scribo/io/xml/load.hh
index 8042c75..8d89085 100644
--- a/scribo/scribo/io/xml/load.hh
+++ b/scribo/scribo/io/xml/load.hh
@@ -88,6 +88,8 @@ namespace scribo
None,
ComponentSet,
ComponentInfo,
+ ComponentFeatures,
+ Elements,
LabeledImage,
SeparatorsImage,
ObjectLinks,
@@ -95,6 +97,7 @@ namespace scribo
Point,
Link,
Group,
+ GroupMember,
Line,
LineLinks,
LineLink,
@@ -102,7 +105,13 @@ namespace scribo
TextRegion,
CompIdList,
CompId,
- Page
+ Page,
+ WhitespacesDelimitors,
+ HLineSeparators,
+ VLineSeparators,
+ WhitespacesDelimitorsImage,
+ HLineSeparatorsImage,
+ VLineSeparatorsImage,
};
@@ -116,6 +125,8 @@ namespace scribo
static const ModeData mode_data[] = {
{ "component_set", ComponentSet },
{ "component_info", ComponentInfo },
+ { "component_features", ComponentFeatures },
+ { "elements", Elements },
{ "labeled_image", LabeledImage },
{ "separators_image", SeparatorsImage },
{ "object_links", ObjectLinks },
@@ -123,6 +134,7 @@ namespace scribo
{ "point", Point },
{ "link", Link },
{ "group", Group },
+ { "group_member", GroupMember },
{ "line", Line },
{ "line_links", LineLinks },
{ "line_link", LineLink },
@@ -131,10 +143,34 @@ namespace scribo
{ "compid_list", CompIdList },
{ "compid", CompId },
{ "page", Page },
+ { "whitespaces_delimitors", WhitespacesDelimitors },
+ { "hlines_separators", HLineSeparators },
+ { "vlines_separators", VLineSeparators },
+ { "whitespaces_delimitors_image", WhitespacesDelimitorsImage },
+ { "hlines_separators_image", HLineSeparatorsImage },
+ { "vlines_separators_image", VLineSeparatorsImage },
{ 0, None }
};
+ namespace internal
+ {
+
+ // Parse a color attribute into a value::rgb8.
+ //
+ // The first and last characters of `color_str' are dropped and the
+ // remainder is split on ','; the first three fields are read as
+ // integers (presumably the "(r,g,b)" form written when a
+ // value::rgb8 is streamed -- TODO confirm).
+ //
+ // If fewer than three fields are found (attribute missing, empty or
+ // malformed), black is returned instead of indexing the list out
+ // of range.
+ //
+ // Declared inline: this is a non-template function defined in a
+ // header and would otherwise be defined once per translation unit.
+ inline
+ value::rgb8 parse_color(const QString& color_str)
+ {
+ QString color = color_str;
+ color.chop(1);
+ color = color.remove(0, 1);
+ const QStringList rgb = color.split(',');
+
+ // QList::at() requires a valid index.
+ if (rgb.size() < 3)
+ return value::rgb8(0, 0, 0);
+
+ return
+ value::rgb8(rgb.at(0).toInt(),
+ rgb.at(1).toInt(),
+ rgb.at(2).toInt());
+ }
+
+ }
+
template <typename L>
class xml_handler : public QXmlDefaultHandler
{
@@ -142,7 +178,8 @@ namespace scribo
typedef mln_ch_value(L,bool) B;
public:
- xml_handler() : current_paragraph_id(1) { lines_data.append(line_info<L>()); }
// line info id starts from 1.
+ xml_handler(document<L>& doc_) : current_paragraph_id(1), doc(doc_)
+ { lines_data.append(line_info<L>()); } // line info id starts from 1.
virtual
bool
@@ -185,6 +222,20 @@ namespace scribo
break;
+ case ComponentFeatures:
+ {
+ if (atts.value("valid").toInt())
+ {
+ component_features_data comp_features;
+ comp_features.valid = true;
+ comp_features.color = internal::parse_color(atts.value("color"));
+ comp_features.boldness = atts.value("boldness").toFloat();
+
+ comp_set_data->infos_.last().update_features(comp_features);
+ }
+ }
+ break;
+
// Object links
case ObjectLinks:
{
@@ -197,8 +248,9 @@ namespace scribo
// Object groups
case ObjectGroups:
{
- // qDebug() << "object_groups created";
- groups = object_groups<L>(links);
+ //qDebug() << "Processing object_groups";
+ group_info_.reserve(atts.value("ngroups").toInt());
+ group_info_.resize(1);
}
break;
@@ -225,6 +277,8 @@ namespace scribo
// qDebug() << "TextRegion";
current_paragraph = paragraph_info<L>(llinks);
+ current_paragraph.set_color_(internal::parse_color(atts.value("color")));
+ current_paragraph.set_color_reliability_(atts.value("color_reliability").toFloat());
}
break;
@@ -257,6 +311,7 @@ namespace scribo
line_data->a_height_ = atts.value("a_height").toInt();
line_data->char_space_ = atts.value("kerning").toInt();
line_data->char_width_ = atts.value("char_width").toInt();
+ line_data->char_width_ = atts.value("char_width").toInt();
line_data->word_space_ = 0;
line_data->reading_direction_ = line::LeftToRight;
@@ -266,6 +321,13 @@ namespace scribo
line_data->reading_orientation_ =
atts.value("txt_reading_orientation").toInt();
line_data->indented_ = (atts.value("txt_indented") == "false" ?
false : true);
+
+ line_data->boldness_ = atts.value("boldness").toFloat();
+ line_data->boldness_reliability_ =
atts.value("boldness_reliability").toFloat();
+ line_data->color_ = internal::parse_color(atts.value("color"));
+
+ line_data->color_reliability_ =
atts.value("color_reliability").toFloat();
+
bbox.init();
}
break;
@@ -282,7 +344,7 @@ namespace scribo
// CompId
case CompId:
{
- line_data->components_.append(atts.value("value").toInt());
+ line_data->component_ids_.append(atts.value("value").toInt());
}
break;
@@ -316,10 +378,21 @@ namespace scribo
break;
+ // Separators/delimitor images
+ case WhitespacesDelimitorsImage:
+ case HLineSeparatorsImage:
+ case VLineSeparatorsImage:
+ {
+ width = atts.value("width").toInt();
+ height = atts.value("height").toInt();
+ seps = B(mln::make::box2d(height, width), 0); // No border
+ }
+ break;
+
// Link
case Link:
{
- links(atts.value("from").toInt()) = atts.value("to").toInt();
+ links.update(atts.value("from").toInt(),
atts.value("to").toInt());
}
break;
@@ -327,7 +400,20 @@ namespace scribo
// Group
case Group:
{
- groups(atts.value("object_id").toInt()) =
atts.value("group_id").toInt();
+ group_info_.append(group_info(atts.value("id").toInt(),
+ atts.value("pixel_area").toInt(),
+ mln::make::box2d(atts.value("pmin_x").toInt(),
+ atts.value("pmin_y").toInt(),
+ atts.value("pmax_x").toInt(),
+ atts.value("pmax_y").toInt()),
+ atts.value("valid").toInt()));
+ }
+ break;
+
+ // GroupMember
+ case GroupMember:
+ {
+ component_ids.append(atts.value("comp_id").toInt());
}
break;
@@ -352,6 +438,7 @@ namespace scribo
{
// qDebug() << "Component set done";
components = component_set<L>(comp_set_data);
+
}
break;
@@ -382,6 +469,46 @@ namespace scribo
// qDebug() << "Page done";
lines.update_line_data_(lines_data);
parset = paragraph_set<L>(par_data);
+ doc.set_paragraphs(parset);
+ }
+ break;
+
+ // ObjectGroups
+ case ObjectGroups:
+ {
+ groups = object_groups<L>(links, group_info_);
+ }
+ break;
+
+ // Group
+ case Group:
+ {
+ group_info_.last().component_ids_() = component_ids;
+ component_ids.clear();
+ }
+ break;
+
+ case Elements:
+ {
+ doc.set_elements(components);
+ }
+ break;
+
+ case WhitespacesDelimitors:
+ {
+ doc.set_whitespace_separators(seps, components);
+ }
+ break;
+
+ case HLineSeparators:
+ {
+ doc.set_hline_separators(seps, components);
+ }
+ break;
+
+ case VLineSeparators:
+ {
+ doc.set_vline_separators(seps, components);
}
break;
@@ -419,6 +546,17 @@ namespace scribo
}
break;
+ // Payload of a separator/delimitor image: Base64 text which, once
+ // decoded and uncompressed, is copied into the buffer of `seps'.
+ case WhitespacesDelimitorsImage:
+ case HLineSeparatorsImage:
+ case VLineSeparatorsImage:
+ {
+ QByteArray data = ch.toAscii();
+ data = QByteArray::fromBase64(data);
+ data = qUncompress(data);
+
+ // The payload comes from the file being loaded and its size is
+ // not guaranteed to match the width/height attributes used to
+ // allocate `seps': never copy more than the destination holds.
+ if (static_cast<unsigned>(data.size())
+ <= seps.nelements() * sizeof(bool))
+ memcpy((char *) seps.buffer(), data.data(), data.size());
+ else
+ qDebug() << "Separators image data does not fit the declared size; ignored.";
+ }
+ break;
+
default:
;
}
@@ -453,7 +591,10 @@ namespace scribo
component_set<L> components;
object_links<L> links;
+
object_groups<L> groups;
+ mln::util::array<component_id_t> component_ids;
+ mln::util::array<group_info> group_info_;
// Lines
unsigned current_line_id;
@@ -468,6 +609,11 @@ namespace scribo
mln::util::array<line_info<L> > lines_data;
line_set<L> lines;
+
+ // Delimitors/separators
+ B seps; // Temporary image.
+
+ document<L>& doc;
};
@@ -484,7 +630,7 @@ namespace scribo
load_extended(document<L>& doc,
const std::string& output_name)
{
- xml_handler<L> handler;
+ xml_handler<L> handler(doc);
QXmlSimpleReader reader;
reader.setContentHandler(&handler);
@@ -498,8 +644,6 @@ namespace scribo
QXmlInputSource xmlInputSource(&file);
if (reader.parse(xmlInputSource))
qDebug() << "Loaded successfuly";
-
- doc.set_paragraphs(handler.parset);
}
} // end of namespace scribo::io::xml::internal
diff --git a/scribo/tests/unit_test/cond_tests_qt b/scribo/tests/unit_test/cond_tests_qt
index f7bc42e..4f4b667 100644
--- a/scribo/tests/unit_test/cond_tests_qt
+++ b/scribo/tests/unit_test/cond_tests_qt
@@ -1,4 +1,5 @@
scribo/convert/from_base64.hh
scribo/convert/from_qimage.hh
scribo/io/xml/internal/full_xml_visitor.hh
+scribo/io/xml/internal/save_image_to_xml.hh
scribo/io/xml/load.hh
--
1.5.6.5