---
scribo/ChangeLog | 4 +
scribo/scribo/io/xml/save.hh | 214 +++++++++++++++++++++---------------------
2 files changed, 111 insertions(+), 107 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index e738a8a..b14e5bf 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,9 @@
2010-11-15 Guillaume Lazzara <z(a)lrde.epita.fr>
+ * scribo/io/xml/save.hh: Make use of document structure.
+
+2010-11-15 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* scribo/primitive/extract/elements.hh: New routine.
2010-11-15 Guillaume Lazzara <z(a)lrde.epita.fr>
diff --git a/scribo/scribo/io/xml/save.hh b/scribo/scribo/io/xml/save.hh
index f13396f..c6b416d 100644
--- a/scribo/scribo/io/xml/save.hh
+++ b/scribo/scribo/io/xml/save.hh
@@ -28,7 +28,7 @@
/// \file
///
-/// \brief Save text line information as XML.
+/// \brief Save document information as XML.
# include <fstream>
# include <sstream>
@@ -46,7 +46,7 @@ namespace scribo
namespace xml
{
- /*! \brief Save text line information as XML.
+ /*! \brief Save document information as XML.
We use a XML Schema part of the PAGE (Page Analysis and Ground
truth Elements) image representation framework.
@@ -60,8 +60,7 @@ namespace scribo
*/
template <typename L>
void
- save(const std::string& input_name,
- const line_set<L>& lines,
+ save(const document<L>& doc,
const std::string& output_name,
bool extended_format);
@@ -86,12 +85,34 @@ namespace scribo
return input;
}
+
+        /// \brief Write the bounding box \p b as a <coords> XML element.
+        ///
+        /// Emits four <point> children built from the pmin()/pmax()
+        /// corners of \p b, in this order: (pmin.col, pmin.row),
+        /// (pmax.col, pmin.row), (pmax.col, pmax.row), (pmin.col, pmax.row).
+        ///
+        /// \param ostr  Output file stream receiving the XML.
+        /// \param b     Box whose corners are written.
+        /// \param space Prefix written before the <coords> tags; each
+        ///              <point> line gets this prefix plus extra padding.
+        ///
+        /// Declared inline because this non-template function is defined
+        /// in a header: without it, every translation unit including this
+        /// file would emit its own external definition.
+        inline
+        void print_box_coords(std::ofstream& ostr, const box2d& b,
+                              const char *space)
+        {
+          std::string sc = space;
+          std::string sp = sc + " ";
+          ostr << sc << "<coords>" << std::endl
+               << sp << "<point x=\"" << b.pmin().col()
+               << "\" y=\"" << b.pmin().row() << "\"/>"
+               << std::endl
+               << sp << "<point x=\"" << b.pmax().col()
+               << "\" y=\"" << b.pmin().row() << "\"/>"
+               << std::endl
+               << sp << "<point x=\"" << b.pmax().col()
+               << "\" y=\"" << b.pmax().row() << "\"/>"
+               << std::endl
+               << sp << "<point x=\"" << b.pmin().col()
+               << "\" y=\"" << b.pmax().row() << "\"/>"
+               << std::endl
+               << sc << "</coords>" << std::endl;
+
+        }
+
} // end of namespace scribo::io::xml::internal
template <typename L>
void
- save(const std::string& input_name,
- const line_set<L>& lines,
+ save(const document<L>& doc,
const std::string& output_name,
bool extended_format)
{
@@ -100,9 +121,12 @@ namespace scribo
std::ofstream file(output_name.c_str());
if (! file)
{
- std::cerr << "error: cannot open file '" << input_name
<< "'!";
+ std::cerr << "error: cannot open file '" << doc.filename()
<< "'!";
abort();
}
+
+ const line_set<L>& lines = doc.text();
+
std::map<char, std::string> html_map;
html_map['\"'] = """;
html_map['<'] = "<";
@@ -111,13 +135,13 @@ namespace scribo
file << "<?xml version=\"1.0\"?>" << std::endl;
if (extended_format)
- {
- file << "<pcGts>" << std::endl;
- }
+ {
+ file << "<pcGts>" << std::endl;
+ }
else
- {
- file << "<pcGts
xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-1…
xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"
xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecont…
http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent…
pcGtsId=\"" << input_name << "\">" <<
std::endl;
- }
+ {
+ file << "<pcGts
xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-1…
xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"
xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecont…
http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent…
pcGtsId=\"" << doc.filename() << "\">" <<
std::endl;
+ }
file << " <PcMetadata>" << std::endl;
file << " <PcCreator>LRDE</PcCreator>" <<
std::endl;
@@ -126,110 +150,86 @@ namespace scribo
file << " <PcComments>Generated by Scribo from
Olena.</PcComments>" << std::endl;
file << " </PcMetadata>" << std::endl;
- file << " <page image_filename=\"" << input_name
+ file << " <page image_filename=\"" << doc.filename()
<< "\" image_width=\"" <<
lines.components().labeled_image().ncols()
<< "\" image_height=\"" <<
lines.components().labeled_image().nrows()
<< "\">" << std::endl;
for_all_lines(l, lines)
+ {
+ if (! lines(l).is_valid()
+ || lines(l).tag() != line::None
+ || lines(l).type() != line::Text) // Is NOT a text line.
+ continue;
{
- if (! lines(l).is_valid()
- || lines(l).tag() != line::None
- || lines(l).type() != line::Text) // Is NOT a text line.
- continue;
+ file << " <text_region id=\"" << lines(l).id()
+ << "\" txt_orientation=\"" << lines(l).orientation()
+ << "\" txt_reading_orientation=\"" <<
lines(l).reading_orientation()
+ << "\" txt_reading_direction=\"" <<
lines(l).reading_direction()
+ << "\" txt_text_type=\"" << lines(l).type()
+ << "\" txt_reverse_video=\"" <<
(lines(l).reverse_video() ? "true" : "false")
+ << "\" txt_indented=\"" << (lines(l).indented() ?
"true" : "false")
+ << "\" kerning=\"" << lines(l).char_space();
+
+ // EXTENSIONS - Not officially supported
+ if (extended_format)
{
- file << " <text_region id=\"" << lines(l).id()
- << "\" txt_orientation=\"" <<
lines(l).orientation()
- << "\" txt_reading_orientation=\"" <<
lines(l).reading_orientation()
- << "\" txt_reading_direction=\"" <<
lines(l).reading_direction()
- << "\" txt_text_type=\"" << lines(l).type()
- << "\" txt_reverse_video=\"" <<
(lines(l).reverse_video() ? "true" : "false")
- << "\" txt_indented=\"" << (lines(l).indented() ?
"true" : "false")
- << "\" kerning=\"" << lines(l).char_space();
-
- // EXTENSIONS - Not officially supported
- if (extended_format)
- {
- file << "\" baseline=\"" << lines(l).baseline()
- << "\" meanline=\"" << lines(l).meanline()
- << "\" x_height=\"" << lines(l).x_height()
- << "\" d_height=\"" << lines(l).d_height()
- << "\" a_height=\"" << lines(l).a_height()
- << "\" char_width=\"" << lines(l).char_width();
- }
- // End of EXTENSIONS
- file << "\">"
- << std::endl;
+ file << "\" baseline=\"" << lines(l).baseline()
+ << "\" meanline=\"" << lines(l).meanline()
+ << "\" x_height=\"" << lines(l).x_height()
+ << "\" d_height=\"" << lines(l).d_height()
+ << "\" a_height=\"" << lines(l).a_height()
+ << "\" char_width=\"" << lines(l).char_width();
+ }
+ // End of EXTENSIONS
+ file << "\">"
+ << std::endl;
- if (extended_format)
- {
- file << " <coords>" << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmin().col()
- << "\" y=\"" << lines(l).bbox().pmin().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmax().col()
- << "\" y=\"" << lines(l).bbox().pmin().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmax().col()
- << "\" y=\"" << lines(l).bbox().pmax().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmin().col()
- << "\" y=\"" << lines(l).bbox().pmax().row()
<< "\"/>"
- << std::endl
- << " </coords>" << std::endl;
-
-
- file << " <paragraph>" << std::endl;
-
- file << " <coords>" << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmin().col()
- << "\" y=\"" << lines(l).bbox().pmin().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmax().col()
- << "\" y=\"" << lines(l).bbox().pmin().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmax().col()
- << "\" y=\"" << lines(l).bbox().pmax().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmin().col()
- << "\" y=\"" << lines(l).bbox().pmax().row()
<< "\"/>"
- << std::endl
- << " </coords>" << std::endl;
-
- if (lines(l).has_text())
- {
- std::string tmp = lines(l).text();
- tmp = internal::html_markups_replace(tmp, html_map);
-
- file << " <line text=\""
- << tmp
- << "\">" << std::endl;
- }
- else
- file << " <line>" << std::endl;
-
- file << " <coords>" << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmin().col()
- << "\" y=\"" << lines(l).bbox().pmin().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmax().col()
- << "\" y=\"" << lines(l).bbox().pmin().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmax().col()
- << "\" y=\"" << lines(l).bbox().pmax().row()
<< "\"/>"
- << std::endl
- << " <point x=\"" <<
lines(l).bbox().pmin().col()
- << "\" y=\"" << lines(l).bbox().pmax().row()
<< "\"/>"
- << std::endl
- << " </coords>" << std::endl;
-
- file << " </line>" << std::endl;
-
- file << " </paragraph>" << std::endl;
- }
+ internal::print_box_coords(file, lines(l).bbox(), " ");
+
+ if (extended_format)
+ {
+ file << " <paragraph>" << std::endl;
+
+ internal::print_box_coords(file, lines(l).bbox(), " ");
+
+ if (lines(l).has_text())
+ {
+ std::string tmp = lines(l).text();
+ tmp = internal::html_markups_replace(tmp, html_map);
+
+ file << " <line text=\""
+ << tmp
+ << "\">" << std::endl;
+ }
+ else
+ file << " <line>" << std::endl;
+
+ internal::print_box_coords(file, lines(l).bbox(), " ");
- file << " </text_region>" << std::endl;
+ file << " </line>" << std::endl;
+
+ file << " </paragraph>" << std::endl;
}
+
+ file << " </text_region>" << std::endl;
+ }
+ }
+
+
+ const component_set<L>& elts = doc.elements();
+ for_all_comps(e, elts)
+ if (elts(e).is_valid())
+ {
+ file << " <image_region id=\"ir" << elts(e).id()
+ << "\" img_colour_type=\"24_Bit_Colour\""
+ << " img_orientation=\"0.000000\" "
+ << " img_emb_text=\"No\" "
+ << " img_bgcolour=\"White\">" << std::endl;
+
+ internal::print_box_coords(file, elts(e).bbox(), " ");
+
+ file << " </image_region>" << std::endl;
}
file << " </page>" << std::endl;
--
1.5.6.5