* scribo/core/tag/line.hh: Add type2str; operator<< now uses it.
* scribo/io/xml/internal/compute_text_colour.hh: Update color
names.
* scribo/io/xml/internal/extended_page_xml_visitor.hh,
* scribo/io/xml/internal/full_xml_visitor.hh,
* scribo/io/xml/internal/page_xml_visitor.hh,
* scribo/io/xml/internal/print_box_coords.hh,
* scribo/io/xml/internal/print_page_preambule.hh: Update in order
to produce PAGE compatible XML files.
---
scribo/ChangeLog | 16 ++++
scribo/scribo/core/tag/line.hh | 13 +++-
.../scribo/io/xml/internal/compute_text_colour.hh | 32 ++++----
.../io/xml/internal/extended_page_xml_visitor.hh | 4 +-
scribo/scribo/io/xml/internal/full_xml_visitor.hh | 4 +-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 77 +++++++++++++-------
scribo/scribo/io/xml/internal/print_box_coords.hh | 12 ++--
.../scribo/io/xml/internal/print_page_preambule.hh | 42 +++++++-----
8 files changed, 129 insertions(+), 71 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 08642d5..fedc5ec 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,19 @@
+2011-05-12 Guillaume Lazzara <lazzara(a)fidji.lrde.epita.fr>
+
+ Update support for PAGE XML format.
+
+ * scribo/core/tag/line.hh: Add type2str; operator<< now uses it.
+
+ * scribo/io/xml/internal/compute_text_colour.hh: Update color
+ names.
+
+ * scribo/io/xml/internal/extended_page_xml_visitor.hh,
+ * scribo/io/xml/internal/full_xml_visitor.hh,
+ * scribo/io/xml/internal/page_xml_visitor.hh,
+ * scribo/io/xml/internal/print_box_coords.hh,
+ * scribo/io/xml/internal/print_page_preambule.hh: Update in order
+ to produce PAGE compatible XML files.
+
2011-05-11 Guillaume Lazzara <lazzara(a)fidji.lrde.epita.fr>
Small fixes in Scribo.
diff --git a/scribo/scribo/core/tag/line.hh b/scribo/scribo/core/tag/line.hh
index 3fb1fdf..12b3246 100644
--- a/scribo/scribo/core/tag/line.hh
+++ b/scribo/scribo/core/tag/line.hh
@@ -158,8 +158,7 @@ namespace scribo
inline
- std::ostream&
- operator<<(std::ostream& ostr, const Type& type)
+ std::string type2str(const Type& type)
{
std::string str;
switch(type)
@@ -205,9 +204,17 @@ namespace scribo
break;
}
- return ostr << str;
+ return str;
+ }
+
+ inline
+ std::ostream&
+ operator<<(std::ostream& ostr, const Type& type)
+ {
+ return ostr << type2str(type);
}
+
inline
Type str2type(const std::string& str)
{
diff --git a/scribo/scribo/io/xml/internal/compute_text_colour.hh
b/scribo/scribo/io/xml/internal/compute_text_colour.hh
index 1caf358..86fe454 100644
--- a/scribo/scribo/io/xml/internal/compute_text_colour.hh
+++ b/scribo/scribo/io/xml/internal/compute_text_colour.hh
@@ -48,7 +48,7 @@ namespace scribo
{
const char *
- compute_txt_text_colour(const value::rgb8& v);
+ compute_text_colour(const value::rgb8& v);
struct color_t
@@ -63,23 +63,23 @@ namespace scribo
const char *
- compute_txt_text_colour(const value::rgb8& v)
+ compute_text_colour(const value::rgb8& v)
{
static color_t colors[] = {
- { mln::make::vec(0, 0, 0), "Black", 0 },
- { mln::make::vec(255, 0, 0), "Red", 0 },
- { mln::make::vec(255, 255, 255), "White", 0 },
- { mln::make::vec(0, 255, 0), "Green", 0 },
- { mln::make::vec(0, 0, 255), "Blue", 0 },
- { mln::make::vec(255, 255, 0), "Yellow", 0 },
- { mln::make::vec(255, 165, 0), "Orange", 0 },
- { mln::make::vec(255, 192, 203), "Pink", 0 },
- { mln::make::vec(192, 192, 192), "Grey", 0 },
- { mln::make::vec(64, 224, 208), "Turquoise", 0 },
- { mln::make::vec(75, 0, 130), "Indigo", 0 },
- { mln::make::vec(238, 130, 238), "Violet", 0 },
- { mln::make::vec(0, 255, 255), "Cyan", 0 },
- { mln::make::vec(255, 0, 255), "Magenta", 0 },
+ { mln::make::vec(0, 0, 0), "black", 0 },
+ { mln::make::vec(255, 0, 0), "red", 0 },
+ { mln::make::vec(255, 255, 255), "white", 0 },
+ { mln::make::vec(0, 255, 0), "green", 0 },
+ { mln::make::vec(0, 0, 255), "blue", 0 },
+ { mln::make::vec(255, 255, 0), "yellow", 0 },
+ { mln::make::vec(255, 165, 0), "orange", 0 },
+ { mln::make::vec(255, 192, 203), "pink", 0 },
+ { mln::make::vec(192, 192, 192), "grey", 0 },
+ { mln::make::vec(64, 224, 208), "turquoise", 0 },
+ { mln::make::vec(75, 0, 130), "indigo", 0 },
+ { mln::make::vec(238, 130, 238), "violet", 0 },
+ { mln::make::vec(0, 255, 255), "cyan", 0 },
+ { mln::make::vec(255, 0, 255), "magenta", 0 },
{ mln::make::vec(0, 0, 0), 0, 0 } // Invalid
};
diff --git a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
index 869795b..0cdebb5 100644
--- a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh
@@ -267,7 +267,7 @@ namespace scribo
<< "\" txt_text_type=\"" << lines(fid).type()
<< "\" txt_reverse_video=\"" <<
(lines(fid).reverse_video() ? "true" : "false")
<< "\" txt_indented=\"" << (lines(fid).indented() ?
"true" : "false")
- << "\" txt_text_colour=\"" <<
internal::compute_txt_text_colour(parset(p).color())
+ << "\" txt_text_colour=\"" <<
internal::compute_text_colour(parset(p).color())
<< "\" kerning=\"" << lines(fid).char_space();
// EXTENSIONS - Not officially supported
@@ -321,7 +321,7 @@ namespace scribo
<< "\" txt_text_type=\"" << line.type()
<< "\" txt_reverse_video=\"" << (line.reverse_video()
? "true" : "false")
<< "\" txt_indented=\"" << (line.indented() ?
"true" : "false")
- << "\" txt_text_colour=\"" <<
internal::compute_txt_text_colour(line.color())
+ << "\" txt_text_colour=\"" <<
internal::compute_text_colour(line.color())
<< "\" kerning=\"" << line.char_space()
<< "\" baseline=\"" << line.baseline()
<< "\" meanline=\"" << line.meanline()
diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh
b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
index 6b59e60..a8dfffe 100644
--- a/scribo/scribo/io/xml/internal/full_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh
@@ -453,7 +453,7 @@ namespace scribo
<< "\" txt_text_type=\"" << lines(fid).type()
<< "\" txt_reverse_video=\"" <<
(lines(fid).reverse_video() ? "true" : "false")
<< "\" txt_indented=\"" << (lines(fid).indented() ?
"true" : "false")
- << "\" txt_text_colour=\"" <<
internal::compute_txt_text_colour(parset(p).color())
+ << "\" txt_text_colour=\"" <<
internal::compute_text_colour(parset(p).color())
<< "\" kerning=\"" << lines(fid).char_space();
// EXTENSIONS - Not officially supported
@@ -507,7 +507,7 @@ namespace scribo
<< "\" txt_text_type=\"" << line.type()
<< "\" txt_reverse_video=\"" << (line.reverse_video()
? "true" : "false")
<< "\" txt_indented=\"" << (line.indented() ?
"true" : "false")
- << "\" txt_text_colour=\"" <<
internal::compute_txt_text_colour(line.color())
+ << "\" txt_text_colour=\"" <<
internal::compute_text_colour(line.color())
<< "\" kerning=\"" << line.char_space()
<< "\" baseline=\"" << line.baseline()
<< "\" meanline=\"" << line.meanline()
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 0014caf..0f3cce1 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -37,6 +37,7 @@
# include <scribo/io/xml/internal/print_box_coords.hh>
# include <scribo/io/xml/internal/print_page_preambule.hh>
+# include <scribo/io/xml/internal/compute_text_colour.hh>
namespace scribo
@@ -56,12 +57,11 @@ namespace scribo
We use a XML Schema part of the PAGE (Page Analysis and Ground
truth Elements) image representation framework.
- This schema was used in the Page Segmentation COMPetition
- (PSCOMP) for ICDAR 2009.
+ This schema was used in the Historical Document Layout
+ Analysis COMPetition (HDLAC) for ICDAR 2011.
Its XSD file is located here:
-
http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd
-
+
http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent.xsd
*/
class page_xml_visitor : public doc_serializer<page_xml_visitor>
{
@@ -83,6 +83,7 @@ namespace scribo
private: // Attributes
std::ofstream& output;
+ mutable int base_vertical_line_id_;
};
@@ -104,6 +105,12 @@ namespace scribo
void
page_xml_visitor::visit(const document<L>& doc) const
{
+ // Make sure there are no duplicate ids for line separators.
+ // Vertical and horizontal lines are indexed separately from
+ // 0, so vertical and horizontal lines with the same id
+ // exist.
+ base_vertical_line_id_ = doc.hline_seps_comps().nelements();
+
// Preambule
print_PAGE_preambule(output, doc, true);
@@ -121,8 +128,8 @@ namespace scribo
if (doc.has_hline_seps())
doc.hline_seps_comps().accept(*this);
- output << " </page>" << std::endl;
- output << "</pcGts>" << std::endl;
+ output << " </Page>" << std::endl;
+ output << "</PcGts>" << std::endl;
}
@@ -147,16 +154,26 @@ namespace scribo
switch (info.type())
{
case component::VerticalLineSeparator:
+ {
+ output << " <SeparatorRegion id=\"sr" <<
info.id() + base_vertical_line_id_
+ << "\" orientation=\"0.000000\" "
+ << " colour=\"black\">" << std::endl;
+
+ internal::print_box_coords(output, info.bbox(), " ");
+
+ output << " </SeparatorRegion>" << std::endl;
+ break;
+ }
+
case component::HorizontalLineSeparator:
{
- output << " <separator_region id=\"sr" <<
info.id()
- << "\" sep_orientation=\"0.000000\" "
- << " sep_colour=\"Black\" "
- << " sep_bgcolour=\"White\">" << std::endl;
+ output << " <SeparatorRegion id=\"sr" <<
info.id()
+ << "\" orientation=\"0.000000\" "
+ << " colour=\"black\">" << std::endl;
internal::print_box_coords(output, info.bbox(), " ");
- output << " </separator_region>" << std::endl;
+ output << " </SeparatorRegion>" << std::endl;
break;
}
@@ -164,15 +181,15 @@ namespace scribo
default:
case component::Image:
{
- output << " <image_region id=\"ir" << info.id()
- << "\" img_colour_type=\"24_Bit_Colour\""
- << " img_orientation=\"0.000000\" "
- << " img_emb_text=\"No\" "
- << " img_bgcolour=\"White\">" << std::endl;
+ output << " <ImageRegion id=\"ir" << info.id()
+ << "\" colourDepth=\"colour\""
+ << " orientation=\"0.000000\" "
+ << " embText=\"false\" "
+ << " bgColour=\"white\">" << std::endl;
internal::print_box_coords(output, info.bbox(), " ");
- output << " </image_region>" << std::endl;
+ output << " </ImageRegion>" << std::endl;
break;
}
}
@@ -194,20 +211,30 @@ namespace scribo
// FIXME: compute that information on the whole paragraph
// and use them here.
line_id_t fid = line_ids(0);
- output << " <text_region id=\"" << p
- << "\" txt_orientation=\"" <<
lines(fid).orientation()
- << "\" txt_reading_orientation=\"" <<
lines(fid).reading_orientation()
- << "\" txt_reading_direction=\"" <<
lines(fid).reading_direction()
- << "\" txt_text_type=\"" << lines(fid).type()
- << "\" txt_reverse_video=\"" <<
(lines(fid).reverse_video() ? "true" : "false")
- << "\" txt_indented=\"" << (lines(fid).indented() ?
"true" : "false")
+ output << " <TextRegion id=\"r" << p
+ << "\" orientation=\"" << lines(fid).orientation()
+ << "\" readingOrientation=\"" <<
lines(fid).reading_orientation()
+ << "\" readingDirection=\"" <<
lines(fid).reading_direction()
+ << "\" type=\"" << ((lines(fid).type() ==
line::Text) ? "paragraph" : line::type2str(lines(fid).type()))
+ << "\" reverseVideo=\"" <<
(lines(fid).reverse_video() ? "true" : "false")
+ << "\" indented=\"" << (lines(fid).indented() ?
"true" : "false")
<< "\" kerning=\"" << lines(fid).char_space()
+ << "\" textColour=\"" <<
compute_text_colour(lines(fid).color())
+// << "\" bgColour=\"" <<
compute_text_color(lines(fid).bgcolor())
+// << "\" fontSize=\"" <<
compute_text_color(lines(fid).x_height())
+// << "\" leading=\"" <<
compute_text_color(lines(fid).leading())
<< "\">"
<< std::endl;
+ // Add support for text recognition
+ // <TextEquiv>
+ // <PlainText></PlainText>
+ // <Unicode></Unicode>
+ // </TextEquiv>
+
internal::print_box_coords(output, parset(p).bbox(), " ");
- output << " </text_region>" << std::endl;
+ output << " </TextRegion>" << std::endl;
}
}
diff --git a/scribo/scribo/io/xml/internal/print_box_coords.hh
b/scribo/scribo/io/xml/internal/print_box_coords.hh
index ad84709..8549b47 100644
--- a/scribo/scribo/io/xml/internal/print_box_coords.hh
+++ b/scribo/scribo/io/xml/internal/print_box_coords.hh
@@ -64,20 +64,20 @@ namespace scribo
{
std::string sc = space;
std::string sp = sc + " ";
- ostr << sc << "<coords>" << std::endl
- << sp << "<point x=\"" << b.pmin().col()
+ ostr << sc << "<Coords>" << std::endl
+ << sp << "<Point x=\"" << b.pmin().col()
<< "\" y=\"" << b.pmin().row() <<
"\"/>"
<< std::endl
- << sp << "<point x=\"" << b.pmax().col()
+ << sp << "<Point x=\"" << b.pmax().col()
<< "\" y=\"" << b.pmin().row() <<
"\"/>"
<< std::endl
- << sp << "<point x=\"" << b.pmax().col()
+ << sp << "<Point x=\"" << b.pmax().col()
<< "\" y=\"" << b.pmax().row() <<
"\"/>"
<< std::endl
- << sp << "<point x=\"" << b.pmin().col()
+ << sp << "<Point x=\"" << b.pmin().col()
<< "\" y=\"" << b.pmax().row() <<
"\"/>"
<< std::endl
- << sc << "</coords>" << std::endl;
+ << sc << "</Coords>" << std::endl;
}
diff --git a/scribo/scribo/io/xml/internal/print_page_preambule.hh
b/scribo/scribo/io/xml/internal/print_page_preambule.hh
index 3ee29be..bcb6b33 100644
--- a/scribo/scribo/io/xml/internal/print_page_preambule.hh
+++ b/scribo/scribo/io/xml/internal/print_page_preambule.hh
@@ -30,6 +30,7 @@
///
/// \brief Print PAGE XML format preambule.
+# include <ctime>
# include <fstream>
# include <mln/core/alias/box2d.hh>
# include <scribo/core/document.hh>
@@ -63,27 +64,34 @@ namespace scribo
const document<L>& doc,
bool with_validation)
{
- output << "<?xml version=\"1.0\"?>" <<
std::endl;
+ output << "<?xml version=\"1.0\"
encoding=\"UTF-8\"?>" << std::endl;
if (with_validation)
- output << "<pcGts
xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16\"
"
+ output << "<PcGts
xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19\"
"
<< "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"
"
- <<
"xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16
"
- <<
"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd\"
"
- << "pcGtsId=\"" << doc.filename() <<
"\">" << std::endl;
+ <<
"xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19
"
+ <<
"http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent.xsd\">"
+ << std::endl;
else
- output << "<pcGts>" << std::endl;
-
- output << " <pcMetadata>" << std::endl;
- output << " <pcCreator>LRDE</pcCreator>" <<
std::endl;
- output << " <pcCreated/>" << std::endl;
- output << " <pcLastChange/>" << std::endl;
- output << " <pcComments>Generated by Scribo from
Olena.</pcComments>" << std::endl;
- output << " </pcMetadata>" << std::endl;
-
- output << " <page image_filename=\"" << doc.filename()
- << "\" image_width=\"" << doc.width()
- << "\" image_height=\"" << doc.height()
+ output << "<PcGts>" << std::endl;
+
+
+ time_t cur_time = time(NULL);
+ tm * time_struct;
+ time_struct = localtime(&cur_time);
+ char time_info[55];
+ strftime(time_info, 55, "%Y-%m-%dT%H:%M:%S", time_struct);
+
+ output << " <Metadata>" << std::endl;
+ output << " <Creator>LRDE</Creator>" <<
std::endl;
+ output << " <Created>" << time_info <<
"</Created>" << std::endl;
+ output << " <LastChange>" << time_info <<
"</LastChange>" << std::endl;
+ output << " <Comments>Generated by Scribo from
Olena.</Comments>" << std::endl;
+ output << " </Metadata>" << std::endl;
+
+ output << " <Page imageFilename=\"" << doc.filename()
+ << "\" imageWidth=\"" << doc.width()
+ << "\" imageHeight=\"" << doc.height()
<< "\">" << std::endl;
}
--
1.5.6.5