last-svn-commit-868-gc287ea3 Update support for PAGE XML format.

* scribo/core/tag/line.hh: Add operator<<. * scribo/io/xml/internal/compute_text_colour.hh: Update color names. * scribo/io/xml/internal/extended_page_xml_visitor.hh, * scribo/io/xml/internal/full_xml_visitor.hh, * scribo/io/xml/internal/page_xml_visitor.hh, * scribo/io/xml/internal/print_box_coords.hh, * scribo/io/xml/internal/print_page_preambule.hh: Update in order to produce PAGE compatible XML files. --- scribo/ChangeLog | 16 ++++ scribo/scribo/core/tag/line.hh | 13 +++- .../scribo/io/xml/internal/compute_text_colour.hh | 32 ++++---- .../io/xml/internal/extended_page_xml_visitor.hh | 4 +- scribo/scribo/io/xml/internal/full_xml_visitor.hh | 4 +- scribo/scribo/io/xml/internal/page_xml_visitor.hh | 77 +++++++++++++------- scribo/scribo/io/xml/internal/print_box_coords.hh | 12 ++-- .../scribo/io/xml/internal/print_page_preambule.hh | 42 +++++++----- 8 files changed, 129 insertions(+), 71 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 08642d5..fedc5ec 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,3 +1,19 @@ +2011-05-12 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + + Update support for PAGE XML format. + + * scribo/core/tag/line.hh: Add operator<<. + + * scribo/io/xml/internal/compute_text_colour.hh: Update color + names. + + * scribo/io/xml/internal/extended_page_xml_visitor.hh, + * scribo/io/xml/internal/full_xml_visitor.hh, + * scribo/io/xml/internal/page_xml_visitor.hh, + * scribo/io/xml/internal/print_box_coords.hh, + * scribo/io/xml/internal/print_page_preambule.hh: Update in order + to produce PAGE compatible XML files. + 2011-05-11 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> Small fixes in Scribo. 
diff --git a/scribo/scribo/core/tag/line.hh b/scribo/scribo/core/tag/line.hh index 3fb1fdf..12b3246 100644 --- a/scribo/scribo/core/tag/line.hh +++ b/scribo/scribo/core/tag/line.hh @@ -158,8 +158,7 @@ namespace scribo inline - std::ostream& - operator<<(std::ostream& ostr, const Type& type) + std::string type2str(const Type& type) { std::string str; switch(type) @@ -205,9 +204,17 @@ namespace scribo break; } - return ostr << str; + return str; + } + + inline + std::ostream& + operator<<(std::ostream& ostr, const Type& type) + { + return ostr << type2str(type); } + inline Type str2type(const std::string& str) { diff --git a/scribo/scribo/io/xml/internal/compute_text_colour.hh b/scribo/scribo/io/xml/internal/compute_text_colour.hh index 1caf358..86fe454 100644 --- a/scribo/scribo/io/xml/internal/compute_text_colour.hh +++ b/scribo/scribo/io/xml/internal/compute_text_colour.hh @@ -48,7 +48,7 @@ namespace scribo { const char * - compute_txt_text_colour(const value::rgb8& v); + compute_text_colour(const value::rgb8& v); struct color_t @@ -63,23 +63,23 @@ namespace scribo const char * - compute_txt_text_colour(const value::rgb8& v) + compute_text_colour(const value::rgb8& v) { static color_t colors[] = { - { mln::make::vec(0, 0, 0), "Black", 0 }, - { mln::make::vec(255, 0, 0), "Red", 0 }, - { mln::make::vec(255, 255, 255), "White", 0 }, - { mln::make::vec(0, 255, 0), "Green", 0 }, - { mln::make::vec(0, 0, 255), "Blue", 0 }, - { mln::make::vec(255, 255, 0), "Yellow", 0 }, - { mln::make::vec(255, 165, 0), "Orange", 0 }, - { mln::make::vec(255, 192, 203), "Pink", 0 }, - { mln::make::vec(192, 192, 192), "Grey", 0 }, - { mln::make::vec(64, 224, 208), "Turquoise", 0 }, - { mln::make::vec(75, 0, 130), "Indigo", 0 }, - { mln::make::vec(238, 130, 238), "Violet", 0 }, - { mln::make::vec(0, 255, 255), "Cyan", 0 }, - { mln::make::vec(255, 0, 255), "Magenta", 0 }, + { mln::make::vec(0, 0, 0), "black", 0 }, + { mln::make::vec(255, 0, 0), "red", 0 }, + { mln::make::vec(255, 255, 255), 
"white", 0 }, + { mln::make::vec(0, 255, 0), "green", 0 }, + { mln::make::vec(0, 0, 255), "blue", 0 }, + { mln::make::vec(255, 255, 0), "yellow", 0 }, + { mln::make::vec(255, 165, 0), "orange", 0 }, + { mln::make::vec(255, 192, 203), "pink", 0 }, + { mln::make::vec(192, 192, 192), "grey", 0 }, + { mln::make::vec(64, 224, 208), "turquoise", 0 }, + { mln::make::vec(75, 0, 130), "indigo", 0 }, + { mln::make::vec(238, 130, 238), "violet", 0 }, + { mln::make::vec(0, 255, 255), "cyan", 0 }, + { mln::make::vec(255, 0, 255), "magenta", 0 }, { mln::make::vec(0, 0, 0), 0, 0 } // Invalid }; diff --git a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh index 869795b..0cdebb5 100644 --- a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh @@ -267,7 +267,7 @@ namespace scribo << "\" txt_text_type=\"" << lines(fid).type() << "\" txt_reverse_video=\"" << (lines(fid).reverse_video() ? "true" : "false") << "\" txt_indented=\"" << (lines(fid).indented() ? "true" : "false") - << "\" txt_text_colour=\"" << internal::compute_txt_text_colour(parset(p).color()) + << "\" txt_text_colour=\"" << internal::compute_text_colour(parset(p).color()) << "\" kerning=\"" << lines(fid).char_space(); // EXTENSIONS - Not officially supported @@ -321,7 +321,7 @@ namespace scribo << "\" txt_text_type=\"" << line.type() << "\" txt_reverse_video=\"" << (line.reverse_video() ? "true" : "false") << "\" txt_indented=\"" << (line.indented() ? 
"true" : "false") - << "\" txt_text_colour=\"" << internal::compute_txt_text_colour(line.color()) + << "\" txt_text_colour=\"" << internal::compute_text_colour(line.color()) << "\" kerning=\"" << line.char_space() << "\" baseline=\"" << line.baseline() << "\" meanline=\"" << line.meanline() diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh b/scribo/scribo/io/xml/internal/full_xml_visitor.hh index 6b59e60..a8dfffe 100644 --- a/scribo/scribo/io/xml/internal/full_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh @@ -453,7 +453,7 @@ namespace scribo << "\" txt_text_type=\"" << lines(fid).type() << "\" txt_reverse_video=\"" << (lines(fid).reverse_video() ? "true" : "false") << "\" txt_indented=\"" << (lines(fid).indented() ? "true" : "false") - << "\" txt_text_colour=\"" << internal::compute_txt_text_colour(parset(p).color()) + << "\" txt_text_colour=\"" << internal::compute_text_colour(parset(p).color()) << "\" kerning=\"" << lines(fid).char_space(); // EXTENSIONS - Not officially supported @@ -507,7 +507,7 @@ namespace scribo << "\" txt_text_type=\"" << line.type() << "\" txt_reverse_video=\"" << (line.reverse_video() ? "true" : "false") << "\" txt_indented=\"" << (line.indented() ? 
"true" : "false") - << "\" txt_text_colour=\"" << internal::compute_txt_text_colour(line.color()) + << "\" txt_text_colour=\"" << internal::compute_text_colour(line.color()) << "\" kerning=\"" << line.char_space() << "\" baseline=\"" << line.baseline() << "\" meanline=\"" << line.meanline() diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh index 0014caf..0f3cce1 100644 --- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh @@ -37,6 +37,7 @@ # include <scribo/io/xml/internal/print_box_coords.hh> # include <scribo/io/xml/internal/print_page_preambule.hh> +# include <scribo/io/xml/internal/compute_text_colour.hh> namespace scribo @@ -56,12 +57,11 @@ namespace scribo We use a XML Schema part of the PAGE (Page Analysis and Ground truth Elements) image representation framework. - This schema was used in the Page Segmentation COMPetition - (PSCOMP) for ICDAR 2009. + This schema was used in the Historical Document Layout + Analysis COMPetition (HDLAC) for ICDAR 2011. Its XSD file is located here: - http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.... - + http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent.... */ class page_xml_visitor : public doc_serializer<page_xml_visitor> { @@ -83,6 +83,7 @@ namespace scribo private: // Attributes std::ofstream& output; + mutable int base_vertical_line_id_; }; @@ -104,6 +105,12 @@ namespace scribo void page_xml_visitor::visit(const document<L>& doc) const { + // Make sure there are no duplicate ids for line separators. + // Vertical and horizontal lines are indexed separately from + // 0, so vertical and horizontal lines with the same id + // exist. 
+ base_vertical_line_id_ = doc.hline_seps_comps().nelements(); + // Preambule print_PAGE_preambule(output, doc, true); @@ -121,8 +128,8 @@ namespace scribo if (doc.has_hline_seps()) doc.hline_seps_comps().accept(*this); - output << " </page>" << std::endl; - output << "</pcGts>" << std::endl; + output << " </Page>" << std::endl; + output << "</PcGts>" << std::endl; } @@ -147,16 +154,26 @@ namespace scribo switch (info.type()) { case component::VerticalLineSeparator: + { + output << " <SeparatorRegion id=\"sr" << info.id() + base_vertical_line_id_ + << "\" orientation=\"0.000000\" " + << " colour=\"black\">" << std::endl; + + internal::print_box_coords(output, info.bbox(), " "); + + output << " </SeparatorRegion>" << std::endl; + break; + } + case component::HorizontalLineSeparator: { - output << " <separator_region id=\"sr" << info.id() - << "\" sep_orientation=\"0.000000\" " - << " sep_colour=\"Black\" " - << " sep_bgcolour=\"White\">" << std::endl; + output << " <SeparatorRegion id=\"sr" << info.id() + << "\" orientation=\"0.000000\" " + << " colour=\"black\">" << std::endl; internal::print_box_coords(output, info.bbox(), " "); - output << " </separator_region>" << std::endl; + output << " </SeparatorRegion>" << std::endl; break; } @@ -164,15 +181,15 @@ namespace scribo default: case component::Image: { - output << " <image_region id=\"ir" << info.id() - << "\" img_colour_type=\"24_Bit_Colour\"" - << " img_orientation=\"0.000000\" " - << " img_emb_text=\"No\" " - << " img_bgcolour=\"White\">" << std::endl; + output << " <ImageRegion id=\"ir" << info.id() + << "\" colourDepth=\"colour\"" + << " orientation=\"0.000000\" " + << " embText=\"false\" " + << " bgColour=\"white\">" << std::endl; internal::print_box_coords(output, info.bbox(), " "); - output << " </image_region>" << std::endl; + output << " </ImageRegion>" << std::endl; break; } } @@ -194,20 +211,30 @@ namespace scribo // FIXME: compute that information on the whole paragraph // and use them here. 
line_id_t fid = line_ids(0); - output << " <text_region id=\"" << p - << "\" txt_orientation=\"" << lines(fid).orientation() - << "\" txt_reading_orientation=\"" << lines(fid).reading_orientation() - << "\" txt_reading_direction=\"" << lines(fid).reading_direction() - << "\" txt_text_type=\"" << lines(fid).type() - << "\" txt_reverse_video=\"" << (lines(fid).reverse_video() ? "true" : "false") - << "\" txt_indented=\"" << (lines(fid).indented() ? "true" : "false") + output << " <TextRegion id=\"r" << p + << "\" orientation=\"" << lines(fid).orientation() + << "\" readingOrientation=\"" << lines(fid).reading_orientation() + << "\" readingDirection=\"" << lines(fid).reading_direction() + << "\" type=\"" << ((lines(fid).type() == line::Text) ? "paragraph" : line::type2str(lines(fid).type())) + << "\" reverseVideo=\"" << (lines(fid).reverse_video() ? "true" : "false") + << "\" indented=\"" << (lines(fid).indented() ? "true" : "false") << "\" kerning=\"" << lines(fid).char_space() + << "\" textColour=\"" << compute_text_colour(lines(fid).color()) +// << "\" bgColour=\"" << compute_text_color(lines(fid).bgcolor()) +// << "\" fontSize=\"" << compute_text_color(lines(fid).x_height()) +// << "\" leading=\"" << compute_text_color(lines(fid).leading()) << "\">" << std::endl; + // Add support for text recognition + // <TextEquiv> + // <PlainText></PlainText> + // <Unicode></Unicode> + // </TextEquiv> + internal::print_box_coords(output, parset(p).bbox(), " "); - output << " </text_region>" << std::endl; + output << " </TextRegion>" << std::endl; } } diff --git a/scribo/scribo/io/xml/internal/print_box_coords.hh b/scribo/scribo/io/xml/internal/print_box_coords.hh index ad84709..8549b47 100644 --- a/scribo/scribo/io/xml/internal/print_box_coords.hh +++ b/scribo/scribo/io/xml/internal/print_box_coords.hh @@ -64,20 +64,20 @@ namespace scribo { std::string sc = space; std::string sp = sc + " "; - ostr << sc << "<coords>" << std::endl - << sp << "<point x=\"" << b.pmin().col() + 
ostr << sc << "<Coords>" << std::endl + << sp << "<Point x=\"" << b.pmin().col() << "\" y=\"" << b.pmin().row() << "\"/>" << std::endl - << sp << "<point x=\"" << b.pmax().col() + << sp << "<Point x=\"" << b.pmax().col() << "\" y=\"" << b.pmin().row() << "\"/>" << std::endl - << sp << "<point x=\"" << b.pmax().col() + << sp << "<Point x=\"" << b.pmax().col() << "\" y=\"" << b.pmax().row() << "\"/>" << std::endl - << sp << "<point x=\"" << b.pmin().col() + << sp << "<Point x=\"" << b.pmin().col() << "\" y=\"" << b.pmax().row() << "\"/>" << std::endl - << sc << "</coords>" << std::endl; + << sc << "</Coords>" << std::endl; } diff --git a/scribo/scribo/io/xml/internal/print_page_preambule.hh b/scribo/scribo/io/xml/internal/print_page_preambule.hh index 3ee29be..bcb6b33 100644 --- a/scribo/scribo/io/xml/internal/print_page_preambule.hh +++ b/scribo/scribo/io/xml/internal/print_page_preambule.hh @@ -30,6 +30,7 @@ /// /// \brief Print PAGE XML format preambule. +# include <ctime> # include <fstream> # include <mln/core/alias/box2d.hh> # include <scribo/core/document.hh> @@ -63,27 +64,34 @@ namespace scribo const document<L>& doc, bool with_validation) { - output << "<?xml version=\"1.0\"?>" << std::endl; + output << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl; if (with_validation) - output << "<pcGts xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16\" " + output << "<PcGts xmlns=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19\" " << "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" " - << "xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16 " - << "http://schema.primaresearch.org/PAGE/gts/pagecontent/2009-03-16/pagecontent.xsd\" " - << "pcGtsId=\"" << doc.filename() << "\">" << std::endl; + << "xsi:schemaLocation=\"http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19 " + << "http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent.xsd\">" + << 
std::endl; else - output << "<pcGts>" << std::endl; - - output << " <pcMetadata>" << std::endl; - output << " <pcCreator>LRDE</pcCreator>" << std::endl; - output << " <pcCreated/>" << std::endl; - output << " <pcLastChange/>" << std::endl; - output << " <pcComments>Generated by Scribo from Olena.</pcComments>" << std::endl; - output << " </pcMetadata>" << std::endl; - - output << " <page image_filename=\"" << doc.filename() - << "\" image_width=\"" << doc.width() - << "\" image_height=\"" << doc.height() + output << "<PcGts>" << std::endl; + + + time_t cur_time = time(NULL); + tm * time_struct; + time_struct = localtime(&cur_time); + char time_info[55]; + strftime(time_info, 55, "%Y-%m-%dT%H:%M:%S", time_struct); + + output << " <Metadata>" << std::endl; + output << " <Creator>LRDE</Creator>" << std::endl; + output << " <Created>" << time_info << "</Created>" << std::endl; + output << " <LastChange>" << time_info << "</LastChange>" << std::endl; + output << " <Comments>Generated by Scribo from Olena.</Comments>" << std::endl; + output << " </Metadata>" << std::endl; + + output << " <Page imageFilename=\"" << doc.filename() + << "\" imageWidth=\"" << doc.width() + << "\" imageHeight=\"" << doc.height() << "\">" << std::endl; } -- 1.5.6.5
participants (1)
- Guillaume Lazzara