olena-2.0-313-g60618b0 Add standard support for OCR output in PAGE format.

* scribo/io/xml/internal/page_xml_visitor.hh: Here.
---
 scribo/ChangeLog                                  |  6 ++++
 scribo/scribo/io/xml/internal/page_xml_visitor.hh | 27 +++++++++++++++-----
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index af65bd5..c083e6e 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,11 @@
 2013-03-07 Guillaume Lazzara <z@lrde.epita.fr>
 
+	Add standard support for OCR output in PAGE format.
+
+	* scribo/io/xml/internal/page_xml_visitor.hh: Here.
+
+2013-03-07 Guillaume Lazzara <z@lrde.epita.fr>
+
 	Fix sauvola_ms test.
 
 	* tests/binarization/sauvola_ms.cc,
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 4c15e83..33ec740 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2011, 2013 EPITA Research and Development Laboratory
+// (LRDE)
 //
 // This file is part of Olena.
 //
@@ -268,14 +269,26 @@ namespace scribo
                  << "\">"
                  << std::endl;
 
-          // Add support for text recognition
-          // <TextEquiv>
-          // <PlainText></PlainText>
-          // <Unicode></Unicode>
-          // </TextEquiv>
-
+          // Save coordinates.
           internal::print_image_coords(output, par, " ");
 
+          // Save text recognition results.
+          output << "<TextEquiv>" << std::endl
+                 << "<PlainText></PlainText>" << std::endl;
+
+          output << "<Unicode>";
+
+          // Retrieve and merge text from paragraph lines.
+          for_all_paragraph_lines(lid, line_ids)
+          {
+            line_id_t l = line_ids(lid);
+            if (lines(l).has_text())
+              output << lines(l).html_text() << std::endl;
+          }
+
+          output << "</Unicode>" << std::endl
+                 << "</TextEquiv>" << std::endl;
+
           output << " </TextRegion>" << std::endl;
         }
 }
-- 
1.7.2.5
participants (1)
-
Guillaume Lazzara