* scribo/io/xml/internal/page_xml_visitor.hh: Here.
---
scribo/ChangeLog | 6 ++++
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 27 +++++++++++++++-----
2 files changed, 26 insertions(+), 7 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index af65bd5..c083e6e 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,11 @@
2013-03-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Add standard support for OCR output in PAGE format.
+
+ * scribo/io/xml/internal/page_xml_visitor.hh: Here.
+
+2013-03-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Fix sauvola_ms test.
* tests/binarization/sauvola_ms.cc,
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 4c15e83..33ec740 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2011, 2013 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -268,14 +269,26 @@ namespace scribo
<< "\">"
<< std::endl;
- // Add support for text recognition
- // <TextEquiv>
- // <PlainText></PlainText>
- // <Unicode></Unicode>
- // </TextEquiv>
-
+ // Save coordinates.
internal::print_image_coords(output, par, " ");
+ // Save text recognition results.
+ output << "<TextEquiv>" << std::endl
+ << "<PlainText></PlainText>" << std::endl;
+
+ output << "<Unicode>";
+
+ // Retrieve and merge text from paragraph lines.
+ for_all_paragraph_lines(lid, line_ids)
+ {
+ line_id_t l = line_ids(lid);
+ if (lines(l).has_text())
+ output << lines(l).html_text() << std::endl;
+ }
+
+ output << "</Unicode>" << std::endl
+ << "</TextEquiv>" << std::endl;
+
output << " </TextRegion>" << std::endl;
}
}
--
1.7.2.5