
---
Reviewer notes (text between the "---" separator and the diffstat is not
part of the commit message):

 * The line structure of this patch had been collapsed; it is restored
   here.  Indentation and the position of blank context lines were
   reconstructed so that every hunk matches the line counts of its "@@"
   header -- TODO: verify the context lines against the tree before
   applying.
 * xml.cc: XML::~XML() now closes the <table> element that XML::table()
   leaves open after its last call; previously "</document>" was written
   while a <table> was still open.
 * xml.cc: the XML declaration was emitted as <?xml version"1.0" ...?>;
   the missing '=' is added.
 * The xml.cc hunk header and the diffstat are updated for the two fixes
   above.  The post-image blob id on the xml.cc "index" line is the one
   from the original patch and no longer corresponds to the amended
   content.

 scribo/sandbox/icdar_13_table/Makefile             |    2 +-
 .../competition-entry-region-model.xsd             |   45 +++++++++++++++++++
 scribo/sandbox/icdar_13_table/src/new.cc           |    5 +--
 scribo/sandbox/icdar_13_table/src/xml.cc           |   52 ++++++++++++++------
 scribo/sandbox/icdar_13_table/src/xml.hh           |    9 +++-
 5 files changed, 90 insertions(+), 23 deletions(-)
 create mode 100644 scribo/sandbox/icdar_13_table/competition-entry-region-model.xsd

diff --git a/scribo/sandbox/icdar_13_table/Makefile b/scribo/sandbox/icdar_13_table/Makefile
index 8d0cd1a..7c11bf1 100644
--- a/scribo/sandbox/icdar_13_table/Makefile
+++ b/scribo/sandbox/icdar_13_table/Makefile
@@ -1,6 +1,6 @@
 CCACHE=ccache
 CC=g++
-CFLAGS=-Wall -Werror -O3 -DHAVE_TESSERACT_3 -DNDEBUG
+CFLAGS=-Wall -Werror -O3 -DHAVE_TESSERACT_3 -DNDEBUG -DMLN_WO_GLOBAL_VARS
 CLIBS=-I../../../milena/ -I../../ -I/usr/include/poppler
 CLEAN=*.o output/* log final.xml
 
diff --git a/scribo/sandbox/icdar_13_table/competition-entry-region-model.xsd b/scribo/sandbox/icdar_13_table/competition-entry-region-model.xsd
new file mode 100644
index 0000000..2e0d0d4
--- /dev/null
+++ b/scribo/sandbox/icdar_13_table/competition-entry-region-model.xsd
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="utf-8"?>
+<xsd:schema attributeFormDefault="unqualified" elementFormDefault="qualified" version="1.0"
+  xmlns:xsd="http://www.w3.org/2001/XMLSchema" >
+  <xsd:element name="document">
+    <xsd:complexType>
+      <xsd:sequence>
+        <xsd:element minOccurs="0" maxOccurs="unbounded" name="table"> <!-- a document can contain 0 or more tables -->
+          <xsd:complexType>
+            <xsd:sequence>
+              <xsd:element name="region" maxOccurs="unbounded" nillable="false"> <!-- each table must contain 1 or more regions -->
+                <xsd:complexType>
+                  <xsd:sequence>
+                    <xsd:element name="instruction" minOccurs="0" maxOccurs="unbounded"> <!-- the instructions are optional -->
+                      <xsd:complexType>
+                        <xsd:attribute name="instr-id" type="xsd:integer" use="required"/>
+                        <xsd:attribute name="subinstr-id" type="xsd:integer"/>
+                        <!--<xsd:attribute name="text" type="xsd:string" use="required"/>
+                        <xsd:attribute name="x1" type="xsd:integer" use="required"/>
+                        <xsd:attribute name="y1" type="xsd:integer" use="required"/>
+                        <xsd:attribute name="x2" type="xsd:integer" use="required"/>
+                        <xsd:attribute name="y2" type="xsd:integer" use="required"/>-->
+                      </xsd:complexType>
+                    </xsd:element>
+                    <xsd:element name="bounding-box"> <!-- each region contains one bounding box -->
+                      <xsd:complexType>
+                        <xsd:attribute name="x1" type="xsd:integer" use="required"/>
+                        <xsd:attribute name="y1" type="xsd:integer" use="required"/>
+                        <xsd:attribute name="x2" type="xsd:integer" use="required"/>
+                        <xsd:attribute name="y2" type="xsd:integer" use="required"/>
+                      </xsd:complexType>
+                    </xsd:element>
+                  </xsd:sequence>
+                  <xsd:attribute name="id" type="xsd:nonNegativeInteger" use="required"/>
+                  <xsd:attribute name="page" type="xsd:positiveInteger" use="required"/>
+                </xsd:complexType>
+              </xsd:element>
+            </xsd:sequence>
+            <xsd:attribute name="id" type="xsd:nonNegativeInteger" use="required"/>
+          </xsd:complexType>
+        </xsd:element>
+      </xsd:sequence>
+      <xsd:attribute name="filename" type="xsd:string" use="required"/>
+    </xsd:complexType>
+  </xsd:element>
+</xsd:schema>
diff --git a/scribo/sandbox/icdar_13_table/src/new.cc b/scribo/sandbox/icdar_13_table/src/new.cc
index 95bf575..9f05030 100644
--- a/scribo/sandbox/icdar_13_table/src/new.cc
+++ b/scribo/sandbox/icdar_13_table/src/new.cc
@@ -1,3 +1,4 @@
+#undef MLN_WO_GLOBAL_VARS
 #include "xml.hh"
 
 // INCLUDES OLENA
@@ -231,7 +232,6 @@ int main(int argc, char** argv)
   typedef value::label_16 V;
   typedef image2d<V> L;
 
-  //std::ofstream xml;
   std::ostringstream path;
   image2d<value::rgb8> original, ima_links, ima_groups, ima_valid;
   image2d<value::int_u8> filtered;
@@ -241,7 +241,6 @@ int main(int argc, char** argv)
   unsigned dpi = 72;
 
   // Loading and binarization
-  //start_xml(xml, "final.xml", argv[1]);
   XML* xml = new XML("final.xml", argv[1]);
   util::array< image2d<value::rgb8> > pdf;
 
@@ -395,8 +394,6 @@ int main(int argc, char** argv)
     write_image(ima_valid, "valid", page, number, path);
   }
 
-
-  //end_xml(xml);
   delete xml;
 
   return 0;
diff --git a/scribo/sandbox/icdar_13_table/src/xml.cc b/scribo/sandbox/icdar_13_table/src/xml.cc
index 76fed84..86e66e2 100644
--- a/scribo/sandbox/icdar_13_table/src/xml.cc
+++ b/scribo/sandbox/icdar_13_table/src/xml.cc
@@ -1,31 +1,51 @@
 #include "xml.hh"
 
-XML::XML(const char* name, const char* pdf)
-  : _name(name), _pdf(pdf)
+XML::XML(const char* name,
+         const char* pdf)
+  : _name(name), _pdf(pdf), _table(0), _region(0), _first_time(true)
 {
   _xml.open(_name);
-  _xml << "<?xml version\"1.0\" encoding=\"UTF-8\"?>" << std::endl
-    << "<document filename='" << _pdf << "'>" << std::endl;
+  _xml << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" << std::endl
+       << "<document filename='" << _pdf << "'>" << std::endl;
 }
 
-XML::~XML()
+XML::~XML(void)
 {
+  // Close the <table> left open by the last call to table(), if any.
+  if (!_first_time)
+    _xml << "\t</table>" << std::endl;
   _xml << "</document>" << std::endl;
   _xml.close();
 }
 
-void XML::write_table(const point2d& start, const point2d& end)
+void XML::table(const point2d& start,
+                const point2d& end,
+                const unsigned page,
+                const bool connect)
 {
-  static unsigned table = 0;
-  static unsigned region = 0;
-  static unsigned page = 1;
+  if (_first_time)
+  {
+    _xml << "\t<table id='" << _table << "'>" << std::endl;
+    ++_table;
+    _first_time = false;
+  }
+  else
+  {
+    if (!connect)
+    {
+      _xml << "\t</table>" << std::endl;
+      _xml << "\t<table id='" << _table << "'>" << std::endl;
+      _region = 0;
+      ++_table;
+    }
+  }
 
-  _xml << "\t<table id='" << table << "'>" << std::endl
-    << "\t\t<region id='" << region << "' page='" << page << "'>" << std::endl
-    << "\t\t<bounding-box x1='" << start[1] << "' y1='" << start[0] << "' "
-    << "x2='" << end[1] << "' y2='" << end[0] << "'/>" << std::endl
-    << "\t\t</region>" << std::endl
-    << "\t</table>" << std::endl;
+  _xml << "\t\t<region id='" << _region << "' page='" << page + 1 << "'>" << std::endl
+       << "\t\t\t<bounding-box x1='" << start[1]
+       << "' y1='" << start[0]
+       << "' x2='" << end[1]
+       << "' y2='" << end[0] << "'/>" << std::endl
+       << "\t\t</region>" << std::endl;
 
-  ++table;
+  ++_region;
 }
diff --git a/scribo/sandbox/icdar_13_table/src/xml.hh b/scribo/sandbox/icdar_13_table/src/xml.hh
index 397d585..b4cef29 100644
--- a/scribo/sandbox/icdar_13_table/src/xml.hh
+++ b/scribo/sandbox/icdar_13_table/src/xml.hh
@@ -1,6 +1,5 @@
 #ifndef XML_HH
 # define XML_HH
-# define MLN_WO_GLOBAL_VARS
 
 # include <iostream>
 # include <fstream>
@@ -13,12 +12,18 @@ class XML
   public:
     XML(const char* name, const char* pdf);
     ~XML();
-    void write_table(const point2d& start, const point2d& end);
+    void table(const point2d& start,
+               const point2d& end,
+               const unsigned page,
+               const bool connect);
 
   private:
     std::ofstream _xml;
     const char* _name;
     const char* _pdf;
+    unsigned _table;
+    unsigned _region;
+    bool _first_time;
 };
 
 #endif /* !XML_HH */
-- 
1.7.2.5