last-svn-commit-680-g7f18abf Backup FineReader XML schema

---
 scribo/sandbox/z/FineReader9-schema-v1.xml |  447 ++++++++++++++++++++++++++++
 1 files changed, 447 insertions(+), 0 deletions(-)
 create mode 100644 scribo/sandbox/z/FineReader9-schema-v1.xml

diff --git a/scribo/sandbox/z/FineReader9-schema-v1.xml b/scribo/sandbox/z/FineReader9-schema-v1.xml
new file mode 100644
index 0000000..6c8beb2
--- /dev/null
+++ b/scribo/sandbox/z/FineReader9-schema-v1.xml
@@ -0,0 +1,447 @@
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
+           targetNamespace="http://www.abbyy.com/FineReader_xml/FineReader9-schema-v1.xml"
+           xmlns:tns="http://www.abbyy.com/FineReader_xml/FineReader9-schema-v1.xml"
+           elementFormDefault="qualified">
+  <xs:annotation>
+    <xs:documentation xml:lang="en">Schema for representing OCR results exported from FineReader 9.0 SDK. Copyright 2001-2007 ABBYY, Inc.
+    </xs:documentation>
+  </xs:annotation>
+
+  <xs:element name="document">
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element name="documentData" minOccurs="0" maxOccurs="1">
+          <xs:annotation>
+            <xs:documentation xml:lang="en">Global document data
+            </xs:documentation>
+          </xs:annotation>
+          <xs:complexType>
+            <xs:sequence>
+              <xs:element name="fontStyles" minOccurs="0" maxOccurs="1">
+                <xs:annotation>
+                  <xs:documentation xml:lang="en">Font formatting styles collection
+                  </xs:documentation>
+                </xs:annotation>
+                <xs:complexType>
+                  <xs:sequence>
+                    <xs:element name="fontStyle" minOccurs="0" maxOccurs="unbounded" type="tns:FontStyleType">
+                      <xs:annotation>
+                        <xs:documentation xml:lang="en">Font formatting style
+                        </xs:documentation>
+                      </xs:annotation>
+                    </xs:element>
+                  </xs:sequence>
+                </xs:complexType>
+              </xs:element>
+              <xs:element name="paragraphStyles" minOccurs="0" maxOccurs="1">
+                <xs:annotation>
+                  <xs:documentation xml:lang="en">Paragraph formatting styles collection
+                  </xs:documentation>
+                </xs:annotation>
+                <xs:complexType>
+                  <xs:sequence>
+                    <xs:element name="paragraphStyle" minOccurs="0" maxOccurs="unbounded" type="tns:ParagraphStyleType">
+                      <xs:annotation>
+                        <xs:documentation xml:lang="en">Paragraph formatting style
+                        </xs:documentation>
+                      </xs:annotation>
+                    </xs:element>
+                  </xs:sequence>
+                </xs:complexType>
+              </xs:element>
+              <xs:element name="sections" minOccurs="0" maxOccurs="1">
+                <xs:annotation>
+                  <xs:documentation xml:lang="en">Document sections collection
+                  </xs:documentation>
+                </xs:annotation>
+                <xs:complexType>
+                  <xs:sequence>
+                    <xs:element name="section" minOccurs="0" maxOccurs="unbounded" type="tns:SectionType">
+                      <xs:annotation>
+                        <xs:documentation xml:lang="en">Section
+                        </xs:documentation>
+                      </xs:annotation>
+                    </xs:element>
+                  </xs:sequence>
+                </xs:complexType>
+              </xs:element>
+            </xs:sequence>
+          </xs:complexType>
+        </xs:element>
+        <xs:element name="page" minOccurs="0" maxOccurs="unbounded">
+          <xs:annotation>
+            <xs:documentation xml:lang="en">Recognized page
+            </xs:documentation>
+          </xs:annotation>
+          <xs:complexType>
+            <xs:sequence>
+              <xs:element name="block" minOccurs="0" maxOccurs="unbounded" type="tns:BlockType">
+                <xs:annotation>
+                  <xs:documentation xml:lang="en">Recognized block
+                  </xs:documentation>
+                </xs:annotation>
+              </xs:element>
+            </xs:sequence>
+            <xs:attribute name="width" type="xs:integer" use="required"/>
+            <xs:attribute name="height" type="xs:integer" use="required"/>
+            <xs:attribute name="resolution" type="xs:integer" use="required"/>
+            <xs:attribute name="originalCoords" type="xs:boolean" use="optional" default="false">
+              <xs:annotation>
+                <xs:documentation xml:lang="en">If true, all coordinates are relative to original image before opening, otherwise they are relative to the opened (deskewed) image</xs:documentation>
+              </xs:annotation>
+            </xs:attribute>
+          </xs:complexType>
+        </xs:element>
+      </xs:sequence>
+      <xs:attribute name="version" type="xs:string" use="required"/>
+      <xs:attribute name="producer" type="xs:string" use="required"/>
+      <xs:attribute name="pagesCount" type="xs:integer" use="optional" default="1"/>
+      <xs:attribute name="mainLanguage" type="xs:string" use="optional"/>
+      <xs:attribute name="languages" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:complexType name ="ParagraphStyleType">
+    <xs:attribute name="id" type="xs:integer" use="required" />
+    <xs:attribute name="name" type="xs:string" use="required" />
+    <xs:attribute name="mainFontStyleId" type="xs:integer" use="required" />
+    <xs:attribute name="role" use="required">
+      <xs:simpleType>
+        <xs:restriction base="xs:string">
+          <xs:enumeration value="text"/>
+          <xs:enumeration value="tableText"/>
+          <xs:enumeration value="heading"/>
+          <xs:enumeration value="tableHeading"/>
+          <xs:enumeration value="pictureCaption"/>
+          <xs:enumeration value="tableCaption"/>
+          <xs:enumeration value="footnote" />
+          <xs:enumeration value="endnote" />
+          <xs:enumeration value="rt" />
+          <xs:enumeration value="garb" />
+          <xs:enumeration value="other" />
+        </xs:restriction>
+      </xs:simpleType>
+    </xs:attribute>
+    <xs:attribute name="roleLevel" type="xs:integer" use="optional" default="-1" />
+    <xs:attribute name="align" type="tns:ParagraphAlignment" use="required" />
+    <xs:attribute name="width" type="xs:integer" use="optional" default="-1" />
+    <xs:attribute name="before" type="xs:integer" use="optional" default="-1" />
+    <xs:attribute name="after" type="xs:integer" use="optional" default="-1" />
+    <xs:attribute name="lineSpacing" type="xs:integer" use="optional" default="0" />
+    <xs:attribute name="startIndent" type="xs:integer" use="required"/>
+  </xs:complexType>
+
+  <xs:complexType name="FontStyleType">
+    <xs:attribute name="id" type="xs:integer" use="required" />
+    <xs:attribute name="name" type="xs:string" use="required" />
+    <xs:attribute name="italic" type="xs:boolean" use="optional" default="false" />
+    <xs:attribute name="bold" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="smallcaps" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="scaling" type="xs:integer" use="optional" default="1000" />
+    <xs:attribute name="spacing" type="xs:integer" use="optional" default="0" />
+    <xs:attribute name="ff" type="xs:string" use="required"/>
+    <xs:attribute name="fs" type="xs:integer" use="required"/>
+  </xs:complexType>
+
+  <xs:complexType name="SectionType">
+    <xs:sequence>
+      <xs:element name="stream" minOccurs="0" maxOccurs="unbounded" type="tns:TextStreamType">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Text Stream is the sequence of paragraphs and/or blocks
+          </xs:documentation>
+        </xs:annotation>
+      </xs:element>
+    </xs:sequence>
+  </xs:complexType>
+
+  <xs:complexType name="TextStreamType">
+    <xs:sequence>
+      <xs:element name="parId" minOccurs="0" maxOccurs="unbounded">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Id of paragraph or block
+          </xs:documentation>
+        </xs:annotation>
+        <xs:complexType>
+          <xs:attribute name="id" type="xs:string" use="required" />
+        </xs:complexType>
+      </xs:element>
+    </xs:sequence>
+    <xs:attribute name="role" use="optional" default="text">
+      <xs:simpleType>
+        <xs:restriction base="xs:string">
+          <xs:enumeration value="undef" />
+          <xs:enumeration value="garb" />
+          <xs:enumeration value="text" />
+          <xs:enumeration value="footnote" />
+          <xs:enumeration value="endnote" />
+          <xs:enumeration value="incut" />
+          <xs:enumeration value="rt" />
+        </xs:restriction>
+      </xs:simpleType>
+    </xs:attribute>
+    <xs:attribute name="rtId" type="xs:integer" use="optional" default="0" />
+    <xs:attribute name="prevPar" type="xs:string" use="optional" default="" />
+    <xs:attribute name="nextPar" type="xs:string" use="optional" default="" />
+  </xs:complexType>
+
+  <xs:complexType name="BlockType">
+    <xs:sequence>
+      <xs:element name="region" minOccurs="1" maxOccurs="1">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Block region, the set of rectangles
+          </xs:documentation>
+        </xs:annotation>
+        <xs:complexType>
+          <xs:sequence>
+            <xs:element name="rect" minOccurs="1" maxOccurs="unbounded">
+              <xs:complexType>
+                <xs:attribute name="l" type="xs:integer" use="required"/>
+                <xs:attribute name="t" type="xs:integer" use="required"/>
+                <xs:attribute name="r" type="xs:integer" use="required"/>
+                <xs:attribute name="b" type="xs:integer" use="required"/>
+              </xs:complexType>
+            </xs:element>
+          </xs:sequence>
+        </xs:complexType>
+      </xs:element>
+      <xs:element name="text" minOccurs="0" maxOccurs="1" type="tns:TextType">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Recognized block text, presents if blockType attribute is Text</xs:documentation>
+        </xs:annotation>
+      </xs:element>
+      <xs:element name="row" minOccurs="0" maxOccurs="unbounded" type="tns:TableRowType">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">The set of table rows, presents if blockType attribute is Table</xs:documentation>
+        </xs:annotation>
+      </xs:element>
+    </xs:sequence>
+    <xs:attribute name="blockType" use="required">
+      <xs:simpleType>
+        <xs:restriction base="xs:string">
+          <xs:enumeration value="Text"/>
+          <xs:enumeration value="Table"/>
+          <xs:enumeration value="Picture"/>
+          <xs:enumeration value="Barcode"/>
+        </xs:restriction>
+      </xs:simpleType>
+    </xs:attribute>
+    <xs:attribute name="blockName" type="xs:string" use="optional"/>
+    <xs:attribute name="isHidden" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="l" type="xs:integer" use="required"/>
+    <xs:attribute name="t" type="xs:integer" use="required"/>
+    <xs:attribute name="r" type="xs:integer" use="required"/>
+    <xs:attribute name="b" type="xs:integer" use="required"/>
+  </xs:complexType>
+
+
+
+  <xs:complexType name="TextType">
+    <xs:sequence>
+      <xs:element name="par" minOccurs="0" maxOccurs="unbounded" type="tns:ParagraphType">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Text paragraph</xs:documentation>
+        </xs:annotation>
+      </xs:element>
+    </xs:sequence>
+    <xs:attribute name="orientation" use="optional" default="Normal">
+      <xs:simpleType>
+        <xs:restriction base="xs:string">
+          <xs:enumeration value="Normal"/>
+          <xs:enumeration value="RotatedClockwise"/>
+          <xs:enumeration value="RotatedUpsidedown"/>
+          <xs:enumeration value="RotatedCounterclockwise"/>
+        </xs:restriction>
+      </xs:simpleType>
+    </xs:attribute>
+    <xs:attribute name="backgroundColor" type="xs:integer" use="optional" default="16777215"/>
+    <xs:attribute name="mirrored" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="inverted" type="xs:boolean" use="optional" default="false"/>
+  </xs:complexType>
+
+
+  <xs:complexType name="TableRowType">
+    <xs:sequence>
+      <xs:element name="cell" minOccurs="0" maxOccurs="unbounded">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Table cell</xs:documentation>
+        </xs:annotation>
+        <xs:complexType>
+          <xs:sequence>
+            <xs:element name="text" minOccurs="0" maxOccurs="unbounded" type="tns:TextType">
+              <xs:annotation>
+                <xs:documentation xml:lang="en">Cell text</xs:documentation>
+              </xs:annotation>
+            </xs:element>
+          </xs:sequence>
+          <xs:attribute name="colSpan" type="xs:integer" use="optional" default="1"/>
+          <xs:attribute name="rowSpan" type="xs:integer" use="optional" default="1"/>
+          <xs:attribute name="align" use="optional" default="Top">
+            <xs:simpleType>
+              <xs:restriction base="xs:string">
+                <xs:enumeration value="Top"/>
+                <xs:enumeration value="Center"/>
+                <xs:enumeration value="Bottom"/>
+              </xs:restriction>
+            </xs:simpleType>
+          </xs:attribute>
+          <xs:attribute name="picture" type="xs:boolean" use="optional" default="false"/>
+          <xs:attribute name="leftBorder" use="optional" type="tns:TableCellBorderType" default="Black"/>
+          <xs:attribute name="topBorder" use="optional" type="tns:TableCellBorderType" default="Black"/>
+          <xs:attribute name="rightBorder" use="optional" type="tns:TableCellBorderType" default="Black"/>
+          <xs:attribute name="bottomBorder" use="optional" type="tns:TableCellBorderType" default="Black"/>
+          <xs:attribute name="width" type="xs:integer" use="required"/>
+          <xs:attribute name="height" type="xs:integer" use="required"/>
+        </xs:complexType>
+      </xs:element>
+    </xs:sequence>
+  </xs:complexType>
+
+
+  <xs:complexType name="ParagraphType">
+    <xs:sequence>
+      <xs:element name="line" minOccurs="0" maxOccurs="unbounded" type="tns:LineType">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Text paragraph line</xs:documentation>
+        </xs:annotation>
+      </xs:element>
+    </xs:sequence>
+    <xs:attribute name="dropCapCharsCount" type="xs:integer" use="optional" default="0"/>
+    <xs:attribute name="dropCap-l" type="xs:integer" use="optional"/>
+    <xs:attribute name="dropCap-t" type="xs:integer" use="optional"/>
+    <xs:attribute name="dropCap-r" type="xs:integer" use="optional"/>
+    <xs:attribute name="dropCap-b" type="xs:integer" use="optional"/>
+    <xs:attribute name="align" type="tns:ParagraphAlignment" use="optional" default="Left" />
+    <xs:attribute name="leftIndent" type="xs:integer" use="optional" default="0"/>
+    <xs:attribute name="rightIndent" type="xs:integer" use="optional" default="0"/>
+    <xs:attribute name="startIndent" type="xs:integer" use="optional" default="0"/>
+    <xs:attribute name="lineSpacing" type="xs:integer" use="optional" default="0"/>
+    <xs:attribute name="id" type="xs:string" use="optional" />
+    <xs:attribute name="style" type="xs:integer" use="optional" />
+  </xs:complexType>
+
+  <xs:simpleType name="ParagraphAlignment">
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="Left"/>
+      <xs:enumeration value="Center"/>
+      <xs:enumeration value="Right"/>
+      <xs:enumeration value="Justified"/>
+    </xs:restriction>
+  </xs:simpleType>
+
+  <xs:complexType name="LineType">
+    <xs:sequence>
+      <xs:element name="formatting" minOccurs="0" maxOccurs="unbounded" type="tns:FormattingType">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Group of characters with uniform formatting</xs:documentation>
+        </xs:annotation>
+      </xs:element>
+    </xs:sequence>
+    <xs:attribute name="baseline" type="xs:integer" use="required"/>
+    <xs:attribute name="l" type="xs:integer" use="required"/>
+    <xs:attribute name="t" type="xs:integer" use="required"/>
+    <xs:attribute name="r" type="xs:integer" use="required"/>
+    <xs:attribute name="b" type="xs:integer" use="required"/>
+  </xs:complexType>
+
+  <xs:complexType name="FormattingType" mixed="true">
+    <xs:sequence>
+      <xs:choice minOccurs="0" maxOccurs="unbounded">
+        <xs:annotation>
+          <xs:documentation xml:lang="en">Attributes of characters are alternated with word's recognition variants. The variants of recognition of the word are written before the word</xs:documentation>
+        </xs:annotation>
+        <xs:element name="charParams" type="tns:CharParamsType">
+          <xs:annotation>
+            <xs:documentation xml:lang="en">Attributes of single character</xs:documentation>
+          </xs:annotation>
+        </xs:element>
+        <xs:element name="wordRecVariants">
+          <xs:annotation>
+            <xs:documentation xml:lang="en">Variants of recognition of the next word</xs:documentation>
+          </xs:annotation>
+          <xs:complexType>
+            <xs:sequence>
+              <xs:element name="wordRecVariant" minOccurs="0" maxOccurs="unbounded" type="tns:WordRecognitionVariant"/>
+            </xs:sequence>
+          </xs:complexType>
+        </xs:element>
+      </xs:choice>
+    </xs:sequence>
+    <xs:attribute name="lang" type="xs:string" use="required"/>
+    <xs:attribute name="ff" type="xs:string" use="optional"/>
+    <xs:attribute name="fs" type="xs:float" use="optional"/>
+    <xs:attribute name="bold" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="italic" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="subscript" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="superscript" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="smallcaps" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="underline" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="strikeout" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="color" type="xs:integer" use="optional" default="0"/>
+    <xs:attribute name="scaling" type="xs:integer" use="optional" default="1000"/>
+    <xs:attribute name="spacing" type="xs:integer" use="optional" default="0"/>
+  </xs:complexType>
+
+  <xs:complexType name="WordRecognitionVariant">
+    <xs:sequence>
+      <xs:element name="variantText" minOccurs="1" maxOccurs="1">
+        <xs:complexType mixed="true">
+          <xs:sequence>
+            <xs:element name="charParams" minOccurs="0" maxOccurs="unbounded" type="tns:CharParamsType"/>
+          </xs:sequence>
+        </xs:complexType>
+      </xs:element>
+    </xs:sequence>
+    <xs:attribute name="wordFromDictionary" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordNormal" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordNumeric" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordIdentifier" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordPenalty" type="xs:integer" use="optional"/>
+    <xs:attribute name="meanStrokeWidth" type="xs:integer" use="optional"/>
+  </xs:complexType>
+
+  <xs:complexType name="CharRecognitionVariant" mixed="true">
+    <xs:attribute name="charConfidence" type="xs:integer" use="optional"/>
+    <xs:attribute name="serifProbability" type="xs:integer" use="optional"/>
+  </xs:complexType>
+
+  <xs:complexType name="CharParamsType" mixed="true">
+    <xs:sequence>
+      <xs:element name="charRecVariants" minOccurs="0">
+        <xs:complexType>
+          <xs:sequence>
+            <xs:element name="charRecVariant" minOccurs="0" maxOccurs="unbounded" type="tns:CharRecognitionVariant"/>
+          </xs:sequence>
+        </xs:complexType>
+      </xs:element>
+    </xs:sequence>
+    <xs:attribute name="l" type="xs:integer" use="required"/>
+    <xs:attribute name="t" type="xs:integer" use="required"/>
+    <xs:attribute name="r" type="xs:integer" use="required"/>
+    <xs:attribute name="b" type="xs:integer" use="required"/>
+    <xs:attribute name="suspicious" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="proofed" type="xs:boolean" use="optional" default="false"/>
+    <xs:attribute name="wordStart" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordFromDictionary" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordNormal" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordNumeric" type="xs:boolean" use="optional"/>
+    <xs:attribute name="wordIdentifier" type="xs:boolean" use="optional"/>
+    <xs:attribute name="charConfidence" type="xs:integer" use="optional"/>
+    <xs:attribute name="serifProbability" type="xs:integer" use="optional"/>
+    <xs:attribute name="wordPenalty" type="xs:integer" use="optional"/>
+    <xs:attribute name="meanStrokeWidth" type="xs:integer" use="optional"/>
+    <xs:attribute name="characterHeight" type="xs:integer" use="optional"/>
+    <xs:attribute name="hasUncertainHeight" type="xs:boolean" use="optional"/>
+    <xs:attribute name="baseLine" type="xs:integer" use="optional"/>
+  </xs:complexType>
+
+  <xs:simpleType name="TableCellBorderType">
+    <xs:restriction base="xs:string">
+      <xs:enumeration value="Absent"/>
+      <xs:enumeration value="Unknown"/>
+      <xs:enumeration value="White"/>
+      <xs:enumeration value="Black"/>
+    </xs:restriction>
+  </xs:simpleType>
+
+
+</xs:schema>
-- 
1.5.6.5
participants (1)
-
Guillaume Lazzara