
* demo/xml2doc/image_crop.cc, * demo/xml2doc/templates/pdf/regions.xsl: Handle new XML tags. * scribo/core/document.hh, * scribo/core/tag/component.hh, * scribo/io/img/internal/debug_img_visitor.hh, * scribo/io/img/internal/full_img_visitor.hh, * scribo/io/img/internal/non_text_img_visitor.hh, * scribo/io/xml/internal/extended_page_xml_visitor.hh, * scribo/io/xml/internal/full_xml_visitor.hh, * scribo/io/xml/internal/page_xml_visitor.hh: Update enum declaration and handle new cases. --- scribo/ChangeLog | 18 ++++++++ scribo/demo/xml2doc/image_crop.cc | 7 ++- scribo/demo/xml2doc/templates/pdf/regions.xsl | 3 +- scribo/scribo/core/document.hh | 4 +- scribo/scribo/core/tag/component.hh | 16 +++++-- scribo/scribo/io/img/internal/debug_img_visitor.hh | 3 +- scribo/scribo/io/img/internal/full_img_visitor.hh | 3 +- .../scribo/io/img/internal/non_text_img_visitor.hh | 3 +- .../io/xml/internal/extended_page_xml_visitor.hh | 24 +++++++++- scribo/scribo/io/xml/internal/full_xml_visitor.hh | 45 +++++++++++++++++--- scribo/scribo/io/xml/internal/page_xml_visitor.hh | 3 +- 11 files changed, 105 insertions(+), 24 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index efc42cf..e857752 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,23 @@ 2011-05-05 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + Explicitly set vertical or horizontal attribute for separators in + XML output. + + * demo/xml2doc/image_crop.cc, + * demo/xml2doc/templates/pdf/regions.xsl: Handle new XML tags. + + * scribo/core/document.hh, + * scribo/core/tag/component.hh, + * scribo/io/img/internal/debug_img_visitor.hh, + * scribo/io/img/internal/full_img_visitor.hh, + * scribo/io/img/internal/non_text_img_visitor.hh, + * scribo/io/xml/internal/extended_page_xml_visitor.hh, + * scribo/io/xml/internal/full_xml_visitor.hh, + * scribo/io/xml/internal/page_xml_visitor.hh: Update enum + declaration and handle new cases. + +2011-05-05 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + Make use of opacity in PDF output. * demo/xml2doc/image_crop.cc: Create opacity mask. diff --git a/scribo/demo/xml2doc/image_crop.cc b/scribo/demo/xml2doc/image_crop.cc index 7d61f75..72fd2b4 100644 --- a/scribo/demo/xml2doc/image_crop.cc +++ b/scribo/demo/xml2doc/image_crop.cc @@ -101,7 +101,7 @@ void ImageCrop::from_base64() child = child.firstChild(); while (!child.isNull()) { - if (child.toElement().tagName().contains(QRegExp("(image|graphic|chart|separator|table)_region"))) + if (child.toElement().tagName().contains(QRegExp("(image|graphic|chart|horizontal_separator|vertical_separator|table)_region"))) { QDomNode node = child.firstChild(); QString id = child.toElement().attribute("id", "none"); @@ -208,7 +208,7 @@ void ImageCrop::to_base64(const QString& out_file, bool no_crop) while(!line.contains("</pcGts>")) { stream2 << "\n" << line; - if (line.contains(QRegExp("<(image|graphic|chart|separator|table)_region"))) + if (line.contains(QRegExp("<(image|graphic|chart|horizontal_separator|vertical_separator|table)_region"))) { stream2 << "\n" << " <container>\n"; stream2 << " <mime>png</mime>\n"; @@ -268,7 +268,7 @@ bool ImageCrop::crop_regions(bool temp) while (!region.isNull()) { - if (region.toElement().tagName().contains(QRegExp("(image|graphic|chart|separator|table)_region"))) + if (region.toElement().tagName().contains(QRegExp("(image|graphic|chart|vertical_separator|horizontal_separator|table)_region"))) { found_regions = true; @@ -338,6 +338,7 @@ bool ImageCrop::crop_regions(bool temp) else io::magick::save(crop, opacity_mask, QString(output_dir_ + id + ".png").toStdString()); } + region = region.nextSibling(); } diff --git a/scribo/demo/xml2doc/templates/pdf/regions.xsl b/scribo/demo/xml2doc/templates/pdf/regions.xsl index add0cba..f236a1f 100644 --- a/scribo/demo/xml2doc/templates/pdf/regions.xsl +++ b/scribo/demo/xml2doc/templates/pdf/regions.xsl @@ -8,7 +8,8 @@ pcGts/page/graphic_region| pcGts/page/chart_region| pcGts/page/table_region| - pcGts/page/separator_region"> + pcGts/page/vertical_separator_region| + pcGts/page/horizontal_separator_region"> <!-- ID of the region, used to display id.png --> <xsl:variable name="data"> diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh index e287c1d..0fe2be3 100644 --- a/scribo/scribo/core/document.hh +++ b/scribo/scribo/core/document.hh @@ -343,7 +343,7 @@ namespace scribo mln_value(L) ncomps; hline_seps_comps_ = primitive::extract::components(hline_seps, mln::c8(), ncomps, - component::LineSeparator); + component::HorizontalLineSeparator); } @@ -390,7 +390,7 @@ namespace scribo mln_value(L) ncomps; vline_seps_comps_ = primitive::extract::components(vline_seps, mln::c8(), ncomps, - component::LineSeparator); + component::VerticalLineSeparator); } diff --git a/scribo/scribo/core/tag/component.hh b/scribo/scribo/core/tag/component.hh index 7cd2ede..dc9db90 100644 --- a/scribo/scribo/core/tag/component.hh +++ b/scribo/scribo/core/tag/component.hh @@ -55,7 +55,8 @@ namespace scribo { Undefined = 0, Character, - LineSeparator, + VerticalLineSeparator, + HorizontalLineSeparator, WhitespaceSeparator, Noise, Punctuation, @@ -117,8 +118,11 @@ namespace scribo case Character: str = "Character"; break; - case LineSeparator: - str = "LineSeparator"; + case HorizontalLineSeparator: + str = "HorizontalLineSeparator"; + break; + case VerticalLineSeparator: + str = "VerticalLineSeparator"; break; case WhitespaceSeparator: str = "WhitespaceSeparator"; @@ -143,8 +147,10 @@ namespace scribo { if (str == "Character") return Character; - else if (str == "LineSeparator") - return LineSeparator; + else if (str == "HorizontalLineSeparator") + return HorizontalLineSeparator; + else if (str == "VerticalLineSeparator") + return VerticalLineSeparator; else if (str == "WhitespaceSeparator") return WhitespaceSeparator; else if (str == "Noise") diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh index f1c689d..62097f5 100644 --- a/scribo/scribo/io/img/internal/debug_img_visitor.hh +++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh @@ -179,7 +179,8 @@ namespace scribo { switch (info.type()) { - case component::LineSeparator: + case component::HorizontalLineSeparator: + case component::VerticalLineSeparator: { mln::draw::box(output, compute_bbox(info.bbox()), literal::cyan); diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh index 30987db..170b6a1 100644 --- a/scribo/scribo/io/img/internal/full_img_visitor.hh +++ b/scribo/scribo/io/img/internal/full_img_visitor.hh @@ -135,7 +135,8 @@ namespace scribo { switch (info.type()) { - case component::LineSeparator: + case component::HorizontalLineSeparator: + case component::VerticalLineSeparator: { mln::draw::box(output, info.bbox(), literal::cyan); } diff --git a/scribo/scribo/io/img/internal/non_text_img_visitor.hh b/scribo/scribo/io/img/internal/non_text_img_visitor.hh index 24b027e..cc1acb9 100644 --- a/scribo/scribo/io/img/internal/non_text_img_visitor.hh +++ b/scribo/scribo/io/img/internal/non_text_img_visitor.hh @@ -132,7 +132,8 @@ namespace scribo { switch (info.type()) { - case component::LineSeparator: + case component::HorizontalLineSeparator: + case component::VerticalLineSeparator: { mln::draw::box(output, info.bbox(), literal::cyan); } diff --git a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh index eaf08ac..83e5b0b 100644 --- a/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/extended_page_xml_visitor.hh @@ -179,9 +179,9 @@ namespace scribo break; } - case component::LineSeparator: + case component::VerticalLineSeparator: { - output << " <separator_region id=\"sr" << info.id() + output << " <vertical_separator_region id=\"vlsr" << info.id() << "\" sep_orientation=\"0.000000\" " << " sep_colour=\"Black\" " << " sep_bgcolour=\"White\"" @@ -193,7 +193,25 @@ namespace scribo internal::print_box_coords(output, info.bbox(), " "); - output << " </separator_region>" << std::endl; + output << " </vertical_separator_region>" << std::endl; + break; + } + + case component::HorizontalLineSeparator: + { + output << " <horizontal_separator_region id=\"hlsr" << info.id() + << "\" sep_orientation=\"0.000000\" " + << " sep_colour=\"Black\" " + << " sep_bgcolour=\"White\"" + << " x_min=\"" << info.bbox().pmin().col() << "\"" + << " y_min=\"" << info.bbox().pmin().row() << "\"" + << " x_max=\"" << info.bbox().pmax().col() << "\"" + << " y_max=\"" << info.bbox().pmax().row() << "\"" + << ">" << std::endl; + + internal::print_box_coords(output, info.bbox(), " "); + + output << " </horizontal_separator_region>" << std::endl; break; } diff --git a/scribo/scribo/io/xml/internal/full_xml_visitor.hh b/scribo/scribo/io/xml/internal/full_xml_visitor.hh index c294bbc..b920ca8 100644 --- a/scribo/scribo/io/xml/internal/full_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/full_xml_visitor.hh @@ -355,7 +355,12 @@ namespace scribo { output << " <whitespace_separator_region id=\"wss" << info.id() - << "\">" << std::endl; + << "\"" + << " x_min=\"" << info.bbox().pmin().col() << "\"" + << " y_min=\"" << info.bbox().pmin().row() << "\"" + << " x_max=\"" << info.bbox().pmax().col() << "\"" + << " y_max=\"" << info.bbox().pmax().row() << "\"" + << ">" << std::endl; internal::print_box_coords(output, info.bbox(), " "); @@ -363,16 +368,39 @@ namespace scribo break; } - case component::LineSeparator: + case component::VerticalLineSeparator: { - output << " <separator_region id=\"sr" << info.id() + output << " <vertical_separator_region id=\"vlsr" << info.id() << "\" sep_orientation=\"0.000000\" " << " sep_colour=\"Black\" " - << " sep_bgcolour=\"White\">" << std::endl; + << " sep_bgcolour=\"White\"" + << " x_min=\"" << info.bbox().pmin().col() << "\"" + << " y_min=\"" << info.bbox().pmin().row() << "\"" + << " x_max=\"" << info.bbox().pmax().col() << "\"" + << " y_max=\"" << info.bbox().pmax().row() << "\"" + << ">" << std::endl; internal::print_box_coords(output, info.bbox(), " "); - output << " </separator_region>" << std::endl; + output << " </vertical_separator_region>" << std::endl; + break; + } + + case component::HorizontalLineSeparator: + { + output << " <horizontal_separator_region id=\"hlsr" << info.id() + << "\" sep_orientation=\"0.000000\" " + << " sep_colour=\"Black\" " + << " sep_bgcolour=\"White\"" + << " x_min=\"" << info.bbox().pmin().col() << "\"" + << " y_min=\"" << info.bbox().pmin().row() << "\"" + << " x_max=\"" << info.bbox().pmax().col() << "\"" + << " y_max=\"" << info.bbox().pmax().row() << "\"" + << ">" << std::endl; + + internal::print_box_coords(output, info.bbox(), " "); + + output << " </horizontal_separator_region>" << std::endl; break; } @@ -383,7 +411,12 @@ namespace scribo << "\" img_colour_type=\"24_Bit_Colour\"" << " img_orientation=\"0.000000\" " << " img_emb_text=\"No\" " - << " img_bgcolour=\"White\">" << std::endl; + << " img_bgcolour=\"White\"" + << " x_min=\"" << info.bbox().pmin().col() << "\"" + << " y_min=\"" << info.bbox().pmin().row() << "\"" + << " x_max=\"" << info.bbox().pmax().col() << "\"" + << " y_max=\"" << info.bbox().pmax().row() << "\"" + << ">" << std::endl; internal::print_box_coords(output, info.bbox(), " "); diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh index 19665d6..0014caf 100644 --- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh @@ -146,7 +146,8 @@ namespace scribo { switch (info.type()) { - case component::LineSeparator: + case component::VerticalLineSeparator: + case component::HorizontalLineSeparator: { output << " <separator_region id=\"sr" << info.id() << "\" sep_orientation=\"0.000000\" " -- 1.5.6.5