last-svn-commit-916-g21c1887 Small fixes.

* scribo/io/xml/internal/page_xml_visitor.hh: Fix duplicate ids. * scribo/toolchain/internal/content_in_hdoc_functor.hh: Remove debug and disable OCR. * scribo/util/component_precise_outline.hh: Remove debug. --- scribo/ChangeLog | 11 +++++++ scribo/scribo/io/xml/internal/page_xml_visitor.hh | 15 ++++++++- .../toolchain/internal/content_in_hdoc_functor.hh | 32 +++++++++++-------- scribo/scribo/util/component_precise_outline.hh | 4 -- 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 59e3e84..c4768a4 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,3 +1,14 @@ +2011-06-07 Guillaume Lazzara <z@lrde.epita.fr> + + Small fixes. + + * scribo/io/xml/internal/page_xml_visitor.hh: Fix duplicate ids. + + * scribo/toolchain/internal/content_in_hdoc_functor.hh: Remove + debug and disable OCR. + + * scribo/util/component_precise_outline.hh: Remove debug. + 2011-06-01 Guillaume Lazzara <z@lrde.epita.fr> Fix use of uninitialized values. diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh index 8373b02..dab1cce 100644 --- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh @@ -85,6 +85,7 @@ namespace scribo private: // Attributes std::ofstream& output; mutable int base_vertical_line_id_; + mutable int base_text_id_; mutable L lbl_; }; @@ -113,13 +114,23 @@ namespace scribo // 0, so vertical and horizontal lines with the same id // exist. base_vertical_line_id_ = doc.hline_seps_comps().nelements(); + base_text_id_ = 0; // Preambule print_PAGE_preambule(output, doc, true); // Text if (doc.has_text()) + { + + // FIXME: counting the number of valid lines... + for_all_paragraphs(p, doc.paragraphs()) + if (doc.paragraphs()(p).is_valid()) + ++base_text_id_; + --base_text_id_; + doc.paragraphs().accept(*this); + } // Page elements (Pictures, ...) if (doc.has_elements()) @@ -191,8 +202,8 @@ namespace scribo case component::DropCapital: { - output << " <TextRegion id=\"r" << id << "\" " - << " Type=\"Drop_Capital\">" + output << " <TextRegion id=\"r" << base_text_id_ + id << "\" " + << " type=\"drop-capital\">" // FIXME: should not be inline here! << std::endl; internal::print_image_coords(output, par, " "); diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh index 24d24a3..adfcdb3 100644 --- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh +++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh @@ -70,7 +70,10 @@ # include <scribo/postprocessing/images_to_drop_capital.hh> +# ifndef SCRIBO_NOCR # include <scribo/text/recognition.hh> +# endif // ! SCRIBO_NOCR + # include <scribo/text/merging.hh> # include <scribo/text/link_lines.hh> # include <scribo/text/paragraphs.hh> @@ -505,6 +508,8 @@ namespace scribo # endif // ! SCRIBO_NDEBUG //===== END OF DEBUG ===== + +# ifndef SCRIBO_NOCR // Text recognition if (enable_ocr) { @@ -514,6 +519,7 @@ namespace scribo on_progress(); } +# endif // ! SCRIBO_NOCR on_new_progress_label("Extracting paragraphs"); @@ -524,11 +530,11 @@ namespace scribo on_new_progress_label("Filtering paragraphs"); - paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset); - doc.set_paragraphs(parset_f); + // paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset); + // doc.set_paragraphs(parset_f); - // parset = filter::paragraphs_bbox_overlap(parset); - // doc.set_paragraphs(parset); + parset = filter::paragraphs_bbox_overlap(parset); + doc.set_paragraphs(parset); on_progress(); @@ -557,14 +563,14 @@ namespace scribo on_progress(); -// TEMPORARY DEBUG - on_new_progress_label("Saving debug data"); - doc.set_paragraphs(parset); - scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage); - scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage); - doc.set_paragraphs(parset_f); - on_progress(); -// END OF TEMPORARY DEBUG +// // TEMPORARY DEBUG +// on_new_progress_label("Saving debug data"); +// doc.set_paragraphs(parset); +// scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage); +// scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage); +// doc.set_paragraphs(parset_f); +// on_progress(); +// // END OF TEMPORARY DEBUG on_new_progress_label("Cleanup miscellaneous false positive"); @@ -604,8 +610,6 @@ namespace scribo on_end(); - sleep(10); - return doc; } diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh index 70fc995..175160c 100644 --- a/scribo/scribo/util/component_precise_outline.hh +++ b/scribo/scribo/util/component_precise_outline.hh @@ -386,13 +386,9 @@ namespace scribo } } - std::cout << "Before filter points - " << points.nsites() << std::endl; - mln::p_array<P> waypoints; internal::filter_points(points, waypoints); - std::cout << "After filter points - " << waypoints.nsites() << std::endl; - trace::exiting("scribo::util::component_precise_outline"); return waypoints; } -- 1.5.6.5
participants (1)
-
Guillaume Lazzara