---
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 15 ++++++++-
scribo/scribo/toolchain/content_in_hdoc.hh | 1 +
.../toolchain/internal/content_in_hdoc_functor.hh | 32 +++++++++++--------
scribo/scribo/util/component_precise_outline.hh | 4 --
4 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 8373b02..dab1cce 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -85,6 +85,7 @@ namespace scribo
private: // Attributes
std::ofstream& output;
mutable int base_vertical_line_id_;
+ mutable int base_text_id_;
mutable L lbl_;
};
@@ -113,13 +114,23 @@ namespace scribo
// 0, so vertical and horizontal lines with the same id
// exist.
base_vertical_line_id_ = doc.hline_seps_comps().nelements();
+ base_text_id_ = 0;
// Preambule
print_PAGE_preambule(output, doc, true);
// Text
if (doc.has_text())
+ {
+
+ // FIXME: counting the number of valid lines...
+ for_all_paragraphs(p, doc.paragraphs())
+ if (doc.paragraphs()(p).is_valid())
+ ++base_text_id_;
+ --base_text_id_;
+
doc.paragraphs().accept(*this);
+ }
// Page elements (Pictures, ...)
if (doc.has_elements())
@@ -191,8 +202,8 @@ namespace scribo
case component::DropCapital:
{
- output << " <TextRegion id=\"r" << id << "\" "
- << " Type=\"Drop_Capital\">"
+ output << " <TextRegion id=\"r" << base_text_id_ + id << "\" "
+ << " type=\"drop-capital\">" // FIXME: should not be inline here!
<< std::endl;
internal::print_image_coords(output, par, " ");
diff --git a/scribo/scribo/toolchain/content_in_hdoc.hh b/scribo/scribo/toolchain/content_in_hdoc.hh
index 97233d5..67d8d2a 100644
--- a/scribo/scribo/toolchain/content_in_hdoc.hh
+++ b/scribo/scribo/toolchain/content_in_hdoc.hh
@@ -72,6 +72,7 @@ namespace scribo
f.enable_whitespace_seps = find_whitespace_seps;
f.ocr_language = language;
f.enable_ocr = enable_ocr;
+ f.verbose = false;
document<mln_ch_value(I, def::lbl_type)> doc = f(input, input_preproc);
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index 24d24a3..adfcdb3 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -70,7 +70,10 @@
# include <scribo/postprocessing/images_to_drop_capital.hh>
+# ifndef SCRIBO_NOCR
# include <scribo/text/recognition.hh>
+# endif // ! SCRIBO_NOCR
+
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
# include <scribo/text/paragraphs.hh>
@@ -505,6 +508,8 @@ namespace scribo
# endif // ! SCRIBO_NDEBUG
//===== END OF DEBUG =====
+
+# ifndef SCRIBO_NOCR
// Text recognition
if (enable_ocr)
{
@@ -514,6 +519,7 @@ namespace scribo
on_progress();
}
+# endif // ! SCRIBO_NOCR
on_new_progress_label("Extracting paragraphs");
@@ -524,11 +530,11 @@ namespace scribo
on_new_progress_label("Filtering paragraphs");
- paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset);
- doc.set_paragraphs(parset_f);
+ // paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset);
+ // doc.set_paragraphs(parset_f);
- // parset = filter::paragraphs_bbox_overlap(parset);
- // doc.set_paragraphs(parset);
+ parset = filter::paragraphs_bbox_overlap(parset);
+ doc.set_paragraphs(parset);
on_progress();
@@ -557,14 +563,14 @@ namespace scribo
on_progress();
-// TEMPORARY DEBUG
- on_new_progress_label("Saving debug data");
- doc.set_paragraphs(parset);
- scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage);
- doc.set_paragraphs(parset_f);
- on_progress();
-// END OF TEMPORARY DEBUG
+// // TEMPORARY DEBUG
+// on_new_progress_label("Saving debug data");
+// doc.set_paragraphs(parset);
+// scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage);
+// scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage);
+// doc.set_paragraphs(parset_f);
+// on_progress();
+// // END OF TEMPORARY DEBUG
on_new_progress_label("Cleanup miscellaneous false positive");
@@ -604,8 +610,6 @@ namespace scribo
on_end();
- sleep(10);
-
return doc;
}
diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh
index 70fc995..175160c 100644
--- a/scribo/scribo/util/component_precise_outline.hh
+++ b/scribo/scribo/util/component_precise_outline.hh
@@ -386,13 +386,9 @@ namespace scribo
}
}
- std::cout << "Before filter points - " << points.nsites() << std::endl;
-
mln::p_array<P> waypoints;
internal::filter_points(points, waypoints);
- std::cout << "After filter points - " << waypoints.nsites() << std::endl;
-
trace::exiting("scribo::util::component_precise_outline");
return waypoints;
}
--
1.5.6.5
Show replies by date