last-svn-commit-904-gbce6cef BACKUP

--- scribo/scribo/filter/paragraphs_bbox_overlap.hh | 76 ++++++++++++++++------ 1 files changed, 55 insertions(+), 21 deletions(-) diff --git a/scribo/scribo/filter/paragraphs_bbox_overlap.hh b/scribo/scribo/filter/paragraphs_bbox_overlap.hh index aa1c8ac..d40d42f 100644 --- a/scribo/scribo/filter/paragraphs_bbox_overlap.hh +++ b/scribo/scribo/filter/paragraphs_bbox_overlap.hh @@ -41,6 +41,7 @@ # include <scribo/core/paragraph_set.hh> +#include <mln/labeling/colorize.hh> namespace scribo { @@ -59,7 +60,7 @@ namespace scribo /// Paragraph::Ignored. template <typename L> paragraph_set<L> - paragraphs_bbox_overlap(const paragraph_set<L>& paragraphs); + paragraphs_bbox_overlap(const paragraph_set<L>& parset); # ifndef MLN_INCLUDE_ONLY @@ -70,23 +71,23 @@ namespace scribo template <typename L> struct order_paragraphs_id { - order_paragraphs_id(const scribo::paragraph_set<L>& paragraphs) - : paragraphs_(paragraphs) + order_paragraphs_id(const scribo::paragraph_set<L>& parset) + : parset_(parset) { } bool operator()(const scribo::paragraph_id_t& l1, const scribo::paragraph_id_t& l2) const { - const unsigned l1_nsites = paragraphs_(l1).bbox().nsites(); - const unsigned l2_nsites = paragraphs_(l2).bbox().nsites(); + const unsigned l1_nsites = parset_(l1).bbox().nsites(); + const unsigned l2_nsites = parset_(l2).bbox().nsites(); if (l1_nsites == l2_nsites) return l1 > l2; return l1_nsites > l2_nsites; } - scribo::paragraph_set<L> paragraphs_; + scribo::paragraph_set<L> parset_; }; } // end of namespace scribo::filter::internal @@ -94,24 +95,38 @@ namespace scribo template <typename L> paragraph_set<L> - paragraphs_bbox_overlap(const paragraph_set<L>& paragraphs) + paragraphs_bbox_overlap(const paragraph_set<L>& parset) { trace::entering("scribo::filter::paragraphs_bbox_overlap"); - mln_precondition(paragraphs.is_valid()); + mln_precondition(parset.is_valid()); - L billboard; - initialize(billboard, paragraphs.lines().components().labeled_image()); + mln_ch_value(L, 
paragraph_id_t) billboard; + initialize(billboard, parset.lines().components().labeled_image()); data::fill(billboard, 0); - mln::util::array<bool> not_to_ignore(paragraphs.nelements() + 1, true); + mln::util::array<bool> not_to_ignore(parset.nelements() + 1, true); not_to_ignore(0) = false; - for_all_paragraphs(cur_id, paragraphs) + paragraph_set<L> output = parset.duplicate(); + + mln::util::array<paragraph_id_t> candidate; + candidate.reserve(parset.nelements()); + for_all_paragraphs(cur_id, parset) + if (parset(cur_id).is_valid()) + candidate.append(cur_id); + + std::sort(candidate.hook_std_vector_().begin(), + candidate.hook_std_vector_().end(), + internal::order_paragraphs_id<L>(parset)); + + for_all_elements(e, candidate) { - const box2d& b_ = paragraphs(cur_id).bbox(); + paragraph_id_t cur_id = candidate(e); - if (paragraphs(cur_id).nlines() > 1) + const box2d& b_ = parset(cur_id).bbox(); + + if (parset(cur_id).nlines() > 1) { mln::draw::box_plain(billboard, b_, cur_id); continue; @@ -141,17 +156,33 @@ namespace scribo ++it) if (not_to_ignore(*it)) { - box2d b2 = paragraphs(*it).bbox(); + box2d b2 = output(*it).bbox(); box2d b_i = scribo::util::box_intersection(b_, b2); - // si b_ est inclus dans une boite donc le nombre de comp > 1 => invalid juste b_ - // sinon => invalid b_ et b2 - if ((b_i.nsites() / (float)b_.nsites() > 0.4 - || (b_i.nsites() / (float)b2.nsites()) > 0.9)) + volatile float + b_ratio = b_i.nsites() / (float)b_.nsites(), + b2_ratio = b_i.nsites() / (float)b2.nsites(); + + if (b2_ratio == 1) { + // Merge paragraphs and redraw the new bbox. + output(cur_id).fast_merge(output(*it)); + mln::draw::box_plain(billboard, output(cur_id).bbox(), cur_id); + } + else if (b_ratio == 1) + { + // Merge paragraphs and redraw the new bbox. 
+ output(*it).fast_merge(output(cur_id)); + mln::draw::box_plain(billboard, output(*it).bbox(), *it); + } + else if ((b_ratio > 0.4 || b2_ratio > 0.9)) + { + // si b_ est inclus dans une boite dont le nombre de + // comp > 4 => invalid juste b_ sinon => invalid b_ et + // b2 not_to_ignore(cur_id) = false; - if (paragraphs(*it).nlines() < 4) + if (parset(*it).nlines() < 4) not_to_ignore(*it) = false; } } @@ -159,9 +190,12 @@ namespace scribo mln::draw::box_plain(billboard, b_, cur_id); } - paragraph_set<L> output = paragraphs.duplicate(); output.invalidate(not_to_ignore); + for_all_paragraphs(p, output) + if (output(p).is_valid()) + output(p).force_stats_update(); + trace::exiting("scribo::filter::paragraphs_bbox_overlap"); return output; } -- 1.5.6.5
Participants (1):
- Guillaume Lazzara