last-svn-commit-906-ge6c5288 Fix a serious memory leak.

* scribo/core/component_info.hh, * scribo/core/component_set.hh, * scribo/core/line_info.hh, * scribo/draw/line_components.hh: Remove holder data. Prevented the containers from being freed. * scribo/io/img/internal/debug_img_visitor.hh, * scribo/io/img/internal/full_img_visitor.hh, * scribo/io/img/save.hh, * scribo/io/xml/internal/page_xml_visitor.hh, * scribo/io/xml/save.hh, * scribo/primitive/extract/lines_h_thick_and_thin.hh, * scribo/primitive/remove/separators.hh, * scribo/text/merging.hh, * scribo/text/paragraphs.hh, * scribo/text/paragraphs_closing.hh, * scribo/toolchain/internal/content_in_hdoc_functor.hh: Update code in order to make holder data useless. --- scribo/ChangeLog | 23 ++++++++++ scribo/scribo/core/component_info.hh | 19 +------- scribo/scribo/core/component_set.hh | 6 +- scribo/scribo/core/line_info.hh | 44 +++++++------------ scribo/scribo/draw/line_components.hh | 7 ++- scribo/scribo/io/img/internal/debug_img_visitor.hh | 37 +++++++++------ scribo/scribo/io/img/internal/full_img_visitor.hh | 34 +++++++++------ scribo/scribo/io/img/save.hh | 6 +- scribo/scribo/io/xml/internal/page_xml_visitor.hh | 28 ++++++------ scribo/scribo/io/xml/save.hh | 2 +- scribo/scribo/text/merging.hh | 46 ++++++++++++------- scribo/scribo/text/paragraphs.hh | 26 ++++++----- scribo/scribo/text/paragraphs_closing.hh | 2 +- 13 files changed, 155 insertions(+), 125 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 6f44931..cc7cefc 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,3 +1,26 @@ +2011-06-01 Guillaume Lazzara <z@lrde.epita.fr> + + Fix a serious memory leak. + + * scribo/core/component_info.hh, + * scribo/core/component_set.hh, + * scribo/core/line_info.hh, + * scribo/draw/line_components.hh: Remove holder data. Prevented + the containers from being freed. + + * scribo/io/img/internal/debug_img_visitor.hh, + * scribo/io/img/internal/full_img_visitor.hh, + * scribo/io/img/save.hh, + * scribo/io/xml/internal/page_xml_visitor.hh, + * scribo/io/xml/save.hh, + * scribo/primitive/extract/lines_h_thick_and_thin.hh, + * scribo/primitive/remove/separators.hh, + * scribo/text/merging.hh, + * scribo/text/paragraphs.hh, + * scribo/text/paragraphs_closing.hh, + * scribo/toolchain/internal/content_in_hdoc_functor.hh: Update + code in order to make holder data useless. + 2011-05-26 Guillaume Lazzara <z@lrde.epita.fr> * scribo/util/component_precise_outline.hh: Fix namespace diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh index 2fa2ad1..b42787f 100644 --- a/scribo/scribo/core/component_info.hh +++ b/scribo/scribo/core/component_info.hh @@ -57,8 +57,7 @@ namespace scribo public: component_info(); - component_info(const component_set<L>& holder, - const component_id_t& id, + component_info(const component_id_t& id, const mln::box2d& bbox, const mln::point2d& mass_center, unsigned card, @@ -86,8 +85,6 @@ namespace scribo bool is_valid() const; - const component_set<L>& holder() const; - protected: component_id_t id_; mln::box2d bbox_; @@ -98,8 +95,6 @@ namespace scribo component::Tag tag_; component::Type type_; - - component_set<L> holder_; }; @@ -125,14 +120,13 @@ namespace scribo template <typename L> - component_info<L>::component_info(const component_set<L>& holder, - const component_id_t& id, + component_info<L>::component_info(const component_id_t& id, const mln::box2d& bbox, const mln::point2d& mass_center, unsigned card, component::Type type) : id_(id), bbox_(bbox), mass_center_(mass_center), card_(card), - type_(type), holder_(holder) + type_(type) { if (!bbox.is_valid()) tag_ = component::Ignored; @@ -232,13 +226,6 @@ namespace scribo } - template <typename L> - const component_set<L>& - component_info<L>::holder() const - { - return holder_; - } - template <typename L> std::ostream& diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh index 4f4cd61..ded64ae 100644 --- a/scribo/scribo/core/component_set.hh +++ b/scribo/scribo/core/component_set.hh @@ -337,7 +337,7 @@ namespace scribo data_->infos_.append(component_info<L>()); // Component 0, i.e. the background. for_all_comp_data(i, attribs) { - component_info<L> info(*this, i, attribs[i].first(), + component_info<L> info(i, attribs[i].first(), attribs[i].second(), attribs[i].second_accu().nsites()); data_->infos_.append(info); } @@ -362,7 +362,7 @@ namespace scribo data_->infos_.append(component_info<L>()); // Component 0, i.e. the background. for_all_comp_data(i, attribs) { - component_info<L> info(*this, i, attribs[i].first(), + component_info<L> info(i, attribs[i].first(), attribs[i].second(), attribs[i].second_accu().nsites(), type); data_->infos_.append(info); @@ -389,7 +389,7 @@ namespace scribo data_->infos_.append(component_info<L>()); // Component 0, i.e. the background. for_all_comp_data(i, attribs) { - component_info<L> info(*this, i, attribs[i].first, + component_info<L> info(i, attribs[i].first, attribs[i].second.first, attribs[i].second.second, type); data_->infos_.append(info); diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh index 9320416..2913f81 100644 --- a/scribo/scribo/core/line_info.hh +++ b/scribo/scribo/core/line_info.hh @@ -140,13 +140,13 @@ namespace scribo std::string text_; std::string html_text_; - // Line set holding this element. - line_set<L> holder_; - // DEBUG stats< float > meanline_clusters_; stats< float > baseline_clusters_; + component_set<L> components_; + object_links<L> links_; + private: void init_(); }; @@ -282,10 +282,6 @@ namespace scribo /// Force a new computation of statistics. void force_stats_update(); - - /// Returns the line set holding this element. - const line_set<L>& holder() const; - /// Returns the delta used to compute the extended bbox. int delta_of_line() const; @@ -376,7 +372,7 @@ namespace scribo line_info_data<L>::line_info_data(const line_set<L>& holder, const group_info& group) : hidden_(false), tag_(line::None), component_ids_(group.component_ids()), - type_(line::Undefined), holder_(holder) + type_(line::Undefined), components_(holder.components()), links_(holder.links()) { init_(); } @@ -385,7 +381,7 @@ namespace scribo line_info_data<L>::line_info_data(const line_set<L>& holder, const mln::util::array<component_id_t>& component_ids) : hidden_(false), tag_(line::None), component_ids_(component_ids), - type_(line::Undefined), holder_(holder) + type_(line::Undefined), components_(holder.components()), links_(holder.links()) { init_(); } @@ -696,7 +692,7 @@ namespace scribo for_all_elements(i, data_->component_ids_) { unsigned c = data_->component_ids_[i]; - data_->holder_.components_()(c).update_type(type); + data_->components_(c).update_type(type); } } @@ -855,7 +851,7 @@ namespace scribo data_->baseline_ + D, bbox().pmax().col() + delta); - data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain()); + data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain()); } @@ -915,7 +911,7 @@ namespace scribo data_->ebbox_.merge(enlarge(b, d_delta)); } - data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain()); + data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain()); } else // /other/ IS NOT a text line. { @@ -941,7 +937,7 @@ namespace scribo data_->bbox_.merge(other.bbox()); // Make sure the ebbox is included in the image domain. - data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain()); + data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain()); } @@ -975,13 +971,13 @@ namespace scribo // Only for the case of two-character words if (card() == 2) { - const component_set<L>& comp_set = data_->holder_.components(); + const component_set<L>& comp_set = data_->components_; const unsigned c1 = data_->component_ids_(0); const unsigned c2 = data_->component_ids_(1); - if (data_->holder_.components()(c1).type() == component::Punctuation - || data_->holder_.components()(c2).type() == component::Punctuation) + if (data_->components_(c1).type() == component::Punctuation + || data_->components_(c2).type() == component::Punctuation) return false; const mln::box2d& bb1 = comp_set(c1).bbox(); @@ -1026,7 +1022,7 @@ namespace scribo unsigned line_info<L>::get_first_char_height() const { - const component_set<L>& comp_set = data_->holder_.components(); + const component_set<L>& comp_set = data_->components_; const unsigned c1 = data_->components_(0); const mln::box2d& bb1 = comp_set(c1).bbox(); @@ -1120,7 +1116,7 @@ namespace scribo line_info<L>::force_stats_update() { typedef mln_site(L) P; - const component_set<L>& comp_set = data_->holder_.components(); + const component_set<L>& comp_set = data_->components_; // Init. typedef mln::value::int_u<12> median_data_t; @@ -1222,11 +1218,11 @@ namespace scribo // (right link) (left link) // Space between characters. - if (data_->holder_.links()(c) != c) + if (data_->links_(c) != c) { int space = bb.pmin().col() - - comp_set(data_->holder_.links()(c)).bbox().pmax().col() - 1; + - comp_set(data_->links_(c)).bbox().pmax().col() - 1; // -- Ignore overlapped characters. if (space > 0) @@ -1330,14 +1326,6 @@ namespace scribo template <typename L> - const line_set<L>& - line_info<L>::holder() const - { - return data_->holder_; - } - - - template <typename L> std::ostream& operator<<(std::ostream& ostr, const line_info<L>& info) { diff --git a/scribo/scribo/draw/line_components.hh b/scribo/scribo/draw/line_components.hh index 878b2c1..12e7489 100644 --- a/scribo/scribo/draw/line_components.hh +++ b/scribo/scribo/draw/line_components.hh @@ -46,6 +46,7 @@ namespace scribo template <typename L, typename I> void line_components(Image<I>& input_, + const line_set<L>& lines, const line_info<L>& line, const mln_value(I)& value); @@ -56,6 +57,7 @@ namespace scribo template <typename L, typename I> void line_components(Image<I>& input_, + const line_set<L>& lines, const line_info<L>& line, const mln_value(I)& value) { @@ -65,9 +67,8 @@ namespace scribo mln_precondition(input.is_valid()); - const line_set<L>& holder = line.holder(); - const component_set<L>& comp_set = holder.components(); - const L& labeled_image = holder.components().labeled_image(); + const component_set<L>& comp_set = lines.components(); + const L& labeled_image = lines.components().labeled_image(); const mln::util::array<component_id_t>& component_ids = line.component_ids(); diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh index 520a743..cde16ed 100644 --- a/scribo/scribo/io/img/internal/debug_img_visitor.hh +++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh @@ -61,7 +61,8 @@ namespace scribo { - class debug_img_visitor : public doc_serializer<debug_img_visitor> + template <typename L> + class debug_img_visitor : public doc_serializer<debug_img_visitor<L> > { public: // Constructor @@ -69,21 +70,18 @@ namespace scribo unsigned output_ratio); // Visit overloads - template <typename L> void visit(const document<L>& doc) const; - template <typename L> void visit(const component_info<L>& info) const; - template <typename L> void visit(const paragraph_set<L>& parset) const; - template <typename L> void visit(const line_info<L>& line) const; private: // Attributes mln::image2d<value::rgb8>& output; unsigned output_ratio; + mutable L lbl_; private: // Methods box2d compute_bbox(const box2d& b) const; @@ -94,9 +92,9 @@ namespace scribo # ifndef MLN_INCLUDE_ONLY - inline + template <typename L> box2d - debug_img_visitor::compute_bbox(const box2d& b) const + debug_img_visitor<L>::compute_bbox(const box2d& b) const { point2d pmin = b.pmin() / output_ratio, @@ -106,8 +104,8 @@ namespace scribo } - inline - debug_img_visitor::debug_img_visitor(mln::image2d<value::rgb8>& out, + template <typename L> + debug_img_visitor<L>::debug_img_visitor(mln::image2d<value::rgb8>& out, unsigned output_ratio) : output(out), output_ratio(output_ratio) { @@ -119,7 +117,7 @@ namespace scribo // template <typename L> void - debug_img_visitor::visit(const document<L>& doc) const + debug_img_visitor<L>::visit(const document<L>& doc) const { // Text if (doc.has_text()) @@ -129,19 +127,28 @@ namespace scribo if (doc.has_elements()) { for_all_comps(e, doc.elements()) + { + lbl_ = doc.elements().labeled_image(); if (doc.elements()(e).is_valid()) doc.elements()(e).accept(*this); + } } // line seraparators if (doc.has_vline_seps()) + { + lbl_ = doc.vline_seps_comps().labeled_image(); for_all_comps(c, doc.vline_seps_comps()) if (doc.vline_seps_comps()(c).is_valid()) doc.vline_seps_comps()(c).accept(*this); + } if (doc.has_hline_seps()) + { + lbl_ = doc.hline_seps_comps().labeled_image(); for_all_comps(c, doc.hline_seps_comps()) if (doc.hline_seps_comps()(c).is_valid()) doc.hline_seps_comps()(c).accept(*this); + } } @@ -150,13 +157,13 @@ namespace scribo // template <typename L> void - debug_img_visitor::visit(const component_info<L>& info) const + debug_img_visitor<L>::visit(const component_info<L>& info) const { // Getting component outline scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv(); - const L& lbl = info.holder().labeled_image(); + //const L& lbl = info.holder().labeled_image(); p_array<point2d> - par = scribo::util::component_precise_outline(lbl | info.bbox(), id); + par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id); switch (info.type()) { @@ -187,7 +194,7 @@ namespace scribo // template <typename L> void - debug_img_visitor::visit(const paragraph_set<L>& parset) const + debug_img_visitor<L>::visit(const paragraph_set<L>& parset) const { const line_set<L>& lines = parset.lines(); @@ -216,7 +223,7 @@ namespace scribo template <typename L> void - debug_img_visitor::visit(const line_info<L>& line) const + debug_img_visitor<L>::visit(const line_info<L>& line) const { point2d pmin = line.bbox().pmin(), diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh index 7b20970..f31eec1 100644 --- a/scribo/scribo/io/img/internal/full_img_visitor.hh +++ b/scribo/scribo/io/img/internal/full_img_visitor.hh @@ -58,27 +58,26 @@ namespace scribo { - class full_img_visitor : public doc_serializer<full_img_visitor> + template <typename L> + class full_img_visitor : public doc_serializer<full_img_visitor<L> > { public: // Constructor full_img_visitor(mln::image2d<value::rgb8>& out); // Visit overloads - template <typename L> void visit(const document<L>& doc) const; - template <typename L> void visit(const component_info<L>& info) const; - template <typename L> void visit(const paragraph_set<L>& parset) const; - template <typename L> void visit(const line_info<L>& line) const; private: // Attributes mln::image2d<value::rgb8>& output; + + mutable L lbl_; }; @@ -86,8 +85,8 @@ namespace scribo # ifndef MLN_INCLUDE_ONLY - inline - full_img_visitor::full_img_visitor(mln::image2d<value::rgb8>& out) + template <typename L> + full_img_visitor<L>::full_img_visitor(mln::image2d<value::rgb8>& out) : output(out) { mln_assertion(output.is_valid()); @@ -98,7 +97,7 @@ namespace scribo // template <typename L> void - full_img_visitor::visit(const document<L>& doc) const + full_img_visitor<L>::visit(const document<L>& doc) const { // Text if (doc.has_text()) @@ -109,20 +108,29 @@ namespace scribo { const component_set<L>& elts = doc.elements(); for_all_comps(e, elts) + { + lbl_ = elts.labeled_image(); if (elts(e).is_valid()) elts(e).accept(*this); + } } // line seraparators if (doc.has_vline_seps()) + { + lbl_ = doc.vline_seps_comps().labeled_image(); for_all_comps(c, doc.vline_seps_comps()) if (doc.vline_seps_comps()(c).is_valid()) doc.vline_seps_comps()(c).accept(*this); + } if (doc.has_hline_seps()) + { + lbl_ = doc.hline_seps_comps().labeled_image(); for_all_comps(c, doc.hline_seps_comps()) if (doc.hline_seps_comps()(c).is_valid()) doc.hline_seps_comps()(c).accept(*this); + } } @@ -131,13 +139,13 @@ namespace scribo // template <typename L> void - full_img_visitor::visit(const component_info<L>& info) const + full_img_visitor<L>::visit(const component_info<L>& info) const { // Getting component outline scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv(); - const L& lbl = info.holder().labeled_image(); + //const L& lbl = info.holder().labeled_image(); p_array<point2d> - par = scribo::util::component_precise_outline(lbl | info.bbox(), id); + par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id); switch (info.type()) { @@ -167,7 +175,7 @@ namespace scribo // template <typename L> void - full_img_visitor::visit(const paragraph_set<L>& parset) const + full_img_visitor<L>::visit(const paragraph_set<L>& parset) const { // const line_set<L>& lines = parset.lines(); @@ -187,7 +195,7 @@ namespace scribo template <typename L> void - full_img_visitor::visit(const line_info<L>& line) const + full_img_visitor<L>::visit(const line_info<L>& line) const { // mln::draw::box(output, line.bbox(), literal::red); diff --git a/scribo/scribo/io/img/save.hh b/scribo/scribo/io/img/save.hh index 04f0a3c..a985d07 100644 --- a/scribo/scribo/io/img/save.hh +++ b/scribo/scribo/io/img/save.hh @@ -150,7 +150,7 @@ namespace scribo { mln_precondition(doc.is_valid()); mln::image2d<value::rgb8> output = duplicate(doc.image()); - scribo::io::img::internal::full_img_visitor f(output); + scribo::io::img::internal::full_img_visitor<L> f(output); doc.accept(f); return output; } @@ -164,7 +164,7 @@ namespace scribo output(box2d(doc.image().domain().pmin() / 4, doc.image().domain().pmax() / 4)); data::fill(output, literal::black); - scribo::io::img::internal::debug_img_visitor f(output, 4); + scribo::io::img::internal::debug_img_visitor<L> f(output, 4); doc.accept(f); return output; } @@ -178,7 +178,7 @@ namespace scribo output = mln::subsampling::antialiased(doc.image(), 4); internal::highlight_mask highlight(0.5f); data::transform_inplace(output, highlight); - scribo::io::img::internal::debug_img_visitor f(output, 4); + scribo::io::img::internal::debug_img_visitor<L> f(output, 4); doc.accept(f); return output; } diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh index bbdd3e2..8373b02 100644 --- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh +++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh @@ -66,28 +66,27 @@ namespace scribo Its XSD file is located here: http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent.... */ - class page_xml_visitor : public doc_serializer<page_xml_visitor> + template <typename L> + class page_xml_visitor : public doc_serializer<page_xml_visitor<L> > { public: // Constructor - page_xml_visitor(std::ofstream& out); + page_xml_visitor<L>(std::ofstream& out); // Visit overloads - template <typename L> void visit(const document<L>& doc) const; - template <typename L> void visit(const component_set<L>& comp_set) const; - template <typename L> void visit(const component_info<L>& info) const; - template <typename L> void visit(const paragraph_set<L>& parset) const; private: // Attributes std::ofstream& output; mutable int base_vertical_line_id_; + + mutable L lbl_; }; @@ -95,8 +94,8 @@ namespace scribo # ifndef MLN_INCLUDE_ONLY - inline - page_xml_visitor::page_xml_visitor(std::ofstream& out) + template <typename L> + page_xml_visitor<L>::page_xml_visitor(std::ofstream& out) : output(out) { } @@ -107,7 +106,7 @@ namespace scribo // template <typename L> void - page_xml_visitor::visit(const document<L>& doc) const + page_xml_visitor<L>::visit(const document<L>& doc) const { // Make sure there are no duplicate ids for line separators. // Vertical and horizontal lines are indexed separately from @@ -143,8 +142,9 @@ namespace scribo // template <typename L> void - page_xml_visitor::visit(const component_set<L>& comp_set) const + page_xml_visitor<L>::visit(const component_set<L>& comp_set) const { + lbl_ = comp_set.labeled_image(); for_all_comps(c, comp_set) if (comp_set(c).is_valid()) comp_set(c).accept(*this); @@ -155,13 +155,13 @@ namespace scribo // template <typename L> void - page_xml_visitor::visit(const component_info<L>& info) const + page_xml_visitor<L>::visit(const component_info<L>& info) const { // Getting component outline scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv(); - const L& lbl = info.holder().labeled_image(); + //const L& lbl = info.holder().labeled_image(); p_array<point2d> - par = scribo::util::component_precise_outline(lbl | info.bbox(), id); + par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id); switch (info.type()) { @@ -223,7 +223,7 @@ namespace scribo // template <typename L> void - page_xml_visitor::visit(const paragraph_set<L>& parset) const + page_xml_visitor<L>::visit(const paragraph_set<L>& parset) const { const line_set<L>& lines = parset.lines(); diff --git a/scribo/scribo/io/xml/save.hh b/scribo/scribo/io/xml/save.hh index 54afa79..cc6905b 100644 --- a/scribo/scribo/io/xml/save.hh +++ b/scribo/scribo/io/xml/save.hh @@ -96,7 +96,7 @@ namespace scribo template <typename L> void save_page(const document<L>& doc, std::ofstream& output) { - scribo::io::xml::internal::page_xml_visitor f(output); + scribo::io::xml::internal::page_xml_visitor<L> f(output); doc.accept(f); } diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh index f1135ed..31a5ed4 100644 --- a/scribo/scribo/text/merging.hh +++ b/scribo/scribo/text/merging.hh @@ -224,11 +224,15 @@ namespace scribo template <typename L> - bool between_separators(const scribo::line_info<L>& l1, - const scribo::line_info<L>& l2) + bool between_separators(const scribo::line_set<L>& lines, + const line_id_t& l1_, + const line_id_t& l2_) { + const scribo::line_info<L>& l1 = lines(l1_); + const scribo::line_info<L>& l2 = lines(l2_); + // No separators found in image. - mln_precondition(l1.holder().components().has_separators()); + mln_precondition(lines.components().has_separators()); const box2d& l1_bbox = l1.bbox(); const box2d& l2_bbox = l2.bbox(); @@ -237,7 +241,7 @@ namespace scribo col1 = l1_bbox.pcenter().col(), col2 = l2_bbox.pcenter().col(); const mln_ch_value(L, bool)& - separators = l1.holder().components().separators(); + separators = lines.components().separators(); // Checking for separators starting from 1 / 4, 3/ 4 and the // center of the box @@ -290,9 +294,13 @@ namespace scribo */ template <typename L> - bool lines_can_merge(scribo::line_info<L>& l1, - const scribo::line_info<L>& l2) + bool lines_can_merge(scribo::line_set<L>& lines, + const scribo::line_id_t& l1_, + const scribo::line_id_t& l2_) { + scribo::line_info<L>& l1 = lines(l1_); + scribo::line_info<L>& l2 = lines(l2_); + // Parameters. const float x_ratio_max = 1.7f; const float baseline_delta_max = @@ -306,9 +314,9 @@ namespace scribo const point2d& l1_pmax = l1_bbox.pmax(); const point2d& l2_pmax = l2_bbox.pmax(); - const bool l1_has_separators = l1.holder().components().has_separators(); + const bool l1_has_separators = lines.components().has_separators(); const bool l1_l2_between_separators = (l1_has_separators) ? - between_separators(l1, l2) : false; + between_separators(lines, l1_, l2_) : false; const float l_ted_cw = l2.char_width(); const float dx = std::max(l1_pmin.col(), l2_pmin.col()) @@ -424,9 +432,13 @@ namespace scribo */ template <typename L> - bool non_text_and_text_can_merge(scribo::line_info<L>& l_cur, // current - const scribo::line_info<L>& l_ted) // touched + bool non_text_and_text_can_merge(scribo::line_set<L>& lines, + const scribo::line_id_t& l_cur_, // current + const scribo::line_id_t l_ted_) // touched { + scribo::line_info<L>& l_cur = lines(l_cur_); + scribo::line_info<L>& l_ted = lines(l_ted_); + if (l_cur.type() == line::Text || l_ted.type() != line::Text) return false; // the current object is a NON-textline @@ -434,8 +446,8 @@ namespace scribo // Check that there is no separator in between. - if (l_cur.holder().components().has_separators() - && between_separators(l_cur, l_ted)) + if (lines.components().has_separators() + && between_separators(lines, l_cur_, l_ted_)) return false; const box2d& l_cur_bbox = l_cur.bbox(); @@ -744,8 +756,8 @@ namespace scribo < 5 && std::abs(l_info.meanline() - mc_info.meanline()) < 5)) && dx < l_ted_cw && dy < 0 - && not (l_info.holder().components().has_separators() - && between_separators(l_info, mc_info))) + && not (lines.components().has_separators() + && between_separators(lines, l, mc))) l = do_union(lines, l, mc, parent); // } @@ -801,7 +813,7 @@ namespace scribo // could be noise or garbage... So adding new // criterions could fix this issue. // - if (!non_text_and_text_can_merge(lines(l), lines(mc))) + if (!non_text_and_text_can_merge(lines, l, mc)) continue; // Avoid the case when a large title ebbox overlap @@ -868,7 +880,7 @@ namespace scribo if (lines(l_).type() == line::Text) { // l_ and lcand look like text line chunks. - if (lines_can_merge(lines(l_), lines(lcand))) + if (lines_can_merge(lines, l_, lcand)) { ++count_two_lines_merge; l_ = do_union(lines, l_, lcand, parent); @@ -899,7 +911,7 @@ namespace scribo { // l_ does NOT looks like a text line chunk. ++count_comp_HITS_txtline; - if (non_text_and_text_can_merge(lines(l_), lines(lcand))) + if (non_text_and_text_can_merge(lines, l_, lcand)) // a petouille merges with a text line? { ++count_comp_HITS_txtline; diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh index 8fd89be..e37f610 100644 --- a/scribo/scribo/text/paragraphs.hh +++ b/scribo/scribo/text/paragraphs.hh @@ -29,11 +29,15 @@ namespace scribo template <typename L> inline bool - between_horizontal_separator(const scribo::line_info<L>& l1, - const scribo::line_info<L>& l2) + between_horizontal_separator(const line_set<L>& lines, + const line_id_t& l1_, + const line_id_t& l2_) { + const line_info<L>& l1 = lines(l1_); + const line_info<L>& l2 = lines(l2_); + // No separators found in image. - mln_precondition(l1.holder().components().has_separators()); + mln_precondition(lines.components().has_separators()); const box2d& l1_bbox = l1.bbox(); const box2d& l2_bbox = l2.bbox(); @@ -42,7 +46,7 @@ namespace scribo row1 = l1_bbox.pcenter().row(), row2 = l2_bbox.pcenter().row(); const mln_ch_value(L, bool)& - separators = l1.holder().components().separators(); + separators = lines.components().separators(); unsigned row; unsigned col_ptr; @@ -153,18 +157,18 @@ namespace scribo line_id_t right_nbh = right(l); line_id_t lol_nbh = output(left_nbh); - const line_info<L>& left_line = lines(left_nbh); - const line_info<L>& current_line = lines(l); - const line_info<L>& right_line = lines(right_nbh); + // const line_info<L>& left_line = lines(left_nbh); + // const line_info<L>& current_line = lines(l); + // const line_info<L>& right_line = lines(right_nbh); - if (right_line.holder().components().has_separators() && - between_horizontal_separator(right_line, current_line)) + if (lines.components().has_separators() && + between_horizontal_separator(lines, right_nbh, l)) { output(right_nbh) = right_nbh; right_nbh = l; } - if (current_line.holder().components().has_separators() && - between_horizontal_separator(current_line, left_line)) + if (lines.components().has_separators() && + between_horizontal_separator(lines, l, left_nbh)) { output(l) = l; left_nbh = l; diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh index efc5259..2b685df 100644 --- a/scribo/scribo/text/paragraphs_closing.hh +++ b/scribo/scribo/text/paragraphs_closing.hh @@ -210,7 +210,7 @@ namespace scribo const line_id_t& line_id = line_ids(i); const line_info<L>& current_line = lines(line_id); - scribo::draw::line_components(debug, current_line, p); + scribo::draw::line_components(debug, lines, current_line, p); // HACK DISCLAIMER : this line is drawn in order to be // sure that every line will be reduced to a single -- 1.5.6.5
participants (1)
-
Guillaume Lazzara