
* scribo/core/line_info.hh: Keep few attributes public and update method names. * scribo/core/stats.hh: Fix missed elements in computation. * scribo/text/paragraphs.hh: Use line_set and line_links structures. * scribo/toolchain/internal/content_in_doc_functor.hh: Make use of the new paragraph extraction algorithm. --- scribo/ChangeLog | 15 + scribo/scribo/core/line_info.hh | 10 +- scribo/scribo/core/stats.hh | 13 +- scribo/scribo/text/paragraphs.hh | 1360 ++++++++++---------- .../toolchain/internal/content_in_doc_functor.hh | 178 ++-- 5 files changed, 808 insertions(+), 768 deletions(-) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index a6ef3f8..fc761c3 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,3 +1,18 @@ +2011-05-04 Guillaume Lazzara <lazzara@fidji.lrde.epita.fr> + + Make paragraph extraction work with this branch. + + * scribo/core/line_info.hh: Keep few attributes public and update + method names. + + * scribo/core/stats.hh: Fix missed elements in computation. + + * scribo/text/paragraphs.hh: Use line_set and line_links + structures. + + * scribo/toolchain/internal/content_in_doc_functor.hh: Make use of + the new paragraph extraction algorithm. + 2011-05-03 Guillaume Lazzara <lazzara@lrde.epita.fr> * tests/unit_test/unit-tests.mk: Regen. diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh index 949315a..27dda84 100644 --- a/scribo/scribo/core/line_info.hh +++ b/scribo/scribo/core/line_info.hh @@ -145,14 +145,12 @@ namespace scribo // Line set holding this element. line_set<L> holder_; - private: - void init_(); - // DEBUG stats< float > meanline_clusters_; stats< float > baseline_clusters_; - }; + private: + void init_(); }; } // end of namespace scribo::internal @@ -980,8 +978,8 @@ namespace scribo { const component_set<L>& comp_set = data_->holder_.components(); - const unsigned c1 = data_->components_(0); - const unsigned c2 = data_->components_(1); + const unsigned c1 = data_->component_ids_(0); + const unsigned c2 = data_->component_ids_(1); if (data_->holder_.components()(c1).type() == component::Punctuation || data_->holder_.components()(c2).type() == component::Punctuation) diff --git a/scribo/scribo/core/stats.hh b/scribo/scribo/core/stats.hh index bc24044..095735c 100644 --- a/scribo/scribo/core/stats.hh +++ b/scribo/scribo/core/stats.hh @@ -261,7 +261,7 @@ private: unsigned i = 0; const unsigned nelements = data_.nelements(); - clusters[0] = cluster_index; + clusters.push_back(cluster_index); const T std = data_.standard_deviation(); for (i = 1; i < nelements - 1; ++i) @@ -276,11 +276,12 @@ private: clusters.push_back(cluster_index); } - if (nelements > 1 - && data_[i] - data_[i - 1] > std) - ++cluster_index; - - clusters.push_back(cluster_index); + if (nelements > 1) + { + if (data_[i] - data_[i - 1] > std) + ++cluster_index; + clusters.push_back(cluster_index); + } clusters_.clear(); clusters_.reserve(cluster_index); diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh index 5cb253e..94e5751 100644 --- a/scribo/scribo/text/paragraphs.hh +++ b/scribo/scribo/text/paragraphs.hh @@ -15,7 +15,9 @@ #include <scribo/core/macros.hh> #include <scribo/core/line_set.hh> +#include <scribo/core/line_links.hh> #include <scribo/core/line_info.hh> +#include <scribo/core/paragraph_set.hh> using namespace mln; @@ -59,15 +61,15 @@ namespace scribo template <typename L> inline - void paragraph_links(const util::array<value::int_u16>& left, - const util::array<value::int_u16>& right, - util::array<value::int_u16>& output, - const util::array< line_info<L> >& lines, + void paragraph_links(const line_links<L>& left, + const line_links<L>& right, + line_links<L>& output, + const line_set<L>& lines, const image2d<bool>& input) { - output = left; + output = left.duplicate(); - const unsigned nlines = lines.nelements(); + // const unsigned nlines = lines.nelements(); // image2d<value::rgb8> links = data::convert(value::rgb8(), input); // for (unsigned l = 0; l < nlines; ++l) @@ -77,242 +79,259 @@ namespace scribo // mln::io::ppm::save(links, "out_links.ppm"); // For each line - for (unsigned l = 0; l < nlines; ++l) - { - // Neighbors - - const value::int_u16 left_nbh = output(l); - const value::int_u16 right_nbh = right(l); - const value::int_u16 lol_nbh = output(left_nbh); - - // Line features - const float x_height = lines(l).x_height(); - const float left_x_height = lines(left_nbh).x_height(); - const float right_x_height = lines(right_nbh).x_height(); - - const box2d& left_line_bbox = lines(left_nbh).bbox(); - const box2d& current_line_bbox = lines(l).bbox(); - const box2d& right_line_bbox = lines(right_nbh).bbox(); - const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor - - const int lline_col_min = left_line_bbox.pmin().col(); - const int cline_col_min = current_line_bbox.pmin().col(); - const int rline_col_min = right_line_bbox.pmin().col(); - const int lolline_col_min = lol_line_bbox.pmin().col(); - - const int lline_col_max = left_line_bbox.pmax().col(); - const int cline_col_max = current_line_bbox.pmax().col(); - const int rline_col_max = right_line_bbox.pmax().col(); - - const int lline_cw = lines(left_nbh).char_width(); - const int cline_cw = lines(l).char_width(); - const int rline_cw = lines(right_nbh).char_width(); - // Maximal x variation to consider two lines vertically aligned - const int delta_alignment = cline_cw; - - // Checks the baseline distances of the two neighbors + for_all_lines(l, lines) + if (lines(l).is_textline()) { - // Current line baseline - const int c_baseline = lines(l).baseline(); - - // Baseline distance with the left and right neighbors - const int lc_baseline = lines(left_nbh).baseline() - c_baseline; - const int rc_baseline = c_baseline -lines(right_nbh).baseline(); + // Neighbors + + const line_id_t left_nbh = output(l); + const line_id_t right_nbh = right(l); + const line_id_t lol_nbh = output(left_nbh); + + // Line features + const float x_height = lines(l).x_height(); + const float left_x_height = lines(left_nbh).x_height(); + const float right_x_height = lines(right_nbh).x_height(); + + const box2d& left_line_bbox = lines(left_nbh).bbox(); + const box2d& current_line_bbox = lines(l).bbox(); + const box2d& right_line_bbox = lines(right_nbh).bbox(); + const box2d& lol_line_bbox = lines(lol_nbh).bbox(); // lol : left neighbor of the left neighbor + + const int lline_col_min = left_line_bbox.pmin().col(); + const int cline_col_min = current_line_bbox.pmin().col(); + const int rline_col_min = right_line_bbox.pmin().col(); + const int lolline_col_min = lol_line_bbox.pmin().col(); + + const int lline_col_max = left_line_bbox.pmax().col(); + const int cline_col_max = current_line_bbox.pmax().col(); + const int rline_col_max = right_line_bbox.pmax().col(); + + const int lline_cw = lines(left_nbh).char_width(); + const int cline_cw = lines(l).char_width(); + const int rline_cw = lines(right_nbh).char_width(); + // Maximal x variation to consider two lines vertically aligned + const int delta_alignment = cline_cw; + + // Checks the baseline distances of the two neighbors + { + // Current line baseline + const int c_baseline = lines(l).baseline(); - // Max baseline distance between the two neighbors - // const float delta_baseline_max = std::max(lc_baseline, rc_baseline); - // const float delta_baseline_min = std::min(lc_baseline, - // rc_baseline); + // Baseline distance with the left and right neighbors + const int lc_baseline = lines(left_nbh).baseline() - c_baseline; + const int rc_baseline = c_baseline -lines(right_nbh).baseline(); - // Only two lines, meaning the current line has only one neighbor - bool two_lines = false; + // Max baseline distance between the two neighbors + // const float delta_baseline_max = std::max(lc_baseline, rc_baseline); + // const float delta_baseline_min = std::min(lc_baseline, + // rc_baseline); - // If the current line has no left neighbor - if (lc_baseline == 0) - { - // ror : right neighbor of the right neighbor - const value::int_u16 ror_nbh = right(right_nbh); - // const box2d& ror_line_bbox = lines(ror_nbh).bbox(); + // Only two lines, meaning the current line has only one neighbor + bool two_lines = false; - // If the current line has a ror - if (ror_nbh != right_nbh - && output(ror_nbh) == right_nbh) + // If the current line has no left neighbor + if (lc_baseline == 0) { - // Distance between the current line and the right neighbor - const float right_distance = lines(l).meanline() - lines(right_nbh).baseline(); - // Distance between the right neighbor and the ror - const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline(); - // ror x_height - const float ror_x_height = lines(ror_nbh).x_height(); - - // Conditions to cut the link between the current line - // and its right neighbor - if (right_distance > 1.4f * ror_distance - && std::max(ror_x_height, right_x_height) < - 1.2f * std::min(ror_x_height, right_x_height) - && output(right_nbh) == l) + // ror : right neighbor of the right neighbor + const line_id_t ror_nbh = right(right_nbh); + //const box2d& ror_line_bbox = lines(ror_nbh).bbox(); + + // If the current line has a ror + if (ror_nbh != right_nbh + && output(ror_nbh) == right_nbh) { + // Distance between the current line and the right neighbor + const float right_distance = lines(l).meanline() - lines(right_nbh).baseline(); + // Distance between the right neighbor and the ror + const float ror_distance = lines(right_nbh).meanline() - lines(ror_nbh).baseline(); + // ror x_height + const float ror_x_height = lines(ror_nbh).x_height(); + + // Conditions to cut the link between the current line + // and its right neighbor + if (right_distance > 1.4f * ror_distance + && std::max(ror_x_height, right_x_height) < + 1.2f * std::min(ror_x_height, right_x_height) + && output(right_nbh) == l) + { output(right_nbh) = right_nbh; continue; + } } - } - // Otherwise we only have a group of two lines - else - { - // We determine the distance between the two lines - const float distance = lines(l).meanline() - lines(right_nbh).baseline(); - two_lines = true; - - // If the distance between the two lines is greater than - // the minimum x height of the two lines then we cut the - // link between them - if (distance > 2.0f * std::min(x_height, right_x_height) - && output(right_nbh) == l) + // Otherwise we only have a group of two lines + else { + // We determine the distance between the two lines + const float distance = lines(l).meanline() - lines(right_nbh).baseline(); + two_lines = true; + + // If the distance between the two lines is greater than + // the minimum x height of the two lines then we cut the + // link between them + if (distance > 2.0f * std::min(x_height, right_x_height) + && output(right_nbh) == l) + { output(right_nbh) = right_nbh; continue; + } } - } - // Lines features - const float min_x_height = std::min(x_height, right_x_height); - const float max_x_height = std::max(x_height, right_x_height); - const float min_char_width = std::min(rline_cw, cline_cw); - const float max_char_width = std::max(rline_cw, cline_cw); + // Lines features + const float min_x_height = std::min(x_height, right_x_height); + const float max_x_height = std::max(x_height, right_x_height); + const float min_char_width = std::min(rline_cw, cline_cw); + const float max_char_width = std::max(rline_cw, cline_cw); - // Condition to cut the link between the current line and - // its right neighbor - if ((max_x_height > min_x_height * 1.2f) && - !(max_char_width <= 1.2f * min_char_width)) - { - if (output(right_nbh) == l) + // Condition to cut the link between the current line and + // its right neighbor + if ((max_x_height > min_x_height * 1.2f) && + !(max_char_width <= 1.2f * min_char_width)) { - output(right_nbh) = right_nbh; - continue; + if (output(right_nbh) == l) + { + output(right_nbh) = right_nbh; + continue; + } } - } - - // If we only have two lines we stop the study - if (two_lines) - continue; - } - // If the current line has no right neighbor - else if (rc_baseline == 0) - { - // lol : left neighbor of the left neighbor - // If the left neighbor of the current line has a left neighbor - if (lol_nbh != left_nbh) + // If we only have two lines we stop the study + if (two_lines) + continue; + } + // If the current line has no right neighbor + else if (rc_baseline == 0) { - // Distance between the current line and its left neighbor - const float left_distance = lines(left_nbh).meanline() - - lines(l).baseline(); - // Distance between the left neighbor and the left - // neighbor of its left neighbor - const float lol_distance = lines(lol_nbh).meanline() - - lines(left_nbh).baseline(); - // lol x height - const float lol_x_height = lines(lol_nbh).x_height(); - - // Conditions to cut the link between the current line - // and its left neighbor - if (left_distance > 1.4f * lol_distance - && std::max(lol_x_height, left_x_height) < - 1.2f * std::min(lol_x_height, left_x_height)) + // lol : left neighbor of the left neighbor + + // If the left neighbor of the current line has a left neighbor + if (lol_nbh != left_nbh) { + // Distance between the current line and its left neighbor + const float left_distance = lines(left_nbh).meanline() - + lines(l).baseline(); + // Distance between the left neighbor and the left + // neighbor of its left neighbor + const float lol_distance = lines(lol_nbh).meanline() - + lines(left_nbh).baseline(); + // lol x height + const float lol_x_height = lines(lol_nbh).x_height(); + + // Conditions to cut the link between the current line + // and its left neighbor + if (left_distance > 1.4f * lol_distance + && std::max(lol_x_height, left_x_height) < + 1.2f * std::min(lol_x_height, left_x_height)) + { output(l) = l; continue; + } } - } - // Otherwise we only have a group of two lines - else - { - // Distance between the current line and it left neighbor - const float distance = lines(left_nbh).meanline() - - lines(l).baseline(); + // Otherwise we only have a group of two lines + else + { + // Distance between the current line and it left neighbor + const float distance = lines(left_nbh).meanline() - + lines(l).baseline(); - two_lines = true; + two_lines = true; - // If the distance is greater than the min x height - // between the two lines - if (distance > 2.0f * std::min(x_height, left_x_height)) - { + // If the distance is greater than the min x height + // between the two lines + if (distance > 2.0f * std::min(x_height, left_x_height)) + { output(l) = l; continue; + } } - } - - // Lines features - const float min_x_height = std::min(x_height, left_x_height); - const float max_x_height = std::max(x_height, left_x_height); - const float min_char_width = std::min(lline_cw, cline_cw); - const float max_char_width = std::max(lline_cw, cline_cw); - - // Condition to cut the link between the current line and - // its left neighbor - if ((max_x_height > min_x_height * 1.2f) && - !(max_char_width <= 1.2f * min_char_width)) - { - output(l) = l; - continue; - } - // If we only have two lines we stop the study - if (two_lines) - continue; - } - // The current line has at least one left and one right neighbor - else // if (delta_baseline_max >= delta_baseline_min) - { - // Distance between the left and the current line - const float left_distance = - lines(left_nbh).meanline() - lines(l).baseline(); - // Distance between the right and the current line - const float right_distance = - lines(l).meanline() - lines(right_nbh).baseline(); - - // If the left line is too far compared to the right one - // we cut the link with it - if (left_distance > 1.2f * right_distance - && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height)) - { - output(l) = l; - continue; - } - // If the right line is too far compared to the left one - // we cut the link with it - else if (right_distance > 1.2f * left_distance - && std::max(x_height, right_x_height) > 1.2f * std::min(x_height, right_x_height) - && output(right_nbh) == l) - { - output(right_nbh) = right_nbh; - continue; - } - - // If the distance between the baseline of the left - // neighbor and the baseline of the current line is - // greater than the one between the current line baseline - // and the right line baseline we have to study the texte - // features of the right and left lines - if (lc_baseline > rc_baseline) - { - const float cw_max = std::max(lline_cw, cline_cw); - const float cw_min = std::min(lline_cw, cline_cw); + // Lines features const float min_x_height = std::min(x_height, left_x_height); const float max_x_height = std::max(x_height, left_x_height); + const float min_char_width = std::min(lline_cw, cline_cw); + const float max_char_width = std::max(lline_cw, cline_cw); + // Condition to cut the link between the current line and + // its left neighbor if ((max_x_height > min_x_height * 1.2f) && - !(cw_max <= 1.2f * cw_min)) + !(max_char_width <= 1.2f * min_char_width)) { output(l) = l; continue; } + // If we only have two lines we stop the study + if (two_lines) + continue; + } + // The current line has at least one left and one right neighbor + else // if (delta_baseline_max >= delta_baseline_min) + { + // Distance between the left and the current line + const float left_distance = + lines(left_nbh).meanline() - lines(l).baseline(); + // Distance between the right and the current line + const float right_distance = + lines(l).meanline() - lines(right_nbh).baseline(); + + // If the left line is too far compared to the right one + // we cut the link with it + if (left_distance > 1.2f * right_distance + && std::max(x_height, left_x_height) > 1.2f * std::min(x_height, left_x_height)) + { + output(l) = l; + continue; + } + // If the right line is too far compared to the left one + // we cut the link with it + else if (right_distance > 1.2f * left_distance + && std::max(x_height, right_x_height) > 1.2f * std::min(x_height, right_x_height) + && output(right_nbh) == l) + { + output(right_nbh) = right_nbh; + continue; + } + + // If the distance between the baseline of the left + // neighbor and the baseline of the current line is + // greater than the one between the current line baseline + // and the right line baseline we have to study the texte + // features of the right and left lines + if (lc_baseline > rc_baseline) + { + const float cw_max = std::max(lline_cw, cline_cw); + const float cw_min = std::min(lline_cw, cline_cw); + const float min_x_height = std::min(x_height, left_x_height); + const float max_x_height = std::max(x_height, left_x_height); + + if ((max_x_height > min_x_height * 1.2f) && + !(cw_max <= 1.2f * cw_min)) + { + output(l) = l; + continue; + } + + { + const float min_x_height = std::min(x_height, right_x_height); + const float max_x_height = std::max(x_height, right_x_height); + const float cw_max = std::max(rline_cw, cline_cw); + const float cw_min = std::min(rline_cw, cline_cw); + + if ((max_x_height > min_x_height * 1.2f) + && !(cw_max <= 1.2f * cw_min) + && output(right_nbh) == l) + { + output(right_nbh) = right_nbh; + continue; + } + } + } + else { - const float min_x_height = std::min(x_height, right_x_height); - const float max_x_height = std::max(x_height, right_x_height); const float cw_max = std::max(rline_cw, cline_cw); const float cw_min = std::min(rline_cw, cline_cw); + const float min_x_height = std::min(x_height, right_x_height); + const float max_x_height = std::max(x_height, right_x_height); if ((max_x_height > min_x_height * 1.2f) && !(cw_max <= 1.2f * cw_min) @@ -321,44 +340,28 @@ namespace scribo output(right_nbh) = right_nbh; continue; } - } - } - else - { - const float cw_max = std::max(rline_cw, cline_cw); - const float cw_min = std::min(rline_cw, cline_cw); - const float min_x_height = std::min(x_height, right_x_height); - const float max_x_height = std::max(x_height, right_x_height); - if ((max_x_height > min_x_height * 1.2f) - && !(cw_max <= 1.2f * cw_min) - && output(right_nbh) == l) - { - output(right_nbh) = right_nbh; - continue; - } - - { - const float min_x_height = std::min(x_height, left_x_height); - const float max_x_height = std::max(x_height, left_x_height); - const float cw_max = std::max(lline_cw, cline_cw); - const float cw_min = std::min(lline_cw, cline_cw); - - if ((max_x_height > min_x_height * 1.2f) - && !(cw_max <= 1.2f * cw_min)) { - output(l) = l; - continue; + const float min_x_height = std::min(x_height, left_x_height); + const float max_x_height = std::max(x_height, left_x_height); + const float cw_max = std::max(lline_cw, cline_cw); + const float cw_min = std::min(lline_cw, cline_cw); + + if ((max_x_height > min_x_height * 1.2f) + && !(cw_max <= 1.2f * cw_min)) + { + output(l) = l; + continue; + } } } } } - } - // If we arrive here, it means than the lines in the - // neighborhood of the current line are quite similar. We can - // then begin to study the indentations in order to determine - // the beginning of new paragraphs + // If we arrive here, it means than the lines in the + // neighborhood of the current line are quite similar. We can + // then begin to study the indentations in order to determine + // the beginning of new paragraphs //----------------------------------------------------------------------------- // ___________________________ @@ -375,35 +378,35 @@ namespace scribo // //----------------------------------------------------------------------------- - { - // Check if the current line neighbors are aligned - bool left_right_aligned = false; - bool left_lol_aligned = false; - const int dx_lr = std::abs(lline_col_min - rline_col_min); - const int dx_llol = std::abs(lline_col_min - lolline_col_min); + { + // Check if the current line neighbors are aligned + bool left_right_aligned = false; + bool left_lol_aligned = false; + const int dx_lr = std::abs(lline_col_min - rline_col_min); + const int dx_llol = std::abs(lline_col_min - lolline_col_min); - if (dx_lr < delta_alignment) - left_right_aligned = true; + if (dx_lr < delta_alignment) + left_right_aligned = true; - if (dx_llol < delta_alignment) - left_lol_aligned = true; + if (dx_llol < delta_alignment) + left_lol_aligned = true; - if (left_right_aligned && left_lol_aligned) - { - const int left_right_col_min = std::min(lline_col_min, rline_col_min); - const int dx_lrc = std::abs(left_right_col_min - cline_col_min); - const float l_char_width = 1.5f * lines(l).char_width(); - - if (dx_lrc > l_char_width && - dx_lrc < 3.0f * l_char_width && - cline_col_min > rline_col_min && - cline_col_min > lline_col_min) + if (left_right_aligned && left_lol_aligned) { - output(right_nbh) = right_nbh; - continue; + const int left_right_col_min = std::min(lline_col_min, rline_col_min); + const int dx_lrc = std::abs(left_right_col_min - cline_col_min); + const float l_char_width = 1.5f * lines(l).char_width(); + + if (dx_lrc > l_char_width && + dx_lrc < 3.0f * l_char_width && + cline_col_min > rline_col_min && + cline_col_min > lline_col_min) + { + output(right_nbh) = right_nbh; + continue; + } } } - } //----------------------------------------------------------------------------- // ___________________________ @@ -419,41 +422,41 @@ namespace scribo // //----------------------------------------------------------------------------- - { - // Check if the current line neighbors are aligned - bool left_right_max_aligned = false; - bool left_current_min_aligned = false; - bool lol_current_min_aligned = false; - const bool lol_is_left = output(left_nbh) == left_nbh; - const int dx_lr_max = std::abs(lline_col_max - rline_col_max); - const int dx_lc_min = std::abs(lline_col_min - cline_col_min); - const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min); - - if (dx_lr_max < delta_alignment) - left_right_max_aligned = true; - - if (dx_lc_min < delta_alignment) - left_current_min_aligned = true; - - if (dx_lolc_min < delta_alignment) - lol_current_min_aligned = true; - - if (!left_current_min_aligned && left_right_max_aligned && - (lol_current_min_aligned || lol_is_left)) { - const int dx_lrc = std::abs(lline_col_max - cline_col_max); - const int l_char_width = lines(l).char_width(); - - if (dx_lrc > l_char_width && - cline_col_max < lline_col_max && - cline_col_min < lline_col_min && - (lline_col_min > lolline_col_min || lol_is_left)) + // Check if the current line neighbors are aligned + bool left_right_max_aligned = false; + bool left_current_min_aligned = false; + bool lol_current_min_aligned = false; + const bool lol_is_left = output(left_nbh) == left_nbh; + const int dx_lr_max = std::abs(lline_col_max - rline_col_max); + const int dx_lc_min = std::abs(lline_col_min - cline_col_min); + const int dx_lolc_min = std::abs(lolline_col_min - cline_col_min); + + if (dx_lr_max < delta_alignment) + left_right_max_aligned = true; + + if (dx_lc_min < delta_alignment) + left_current_min_aligned = true; + + if (dx_lolc_min < delta_alignment) + lol_current_min_aligned = true; + + if (!left_current_min_aligned && left_right_max_aligned && + (lol_current_min_aligned || lol_is_left)) { - output(l) = l; - continue; + const int dx_lrc = std::abs(lline_col_max - cline_col_max); + const int l_char_width = lines(l).char_width(); + + if (dx_lrc > l_char_width && + cline_col_max < lline_col_max && + cline_col_min < lline_col_min && + (lline_col_min > lolline_col_min || lol_is_left)) + { + output(l) = l; + continue; + } } } - } //----------------------------------------------------------------------------- @@ -469,68 +472,69 @@ namespace scribo // //----------------------------------------------------------------------------- - if (left_nbh == l) - { - const value::int_u16 ror_nbh = right(right_nbh); - const box2d& ror_line_bbox = lines(ror_nbh).bbox(); - const int rorline_col_min = ror_line_bbox.pmin().col(); + if (left_nbh == l) + { + const line_id_t ror_nbh = right(right_nbh); + const box2d& ror_line_bbox = lines(ror_nbh).bbox(); + const int rorline_col_min = ror_line_bbox.pmin().col(); - bool right_ror_min_aligned = false; - const int dx_rror_min = std::abs(rline_col_min - rorline_col_min); + bool right_ror_min_aligned = false; + const int dx_rror_min = std::abs(rline_col_min - rorline_col_min); - if (dx_rror_min < delta_alignment) - right_ror_min_aligned = true; + if (dx_rror_min < delta_alignment) + right_ror_min_aligned = true; - if (right_ror_min_aligned) - { - const int right_ror_col_min = std::min(rline_col_min, rorline_col_min); - const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min); - const float l_char_width = 1.5f * lines(l).char_width(); - - if (dx_rrorc > l_char_width && - dx_rrorc < 3.0f * l_char_width && - cline_col_min > rline_col_min && - cline_col_max >= rline_col_max) + if (right_ror_min_aligned) { - output(right_nbh) = right_nbh; - continue; + const int right_ror_col_min = std::min(rline_col_min, rorline_col_min); + const int dx_rrorc = std::abs(right_ror_col_min - cline_col_min); + const float l_char_width = 1.5f * lines(l).char_width(); + + if (dx_rrorc > l_char_width && + dx_rrorc < 3.0f * l_char_width && + cline_col_min > rline_col_min && + cline_col_max >= rline_col_max) + { + output(right_nbh) = right_nbh; + continue; + } } } } - } // Only debug - { - image2d<value::rgb8> debug = data::convert(value::rgb8(), input); - - for (unsigned i = 0; i < output.nelements(); ++i) - output(i) = internal::find_root(output, i); - - mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements()); - for (unsigned i = 0; i < nlines; ++i) - { - // if (lines(i).is_textline()) - // { - // mln::draw::box(debug, lines(i).bbox(), literal::red); - nbbox(output(i)).take(lines(i).bbox()); - // } - } - - for (unsigned i = 0; i < nbbox.nelements(); ++i) - if (nbbox(i).is_valid()) - { - box2d b = nbbox(i).to_result(); - mln::draw::box(debug, b, literal::orange); - b.enlarge(1); - mln::draw::box(debug, b, literal::orange); - b.enlarge(1); - mln::draw::box(debug, b, literal::orange); - } - - mln::io::ppm::save(debug, "out_paragraph.ppm"); - } + // { + // image2d<value::rgb8> debug = data::convert(value::rgb8(), input); + + // for (unsigned i = 0; i < output.nelements(); ++i) + // output(i) = scribo::make::internal::find_root(output, i); + + // mln::util::array<accu::shape::bbox<point2d> > nbbox(output.nelements()); + // for_all_lines(l, lines) + // if (lines(l).is_textline()) + // { + // // if (lines(i).is_textline()) + // // { + // // mln::draw::box(debug, lines(i).bbox(), literal::red); + // nbbox(output(l)).take(lines(l).bbox()); + // // } + // } + + // for (unsigned i = 0; i < nbbox.nelements(); ++i) + // if (nbbox(i).is_valid()) + // { + // box2d b = nbbox(i).to_result(); + // mln::draw::box(debug, b, literal::orange); + // b.enlarge(1); + // mln::draw::box(debug, b, literal::orange); + // b.enlarge(1); + // mln::draw::box(debug, b, literal::orange); + // } + + // mln::io::ppm::save(debug, "out_paragraph.ppm"); + // } } } @@ -547,262 +551,269 @@ namespace scribo template <typename L> inline void prepare_lines(const box2d& domain, - const util::array< line_info<L> >& lines, - image2d<value::int_u16>& blocks, + const line_set<L>& lines, + L& blocks, util::array<box2d>& rbbox) { std::map< int, std::vector< const box2d* > > drawn_lines; - const unsigned nlines = lines.nelements(); + // const unsigned nlines = lines.nelements(); // For each line - for (unsigned l = 0; l < nlines; ++l) - { - // Rotation of the bounding box - box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter()); - rbbox.append(b); - - const unsigned index = l + 1; - const unsigned even_index = 2 * index; - const unsigned odd_index = even_index + 1; - - // Top of the line + //for (unsigned l = 0; l < nlines; ++l) + for_all_lines(l, lines) + if (lines(l).is_textline()) { - bool not_finished = true; - int col_offset = 0; + // Rotation of the bounding box + box2d b = geom::rotate(lines(l).bbox(), -90, domain.pcenter()); +// rbbox.append(b); + rbbox(l) = b; - while (not_finished) - { - // Looking for a column in the image to draw the top of the - // line + const unsigned index = l + 1; + const unsigned even_index = 2 * index; + const unsigned odd_index = even_index + 1; - const int col = b.pmax().col() + col_offset; - std::map< int, std::vector< const box2d* > >::iterator it - = drawn_lines.find(col); + // Top of the line + { + bool not_finished = true; + int col_offset = 0; - if (it != drawn_lines.end()) + while (not_finished) { - const std::vector< const box2d* >& lines = (*it).second; - const unsigned nb_lines = lines.size(); - unsigned i = 0; + // Looking for a column in the image to draw the top of the + // line + + const int col = b.pmax().col() + col_offset; + std::map< int, std::vector< const box2d* > >::iterator it + = drawn_lines.find(col); - for (i = 0; i < nb_lines; ++i) + if (it != drawn_lines.end()) { - const box2d* box = lines[i]; - const int min_row = std::max(b.pmin().row(), box->pmin().row()); - const int max_row = std::min(b.pmax().row(), box->pmax().row()); + const std::vector< const box2d* >& lines = (*it).second; + const unsigned nb_lines = lines.size(); + unsigned i = 0; - if (min_row - max_row <= 0) - break; - } + for (i = 0; i < nb_lines; ++i) + { + const box2d* box = lines[i]; + const int min_row = std::max(b.pmin().row(), box->pmin().row()); + const int max_row = std::min(b.pmax().row(), box->pmax().row()); - if (i == nb_lines) + if (min_row - max_row <= 0) + break; + } + + if (i == nb_lines) + { + mln::draw::line(blocks, point2d(b.pmin().row(), col), + point2d(b.pmax().row(), col), even_index); + not_finished = false; + drawn_lines[col].push_back(&(rbbox[l])); + } + else + ++col_offset; + } + else { mln::draw::line(blocks, point2d(b.pmin().row(), col), point2d(b.pmax().row(), col), even_index); not_finished = false; drawn_lines[col].push_back(&(rbbox[l])); } - else - ++col_offset; - } - else - { - mln::draw::line(blocks, point2d(b.pmin().row(), col), - point2d(b.pmax().row(), col), even_index); - not_finished = false; - drawn_lines[col].push_back(&(rbbox[l])); } } - } - // Bottom of the line - { - bool not_finished = true; - int col_offset = 0; - - while (not_finished) + // Bottom of the line { - // Looking for a column in the image to draw the bottom of - // the line + bool not_finished = true; + int col_offset = 0; - const int col = b.pmin().col() - col_offset; - std::map< int, std::vector< const box2d* > >::iterator it - = drawn_lines.find(col); - - if (it != drawn_lines.end()) + while (not_finished) { - const std::vector< const box2d* >& lines = (*it).second; - const unsigned nb_lines = lines.size(); - unsigned i = 0; + // Looking for a column in the image to draw the bottom of + // the line + + const int col = b.pmin().col() - col_offset; + std::map< int, std::vector< const box2d* > >::iterator it + = drawn_lines.find(col); - for (i = 0; i < nb_lines; ++i) + if (it != drawn_lines.end()) { - const box2d* box = lines[i]; - const int min_row = std::max(b.pmin().row(), box->pmin().row()); - const int max_row = std::min(b.pmax().row(), box->pmax().row()); + const std::vector< const box2d* >& lines = (*it).second; + const unsigned nb_lines = lines.size(); + unsigned i = 0; - if (min_row - max_row <= 0) - break; - } + for (i = 0; i < nb_lines; ++i) + { + const box2d* box = lines[i]; + const int min_row = std::max(b.pmin().row(), box->pmin().row()); + const int max_row = std::min(b.pmax().row(), box->pmax().row()); - if (i == nb_lines) + if (min_row - max_row <= 0) + break; + } + + if (i == nb_lines) + { + mln::draw::line(blocks, point2d(b.pmin().row(), col), + point2d(b.pmax().row(), col), odd_index); + not_finished = false; + drawn_lines[col].push_back(&(rbbox[l])); + } + else + ++col_offset; + } + else { mln::draw::line(blocks, point2d(b.pmin().row(), col), point2d(b.pmax().row(), col), odd_index); not_finished = false; drawn_lines[col].push_back(&(rbbox[l])); } - else - ++col_offset; - } - else - { - mln::draw::line(blocks, point2d(b.pmin().row(), col), - point2d(b.pmax().row(), col), odd_index); - not_finished = false; - drawn_lines[col].push_back(&(rbbox[l])); } } } - } } template <typename L> inline void - process_left_link(image2d<value::int_u16>& blocks, + process_left_link(L& blocks, const util::array<box2d>& rbbox, - const util::array< line_info<L> >& lines, - util::array<value::int_u16>& left) + const line_set<L>& lines, + line_links<L>& left) { - typedef value::int_u16 V; + typedef scribo::def::lbl_type V; // At the beginning each line is its own neighbor - for (unsigned i = 0; i < left.nelements(); ++i) - left(i) = i; + for_all_lines(l, lines) + if (lines(l).is_textline()) + left(l) = l; + else + left(l) = 0; - const unsigned nlines = lines.nelements(); + // const unsigned nlines = lines.nelements(); // For each line - for (unsigned i = 0; i < nlines; ++i) - { - // Max distance for the line search - int dmax = 1.5f * lines(i).x_height(); - - // Starting points in the current line box - point2d c = rbbox(i).pcenter(); - point2d q(rbbox(i).pmin().row() + ((c.row() - rbbox(i).pmin().row()) / 4), c.col()); + for_all_lines(l, lines) + if (lines(l).is_textline()) + { + // Max distance for the line search + int dmax = 1.5f * lines(l).x_height(); - int - midcol = (rbbox(i).pmax().col() - - rbbox(i).pmin().col()) / 2; + // Starting points in the current line box + point2d c = rbbox(l).pcenter(); + point2d q(rbbox(l).pmin().row() + ((c.row() - rbbox(l).pmin().row()) / 4), c.col()); - // Left - { - // marge gauche int - nleftima = c.col() - blocks.domain().pmin().col(), - // Distance gauche - nleft = std::min(nleftima, midcol + dmax); - - V - // Starting points in the box - *p = &blocks(c), - *p2 = &blocks(q), - // End of search - *pstop = p - nleft - 1, - // Line neighbor - *nbh_p = 0; - - // While we haven't found a neighbor or reached the limit - for (; p != pstop; --p, --p2) + midcol = (rbbox(l).pmax().col() + - rbbox(l).pmin().col()) / 2; + + // Left { - if (*p2 != literal::zero // Not the background - && ((*p2 % 2) == 0) // Looking for the bottom of a line - && left((*p2 >> 1) - 1) != i) // No loops + // marge gauche + int + nleftima = c.col() - blocks.domain().pmin().col(), + // Distance gauche + nleft = std::min(nleftima, midcol + dmax); + + V + // Starting points in the box + *p = &blocks(c), + *p2 = &blocks(q), + // End of search + *pstop = p - nleft - 1, + // Line neighbor + *nbh_p = 0; + + // While we haven't found a neighbor or reached the limit + for (; p != pstop; --p, --p2) { - // Neightbor found, we stop the research - nbh_p = p2; - break; - } + if (*p2 != literal::zero // Not the background + && ((*p2 % 2) == 0) // Looking for the bottom of a line + && left((*p2 >> 1) - 1) != l) // No loops + { + // Neightbor found, we stop the research + nbh_p = p2; + break; + } - if (*p != literal::zero // Not the background - && ((*p % 2) == 0) // Looking for the bottom of a line - && left((*p >> 1) - 1) != i) // No loops - { - // Neightbor found, we stop the research - nbh_p = p; - break; + if (*p != literal::zero // Not the background + && ((*p % 2) == 0) // Looking for the bottom of a line + && left((*p >> 1) - 1) != l) // No loops + { + // Neightbor found, we stop the research + nbh_p = p; + break; + } } - } - - // If a neighbor was found, then we have found the top of the - // line. We are then looking for the bottom of the encountered - // line. If during the search process we find a complete line - // included in the touched line, this line is considered as - // the neighbor under certain conditions (see below) - - //--------------------------------------------------------------- - // _________________________ | - // |_________________________| => Current line | Search direction - // v - // => First encountered top line - // __________________________________________________ 2Q - // | Q | - // | _________________________ |2P - // | |_____________P___________| => Second top |2P + 1 - // | line | - // |__________________________________________________|2Q + 1 - // - // - //--------------------------------------------------------------- - - if (nbh_p) - { - std::vector<V> lines_nbh; - const V end_p = *nbh_p + 1; - const V* nbh_p_copy = nbh_p; - for (; *nbh_p != end_p; --nbh_p) + // If a neighbor was found, then we have found the top of the + // line. We are then looking for the bottom of the encountered + // line. If during the search process we find a complete line + // included in the touched line, this line is considered as + // the neighbor under certain conditions (see below) + + //--------------------------------------------------------------- + // _________________________ | + // |_________________________| => Current line | Search direction + // v + // => First encountered top line + // __________________________________________________ 2Q + // | Q | + // | _________________________ |2P + // | |_____________P___________| => Second top |2P + 1 + // | line | + // |__________________________________________________|2Q + 1 + // + // + //--------------------------------------------------------------- + + if (nbh_p) { - if ((*nbh_p) != literal::zero) // Not the background + std::vector<V> lines_nbh; + const V end_p = *nbh_p + 1; + const V* nbh_p_copy = nbh_p; + + for (; *nbh_p != end_p; --nbh_p) { - if ((*nbh_p) % 2 == 0)// We have found the top of - // another line - lines_nbh.push_back(*nbh_p); - else + if ((*nbh_p) != literal::zero) // Not the background { - // We have found the bottom of a line. We are looking if - // we have already encountered the top of this - // line. If so, we link the current line with this one - // under certain conditions: - - if (std::find(lines_nbh.begin(), lines_nbh.end(), - (*nbh_p) - 1) != lines_nbh.end()) + if ((*nbh_p) % 2 == 0)// We have found the top of + // another line + lines_nbh.push_back(*nbh_p); + else { - // If we can link the complete line with the current line - if (// It must be in the search range - nbh_p > pstop - // Avoid loops - && left(((*nbh_p - 1) >> 1) - 1) != i) - left(i) = ((*nbh_p - 1) >> 1) - 1; - - // We have found a complete line so we stop the search - break; + // We have found the bottom of a line. We are looking if + // we have already encountered the top of this + // line. If so, we link the current line with this one + // under certain conditions: + + if (std::find(lines_nbh.begin(), lines_nbh.end(), + (*nbh_p) - 1) != lines_nbh.end()) + { + // If we can link the complete line with the current line + if (// It must be in the search range + nbh_p > pstop + // Avoid loops + && left(((*nbh_p - 1) >> 1) - 1) != l) + left(l) = ((*nbh_p - 1) >> 1) - 1; + + // We have found a complete line so we stop the search + break; + } } } } - } - // If we haven't found any included line in the first - // neighbor, then the line is considered as the neighbor of - // the current line - if (*nbh_p == end_p) - left(i) = (*nbh_p_copy >> 1) - 1; + // If we haven't found any included line in the first + // neighbor, then the line is considered as the neighbor of + // the current line + if (*nbh_p == end_p) + left(l) = (*nbh_p_copy >> 1) - 1; + } } } - } } @@ -810,137 +821,141 @@ namespace scribo template <typename L> inline void - process_right_link(image2d<value::int_u16>& blocks, + process_right_link(L& blocks, const util::array<box2d>& rbbox, - const util::array< line_info<L> >& lines, - util::array<value::int_u16>& right) + const line_set<L>& lines, + line_links<L>& right) { - typedef value::int_u16 V; + typedef scribo::def::lbl_type V; // At the beginning each line is its own neighbor - for (unsigned i = 0; i < right.nelements(); ++i) - right(i) = i; + for_all_lines(l, lines) + if (lines(l).is_textline()) + right(l) = l; + else + right(l) = 0; - const unsigned nlines = lines.nelements(); + // const unsigned nlines = lines.nelements(); // For each line - for (unsigned i = 0; i < nlines; ++i) - { - // Max distance for the line search - int dmax = 1.5f * lines(i).x_height(); - - // Starting points in the current line box - point2d c = rbbox(i).pcenter(); - point2d q(rbbox(i).pmax().row() - ((rbbox(i).pmax().row() - c.row()) / 4), c.col()); + for_all_lines(l, lines) + if (lines(l).is_textline()) + { + // Max distance for the line search + int dmax = 1.5f * lines(l).x_height(); - int - midcol = (rbbox(i).pmax().col() - - rbbox(i).pmin().col()) / 2; + // Starting points in the current line box + point2d c = rbbox(l).pcenter(); + point2d q(rbbox(l).pmax().row() - ((rbbox(l).pmax().row() - c.row()) / 4), c.col()); - // Right - { int - nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(), - nright = std::min(nrightima, midcol + dmax); - - V - // Starting points in the box - *p = &blocks(c), - *p2 = &blocks(q), - // End of search - *pstop = p + nright - 1, - // Line neighbor - *nbh_p = 0; - - // While we haven't found a neighbor or reached the limit - for (; p != pstop; ++p, ++p2) + midcol = (rbbox(l).pmax().col() + - rbbox(l).pmin().col()) / 2; + + // Right { - if (*p2 != literal::zero // Not the background - && ((*p2 % 2) == 1) // Looking for the bottom of a line - && right(((*p2 - 1) >> 1) - 1) != i) // No loops + int + nrightima = geom::ncols(blocks) - c.col() + blocks.domain().pmin().col(), + nright = std::min(nrightima, midcol + dmax); + + V + // Starting points in the box + *p = &blocks(c), + *p2 = &blocks(q), + // End of search + *pstop = p + nright - 1, + // Line neighbor + *nbh_p = 0; + + // While we haven't found a neighbor or reached the limit + for (; p != pstop; ++p, ++p2) { - // Neightbor found, we stop the research - nbh_p = p2; - break; - } + if (*p2 != literal::zero // Not the background + && ((*p2 % 2) == 1) // Looking for the bottom of a line + && right(((*p2 - 1) >> 1) - 1) != l) // No loops + { + // Neightbor found, we stop the research + nbh_p = p2; + break; + } - if (*p != literal::zero // Not the background - && ((*p % 2) == 1) // Looking for the bottom of a line - && right(((*p - 1) >> 1) - 1) != i) // No loops - { - // Neightbor found, we stop the research - nbh_p = p; - break; + if (*p != literal::zero // Not the background + && ((*p % 2) == 1) // Looking for the bottom of a line + && right(((*p - 1) >> 1) - 1) != l) // No loops + { + // Neightbor found, we stop the research + nbh_p = p; + break; + } } - } - - // If a neighbor was found, then we have found the bottom of the - // line. We are then looking for the top of the encountered - // line. If during the search process we find a complete line - // included in the touched line, this line is considered as - // the neighbor under certain conditions (see below) - - //--------------------------------------------------------------- - // - // - // __________________________________________________ 2Q - // | Q | - // | _________________________ |2P - // | |_____________P___________| => Second bottom |2P + 1 - // | line | - // |__________________________________________________|2Q + 1 - // => First encountered bottom line - // _________________________ ^ - // |_________________________| => Current line | Search direction - // | - //--------------------------------------------------------------- - - if (nbh_p) - { - std::vector<V> lines_nbh; - const V end_p = *nbh_p - 1; - const V* nbh_p_copy = nbh_p; - for (; *nbh_p != end_p; ++nbh_p) + // If a neighbor was found, then we have found the bottom of the + // line. We are then looking for the top of the encountered + // line. If during the search process we find a complete line + // included in the touched line, this line is considered as + // the neighbor under certain conditions (see below) + + //--------------------------------------------------------------- + // + // + // __________________________________________________ 2Q + // | Q | + // | _________________________ |2P + // | |_____________P___________| => Second bottom |2P + 1 + // | line | + // |__________________________________________________|2Q + 1 + // => First encountered bottom line + // _________________________ ^ + // |_________________________| => Current line | Search direction + // | + //--------------------------------------------------------------- + + if (nbh_p) { - if (*nbh_p != literal::zero) // Not the background + std::vector<V> lines_nbh; + const V end_p = *nbh_p - 1; + const V* nbh_p_copy = nbh_p; + + for (; *nbh_p != end_p; ++nbh_p) { - if (*nbh_p % 2 == 1) // We have found the bottom of - // another line - lines_nbh.push_back(*nbh_p); - else + if (*nbh_p != literal::zero) // Not the background { - // We have found the top of a line. We are looking if - //we have already encountered the bottom of this - // line. If so, we link the current line with this one - // under certain conditions: - - if (std::find(lines_nbh.begin(), lines_nbh.end(), - *nbh_p + 1) != lines_nbh.end()) + if (*nbh_p % 2 == 1) // We have found the bottom of + // another line + lines_nbh.push_back(*nbh_p); + else { - // If we can link the complete line with the current line - if (// It must be in the search range - nbh_p < pstop - // Avoid loops - && right((*nbh_p >> 1) - 1) != i) - right(i) = (*nbh_p >> 1) - 1; - - // We have found a complete line, so we stop the search - break; + // We have found the top of a line. We are looking if + //we have already encountered the bottom of this + // line. If so, we link the current line with this one + // under certain conditions: + + if (std::find(lines_nbh.begin(), lines_nbh.end(), + *nbh_p + 1) != lines_nbh.end()) + { + // If we can link the complete line with the current line + if (// It must be in the search range + nbh_p < pstop + // Avoid loops + && right((*nbh_p >> 1) - 1) != l) + right(l) = (*nbh_p >> 1) - 1; + + // We have found a complete line, so we stop the search + break; + } } } } - } - // If we haven't found any included line in the first - // neighbor, then the line is considered as the neighbor of - // the current line + // If we haven't found any included line in the first + // neighbor, then the line is considered as the neighbor of + // the current line - if (*nbh_p == end_p) - right(i) = ((*nbh_p_copy - 1) >> 1) - 1; + if (*nbh_p == end_p) + right(l) = ((*nbh_p_copy - 1) >> 1) - 1; + } } } - } } //----------------------------------------------------------------------- @@ -950,76 +965,83 @@ namespace scribo template< typename L > inline - void finalize_links(util::array<value::int_u16>& left, - util::array<value::int_u16>& right, - const util::array< line_info<L> >& lines) + void finalize_links(line_links<L>& left, + line_links<L>& right, + const line_set<L>& lines) { - const unsigned nlines = lines.nelements(); - - for (unsigned i = 0; i < nlines; ++i) - { - const unsigned left_value = left(i); - const unsigned right_value = right(i); + // const unsigned nlines = lines.nelements(); - // If the right neighbor of my left neighbor is itself then its - // right neighbor is me + for_all_lines(l, lines) + if (lines(l).is_textline()) { - value::int_u16& v = right(left_value); + const unsigned left_value = left(l); + const unsigned right_value = right(l); - if (v == left_value) - v = i; - } + // If the right neighbor of my left neighbor is itself then its + // right neighbor is me + { + line_id_t& v = right(left_value); - // If the left neighbor of my right neighbor is itself then its - // left neighbor is me - { - value::int_u16& v = left(right_value); + if (v == left_value) + v = l; + } + + // If the left neighbor of my right neighbor is itself then its + // left neighbor is me + { + line_id_t& v = left(right_value); - if (v == right_value) - v = i; + if (v == right_value) + v = l; + } } - } } template <typename L> inline - void extract_paragraphs(line_set<L>& lines, - const image2d<bool>& input) + paragraph_set<L> + extract_paragraphs(line_set<L>& lines, + const image2d<bool>& input) { - typedef value::int_u16 V; + typedef scribo::def::lbl_type V; image2d<V> blocks(geom::rotate(input.domain(), -90, input.domain().pcenter())); data::fill(blocks, 0); - util::array< line_info<L> > lines_info; + // util::array< line_info<L> > lines_info; - for_all_lines(l, lines) - { - if (lines(l).is_textline()) - lines_info.append(lines(l)); - } + // for_all_lines(l, lines) + // { + // if (lines(l).is_textline()) + // lines_info.append(lines(l)); + // } - const unsigned nlines = lines_info.nelements(); +/// const unsigned nlines = lines_info.nelemnts(); util::array<box2d> rbbox; - util::array<V> left(nlines); - util::array<V> right(nlines); - util::array<V> output; + line_links<L> left(lines); + left(0) = 0; + line_links<L> right(lines); + right(0) = 0; + line_links<L> output(lines); + output(0) = 0; - rbbox.reserve(nlines); - output.reserve(nlines); + rbbox.resize(lines.nelements() + 1); std::cout << "Preparing lines" << std::endl; - prepare_lines(input.domain(), lines_info, blocks, rbbox); + prepare_lines(input.domain(), lines , blocks, rbbox); // io::pgm::save(blocks, "blocks.pgm"); std::cout << "Linking left" << std::endl; - process_left_link(blocks, rbbox, lines_info, left); + process_left_link(blocks, rbbox, lines , left); std::cout << "Linking right" << std::endl; - process_right_link(blocks, rbbox, lines_info, right); + process_right_link(blocks, rbbox, lines , right); std::cout << "Finalizing links" << std::endl; - finalize_links(left, right, lines_info); + finalize_links(left, right, lines ); // std::cout << "Finalizing merging" << std::endl; // finalize_line_merging(left, right, lines); std::cout << "Extracting paragraphs" << std::endl; - filter::paragraph_links(left, right, output, lines_info, input); + filter::paragraph_links(left, right, output, lines, input); + + paragraph_set<L> par_set = make::paragraph(output); + return par_set; } } diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh index 25b328b..d60f3cc 100644 --- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh +++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh @@ -60,6 +60,7 @@ # include <scribo/text/recognition.hh> # include <scribo/text/merging.hh> # include <scribo/text/link_lines.hh> +# include <scribo/text/paragraphs.hh> # include <scribo/make/debug_filename.hh> @@ -444,93 +445,96 @@ namespace scribo on_progress(); } - // Link text lines - on_new_progress_label("Linking text lines"); - line_links<L> llinks = scribo::text::link_lines(lines); - - - //===== DEBUG ===== -# ifndef SCRIBO_NDEBUG - if (debug::logger().is_enabled()) - { - image2d<value::rgb8> - debug = data::convert(value::rgb8(), original_image); - for_all_lines(l, lines) - { - if (! lines(l).is_textline()) - continue; - - mln::draw::box(debug, lines(l).bbox(), literal::blue); - mln::draw::line(debug, lines(l).bbox().pcenter(), - lines(llinks(l)).bbox().pcenter(), literal::green); - } - - debug::logger().log_image(debug::AuxiliaryResults, - debug, "links_raw"); - } -# endif // ! SCRIBO_NDEBUG - //===== END OF DEBUG ===== - - on_progress(); - - - // Filter line links. - on_new_progress_label("Filter line links"); - llinks = scribo::filter::line_links_x_height(llinks); - - //===== DEBUG ===== -# ifndef SCRIBO_NDEBUG - if (debug::logger().is_enabled()) - { - image2d<value::rgb8> - debug = data::convert(value::rgb8(), original_image); - for_all_links(i, llinks) - if (llinks(i) && llinks(i) != i) - mln::draw::line(debug, lines(i).bbox().pcenter(), - lines(llinks(i)).bbox().pcenter(), literal::red); - - debug::logger().log_image(debug::AuxiliaryResults, - debug, "links"); - - for (unsigned i = 1; i < llinks.nelements(); ++i) - llinks(i) = scribo::make::internal::find_root(llinks, i); - - debug = data::convert(value::rgb8(), original_image); - mln::util::array<accu::shape::bbox<point2d> > - nbbox(llinks.nelements()); - - for_all_lines(i, lines) - { - if (! lines(i).is_textline()) - continue; - - mln::draw::box(debug, lines(i).bbox(), literal::red); - nbbox(llinks(i)).take(lines(i).bbox()); - } - - for (unsigned i = 1; i < nbbox.nelements(); ++i) - if (nbbox(i).is_valid()) - { - box2d b = nbbox(i).to_result(); - mln::draw::box(debug, b, literal::green); - b.enlarge(1); - mln::draw::box(debug, b, literal::green); - b.enlarge(1); - mln::draw::box(debug, b, literal::green); - } - - debug::logger().log_image(debug::AuxiliaryResults, - debug, "par"); - } -# endif // ! SCRIBO_NDEBUG - //===== END OF DEBUG ===== - - on_progress(); - - - // Construct paragraphs - on_new_progress_label("Constructing paragraphs"); - scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks); +// // Link text lines +// on_new_progress_label("Linking text lines"); +// line_links<L> llinks = scribo::text::link_lines(lines); + + +// //===== DEBUG ===== +// # ifndef SCRIBO_NDEBUG +// if (debug::logger().is_enabled()) +// { +// image2d<value::rgb8> +// debug = data::convert(value::rgb8(), original_image); +// for_all_lines(l, lines) +// { +// if (! lines(l).is_textline()) +// continue; + +// mln::draw::box(debug, lines(l).bbox(), literal::blue); +// mln::draw::line(debug, lines(l).bbox().pcenter(), +// lines(llinks(l)).bbox().pcenter(), literal::green); +// } + +// debug::logger().log_image(debug::AuxiliaryResults, +// debug, "links_raw"); +// } +// # endif // ! SCRIBO_NDEBUG +// //===== END OF DEBUG ===== + +// on_progress(); + + +// // Filter line links. +// on_new_progress_label("Filter line links"); +// llinks = scribo::filter::line_links_x_height(llinks); + +// //===== DEBUG ===== +// # ifndef SCRIBO_NDEBUG +// if (debug::logger().is_enabled()) +// { +// image2d<value::rgb8> +// debug = data::convert(value::rgb8(), original_image); +// for_all_links(i, llinks) +// if (llinks(i) && llinks(i) != i) +// mln::draw::line(debug, lines(i).bbox().pcenter(), +// lines(llinks(i)).bbox().pcenter(), literal::red); + +// debug::logger().log_image(debug::AuxiliaryResults, +// debug, "links"); + +// for (unsigned i = 1; i < llinks.nelements(); ++i) +// llinks(i) = scribo::make::internal::find_root(llinks, i); + +// debug = data::convert(value::rgb8(), original_image); +// mln::util::array<accu::shape::bbox<point2d> > +// nbbox(llinks.nelements()); + +// for_all_lines(i, lines) +// { +// if (! lines(i).is_textline()) +// continue; + +// mln::draw::box(debug, lines(i).bbox(), literal::red); +// nbbox(llinks(i)).take(lines(i).bbox()); +// } + +// for (unsigned i = 1; i < nbbox.nelements(); ++i) +// if (nbbox(i).is_valid()) +// { +// box2d b = nbbox(i).to_result(); +// mln::draw::box(debug, b, literal::green); +// b.enlarge(1); +// mln::draw::box(debug, b, literal::green); +// b.enlarge(1); +// mln::draw::box(debug, b, literal::green); +// } + +// debug::logger().log_image(debug::AuxiliaryResults, +// debug, "par"); +// } +// # endif // ! SCRIBO_NDEBUG +// //===== END OF DEBUG ===== + +// on_progress(); + + +// // Construct paragraphs +// on_new_progress_label("Constructing paragraphs"); +// scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks); + + scribo::paragraph_set<L> + parset = extract_paragraphs(lines, doc.binary_image()); doc.set_paragraphs(parset); on_progress(); -- 1.5.6.5