
* scribo/demat.hh: - Correctly relabel. - Correctly erase components. --- milena/sandbox/ChangeLog | 8 ++ milena/sandbox/scribo/demat.hh | 165 +++++++++++++++++++--------------------- 2 files changed, 86 insertions(+), 87 deletions(-) diff --git a/milena/sandbox/ChangeLog b/milena/sandbox/ChangeLog index 2ecca11..e8cd9d9 100644 --- a/milena/sandbox/ChangeLog +++ b/milena/sandbox/ChangeLog @@ -1,3 +1,11 @@ +2008-10-22 Guillaume Lazzara <z@lrde.epita.fr> + + Fix bad deletion of small components. + + * scribo/demat.hh: + - Correctly relabel. + - Correctly erase components. + 2008-10-21 Guillaume Lazzara <z@lrde.epita.fr> Extract text bboxes for Scribo. diff --git a/milena/sandbox/scribo/demat.hh b/milena/sandbox/scribo/demat.hh index 06f54e9..a824d11 100644 --- a/milena/sandbox/scribo/demat.hh +++ b/milena/sandbox/scribo/demat.hh @@ -68,6 +68,7 @@ # include <mln/level/fill.hh> # include <mln/level/paste.hh> +# include <mln/level/apply.hh> # include <mln/morpho/hit_or_miss.hh> # include <mln/morpho/opening.hh> @@ -98,6 +99,7 @@ namespace scribo util::array<box2d> component_boxes(const image2d<bool>& filter) { + std::cout << "component boxes" << std::endl; int_u16 nlabels; image2d<int_u16> lbl = labeling::blobs(filter, c4(), nlabels); @@ -105,14 +107,16 @@ namespace scribo } - /// Remove bboxes from an image. - void erase_boxes(image2d<bool>& ima, + /// Remove table bboxes from an image. + void erase_table_boxes(image2d<bool>& ima, const util::array<box2d>& boxes, unsigned bbox_larger) { - for (unsigned i = 1; i <= boxes.nelements(); ++i) + std::cout << "erase table boxes " << std::endl; + for (unsigned i = 1; i < boxes.nelements(); ++i) level::paste((pw::cst(false) - | boxes[i].to_larger(bbox_larger)), ima); + | (boxes[i].to_larger(bbox_larger) | (pw::value(ima) == pw::cst(true)))), ima); + } @@ -126,20 +130,28 @@ namespace scribo typedef util::array<mln_result_(A)> boxes_t; // Lignes verticales + std::cout << "Removing vertical lines" << std::endl; win::rectangle2d vwin(h, w); image2d<bool> vfilter = morpho::opening(in, vwin); - //io::pbm::save(vfilter, "./table-vfilter.pbm"); +#ifndef NOUT + io::pbm::save(vfilter, "./table-vfilter.pbm"); +#endif boxes_t vboxes = component_boxes(vfilter); - erase_boxes(in, vboxes, n); + erase_table_boxes(in, vboxes, n); // Lignes horizontales + std::cout << "Removing horizontal lines" << std::endl; win::rectangle2d hwin(w, h); image2d<bool> hfilter = morpho::opening(in, hwin); - //io::pbm::save(hfilter, "./table-hfilter.pbm"); +#ifndef NOUT + io::pbm::save(hfilter, "./table-hfilter.pbm"); +#endif boxes_t hboxes = component_boxes(hfilter); - erase_boxes(in, hboxes, n); + erase_table_boxes(in, hboxes, n); - //io::pbm::save(in, "./table-filtered.pbm"); +#ifndef NOUT + io::pbm::save(in, "./table-filtered.pbm"); +#endif return std::make_pair(vboxes, hboxes); } @@ -152,22 +164,15 @@ namespace scribo /// \{ int_u16 - anc(const fun::i2v::array<int_u16>& f, unsigned i) + most_left(const fun::i2v::array<int_u16>& left_link, unsigned i) { - while (f(i) != i) - i = f(i); + while (left_link(i) != i) + i = left_link(i); return i; } - void do_curri(fun::i2v::array<int_u16>& f, int_u16 i) - { - if (f(i) != i) - f(i) = anc(f, f(i)); - } - - void - remove_small_comps_i2v(image2d<int_u16>& ima, image2d<int_u16>& lbl, + remove_small_comps_i2v(image2d<int_u16>& lbl, util::array<box2d>& cboxes, int_u16& ncomps, unsigned min_comp_size) { @@ -178,18 +183,13 @@ namespace scribo comps(0) = 0; // Construct the transform function. - for (int i = 1; i < ncomps;) + for (int i = 1; i <= ncomps;) { // On aimerait avoir une routine qui nous le fait toute seule et qui // soit optimisee. if (estim::nsites(cboxes[i]) < min_comp_size) { comps(current) = 0; - - /// DEBUG - level::paste(pw::cst(false) | cboxes[i], ima); - /// DEBUG - cboxes[i] = cboxes[ncomps]; comps(ncomps) = i; current = ncomps--; @@ -198,64 +198,55 @@ namespace scribo { comps(current) = i; current = ++i; - //draw::box(ima, cboxes[i], 150u); } } //Relabel - lbl = level::transform(lbl, comps); + level::apply(lbl, comps); cboxes.resize(ncomps + 1); } /// Merge bboxes according to their left box neighbor. - util::array< accu::bbox<point2d> > - group_bboxes(fun::i2v::array<int_u16>& left, image2d<int_u16>& lbl, + util::array< box2d > + group_bboxes(fun::i2v::array<int_u16>& left_link, image2d<int_u16>& lbl, util::array<box2d>& cboxes, unsigned ncomp) { - // Currify left lookup table and compute text area bboxes. + // Currify left_link lookup table and compute text area bboxes. util::array< accu::bbox<point2d> > tboxes; tboxes.resize(ncomp + 1); for (unsigned i = 1; i <= ncomp; ++i) { - do_curri(left, i); - tboxes[left(i)].take(cboxes[i]); + if (left_link(i) != i) + left_link(i) = most_left(left_link, left_link(i)); + tboxes[left_link(i)].take(cboxes[i]); } - //Update labels - level::transform(lbl, left); + //Update labels - FIXME: Do we need to do that? + level::apply(lbl, left_link); - return tboxes; - } + util::array<box2d> result; + for (unsigned i = 1; i <= ncomp; ++i) + result.append(tboxes[i].to_result()); - bool - has_valid_left_link(const fun::i2v::array<int_u16>& left, unsigned j) - { - return left(j) == j; + return result; } - - bool - is_valid_comp_neigh(const point2d& p, const point2d& c, unsigned dmax) - { - return (p.col() - c.col()) < dmax; - } - - - /// Update the lookup table \p left if a neighbor is found on the right of /// the current bbox. void update_link(fun::i2v::array<int_u16>& left_link, image2d<int_u16>& lbl, const point2d& p, const point2d& c, unsigned i, unsigned dmax) { - if (lbl.domain().has(p) && lbl(p) != 0 - && is_valid_comp_neigh(p, c, dmax) - && has_valid_left_link(left_link, lbl(p))) - left_link(lbl(p)) = i; - else if (!has_valid_left_link(left_link, lbl(p)) && lbl(p) != 0) - left_link(lbl(p)) = 0; + if (lbl.domain().has(p) && lbl(p) != 0 && lbl(p) != i + && (p.col() - c.col()) < dmax) + { + if (left_link(lbl(p)) == lbl(p)) + left_link(lbl(p)) = i; +// else +// left_link(lbl(p)) = 0;//FIXME: should be uncommented? + } } @@ -278,15 +269,16 @@ namespace scribo { unsigned midcol = (cboxes[i].pmax().col() - cboxes[i].pmin().col()) / 2; unsigned dmax = midcol + bbox_distance; - /// Box center => Routine? - point2d c (cboxes[i].pmin().row() + ((cboxes[i].pmax().row() - cboxes[i].pmin().row()) / 2), + /// FIXME: Box center => Routine? + point2d c (cboxes[i].pmin().row() + + ((cboxes[i].pmax().row() - cboxes[i].pmin().row()) / 2), cboxes[i].pmin().col() + midcol); - /// First point on the right of c + /// First site on the right of the center site point2d p(c.row(), c.col() + 1); - // Lemmings avec condition sur la distance en plus => Faire une version speciale? + // FIXME: Lemmings with a condition on the distance => write a special version? while (lbl.domain().has(p) && (lbl(p) == 0 || lbl(p) == i) - && is_valid_comp_neigh(p, c, dmax)) + && (p.col() - c.col()) < dmax) ++p.col(); update_link(left_link, lbl, p, c, i, dmax); @@ -297,9 +289,13 @@ namespace scribo - void - extract_text(image2d<bool>& in, unsigned bbox_distance, unsigned min_comp_size) + util::array<box2d> + extract_text(image2d<bool>& in, + unsigned bbox_distance, + unsigned min_comp_size) { + std::cout << "extracting text..." << std::endl; + typedef int_u16 V; typedef image2d<V> I; typedef util::array<box2d> boxes_t; @@ -309,47 +305,37 @@ namespace scribo image2d<V> lbl = labeling::blobs(in, c4(), nlabels); boxes_t cboxes = labeling::compute(accu::meta::bbox(), lbl, nlabels); +# ifndef NOUT std::cout << "nlabels = " << nlabels << std::endl; - - //DEBUG PURPOSE - //FIXME: don't know how to clone and convert to image<int> properly - // \{ I ima(in.domain()); - level::paste(in, ima); level::paste(pw::cst(100) - | (in | (pw::value(in) == pw::cst(true))).domain(), ima); - // \} + | (in.domain() | (pw::value(in) == pw::cst(true))), ima); +#endif //Remove small components. int_u16 ncomp; - remove_small_comps_i2v(ima, lbl, cboxes, ncomp, min_comp_size); + remove_small_comps_i2v(lbl, cboxes, ncomp, min_comp_size); - std::cout << "ncomp = " << ncomp << std::endl; +#ifndef NOUT + io::pgm::save(ima, "./text-wo-small.pgm"); +#endif //Link character bboxes to their left neighboor if possible. fun::i2v::array<int_u16> left = link_character_bboxes(lbl, cboxes, ncomp, bbox_distance); //Merge character bboxes according to their left neighbor. - util::array< accu::bbox<point2d> > tboxes = group_bboxes(left, lbl, cboxes, ncomp); - - /// DEBUG PURPOSE - /// \{ - io::pgm::save(lbl, "./textlbl.pgm"); - io::pgm::save(ima, "./text.pgm"); + util::array<box2d> tboxes = group_bboxes(left, lbl, cboxes, ncomp); +#ifndef NOUT for (unsigned i = 1; i <= ncomp; ++i) if (tboxes[i].is_valid()) - draw::box(ima, tboxes[i].to_result(), 254u); - - for (unsigned i = 1; i <= ncomp; ++i) - if (tboxes[i].is_valid()) - draw::box(in, tboxes[i].to_result(), true); + draw::box(ima, tboxes[i], 254u); io::pgm::save(ima, "./bbtext.pgm"); - io::pbm::save(in, "./intext.pgm"); - /// \} +#endif + return tboxes; } /// \} @@ -363,7 +349,8 @@ namespace scribo void demat(char *argv[]) { using namespace mln; - border::thickness = 0; + border::thickness = 3; + trace::quiet = true; //Useful debug variables unsigned h = atoi(argv[2]); @@ -378,7 +365,11 @@ namespace scribo internal::extract_tables(in, h, w, bbox_larger); - internal::extract_text(in, bbox_distance, min_comp_size); + util::array<box2d> tboxes = + internal::extract_text(in, bbox_distance, min_comp_size); + + /// Use txt bboxes here with Tesseract + /// => in | tboxes[i] } } // end of namespace scribo -- 1.5.6.5