2867: [Scribo] Add support for table extraction.

* scribo/demat.hh: update. --- milena/sandbox/ChangeLog | 6 + milena/sandbox/scribo/demat.hh | 379 ++++++++++++++++++++++++++++++++-------- 2 files changed, 309 insertions(+), 76 deletions(-) diff --git a/milena/sandbox/ChangeLog b/milena/sandbox/ChangeLog index f2fd9a2..1ce3a0d 100644 --- a/milena/sandbox/ChangeLog +++ b/milena/sandbox/ChangeLog @@ -1,3 +1,9 @@ +2008-11-12 Guillaume Lazzara <z@lrde.epita.fr> + + Add support for table extraction. + + * scribo/demat.hh: update. + 2008-11-12 Alexandre Abraham <abraham@lrde.epita.fr> Make topological watershed work. diff --git a/milena/sandbox/scribo/demat.hh b/milena/sandbox/scribo/demat.hh index 0867e49..59e8014 100644 --- a/milena/sandbox/scribo/demat.hh +++ b/milena/sandbox/scribo/demat.hh @@ -1,4 +1,4 @@ -// Copyright (C) 2008 EPITA Research and Development Laboratory +// Copyright (C) 2008 EPITA Research and Development Laboratory (LRDE) // // This file is part of the Olena Library. This library is free // software; you can redistribute it and/or modify it under the terms @@ -28,11 +28,24 @@ #ifndef DEMAT_HH_ # define DEMAT_HH_ +# define for_all_ncomponents(C, NCOMP) \ + for (unsigned C = 1; C <= NCOMP; ++C) + +# define for_all_components(C, S) \ + for (unsigned C = 1; C < S.nelements(); ++C) + +# define for_all_elements(E, S) \ + for (unsigned E = 0; E < S.nelements(); ++E) + # include <libgen.h> # include <sstream> -# include <mln/core/image/image1d.hh> +# include <mln/essential/1d.hh> # include <mln/essential/2d.hh> +# include <mln/morpho/elementary/dilation.hh> +# include <mln/labeling/background.hh> + +# include <mln/transform/influence_zone_geodesic.hh> #include <tesseract/baseapi.h> @@ -46,14 +59,33 @@ namespace scribo using value::int_u16; using value::rgb8; + struct settings_t + { + settings_t() + { + bbox_enlarge = 1; + ero_line_width = 101; + bbox_distance = 25; + min_comp_size = 5; + } + + unsigned bbox_enlarge; + unsigned ero_line_width; + unsigned bbox_distance; + unsigned min_comp_size; + }; + + settings_t settings; char *input_file = 0; - int dbg_file_id = 0; - std::string output_file(const char *name, unsigned file_id) + /// Returns a formated output file name. + std::string output_file(const char *name) { + static int file_id = 1; + std::ostringstream os; os << "./" - << file_id + << file_id++ << "_" << input_file << "_" @@ -61,9 +93,43 @@ namespace scribo return os.str(); } + + /// Returns the edge central sites of a box + /// + /// \param[in] b the bbbox + /// \param[in] dim the dimension used to compute the site. + /*! + ** + ** If dim == 0, returns the left and right central sites. + ** + ** |-------| + ** X X + ** |-------| + ** + ** If dim == 1, returns the top and bottom central sites. + ** + ** |---X---| + ** | | + ** |---X---| + ** + */ + std::pair<point2d, point2d> + central_sites(const box2d& b, unsigned dim) + { + unsigned n = b.pmax()[dim] - b.pmin()[dim]; + + point2d p1 = b.center(); + p1[dim] -= n / 2; + point2d p2 = b.center(); + p2[dim] += n / 2; + + return std::make_pair(p1, p2); + } + + /// Draw a list of bboxes and their center in a RGB8 image. void draw_component_boxes(image2d<rgb8>& output, const util::array<box2d>& boxes) { - for (unsigned i = 1; i < boxes.nelements(); ++i) + for_all_components(i, boxes) if (boxes[i].is_valid()) { output(boxes[i].center()) = literal::red; @@ -71,18 +137,26 @@ namespace scribo } } + /// Colorize and save a labeled image. template <typename V> void save_lbl_image(const image2d<V>& lbl, unsigned nlabels, - const char *filename, unsigned file_id) + const char *filename) { image2d<rgb8> output = debug::colorize<image2d<rgb8>, image2d<V> >(lbl, nlabels); - io::ppm::save(output, output_file(filename, file_id)); + io::ppm::save(output, output_file(filename)); } /// Functions related to Text Recognition. /// \{ + /// Passes the text bboxes to Tesseract and store the result an image of + /// char. + /// \param[in] in image from where the text bboxes are extracted. + /// \param[in] lbl labeled image. + /// \param[in] tboxes array of text bboxes. + /// + /// FIXME: For each text bbox, we create a new image. We may like to avoid that. void text_recognition(const image2d<bool>& in, const image2d<int_u16>& lbl, const util::array<box2d>& tboxes) @@ -94,11 +168,10 @@ namespace scribo image2d<char> txt(in.domain()); level::fill(txt, ' '); - for (unsigned i = 1; i < tboxes.nelements(); ++i) + for_all_components(i, tboxes) { if (tboxes[i].is_valid()) { - // FIXME: do not create a new image. image2d<bool> b(tboxes[i], 0); level::fill(b, false); level::fill(b, in | (tboxes[i] | (pw::value(lbl) == pw::cst(i)))); @@ -128,30 +201,166 @@ namespace scribo /// Functions related to the matrix extraction /// \{ - void draw_hline(image2d<rgb8>& ima, - const box2d& box, - const rgb8& v) + /// Align table lines bboxes according to a given dimension. + /* + ** + ** FIXME: DOC! + ** + ** 0 1 3 4 5 6 + ** ------------ ------- + ** 0 |- - - - - | | {0,1} | + ** 1 | - - | | {0,1} | + ** 2 | | | {1} | + ** 3 | | | | + ** 4 | | | {2} | + ** 5 |- - | | {2} | + ** 6 | | | {2} | + */ + util::array<int> + align_lines(unsigned nsites, + int min_coord, + int max_coord, + util::array<box2d>& hboxes, + unsigned dim) + { + std::cout << "extracting table lines..." << std::endl; + util::array< util::set<unsigned> > lines; + lines.resize(nsites); + + // Map components with actual lines. + for_all_components(i, hboxes) + { + int minline = hboxes[i].pmin()[dim] - 5; + minline = (minline < min_coord ? min_coord : minline); + int maxline = hboxes[i].pmax()[dim] + 5; + maxline = (maxline > max_coord ? max_coord : maxline); + + for (int line = minline; + line <= maxline; ++line) + lines[line].insert(i); + } + + // Init box2line + util::array<int> box2line; + box2line.resize(hboxes.nelements()); + for_all_elements(i, box2line) + box2line[i] = -1; + + // Find the line with the highest element count. + unsigned max_nelts = 0; + for_all_elements(i, lines) + if (max_nelts < lines[i].nelements()) + max_nelts = lines[i].nelements(); + + // Aligning lines + util::array<int> newlines; + while (max_nelts > 0) + { + for_all_elements(i, lines) + if (lines[i].nelements() == max_nelts) + { + accu::mean<int_u16> mean; + for_all_elements(j, lines[i]) + if (box2line[lines[i][j]] == -1) + mean.take(hboxes[lines[i][j]].center()[dim]); + + if (mean.is_valid()) + { + for_all_elements(j, lines[i]) + if (box2line[lines[i][j]] == -1) + { + hboxes[lines[i][j]].pmin()[dim] = mean.to_result(); + hboxes[lines[i][j]].pmax()[dim] = mean.to_result(); + box2line[lines[i][j]] = mean.to_result(); + } + newlines.append(mean.to_result()); + } + } + --max_nelts; + } + + return newlines; + } + + image2d<bool> + rebuild_table(const image2d<bool>& in, + std::pair<util::array<box2d>, + util::array<box2d> >& tblboxes) { - unsigned ncols = box.pmax().col() - box.pmin().col(); - point2d p1 = box.center(); - p1.col() -= ncols / 2; - point2d p2 = box.center(); - p2.col() += ncols / 2; + util::array<int> rows = align_lines(in.nrows(), geom::min_row(in), + geom::max_row(in), tblboxes.second, + 0); + util::array<int> cols = align_lines(in.ncols(), geom::min_col(in), + geom::max_col(in), tblboxes.first, + 1); + + image1d<int> v(in.nrows()); + image1d<int> h(in.ncols()); + level::fill(v, -1); + level::fill(h, -1); + + for_all_elements(i, rows) + v.at(rows[i]) = i; + for_all_elements(i, cols) + h.at(cols[i]) = i; + + for (int i = 0; i < 10; ++i) + { + v = morpho::elementary::dilation(v, c2()); + h = morpho::elementary::dilation(h, c2()); + } + + util::array<box2d>& vb = tblboxes.first; + for_all_components(i, vb) + { + std::pair<point2d, point2d> cp = central_sites(vb[i], 0); + if (v.at(cp.first.row()) != -1) + vb[i].pmin().row() = rows[v.at(cp.first.row())]; + if (v.at(cp.second.row()) != -1) + vb[i].pmax().row() = rows[v.at(cp.second.row())]; + + cp = central_sites(vb[i], 0); + } - draw::line(ima, p1, p2, v); + util::array<box2d>& hb = tblboxes.second; + for_all_components(i, hb) + { + std::pair<point2d, point2d> cp = central_sites(hb[i], 1); + if (h.at(cp.first.col()) != -1) + hb[i].pmin().col() = cols[h.at(cp.first.col())]; + if (h.at(cp.second.col()) != -1) + hb[i].pmax().col() = cols[h.at(cp.second.col())]; + } + + image2d<bool> res; + initialize(res, in); + level::fill(res, false); + for_all_components(i, tblboxes.first) + draw::box(res, tblboxes.first[i], true); + for_all_components(i, tblboxes.second) + draw::box(res, tblboxes.second[i], true); + +# ifndef NOUT + image2d<rgb8> out(in.domain()); + level::fill(out, literal::black); + for_all_components(i, tblboxes.first) + draw::box(out, tblboxes.first[i], literal::red); + for_all_components(i, tblboxes.second) + draw::box(out, tblboxes.second[i], literal::red); + io::ppm::save(out, output_file("table.ppm")); +# endif + return res; } - void draw_vline(image2d<rgb8>& ima, - const box2d& box, - const rgb8& v) + + void draw_line(image2d<rgb8>& ima, + unsigned dim, + const box2d& box, + const rgb8& v) { - unsigned nrows = box.pmax().row() - box.pmin().row(); - point2d p1 = box.center(); - p1.row() -= nrows / 2; - point2d p2 = box.center(); - p2.row() += nrows / 2; + std::pair<point2d, point2d> cp = central_sites(box, dim); - draw::line(ima, p1, p2, v); + draw::line(ima, cp.first, cp.second, v); } void draw_row(image2d<rgb8>& ima, @@ -184,14 +393,14 @@ namespace scribo level::fill(vend, 0); level::fill(vcol, 0); - for (unsigned i = 1; i < tboxes.first.nelements(); ++i) + for_all_components(i, tboxes.first) { ++vend.at(tboxes.first[i].pmin().row()); ++vend.at(tboxes.first[i].pmax().row()); ++vcol.at(tboxes.first[i].center().col()); } - for (unsigned i = 1; i < tboxes.second.nelements(); ++i) + for_all_components(i, tboxes.second) { ++hend.at(tboxes.second[i].pmin().col()); ++hend.at(tboxes.second[i].pmax().col()); @@ -218,13 +427,13 @@ namespace scribo draw_row(tmp, i, literal::magenta); } - for (unsigned i = 1; i < tboxes.first.nelements(); ++i) - draw_vline(tmp, tboxes.first[i], literal::green); + for_all_components(i, tboxes.first) + draw_line(tmp, 0, tboxes.first[i], literal::green); - for (unsigned i = 1; i < tboxes.second.nelements(); ++i) - draw_hline(tmp, tboxes.second[i], literal::red); + for_all_components(i, tboxes.second) + draw_line(tmp, 1, tboxes.second[i], literal::red); - io::ppm::save(tmp, output_file("matrix.ppm", 4)); + io::ppm::save(tmp, output_file("matrix.ppm")); #endif } @@ -253,9 +462,9 @@ namespace scribo util::array<box2d>& boxes, unsigned bbox_enlarge, unsigned dim) { - for (unsigned i = 1; i < boxes.nelements(); ++i) + for_all_components(i, boxes) { - boxes[i].enlarge(dim, bbox_enlarge + 1); + boxes[i].enlarge(dim, bbox_enlarge + settings.bbox_enlarge); boxes[i].crop_wrt(output.domain()); level::paste((pw::cst(false) | boxes[i] | (pw::value(output) == pw::cst(true))), output); @@ -267,8 +476,7 @@ namespace scribo std::pair<util::array<box2d>, util::array<box2d> > extract_tables(image2d<bool>& in, - image2d<rgb8>& output, - unsigned l) + image2d<rgb8>& output) { typedef image2d<int_u16> I; typedef accu::bbox<mln_psite_(I)> A; @@ -277,35 +485,34 @@ namespace scribo // Lignes verticales std::cout << "Removing vertical lines" << std::endl; - win::vline2d vline(l); + win::vline2d vline(settings.ero_line_width); image2d<bool> vfilter = morpho::erosion(in, vline); #ifndef NOUT - io::pbm::save(vfilter, output_file("table-vfilter.pbm", 1)); + io::pbm::save(vfilter, output_file("vertical-erosion.pbm")); #endif boxes_t vboxes = component_boxes(vfilter); - erase_table_boxes(in, vboxes, (l / 2), 0); - + erase_table_boxes(in, vboxes, (settings.ero_line_width / 2), 0); // Lignes horizontales std::cout << "Removing horizontal lines" << std::endl; - win::hline2d hline(l); + win::hline2d hline(settings.ero_line_width); image2d<bool> hfilter = morpho::erosion(in, hline); #ifndef NOUT - io::pbm::save(hfilter, output_file("table-hfilter.pbm", 2)); + io::pbm::save(hfilter, output_file("horizontal-erosion.pbm")); #endif boxes_t hboxes = component_boxes(hfilter); - erase_table_boxes(in, hboxes, (l / 2), 1); + erase_table_boxes(in, hboxes, (settings.ero_line_width / 2), 1); #ifndef NOUT image2d<rgb8> tmp = clone(output); draw_component_boxes(tmp, vboxes); draw_component_boxes(tmp, hboxes); - io::ppm::save(tmp, output_file("table-filtered.ppm", 3)); + io::ppm::save(tmp, output_file("vertical-and-horizontal-erosion.ppm")); #endif return std::make_pair(vboxes, hboxes); @@ -339,11 +546,10 @@ namespace scribo template <typename V> void remove_small_comps_i2v(image2d<V>& lbl, - V& nlabels, - unsigned min_comp_size) + V& nlabels) { std::cout << "Removing small components smaller than " - << min_comp_size << " sites among " << nlabels + << settings.min_comp_size << " sites among " << nlabels << " components" << std::endl; typedef accu::count<mln_psite(image2d<V>)> accu_count_t; @@ -356,9 +562,9 @@ namespace scribo fun::i2v::array<V> f(nsitecomp.nelements()); f(0) = 0; - for (unsigned i = 1; i <= nlabels; ++i) + for_all_ncomponents(i, nlabels) { - if (nsitecomp[i] < min_comp_size) + if (nsitecomp[i] < settings.min_comp_size) f(i) = 0; else f(i) = ++ncomp; @@ -368,7 +574,7 @@ namespace scribo nlabels = ncomp; #ifndef NOUT - save_lbl_image(lbl, nlabels, "lbl-small-comps-removed.pgm", 6); + save_lbl_image(lbl, nlabels, "lbl-small-comps-removed.pgm"); #endif } @@ -381,18 +587,18 @@ namespace scribo // Currify left_link lookup table and compute text area bboxes. util::array< accu::bbox<point2d> > tboxes; tboxes.resize(ncomp + 1); - for (unsigned i = 1; i <= ncomp; ++i) + for_all_ncomponents(i, ncomp) tboxes[uncurri_left_link(left_link, i)].take(cboxes[i]); //Update labels lbl = level::transform(lbl, left_link); #ifndef NOUT - save_lbl_image(lbl, ncomp, "lbl-grouped-boxes.pgm", 7); + save_lbl_image(lbl, ncomp, "lbl-grouped-boxes.pgm"); #endif util::array<box2d> result; - for (unsigned i = 1; i <= ncomp; ++i) + for_all_ncomponents(i, ncomp) if (tboxes[i].is_valid()) result.append(tboxes[i].to_result()); @@ -412,7 +618,7 @@ namespace scribo if (left_link(lbl(p)) == lbl(p) && most_left(left_link, i) != lbl(p)) left_link(lbl(p)) = i; // else -// left_link(lbl(p)) = 0;//FIXME: should be uncommented? +// left_link(lbl(p)) = 0;//FIXME: should be uncommented? VERY bad results otherwise. } } @@ -423,8 +629,7 @@ namespace scribo fun::i2v::array<int_u16> link_character_bboxes(image2d<int_u16>& lbl, const util::array<box2d>& cboxes, - unsigned ncomp, - unsigned bbox_distance) + unsigned ncomp) { fun::i2v::array<int_u16> left_link; left_link.resize(ncomp + 1); @@ -432,10 +637,10 @@ namespace scribo for (unsigned i = 0; i <= ncomp; ++i) left_link(i) = i; - for (unsigned i = 1; i <= ncomp; ++i) + for_all_ncomponents(i, ncomp) { unsigned midcol = (cboxes[i].pmax().col() - cboxes[i].pmin().col()) / 2; - int dmax = midcol + bbox_distance; + int dmax = midcol + settings.bbox_distance; point2d c = cboxes[i].center(); /// First site on the right of the central site point2d p(c.row(), c.col() + 1); @@ -454,11 +659,8 @@ namespace scribo util::array<box2d> extract_text(image2d<bool>& in, image2d<int_u16>& lbl, - image2d<rgb8>& output, - unsigned bbox_distance, - unsigned min_comp_size) + image2d<rgb8>& output) { - std::cout << "extracting text..." << std::endl; typedef int_u16 V; typedef image2d<V> I; @@ -469,21 +671,21 @@ namespace scribo lbl = labeling::blobs(in, c8(), nlabels); //Remove small components. - remove_small_comps_i2v(lbl, nlabels, min_comp_size); + remove_small_comps_i2v(lbl, nlabels); boxes_t cboxes = labeling::compute(accu::meta::bbox(), lbl, nlabels); #ifndef NOUT image2d<rgb8> tmp = clone(output); draw_component_boxes(tmp, cboxes); - io::ppm::save(tmp, output_file("character-bboxes.ppm", 5)); + io::ppm::save(tmp, output_file("character-bboxes.ppm")); #endif //merge_bboxes(cboxes, lbl, nlabels); //Link character bboxes to their left neighboor if possible. fun::i2v::array<int_u16> left = - link_character_bboxes(lbl, cboxes, nlabels, bbox_distance); + link_character_bboxes(lbl, cboxes, nlabels); //Merge character bboxes according to their left neighbor. util::array<box2d> tboxes = group_bboxes(left, lbl, cboxes, nlabels); @@ -491,26 +693,45 @@ namespace scribo return tboxes; } + void maptext_to_cells(const image2d<bool>& in, const image2d<bool>& table, const util::array<box2d>& tboxes) + { + int_u16 nlabels; + image2d<int_u16> tblelbl = labeling::background(table, c8(), nlabels); + image2d<rgb8> color = debug::colorize<image2d<rgb8>, image2d<int_u16> >(tblelbl, nlabels); +# ifndef NOUT + io::ppm::save(color, output_file("cells-labels.ppm")); +# endif + + image2d<rgb8> dbg = level::convert(rgb8(), logical::not_(table)); + + for_all_elements(i, tboxes) + if (tboxes[i].is_valid()) + { + level::paste(pw::cst(color(tboxes[i].center())) | (tboxes[i] | pw::value(in) == pw::cst(true)), + dbg); + } +# ifndef NOUT + io::ppm::save(dbg, output_file("text2cell.ppm")); +# endif + } + /// \} /// End of functions related to text extraction } // end of namespace scribo::internal - // Facade void demat(char *argv[], bool treat_tables) { using namespace mln; using value::rgb8; + using value::int_u16; border::thickness = 3; trace::quiet = true; //Useful debug variables internal::input_file = basename(argv[1]); - unsigned l = 101; - unsigned bbox_distance = 25; - unsigned min_comp_size = 5; //Load image image2d<bool> in; @@ -521,18 +742,24 @@ namespace scribo std::pair<util::array<box2d>, util::array<box2d> > tblboxes; + + image2d<bool> table; if (treat_tables) { - tblboxes = internal::extract_tables(in, output, l); - internal::extract_matrix(in, tblboxes); + tblboxes = internal::extract_tables(in, output); +// internal::extract_matrix(in, tblboxes); + table = internal::rebuild_table(in, tblboxes); } image2d<value::int_u16> lbl; util::array<box2d> tboxes = - internal::extract_text(in, lbl, output, bbox_distance, min_comp_size); + internal::extract_text(in, lbl, output); + + if (treat_tables) + internal::maptext_to_cells(in, table, tboxes); internal::draw_component_boxes(output, tboxes); - io::ppm::save(output, internal::output_file("out.ppm", 8)); + io::ppm::save(output, internal::output_file("out.ppm")); internal::text_recognition(in, lbl, tboxes); } -- 1.5.6.5
participants (1)
-
Guillaume Lazzara