
* sandbox/scribo/Makefile: add new rules. * sandbox/scribo/demat.hh: add more documentation. * sandbox/scribo/demat31Oct2008.hh, * sandbox/scribo/demat_v2.hh: remove. --- milena/ChangeLog | 11 + milena/sandbox/scribo/Makefile | 14 +- milena/sandbox/scribo/demat.hh | 137 +++++++-- milena/sandbox/scribo/demat31Oct2008.hh | 559 ------------------------------- milena/sandbox/scribo/demat_v2.hh | 134 -------- 5 files changed, 138 insertions(+), 717 deletions(-) delete mode 100644 milena/sandbox/scribo/demat31Oct2008.hh delete mode 100644 milena/sandbox/scribo/demat_v2.hh diff --git a/milena/ChangeLog b/milena/ChangeLog index e894701..438bfe2 100644 --- a/milena/ChangeLog +++ b/milena/ChangeLog @@ -1,5 +1,16 @@ 2009-03-02 Guillaume Lazzara <lazzara@lrde.epita.fr> + Cleanup scribo sandbox. + + * sandbox/scribo/Makefile: add new rules. + + * sandbox/scribo/demat.hh: add more documentation. + + * sandbox/scribo/demat31Oct2008.hh, + * sandbox/scribo/demat_v2.hh: remove. + +2009-03-02 Guillaume Lazzara <lazzara@lrde.epita.fr> + Small fixes. * headers.mk: update dist headers. diff --git a/milena/sandbox/scribo/Makefile b/milena/sandbox/scribo/Makefile index 2a58b92..730d8c2 100644 --- a/milena/sandbox/scribo/Makefile +++ b/milena/sandbox/scribo/Makefile @@ -1,5 +1,13 @@ -all: - g++ -I../.. -I$(HOME)/local/include -O1 -DNDEBUG table.cc $(HOME)/local/lib/libtesseract_full.a -lpthread +all: table photo + +table: demat.hh + g++ -I../.. -I$(HOME)/local/include -O1 -DNDEBUG table.cc $(HOME)/local/lib/libtesseract_full.a -lpthread -o table + +photo: demat.hh + g++ -I../.. 
-I$(HOME)/local/include -O1 -DNDEBUG photo.cc $(HOME)/local/lib/libtesseract_full.a -lpthread -o photo clean: - rm -f a.out + rm *.ppm *.pgm *.pbm + +dist-clean: clean + rm -f table photo diff --git a/milena/sandbox/scribo/demat.hh b/milena/sandbox/scribo/demat.hh index 0bc72ef..8872afc 100644 --- a/milena/sandbox/scribo/demat.hh +++ b/milena/sandbox/scribo/demat.hh @@ -1,4 +1,5 @@ -// Copyright (C) 2008 EPITA Research and Development Laboratory (LRDE) +// Copyright (C) 2008, 2009 EPITA Research and Development Laboratory +// (LRDE) // // This file is part of the Olena Library. This library is free // software; you can redistribute it and/or modify it under the terms @@ -48,9 +49,10 @@ # include <mln/transform/influence_zone_geodesic.hh> # include <mln/debug/draw_graph.hh> # include <mln/make/graph.hh> +# include <mln/make/region_adjacency_graph.hh> # include <mln/util/graph.hh> # include <mln/util/line_graph.hh> -# include <mln/opt/at.hh> +# include <mln/io/txt/save.hh> # include <mln/canvas/browsing/depth_first_search.hh> @@ -109,9 +111,13 @@ namespace scribo std::ostringstream os; os << "./" - << file_id++ - << "_" << input_file + << "_"; + + if (file_id < 10) + os << "0"; + + os << file_id++ << "_" << name; return os.str(); @@ -185,7 +191,7 @@ namespace scribo //-***************************************** /// \{ - /// Passes the text bboxes to Tesseract and store the result an image of + /// Passes the text bboxes to Tesseract and store the result in an image of /// char. /// \param[in] in image from where the text bboxes are extracted. /// \param[in] lbl labeled image. @@ -258,6 +264,22 @@ namespace scribo ** 4 | | | {2} | ** 5 |- - | | {2} | ** 6 | | | {2} | + ** + ** \p hboxes contains all the table lines bboxes. Each bbox is + ** associated with an id, its location in the array. + ** + ** For each bbox, its id is marked in a vector. 
The location is defined, + ** according to the given parameter \p dim, either by the row or the col + ** value of the pmin site. + ** + ** Ids are then propagated in the vector according to a small delta value. + ** If bbox ids are at the same location in the vector, their related bboxes + ** are likely to be on the same line. + ** + ** Finally, iterate over the vector until all bboxes have been treated. + ** For each iteration, the set with a specific number of elements is found + ** and all bboxes referenced in this set are aligned on the same row or col. + ** */ util::array<int> align_lines(unsigned nsites, @@ -296,6 +318,9 @@ namespace scribo max_nelts = lines[i].nelements(); // Aligning lines + // FIXME: not optimal... Make it faster! + // We may do too many iterations (while loop) and some of them may + // be done for nothing... util::array<int> newlines; while (max_nelts > 0) { @@ -327,7 +352,7 @@ namespace scribo - /// Connect lines if they are close to each other. + /// Connect vertical and horizontal lines if they are close to each other. 
void connect_lines(const util::array<int>& aligned_lines, util::array<box2d>& boxes, @@ -353,6 +378,49 @@ namespace scribo } } +// void +// connect_lines2(const util::array<int>& aligned_lines, +// util::array<box2d>& boxes, +// unsigned dim, +// unsigned dim_size) +// { +// image1d<int> l(dim_size); +// data::fill(l, -1); +// +// for_all_components(i, boxes) +// { +// opt::at(l, boxes[i].pmin()[dim]) = i; +// opt::at(l, boxes[i].pmax()[dim]) = i; +// } +// +// for (unsigned i = 0; i < settings.max_dist_lines; ++i) +// l = morpho::elementary::dilation(l, c2()); +// +// for_all_components(i, boxes) +// { +// std::pair<point2d, point2d> cp = central_sites(boxes[i], dim); +// +// win::segment1d seg(11); +// { +// mln_qiter_(win::segment1d) q(seg, point1d(cp.first[dim])); +// for_all(q) +// if (opt::at(l, q[0]) != -1) +// { +// boxes[i].pmin()[dim] = boxes[opt::at(l, q[0])].pmin()[dim]; +// break; +// } +// } +// { +// mln_qiter_(win::segment1d) q(seg, point1d(cp.second[dim])); +// for_all(q) +// if (opt::at(l, q[0]) != -1) +// { +// boxes[i].pmax()[dim] = boxes[opt::at(l, q[0])].pmax()[dim]; +// break; +// } +// } +// } +// } /// Align line bboxes vertically and horizontally. Then, try to join @@ -372,7 +440,7 @@ namespace scribo 1); # ifndef NOUT image2d<rgb8> out2(in.domain()); - level::fill(out2, literal::black); + data::fill(out2, literal::black); for_all_components(i, tblboxes.first) draw::box(out2, tblboxes.first[i], literal::red); for_all_components(i, tblboxes.second) @@ -380,6 +448,12 @@ namespace scribo io::ppm::save(out2, output_file("after-alignment.ppm")); # endif + // FIXME: Rebuild incomplete lines if possible. + // ----- --- => ---------- +// connect_lines2(tblboxes.first, 0, in.nrows()); +// connect_lines2(rows, tblboxes.second, 0, in.nrows()); + + // Connect vertical lines with horizontal lines. 
connect_lines(rows, tblboxes.first, 0, in.nrows()); connect_lines(cols, tblboxes.second, 1, in.ncols()); @@ -481,7 +555,7 @@ namespace scribo /// Find table bboxes and remove them from the image. std::pair<util::array<box2d>, - util::array<box2d> > + util::array<box2d> > extract_tables(image2d<bool>& in) { typedef image2d<label_16> I; @@ -489,7 +563,7 @@ namespace scribo typedef util::array<mln_result_(A)> boxes_t; - // Lignes verticales + // Vertical lines std::cout << "Removing vertical lines" << std::endl; win::vline2d vline(settings.ero_line_width); image2d<bool> vfilter = morpho::rank_filter(in, vline, settings.rank_filter); @@ -500,7 +574,7 @@ namespace scribo boxes_t vboxes = component_boxes(vfilter); - // Lignes horizontales + // Horizontal lines. std::cout << "Removing horizontal lines" << std::endl; win::hline2d hline(settings.ero_line_width); image2d<bool> hfilter = morpho::rank_filter(in, hline, settings.rank_filter); @@ -596,12 +670,15 @@ namespace scribo if (settings.treat_tables) { + // Remove components which are too small typedef util::array<accu_count_res_t> nsitecomp_t; nsitecomp_t nsitecomp = labeling::compute(accu_count_t(), lbl, nlabels); remove_small_comps<accu_count_res_t> fl2b(nsitecomp); labeling::relabel_inplace(lbl, nlabels, fl2b); } else { + // Remove components which have too many or not enough sites and which are + // too high. typedef util::array<accu_pair_res_t> nsitecomp_t; nsitecomp_t nsitecomp = labeling::compute(accu_pair_t(), lbl, nlabels); remove_smallandlarge_comps<accu_pair_res_t> fl2b(nsitecomp); @@ -609,6 +686,9 @@ namespace scribo } } + + /// Functor to be passed to depth_first_search. + /// Map each component vertex with its representative vertex id. struct make_relabel_fun_t { template <typename G> @@ -640,6 +720,10 @@ namespace scribo fun::l2l::relabel<label_16> l2l; }; + + + /// Functor to be passed to depth_first_search. + /// Computes the number of vertices per graph component. 
struct comp_size_t { template <typename G> @@ -654,6 +738,7 @@ namespace scribo void next() { unsigned compsize = comp_vertices.nelements(); + std::cout << "compsize = " << compsize << std::endl; for (unsigned i = 0; i < comp_vertices.nelements(); ++i) treated[comp_vertices[i]] = compsize; comp_vertices.clear(); @@ -663,7 +748,11 @@ namespace scribo { comp_vertices.insert(id); } void update_queued(unsigned id) - { update_treated(id); } + { + std::cout << "update_queued_before " << comp_vertices << std::endl; + update_treated(id); + std::cout << "update_queued_after " << comp_vertices << std::endl; + } bool to_be_treated(unsigned id) { return treated[id] == mln_max(label_16); } @@ -675,6 +764,8 @@ namespace scribo util::array<unsigned> treated; }; + + /// Merge bboxes according to their left box neighbor. util::array<box2d> group_bboxes(const util::graph& g, image2d<label_16>& lbl, @@ -710,6 +801,7 @@ namespace scribo comp_size_t comp_size; canvas::browsing::depth_first_search(g, comp_size); + std::cout << g << std::endl; for_all_ncomponents(i, nlabels) if (tboxes[i].is_valid()) if (comp_size.treated[i] < 3) @@ -750,7 +842,7 @@ namespace scribo int dmax = midcol + settings.bbox_distance; point2d c = cboxes[i].center(); /// First site on the right of the central site - point2d p(c.row(), c.col() + 1); + point2d p = c + right; // FIXME: Lemmings with a condition on the distance => write a special version? while (lbl.domain().has(p) && (lbl(p) == 0u || lbl(p) == i) @@ -793,21 +885,24 @@ namespace scribo return tboxes; } + + + + /// Function mapping value to sites of a line graph image. 
template <typename P> struct lg_vertex_values : public mln::Function_p2v< lg_vertex_values<P> > { typedef float result; + // Compute the angle between P and (0,1) float operator()(const P& p) const { - mln::algebra::vec<2,float> v; + mln::algebra::vec<2,float> v, pv; v[0] = 0; v[1] = 1; - float norm = mln::math::sqrt(std::pow(p.to_vec()[0], 2) - + std::pow(p.to_vec()[1], 2)); - // FIXME: missing proxy_impl for point and line2d? - float res = (v * p.to_vec()) / norm; - return res; + pv = p.to_vec().normalize(); + + return v * pv; } }; @@ -848,7 +943,7 @@ namespace scribo #endif typedef util::graph G; - G g = make::graph(iz | (pw::value(iz) != pw::cst(0u)), nlabels); + G g = make::graph(iz | (pw::value(iz) != pw::cst(0u)), c8(), nlabels); // Compute the component centers and use them as vertex. //FIXME: Add fun::vertex2p @@ -857,8 +952,6 @@ namespace scribo for_all_components(i, tboxes) fv2p(i) = tboxes[i].center(); -// util::array<point2d> centers = labeling::compute(accu::center<point2d>(), iz, nlabels); -// fv2p_t fv2p = convert::to<fv2p_t>(centers); // Create a p_vertices. p_vertices<G, fv2p_t> pv(g, fv2p); @@ -935,6 +1028,8 @@ namespace scribo internal::settings.max_comp_size = in.ncols() * in.nrows() * 0.05; + // tblboxes.first = vertical lines. + // tblboxes.second = horizontal lines. std::pair<util::array<box2d>, util::array<box2d> > tblboxes = internal::extract_tables(in); image2d<bool> table = internal::rebuild_table(in, tblboxes); diff --git a/milena/sandbox/scribo/demat31Oct2008.hh b/milena/sandbox/scribo/demat31Oct2008.hh deleted file mode 100644 index e482326..0000000 --- a/milena/sandbox/scribo/demat31Oct2008.hh +++ /dev/null @@ -1,559 +0,0 @@ -// Copyright (C) 2008 EPITA Research and Development Laboratory -// -// This file is part of the Olena Library. This library is free -// software; you can redistribute it and/or modify it under the terms -// of the GNU General Public License version 2 as published by the -// Free Software Foundation. 
-// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this library; see the file COPYING. If not, write to -// the Free Software Foundation, 51 Franklin Street, Fifth Floor, -// Boston, MA 02111-1307, USA. -// -// As a special exception, you may use this file as part of a free -// software library without restriction. Specifically, if other files -// instantiate templates or use macros or inline functions from this -// file, or you compile this file and link it with other files to -// produce an executable, this file does not by itself cause the -// resulting executable to be covered by the GNU General Public -// License. This exception does not however invalidate any other -// reasons why the executable file might be covered by the GNU General -// Public License. 
- -# ifndef DEMAT_HH_ -# define DEMAT_HH_ - -# include <libgen.h> -# include <sstream> - -# include <mln/core/image/image2d.hh> -# include <mln/core/image/image1d.hh> - -# include <mln/core/concept/function.hh> -# include <mln/core/image/image_if.hh> -# include <mln/core/image/sub_image.hh> -# include <mln/core/image/cast_image.hh> -# include <mln/core/alias/neighb2d.hh> -# include <mln/core/var.hh> -# include <mln/core/routine/clone.hh> -# include <mln/core/routine/ops.hh> -# include <mln/core/site_set/p_vaccess.hh> -# include <mln/core/site_set/p_set.hh> - -# include <mln/accu/bbox.hh> -# include <mln/accu/count.hh> - -# include <mln/border/fill.hh> - -# include <mln/convert/to.hh> -# include <mln/convert/to_fun.hh> - -# include <mln/debug/println.hh> -# include <mln/debug/colorize.hh> - -# include <mln/draw/box.hh> - -# include <mln/fun/i2v/array.hh> - -# include <mln/io/pbm/load.hh> -# include <mln/io/pbm/save.hh> -# include <mln/io/ppm/save.hh> -# include <mln/io/pgm/save.hh> - -# include <mln/labeling/blobs.hh> -# include <mln/labeling/compute.hh> - -# include <mln/level/convert.hh> -# include <mln/level/compute.hh> -# include <mln/level/fill.hh> -# include <mln/level/paste.hh> -# include <mln/level/apply.hh> -# include <mln/level/transform.hh> - -# include <mln/literal/all.hh> - -# include <mln/logical/not.hh> - -# include <mln/morpho/hit_or_miss.hh> -# include <mln/morpho/erosion.hh> -# include <mln/morpho/top_hat.hh> - -# include <mln/pw/all.hh> - -# include <mln/util/array.hh> - -# include <mln/value/int_u16.hh> -# include <mln/value/rgb8.hh> - -# include <mln/win/hline2d.hh> -# include <mln/win/vline2d.hh> - -namespace scribo -{ - - namespace internal - { - - using namespace mln; - using value::int_u16; - using value::rgb8; - - char *input_file = 0; - int dbg_file_id = 0; - - std::string output_file(const char *name, unsigned file_id) - { - std::ostringstream os; - os << "./" - << file_id - << "_" - << input_file - << "_" - << name; - return os.str(); - 
} - - void draw_component_boxes(image2d<rgb8>& output, const util::array<box2d>& boxes) - { - for (unsigned i = 1; i < boxes.nelements(); ++i) - if (boxes[i].is_valid()) - { - output(boxes[i].center()) = literal::red; - draw::box(output, boxes[i], literal::red); - } - } - - template <typename V> - void save_lbl_image(const image2d<V>& lbl, unsigned nlabels, - const char *filename, unsigned file_id) - { - image2d<rgb8> output = debug::colorize<image2d<rgb8>, image2d<V> >(lbl, nlabels); - io::ppm::save(output, output_file(filename, file_id)); - } - - - /// Functions related to the matrix extraction - /// \{ - - void draw_hline(image2d<rgb8>& ima, - const box2d& box, - const rgb8& v) - { - unsigned ncols = box.pmax().col() - box.pmin().col(); - point2d p1 = box.center(); - p1.col() -= ncols / 2; - point2d p2 = box.center(); - p2.col() += ncols / 2; - - draw::line(ima, p1, p2, v); - } - - void draw_vline(image2d<rgb8>& ima, - const box2d& box, - const rgb8& v) - { - unsigned nrows = box.pmax().row() - box.pmin().row(); - point2d p1 = box.center(); - p1.row() -= nrows / 2; - point2d p2 = box.center(); - p2.row() += nrows / 2; - - draw::line(ima, p1, p2, v); - } - - void draw_row(image2d<rgb8>& ima, - unsigned line, - const rgb8& v) - { - draw::line(ima, point2d(line, 0), point2d(line, ima.ncols()), v); - } - - void draw_col(image2d<rgb8>& ima, - unsigned line, - const rgb8& v) - { - draw::line(ima, point2d(0, line), point2d(ima.nrows(), line), v); - } - - void - extract_matrix(const image2d<bool>& in, - std::pair<util::array<box2d>, util::array<box2d> > tboxes) - { - std::cout << "Extracting matrix..." 
<< std::endl; - - image1d<unsigned> hend(in.ncols()), - hrow(in.nrows()), - vend(in.nrows()), - vcol(in.ncols()); - - level::fill(hend, 0); - level::fill(hrow, 0); - level::fill(vend, 0); - level::fill(vcol, 0); - - for (unsigned i = 1; i < tboxes.first.nelements(); ++i) - { - ++vend.at(tboxes.first[i].pmin().row()); - ++vend.at(tboxes.first[i].pmax().row()); - ++vcol.at(tboxes.first[i].center().col()); - } - - for (unsigned i = 1; i < tboxes.second.nelements(); ++i) - { - ++hend.at(tboxes.second[i].pmin().col()); - ++hend.at(tboxes.second[i].pmax().col()); - ++hrow.at(tboxes.second[i].center().row()); - } - -#ifndef NOUT - image2d<rgb8> tmp(in.domain()); - level::fill(tmp, literal::black); - - for (unsigned i = 1; i < in.ncols(); ++i) - { - if (hend.at(i) > 0) - draw_col(tmp, i, literal::dark_orange); - if (vcol.at(i) > 0) - draw_col(tmp, i, literal::dark_orange); - } - - for (unsigned i = 1; i < in.nrows(); ++i) - { - if (hrow.at(i) > 0) - draw_row(tmp, i, literal::dark_red); - if (vend.at(i) > 0) - draw_row(tmp, i, literal::dark_red); - } - - for (unsigned i = 1; i < tboxes.first.nelements(); ++i) - draw_vline(tmp, tboxes.first[i], literal::green); - - for (unsigned i = 1; i < tboxes.second.nelements(); ++i) - draw_hline(tmp, tboxes.second[i], literal::red); - - io::ppm::save(tmp, output_file("matrix.ppm", 4)); -#endif - - } - - /// \} - - - - /// Functions related to the table removal - /// \{ - - - /// Extract the components bboxes. - util::array<box2d> - component_boxes(const image2d<bool>& filter) - { - std::cout << "component boxes" << std::endl; - int_u16 nlabels = 0; - image2d<int_u16> lbl = labeling::blobs(filter, c8(), nlabels); - - return labeling::compute(accu::meta::bbox(), lbl, nlabels); - } - - /// Remove table bboxes from an image. 
- void erase_table_boxes(image2d<bool>& output, - util::array<box2d>& boxes, - unsigned bbox_enlarge, unsigned dim) - { - for (unsigned i = 1; i < boxes.nelements(); ++i) - { - boxes[i].enlarge(dim, bbox_enlarge + 1); - boxes[i].crop_wrt(output.domain()); - level::paste((pw::cst(false) | boxes[i] | - (pw::value(output) == pw::cst(true))), output); - } - } - - - /// Find table bboxes and remove them from the image. - std::pair<util::array<box2d>, - util::array<box2d> > - extract_tables(image2d<bool>& in, - image2d<rgb8>& output, - unsigned l) - { - typedef image2d<int_u16> I; - typedef accu::bbox<mln_psite_(I)> A; - typedef util::array<mln_result_(A)> boxes_t; - - - // Lignes verticales - std::cout << "Removing vertical lines" << std::endl; - win::vline2d vline(l); - image2d<bool> vfilter = morpho::erosion(in, vline); - -#ifndef NOUT - io::pbm::save(vfilter, output_file("table-vfilter.pbm", 1)); -#endif - - boxes_t vboxes = component_boxes(vfilter); - erase_table_boxes(in, vboxes, (l / 2), 0); - - - // Lignes horizontales - std::cout << "Removing horizontal lines" << std::endl; - win::hline2d hline(l); - image2d<bool> hfilter = morpho::erosion(in, hline); - -#ifndef NOUT - io::pbm::save(hfilter, output_file("table-hfilter.pbm", 2)); -#endif - - boxes_t hboxes = component_boxes(hfilter); - erase_table_boxes(in, hboxes, (l / 2), 1); - - -#ifndef NOUT - image2d<rgb8> tmp = clone(output); - draw_component_boxes(tmp, vboxes); - draw_component_boxes(tmp, hboxes); - io::ppm::save(tmp, output_file("table-filtered.ppm", 3)); -#endif - - return std::make_pair(vboxes, hboxes); - } - - /// \} - /// End of functions related to the table removal. 
- - - /// Function related to text extraction - /// \{ - - inline - int_u16 - most_left(const fun::i2v::array<int_u16>& left_link, unsigned i) - { - while (left_link(i) != i) - i = left_link(i); - return i; - } - - inline - int_u16 - uncurri_left_link(fun::i2v::array<int_u16>& left_link, unsigned i) - { - if (left_link(i) != i) - left_link(i) = uncurri_left_link(left_link, left_link(i)); - return left_link(i); - } - - template <typename V> - void - remove_small_comps_i2v(image2d<V>& lbl, - V& nlabels, - unsigned min_comp_size) - { - std::cout << "Removing small components smaller than " - << min_comp_size << " sites among " << nlabels - << " components" << std::endl; - - typedef accu::count<mln_psite(image2d<V>)> accu_count_t; - - util::array<mln_result(accu_count_t)> nsitecomp - = labeling::compute(accu_count_t(), lbl, nlabels); - - V ncomp = 0; - - fun::i2v::array<V> f(nsitecomp.nelements()); - f(0) = 0; - - for (unsigned i = 1; i <= nlabels; ++i) - { - if (nsitecomp[i] < min_comp_size) - f(i) = 0; - else - f(i) = ++ncomp; - } - - lbl = level::transform(lbl, f); - nlabels = ncomp; - -#ifndef NOUT - save_lbl_image(lbl, nlabels, "lbl-small-comps-removed.pgm", 6); -#endif - } - - - /// Merge bboxes according to their left box neighbor. - util::array< box2d > - group_bboxes(fun::i2v::array<int_u16>& left_link, image2d<int_u16>& lbl, - util::array<box2d>& cboxes, unsigned ncomp) - { - // Currify left_link lookup table and compute text area bboxes. 
- util::array< accu::bbox<point2d> > tboxes; - tboxes.resize(ncomp + 1); - for (unsigned i = 1; i <= ncomp; ++i) - tboxes[uncurri_left_link(left_link, i)].take(cboxes[i]); - - //Update labels - lbl = level::transform(lbl, left_link); - -#ifndef NOUT - save_lbl_image(lbl, ncomp, "lbl-grouped-boxes.pgm", 7); -#endif - - util::array<box2d> result; - for (unsigned i = 1; i <= ncomp; ++i) - if (tboxes[i].is_valid()) - result.append(tboxes[i].to_result()); - - return result; - } - - - /// Update the lookup table \p left if a neighbor is found on the right of - /// the current bbox. - void update_link(fun::i2v::array<int_u16>& left_link, image2d<int_u16>& lbl, - const point2d& p, const point2d& c, - unsigned i, int dmax) - { - if (lbl.domain().has(p) && lbl(p) != 0u && lbl(p) != i - && (math::abs(p.col() - c.col())) < dmax) - { - if (left_link(lbl(p)) == lbl(p) && most_left(left_link, i) != lbl(p)) - left_link(lbl(p)) = i; -// else -// left_link(lbl(p)) = 0;//FIXME: should be uncommented? - } - } - - - - /// Map each character bbox to its left bbox neighbor if possible. - /// Iterate to the right but link boxes to the left. - fun::i2v::array<int_u16> - link_character_bboxes(image2d<int_u16>& lbl, - const util::array<box2d>& cboxes, - unsigned ncomp, - unsigned bbox_distance) - { - fun::i2v::array<int_u16> left_link; - left_link.resize(ncomp + 1); - - for (unsigned i = 0; i <= ncomp; ++i) - left_link(i) = i; - - for (unsigned i = 1; i <= ncomp; ++i) - { - unsigned midcol = (cboxes[i].pmax().col() - cboxes[i].pmin().col()) / 2; - int dmax = midcol + bbox_distance; - point2d c = cboxes[i].center(); - /// First site on the right of the central site - point2d p(c.row(), c.col() + 1); - - // FIXME: Lemmings with a condition on the distance => write a special version? 
- while (lbl.domain().has(p) && (lbl(p) == 0u || lbl(p) == i) - && math::abs(p.col() - c.col()) < dmax) - ++p.col(); - - update_link(left_link, lbl, p, c, i, dmax); - } - - return left_link; - } - - util::array<box2d> - extract_text(image2d<bool>& in_, - image2d<rgb8>& output, - unsigned bbox_distance, - unsigned min_comp_size) - { - std::cout << "extracting text..." << std::endl; - - typedef int_u16 V; - typedef image2d<V> I; - typedef util::array<box2d> boxes_t; - - // Extract edges. - win::rectangle2d l(3, 3); - image2d<bool> in = morpho::top_hat_white(in_, l); - io::pbm::save(in, output_file("top_hat.ppm", 9)); - - // Find character bboxes. - V nlabels; - image2d<V> lbl = labeling::blobs(in, c8(), nlabels); - - //Remove small components. - remove_small_comps_i2v(lbl, nlabels, min_comp_size); - - boxes_t cboxes = labeling::compute(accu::meta::bbox(), lbl, nlabels); - -#ifndef NOUT - image2d<rgb8> tmp = clone(output); - draw_component_boxes(tmp, cboxes); - io::ppm::save(tmp, output_file("character-bboxes.ppm", 5)); -#endif - - //merge_bboxes(cboxes, lbl, nlabels); - - //Link character bboxes to their left neighboor if possible. - fun::i2v::array<int_u16> left = - link_character_bboxes(lbl, cboxes, nlabels, bbox_distance); - - //Merge character bboxes according to their left neighbor. 
- util::array<box2d> tboxes = group_bboxes(left, lbl, cboxes, nlabels); - - return tboxes; - } - - /// \} - /// End of functions related to text extraction - - } // end of namespace scribo::internal - - - // Facade - void demat(char *argv[], bool treat_tables) - { - using namespace mln; - using value::rgb8; - - border::thickness = 3; - trace::quiet = true; - - //Useful debug variables - internal::input_file = basename(argv[1]); - unsigned l = 101; - unsigned bbox_distance = 25; - unsigned min_comp_size = 5; - - //Load image - image2d<bool> in; - io::pbm::load(in, argv[1]); - in = logical::not_(in); - - image2d<rgb8> output = level::convert(rgb8(), in); - - std::pair<util::array<box2d>, - util::array<box2d> > tblboxes; - if (treat_tables) - { - tblboxes = internal::extract_tables(in, output, l); - internal::extract_matrix(in, tblboxes); - } - - util::array<box2d> tboxes = - internal::extract_text(in, output, bbox_distance, min_comp_size); - - internal::draw_component_boxes(output, tboxes); - io::ppm::save(output, internal::output_file("out.ppm", 8)); - - /// Use txt bboxes here with Tesseract - /// for (i = 1; i < tboxes.nelements(); ++i) - /// tesseract(in | tboxes[i]) - } - -} // end of namespace scribo - -# endif // ! DEMAT_HH diff --git a/milena/sandbox/scribo/demat_v2.hh b/milena/sandbox/scribo/demat_v2.hh deleted file mode 100644 index 2295c2f..0000000 --- a/milena/sandbox/scribo/demat_v2.hh +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (C) 2008 EPITA Research and Development Laboratory -// -// This file is part of the Olena Library. This library is free -// software; you can redistribute it and/or modify it under the terms -// of the GNU General Public License version 2 as published by the -// Free Software Foundation. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU -// General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this library; see the file COPYING. If not, write to -// the Free Software Foundation, 51 Franklin Street, Fifth Floor, -// Boston, MA 02111-1307, USA. -// -// As a special exception, you may use this file as part of a free -// software library without restriction. Specifically, if other files -// instantiate templates or use macros or inline functions from this -// file, or you compile this file and link it with other files to -// produce an executable, this file does not by itself cause the -// resulting executable to be covered by the GNU General Public -// License. This exception does not however invalidate any other -// reasons why the executable file might be covered by the GNU General -// Public License. - -#ifndef DEMAT_HH_ -# define DEMAT_HH_ - -# include <mln/core/image/image2d.hh> - -# include <mln/core/image/image_if.hh> -# include <mln/core/image/sub_image.hh> -# include <mln/core/alias/neighb2d.hh> -# include <mln/core/var.hh> -# include <mln/core/site_set/p_vaccess.hh> - -# include <mln/binarization/threshold.hh> -# include <mln/morpho/hit_or_miss.hh> -# include <mln/level/fill.hh> -# include <mln/border/fill.hh> -# include <mln/io/pbm/load.hh> -# include <mln/io/pgm/load.hh> -# include <mln/io/pbm/save.hh> -# include <mln/io/pgm/save.hh> -# include <mln/debug/println.hh> -# include <mln/morpho/opening.hh> -# include <mln/trait/value_.hh> -# include <mln/value/int_u8.hh> -# include <mln/value/int_u16.hh> -# include <mln/level/paste.hh> -# include <mln/labeling/blobs.hh> -# include <mln/level/fill.hh> -# include <mln/pw/all.hh> -# include <mln/convert/to_fun.hh> -# include <mln/geom/bbox.hh> - -# include <mln/labeling/compute.hh> -# include <mln/accu/bbox.hh> - -namespace scribo -{ - - namespace internal - { - - void filter_image(mln::image2d<bool>& ima, - const mln::image2d<bool>& filter, - unsigned 
bbox_larger) - { - using namespace mln; - using value::int_u16; - - typedef image2d<int_u16> I; - typedef mln_accu_with_(accu::meta::bbox, mln_psite_(I)) A; - typedef util::array<A::result> boxes_t; - - int_u16 nlabels; - I lbl = labeling::blobs(filter, c4(), nlabels); - - boxes_t boxes = labeling::compute(accu::meta::bbox(), lbl, nlabels); - - for (unsigned i = 1; i <= nlabels; ++i) - level::paste(pw::cst(false) - | boxes[i].to_larger(bbox_larger), - ima); - } - - void remove_tables(mln::image2d<bool>& in, unsigned h, unsigned w, unsigned n) - { - using namespace mln; - - // Lignes verticales - win::rectangle2d vwin(h, w); - image2d<bool> vfilter = morpho::opening(in, vwin); - io::pbm::save(vfilter, "./table-vfilter.pbm"); - filter_image(in, vfilter, n); - - - // Lignes horizontales - win::rectangle2d hwin(w, h); - image2d<bool> hfilter = morpho::opening(in, hwin); - io::pbm::save(hfilter, "./table-hfilter.pbm"); - filter_image(in, hfilter, n); - } - - } // end of namespace scribo::internal - - - - // Facade - void demat(char *argv[]) - { - using namespace mln; - using value::int_u8; - - //Useful debug variables - unsigned h = atoi(argv[2]); - unsigned w = atoi(argv[3]); - unsigned n = atoi(argv[4]); - - //Load image - image2d<bool> in; - io::pbm::load(in, argv[1]); - - internal::remove_tables(in, h, w, n); - - io::pbm::save(in, "./table-filtered.pbm"); - } - -} // end of namespace scribo - -# endif // ! DEMAT_HH -- 1.5.6.5