* sandbox/scribo/Makefile: add new rules.
* sandbox/scribo/demat.hh: add more documentation.
* sandbox/scribo/demat31Oct2008.hh,
* sandbox/scribo/demat_v2.hh: remove.
---
milena/ChangeLog | 11 +
milena/sandbox/scribo/Makefile | 14 +-
milena/sandbox/scribo/demat.hh | 137 +++++++--
milena/sandbox/scribo/demat31Oct2008.hh | 559 -------------------------------
milena/sandbox/scribo/demat_v2.hh | 134 --------
5 files changed, 138 insertions(+), 717 deletions(-)
delete mode 100644 milena/sandbox/scribo/demat31Oct2008.hh
delete mode 100644 milena/sandbox/scribo/demat_v2.hh
diff --git a/milena/ChangeLog b/milena/ChangeLog
index e894701..438bfe2 100644
--- a/milena/ChangeLog
+++ b/milena/ChangeLog
@@ -1,5 +1,16 @@
2009-03-02 Guillaume Lazzara <lazzara(a)lrde.epita.fr>
+ Cleanup scribo sandbox.
+
+ * sandbox/scribo/Makefile: add new rules.
+
+ * sandbox/scribo/demat.hh: add more documentation.
+
+ * sandbox/scribo/demat31Oct2008.hh,
+ * sandbox/scribo/demat_v2.hh: remove.
+
+2009-03-02 Guillaume Lazzara <lazzara(a)lrde.epita.fr>
+
Small fixes.
* headers.mk: update dist headers.
diff --git a/milena/sandbox/scribo/Makefile b/milena/sandbox/scribo/Makefile
index 2a58b92..730d8c2 100644
--- a/milena/sandbox/scribo/Makefile
+++ b/milena/sandbox/scribo/Makefile
@@ -1,5 +1,13 @@
-all:
- g++ -I../.. -I$(HOME)/local/include -O1 -DNDEBUG table.cc
$(HOME)/local/lib/libtesseract_full.a -lpthread
+all: table photo
+
+table: demat.hh
+ g++ -I../.. -I$(HOME)/local/include -O1 -DNDEBUG table.cc
$(HOME)/local/lib/libtesseract_full.a -lpthread -o table
+
+photo: demat.hh
+ g++ -I../.. -I$(HOME)/local/include -O1 -DNDEBUG photo.cc
$(HOME)/local/lib/libtesseract_full.a -lpthread -o photo
clean:
- rm -f a.out
+ rm *.ppm *.pgm *.pbm
+
+dist-clean: clean
+ rm -f table photo
diff --git a/milena/sandbox/scribo/demat.hh b/milena/sandbox/scribo/demat.hh
index 0bc72ef..8872afc 100644
--- a/milena/sandbox/scribo/demat.hh
+++ b/milena/sandbox/scribo/demat.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2008 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2008, 2009 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of the Olena Library. This library is free
// software; you can redistribute it and/or modify it under the terms
@@ -48,9 +49,10 @@
# include <mln/transform/influence_zone_geodesic.hh>
# include <mln/debug/draw_graph.hh>
# include <mln/make/graph.hh>
+# include <mln/make/region_adjacency_graph.hh>
# include <mln/util/graph.hh>
# include <mln/util/line_graph.hh>
-# include <mln/opt/at.hh>
+# include <mln/io/txt/save.hh>
# include <mln/canvas/browsing/depth_first_search.hh>
@@ -109,9 +111,13 @@ namespace scribo
std::ostringstream os;
os << "./"
- << file_id++
- << "_"
<< input_file
+ << "_";
+
+ if (file_id < 10)
+ os << "0";
+
+ os << file_id++
<< "_"
<< name;
return os.str();
@@ -185,7 +191,7 @@ namespace scribo
//-*****************************************
/// \{
- /// Passes the text bboxes to Tesseract and store the result an image of
+ /// Passes the text bboxes to Tesseract and store the result in an image of
/// char.
/// \param[in] in image from where the text bboxes are extracted.
/// \param[in] lbl labeled image.
@@ -258,6 +264,22 @@ namespace scribo
** 4 | | | {2} |
** 5 |- - | | {2} |
** 6 | | | {2} |
+ **
+ ** \p hboxes contains all the table lines bboxes. Each bbox is
+ ** associated with an id, its location in the array.
+ **
+ ** For each bbox, its id is marked in a vector. The location is defined,
+ ** according to the given parameter \p dim, either by the row or the col
+ ** value of the pmin site.
+ **
+ ** Ids are then propagated in the vector according to a small delta value.
+ ** If bbox ids are at the same location in the vector, their related bboxes
+ ** are likely to be on the same line.
+ **
+ ** Finally, iterate over the vector until all bboxes have been treated.
+ ** For each iteration, the set with a specific number of elements is found
+ ** and all bboxes referenced in this set are aligned on the same row or col.
+ **
*/
util::array<int>
align_lines(unsigned nsites,
@@ -296,6 +318,9 @@ namespace scribo
max_nelts = lines[i].nelements();
// Aligning lines
+ // FIXME: not optimal... Make it faster!
+ // We may do too many iterations (while loop) and some of them may
+ // be done for nothing...
util::array<int> newlines;
while (max_nelts > 0)
{
@@ -327,7 +352,7 @@ namespace scribo
- /// Connect lines if they are close to each other.
+ /// Connect vertical and horizontal lines if they are close to each other.
void
connect_lines(const util::array<int>& aligned_lines,
util::array<box2d>& boxes,
@@ -353,6 +378,49 @@ namespace scribo
}
}
+// void
+// connect_lines2(const util::array<int>& aligned_lines,
+// util::array<box2d>& boxes,
+// unsigned dim,
+// unsigned dim_size)
+// {
+// image1d<int> l(dim_size);
+// data::fill(l, -1);
+//
+// for_all_components(i, boxes)
+// {
+// opt::at(l, boxes[i].pmin()[dim]) = i;
+// opt::at(l, boxes[i].pmax()[dim]) = i;
+// }
+//
+// for (unsigned i = 0; i < settings.max_dist_lines; ++i)
+// l = morpho::elementary::dilation(l, c2());
+//
+// for_all_components(i, boxes)
+// {
+// std::pair<point2d, point2d> cp = central_sites(boxes[i], dim);
+//
+// win::segment1d seg(11);
+// {
+// mln_qiter_(win::segment1d) q(seg, point1d(cp.first[dim]));
+// for_all(q)
+// if (opt::at(l, q[0]) != -1)
+// {
+// boxes[i].pmin()[dim] = boxes[opt::at(l, q[0])].pmin()[dim];
+// break;
+// }
+// }
+// {
+// mln_qiter_(win::segment1d) q(seg, point1d(cp.second[dim]));
+// for_all(q)
+// if (opt::at(l, q[0]) != -1)
+// {
+// boxes[i].pmax()[dim] = boxes[opt::at(l, q[0])].pmax()[dim];
+// break;
+// }
+// }
+// }
+// }
/// Align line bboxes vertically and horizontally. Then, try to join
@@ -372,7 +440,7 @@ namespace scribo
1);
# ifndef NOUT
image2d<rgb8> out2(in.domain());
- level::fill(out2, literal::black);
+ data::fill(out2, literal::black);
for_all_components(i, tblboxes.first)
draw::box(out2, tblboxes.first[i], literal::red);
for_all_components(i, tblboxes.second)
@@ -380,6 +448,12 @@ namespace scribo
io::ppm::save(out2, output_file("after-alignment.ppm"));
# endif
+ // FIXME: Rebuild incomplete lines if possible.
+ // ----- --- => ----------
+// connect_lines2(tblboxes.first, 0, in.nrows());
+// connect_lines2(rows, tblboxes.second, 0, in.nrows());
+
+ // Connect vertical lines with horizontal lines.
connect_lines(rows, tblboxes.first, 0, in.nrows());
connect_lines(cols, tblboxes.second, 1, in.ncols());
@@ -481,7 +555,7 @@ namespace scribo
/// Find table bboxes and remove them from the image.
std::pair<util::array<box2d>,
- util::array<box2d> >
+ util::array<box2d> >
extract_tables(image2d<bool>& in)
{
typedef image2d<label_16> I;
@@ -489,7 +563,7 @@ namespace scribo
typedef util::array<mln_result_(A)> boxes_t;
- // Lignes verticales
+ // Vertical lines
std::cout << "Removing vertical lines" << std::endl;
win::vline2d vline(settings.ero_line_width);
image2d<bool> vfilter = morpho::rank_filter(in, vline,
settings.rank_filter);
@@ -500,7 +574,7 @@ namespace scribo
boxes_t vboxes = component_boxes(vfilter);
- // Lignes horizontales
+ // Horizontal lines.
std::cout << "Removing horizontal lines" << std::endl;
win::hline2d hline(settings.ero_line_width);
image2d<bool> hfilter = morpho::rank_filter(in, hline,
settings.rank_filter);
@@ -596,12 +670,15 @@ namespace scribo
if (settings.treat_tables)
{
+ // Remove components which are too small
typedef util::array<accu_count_res_t> nsitecomp_t;
nsitecomp_t nsitecomp = labeling::compute(accu_count_t(), lbl, nlabels);
remove_small_comps<accu_count_res_t> fl2b(nsitecomp);
labeling::relabel_inplace(lbl, nlabels, fl2b);
} else
{
+ // Remove components which have too many or not enough sites and which are
+ // too high.
typedef util::array<accu_pair_res_t> nsitecomp_t;
nsitecomp_t nsitecomp = labeling::compute(accu_pair_t(), lbl, nlabels);
remove_smallandlarge_comps<accu_pair_res_t> fl2b(nsitecomp);
@@ -609,6 +686,9 @@ namespace scribo
}
}
+
+ /// Functor to be passed to depth_first_search.
+ /// Maps each component vertex to its representative vertex id.
struct make_relabel_fun_t
{
template <typename G>
@@ -640,6 +720,10 @@ namespace scribo
fun::l2l::relabel<label_16> l2l;
};
+
+
+ /// Functor to be passed to depth_first_search.
+ /// Computes the number of vertices per graph component.
struct comp_size_t
{
template <typename G>
@@ -654,6 +738,7 @@ namespace scribo
void next()
{
unsigned compsize = comp_vertices.nelements();
+ std::cout << "compsize = " << compsize << std::endl;
for (unsigned i = 0; i < comp_vertices.nelements(); ++i)
treated[comp_vertices[i]] = compsize;
comp_vertices.clear();
@@ -663,7 +748,11 @@ namespace scribo
{ comp_vertices.insert(id); }
void update_queued(unsigned id)
- { update_treated(id); }
+ {
+ std::cout << "update_queued_before " << comp_vertices <<
std::endl;
+ update_treated(id);
+ std::cout << "update_queued_after " << comp_vertices <<
std::endl;
+ }
bool to_be_treated(unsigned id)
{ return treated[id] == mln_max(label_16); }
@@ -675,6 +764,8 @@ namespace scribo
util::array<unsigned> treated;
};
+
+
/// Merge bboxes according to their left box neighbor.
util::array<box2d>
group_bboxes(const util::graph& g, image2d<label_16>& lbl,
@@ -710,6 +801,7 @@ namespace scribo
comp_size_t comp_size;
canvas::browsing::depth_first_search(g, comp_size);
+ std::cout << g << std::endl;
for_all_ncomponents(i, nlabels)
if (tboxes[i].is_valid())
if (comp_size.treated[i] < 3)
@@ -750,7 +842,7 @@ namespace scribo
int dmax = midcol + settings.bbox_distance;
point2d c = cboxes[i].center();
/// First site on the right of the central site
- point2d p(c.row(), c.col() + 1);
+ point2d p = c + right;
// FIXME: Lemmings with a condition on the distance => write a special version?
while (lbl.domain().has(p) && (lbl(p) == 0u || lbl(p) == i)
@@ -793,21 +885,24 @@ namespace scribo
return tboxes;
}
+
+
+
+ /// Function mapping value to sites of a line graph image.
template <typename P>
struct lg_vertex_values : public mln::Function_p2v< lg_vertex_values<P>
>
{
typedef float result;
+ // Compute the angle between P and (0,1)
float operator()(const P& p) const
{
- mln::algebra::vec<2,float> v;
+ mln::algebra::vec<2,float> v, pv;
v[0] = 0;
v[1] = 1;
- float norm = mln::math::sqrt(std::pow(p.to_vec()[0], 2)
- + std::pow(p.to_vec()[1], 2));
- // FIXME: missing proxy_impl for point and line2d?
- float res = (v * p.to_vec()) / norm;
- return res;
+ pv = p.to_vec().normalize();
+
+ return v * pv;
}
};
@@ -848,7 +943,7 @@ namespace scribo
#endif
typedef util::graph G;
- G g = make::graph(iz | (pw::value(iz) != pw::cst(0u)), nlabels);
+ G g = make::graph(iz | (pw::value(iz) != pw::cst(0u)), c8(), nlabels);
// Compute the component centers and use them as vertex.
//FIXME: Add fun::vertex2p
@@ -857,8 +952,6 @@ namespace scribo
for_all_components(i, tboxes)
fv2p(i) = tboxes[i].center();
-// util::array<point2d> centers =
labeling::compute(accu::center<point2d>(), iz, nlabels);
-// fv2p_t fv2p = convert::to<fv2p_t>(centers);
// Create a p_vertices.
p_vertices<G, fv2p_t> pv(g, fv2p);
@@ -935,6 +1028,8 @@ namespace scribo
internal::settings.max_comp_size = in.ncols() * in.nrows() * 0.05;
+ // tblboxes.first = vertical lines.
+ // tblboxes.second = horizontal lines.
std::pair<util::array<box2d>,
util::array<box2d> > tblboxes = internal::extract_tables(in);
image2d<bool> table = internal::rebuild_table(in, tblboxes);
diff --git a/milena/sandbox/scribo/demat31Oct2008.hh
b/milena/sandbox/scribo/demat31Oct2008.hh
deleted file mode 100644
index e482326..0000000
--- a/milena/sandbox/scribo/demat31Oct2008.hh
+++ /dev/null
@@ -1,559 +0,0 @@
-// Copyright (C) 2008 EPITA Research and Development Laboratory
-//
-// This file is part of the Olena Library. This library is free
-// software; you can redistribute it and/or modify it under the terms
-// of the GNU General Public License version 2 as published by the
-// Free Software Foundation.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this library; see the file COPYING. If not, write to
-// the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-// Boston, MA 02111-1307, USA.
-//
-// As a special exception, you may use this file as part of a free
-// software library without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to
-// produce an executable, this file does not by itself cause the
-// resulting executable to be covered by the GNU General Public
-// License. This exception does not however invalidate any other
-// reasons why the executable file might be covered by the GNU General
-// Public License.
-
-# ifndef DEMAT_HH_
-# define DEMAT_HH_
-
-# include <libgen.h>
-# include <sstream>
-
-# include <mln/core/image/image2d.hh>
-# include <mln/core/image/image1d.hh>
-
-# include <mln/core/concept/function.hh>
-# include <mln/core/image/image_if.hh>
-# include <mln/core/image/sub_image.hh>
-# include <mln/core/image/cast_image.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/core/var.hh>
-# include <mln/core/routine/clone.hh>
-# include <mln/core/routine/ops.hh>
-# include <mln/core/site_set/p_vaccess.hh>
-# include <mln/core/site_set/p_set.hh>
-
-# include <mln/accu/bbox.hh>
-# include <mln/accu/count.hh>
-
-# include <mln/border/fill.hh>
-
-# include <mln/convert/to.hh>
-# include <mln/convert/to_fun.hh>
-
-# include <mln/debug/println.hh>
-# include <mln/debug/colorize.hh>
-
-# include <mln/draw/box.hh>
-
-# include <mln/fun/i2v/array.hh>
-
-# include <mln/io/pbm/load.hh>
-# include <mln/io/pbm/save.hh>
-# include <mln/io/ppm/save.hh>
-# include <mln/io/pgm/save.hh>
-
-# include <mln/labeling/blobs.hh>
-# include <mln/labeling/compute.hh>
-
-# include <mln/level/convert.hh>
-# include <mln/level/compute.hh>
-# include <mln/level/fill.hh>
-# include <mln/level/paste.hh>
-# include <mln/level/apply.hh>
-# include <mln/level/transform.hh>
-
-# include <mln/literal/all.hh>
-
-# include <mln/logical/not.hh>
-
-# include <mln/morpho/hit_or_miss.hh>
-# include <mln/morpho/erosion.hh>
-# include <mln/morpho/top_hat.hh>
-
-# include <mln/pw/all.hh>
-
-# include <mln/util/array.hh>
-
-# include <mln/value/int_u16.hh>
-# include <mln/value/rgb8.hh>
-
-# include <mln/win/hline2d.hh>
-# include <mln/win/vline2d.hh>
-
-namespace scribo
-{
-
- namespace internal
- {
-
- using namespace mln;
- using value::int_u16;
- using value::rgb8;
-
- char *input_file = 0;
- int dbg_file_id = 0;
-
- std::string output_file(const char *name, unsigned file_id)
- {
- std::ostringstream os;
- os << "./"
- << file_id
- << "_"
- << input_file
- << "_"
- << name;
- return os.str();
- }
-
- void draw_component_boxes(image2d<rgb8>& output, const
util::array<box2d>& boxes)
- {
- for (unsigned i = 1; i < boxes.nelements(); ++i)
- if (boxes[i].is_valid())
- {
- output(boxes[i].center()) = literal::red;
- draw::box(output, boxes[i], literal::red);
- }
- }
-
- template <typename V>
- void save_lbl_image(const image2d<V>& lbl, unsigned nlabels,
- const char *filename, unsigned file_id)
- {
- image2d<rgb8> output = debug::colorize<image2d<rgb8>,
image2d<V> >(lbl, nlabels);
- io::ppm::save(output, output_file(filename, file_id));
- }
-
-
- /// Functions related to the matrix extraction
- /// \{
-
- void draw_hline(image2d<rgb8>& ima,
- const box2d& box,
- const rgb8& v)
- {
- unsigned ncols = box.pmax().col() - box.pmin().col();
- point2d p1 = box.center();
- p1.col() -= ncols / 2;
- point2d p2 = box.center();
- p2.col() += ncols / 2;
-
- draw::line(ima, p1, p2, v);
- }
-
- void draw_vline(image2d<rgb8>& ima,
- const box2d& box,
- const rgb8& v)
- {
- unsigned nrows = box.pmax().row() - box.pmin().row();
- point2d p1 = box.center();
- p1.row() -= nrows / 2;
- point2d p2 = box.center();
- p2.row() += nrows / 2;
-
- draw::line(ima, p1, p2, v);
- }
-
- void draw_row(image2d<rgb8>& ima,
- unsigned line,
- const rgb8& v)
- {
- draw::line(ima, point2d(line, 0), point2d(line, ima.ncols()), v);
- }
-
- void draw_col(image2d<rgb8>& ima,
- unsigned line,
- const rgb8& v)
- {
- draw::line(ima, point2d(0, line), point2d(ima.nrows(), line), v);
- }
-
- void
- extract_matrix(const image2d<bool>& in,
- std::pair<util::array<box2d>, util::array<box2d> > tboxes)
- {
- std::cout << "Extracting matrix..." << std::endl;
-
- image1d<unsigned> hend(in.ncols()),
- hrow(in.nrows()),
- vend(in.nrows()),
- vcol(in.ncols());
-
- level::fill(hend, 0);
- level::fill(hrow, 0);
- level::fill(vend, 0);
- level::fill(vcol, 0);
-
- for (unsigned i = 1; i < tboxes.first.nelements(); ++i)
- {
- ++vend.at(tboxes.first[i].pmin().row());
- ++vend.at(tboxes.first[i].pmax().row());
- ++vcol.at(tboxes.first[i].center().col());
- }
-
- for (unsigned i = 1; i < tboxes.second.nelements(); ++i)
- {
- ++hend.at(tboxes.second[i].pmin().col());
- ++hend.at(tboxes.second[i].pmax().col());
- ++hrow.at(tboxes.second[i].center().row());
- }
-
-#ifndef NOUT
- image2d<rgb8> tmp(in.domain());
- level::fill(tmp, literal::black);
-
- for (unsigned i = 1; i < in.ncols(); ++i)
- {
- if (hend.at(i) > 0)
- draw_col(tmp, i, literal::dark_orange);
- if (vcol.at(i) > 0)
- draw_col(tmp, i, literal::dark_orange);
- }
-
- for (unsigned i = 1; i < in.nrows(); ++i)
- {
- if (hrow.at(i) > 0)
- draw_row(tmp, i, literal::dark_red);
- if (vend.at(i) > 0)
- draw_row(tmp, i, literal::dark_red);
- }
-
- for (unsigned i = 1; i < tboxes.first.nelements(); ++i)
- draw_vline(tmp, tboxes.first[i], literal::green);
-
- for (unsigned i = 1; i < tboxes.second.nelements(); ++i)
- draw_hline(tmp, tboxes.second[i], literal::red);
-
- io::ppm::save(tmp, output_file("matrix.ppm", 4));
-#endif
-
- }
-
- /// \}
-
-
-
- /// Functions related to the table removal
- /// \{
-
-
- /// Extract the components bboxes.
- util::array<box2d>
- component_boxes(const image2d<bool>& filter)
- {
- std::cout << "component boxes" << std::endl;
- int_u16 nlabels = 0;
- image2d<int_u16> lbl = labeling::blobs(filter, c8(), nlabels);
-
- return labeling::compute(accu::meta::bbox(), lbl, nlabels);
- }
-
- /// Remove table bboxes from an image.
- void erase_table_boxes(image2d<bool>& output,
- util::array<box2d>& boxes,
- unsigned bbox_enlarge, unsigned dim)
- {
- for (unsigned i = 1; i < boxes.nelements(); ++i)
- {
- boxes[i].enlarge(dim, bbox_enlarge + 1);
- boxes[i].crop_wrt(output.domain());
- level::paste((pw::cst(false) | boxes[i] |
- (pw::value(output) == pw::cst(true))), output);
- }
- }
-
-
- /// Find table bboxes and remove them from the image.
- std::pair<util::array<box2d>,
- util::array<box2d> >
- extract_tables(image2d<bool>& in,
- image2d<rgb8>& output,
- unsigned l)
- {
- typedef image2d<int_u16> I;
- typedef accu::bbox<mln_psite_(I)> A;
- typedef util::array<mln_result_(A)> boxes_t;
-
-
- // Lignes verticales
- std::cout << "Removing vertical lines" << std::endl;
- win::vline2d vline(l);
- image2d<bool> vfilter = morpho::erosion(in, vline);
-
-#ifndef NOUT
- io::pbm::save(vfilter, output_file("table-vfilter.pbm", 1));
-#endif
-
- boxes_t vboxes = component_boxes(vfilter);
- erase_table_boxes(in, vboxes, (l / 2), 0);
-
-
- // Lignes horizontales
- std::cout << "Removing horizontal lines" << std::endl;
- win::hline2d hline(l);
- image2d<bool> hfilter = morpho::erosion(in, hline);
-
-#ifndef NOUT
- io::pbm::save(hfilter, output_file("table-hfilter.pbm", 2));
-#endif
-
- boxes_t hboxes = component_boxes(hfilter);
- erase_table_boxes(in, hboxes, (l / 2), 1);
-
-
-#ifndef NOUT
- image2d<rgb8> tmp = clone(output);
- draw_component_boxes(tmp, vboxes);
- draw_component_boxes(tmp, hboxes);
- io::ppm::save(tmp, output_file("table-filtered.ppm", 3));
-#endif
-
- return std::make_pair(vboxes, hboxes);
- }
-
- /// \}
- /// End of functions related to the table removal.
-
-
- /// Function related to text extraction
- /// \{
-
- inline
- int_u16
- most_left(const fun::i2v::array<int_u16>& left_link, unsigned i)
- {
- while (left_link(i) != i)
- i = left_link(i);
- return i;
- }
-
- inline
- int_u16
- uncurri_left_link(fun::i2v::array<int_u16>& left_link, unsigned i)
- {
- if (left_link(i) != i)
- left_link(i) = uncurri_left_link(left_link, left_link(i));
- return left_link(i);
- }
-
- template <typename V>
- void
- remove_small_comps_i2v(image2d<V>& lbl,
- V& nlabels,
- unsigned min_comp_size)
- {
- std::cout << "Removing small components smaller than "
- << min_comp_size << " sites among " << nlabels
- << " components" << std::endl;
-
- typedef accu::count<mln_psite(image2d<V>)> accu_count_t;
-
- util::array<mln_result(accu_count_t)> nsitecomp
- = labeling::compute(accu_count_t(), lbl, nlabels);
-
- V ncomp = 0;
-
- fun::i2v::array<V> f(nsitecomp.nelements());
- f(0) = 0;
-
- for (unsigned i = 1; i <= nlabels; ++i)
- {
- if (nsitecomp[i] < min_comp_size)
- f(i) = 0;
- else
- f(i) = ++ncomp;
- }
-
- lbl = level::transform(lbl, f);
- nlabels = ncomp;
-
-#ifndef NOUT
- save_lbl_image(lbl, nlabels, "lbl-small-comps-removed.pgm", 6);
-#endif
- }
-
-
- /// Merge bboxes according to their left box neighbor.
- util::array< box2d >
- group_bboxes(fun::i2v::array<int_u16>& left_link,
image2d<int_u16>& lbl,
- util::array<box2d>& cboxes, unsigned ncomp)
- {
- // Currify left_link lookup table and compute text area bboxes.
- util::array< accu::bbox<point2d> > tboxes;
- tboxes.resize(ncomp + 1);
- for (unsigned i = 1; i <= ncomp; ++i)
- tboxes[uncurri_left_link(left_link, i)].take(cboxes[i]);
-
- //Update labels
- lbl = level::transform(lbl, left_link);
-
-#ifndef NOUT
- save_lbl_image(lbl, ncomp, "lbl-grouped-boxes.pgm", 7);
-#endif
-
- util::array<box2d> result;
- for (unsigned i = 1; i <= ncomp; ++i)
- if (tboxes[i].is_valid())
- result.append(tboxes[i].to_result());
-
- return result;
- }
-
-
- /// Update the lookup table \p left if a neighbor is found on the right of
- /// the current bbox.
- void update_link(fun::i2v::array<int_u16>& left_link,
image2d<int_u16>& lbl,
- const point2d& p, const point2d& c,
- unsigned i, int dmax)
- {
- if (lbl.domain().has(p) && lbl(p) != 0u && lbl(p) != i
- && (math::abs(p.col() - c.col())) < dmax)
- {
- if (left_link(lbl(p)) == lbl(p) && most_left(left_link, i) != lbl(p))
- left_link(lbl(p)) = i;
-// else
-// left_link(lbl(p)) = 0;//FIXME: should be uncommented?
- }
- }
-
-
-
- /// Map each character bbox to its left bbox neighbor if possible.
- /// Iterate to the right but link boxes to the left.
- fun::i2v::array<int_u16>
- link_character_bboxes(image2d<int_u16>& lbl,
- const util::array<box2d>& cboxes,
- unsigned ncomp,
- unsigned bbox_distance)
- {
- fun::i2v::array<int_u16> left_link;
- left_link.resize(ncomp + 1);
-
- for (unsigned i = 0; i <= ncomp; ++i)
- left_link(i) = i;
-
- for (unsigned i = 1; i <= ncomp; ++i)
- {
- unsigned midcol = (cboxes[i].pmax().col() - cboxes[i].pmin().col()) / 2;
- int dmax = midcol + bbox_distance;
- point2d c = cboxes[i].center();
- /// First site on the right of the central site
- point2d p(c.row(), c.col() + 1);
-
- // FIXME: Lemmings with a condition on the distance => write a special version?
- while (lbl.domain().has(p) && (lbl(p) == 0u || lbl(p) == i)
- && math::abs(p.col() - c.col()) < dmax)
- ++p.col();
-
- update_link(left_link, lbl, p, c, i, dmax);
- }
-
- return left_link;
- }
-
- util::array<box2d>
- extract_text(image2d<bool>& in_,
- image2d<rgb8>& output,
- unsigned bbox_distance,
- unsigned min_comp_size)
- {
- std::cout << "extracting text..." << std::endl;
-
- typedef int_u16 V;
- typedef image2d<V> I;
- typedef util::array<box2d> boxes_t;
-
- // Extract edges.
- win::rectangle2d l(3, 3);
- image2d<bool> in = morpho::top_hat_white(in_, l);
- io::pbm::save(in, output_file("top_hat.ppm", 9));
-
- // Find character bboxes.
- V nlabels;
- image2d<V> lbl = labeling::blobs(in, c8(), nlabels);
-
- //Remove small components.
- remove_small_comps_i2v(lbl, nlabels, min_comp_size);
-
- boxes_t cboxes = labeling::compute(accu::meta::bbox(), lbl, nlabels);
-
-#ifndef NOUT
- image2d<rgb8> tmp = clone(output);
- draw_component_boxes(tmp, cboxes);
- io::ppm::save(tmp, output_file("character-bboxes.ppm", 5));
-#endif
-
- //merge_bboxes(cboxes, lbl, nlabels);
-
- //Link character bboxes to their left neighboor if possible.
- fun::i2v::array<int_u16> left =
- link_character_bboxes(lbl, cboxes, nlabels, bbox_distance);
-
- //Merge character bboxes according to their left neighbor.
- util::array<box2d> tboxes = group_bboxes(left, lbl, cboxes, nlabels);
-
- return tboxes;
- }
-
- /// \}
- /// End of functions related to text extraction
-
- } // end of namespace scribo::internal
-
-
- // Facade
- void demat(char *argv[], bool treat_tables)
- {
- using namespace mln;
- using value::rgb8;
-
- border::thickness = 3;
- trace::quiet = true;
-
- //Useful debug variables
- internal::input_file = basename(argv[1]);
- unsigned l = 101;
- unsigned bbox_distance = 25;
- unsigned min_comp_size = 5;
-
- //Load image
- image2d<bool> in;
- io::pbm::load(in, argv[1]);
- in = logical::not_(in);
-
- image2d<rgb8> output = level::convert(rgb8(), in);
-
- std::pair<util::array<box2d>,
- util::array<box2d> > tblboxes;
- if (treat_tables)
- {
- tblboxes = internal::extract_tables(in, output, l);
- internal::extract_matrix(in, tblboxes);
- }
-
- util::array<box2d> tboxes =
- internal::extract_text(in, output, bbox_distance, min_comp_size);
-
- internal::draw_component_boxes(output, tboxes);
- io::ppm::save(output, internal::output_file("out.ppm", 8));
-
- /// Use txt bboxes here with Tesseract
- /// for (i = 1; i < tboxes.nelements(); ++i)
- /// tesseract(in | tboxes[i])
- }
-
-} // end of namespace scribo
-
-# endif // ! DEMAT_HH
diff --git a/milena/sandbox/scribo/demat_v2.hh b/milena/sandbox/scribo/demat_v2.hh
deleted file mode 100644
index 2295c2f..0000000
--- a/milena/sandbox/scribo/demat_v2.hh
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright (C) 2008 EPITA Research and Development Laboratory
-//
-// This file is part of the Olena Library. This library is free
-// software; you can redistribute it and/or modify it under the terms
-// of the GNU General Public License version 2 as published by the
-// Free Software Foundation.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with this library; see the file COPYING. If not, write to
-// the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-// Boston, MA 02111-1307, USA.
-//
-// As a special exception, you may use this file as part of a free
-// software library without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to
-// produce an executable, this file does not by itself cause the
-// resulting executable to be covered by the GNU General Public
-// License. This exception does not however invalidate any other
-// reasons why the executable file might be covered by the GNU General
-// Public License.
-
-#ifndef DEMAT_HH_
-# define DEMAT_HH_
-
-# include <mln/core/image/image2d.hh>
-
-# include <mln/core/image/image_if.hh>
-# include <mln/core/image/sub_image.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/core/var.hh>
-# include <mln/core/site_set/p_vaccess.hh>
-
-# include <mln/binarization/threshold.hh>
-# include <mln/morpho/hit_or_miss.hh>
-# include <mln/level/fill.hh>
-# include <mln/border/fill.hh>
-# include <mln/io/pbm/load.hh>
-# include <mln/io/pgm/load.hh>
-# include <mln/io/pbm/save.hh>
-# include <mln/io/pgm/save.hh>
-# include <mln/debug/println.hh>
-# include <mln/morpho/opening.hh>
-# include <mln/trait/value_.hh>
-# include <mln/value/int_u8.hh>
-# include <mln/value/int_u16.hh>
-# include <mln/level/paste.hh>
-# include <mln/labeling/blobs.hh>
-# include <mln/level/fill.hh>
-# include <mln/pw/all.hh>
-# include <mln/convert/to_fun.hh>
-# include <mln/geom/bbox.hh>
-
-# include <mln/labeling/compute.hh>
-# include <mln/accu/bbox.hh>
-
-namespace scribo
-{
-
- namespace internal
- {
-
- void filter_image(mln::image2d<bool>& ima,
- const mln::image2d<bool>& filter,
- unsigned bbox_larger)
- {
- using namespace mln;
- using value::int_u16;
-
- typedef image2d<int_u16> I;
- typedef mln_accu_with_(accu::meta::bbox, mln_psite_(I)) A;
- typedef util::array<A::result> boxes_t;
-
- int_u16 nlabels;
- I lbl = labeling::blobs(filter, c4(), nlabels);
-
- boxes_t boxes = labeling::compute(accu::meta::bbox(), lbl, nlabels);
-
- for (unsigned i = 1; i <= nlabels; ++i)
- level::paste(pw::cst(false)
- | boxes[i].to_larger(bbox_larger),
- ima);
- }
-
- void remove_tables(mln::image2d<bool>& in, unsigned h, unsigned w, unsigned
n)
- {
- using namespace mln;
-
- // Lignes verticales
- win::rectangle2d vwin(h, w);
- image2d<bool> vfilter = morpho::opening(in, vwin);
- io::pbm::save(vfilter, "./table-vfilter.pbm");
- filter_image(in, vfilter, n);
-
-
- // Lignes horizontales
- win::rectangle2d hwin(w, h);
- image2d<bool> hfilter = morpho::opening(in, hwin);
- io::pbm::save(hfilter, "./table-hfilter.pbm");
- filter_image(in, hfilter, n);
- }
-
- } // end of namespace scribo::internal
-
-
-
- // Facade
- void demat(char *argv[])
- {
- using namespace mln;
- using value::int_u8;
-
- //Useful debug variables
- unsigned h = atoi(argv[2]);
- unsigned w = atoi(argv[3]);
- unsigned n = atoi(argv[4]);
-
- //Load image
- image2d<bool> in;
- io::pbm::load(in, argv[1]);
-
- internal::remove_tables(in, h, w, n);
-
- io::pbm::save(in, "./table-filtered.pbm");
- }
-
-} // end of namespace scribo
-
-# endif // ! DEMAT_HH
--
1.5.6.5