https://svn.lrde.epita.fr/svn/oln/trunk/milena/sandbox
Index: ChangeLog
from Thierry Geraud <thierry.geraud(a)lrde.epita.fr>
Add 2010 code for page segmentation.
* inim/2010/page: New directory.
* inim/2010/page/AUTHORS: New.
* inim/2010/page/check_size_functor.hh: New.
* inim/2010/page/white.hxx: New.
* inim/2010/page/main.cc: New.
* inim/2010/page/binarization: New.
* inim/2010/page/binarization/binarization.hxx: New.
* inim/2010/page/binarization/grayscale.hxx: New.
* inim/2010/page/binarization/functors.hh: New.
* inim/2010/page/binarization/integral-image.hh: New.
* inim/2010/page/binarization/binarization.hh: New.
* inim/2010/page/binarization/grayscale.hh: New.
* inim/2010/page/binarization/functors.hxx: New.
* inim/2010/page/binarization/integral-image.hxx: New.
* inim/2010/page/README: New.
* inim/2010/page/Makefile: New.
* inim/2010/page/white.hh: New.
AUTHORS | 3
Makefile | 43 +++++++
README | 51 ++++++++
binarization/binarization.hh | 36 ++++++
binarization/binarization.hxx | 72 ++++++++++++
binarization/functors.hh | 23 +++
binarization/functors.hxx | 33 +++++
binarization/grayscale.hh | 32 +++++
binarization/grayscale.hxx | 15 ++
binarization/integral-image.hh | 34 +++++
binarization/integral-image.hxx | 57 +++++++++
check_size_functor.hh | 30 +++++
main.cc | 89 +++++++++++++++
white.hh | 36 ++++++
white.hxx | 233 ++++++++++++++++++++++++++++++++++++++++
15 files changed, 787 insertions(+)
Index: inim/2010/page/AUTHORS
--- inim/2010/page/AUTHORS (revision 0)
+++ inim/2010/page/AUTHORS (revision 0)
@@ -0,0 +1,3 @@
+hameli_a
+denuzi_l
+da-mot_s
Index: inim/2010/page/check_size_functor.hh
--- inim/2010/page/check_size_functor.hh (revision 0)
+++ inim/2010/page/check_size_functor.hh (revision 0)
@@ -0,0 +1,30 @@
+#ifndef CHECK_SIZE_FUNCTOR_HH_
+# define CHECK_SIZE_FUNCTOR_HH_
+
+# include <mln/core/concept/function.hh>
+
+/// Predicate on labels: tells whether the bounding box associated with a
+/// label is long enough along at least one of its two first dimensions.
+///
+/// \tparam P Point type of the bounding boxes.
+/// \tparam L Label type; a label is used directly as an index into the
+///           array of boxes.
+template <typename P, typename L>
+class CheckBoxSize : public mln::Function_v2b< CheckBoxSize<P, L> >
+{
+public:
+
+  /// \param boxes   Bounding boxes, indexed by label (a copy is stored).
+  /// \param min_len Minimal length a box must reach along dimension 0 or 1
+  ///                for its label to be accepted.  Defaults to 200, the
+  ///                value that used to be hard-coded.
+  CheckBoxSize(const mln::util::array< mln::box<P> >& boxes,
+               unsigned min_len = 200)
+    : boxes_ (boxes),
+      min_len_ (min_len)
+  {
+  }
+
+  /// \return true when the box of \p label is at least min_len long along
+  ///         dimension 1 or along dimension 0.
+  bool operator() (L label) const
+  {
+    return boxes_[label].len(1) >= min_len_
+      || boxes_[label].len(0) >= min_len_;
+  }
+
+private:
+
+  mln::util::array< mln::box<P> > boxes_;
+  unsigned min_len_;
+
+};
+
+#endif /* !CHECK_SIZE_FUNCTOR_HH_ */
Index: inim/2010/page/white.hxx
--- inim/2010/page/white.hxx (revision 0)
+++ inim/2010/page/white.hxx (revision 0)
@@ -0,0 +1,233 @@
+#ifndef WHITE_HXX
+# define WHITE_HXX
+
+# include <white.hh>
+
+namespace whitespace
+{
+
+ /// Size filter for component bounding boxes: a box is kept when it is
+ /// shorter than 50 along dimension 0 and shorter than 30 along dimension 1,
+ /// while being at least 5 long along one of the two dimensions.
+ template <typename P>
+ bool is_box_ok (const box<P>& box)
+ {
+   // Reject boxes that are too large first.
+   if (!(box.len(0) < 50 && box.len(1) < 30))
+     return false;
+
+   // Then reject boxes that are tiny along both dimensions.
+   if (box.len(0) >= 5)
+     return true;
+   return box.len(1) >= 5;
+ }
+
+ /// Compute the bounding boxes of the connected components of \p ima and
+ /// keep those accepted by is_box_ok().
+ ///
+ /// Components are labeled with labeling::blobs using the c8()
+ /// neighborhood and 16-bit labels; one bounding box per label is then
+ /// obtained with labeling::compute.
+ ///
+ /// \param ima Image to label.
+ /// \return The bounding boxes that passed the size filter, in label order.
+ template <typename I>
+ util::array< box<mln_psite(I)> > find_connected_components (I& ima)
+ {
+   typedef value::label_16 V;
+   typedef util::array< box<mln_psite(I)> > A;
+
+   // Connected components.
+   V num_labels;
+   mln_ch_value(I, V) labels = labeling::blobs (ima, c8(), num_labels);
+
+   // One bounding box per label.
+   A components =
+     labeling::compute (accu::shape::bbox<mln_psite(I)>(),
+                        labels, num_labels);
+
+   // Size filtering.  The debug image that used to be allocated and filled
+   // here was never read nor saved, so it has been removed.
+   A final_components;
+   mln_fwd_eiter(A) it (components);
+   for_all (it)
+     if (is_box_ok (it.element()))
+       final_components.append (it.element());
+
+   return final_components;
+ }
+
+ /// Select, among \p components, the box that overlaps \p bound and lies
+ /// closest to the centre of \p bound.
+ ///
+ /// The distance of a box is the sum, over dimensions 0 and 1, of the
+ /// absolute distances between the centre of \p bound and the min and max
+ /// coordinates of the box.  On ties the first box met wins.
+ ///
+ /// Two boxes overlap when their coordinate intervals intersect along both
+ /// dimensions.  The previous test only looked at whether a corner
+ /// coordinate of the component fell inside \p bound, so a component
+ /// straddling \p bound along one dimension was missed.
+ ///
+ /// \param[in]  components Candidate boxes.
+ /// \param[in]  bound      Area being searched.
+ /// \param[out] pivot      The selected box; untouched when nothing
+ ///                        overlaps \p bound.
+ /// \return true when at least one component overlaps \p bound.
+ template <typename P>
+ bool find_pivot (const util::array< box<P> >& components,
+                  box<P> bound,
+                  box<P>& pivot)
+ {
+   const P bound_min = bound.pmin();
+   const P bound_max = bound.pmax();
+
+   // Signed arithmetic: coordinate differences may be negative.
+   const int middle_0 = (bound_max[0] + bound_min[0]) / 2;
+   const int middle_1 = (bound_max[1] + bound_min[1]) / 2;
+
+   bool found = false;
+   int best_dist = 0; // Only meaningful once `found' is true.
+
+   mln_eiter(util::array< box<P> >) it (components);
+   for_all (it)
+   {
+     const P it_min = it.element().pmin();
+     const P it_max = it.element().pmax();
+
+     const bool overlap_0 =
+       it_min[0] <= bound_max[0] && bound_min[0] <= it_max[0];
+     const bool overlap_1 =
+       it_min[1] <= bound_max[1] && bound_min[1] <= it_max[1];
+
+     if (overlap_0 && overlap_1)
+     {
+       const int dist = abs(middle_0 - it_min[0]) +
+                        abs(it_max[0] - middle_0) +
+                        abs(middle_1 - it_min[1]) +
+                        abs(it_max[1] - middle_1);
+       if (!found || dist < best_dist)
+       {
+         best_dist = dist;
+         pivot = it.element();
+       }
+       found = true;
+     }
+   }
+   return found;
+ }
+
+// Determine whether the given box must be considered when searching for
+// whitespace.
+//
+// A box is discarded when it has fewer than 4000 sites, when it is shorter
+// than 2 along dimension 1 or shorter than 10 along dimension 0, or when
+// at least 80% of its area is covered by one of the boxes in `others'.
+//
+// Areas are computed as (max - min) products, i.e. without the +1 of
+// inclusive coordinates, for both the box and the intersections.
+ template <typename P>
+ bool searchable_for_whitespace (box<P> pbox,
+                                 const util::array< box<P> >& others)
+ {
+   if (pbox.nsites() < 4000)
+     return false;
+
+   if (pbox.len (1) < 2 || pbox.len (0) < 10)
+     return false;
+
+   const P pbox_min = pbox.pmin();
+   const P pbox_max = pbox.pmax();
+
+   // Loop-invariant: area of the candidate and the coverage that makes it
+   // redundant.
+   const unsigned int box_area =
+     static_cast<unsigned int>(pbox_max[0] - pbox_min[0]) *
+     static_cast<unsigned int>(pbox_max[1] - pbox_min[1]);
+   const double max_covered = 0.8 * box_area;
+
+   typedef util::array< box<P> > A;
+   mln_fwd_eiter(A) it (others);
+
+   for_all(it)
+   {
+     const P other_min = it.element().pmin();
+     const P other_max = it.element().pmax();
+
+     // Intersection of the two boxes, kept signed so that an empty
+     // intersection shows up as max < min.
+     const int inter_min_0 = std::max<int>(pbox_min[0], other_min[0]);
+     const int inter_max_0 = std::min<int>(pbox_max[0], other_max[0]);
+     const int inter_min_1 = std::max<int>(pbox_min[1], other_min[1]);
+     const int inter_max_1 = std::min<int>(pbox_max[1], other_max[1]);
+
+     if (inter_max_0 >= inter_min_0 && inter_max_1 >= inter_min_1)
+     {
+       const unsigned int inter_area =
+         static_cast<unsigned int>(inter_max_0 - inter_min_0) *
+         static_cast<unsigned int>(inter_max_1 - inter_min_1);
+
+       if (inter_area >= max_covered)
+         return false;
+     }
+   }
+   return true;
+ }
+
+// Priority of a box for a minimal heap: the more sites a box has, the
+// lower (hence the better) the returned value.
+ template <typename P>
+ value::int_u32 quality (box<P> pbox)
+ {
+   // The upper bound is 0x7fffffff rather than 0xffffffff because the
+   // int_u32 constructor takes an int, not an unsigned int.
+   const int upper_bound = 0x7fffffff;
+   return (upper_bound - pbox.nsites());
+ }
+
+ /// Look for rectangles of \p bound that no component overlaps.
+ ///
+ /// Candidate rectangles are kept in a heap ordered by quality(), starting
+ /// with \p bound itself.  For each candidate popped from the heap:
+ ///  - if find_pivot() finds no component in it, the candidate is appended
+ ///    to the result;
+ ///  - otherwise, for each dimension, the parts of the candidate lying
+ ///    after and before the pivot along that dimension are pushed back
+ ///    onto the heap, provided searchable_for_whitespace() accepts them
+ ///    against the rectangles found so far.
+ ///
+ /// \param components Boxes of the objects to avoid (taken by reference:
+ ///                   the array is only read).
+ /// \param bound      Area to search.
+ /// \return The rectangles found, in discovery order.
+ template <typename P>
+ util::array< box<P> >
+ find_whitespaces (const util::array< box<P> >& components,
+                   box<P> bound)
+ {
+   typedef util::fibonacci_heap<value::int_u32, box<P> > H;
+   H h;
+   util::array< box<P> > res;
+   h.push (quality(bound), bound);
+
+   box<P> pivot;
+   while (h.is_valid())
+   {
+     const box<P> b = h.pop_front();
+
+     if (!find_pivot (components, b, pivot))
+     {
+       // Found a white rectangle \o/
+       res.append (b);
+       continue;
+     }
+
+     P pmin = b.pmin();
+     P pmax = b.pmax();
+     for (unsigned i = 0; i < P::dim; ++i)
+     {
+       // Part of `b' located after the pivot along dimension i.
+       if (pivot.pmax()[i] < b.pmax()[i])
+       {
+         pmin[i] = pivot.pmax()[i] + 1;
+         const box<P> after (pmin, pmax);
+         if (searchable_for_whitespace (after, res))
+           h.push (quality(after), after);
+         pmin[i] = b.pmin()[i]; // Restore for the next sub-box.
+       }
+       // Part of `b' located before the pivot along dimension i.
+       if (pivot.pmin()[i] > b.pmin()[i])
+       {
+         pmax[i] = pivot.pmin()[i] - 1;
+         const box<P> before (pmin, pmax);
+         if (searchable_for_whitespace (before, res))
+           h.push (quality(before), before);
+         pmax[i] = b.pmax()[i]; // Restore for the next dimension.
+       }
+     }
+   }
+   return res;
+ }
+
+ /// Build the image of object zones from a binarized page.
+ ///
+ /// Steps, in order:
+ ///  1. the border of \p ima is filled with true;
+ ///  2. component boxes are computed with find_connected_components() and
+ ///     whitespace rectangles with find_whitespaces() over the whole domain;
+ ///  3. a `zones' image is filled with one, then with zero over every
+ ///     whitespace rectangle, and its border is filled with false;
+ ///  4. `zones' is labeled (c8), labels are relabeled through
+ ///     CheckBoxSize on their bounding boxes, then packed;
+ ///  5. the per-label sum of labels is computed as an image and
+ ///     thresholded at 1 to give \p cleaned.
+ ///
+ /// Declared inline because this non-template function is defined in a
+ /// header: without it, including the header from two translation units
+ /// defines the function twice.
+ ///
+ /// \param[in,out] ima     Binarized input; its border is overwritten.
+ /// \param[out]    cleaned Result image (assigned, previous content lost).
+ inline
+ void whitespace_compute (image2d<bool>& ima,
+                          image2d<bool>& cleaned)
+ {
+   typedef mln_psite_(image2d<bool>) P;
+   typedef value::label_16 V;
+
+   border::fill (ima, true);
+
+   // Object boxes, then whitespace rectangles.
+   util::array< box<P> > components =
+     whitespace::find_connected_components(ima);
+   util::array< box<P> > final_components =
+     whitespace::find_whitespaces (components, ima.domain());
+
+   // Merge whitespace rectangles into a single image.
+   image2d<bool> zones (ima.domain());
+   data::fill (zones, literal::one);
+   mln_fwd_eiter_(util::array< box<P> >) it (final_components);
+   for_all (it)
+     data::fill ((zones | it.element()).rw(), literal::zero);
+   border::fill (zones, false);
+
+   // Label the remaining zones and filter them on bounding box size.
+   V num_labels;
+   image2d<V> labels =
+     labeling::blobs (zones, c8(), num_labels);
+
+   util::array< box<P> > boxes =
+     labeling::compute (accu::shape::bbox<P>(), labels, num_labels);
+
+   labeling::relabel_inplace (labels, num_labels,
+                              CheckBoxSize<P, V>(boxes));
+   labeling::pack_inplace (labels, num_labels);
+
+   image2d<value::int_u32> size_label =
+     labeling::compute_image (accu::math::sum<V, value::int_u32>(),
+                              labels, labels, num_labels);
+
+   cleaned = mln::binarization::threshold (size_label, 1);
+ }
+} // End of namespace whitespace
+
+#endif /* !WHITE_HXX */
Index: inim/2010/page/main.cc
--- inim/2010/page/main.cc (revision 0)
+++ inim/2010/page/main.cc (revision 0)
@@ -0,0 +1,89 @@
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+#include <string>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <mln/io/ppm/load.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/value/int_u8.hh>
+#include <mln/core/image/image2d.hh>
+
+#include <binarization.hh>
+#include <grayscale.hh>
+#include <white.hh>
+
+/// Print the command-line synopsis on the standard output and terminate
+/// the program with exit status 1.
+///
+/// \param name Program name shown in the synopsis.
+void usage (const std::string& name)
+{
+  std::cout << "Usage: " << name;
+  std::cout << " [-w window_size] [-b] IN.ppm OUT.pbm";
+  std::cout << std::endl;
+  exit (1);
+}
+
+/// Entry point: PROG [-w window_size] [-b] IN.ppm OUT.pbm
+///
+/// Loads IN.ppm, converts it to gray levels, binarizes it with
+/// binarization::sauvola_threshold (window size 51 unless -w is given)
+/// and, unless -b is given, runs whitespace::whitespace_compute on the
+/// binarized image.  The resulting binary image is saved to OUT.pbm.
+///
+/// Any malformed command line ends in usage(), which exits with status 1.
+int
+main (int argc,
+      char** argv)
+{
+  const std::string prog(argc > 0 ? argv[0] : "");
+
+  if (argc < 2)
+    usage(prog);
+
+  int i = 1;
+  unsigned int w_val = 51;
+  bool do_white = true;
+
+  if (std::string(argv[i]) == "-w")
+  {
+    if (argc < i + 2)
+      usage(prog);
+
+    // Parse as a signed value so that non-numeric, negative and zero
+    // window sizes are rejected instead of being silently accepted.
+    std::istringstream ist(argv[++i]);
+    long parsed = 0;
+    if (!(ist >> parsed) || parsed <= 0)
+      usage(prog);
+    w_val = static_cast<unsigned int>(parsed);
+    ++i;
+  }
+
+  if (i >= argc)
+    usage(prog);
+
+  if (std::string(argv[i]) == "-b")
+  {
+    do_white = false;
+    ++i;
+  }
+
+  if (i >= argc)
+    usage(prog);
+
+  const std::string infile(argv[i++]);
+
+  if (i >= argc)
+    usage(prog);
+
+  const std::string outfile(argv[i]);
+
+  // The original image
+  mln::image2d<mln::value::rgb8> initial;
+  mln::io::ppm::load(initial, infile);
+  const mln::image2d<mln::value::rgb8>& loaded = initial;
+
+  // Grayscale
+  mln::image2d<mln::value::int_u8> grays(loaded.domain());
+  binarization::grayscale(loaded, grays);
+
+  // Binarization
+  mln::image2d<bool> binarized(loaded.domain());
+  binarization::sauvola_threshold(grays, binarized, w_val);
+
+  if (do_white)
+  {
+    // Whitespace
+    mln::image2d<bool> result(loaded.domain());
+    whitespace::whitespace_compute(binarized, result);
+    mln::io::pbm::save (result, outfile);
+  }
+  else
+    mln::io::pbm::save (binarized, outfile);
+
+  return 0;
+}
Index: inim/2010/page/binarization/binarization.hxx
--- inim/2010/page/binarization/binarization.hxx (revision 0)
+++ inim/2010/page/binarization/binarization.hxx (revision 0)
@@ -0,0 +1,72 @@
+#ifndef BINARIZATION_HXX
+# define BINARIZATION_HXX
+
+# include <binarization.hh>
+
+namespace binarization
+{
+ /// Sum accumulated by the integral image \p ii over the window of rows
+ /// [row_min, row_max] and columns [col_min, col_max], both inclusive.
+ ///
+ /// ii(r, c) holds the sum over rows 0..r and columns 0..c, so the terms
+ /// to subtract are taken one row above and one column left of the window;
+ /// they are zero when the window touches the first row or column.
+ /// Intermediate unsigned results may wrap; the final value is exact.
+ template<typename T>
+ unsigned long long
+ sauvola_window_sum(const IntegralImage<T>& ii,
+                    int row_min, int col_min,
+                    int row_max, int col_max)
+ {
+   const bool has_above = row_min > 0;
+   const bool has_left = col_min > 0;
+
+   unsigned long long sum = ii(row_max, col_max);
+   if (has_above)
+     sum -= ii(row_min - 1, col_max);
+   if (has_left)
+     sum -= ii(row_max, col_min - 1);
+   if (has_above && has_left)
+     sum += ii(row_min - 1, col_min - 1);
+   return sum;
+ }
+
+ /// Sauvola thresholding using integral images.
+ ///
+ /// For every pixel, the mean m and the standard deviation s of \p src are
+ /// computed over a window extending w / 2 pixels on each side (clipped to
+ /// the image), and the pixel of \p dst is set to true when
+ /// m * (1 + k * (s / R - 1)) is strictly greater than the source value,
+ /// to false otherwise.
+ ///
+ /// Fixes over the previous version: the window sums now cover exactly the
+ /// pixels counted in the window size (the first row and column of the
+ /// window used to be left out), and a one-pixel window no longer divides
+ /// by zero.
+ ///
+ /// \param[in]  src The source image.
+ /// \param[out] dst The destination image; it is iterated over with its
+ ///                 own dimensions, so it is expected to be as large as
+ ///                 \p src (NOTE(review): not checked here).
+ /// \param[in]  w   Window size.
+ template<typename T1, typename T2>
+ void
+ sauvola_threshold(const image2d<T1>& src,
+                   image2d<T2>& dst,
+                   unsigned int w)
+ {
+   // Control the threshold value in the local window.
+   // The higher, the lower the threshold from the local
+   // mean m(x, y). Badekas et al. said 0.34 was best.
+   const double k = 0.34;
+   // Maximum value of the standard deviation (128 for
+   // grayscale documents).
+   const double R = 128;
+
+   // TODO: Merge both computations into one to improve speed.
+   // Sum of all intensities of src.
+   IntegralImage<T1> simple(src, Identity<T1>());
+   // Sum of all squared intensities of src.
+   IntegralImage<T1> squared(src, Square<T1>());
+
+   const int w_2 = w >> 1;
+   const int last_row = static_cast<int>(dst.nrows()) - 1;
+   const int last_col = static_cast<int>(dst.ncols()) - 1;
+
+   // Sauvola algorithm with integral images.
+   for (def::coord row = 0; row < static_cast<def::coord>(dst.nrows()); ++row)
+     for (def::coord col = 0; col < static_cast<def::coord>(dst.ncols()); ++col)
+     {
+       const int row_min = std::max(0, row - w_2);
+       const int col_min = std::max(0, col - w_2);
+       const int row_max = std::min(last_row, row + w_2);
+       const int col_max = std::min(last_col, col + w_2);
+
+       // Number of pixels in the window.
+       const double wh = (row_max - row_min + 1) * (col_max - col_min + 1);
+
+       // Mean
+       const double sum =
+         sauvola_window_sum(simple, row_min, col_min, row_max, col_max);
+       const double m_x_y = sum / wh;
+
+       // Standard deviation (unbiased estimate).  It is zero for a
+       // one-pixel window, and clamped at zero against rounding errors.
+       const double sum_sq =
+         sauvola_window_sum(squared, row_min, col_min, row_max, col_max);
+       double variance = 0.0;
+       if (wh > 1.0)
+       {
+         variance = (sum_sq - (sum * sum) / wh) / (wh - 1.0);
+         if (variance < 0.0)
+           variance = 0.0;
+       }
+       const double s_x_y = sqrt(variance);
+
+       // Thresholding
+       const double t_x_y = m_x_y * (1.0 + k * ((s_x_y / R) - 1.0));
+
+       dst(point2d(row, col)) = (t_x_y > src(point2d(row, col)));
+     }
+ }
+}
+
+#endif /* !BINARIZATION_HXX */
Index: inim/2010/page/binarization/grayscale.hxx
--- inim/2010/page/binarization/grayscale.hxx (revision 0)
+++ inim/2010/page/binarization/grayscale.hxx (revision 0)
@@ -0,0 +1,15 @@
+#ifndef GRAYSCALE_HXX
+# define GRAYSCALE_HXX
+
+# include <grayscale.hh>
+
+namespace binarization
+{
+ /// Convert the color image \p src to gray levels by applying the lum
+ /// functor to every pixel; the result is assigned to \p dst.
+ ///
+ /// Declared inline because this non-template function is defined in a
+ /// header: without it, including the header from two translation units
+ /// defines the function twice.
+ inline
+ void grayscale(const image2d<value::rgb8>& src,
+                image2d<value::int_u8>& dst)
+ {
+   dst = data::transform(src, lum());
+ }
+}
+
+#endif /* !GRAYSCALE_HXX */
Index: inim/2010/page/binarization/functors.hh
--- inim/2010/page/binarization/functors.hh (revision 0)
+++ inim/2010/page/binarization/functors.hh (revision 0)
@@ -0,0 +1,23 @@
+#ifndef FUNCTORS_HH
+# define FUNCTORS_HH
+
+namespace binarization
+{
+ /// Functor giving the square of a value as an unsigned long long.
+ template <class T>
+ struct Square
+ {
+   Square();
+   unsigned long long operator()(T value) const;
+ };
+
+ /// Functor giving a value converted to unsigned long long.
+ template <class T>
+ struct Identity
+ {
+   Identity();
+   unsigned long long operator()(T value) const;
+ };
+}
+
+# include "functors.hxx"
+
+#endif /* !FUNCTORS_HH */
Index: inim/2010/page/binarization/integral-image.hh
--- inim/2010/page/binarization/integral-image.hh (revision 0)
+++ inim/2010/page/binarization/integral-image.hh (revision 0)
@@ -0,0 +1,34 @@
+#ifndef INTEGRAL_IMAGE_HH
+# define INTEGRAL_IMAGE_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/int_u8.hh>
+# include <mln/value/int_u32.hh>
+# include <functors.hh>
+
+namespace binarization
+{
+ using namespace mln;
+
+ /// Integral image of a 2D image: the value stored at (row, col) is the
+ /// sum of func(pixel) over rows 0..row and columns 0..col.
+ ///
+ /// The table is allocated in the constructor and released in the
+ /// destructor.  Objects are therefore not copyable: a member-wise copy
+ /// would share the table and free it twice.
+ template< typename T >
+ class IntegralImage
+ {
+ public:
+   /// \param i    Source image.
+   /// \param func Functor applied to each pixel value before summation.
+   template<class F>
+   IntegralImage(const image2d<T>& i, F func);
+   ~IntegralImage();
+
+   /// Accumulated sum at (row, col); indices are not range-checked.
+   unsigned long long operator()(int row, int col) const;
+
+ private:
+   // Copy is disabled: declared private and never defined.
+   IntegralImage(const IntegralImage&);
+   IntegralImage& operator=(const IntegralImage&);
+
+   unsigned long long **img_;
+   int nrows_;
+   int ncols_;
+ };
+}
+
+# include "integral-image.hxx"
+
+#endif /* !INTEGRAL_IMAGE_HH */
Index: inim/2010/page/binarization/binarization.hh
--- inim/2010/page/binarization/binarization.hh (revision 0)
+++ inim/2010/page/binarization/binarization.hh (revision 0)
@@ -0,0 +1,36 @@
+#ifndef BINARIZATION_HH
+# define BINARIZATION_HH
+
+# include <algorithm>
+# include <cmath>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/int_u8.hh>
+# include <mln/value/int_u32.hh>
+
+# include <integral-image.hh>
+# include <functors.hh>
+
+namespace binarization
+{
+ using namespace mln;
+
+ /**
+ * Sauvola thresholding binarization
+ * using Integral Images.
+ *
+ * A pixel of dst is set to true when the local threshold, computed
+ * from the mean and standard deviation of src in a window around
+ * the pixel, is strictly greater than the value of src at that pixel;
+ * it is set to false otherwise.
+ *
+ * \param[in] src The source image
+ * \param[out] dst The destination image; its dimensions drive the
+ * iteration, so it is expected to be as large as src
+ * \param[in] w Value of the window size; the window extends w / 2
+ * pixels on each side of the current pixel
+ */
+
+ template<typename T1, typename T2>
+ void
+ sauvola_threshold(const image2d<T1>& src,
+ image2d<T2>& dst,
+ unsigned int w);
+}
+
+# include <binarization.hxx>
+
+#endif /* !BINARIZATION_HH */
Index: inim/2010/page/binarization/grayscale.hh
--- inim/2010/page/binarization/grayscale.hh (revision 0)
+++ inim/2010/page/binarization/grayscale.hh (revision 0)
@@ -0,0 +1,32 @@
+#ifndef GRAYSCALE_HH
+# define GRAYSCALE_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/value/int_u8.hh>
+# include <mln/data/transform.hh>
+# include <mln/literal/colors.hh>
+
+namespace mln
+{
+ /// Functor mapping an RGB color to a gray level: the sum of the three
+ /// components divided by 3.
+ struct lum : Function_v2v<lum>
+ {
+   typedef value::int_u8 result;
+
+   result operator()(const value::rgb8& color) const
+   {
+     result mean = (color.red() + color.green() + color.blue()) / 3;
+     return mean;
+   }
+ };
+}
+
+namespace binarization
+{
+ using namespace mln;
+
+ /**
+ * Gray-level conversion: dst receives the image obtained by applying
+ * the lum functor to every pixel of src.
+ *
+ * \param[in] src The color source image
+ * \param[out] dst The gray-level destination image (assigned)
+ */
+ void grayscale(const image2d<value::rgb8>& src,
+ image2d<value::int_u8>& dst);
+}
+
+# include <grayscale.hxx>
+
+#endif /* !GRAYSCALE_HH */
Index: inim/2010/page/binarization/functors.hxx
--- inim/2010/page/binarization/functors.hxx (revision 0)
+++ inim/2010/page/binarization/functors.hxx (revision 0)
@@ -0,0 +1,33 @@
+#ifndef FUNCTORS_HXX
+# define FUNCTORS_HXX
+
+// Declarations of Square and Identity.  This file used to include itself
+// here, which the include guard turned into a no-op, so the declarations
+// were missing whenever this file was included first.
+# include "functors.hh"
+
+namespace binarization
+{
+  template<typename T>
+  Square<T>::Square()
+  { }
+
+  /// \return val * val, computed on unsigned long long.
+  template<typename T>
+  unsigned long long
+  Square<T>::operator()(T val) const
+  {
+    const unsigned long long v = static_cast<unsigned long long>(val);
+    return v * v;
+  }
+
+  template<typename T>
+  Identity<T>::Identity()
+  { }
+
+  /// \return val converted to unsigned long long.
+  template<typename T>
+  unsigned long long
+  Identity<T>::operator()(T val) const
+  {
+    return static_cast<unsigned long long>(val);
+  }
+
+}
+
+#endif /* !FUNCTORS_HXX */
Index: inim/2010/page/binarization/integral-image.hxx
--- inim/2010/page/binarization/integral-image.hxx (revision 0)
+++ inim/2010/page/binarization/integral-image.hxx (revision 0)
@@ -0,0 +1,57 @@
+#ifndef INTEGRAL_IMAGE_HXX
+# define INTEGRAL_IMAGE_HXX
+
+# include "integral-image.hh"
+
+namespace binarization
+{
+ /// Build the integral image of \p i: entry (row, col) receives the sum of
+ /// func(pixel) over rows 0..row and columns 0..col.
+ ///
+ /// Pixels are read at point2d(row, col) for row in [0, nrows) and col in
+ /// [0, ncols), so the domain of \p i is assumed to start at (0, 0).
+ ///
+ /// An image without rows or columns yields an empty table; the previous
+ /// code read and wrote entry (0, 0) unconditionally.
+ ///
+ /// FIXME: sums may overflow on very large images, and the result of
+ /// malloc is not checked.
+ template<typename T>
+ template<class F>
+ IntegralImage<T>::IntegralImage(const image2d<T>& i, F func)
+   : img_ (NULL),
+     nrows_ (i.nrows()),
+     ncols_ (i.ncols())
+ {
+   if (nrows_ <= 0 || ncols_ <= 0)
+   {
+     // Nothing to allocate; keep the destructor's loop empty.
+     nrows_ = 0;
+     ncols_ = 0;
+     return;
+   }
+
+   img_ = static_cast<unsigned long long**>
+     (malloc(sizeof (unsigned long long*) * nrows_));
+   for (int n = 0; n < nrows_; ++n)
+     img_[n] = static_cast<unsigned long long*>
+       (malloc(sizeof (unsigned long long) * ncols_));
+
+   // Single pass: the entry is the running sum of the current row up to
+   // `col', plus the entry just above.
+   for (def::coord row = 0; row < static_cast<def::coord>(i.nrows()); ++row)
+   {
+     unsigned long long row_sum = 0;
+     for (def::coord col = 0; col < static_cast<def::coord>(i.ncols()); ++col)
+     {
+       row_sum += func(i(point2d(row, col)));
+       img_[row][col] = row_sum;
+       if (row > 0)
+         img_[row][col] += img_[row - 1][col];
+     }
+   }
+ }
+
+ /// Release every row of the table, then the array of row pointers.
+ template< typename T >
+ IntegralImage<T>::~IntegralImage()
+ {
+   int row = 0;
+   while (row < nrows_)
+   {
+     free(img_[row]);
+     ++row;
+   }
+   free(img_);
+ }
+
+ /// Read the table entry at (row, col); indices are not range-checked.
+ template< typename T >
+ unsigned long long
+ IntegralImage<T>::operator()(int row,
+                              int col) const
+ {
+   const unsigned long long* line = img_[row];
+   return line[col];
+ }
+
+}
+#endif /* !INTEGRAL_IMAGE_HXX */
Index: inim/2010/page/README
--- inim/2010/page/README (revision 0)
+++ inim/2010/page/README (revision 0)
@@ -0,0 +1,51 @@
+Donc ce que l'on a fait consiste en trois parties:
+ - Niveaux de gris.
+ - Binarisation.
+ - Detection des colonnes de texte.
+
+- Niveaux de gris:
+ Pas besoin de trop d'explications ici je pense.
+
+ void binarization::grayscale(const image2d<rgb8>& src,
+ image2d<int_u8>& dst)
+
+- Binarisation:
+ On a effectué une binarisation de Sauvola, qui est un algo de seuillage
+ local (vs global comme Otsu), en se servant d'"Integral Images" pour que
+ ça trace un peu plus que l'algo de base. Cet algo retourne une image
+ de booléens avec en blanc les éléments de l'image et en noir le reste.
+ Il n'a pas de problème particulier, si ce n'est que si l'image est trop
+ grosse (vraiment très grosse) avec plein d'aplats de blanc, on
+ va dépasser la capacité maximale des unsigned long long utilisés pour les
+ Integral Images. Un lien vers le papier de référence sur Sauvola:
+ http://pubs.iupr.org/DATA/2007-IUPR-11Sep_1129.pdf
+
+ La fonction maintenant, avec `w` étant le parametre de taille
+ de la fenetre utilisée pour les calculs de seuillages locaux:
+
+ void binarization::sauvola_threshold(const image2d<int_u8>& src,
+ image2d<bool>& dst,
+ unsigned int w);
+
+ Pour info, Ocropus utilise le même algo, sauf que pour déterminer les
+ paramètres optimaux pour une image, ils font plusieurs binarisations
+ et segmentations jusqu'à ce qu'ils considèrent que le résultat est
+ bon.
+
+
+- Whitespace:
+
+ On procède par la recherche des plus grandes zones blanches dans
+ l'image binarisée. C'est un algo itératif qui cherche à merger
+ des composantes connexes pour avoir les plus grands aplats de
+ couleur. Le résultat, c'est qu'à partir d'une image binarisée,
+ on va donner une autre image binarisée avec en blanc les colonnes
+ et paragraphes de texte et en noir le fond.
+
+ void whitespace::whitespace_compute (image2d<bool>& src,
+ image2d<bool>& dst);
+
+
+N'hésitez pas à nous spammer pour toute forme de question
+là-dessus !
+
Index: inim/2010/page/Makefile
--- inim/2010/page/Makefile (revision 0)
+++ inim/2010/page/Makefile (revision 0)
@@ -0,0 +1,43 @@
+CXX=g++
+INCLUDES=-I. -I./binarization
+CXX_FLAGS=-W -Wall -DNDEBUG -O3 -ffast-math
+CXXFLAGS = -I.
+OBJS= main.o
+# Every header of the project is included, directly or not, by main.cc.
+# Listing them as prerequisites replaces the former `.PHONY: main.cc',
+# which forced main.o to be rebuilt on every run.
+HEADERS=$(wildcard *.hh *.hxx binarization/*.hh binarization/*.hxx)
+TARGET=hameli_a
+DISTDIR=hameli_a
+DISTFILE= Makefile README AUTHORS binarization \
+	check_size_functor.hh main.cc \
+	white.hh white.hxx
+
+all: $(TARGET)
+
+$(TARGET): $(OBJS)
+	$(CXX) $(DEBUG_FLAGS) $^ -o $@
+
+clean:
+	rm -f *~ *.o
+
+distclean: clean
+	rm -rf $(TARGET)
+	rm -rf *_result
+
+dist: distclean
+	rm -rf $(DISTDIR)
+	mkdir $(DISTDIR)
+	chmod 755 $(DISTDIR)
+	cp -r -t $(DISTDIR) $(DISTFILE)
+	tar czf $(DISTDIR).tar.gz $(DISTDIR)
+	rm -rf $(DISTDIR)
+
+# `&&' stops at the first failing step; $(MAKE) propagates make's options.
+distcheck: dist
+	rm -rf _build
+	mkdir _build
+	cp $(DISTDIR).tar.gz _build
+	cd _build && tar xf $(DISTDIR).tar.gz
+	cd _build/$(DISTDIR) && $(MAKE)
+
+%.o: %.cc $(HEADERS)
+	$(CXX) $(CXX_FLAGS) $(CXXFLAGS) $(INCLUDES) $(DEBUG_FLAGS) -c $<
+
+.PHONY: all clean distclean dist distcheck
+#
\ No newline at end of file
Index: inim/2010/page/white.hh
--- inim/2010/page/white.hh (revision 0)
+++ inim/2010/page/white.hh (revision 0)
@@ -0,0 +1,36 @@
+#ifndef WHITE_HH
+# define WHITE_HH
+
+#include <cmath>
+#include <iostream>
+#include <mln/accu/math/sum.hh>
+#include <mln/accu/shape/bbox.hh>
+#include <mln/binarization/threshold.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/core/image/image2d.hh>
+#include <mln/data/fill.hh>
+#include <mln/labeling/blobs.hh>
+#include <mln/labeling/compute.hh>
+#include <mln/labeling/compute_image.hh>
+#include <mln/labeling/wrap.hh>
+#include <mln/util/array.hh>
+#include <mln/util/fibonacci_heap.hh>
+#include <mln/value/int_u32.hh>
+#include <mln/value/label_16.hh>
+#include <mln/labeling/relabel.hh>
+#include <mln/labeling/pack.hh>
+#include <mln/border/fill.hh>
+#include <mln/labeling/colorize.hh>
+#include <check_size_functor.hh>
+
+namespace whitespace
+{
+ using namespace mln;
+
+ /**
+ * Compute the image of object zones of a binarized page.
+ *
+ * \param[in,out] ima The binarized image; its border is overwritten
+ * \param[out] cleaned The resulting binary image (assigned)
+ */
+ void whitespace_compute (image2d<bool>& ima,
+ image2d<bool>& cleaned);
+}
+
+# include <white.hxx>
+
+#endif /* !WHITE_HH */