
* scribo/primitive/extract/elements.hh, * src/primitive/extract/extract_discontinued_hlines.cc, * src/primitive/extract/extract_discontinued_lines.cc, * src/primitive/extract/extract_discontinued_vlines.cc, * src/primitive/extract/extract_thick_hlines.cc, * src/primitive/extract/extract_thick_vlines.cc: Rename to... * scribo/primitive/extract/non_text.hh, * src/primitive/extract/discontinued_hlines.cc, * src/primitive/extract/discontinued_lines.cc, * src/primitive/extract/discontinued_vlines.cc, * src/primitive/extract/thick_hlines.cc, * src/primitive/extract/thick_vlines.cc: ... this. * scribo/toolchain/internal/content_in_doc_functor.hh: Rename function call from extract::elements to extract::non_text. * src/primitive/extract/Makefile.am: Update target names. --- scribo/ChangeLog | 23 +++++++ .../primitive/extract/{elements.hh => non_text.hh} | 64 +++++++++++--------- .../toolchain/internal/content_in_doc_functor.hh | 2 +- scribo/src/primitive/extract/Makefile.am | 26 ++++---- ...scontinued_hlines.cc => discontinued_hlines.cc} | 0 ...discontinued_lines.cc => discontinued_lines.cc} | 0 ...scontinued_vlines.cc => discontinued_vlines.cc} | 0 .../{extract_thick_hlines.cc => thick_hlines.cc} | 0 .../{extract_thick_vlines.cc => thick_vlines.cc} | 0 9 files changed, 73 insertions(+), 42 deletions(-) rename scribo/scribo/primitive/extract/{elements.hh => non_text.hh} (82%) rename scribo/src/primitive/extract/{extract_discontinued_hlines.cc => discontinued_hlines.cc} (100%) rename scribo/src/primitive/extract/{extract_discontinued_lines.cc => discontinued_lines.cc} (100%) rename scribo/src/primitive/extract/{extract_discontinued_vlines.cc => discontinued_vlines.cc} (100%) rename scribo/src/primitive/extract/{extract_thick_hlines.cc => thick_hlines.cc} (100%) rename scribo/src/primitive/extract/{extract_thick_vlines.cc => thick_vlines.cc} (100%) diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 7b409a2..fa99f5b 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ 
-1,5 +1,28 @@ 2011-02-17 Guillaume Lazzara <z@lrde.epita.fr> + Rename files in Scribo. + + * scribo/primitive/extract/elements.hh, + * src/primitive/extract/extract_discontinued_hlines.cc, + * src/primitive/extract/extract_discontinued_lines.cc, + * src/primitive/extract/extract_discontinued_vlines.cc, + * src/primitive/extract/extract_thick_hlines.cc, + * src/primitive/extract/extract_thick_vlines.cc: Rename to... + + * scribo/primitive/extract/non_text.hh, + * src/primitive/extract/discontinued_hlines.cc, + * src/primitive/extract/discontinued_lines.cc, + * src/primitive/extract/discontinued_vlines.cc, + * src/primitive/extract/thick_hlines.cc, + * src/primitive/extract/thick_vlines.cc: ... this. + + * scribo/toolchain/internal/content_in_doc_functor.hh: Rename + function call from extract::elements to extract::non_text. + + * src/primitive/extract/Makefile.am: Update target names. + +2011-02-17 Guillaume Lazzara <z@lrde.epita.fr> + Add Base64 conversion routines. * scribo/scribo/convert/from_base64.hh, diff --git a/scribo/scribo/primitive/extract/elements.hh b/scribo/scribo/primitive/extract/non_text.hh similarity index 82% rename from scribo/scribo/primitive/extract/elements.hh rename to scribo/scribo/primitive/extract/non_text.hh index ddf2c92..a017f1f 100644 --- a/scribo/scribo/primitive/extract/elements.hh +++ b/scribo/scribo/primitive/extract/non_text.hh @@ -26,12 +26,12 @@ /// \file /// -/// \brief Find in a document elements which are not text. +/// \brief Find non-text elements in a document. /// /// \fixme To be optimized! 
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH -# define SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH +#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH +# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH # include <mln/core/image/image2d.hh> # include <mln/core/alias/neighb2d.hh> @@ -58,6 +58,10 @@ # include <mln/clustering/kmean_rgb.hh> # include <mln/fun/v2v/rgb8_to_rgbn.hh> +# include <mln/util/timer.hh> + +# include <mln/io/pbm/save.hh> + namespace scribo { @@ -72,7 +76,7 @@ namespace scribo template <typename L, typename I> component_set<L> - elements(const document<L>& doc, const Image<I>& input); + non_text(const document<L>& doc, const Image<I>& input); # ifndef MLN_INCLUDE_ONLY @@ -107,9 +111,9 @@ namespace scribo template <typename L, typename I> component_set<L> - elements(const document<L>& doc, const Image<I>& input_) + non_text(const document<L>& doc, const Image<I>& input_) { - trace::entering("scribo::primitive::extract::elements"); + trace::entering("scribo::primitive::extract::non_text"); const I& input = exact(input_); mln_precondition(doc.is_valid()); @@ -135,9 +139,16 @@ namespace scribo image2d<t_rgb5> img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5()); + // DEBUG + io::pbm::save(content, "text_area.pbm"); + + mln::util::timer t; + t.start(); img_lbl8 = mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_(); data::fill((img_lbl8 | !pw::value(content)).rw(), 0u); + t.stop(); + std::cout << t << std::endl; mln::util::array<unsigned> card = mln::labeling::compute(accu::math::count<value::label_8>(), @@ -162,15 +173,25 @@ namespace scribo std::cout << "Removing small elements" << std::endl; { - image2d<bool> elts; - initialize(elts, img_lbl8); - data::fill(elts, false); - data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true); + image2d<bool> elts; + initialize(elts, img_lbl8); + data::fill(elts, false); + data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true); + + // DEBUG + 
data::fill((elts | (pw::value(doc.line_seps()) != pw::cst(0))).rw(), + false); + // END OF DEBUG + - scribo::def::lbl_type nlabels; - elts = filter::components_small(elts, c8(), nlabels, 40); + scribo::def::lbl_type nlabels; + elts = filter::components_small(elts, c8(), nlabels, 40); - output = primitive::extract::components(elts, c8(), nlabels); + // DEBUG + io::pbm::save(elts, "elements.pbm"); + // END OF DEBUG + + output = primitive::extract::components(elts, c8(), nlabels); } @@ -207,20 +228,7 @@ namespace scribo } } -// mln::io::pbm::save(merged_elts, "merged_elts.pbm"); - -// mln::util::array<image2d<value::rgb8> > elt_ima; -// unsigned i = 0; -// for_all_comps(c, elt_comp) -// if (elt_comp(c).is_valid()) -// { -// elt_ima.append(preprocessing::crop(doc.image(), elt_comp(c).bbox())); -// mln::io::ppm::save(elt_ima(i), mln::debug::filename("elt.ppm", i)); -// ++i; -// } - - - trace::exiting("scribo::primitive::extract::elements"); + trace::exiting("scribo::primitive::extract::non_text"); return output; } @@ -233,4 +241,4 @@ namespace scribo } // end of namespace scribo -#endif // ! SCRIBO_PRIMITIVE_EXTRACT_ELEMENTS_HH +#endif // ! 
SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh index b8aa56d..ed691e8 100644 --- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh +++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh @@ -463,7 +463,7 @@ namespace scribo // Extract other Elements on_new_progress_label("Extracting Elements"); component_set<L> - elements = scribo::primitive::extract::elements(doc, original_image); + elements = scribo::primitive::extract::non_text(doc, original_image); on_progress(); diff --git a/scribo/src/primitive/extract/Makefile.am b/scribo/src/primitive/extract/Makefile.am index 2c8188e..a46cd68 100644 --- a/scribo/src/primitive/extract/Makefile.am +++ b/scribo/src/primitive/extract/Makefile.am @@ -1,5 +1,5 @@ -# Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -# (LRDE). +# Copyright (C) 2009, 2010, 2011 EPITA Research and Development +# Laboratory (LRDE). # # This file is part of Olena. 
# @@ -18,17 +18,17 @@ include $(top_srcdir)/scribo/scribo.mk -noinst_PROGRAMS = \ - extract_discontinued_lines \ - extract_discontinued_vlines \ - extract_discontinued_hlines \ - extract_thick_vlines \ - extract_thick_hlines \ +noinst_PROGRAMS = \ + discontinued_lines \ + discontinued_vlines \ + discontinued_hlines \ + thick_vlines \ + thick_hlines \ lines_pattern -extract_discontinued_lines_SOURCES = extract_discontinued_lines.cc -extract_discontinued_vlines_SOURCES = extract_discontinued_vlines.cc -extract_discontinued_hlines_SOURCES = extract_discontinued_hlines.cc -extract_thick_vlines_SOURCES = extract_thick_vlines.cc -extract_thick_hlines_SOURCES = extract_thick_hlines.cc +discontinued_lines_SOURCES = discontinued_lines.cc +discontinued_vlines_SOURCES = discontinued_vlines.cc +discontinued_hlines_SOURCES = discontinued_hlines.cc +thick_vlines_SOURCES = thick_vlines.cc +thick_hlines_SOURCES = thick_hlines.cc lines_pattern_SOURCES = lines_pattern.cc diff --git a/scribo/src/primitive/extract/extract_discontinued_hlines.cc b/scribo/src/primitive/extract/discontinued_hlines.cc similarity index 100% rename from scribo/src/primitive/extract/extract_discontinued_hlines.cc rename to scribo/src/primitive/extract/discontinued_hlines.cc diff --git a/scribo/src/primitive/extract/extract_discontinued_lines.cc b/scribo/src/primitive/extract/discontinued_lines.cc similarity index 100% rename from scribo/src/primitive/extract/extract_discontinued_lines.cc rename to scribo/src/primitive/extract/discontinued_lines.cc diff --git a/scribo/src/primitive/extract/extract_discontinued_vlines.cc b/scribo/src/primitive/extract/discontinued_vlines.cc similarity index 100% rename from scribo/src/primitive/extract/extract_discontinued_vlines.cc rename to scribo/src/primitive/extract/discontinued_vlines.cc diff --git a/scribo/src/primitive/extract/extract_thick_hlines.cc b/scribo/src/primitive/extract/thick_hlines.cc similarity index 100% rename from 
scribo/src/primitive/extract/extract_thick_hlines.cc rename to scribo/src/primitive/extract/thick_hlines.cc diff --git a/scribo/src/primitive/extract/extract_thick_vlines.cc b/scribo/src/primitive/extract/thick_vlines.cc similarity index 100% rename from scribo/src/primitive/extract/extract_thick_vlines.cc rename to scribo/src/primitive/extract/thick_vlines.cc -- 1.5.6.5