---
scribo/src/Makefile.am | 10 ++
scribo/src/text_in_picture_neg.cc | 195 +++++++++++++++++++++++++++++++++++++
2 files changed, 205 insertions(+), 0 deletions(-)
create mode 100644 scribo/src/text_in_picture_neg.cc
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index cd7618c..9efa0da 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -70,6 +70,16 @@ if HAVE_MAGICKXX
-lpthread \
$(MAGICKXX_LDFLAGS)
+
+ utilexec_PROGRAMS += text_in_picture_neg
+ text_in_picture_neg_SOURCES = text_in_picture_neg.cc
+ text_in_picture_neg_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ text_in_picture_neg_LDFLAGS = $(AM_LDFLAGS) \
+ -lpthread \
+ $(MAGICKXX_LDFLAGS)
+
+
if HAVE_TESSERACT
utilexec_PROGRAMS += text_recognition_in_picture
diff --git a/scribo/src/text_in_picture_neg.cc b/scribo/src/text_in_picture_neg.cc
new file mode 100644
index 0000000..e3078c6
--- /dev/null
+++ b/scribo/src/text_in_picture_neg.cc
@@ -0,0 +1,195 @@
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <libgen.h>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/image/imorph/tr_image.hh>
+#include <mln/core/alias/neighb2d.hh>
+
+#include <mln/labeling/colorize.hh>
+
+#include <mln/data/stretch.hh>
+
+#include <mln/io/pbm/all.hh>
+#include <mln/io/ppm/save.hh>
+#include <mln/io/magick/all.hh>
+
+#include <mln/math/min.hh>
+
+#include <mln/logical/not.hh>
+
+#include <mln/literal/colors.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/value/label_16.hh>
+
+#include <mln/fun/v2v/rgb_to_int_u.hh>
+
+#include <mln/data/wrap.hh>
+
+#include <mln/draw/box.hh>
+
+#include <mln/geom/translate.hh>
+
+#include <mln/subsampling/antialiased.hh>
+
+#include <scribo/draw/bounding_boxes.hh>
+#include <scribo/draw/groups_bboxes.hh>
+
+#include <scribo/binarization/sauvola_ms.hh>
+#include <scribo/binarization/sauvola.hh>
+
+#include <scribo/primitive/extract/components.hh>
+
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/with_single_left_link.hh>
+#include <scribo/primitive/link/with_single_right_link.hh>
+
+#include <scribo/primitive/group/apply.hh>
+#include <scribo/primitive/group/from_double_link.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+
+#include <scribo/primitive/regroup/from_single_left_link.hh>
+// #include <scribo/primitive/regroup/from_single_left_link_wrt_h_ratio.hh>
+
+#include <scribo/filter/object_groups_with_holes.hh>
+
+#include <scribo/filter/object_links_bbox_h_ratio.hh>
+#include <scribo/filter/object_links_bbox_overlap.hh>
+
+#include <scribo/filter/common/objects_photo.hh>
+
+#include <scribo/filter/object_groups_small.hh>
+#include <scribo/filter/object_groups_v_thickness.hh>
+
+#include <scribo/debug/highlight_text_area.hh>
+#include <scribo/debug/text_areas_image.hh>
+
+#include <scribo/debug/decision_image.hh>
+#include <scribo/debug/save_bboxes_image.hh>
+#include <scribo/debug/save_linked_bboxes_image.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/split_bg_fg.hh>
+
+#include <scribo/make/debug_filename.hh>
+
+#include <scribo/toolchain/text_in_picture.hh>
+
+#include <scribo/primitive/merge/components.hh>
+
+#include <mln/util/timer.hh>
+#include <mln/core/var.hh>
+
+// Descriptions of the command line arguments, printed by
+// scribo::debug::usage().  Each entry is { argument name, description };
+// the table is terminated by a {0, 0} sentinel.
+const char *args_desc[][2] =
+{
+  { "input.*", "A color image." },
+  { "output.ppm", "A color image where the text is highlighted." },
+  { "debug_output_dir", "Directory where debug images will be saved" },
+  { "lambda", "Lambda value used for foreground extraction" },
+  {0, 0}
+};
+
+
+
+/// Entry point: look for text in a picture by running the
+/// text_in_picture toolchain twice on the same input and merging both
+/// component sets.
+///
+/// Command line (see also the usage message below):
+///   argv[1]  input image, loaded through io::magick::load.
+///   argv[2]  output PPM receiving the colorized merged labeling.
+///   argv[3]  optional; a non-zero value enables background removal.
+///   argv[4]  optional; a non-zero value enables multi-scale binarization.
+///   argv[5]  optional; prefix used to build the debug output file names.
+///   argv[6]  optional; max_dim_size, forwarded to the toolchain.
+///   argv[7]  optional; lambda, forwarded to the toolchain.
+int main(int argc, char* argv[])
+{
+  using namespace scribo;
+  using namespace scribo::primitive;
+  using namespace mln;
+
+  // At least the input and output files are required; at most seven
+  // arguments are understood.
+  if (argc < 3 || argc > 8)
+    return scribo::debug::usage(argv,
+				"Find text in a photo.\n\n"
+				"Common usage: ./text_in_picture_neg input.*"
+				" output.ppm 1 1",
+				"input.* output.ppm <bg/fg enabled>"
+				" <sauvola_ms enabled> "
+				"[debug_output_dir] [max_dim_size] [lambda]",
+				args_desc);
+
+  // Null means that no debug output has been requested.
+  char *out_base_dir = 0;
+  if (argc > 5)
+    out_base_dir = argv[5];
+
+  trace::entering("main");
+
+  image2d<value::rgb8> input_rgb;
+  io::magick::load(input_rgb, argv[1]);
+
+  // Optional numeric settings; 0 is used when they are not provided.
+  unsigned max_dim_size = 0;
+  if (argc >= 7)
+    max_dim_size = atoi(argv[6]);
+
+  unsigned lambda = 0;
+  if (argc == 8)
+    lambda = atoi(argv[7]);
+
+  // Optional boolean switches, given as integers.
+  bool bg_removal = false;
+  if (argc > 3 && atoi(argv[3]) != 0)
+    bg_removal = true;
+
+  bool multi_scale_bin = false;
+  if (argc > 4 && atoi(argv[4]) != 0)
+    multi_scale_bin = true;
+
+  typedef image2d<value::label_16> L;
+
+  // The toolchain is run twice; only its fourth argument differs.
+  // NOTE(review): that flag presumably selects processing of the
+  // negated image -- confirm against toolchain::text_in_picture.
+  component_set<L>
+    comps = toolchain::text_in_picture(input_rgb, bg_removal, multi_scale_bin,
+				       false,
+				       max_dim_size, lambda, out_base_dir);
+
+  component_set<L>
+    comps_neg = toolchain::text_in_picture(input_rgb, bg_removal, multi_scale_bin,
+					   true,
+					   max_dim_size, lambda, out_base_dir);
+
+  component_set<L> merged_comps = primitive::merge::components(comps, comps_neg);
+
+  // Main result: the merged labeling, one color per label.
+  io::ppm::save(mln::labeling::colorize(value::rgb8(),
+					merged_comps.labeled_image(),
+					merged_comps.nelements()),
+		argv[2]);
+
+  // Extra debug images, written next to the given prefix.
+  if (out_base_dir)
+  {
+    io::ppm::save(scribo::debug::highlight_text_area(input_rgb, merged_comps),
+		  std::string(out_base_dir) + "_input_with_bboxes.ppm");
+    io::ppm::save(scribo::debug::text_areas_image(input_rgb, merged_comps),
+		  std::string(out_base_dir) + "_out_text.ppm");
+  }
+
+  std::cout << "# objects = " << merged_comps.nelements() << std::endl;
+
+  // Balance the trace::entering("main") call above.
+  trace::exiting("main");
+  return 0;
+}
--
1.5.6.5
---
scribo/scribo/primitive/merge/components.hh | 121 +++++++++++++++++++++++++++
1 files changed, 121 insertions(+), 0 deletions(-)
create mode 100644 scribo/scribo/primitive/merge/components.hh
diff --git a/scribo/scribo/primitive/merge/components.hh b/scribo/scribo/primitive/merge/components.hh
new file mode 100644
index 0000000..7570d2e
--- /dev/null
+++ b/scribo/scribo/primitive/merge/components.hh
@@ -0,0 +1,121 @@
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+///
+
+#ifndef SCRIBO_PRIMITIVE_MERGE_COMPONENTS_HH
+# define SCRIBO_PRIMITIVE_MERGE_COMPONENTS_HH
+
+# include <mln/core/image/vmorph/fun_image.hh>
+
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace merge
+ {
+
+ using namespace mln;
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+      namespace internal
+      {
+
+	/// Value-to-value function adding a fixed offset to every
+	/// non-zero label.  A zero label is mapped to literal::zero.
+	template <typename V>
+	struct rebase_label : mln::Function_v2v<rebase_label<V> >
+	{
+	  typedef V result;
+
+	  /// Build a function whose offset is 0.
+	  rebase_label()
+	    : base_label_(0)
+	  {}
+
+	  /// Build a function shifting labels by \p base_label.
+	  rebase_label(const V& base_label)
+	    : base_label_(base_label)
+	  {}
+
+	  /// Return \p v shifted by the offset, or zero if \p v is zero.
+	  V operator()(const V& v) const
+	  {
+	    if (v)
+	    {
+	      // The sum is computed on unsigned values, then converted
+	      // back to V on return.
+	      const unsigned shifted = unsigned(base_label_) + unsigned(v);
+	      return shifted;
+	    }
+	    return literal::zero;
+	  }
+
+
+	  /// Offset added to non-zero labels.
+	  V base_label_;
+	};
+
+
+      } // end of namespace scribo::primitive::merge::internal
+
+
+      /// Merge two component sets into a single one.
+      ///
+      /// The labeled image of \p lhs is duplicated; then every site
+      /// still labeled 0 receives the label found in \p rhs, shifted
+      /// by the number of elements in \p lhs.  The returned set is
+      /// built from that image and the sum of both element counts.
+      ///
+      /// \param[in] lhs First component set; its labels are kept as is.
+      /// \param[in] rhs Second component set; its labels are rebased.
+      ///
+      /// \return A component set built from the merged labeling.
+      template <typename L>
+      component_set<L>
+      components(const component_set<L>& lhs, const component_set<L>& rhs)
+      {
+	trace::entering("scribo::primitive::merge::components");
+
+	typedef mln_value(L) V;
+	typedef internal::rebase_label<V> rebase_t;
+
+	const L& first_lbl = lhs.labeled_image();
+	const L& second_lbl = rhs.labeled_image();
+
+	// Start from a copy of the first labeling.
+	L merged = duplicate(first_lbl);
+
+	// View of the second labeling with labels shifted past those of
+	// the first one.
+	rebase_t shift(lhs.nelements());
+	fun_image<rebase_t, L> shifted_rhs(shift, second_lbl);
+
+	// Only sites that are still unlabeled (value 0) are overwritten.
+	data::fill((merged | (pw::value(merged) == pw::cst(0))).rw(),
+		   shifted_rhs);
+
+	V total = unsigned(lhs.nelements()) + unsigned(rhs.nelements());
+	component_set<L> output(merged, total);
+
+	trace::exiting("scribo::primitive::merge::components");
+	return output;
+      }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::primitive::merge
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_PRIMITIVE_MERGE_COMPONENTS_HH
--
1.5.6.5
---
scribo/ChangeLog | 4 +
scribo/scribo/core/document.hh | 162 ++++++++++++++++++++++++++++++++++++++++
2 files changed, 166 insertions(+), 0 deletions(-)
create mode 100644 scribo/scribo/core/document.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 446ff89..9d47519 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,7 @@
+2010-09-02 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ * scribo/core/document.hh: Introduce new document structure.
+
2010-11-15 Guillaume Lazzara <z(a)lrde.epita.fr>
* demo/viewer/Makefile.am: Remove deprecated dependencies.
diff --git a/scribo/scribo/core/document.hh b/scribo/scribo/core/document.hh
new file mode 100644
index 0000000..93d9348
--- /dev/null
+++ b/scribo/scribo/core/document.hh
@@ -0,0 +1,162 @@
+// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_CORE_DOCUMENT_HH
+# define SCRIBO_CORE_DOCUMENT_HH
+
+/// \file
+///
+/// \brief Describes document content.
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/io/magick/load.hh>
+
+# include <scribo/core/component_set.hh>
+# include <scribo/core/line_set.hh>
+
+namespace scribo
+{
+
+  /// \brief Document content: the source image together with the text
+  /// lines and the components attached to it.
+  ///
+  /// \tparam L Labeled image type used by the line and component sets.
+  template <typename L>
+  struct document
+  {
+  public:
+
+    /// Build a document which is not bound to any file.
+    document();
+    /// Build a document from \p filename and load the image at once.
+    document(const char *filename);
+
+    /// Name of the file this document was built from.
+    const char * filename() const;
+
+    /// Tell whether the underlying image is valid.
+    bool is_valid() const;
+
+    /// Text lines attached to this document.
+    const line_set<L>& text() const;
+    void set_text(const line_set<L>& line);
+
+    /// Components attached to this document.
+    const component_set<L>& elements() const;
+    void set_elements(const component_set<L>& elements);
+
+    /// Image loaded from the file.  The type is fully qualified so
+    /// that it does not depend on a using-directive being visible.
+    const mln::image2d<mln::value::rgb8>& image() const;
+
+  private:
+    /// Load image_ from filename_.
+    void open();
+
+    // Not owned: the pointer given to the constructor is stored as is.
+    const char *filename_;
+    mln::image2d<mln::value::rgb8> image_;
+
+    line_set<L> lines_;
+    component_set<L> elements_;
+  };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+  // Default constructor.  filename_ is explicitly set to null so that
+  // filename() never returns an indeterminate pointer.
+  template <typename L>
+  document<L>::document()
+    : filename_(0)
+  {
+  }
+
+
+  // Store the file name and load the image immediately.
+  template <typename L>
+  document<L>::document(const char *filename)
+    : filename_(filename)
+  {
+    open();
+  }
+
+
+  // Return the stored file name pointer (null for a default-built
+  // document).
+  template <typename L>
+  const char *
+  document<L>::filename() const
+  {
+    return filename_;
+  }
+
+
+  // Load image_ from filename_ through io::magick::load.
+  template <typename L>
+  void
+  document<L>::open()
+  {
+    mln::io::magick::load(image_, filename_);
+  }
+
+
+  // A document is valid when its image is.
+  template <typename L>
+  bool
+  document<L>::is_valid() const
+  {
+    return image_.is_valid();
+  }
+
+
+  // Read access to the text lines.
+  template <typename L>
+  const line_set<L>&
+  document<L>::text() const
+  {
+    return lines_;
+  }
+
+
+  // Replace the text lines with a copy of LINE.
+  template <typename L>
+  void
+  document<L>::set_text(const line_set<L>& line)
+  {
+    lines_ = line;
+  }
+
+
+  // Read access to the components.
+  template <typename L>
+  const component_set<L>&
+  document<L>::elements() const
+  {
+    return elements_;
+  }
+
+
+  // Replace the components with a copy of ELEMENTS.
+  template <typename L>
+  void
+  document<L>::set_elements(const component_set<L>& elements)
+  {
+    elements_ = elements;
+  }
+
+
+  // Read access to the loaded image.  The return type is fully
+  // qualified, as for the image_ member.
+  template <typename L>
+  const mln::image2d<mln::value::rgb8>&
+  document<L>::image() const
+  {
+    return image_;
+  }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_CORE_DOCUMENT_HH
--
1.5.6.5
* arthur/xml_transform/README
* arthur/xml_transform/main.cc
* arthur/xml_transform/TODO: New.
---
scribo/sandbox/ChangeLog | 8 ++++++++
scribo/sandbox/arthur/xml_transform/README | 24 +++++++++++++-----------
scribo/sandbox/arthur/xml_transform/main.cc | 10 ++++++----
3 files changed, 27 insertions(+), 15 deletions(-)
diff --git a/scribo/sandbox/ChangeLog b/scribo/sandbox/ChangeLog
index 4583666..78c55d6 100644
--- a/scribo/sandbox/ChangeLog
+++ b/scribo/sandbox/ChangeLog
@@ -1,5 +1,13 @@
2010-08-06 Arthur Crepin-Leblond <crepin(a)lrde.epita.fr>
+ Open Document bug warning.
+
+ * arthur/xml_transform/README
+ * arthur/xml_transform/main.cc
+ * arthur/xml_transform/TODO: New.
+
+2010-08-06 Arthur Crepin-Leblond <crepin(a)lrde.epita.fr>
+
Fix man error.
* arthur/xml_transform/README
diff --git a/scribo/sandbox/arthur/xml_transform/README b/scribo/sandbox/arthur/xml_transform/README
index 33c4dd3..c594992 100644
--- a/scribo/sandbox/arthur/xml_transform/README
+++ b/scribo/sandbox/arthur/xml_transform/README
@@ -3,34 +3,36 @@ OPTIONS:
HTML output:
--html input.xml image output.html Creates HTML file, images are embedded inside in base 64 format.
- --html-base64 input.xml output.html Same as --html but input is a XML file with images encoded in base 64 inside.
- --html-full input.xml image output_dir Creates HTML file without converting images in base 64. Images + HTML file are placed in output_dir.
+ --html-base64 input.xml output.html Same as --html but input is a XML file with images encoded in base 64 inside.
+ --html-full input.xml image output_dir Creates HTML file without converting images in base 64. Images + HTML file are placed in output_dir.
PDF output:
--pdf input.xml image output.pdf Creates PDF file, images are embedded inside in base 64 format.
- --pdf-base64 input.xml output.pdf Same as --pdf but input is a XML file with images encoded in base 64 inside.
+ --pdf-base64 input.xml output.pdf Same as --pdf but input is a XML file with images encoded in base 64 inside.
SVG output (experimental, may not work with all files):
- --svg input.xml image output.svg Creates SVG file, images are embedded inside in base 64 format.
- --svg-base64 input.xml output.svg Same as --svg but input is a XML file with images encoded in base 64 inside.
+ --svg input.xml image output.svg Creates SVG file, images are embedded inside in base 64 format.
+ --svg-base64 input.xml output.svg Same as --svg but input is a XML file with images encoded in base 64 inside.
OpenDocument output:
- --oo-doc input.xml image output.odt Creates Open Document Writer (odt) file.
+ --oo-doc input.xml image output.odt Creates Open Document Writer (odt) file.
+ This is experimental, Open Office will warn you and ask you to repair the file which is corrupted, click on repair.
Base 64 operations:
- --to-base64 input.xml image output.xml Creates a container XML file. It will contain regions which are cropped and converted in base 64.
- --to-base64-no-crop input.xml image output.xml Same as --to-base64 but only the full picture will be converted, regions are not cropped
- --from-base64 input.xml output_dir Decodes a XML file which has been encoded in base 64, regions will be converted into png files and saved in output_dir.
+ --to-base64 input.xml image output.xmlc Creates a container XML file. It will contain regions which are cropped and converted in base 64.
+ --to-base64-no-crop input.xml image output.xmlc Same as --to-base64 but only the full picture will be converted, regions are not cropped
+ --from-base64 input.xml output_dir Decodes a XML file which has been encoded in base 64, regions will be converted into png files and saved in output_dir.
Requirements:
PDF : fop >= 0.95
HTML : xsltproc
SVG : trunk version of fop
-OpenDocument : ooconvert (included)
+OpenDocument : ooconvert (included) and OpenOffice >= 3.xx
+
BUILD:
-Chnage the environment variable QMAKE_CXXFLAGS in xml_transfrom.pro then
+Change the environment variable QMAKE_CXXFLAGS in xml_transfrom.pro then
mkdir _build
cd _build
diff --git a/scribo/sandbox/arthur/xml_transform/main.cc b/scribo/sandbox/arthur/xml_transform/main.cc
index d35e80d..687f53c 100644
--- a/scribo/sandbox/arthur/xml_transform/main.cc
+++ b/scribo/sandbox/arthur/xml_transform/main.cc
@@ -34,13 +34,14 @@ int main(int argc, char **argv)
"OpenDocument output:\n"
"\t\033[01m--oo-doc\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.odt\033[00m"
- "\t\t\tCreates Open Document Writer (odt) file.\n\n"
+ "\t\t\tCreates Open Document Writer (odt) file.\n"
+ "\tThis is experimental, Open Office will warn you and ask you to repair the file which is corrupted, click on repair.\n\n"
"Base 64 operations:\n"
- "\t\033[01m--to-base64\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.xml\033[00m"
+ "\t\033[01m--to-base64\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.xmlc\033[00m"
"\t\t\tCreates a container XML file. It will contain regions which are cropped and converted in base 64.\n"
- "\t\033[01m--to-base64-no-crop\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.xml\033[00m"
+ "\t\033[01m--to-base64-no-crop\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.xmlc\033[00m"
"\t\tSame as --to-base64 but only the full picture will be converted, regions are not cropped\n"
"\t\033[01m--from-base64\033[00m \033[04minput.xml\033[00m \033[04moutput_dir\033[00m"
@@ -50,7 +51,8 @@ int main(int argc, char **argv)
"PDF : fop >= 0.95\n"
"HTML : xsltproc\n"
"SVG : trunk version of fop\n"
- "OpenDocument : ooconvert (included)";
+ "OpenDocument : ooconvert (included) and OpenOffice >= 3.xx";
+
if (argc > 4)
{
std::string html = "--html";
--
1.5.6.5