* src/Makefile.am,
* src/debug/Makefile.am: Update targets.
* src/debug/non_text_mask.cc,
* src/debug/show_components_bboxes.cc,
* src/debug/show_groups_bboxes.cc,
* src/debug/show_links_bottom_aligned_2angles.cc,
* src/debug/show_links_top_aligned_2angles.cc,
* src/debug/show_paragraph_blocks.cc,
* src/non_text_components.cc: New.
* src/debug/show_links_several_right.cc: Removed. Deprecated.
---
scribo/ChangeLog | 17 ++
scribo/src/Makefile.am | 32 ++++-
scribo/src/debug/Makefile.am | 66 ++++++-
scribo/src/debug/non_text_mask.cc | 26 +++
scribo/src/debug/show_components_bboxes.cc | 59 ++++++
scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++
...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++---
scribo/src/debug/show_links_several_right.cc | 90 ----------
...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++----
scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++
scribo/src/non_text_components.cc | 128 ++++++++++++++
11 files changed, 652 insertions(+), 161 deletions(-)
create mode 100644 scribo/src/debug/non_text_mask.cc
create mode 100644 scribo/src/debug/show_components_bboxes.cc
create mode 100644 scribo/src/debug/show_groups_bboxes.cc
copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%)
delete mode 100644 scribo/src/debug/show_links_several_right.cc
copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%)
create mode 100644 scribo/src/debug/show_paragraph_blocks.cc
create mode 100644 scribo/src/non_text_components.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 32dda87..cc6e861 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New debug tools.
+
+ * src/Makefile.am,
+ * src/debug/Makefile.am: Update targets.
+
+ * src/debug/non_text_mask.cc,
+ * src/debug/show_components_bboxes.cc,
+ * src/debug/show_groups_bboxes.cc,
+ * src/debug/show_links_bottom_aligned_2angles.cc,
+ * src/debug/show_links_top_aligned_2angles.cc,
+ * src/debug/show_paragraph_blocks.cc,
+ * src/non_text_components.cc: New.
+
+ * src/debug/show_links_several_right.cc: Removed. Deprecated.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New non-text components extraction routine.
* scribo/make/text_blocks_image.hh,
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d6275fd..3a35528 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -96,18 +96,48 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS)
+if HAVE_QT
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
-lpthread
+ content_in_doc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ utilexec_PROGRAMS += non_text_components
+ non_text_components_SOURCES = non_text_components.cc
+ non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ non_text_components_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ non_text_components_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
endif HAVE_TESSERACT
diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am
index cdb1f30..60d7afa 100644
--- a/scribo/src/debug/Makefile.am
+++ b/scribo/src/debug/Makefile.am
@@ -18,6 +18,8 @@
include $(top_srcdir)/scribo/scribo.mk
noinst_PROGRAMS = \
+ show_components_bboxes \
+ show_groups_bboxes \
show_info_x_height \
show_info_median_inter_characters \
show_separators \
@@ -25,9 +27,9 @@ noinst_PROGRAMS = \
show_links_bbox_h_ratio \
show_links_bbox_overlap \
show_links_bottom_aligned \
+ show_links_bottom_aligned_2angles \
show_links_center_aligned \
show_links_non_h_aligned \
- show_links_several_right \
show_links_several_right_overlap \
show_links_single_down \
show_links_single_down_left_aligned \
@@ -40,15 +42,16 @@ noinst_PROGRAMS = \
show_links_single_up_left_aligned \
show_links_single_up_right_aligned \
show_links_top_aligned \
+ show_links_top_aligned_2angles \
show_objects_large \
show_objects_large_small \
show_objects_small \
show_objects_thick \
- show_objects_thin \
- show_stoppers \
- show_text_lines
+ show_objects_thin
+show_components_bboxes_SOURCES = show_components_bboxes.cc
+show_groups_bboxes_SOURCES = show_groups_bboxes.cc
show_info_x_height_SOURCES = show_info_x_height.cc
show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc
show_separators_SOURCES = show_separators.cc
@@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val
show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc
show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc
show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc
+show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc
show_links_center_aligned_SOURCES = show_links_center_aligned.cc
show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc
-show_links_several_right_SOURCES = show_links_several_right.cc
show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc
show_links_single_down_SOURCES = show_links_single_down.cc
show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc
@@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc
show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc
show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc
show_links_top_aligned_SOURCES = show_links_top_aligned.cc
+show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc
show_objects_large_SOURCES = show_objects_large.cc
show_objects_large_small_SOURCES = show_objects_large_small.cc
show_objects_small_SOURCES = show_objects_small.cc
show_objects_thick_SOURCES = show_objects_thick.cc
show_objects_thin_SOURCES = show_objects_thin.cc
-show_stoppers_SOURCES = show_stoppers.cc
-show_text_lines_SOURCES = show_text_lines.cc
if HAVE_MAGICKXX
+if HAVE_QT
+
+ noinst_PROGRAMS += show_paragraph_blocks
+ show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc
+ show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(QT_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS)
+ show_paragraph_blocks_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+ noinst_PROGRAMS += show_text_lines
+ show_text_lines_SOURCES = show_text_lines.cc
+ show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_text_lines_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_text_lines_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ noinst_PROGRAMS += show_stoppers
+ show_stoppers_SOURCES = show_stoppers.cc
+ show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_stoppers_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_stoppers_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
+
noinst_PROGRAMS += highlight_text_area
highlight_text_area_SOURCES = highlight_text_area.cc
highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \
- `Magick++-config --cppflags`
+ $(MAGICKXX_CPPFLAGS)
highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \
- -lpthread `Magick++-config --libs`
+ $(MAGICKXX_LDFLAGS)
endif HAVE_MAGICKXX
diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc
new file mode 100644
index 0000000..6fce945
--- /dev/null
+++ b/scribo/src/debug/non_text_mask.cc
@@ -0,0 +1,26 @@
+
+
+int main(int argc, char *argv[])
+{
+ // Link text lines
+ on_new_progress_label("Linking text lines");
+ line_links<L> llinks = scribo::text::link_lines(lines);
+
+ // Filter line links.
+ on_new_progress_label("Filter line links");
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ // Construct paragraphs
+ on_new_progress_label("Constructing paragraphs");
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ on_progress();
+
+
+ // Extract other Elements
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::non_text_fast(doc);
+
+}
diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc
new file mode 100644
index 0000000..7eab4ba
--- /dev/null
+++ b/scribo/src/debug/show_components_bboxes.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/primitive/extract/components.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_comps(c, components)
+ if (components(c).is_valid())
+ mln::draw::box(output, components(c).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc
new file mode 100644
index 0000000..eff0eb7
--- /dev/null
+++ b/scribo/src/debug/show_groups_bboxes.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+ using namespace scribo::primitive;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.* out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+
+ object_links<L>
+ left_link = link::with_single_left_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ object_links<L>
+ right_link = primitive::link::with_single_right_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ primitive::link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+
+ object_groups<L>
+ groups = group::from_single_link(merged_links);
+
+ line_set<L> lines(groups);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_lines(l, lines)
+ if (lines(l).is_valid())
+ mln::draw::box(output, lines(l).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_bottom_aligned.cc
copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc
index 634551b..4b0e765 100644
--- a/scribo/src/debug/show_links_bottom_aligned.cc
+++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
-#include <scribo/core/component_set.hh>
-
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object bottoms. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,42 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
- "Show valid or invalid links according the"
+ "Show valid or invalid links according the "
"horizontal alignment (based on bottom line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictBottomCenter);
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter);
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictBottomCenter);
+ object_links<L> output = link::compute(functor, anchor::Bottom);
- io::ppm::save(decision_image, argv[4]);
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc
deleted file mode 100644
index a70b2fb..0000000
--- a/scribo/src/debug/show_links_several_right.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
-//
-// This file is part of Olena.
-//
-// Olena is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation, version 2 of the License.
-//
-// Olena is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Olena. If not, see <http://www.gnu.org/licenses/>.
-//
-// As a special exception, you may use this file as part of a free
-// software project without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to produce
-// an executable, this file does not by itself cause the resulting
-// executable to be covered by the GNU General Public License. This
-// exception does not however invalidate any other reasons why the
-// executable file might be covered by the GNU General Public License.
-
-#include <iostream>
-
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-
-#include <mln/value/rgb8.hh>
-#include <mln/value/label_16.hh>
-#include <mln/literal/colors.hh>
-
-#include <mln/io/pbm/load.hh>
-#include <mln/io/ppm/save.hh>
-
-#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_several_right_links.hh>
-
-#include <scribo/draw/bounding_boxes.hh>
-
-#include <scribo/debug/several_links_decision_image.hh>
-#include <scribo/debug/usage.hh>
-
-
-
-const char *args_desc[][2] =
-{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_nbh_dist", " Maximum distance for neighborhood search."
- "(common value : 30)" },
- {0, 0}
-};
-
-
-int main(int argc, char* argv[])
-{
- using namespace scribo;
- using namespace scribo::primitive::internal;
- using namespace mln;
-
- if (argc != 4)
- return scribo::debug::usage(argv,
- "Show sucessful/unsuccessful right links between components.",
- "input.pbm max_nbh_dist output.ppm",
- args_desc);
-
- image2d<bool> input;
- io::pbm::load(input, argv[1]);
-
- // Finding objects.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
- component_set<L> comps
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
- // Finding right links.
- object_links<L> right_link
- = primitive::link::with_several_right_links(comps, atoi(argv[2]));
-
- image2d<value::rgb8> decision_image
- = scribo::debug::several_links_decision_image(input,
- right_link,
- right_link);
-
- io::ppm::save(decision_image, argv[3]);
-}
diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_top_aligned.cc
copy to scribo/src/debug/show_links_top_aligned_2angles.cc
index 5ffcb70..48f3a13 100644
--- a/scribo/src/debug/show_links_top_aligned.cc
+++ b/scribo/src/debug/show_links_top_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
-#include <scribo/debug/links_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object tops. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,41 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
"Show valid or invalid links according the "
"horizontal alignment (based on top line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
-
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictTopCenter);
-
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictTopCenter);
- io::ppm::save(decision_image, argv[4]);
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
+
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter);
+
+ object_links<L> output = link::compute(functor, anchor::Top);
+
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc
new file mode 100644
index 0000000..b16a751
--- /dev/null
+++ b/scribo/src/debug/show_paragraph_blocks.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/draw/box_plain.hh>
+
+#include <mln/debug/filename.hh>
+
+#include <mln/util/timer.hh>
+
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/usage.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/paragraph_set.hh>
+#include <scribo/core/line_info.hh>
+
+#include <scribo/text/link_lines.hh>
+#include <scribo/filter/line_links_x_height.hh>
+
+#include <scribo/io/xml/load.hh>
+
+// int i = 0;
+
+const char *args_desc[][2] =
+{
+ {0, 0}
+};
+
+
+
+int main(int argc, char* argv[])
+{
+ using namespace scribo;
+ using namespace mln;
+
+ if (argc != 3)
+ return scribo::debug::usage(argv,
+ "Show paragraph blocks",
+ "lines.xml out_blocks.pbm",
+ args_desc);
+
+ trace::entering("main");
+
+ typedef image2d<scribo::def::lbl_type> L;
+ document<L> doc;
+ scribo::io::xml::load(doc, argv[1]);
+
+ if (! doc.has_text())
+ {
+ std::cout << "ERROR: this XML file does not contain any text information!"
+ << std::endl;
+ return 1;
+ }
+
+
+ // Link text lines
+ line_links<L> llinks = scribo::text::link_lines(doc.lines());
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ util::timer t;
+ t.start();
+
+ image2d<bool> blocks;
+ initialize(blocks, doc.lines().components().labeled_image());
+ data::fill(blocks, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, blocks);
+ // data::fill(log, 0);
+
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= 3)
+ {
+ box2d last_tbox, last_box;
+
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>& line = parset.lines()(parset(p).line_ids()(l));
+
+ if (last_box.is_valid())
+ if (last_box.pmax().row() < line.bbox().pmin().row())
+ {
+ last_tbox = last_box;
+
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ // invalid case:
+ //
+ // =======
+ // ======
+
+ if (pmax.col() > pmin.col())
+ {
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+ else // Handle the case when there are several text boxes on the same line.
+ {
+ if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row())
+ {
+ // Top box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ box2d new_box(pmin, pmax);
+
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 2);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ if (last_box.pmax().col() < line.bbox().pmax().col()
+ && last_box.pmin().col() < line.bbox().pmin().col())
+ {
+ // Left box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(), line.bbox().pmin().col())),
+ pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()),
+ std::max(last_box.pmax().col(), line.bbox().pmin().col()));
+
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 3);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+
+ mln::draw::box_plain(blocks, line.bbox(), true);
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+
+ t.stop();
+ std::cout << t << std::endl;
+
+ mln::io::pbm::save(blocks, argv[2]);
+ // mln::io::pgm::save(log, "log.pgm");
+
+ trace::exiting("main");
+}
diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc
new file mode 100644
index 0000000..0f4cce4
--- /dev/null
+++ b/scribo/src/non_text_components.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+#include <libgen.h>
+#include <fstream>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+
+#include <mln/io/pbm/save.hh>
+#include <mln/io/magick/load.hh>
+
+#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/text_in_doc_preprocess.hh>
+
+#include <scribo/core/document.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/crop_without_localization.hh>
+#include <scribo/preprocessing/crop.hh>
+
+#include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
+
+
+const char *args_desc[][2] =
+{
+ { "input.*", "An image." },
+ { "non_text_comps.pbm", "Non text components mask." },
+ { "enable_debug", "Enable debug image output. Set to 1 or 0." },
+ { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
+ {0, 0}
+};
+
+
+
+int main(int argc, char* argv[])
+{
+ using namespace scribo;
+ using namespace mln;
+
+ if (argc != 4 && argc != 3 && argc != 5)
+ return scribo::debug::usage(argv,
+ "Extract non text components mask/",
+ "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ args_desc);
+
+ std::string out_img = basename(argv[1]);
+ out_img.erase(out_img.size() - 4);
+
+ std::string filename_prefix = out_img + "_debug";
+ scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
+ if (argc > 3 && atoi(argv[3]))
+ scribo::debug::logger().set_level(scribo::debug::Special);
+ else
+ scribo::debug::logger().set_level(scribo::debug::None);
+
+ trace::entering("main");
+
+ Magick::InitializeMagick(*argv);
+
+ typedef image2d<scribo::def::lbl_type> L;
+ image2d<value::rgb8> input;
+ mln::io::magick::load(input, argv[1]);
+
+ util::timer t;
+ t.start();
+
+ // Preprocess document
+ image2d<bool>
+ input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+
+
+ bool denoise = true;
+ std::string language = "";
+ bool find_line_seps = true;
+ bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
+
+ std::cout << "Running with the following options :"
+ << " ocr_language = " << language
+ << " | find_lines_seps = " << find_line_seps
+ << " | find_whitespace_seps = " << find_whitespace_seps
+ << " | debug = " << scribo::debug::logger().is_enabled()
+ << std::endl;
+
+ // Run document toolchain.
+
+ // Text
+ std::cout << "Analysing document..." << std::endl;
+ document<L>
+ doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
+ t.stop();
+ std::cout << t << std::endl;
+
+ mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+
+ scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
+ scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+
+ trace::exiting("main");
+}
--
1.5.6.5