
* src/Makefile.am, * src/debug/Makefile.am: Update targets. * src/debug/non_text_mask.cc, * src/debug/show_components_bboxes.cc, * src/debug/show_groups_bboxes.cc, * src/debug/show_links_bottom_aligned_2angles.cc, * src/debug/show_links_top_aligned_2angles.cc, * src/debug/show_paragraph_blocks.cc, * src/non_text_components.cc: New. * src/debug/show_links_several_right.cc: Removed. Deprecated. --- scribo/ChangeLog | 17 ++ scribo/src/Makefile.am | 32 ++++- scribo/src/debug/Makefile.am | 66 ++++++- scribo/src/debug/non_text_mask.cc | 26 +++ scribo/src/debug/show_components_bboxes.cc | 59 ++++++ scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++ ...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++--- scribo/src/debug/show_links_several_right.cc | 90 ---------- ...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++---- scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++ scribo/src/non_text_components.cc | 128 ++++++++++++++ 11 files changed, 652 insertions(+), 161 deletions(-) create mode 100644 scribo/src/debug/non_text_mask.cc create mode 100644 scribo/src/debug/show_components_bboxes.cc create mode 100644 scribo/src/debug/show_groups_bboxes.cc copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%) delete mode 100644 scribo/src/debug/show_links_several_right.cc copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%) create mode 100644 scribo/src/debug/show_paragraph_blocks.cc create mode 100644 scribo/src/non_text_components.cc diff --git a/scribo/ChangeLog b/scribo/ChangeLog index 32dda87..cc6e861 100644 --- a/scribo/ChangeLog +++ b/scribo/ChangeLog @@ -1,5 +1,22 @@ 2011-03-14 Guillaume Lazzara <z@lrde.epita.fr> + New debug tools. + + * src/Makefile.am, + * src/debug/Makefile.am: Update targets. + + * src/debug/non_text_mask.cc, + * src/debug/show_components_bboxes.cc, + * src/debug/show_groups_bboxes.cc, + * src/debug/show_links_bottom_aligned_2angles.cc, + * src/debug/show_links_top_aligned_2angles.cc, + * src/debug/show_paragraph_blocks.cc, + * src/non_text_components.cc: New. + + * src/debug/show_links_several_right.cc: Removed. Deprecated. + +2011-03-14 Guillaume Lazzara <z@lrde.epita.fr> + New non-text components extraction routine. * scribo/make/text_blocks_image.hh, diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am index d6275fd..3a35528 100644 --- a/scribo/src/Makefile.am +++ b/scribo/src/Makefile.am @@ -96,18 +96,48 @@ if HAVE_TESSERACT $(TIFF_LDFLAGS) \ $(MAGICKXX_LDFLAGS) +if HAVE_QT utilexec_PROGRAMS += content_in_doc content_in_doc_SOURCES = content_in_doc.cc content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \ $(TESSERACT_CPPFLAGS) \ $(TIFF_CPPFLAGS) \ - $(MAGICKXX_CPPFLAGS) + $(MAGICKXX_CPPFLAGS) \ + $(QT_CPPFLAGS) + content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \ + $(QT_CXXFLAGS) content_in_doc_LDFLAGS = $(AM_LDFLAGS) \ $(TESSERACT_LDFLAGS) \ $(TIFF_LDFLAGS) \ $(MAGICKXX_LDFLAGS) \ + $(QT_LDFLAGS) \ -lpthread + content_in_doc_LDADD = $(LDADD) \ + $(QT_LIBS) + + + utilexec_PROGRAMS += non_text_components + non_text_components_SOURCES = non_text_components.cc + non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \ + $(TESSERACT_CPPFLAGS) \ + $(TIFF_CPPFLAGS) \ + $(MAGICKXX_CPPFLAGS) \ + $(QT_CPPFLAGS) + non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \ + $(QT_CXXFLAGS) + non_text_components_LDFLAGS = $(AM_LDFLAGS) \ + $(TESSERACT_LDFLAGS) \ + $(TIFF_LDFLAGS) \ + $(MAGICKXX_LDFLAGS) \ + $(QT_LDFLAGS) \ + -lpthread + non_text_components_LDADD = $(LDADD) \ + $(QT_LIBS) + + +endif HAVE_QT + endif HAVE_TESSERACT diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am index cdb1f30..60d7afa 100644 --- a/scribo/src/debug/Makefile.am +++ b/scribo/src/debug/Makefile.am @@ -18,6 +18,8 @@ include $(top_srcdir)/scribo/scribo.mk noinst_PROGRAMS = \ + show_components_bboxes \ + show_groups_bboxes \ show_info_x_height \ show_info_median_inter_characters \ show_separators \ @@ -25,9 +27,9 @@ noinst_PROGRAMS = \ show_links_bbox_h_ratio \ show_links_bbox_overlap \ show_links_bottom_aligned \ + show_links_bottom_aligned_2angles \ show_links_center_aligned \ show_links_non_h_aligned \ - show_links_several_right \ show_links_several_right_overlap \ show_links_single_down \ show_links_single_down_left_aligned \ @@ -40,15 +42,16 @@ noinst_PROGRAMS = \ show_links_single_up_left_aligned \ show_links_single_up_right_aligned \ show_links_top_aligned \ + show_links_top_aligned_2angles \ show_objects_large \ show_objects_large_small \ show_objects_small \ show_objects_thick \ - show_objects_thin \ - show_stoppers \ - show_text_lines + show_objects_thin +show_components_bboxes_SOURCES = show_components_bboxes.cc +show_groups_bboxes_SOURCES = show_groups_bboxes.cc show_info_x_height_SOURCES = show_info_x_height.cc show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc show_separators_SOURCES = show_separators.cc @@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc +show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc show_links_center_aligned_SOURCES = show_links_center_aligned.cc show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc -show_links_several_right_SOURCES = show_links_several_right.cc show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc show_links_single_down_SOURCES = show_links_single_down.cc show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc @@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc show_links_top_aligned_SOURCES = show_links_top_aligned.cc +show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc show_objects_large_SOURCES = show_objects_large.cc show_objects_large_small_SOURCES = show_objects_large_small.cc show_objects_small_SOURCES = show_objects_small.cc show_objects_thick_SOURCES = show_objects_thick.cc show_objects_thin_SOURCES = show_objects_thin.cc -show_stoppers_SOURCES = show_stoppers.cc -show_text_lines_SOURCES = show_text_lines.cc if HAVE_MAGICKXX +if HAVE_QT + + noinst_PROGRAMS += show_paragraph_blocks + show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc + show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \ + $(QT_CPPFLAGS) \ + $(MAGICKXX_CPPFLAGS) + show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \ + $(QT_CXXFLAGS) + show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \ + $(QT_LDFLAGS) \ + $(MAGICKXX_LDFLAGS) + show_paragraph_blocks_LDADD = $(LDADD) \ + $(QT_LIBS) + + noinst_PROGRAMS += show_text_lines + show_text_lines_SOURCES = show_text_lines.cc + show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \ + $(MAGICKXX_CPPFLAGS) \ + $(QT_CPPFLAGS) -DHAVE_QT + show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \ + $(QT_CXXFLAGS) + show_text_lines_LDFLAGS = $(AM_LDFLAGS) \ + $(MAGICKXX_LDFLAGS) \ + $(QT_LDFLAGS) + show_text_lines_LDADD = $(LDADD) \ + $(QT_LIBS) + + + noinst_PROGRAMS += show_stoppers + show_stoppers_SOURCES = show_stoppers.cc + show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \ + $(MAGICKXX_CPPFLAGS) \ + $(QT_CPPFLAGS) + show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \ + $(QT_CXXFLAGS) + show_stoppers_LDFLAGS = $(AM_LDFLAGS) \ + $(MAGICKXX_LDFLAGS) \ + $(QT_LDFLAGS) + show_stoppers_LDADD = $(LDADD) \ + $(QT_LIBS) + + +endif HAVE_QT + + noinst_PROGRAMS += highlight_text_area highlight_text_area_SOURCES = highlight_text_area.cc highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \ - `Magick++-config --cppflags` + $(MAGICKXX_CPPFLAGS) highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \ - -lpthread `Magick++-config --libs` + $(MAGICKXX_LDFLAGS) endif HAVE_MAGICKXX diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc new file mode 100644 index 0000000..6fce945 --- /dev/null +++ b/scribo/src/debug/non_text_mask.cc @@ -0,0 +1,26 @@ + + +int main(int argc, char *argv[]) +{ + // Link text lines + on_new_progress_label("Linking text lines"); + line_links<L> llinks = scribo::text::link_lines(lines); + + // Filter line links. + on_new_progress_label("Filter line links"); + llinks = scribo::filter::line_links_x_height(llinks); + + // Construct paragraphs + on_new_progress_label("Constructing paragraphs"); + scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks); + doc.set_paragraphs(parset); + + on_progress(); + + + // Extract other Elements + on_new_progress_label("Extracting Elements"); + component_set<L> + elements = scribo::primitive::extract::non_text_fast(doc); + +} diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc new file mode 100644 index 0000000..7eab4ba --- /dev/null +++ b/scribo/src/debug/show_components_bboxes.cc @@ -0,0 +1,59 @@ +// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory +// (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. + +#include <mln/core/image/image2d.hh> +#include <mln/core/alias/neighb2d.hh> +#include <mln/io/pbm/all.hh> +#include <mln/value/int_u.hh> +#include <mln/literal/colors.hh> +#include <mln/draw/box.hh> + +#include <scribo/core/component_set.hh> +#include <scribo/primitive/extract/components.hh> + +int main(int argc, char *argv[]) +{ + using namespace mln; + using namespace scribo; + + if (argc != 3) + { + std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl; + return 1; + } + + typedef mln::value::int_u<30> V; + typedef image2d<V> L; + + image2d<bool> input; + io::pbm::load(input, argv[1]); + + V ncomponents; + component_set<L> + components = scribo::primitive::extract::components(input, c8(), + ncomponents); + + image2d<bool> output; + initialize(output, input); + data::fill(output, false); + + for_all_comps(c, components) + if (components(c).is_valid()) + mln::draw::box(output, components(c).bbox(), true); + + io::pbm::save(output, argv[2]); +} diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc new file mode 100644 index 0000000..eff0eb7 --- /dev/null +++ b/scribo/src/debug/show_groups_bboxes.cc @@ -0,0 +1,91 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. + +#include <mln/core/image/image2d.hh> +#include <mln/core/alias/neighb2d.hh> +#include <mln/io/pbm/all.hh> +#include <mln/value/int_u.hh> +#include <mln/literal/colors.hh> +#include <mln/draw/box.hh> + +#include <scribo/core/component_set.hh> +#include <scribo/core/line_set.hh> +#include <scribo/primitive/extract/components.hh> +#include <scribo/primitive/group/from_single_link.hh> +#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh> +#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh> +#include <scribo/primitive/link/merge_double_link.hh> +#include <scribo/primitive/link/internal/dmax_width_and_height.hh> + +int main(int argc, char *argv[]) +{ + using namespace mln; + using namespace scribo; + using namespace scribo::primitive; + + if (argc != 3) + { + std::cerr << "Usage : " << argv[0] << " input.* out.pbm" << std::endl; + return 1; + } + + typedef mln::value::int_u<30> V; + typedef image2d<V> L; + + image2d<bool> input; + io::pbm::load(input, argv[1]); + + V ncomponents; + component_set<L> + components = scribo::primitive::extract::components(input, c8(), + ncomponents); + + + object_links<L> + left_link = link::with_single_left_link_dmax_ratio( + components, +// primitive::link::internal::dmax_width_and_height(1), + link::internal::dmax_default(1), + anchor::MassCenter); + + object_links<L> + right_link = primitive::link::with_single_right_link_dmax_ratio( + components, +// primitive::link::internal::dmax_width_and_height(1), + primitive::link::internal::dmax_default(1), + anchor::MassCenter); + + // Validating left and right links. + object_links<L> + merged_links = primitive::link::merge_double_link(left_link, + right_link); + + + object_groups<L> + groups = group::from_single_link(merged_links); + + line_set<L> lines(groups); + + image2d<bool> output; + initialize(output, input); + data::fill(output, false); + + for_all_lines(l, lines) + if (lines(l).is_valid()) + mln::draw::box(output, lines(l).bbox(), true); + + io::pbm::save(output, argv[2]); +} diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc similarity index 60% copy from scribo/src/debug/show_links_bottom_aligned.cc copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc index 634551b..4b0e765 100644 --- a/scribo/src/debug/show_links_bottom_aligned.cc +++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc @@ -1,5 +1,4 @@ -// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory -// (LRDE) +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) // // This file is part of Olena. // @@ -38,22 +37,22 @@ #include <mln/io/pbm/load.hh> #include <mln/io/ppm/save.hh> -#include <scribo/core/component_set.hh> - +#include <scribo/core/def/lbl_type.hh> +#include <scribo/debug/logger.hh> #include <scribo/primitive/extract/components.hh> -#include <scribo/primitive/link/with_single_right_link.hh> #include <scribo/filter/object_links_aligned.hh> +#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh> -#include <scribo/debug/alignment_decision_image.hh> #include <scribo/debug/usage.hh> const char *args_desc[][2] = { - { "input.pbm", "A binary image. True for objects and False for the background." }, - { "max_dist", "Maximum distance lookup (common value 45)" }, - { "max_alpha", "Max angle between two object bottoms. (common value : 5)" }, + { "input.pbm", "A binary image" }, + { "dmax_ratio", "Maximum distance lookup (common value 5)" }, + { "min_alpha", "First angle used for close objects. (common value : 3)" }, + { "max_alpha", "Second angle used for further objects. (common value : 5)" }, {0, 0} }; @@ -61,42 +60,41 @@ const char *args_desc[][2] = int main(int argc, char* argv[]) { using namespace scribo; - using namespace scribo::primitive::internal; + using namespace scribo::primitive; using namespace mln; - if (argc != 5) + if (argc != 6) return scribo::debug::usage(argv, - "Show valid or invalid links according the" + "Show valid or invalid links according the " "horizontal alignment (based on bottom line).", - "input.pbm max_dist max_alpha output.ppm", + "input.pbm dmax_ratio min_angle max_angle " + " output.ppm", args_desc); image2d<bool> input; io::pbm::load(input, argv[1]); // Finding components. - value::label_16 nbboxes; - typedef image2d<value::label_16> L; + typedef scribo::def::lbl_type V; + V nbboxes; + typedef image2d<V> L; component_set<L> components - = scribo::primitive::extract::components(input, c8(), nbboxes); - + = extract::components(input, c8(), nbboxes); // Finding right links. - object_links<L> right_links - = primitive::link::with_single_right_link(components, atoi(argv[2])); + // object_links<L> + // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned( + // components, atof(argv[2]), atof(argv[3]), atof(argv[4])); + + scribo::debug::logger().set_level(scribo::debug::All); - // Filtering. - object_links<L> filtered_links - = filter::object_links_aligned(right_links, atof(argv[3]), - anchor::StrictBottomCenter); + link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default> + functor(components, link::internal::dmax_default(atof(argv[2])), + atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter); - // Debug image. - image2d<value::rgb8> decision_image - = scribo::debug::alignment_decision_image(input, - right_links, - filtered_links, - anchor::StrictBottomCenter); + object_links<L> output = link::compute(functor, anchor::Bottom); - io::ppm::save(decision_image, argv[4]); + scribo::debug::logger().set_level(scribo::debug::None); + io::ppm::save(functor.debug_, argv[5]); } diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc deleted file mode 100644 index a70b2fb..0000000 --- a/scribo/src/debug/show_links_several_right.cc +++ /dev/null @@ -1,90 +0,0 @@ -// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory -// (LRDE) -// -// This file is part of Olena. -// -// Olena is free software: you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free -// Software Foundation, version 2 of the License. -// -// Olena is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -// General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with Olena. If not, see <http://www.gnu.org/licenses/>. -// -// As a special exception, you may use this file as part of a free -// software project without restriction. Specifically, if other files -// instantiate templates or use macros or inline functions from this -// file, or you compile this file and link it with other files to produce -// an executable, this file does not by itself cause the resulting -// executable to be covered by the GNU General Public License. This -// exception does not however invalidate any other reasons why the -// executable file might be covered by the GNU General Public License. - -#include <iostream> - -#include <mln/core/image/image2d.hh> -#include <mln/core/alias/neighb2d.hh> - -#include <mln/value/rgb8.hh> -#include <mln/value/label_16.hh> -#include <mln/literal/colors.hh> - -#include <mln/io/pbm/load.hh> -#include <mln/io/ppm/save.hh> - -#include <scribo/primitive/extract/components.hh> -#include <scribo/primitive/link/with_several_right_links.hh> - -#include <scribo/draw/bounding_boxes.hh> - -#include <scribo/debug/several_links_decision_image.hh> -#include <scribo/debug/usage.hh> - - - -const char *args_desc[][2] = -{ - { "input.pbm", "A binary image. True for objects and False for the " - "background." }, - { "max_nbh_dist", " Maximum distance for neighborhood search." - "(common value : 30)" }, - {0, 0} -}; - - -int main(int argc, char* argv[]) -{ - using namespace scribo; - using namespace scribo::primitive::internal; - using namespace mln; - - if (argc != 4) - return scribo::debug::usage(argv, - "Show sucessful/unsuccessful right links between components.", - "input.pbm max_nbh_dist output.ppm", - args_desc); - - image2d<bool> input; - io::pbm::load(input, argv[1]); - - // Finding objects. - value::label_16 nbboxes; - typedef image2d<value::label_16> L; - component_set<L> comps - = scribo::primitive::extract::components(input, c8(), nbboxes); - - // Finding right links. - object_links<L> right_link - = primitive::link::with_several_right_links(comps, atoi(argv[2])); - - image2d<value::rgb8> decision_image - = scribo::debug::several_links_decision_image(input, - right_link, - right_link); - - io::ppm::save(decision_image, argv[3]); -} diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc similarity index 60% copy from scribo/src/debug/show_links_top_aligned.cc copy to scribo/src/debug/show_links_top_aligned_2angles.cc index 5ffcb70..48f3a13 100644 --- a/scribo/src/debug/show_links_top_aligned.cc +++ b/scribo/src/debug/show_links_top_aligned_2angles.cc @@ -1,5 +1,4 @@ -// Copyright (C) 2009, 2010, 2011 EPITA Research and Development -// Laboratory (LRDE) +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) // // This file is part of Olena. // @@ -38,22 +37,22 @@ #include <mln/io/pbm/load.hh> #include <mln/io/ppm/save.hh> +#include <scribo/core/def/lbl_type.hh> +#include <scribo/debug/logger.hh> #include <scribo/primitive/extract/components.hh> -#include <scribo/primitive/link/with_single_right_link.hh> #include <scribo/filter/object_links_aligned.hh> +#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh> -#include <scribo/debug/alignment_decision_image.hh> -#include <scribo/debug/links_image.hh> #include <scribo/debug/usage.hh> const char *args_desc[][2] = { - { "input.pbm", "A binary image. True for objects and False for the " - "background." }, - { "max_dist", "Maximum distance lookup (common value 45)" }, - { "max_alpha", "Max angle between two object tops. (common value : 5)" }, + { "input.pbm", "A binary image" }, + { "dmax_ratio", "Maximum distance lookup (common value 5)" }, + { "min_alpha", "First angle used for close objects. (common value : 3)" }, + { "max_alpha", "Second angle used for further objects. (common value : 5)" }, {0, 0} }; @@ -61,41 +60,41 @@ const char *args_desc[][2] = int main(int argc, char* argv[]) { using namespace scribo; - using namespace scribo::primitive::internal; + using namespace scribo::primitive; using namespace mln; - if (argc != 5) + if (argc != 6) return scribo::debug::usage(argv, "Show valid or invalid links according the " "horizontal alignment (based on top line).", - "input.pbm max_dist max_alpha output.ppm", + "input.pbm dmax_ratio min_angle max_angle " + " output.ppm", args_desc); image2d<bool> input; io::pbm::load(input, argv[1]); // Finding components. - value::label_16 nbboxes; - typedef image2d<value::label_16> L; + typedef scribo::def::lbl_type V; + V nbboxes; + typedef image2d<V> L; component_set<L> components - = scribo::primitive::extract::components(input, c8(), nbboxes); - + = extract::components(input, c8(), nbboxes); // Finding right links. - object_links<L> right_links - = primitive::link::with_single_right_link(components, atoi(argv[2])); - - // Filtering. - object_links<L> filtered_links - = filter::object_links_aligned(right_links, atof(argv[3]), - anchor::StrictTopCenter); - - // Debug image. - image2d<value::rgb8> decision_image - = scribo::debug::alignment_decision_image(input, - right_links, - filtered_links, - anchor::StrictTopCenter); - io::ppm::save(decision_image, argv[4]); + // object_links<L> + // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned( + // components, atof(argv[2]), atof(argv[3]), atof(argv[4])); + + scribo::debug::logger().set_level(scribo::debug::All); + + link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default> + functor(components, link::internal::dmax_default(atof(argv[2])), + atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter); + + object_links<L> output = link::compute(functor, anchor::Top); + + scribo::debug::logger().set_level(scribo::debug::None); + io::ppm::save(functor.debug_, argv[5]); } diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc new file mode 100644 index 0000000..b16a751 --- /dev/null +++ b/scribo/src/debug/show_paragraph_blocks.cc @@ -0,0 +1,185 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#include <iostream> + +#include <mln/core/image/image2d.hh> +#include <mln/io/pbm/save.hh> +#include <mln/io/pgm/save.hh> +#include <mln/draw/box_plain.hh> + +#include <mln/debug/filename.hh> + +#include <mln/util/timer.hh> + +#include <scribo/core/def/lbl_type.hh> +#include <scribo/debug/usage.hh> + +#include <scribo/core/component_set.hh> +#include <scribo/core/paragraph_set.hh> +#include <scribo/core/line_info.hh> + +#include <scribo/text/link_lines.hh> +#include <scribo/filter/line_links_x_height.hh> + +#include <scribo/io/xml/load.hh> + +// int i = 0; + +const char *args_desc[][2] = +{ + {0, 0} +}; + + + +int main(int argc, char* argv[]) +{ + using namespace scribo; + using namespace mln; + + if (argc != 3) + return scribo::debug::usage(argv, + "Show paragraph blocks", + "lines.xml out_blocks.pbm", + args_desc); + + trace::entering("main"); + + typedef image2d<scribo::def::lbl_type> L; + document<L> doc; + scribo::io::xml::load(doc, argv[1]); + + if (! doc.has_text()) + { + std::cout << "ERROR: this XML file does not contain any text information!" + << std::endl; + return 1; + } + + + // Link text lines + line_links<L> llinks = scribo::text::link_lines(doc.lines()); + llinks = scribo::filter::line_links_x_height(llinks); + + scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks); + doc.set_paragraphs(parset); + + util::timer t; + t.start(); + + image2d<bool> blocks; + initialize(blocks, doc.lines().components().labeled_image()); + data::fill(blocks, false); + + // image2d<value::int_u8> log; + // initialize(log, blocks); + // data::fill(log, 0); + + for_all_paragraphs(p, parset) + if (parset(p).nlines() >= 3) + { + box2d last_tbox, last_box; + + // For each line in this paragraph. + for_all_elements(l, parset(p).line_ids()) + { + const line_info<L>& line = parset.lines()(parset(p).line_ids()(l)); + + if (last_box.is_valid()) + if (last_box.pmax().row() < line.bbox().pmin().row()) + { + last_tbox = last_box; + + point2d + pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()), + std::max(last_tbox.pmin().col(), line.bbox().pmin().col())), + pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()), + std::min(last_tbox.pmax().col(), line.bbox().pmax().col())); + + // invalid case: + // + // ======= + // ====== + + if (pmax.col() > pmin.col()) + { + box2d new_box(pmin, pmax); + mln::draw::box_plain(blocks, new_box, true); + // mln::draw::box_plain(log, new_box, 1); + + // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++)); + } + } + else // Handle the case when there are several text boxes on the same line. + { + if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row()) + { + // Top box + point2d + pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()), + std::max(last_tbox.pmin().col(), line.bbox().pmin().col())), + pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()), + std::min(last_tbox.pmax().col(), line.bbox().pmax().col())); + + box2d new_box(pmin, pmax); + + mln::draw::box_plain(blocks, new_box, true); + // mln::draw::box_plain(log, new_box, 2); + // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++)); + } + + if (last_box.pmax().col() < line.bbox().pmax().col() + && last_box.pmin().col() < line.bbox().pmin().col()) + { + // Left box + point2d + pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()), + std::min(last_box.pmax().col(), line.bbox().pmin().col())), + pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()), + std::max(last_box.pmax().col(), line.bbox().pmin().col())); + + box2d new_box(pmin, pmax); + mln::draw::box_plain(blocks, new_box, true); + // mln::draw::box_plain(log, new_box, 3); + // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++)); + } + } + + mln::draw::box_plain(blocks, line.bbox(), true); + // mln::draw::box_plain(log, line.bbox(), 255); + // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++)); + last_box = line.bbox(); + } + } + + t.stop(); + std::cout << t << std::endl; + + mln::io::pbm::save(blocks, argv[2]); + // mln::io::pgm::save(log, "log.pgm"); + + trace::exiting("main"); +} diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc new file mode 100644 index 0000000..0f4cce4 --- /dev/null +++ b/scribo/src/non_text_components.cc @@ -0,0 +1,128 @@ +// Copyright (C) 2011 EPITA Research and Development Laboratory +// (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + + +#include <libgen.h> +#include <fstream> +#include <iostream> + +#include <mln/core/image/image2d.hh> + +#include <mln/io/pbm/save.hh> +#include <mln/io/magick/load.hh> + +#include <scribo/toolchain/content_in_doc.hh> +#include <scribo/toolchain/text_in_doc_preprocess.hh> + +#include <scribo/core/document.hh> + +#include <scribo/debug/usage.hh> + +#include <scribo/preprocessing/crop_without_localization.hh> +#include <scribo/preprocessing/crop.hh> + +#include <scribo/io/xml/save.hh> +#include <scribo/io/img/save.hh> + + +const char *args_desc[][2] = +{ + { "input.*", "An image." }, + { "non_text_comps.pbm", "Non text components mask." }, + { "enable_debug", "Enable debug image output. Set to 1 or 0." }, + { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." }, + {0, 0} +}; + + + +int main(int argc, char* argv[]) +{ + using namespace scribo; + using namespace mln; + + if (argc != 4 && argc != 3 && argc != 5) + return scribo::debug::usage(argv, + "Extract non text components mask/", + "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]", + args_desc); + + std::string out_img = basename(argv[1]); + out_img.erase(out_img.size() - 4); + + std::string filename_prefix = out_img + "_debug"; + scribo::debug::logger().set_filename_prefix(filename_prefix.c_str()); + if (argc > 3 && atoi(argv[3])) + scribo::debug::logger().set_level(scribo::debug::Special); + else + scribo::debug::logger().set_level(scribo::debug::None); + + trace::entering("main"); + + Magick::InitializeMagick(*argv); + + typedef image2d<scribo::def::lbl_type> L; + image2d<value::rgb8> input; + mln::io::magick::load(input, argv[1]); + + util::timer t; + t.start(); + + // Preprocess document + image2d<bool> + input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34); + + + bool denoise = true; + std::string language = ""; + bool find_line_seps = true; + bool find_whitespace_seps = (argc > 4 && atoi(argv[4])); + + std::cout << "Running with the following options :" + << " ocr_language = " << language + << " | find_lines_seps = " << find_line_seps + << " | find_whitespace_seps = " << find_whitespace_seps + << " | debug = " << scribo::debug::logger().is_enabled() + << std::endl; + + // Run document toolchain. + + // Text + std::cout << "Analysing document..." << std::endl; + document<L> + doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise, + find_line_seps, find_whitespace_seps, + !language.empty(), language); + t.stop(); + std::cout << t << std::endl; + + mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]); + + scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage); + scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage); + + trace::exiting("main"); +} -- 1.5.6.5