Olena-patches
Threads by month
- ----- 2025 -----
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
March 2011
- 7 participants
- 277 discussions
last-svn-commit-811-gb71d697 scribo/core/def/lbl_type.hh: Use int_u<n> as label type.
by Guillaume Lazzara 14 Mar '11
14 Mar '11
---
scribo/ChangeLog | 4 ++++
scribo/scribo/core/def/lbl_type.hh | 12 +++++++++---
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 60d1277..8d504c2 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,9 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ * scribo/core/def/lbl_type.hh: Use int_u<n> as label type.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Set unit tests dependencies.
* tests/unit_test/Makefile.am: Add new conditional test file.
diff --git a/scribo/scribo/core/def/lbl_type.hh b/scribo/scribo/core/def/lbl_type.hh
index 6434bbe..90cc932 100644
--- a/scribo/scribo/core/def/lbl_type.hh
+++ b/scribo/scribo/core/def/lbl_type.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
//
// This file is part of Olena.
//
@@ -30,7 +31,8 @@
///
/// Global type definition for labels.
-# include <mln/value/label.hh>
+# include <mln/value/int_u.hh>
+//# include <mln/value/label.hh>
namespace scribo
@@ -39,7 +41,11 @@ namespace scribo
namespace def
{
- typedef mln::value::label<30u> lbl_type;
+ // FIXME: we would like to use label type. We have got problems
+ // with types conversions and data computation (no arithmetic
+ // operators on labels!)
+ typedef mln::value::int_u<30u> lbl_type;
+// typedef mln::value::label<30u> lbl_type;
} // end of namespace scribo::def
--
1.5.6.5
1
0
14 Mar '11
* tests/unit_test/Makefile.am: Add new conditional test file.
* tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
* tests/unit_test/cond_tests_qt,
* tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
* tests/unit_test/unit-tests.mk: Regen.
---
scribo/ChangeLog | 12 +++
scribo/tests/unit_test/Makefile.am | 7 +-
..._tiff => cond_tests_magickxx_qt_tesseract_tiff} | 1 +
scribo/tests/unit_test/cond_tests_qt | 2 +
.../tests/unit_test/cond_tests_qt_tesseract_tiff | 2 +-
scribo/tests/unit_test/unit-tests.mk | 96 +++++++++++++++-----
6 files changed, 92 insertions(+), 28 deletions(-)
copy scribo/tests/unit_test/{cond_tests_qt_tesseract_tiff => cond_tests_magickxx_qt_tesseract_tiff} (97%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 23ecde3..e79e597 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Set unit tests dependencies.
+
+ * tests/unit_test/Makefile.am: Add new conditional test file.
+
+ * tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
+ * tests/unit_test/cond_tests_qt,
+ * tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
+
+ * tests/unit_test/unit-tests.mk: Regen.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New debug tools.
* src/Makefile.am,
diff --git a/scribo/tests/unit_test/Makefile.am b/scribo/tests/unit_test/Makefile.am
index c88f553..e4c9e1b 100644
--- a/scribo/tests/unit_test/Makefile.am
+++ b/scribo/tests/unit_test/Makefile.am
@@ -17,9 +17,10 @@
include $(top_srcdir)/scribo/tests/tests.mk
-COND_TESTS = cond_tests_magickxx_tesseract_tiff \
- cond_tests_qt \
- cond_tests_qt_tesseract_tiff \
+COND_TESTS = cond_tests_magickxx_tesseract_tiff \
+ cond_tests_magickxx_qt_tesseract_tiff \
+ cond_tests_qt \
+ cond_tests_qt_tesseract_tiff \
cond_tests_tesseract_tiff
EXTRA_DIST = disabled_tests \
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
similarity index 97%
copy from scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
copy to scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
index 7dfe76d..f5840ef 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
@@ -1 +1,2 @@
scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/cond_tests_qt b/scribo/tests/unit_test/cond_tests_qt
index 577f9e6..f7bc42e 100644
--- a/scribo/tests/unit_test/cond_tests_qt
+++ b/scribo/tests/unit_test/cond_tests_qt
@@ -1,2 +1,4 @@
+scribo/convert/from_base64.hh
scribo/convert/from_qimage.hh
+scribo/io/xml/internal/full_xml_visitor.hh
scribo/io/xml/load.hh
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
index 7dfe76d..8b13789 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
@@ -1 +1 @@
-scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/unit-tests.mk b/scribo/tests/unit_test/unit-tests.mk
index 71b0b5b..b0f5982 100644
--- a/scribo/tests/unit_test/unit-tests.mk
+++ b/scribo/tests/unit_test/unit-tests.mk
@@ -4,12 +4,28 @@ check_PROGRAMS =
# Starting a conditional unit test list.
if HAVE_MAGICKXX
+if HAVE_QT
+if HAVE_TESSERACT
+if HAVE_TIFF
+check_PROGRAMS += \
+scribo_toolchain_nepomuk_text_extraction
+
+scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${QT_CPPFLAGS} -DHAVE_QT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
+scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
+scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
+endif HAVE_TIFF
+endif HAVE_TESSERACT
+endif HAVE_QT
+endif HAVE_MAGICKXX
+
+# Starting a conditional unit test list.
+if HAVE_MAGICKXX
if HAVE_TESSERACT
if HAVE_TIFF
check_PROGRAMS += \
scribo_toolchain_internal_content_in_doc_functor
-scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${MAGICKXX_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
scribo_toolchain_internal_content_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_content_in_doc_functor_SOURCES = scribo_toolchain_internal_content_in_doc_functor.cc
endif HAVE_TIFF
@@ -19,13 +35,21 @@ endif HAVE_MAGICKXX
# Starting a conditional unit test list.
if HAVE_QT
check_PROGRAMS += \
+scribo_convert_from_base64 \
scribo_convert_from_qimage \
+scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_load
-scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_convert_from_base64_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_convert_from_base64_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
+scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_convert_from_qimage_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_convert_from_qimage_SOURCES = scribo_convert_from_qimage.cc
-scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
+scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_io_xml_load_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_io_xml_load_SOURCES = scribo_io_xml_load.cc
endif HAVE_QT
@@ -34,12 +58,8 @@ endif HAVE_QT
if HAVE_QT
if HAVE_TESSERACT
if HAVE_TIFF
-check_PROGRAMS += \
-scribo_toolchain_nepomuk_text_extraction
+check_PROGRAMS +=
-scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${QT_CPPFLAGS} ${AM_CPPFLAGS}
-scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${AM_LDFLAGS}
-scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
endif HAVE_TIFF
endif HAVE_TESSERACT
endif HAVE_QT
@@ -53,16 +73,16 @@ scribo_toolchain_internal_text_in_doc_functor \
scribo_toolchain_content_in_doc \
scribo_toolchain_text_in_doc
-scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_text_recognition_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_text_recognition_SOURCES = scribo_text_recognition.cc
-scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_internal_text_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_text_in_doc_functor_SOURCES = scribo_toolchain_internal_text_in_doc_functor.cc
-scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_content_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_content_in_doc_SOURCES = scribo_toolchain_content_in_doc.cc
-scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_text_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_text_in_doc_SOURCES = scribo_toolchain_text_in_doc.cc
endif HAVE_TIFF
@@ -83,7 +103,6 @@ scribo_binarization_sauvola_ms \
scribo_binarization_sauvola_ms_split \
scribo_binarization_sauvola_threshold_image \
scribo_canvas_integral_browsing \
-scribo_convert_from_base64 \
scribo_convert_to_base64 \
scribo_core_all \
scribo_core_central_sites \
@@ -97,7 +116,7 @@ scribo_core_def_lbl_type \
scribo_core_document \
scribo_core_erase_objects \
scribo_core_init_integral_image \
-scribo_core_internal_doc_xml_serializer \
+scribo_core_internal_doc_serializer \
scribo_core_line_info \
scribo_core_line_links \
scribo_core_line_set \
@@ -112,25 +131,27 @@ scribo_core_tag_line \
scribo_debug_alignment_decision_image \
scribo_debug_all \
scribo_debug_bboxes_enlarged_image \
+scribo_debug_bboxes_image \
scribo_debug_char_space_image \
scribo_debug_decision_image \
scribo_debug_highlight_text_area \
scribo_debug_line_info_image \
+scribo_debug_linked_bboxes_image \
scribo_debug_links_decision_image \
+scribo_debug_links_image \
+scribo_debug_logger \
scribo_debug_looks_like_a_text_line_image \
scribo_debug_mean_and_base_lines_image \
-scribo_debug_save_bboxes_image \
scribo_debug_save_comp_diff \
scribo_debug_save_label_image \
-scribo_debug_save_linked_bboxes_image \
scribo_debug_save_table_image \
-scribo_debug_several_links_decision_image \
scribo_debug_text_areas_image \
scribo_debug_usage \
scribo_draw_all \
scribo_draw_bounding_box_links \
scribo_draw_bounding_boxes \
scribo_draw_groups_bboxes \
+scribo_estim_font_color \
scribo_estim_object_groups_v_thickness \
scribo_filter_all \
scribo_filter_common_objects_photo \
@@ -142,6 +163,7 @@ scribo_filter_object_groups_size_ratio \
scribo_filter_object_groups_small \
scribo_filter_object_groups_v_thickness \
scribo_filter_object_groups_with_holes \
+scribo_filter_object_links_aligned \
scribo_filter_object_links_bbox_h_ratio \
scribo_filter_object_links_bbox_overlap \
scribo_filter_object_links_bbox_ratio \
@@ -165,16 +187,25 @@ scribo_filter_objects_with_holes \
scribo_fun_v2b_label_to_bool \
scribo_fun_v2b_objects_large_filter \
scribo_fun_v2b_objects_small_filter \
+scribo_fun_v2v_highlight \
+scribo_io_img_internal_debug_img_visitor \
+scribo_io_img_internal_draw_edges \
+scribo_io_img_internal_full_img_visitor \
+scribo_io_img_internal_non_text_img_visitor \
+scribo_io_img_internal_text_img_visitor \
+scribo_io_img_save \
scribo_io_text_boxes_save \
scribo_io_xml_internal_extended_page_xml_visitor \
-scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_internal_page_xml_visitor \
scribo_io_xml_internal_print_box_coords \
+scribo_io_xml_internal_print_image_coords \
scribo_io_xml_internal_print_page_preambule \
scribo_io_xml_save \
scribo_make_all \
scribo_make_debug_filename \
scribo_make_influence_zone_graph \
+scribo_make_text_blocks_image \
+scribo_make_text_components_image \
scribo_postprocessing_all \
scribo_postprocessing_fill_object_holes \
scribo_preprocessing_all \
@@ -194,6 +225,7 @@ scribo_primitive_extract_canvas \
scribo_primitive_extract_cells \
scribo_primitive_extract_components \
scribo_primitive_extract_horizontal_separators \
+scribo_primitive_extract_internal_union \
scribo_primitive_extract_lines_discontinued \
scribo_primitive_extract_lines_h_discontinued \
scribo_primitive_extract_lines_h_pattern \
@@ -208,6 +240,7 @@ scribo_primitive_extract_lines_v_single \
scribo_primitive_extract_lines_v_thick \
scribo_primitive_extract_lines_v_thick_and_single \
scribo_primitive_extract_non_text \
+scribo_primitive_extract_non_text_kmean \
scribo_primitive_extract_separators \
scribo_primitive_extract_separators_nonvisible \
scribo_primitive_extract_vertical_separators \
@@ -236,6 +269,7 @@ scribo_primitive_link_internal_find_link \
scribo_primitive_link_internal_link_functor_base \
scribo_primitive_link_internal_link_several_dmax_base \
scribo_primitive_link_internal_link_single_dmax_base \
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base \
scribo_primitive_link_internal_link_single_dmax_ratio_base \
scribo_primitive_link_merge_double_link \
scribo_primitive_link_with_graph \
@@ -300,7 +334,6 @@ scribo_binarization_sauvola_ms_SOURCES = scribo_binarization_sauvola_ms.cc
scribo_binarization_sauvola_ms_split_SOURCES = scribo_binarization_sauvola_ms_split.cc
scribo_binarization_sauvola_threshold_image_SOURCES = scribo_binarization_sauvola_threshold_image.cc
scribo_canvas_integral_browsing_SOURCES = scribo_canvas_integral_browsing.cc
-scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
scribo_convert_to_base64_SOURCES = scribo_convert_to_base64.cc
scribo_core_all_SOURCES = scribo_core_all.cc
scribo_core_central_sites_SOURCES = scribo_core_central_sites.cc
@@ -314,7 +347,7 @@ scribo_core_def_lbl_type_SOURCES = scribo_core_def_lbl_type.cc
scribo_core_document_SOURCES = scribo_core_document.cc
scribo_core_erase_objects_SOURCES = scribo_core_erase_objects.cc
scribo_core_init_integral_image_SOURCES = scribo_core_init_integral_image.cc
-scribo_core_internal_doc_xml_serializer_SOURCES = scribo_core_internal_doc_xml_serializer.cc
+scribo_core_internal_doc_serializer_SOURCES = scribo_core_internal_doc_serializer.cc
scribo_core_line_info_SOURCES = scribo_core_line_info.cc
scribo_core_line_links_SOURCES = scribo_core_line_links.cc
scribo_core_line_set_SOURCES = scribo_core_line_set.cc
@@ -329,25 +362,27 @@ scribo_core_tag_line_SOURCES = scribo_core_tag_line.cc
scribo_debug_alignment_decision_image_SOURCES = scribo_debug_alignment_decision_image.cc
scribo_debug_all_SOURCES = scribo_debug_all.cc
scribo_debug_bboxes_enlarged_image_SOURCES = scribo_debug_bboxes_enlarged_image.cc
+scribo_debug_bboxes_image_SOURCES = scribo_debug_bboxes_image.cc
scribo_debug_char_space_image_SOURCES = scribo_debug_char_space_image.cc
scribo_debug_decision_image_SOURCES = scribo_debug_decision_image.cc
scribo_debug_highlight_text_area_SOURCES = scribo_debug_highlight_text_area.cc
scribo_debug_line_info_image_SOURCES = scribo_debug_line_info_image.cc
+scribo_debug_linked_bboxes_image_SOURCES = scribo_debug_linked_bboxes_image.cc
scribo_debug_links_decision_image_SOURCES = scribo_debug_links_decision_image.cc
+scribo_debug_links_image_SOURCES = scribo_debug_links_image.cc
+scribo_debug_logger_SOURCES = scribo_debug_logger.cc
scribo_debug_looks_like_a_text_line_image_SOURCES = scribo_debug_looks_like_a_text_line_image.cc
scribo_debug_mean_and_base_lines_image_SOURCES = scribo_debug_mean_and_base_lines_image.cc
-scribo_debug_save_bboxes_image_SOURCES = scribo_debug_save_bboxes_image.cc
scribo_debug_save_comp_diff_SOURCES = scribo_debug_save_comp_diff.cc
scribo_debug_save_label_image_SOURCES = scribo_debug_save_label_image.cc
-scribo_debug_save_linked_bboxes_image_SOURCES = scribo_debug_save_linked_bboxes_image.cc
scribo_debug_save_table_image_SOURCES = scribo_debug_save_table_image.cc
-scribo_debug_several_links_decision_image_SOURCES = scribo_debug_several_links_decision_image.cc
scribo_debug_text_areas_image_SOURCES = scribo_debug_text_areas_image.cc
scribo_debug_usage_SOURCES = scribo_debug_usage.cc
scribo_draw_all_SOURCES = scribo_draw_all.cc
scribo_draw_bounding_box_links_SOURCES = scribo_draw_bounding_box_links.cc
scribo_draw_bounding_boxes_SOURCES = scribo_draw_bounding_boxes.cc
scribo_draw_groups_bboxes_SOURCES = scribo_draw_groups_bboxes.cc
+scribo_estim_font_color_SOURCES = scribo_estim_font_color.cc
scribo_estim_object_groups_v_thickness_SOURCES = scribo_estim_object_groups_v_thickness.cc
scribo_filter_all_SOURCES = scribo_filter_all.cc
scribo_filter_common_objects_photo_SOURCES = scribo_filter_common_objects_photo.cc
@@ -359,6 +394,7 @@ scribo_filter_object_groups_size_ratio_SOURCES = scribo_filter_object_groups_siz
scribo_filter_object_groups_small_SOURCES = scribo_filter_object_groups_small.cc
scribo_filter_object_groups_v_thickness_SOURCES = scribo_filter_object_groups_v_thickness.cc
scribo_filter_object_groups_with_holes_SOURCES = scribo_filter_object_groups_with_holes.cc
+scribo_filter_object_links_aligned_SOURCES = scribo_filter_object_links_aligned.cc
scribo_filter_object_links_bbox_h_ratio_SOURCES = scribo_filter_object_links_bbox_h_ratio.cc
scribo_filter_object_links_bbox_overlap_SOURCES = scribo_filter_object_links_bbox_overlap.cc
scribo_filter_object_links_bbox_ratio_SOURCES = scribo_filter_object_links_bbox_ratio.cc
@@ -382,16 +418,25 @@ scribo_filter_objects_with_holes_SOURCES = scribo_filter_objects_with_holes.cc
scribo_fun_v2b_label_to_bool_SOURCES = scribo_fun_v2b_label_to_bool.cc
scribo_fun_v2b_objects_large_filter_SOURCES = scribo_fun_v2b_objects_large_filter.cc
scribo_fun_v2b_objects_small_filter_SOURCES = scribo_fun_v2b_objects_small_filter.cc
+scribo_fun_v2v_highlight_SOURCES = scribo_fun_v2v_highlight.cc
+scribo_io_img_internal_debug_img_visitor_SOURCES = scribo_io_img_internal_debug_img_visitor.cc
+scribo_io_img_internal_draw_edges_SOURCES = scribo_io_img_internal_draw_edges.cc
+scribo_io_img_internal_full_img_visitor_SOURCES = scribo_io_img_internal_full_img_visitor.cc
+scribo_io_img_internal_non_text_img_visitor_SOURCES = scribo_io_img_internal_non_text_img_visitor.cc
+scribo_io_img_internal_text_img_visitor_SOURCES = scribo_io_img_internal_text_img_visitor.cc
+scribo_io_img_save_SOURCES = scribo_io_img_save.cc
scribo_io_text_boxes_save_SOURCES = scribo_io_text_boxes_save.cc
scribo_io_xml_internal_extended_page_xml_visitor_SOURCES = scribo_io_xml_internal_extended_page_xml_visitor.cc
-scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
scribo_io_xml_internal_page_xml_visitor_SOURCES = scribo_io_xml_internal_page_xml_visitor.cc
scribo_io_xml_internal_print_box_coords_SOURCES = scribo_io_xml_internal_print_box_coords.cc
+scribo_io_xml_internal_print_image_coords_SOURCES = scribo_io_xml_internal_print_image_coords.cc
scribo_io_xml_internal_print_page_preambule_SOURCES = scribo_io_xml_internal_print_page_preambule.cc
scribo_io_xml_save_SOURCES = scribo_io_xml_save.cc
scribo_make_all_SOURCES = scribo_make_all.cc
scribo_make_debug_filename_SOURCES = scribo_make_debug_filename.cc
scribo_make_influence_zone_graph_SOURCES = scribo_make_influence_zone_graph.cc
+scribo_make_text_blocks_image_SOURCES = scribo_make_text_blocks_image.cc
+scribo_make_text_components_image_SOURCES = scribo_make_text_components_image.cc
scribo_postprocessing_all_SOURCES = scribo_postprocessing_all.cc
scribo_postprocessing_fill_object_holes_SOURCES = scribo_postprocessing_fill_object_holes.cc
scribo_preprocessing_all_SOURCES = scribo_preprocessing_all.cc
@@ -411,6 +456,7 @@ scribo_primitive_extract_canvas_SOURCES = scribo_primitive_extract_canvas.cc
scribo_primitive_extract_cells_SOURCES = scribo_primitive_extract_cells.cc
scribo_primitive_extract_components_SOURCES = scribo_primitive_extract_components.cc
scribo_primitive_extract_horizontal_separators_SOURCES = scribo_primitive_extract_horizontal_separators.cc
+scribo_primitive_extract_internal_union_SOURCES = scribo_primitive_extract_internal_union.cc
scribo_primitive_extract_lines_discontinued_SOURCES = scribo_primitive_extract_lines_discontinued.cc
scribo_primitive_extract_lines_h_discontinued_SOURCES = scribo_primitive_extract_lines_h_discontinued.cc
scribo_primitive_extract_lines_h_pattern_SOURCES = scribo_primitive_extract_lines_h_pattern.cc
@@ -425,6 +471,7 @@ scribo_primitive_extract_lines_v_single_SOURCES = scribo_primitive_extract_lines
scribo_primitive_extract_lines_v_thick_SOURCES = scribo_primitive_extract_lines_v_thick.cc
scribo_primitive_extract_lines_v_thick_and_single_SOURCES = scribo_primitive_extract_lines_v_thick_and_single.cc
scribo_primitive_extract_non_text_SOURCES = scribo_primitive_extract_non_text.cc
+scribo_primitive_extract_non_text_kmean_SOURCES = scribo_primitive_extract_non_text_kmean.cc
scribo_primitive_extract_separators_SOURCES = scribo_primitive_extract_separators.cc
scribo_primitive_extract_separators_nonvisible_SOURCES = scribo_primitive_extract_separators_nonvisible.cc
scribo_primitive_extract_vertical_separators_SOURCES = scribo_primitive_extract_vertical_separators.cc
@@ -453,6 +500,7 @@ scribo_primitive_link_internal_find_link_SOURCES = scribo_primitive_link_interna
scribo_primitive_link_internal_link_functor_base_SOURCES = scribo_primitive_link_internal_link_functor_base.cc
scribo_primitive_link_internal_link_several_dmax_base_SOURCES = scribo_primitive_link_internal_link_several_dmax_base.cc
scribo_primitive_link_internal_link_single_dmax_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_base.cc
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base.cc
scribo_primitive_link_internal_link_single_dmax_ratio_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_base.cc
scribo_primitive_link_merge_double_link_SOURCES = scribo_primitive_link_merge_double_link.cc
scribo_primitive_link_with_graph_SOURCES = scribo_primitive_link_with_graph.cc
--
1.5.6.5
1
0
14 Mar '11
* tests/unit_test/Makefile.am: Add new conditional test file.
* tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
* tests/unit_test/cond_tests_qt,
* tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
* tests/unit_test/unit-tests.mk: Regen.
---
scribo/ChangeLog | 12 +++
scribo/tests/unit_test/Makefile.am | 7 +-
..._tiff => cond_tests_magickxx_qt_tesseract_tiff} | 1 +
scribo/tests/unit_test/cond_tests_qt | 2 +
.../tests/unit_test/cond_tests_qt_tesseract_tiff | 2 +-
scribo/tests/unit_test/unit-tests.mk | 96 +++++++++++++++-----
6 files changed, 92 insertions(+), 28 deletions(-)
copy scribo/tests/unit_test/{cond_tests_qt_tesseract_tiff => cond_tests_magickxx_qt_tesseract_tiff} (97%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index cc6e861..60d1277 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Set unit tests dependencies.
+
+ * tests/unit_test/Makefile.am: Add new conditional test file.
+
+ * tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
+ * tests/unit_test/cond_tests_qt,
+ * tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
+
+ * tests/unit_test/unit-tests.mk: Regen.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New debug tools.
* src/Makefile.am,
diff --git a/scribo/tests/unit_test/Makefile.am b/scribo/tests/unit_test/Makefile.am
index c88f553..e4c9e1b 100644
--- a/scribo/tests/unit_test/Makefile.am
+++ b/scribo/tests/unit_test/Makefile.am
@@ -17,9 +17,10 @@
include $(top_srcdir)/scribo/tests/tests.mk
-COND_TESTS = cond_tests_magickxx_tesseract_tiff \
- cond_tests_qt \
- cond_tests_qt_tesseract_tiff \
+COND_TESTS = cond_tests_magickxx_tesseract_tiff \
+ cond_tests_magickxx_qt_tesseract_tiff \
+ cond_tests_qt \
+ cond_tests_qt_tesseract_tiff \
cond_tests_tesseract_tiff
EXTRA_DIST = disabled_tests \
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
similarity index 97%
copy from scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
copy to scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
index 7dfe76d..f5840ef 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
@@ -1 +1,2 @@
scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/cond_tests_qt b/scribo/tests/unit_test/cond_tests_qt
index 577f9e6..f7bc42e 100644
--- a/scribo/tests/unit_test/cond_tests_qt
+++ b/scribo/tests/unit_test/cond_tests_qt
@@ -1,2 +1,4 @@
+scribo/convert/from_base64.hh
scribo/convert/from_qimage.hh
+scribo/io/xml/internal/full_xml_visitor.hh
scribo/io/xml/load.hh
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
index 7dfe76d..8b13789 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
@@ -1 +1 @@
-scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/unit-tests.mk b/scribo/tests/unit_test/unit-tests.mk
index 71b0b5b..b0f5982 100644
--- a/scribo/tests/unit_test/unit-tests.mk
+++ b/scribo/tests/unit_test/unit-tests.mk
@@ -4,12 +4,28 @@ check_PROGRAMS =
# Starting a conditional unit test list.
if HAVE_MAGICKXX
+if HAVE_QT
+if HAVE_TESSERACT
+if HAVE_TIFF
+check_PROGRAMS += \
+scribo_toolchain_nepomuk_text_extraction
+
+scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${QT_CPPFLAGS} -DHAVE_QT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
+scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
+scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
+endif HAVE_TIFF
+endif HAVE_TESSERACT
+endif HAVE_QT
+endif HAVE_MAGICKXX
+
+# Starting a conditional unit test list.
+if HAVE_MAGICKXX
if HAVE_TESSERACT
if HAVE_TIFF
check_PROGRAMS += \
scribo_toolchain_internal_content_in_doc_functor
-scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${MAGICKXX_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
scribo_toolchain_internal_content_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_content_in_doc_functor_SOURCES = scribo_toolchain_internal_content_in_doc_functor.cc
endif HAVE_TIFF
@@ -19,13 +35,21 @@ endif HAVE_MAGICKXX
# Starting a conditional unit test list.
if HAVE_QT
check_PROGRAMS += \
+scribo_convert_from_base64 \
scribo_convert_from_qimage \
+scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_load
-scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_convert_from_base64_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_convert_from_base64_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
+scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_convert_from_qimage_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_convert_from_qimage_SOURCES = scribo_convert_from_qimage.cc
-scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
+scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_io_xml_load_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_io_xml_load_SOURCES = scribo_io_xml_load.cc
endif HAVE_QT
@@ -34,12 +58,8 @@ endif HAVE_QT
if HAVE_QT
if HAVE_TESSERACT
if HAVE_TIFF
-check_PROGRAMS += \
-scribo_toolchain_nepomuk_text_extraction
+check_PROGRAMS +=
-scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${QT_CPPFLAGS} ${AM_CPPFLAGS}
-scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${AM_LDFLAGS}
-scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
endif HAVE_TIFF
endif HAVE_TESSERACT
endif HAVE_QT
@@ -53,16 +73,16 @@ scribo_toolchain_internal_text_in_doc_functor \
scribo_toolchain_content_in_doc \
scribo_toolchain_text_in_doc
-scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_text_recognition_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_text_recognition_SOURCES = scribo_text_recognition.cc
-scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_internal_text_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_text_in_doc_functor_SOURCES = scribo_toolchain_internal_text_in_doc_functor.cc
-scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_content_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_content_in_doc_SOURCES = scribo_toolchain_content_in_doc.cc
-scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_text_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_text_in_doc_SOURCES = scribo_toolchain_text_in_doc.cc
endif HAVE_TIFF
@@ -83,7 +103,6 @@ scribo_binarization_sauvola_ms \
scribo_binarization_sauvola_ms_split \
scribo_binarization_sauvola_threshold_image \
scribo_canvas_integral_browsing \
-scribo_convert_from_base64 \
scribo_convert_to_base64 \
scribo_core_all \
scribo_core_central_sites \
@@ -97,7 +116,7 @@ scribo_core_def_lbl_type \
scribo_core_document \
scribo_core_erase_objects \
scribo_core_init_integral_image \
-scribo_core_internal_doc_xml_serializer \
+scribo_core_internal_doc_serializer \
scribo_core_line_info \
scribo_core_line_links \
scribo_core_line_set \
@@ -112,25 +131,27 @@ scribo_core_tag_line \
scribo_debug_alignment_decision_image \
scribo_debug_all \
scribo_debug_bboxes_enlarged_image \
+scribo_debug_bboxes_image \
scribo_debug_char_space_image \
scribo_debug_decision_image \
scribo_debug_highlight_text_area \
scribo_debug_line_info_image \
+scribo_debug_linked_bboxes_image \
scribo_debug_links_decision_image \
+scribo_debug_links_image \
+scribo_debug_logger \
scribo_debug_looks_like_a_text_line_image \
scribo_debug_mean_and_base_lines_image \
-scribo_debug_save_bboxes_image \
scribo_debug_save_comp_diff \
scribo_debug_save_label_image \
-scribo_debug_save_linked_bboxes_image \
scribo_debug_save_table_image \
-scribo_debug_several_links_decision_image \
scribo_debug_text_areas_image \
scribo_debug_usage \
scribo_draw_all \
scribo_draw_bounding_box_links \
scribo_draw_bounding_boxes \
scribo_draw_groups_bboxes \
+scribo_estim_font_color \
scribo_estim_object_groups_v_thickness \
scribo_filter_all \
scribo_filter_common_objects_photo \
@@ -142,6 +163,7 @@ scribo_filter_object_groups_size_ratio \
scribo_filter_object_groups_small \
scribo_filter_object_groups_v_thickness \
scribo_filter_object_groups_with_holes \
+scribo_filter_object_links_aligned \
scribo_filter_object_links_bbox_h_ratio \
scribo_filter_object_links_bbox_overlap \
scribo_filter_object_links_bbox_ratio \
@@ -165,16 +187,25 @@ scribo_filter_objects_with_holes \
scribo_fun_v2b_label_to_bool \
scribo_fun_v2b_objects_large_filter \
scribo_fun_v2b_objects_small_filter \
+scribo_fun_v2v_highlight \
+scribo_io_img_internal_debug_img_visitor \
+scribo_io_img_internal_draw_edges \
+scribo_io_img_internal_full_img_visitor \
+scribo_io_img_internal_non_text_img_visitor \
+scribo_io_img_internal_text_img_visitor \
+scribo_io_img_save \
scribo_io_text_boxes_save \
scribo_io_xml_internal_extended_page_xml_visitor \
-scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_internal_page_xml_visitor \
scribo_io_xml_internal_print_box_coords \
+scribo_io_xml_internal_print_image_coords \
scribo_io_xml_internal_print_page_preambule \
scribo_io_xml_save \
scribo_make_all \
scribo_make_debug_filename \
scribo_make_influence_zone_graph \
+scribo_make_text_blocks_image \
+scribo_make_text_components_image \
scribo_postprocessing_all \
scribo_postprocessing_fill_object_holes \
scribo_preprocessing_all \
@@ -194,6 +225,7 @@ scribo_primitive_extract_canvas \
scribo_primitive_extract_cells \
scribo_primitive_extract_components \
scribo_primitive_extract_horizontal_separators \
+scribo_primitive_extract_internal_union \
scribo_primitive_extract_lines_discontinued \
scribo_primitive_extract_lines_h_discontinued \
scribo_primitive_extract_lines_h_pattern \
@@ -208,6 +240,7 @@ scribo_primitive_extract_lines_v_single \
scribo_primitive_extract_lines_v_thick \
scribo_primitive_extract_lines_v_thick_and_single \
scribo_primitive_extract_non_text \
+scribo_primitive_extract_non_text_kmean \
scribo_primitive_extract_separators \
scribo_primitive_extract_separators_nonvisible \
scribo_primitive_extract_vertical_separators \
@@ -236,6 +269,7 @@ scribo_primitive_link_internal_find_link \
scribo_primitive_link_internal_link_functor_base \
scribo_primitive_link_internal_link_several_dmax_base \
scribo_primitive_link_internal_link_single_dmax_base \
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base \
scribo_primitive_link_internal_link_single_dmax_ratio_base \
scribo_primitive_link_merge_double_link \
scribo_primitive_link_with_graph \
@@ -300,7 +334,6 @@ scribo_binarization_sauvola_ms_SOURCES = scribo_binarization_sauvola_ms.cc
scribo_binarization_sauvola_ms_split_SOURCES = scribo_binarization_sauvola_ms_split.cc
scribo_binarization_sauvola_threshold_image_SOURCES = scribo_binarization_sauvola_threshold_image.cc
scribo_canvas_integral_browsing_SOURCES = scribo_canvas_integral_browsing.cc
-scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
scribo_convert_to_base64_SOURCES = scribo_convert_to_base64.cc
scribo_core_all_SOURCES = scribo_core_all.cc
scribo_core_central_sites_SOURCES = scribo_core_central_sites.cc
@@ -314,7 +347,7 @@ scribo_core_def_lbl_type_SOURCES = scribo_core_def_lbl_type.cc
scribo_core_document_SOURCES = scribo_core_document.cc
scribo_core_erase_objects_SOURCES = scribo_core_erase_objects.cc
scribo_core_init_integral_image_SOURCES = scribo_core_init_integral_image.cc
-scribo_core_internal_doc_xml_serializer_SOURCES = scribo_core_internal_doc_xml_serializer.cc
+scribo_core_internal_doc_serializer_SOURCES = scribo_core_internal_doc_serializer.cc
scribo_core_line_info_SOURCES = scribo_core_line_info.cc
scribo_core_line_links_SOURCES = scribo_core_line_links.cc
scribo_core_line_set_SOURCES = scribo_core_line_set.cc
@@ -329,25 +362,27 @@ scribo_core_tag_line_SOURCES = scribo_core_tag_line.cc
scribo_debug_alignment_decision_image_SOURCES = scribo_debug_alignment_decision_image.cc
scribo_debug_all_SOURCES = scribo_debug_all.cc
scribo_debug_bboxes_enlarged_image_SOURCES = scribo_debug_bboxes_enlarged_image.cc
+scribo_debug_bboxes_image_SOURCES = scribo_debug_bboxes_image.cc
scribo_debug_char_space_image_SOURCES = scribo_debug_char_space_image.cc
scribo_debug_decision_image_SOURCES = scribo_debug_decision_image.cc
scribo_debug_highlight_text_area_SOURCES = scribo_debug_highlight_text_area.cc
scribo_debug_line_info_image_SOURCES = scribo_debug_line_info_image.cc
+scribo_debug_linked_bboxes_image_SOURCES = scribo_debug_linked_bboxes_image.cc
scribo_debug_links_decision_image_SOURCES = scribo_debug_links_decision_image.cc
+scribo_debug_links_image_SOURCES = scribo_debug_links_image.cc
+scribo_debug_logger_SOURCES = scribo_debug_logger.cc
scribo_debug_looks_like_a_text_line_image_SOURCES = scribo_debug_looks_like_a_text_line_image.cc
scribo_debug_mean_and_base_lines_image_SOURCES = scribo_debug_mean_and_base_lines_image.cc
-scribo_debug_save_bboxes_image_SOURCES = scribo_debug_save_bboxes_image.cc
scribo_debug_save_comp_diff_SOURCES = scribo_debug_save_comp_diff.cc
scribo_debug_save_label_image_SOURCES = scribo_debug_save_label_image.cc
-scribo_debug_save_linked_bboxes_image_SOURCES = scribo_debug_save_linked_bboxes_image.cc
scribo_debug_save_table_image_SOURCES = scribo_debug_save_table_image.cc
-scribo_debug_several_links_decision_image_SOURCES = scribo_debug_several_links_decision_image.cc
scribo_debug_text_areas_image_SOURCES = scribo_debug_text_areas_image.cc
scribo_debug_usage_SOURCES = scribo_debug_usage.cc
scribo_draw_all_SOURCES = scribo_draw_all.cc
scribo_draw_bounding_box_links_SOURCES = scribo_draw_bounding_box_links.cc
scribo_draw_bounding_boxes_SOURCES = scribo_draw_bounding_boxes.cc
scribo_draw_groups_bboxes_SOURCES = scribo_draw_groups_bboxes.cc
+scribo_estim_font_color_SOURCES = scribo_estim_font_color.cc
scribo_estim_object_groups_v_thickness_SOURCES = scribo_estim_object_groups_v_thickness.cc
scribo_filter_all_SOURCES = scribo_filter_all.cc
scribo_filter_common_objects_photo_SOURCES = scribo_filter_common_objects_photo.cc
@@ -359,6 +394,7 @@ scribo_filter_object_groups_size_ratio_SOURCES = scribo_filter_object_groups_siz
scribo_filter_object_groups_small_SOURCES = scribo_filter_object_groups_small.cc
scribo_filter_object_groups_v_thickness_SOURCES = scribo_filter_object_groups_v_thickness.cc
scribo_filter_object_groups_with_holes_SOURCES = scribo_filter_object_groups_with_holes.cc
+scribo_filter_object_links_aligned_SOURCES = scribo_filter_object_links_aligned.cc
scribo_filter_object_links_bbox_h_ratio_SOURCES = scribo_filter_object_links_bbox_h_ratio.cc
scribo_filter_object_links_bbox_overlap_SOURCES = scribo_filter_object_links_bbox_overlap.cc
scribo_filter_object_links_bbox_ratio_SOURCES = scribo_filter_object_links_bbox_ratio.cc
@@ -382,16 +418,25 @@ scribo_filter_objects_with_holes_SOURCES = scribo_filter_objects_with_holes.cc
scribo_fun_v2b_label_to_bool_SOURCES = scribo_fun_v2b_label_to_bool.cc
scribo_fun_v2b_objects_large_filter_SOURCES = scribo_fun_v2b_objects_large_filter.cc
scribo_fun_v2b_objects_small_filter_SOURCES = scribo_fun_v2b_objects_small_filter.cc
+scribo_fun_v2v_highlight_SOURCES = scribo_fun_v2v_highlight.cc
+scribo_io_img_internal_debug_img_visitor_SOURCES = scribo_io_img_internal_debug_img_visitor.cc
+scribo_io_img_internal_draw_edges_SOURCES = scribo_io_img_internal_draw_edges.cc
+scribo_io_img_internal_full_img_visitor_SOURCES = scribo_io_img_internal_full_img_visitor.cc
+scribo_io_img_internal_non_text_img_visitor_SOURCES = scribo_io_img_internal_non_text_img_visitor.cc
+scribo_io_img_internal_text_img_visitor_SOURCES = scribo_io_img_internal_text_img_visitor.cc
+scribo_io_img_save_SOURCES = scribo_io_img_save.cc
scribo_io_text_boxes_save_SOURCES = scribo_io_text_boxes_save.cc
scribo_io_xml_internal_extended_page_xml_visitor_SOURCES = scribo_io_xml_internal_extended_page_xml_visitor.cc
-scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
scribo_io_xml_internal_page_xml_visitor_SOURCES = scribo_io_xml_internal_page_xml_visitor.cc
scribo_io_xml_internal_print_box_coords_SOURCES = scribo_io_xml_internal_print_box_coords.cc
+scribo_io_xml_internal_print_image_coords_SOURCES = scribo_io_xml_internal_print_image_coords.cc
scribo_io_xml_internal_print_page_preambule_SOURCES = scribo_io_xml_internal_print_page_preambule.cc
scribo_io_xml_save_SOURCES = scribo_io_xml_save.cc
scribo_make_all_SOURCES = scribo_make_all.cc
scribo_make_debug_filename_SOURCES = scribo_make_debug_filename.cc
scribo_make_influence_zone_graph_SOURCES = scribo_make_influence_zone_graph.cc
+scribo_make_text_blocks_image_SOURCES = scribo_make_text_blocks_image.cc
+scribo_make_text_components_image_SOURCES = scribo_make_text_components_image.cc
scribo_postprocessing_all_SOURCES = scribo_postprocessing_all.cc
scribo_postprocessing_fill_object_holes_SOURCES = scribo_postprocessing_fill_object_holes.cc
scribo_preprocessing_all_SOURCES = scribo_preprocessing_all.cc
@@ -411,6 +456,7 @@ scribo_primitive_extract_canvas_SOURCES = scribo_primitive_extract_canvas.cc
scribo_primitive_extract_cells_SOURCES = scribo_primitive_extract_cells.cc
scribo_primitive_extract_components_SOURCES = scribo_primitive_extract_components.cc
scribo_primitive_extract_horizontal_separators_SOURCES = scribo_primitive_extract_horizontal_separators.cc
+scribo_primitive_extract_internal_union_SOURCES = scribo_primitive_extract_internal_union.cc
scribo_primitive_extract_lines_discontinued_SOURCES = scribo_primitive_extract_lines_discontinued.cc
scribo_primitive_extract_lines_h_discontinued_SOURCES = scribo_primitive_extract_lines_h_discontinued.cc
scribo_primitive_extract_lines_h_pattern_SOURCES = scribo_primitive_extract_lines_h_pattern.cc
@@ -425,6 +471,7 @@ scribo_primitive_extract_lines_v_single_SOURCES = scribo_primitive_extract_lines
scribo_primitive_extract_lines_v_thick_SOURCES = scribo_primitive_extract_lines_v_thick.cc
scribo_primitive_extract_lines_v_thick_and_single_SOURCES = scribo_primitive_extract_lines_v_thick_and_single.cc
scribo_primitive_extract_non_text_SOURCES = scribo_primitive_extract_non_text.cc
+scribo_primitive_extract_non_text_kmean_SOURCES = scribo_primitive_extract_non_text_kmean.cc
scribo_primitive_extract_separators_SOURCES = scribo_primitive_extract_separators.cc
scribo_primitive_extract_separators_nonvisible_SOURCES = scribo_primitive_extract_separators_nonvisible.cc
scribo_primitive_extract_vertical_separators_SOURCES = scribo_primitive_extract_vertical_separators.cc
@@ -453,6 +500,7 @@ scribo_primitive_link_internal_find_link_SOURCES = scribo_primitive_link_interna
scribo_primitive_link_internal_link_functor_base_SOURCES = scribo_primitive_link_internal_link_functor_base.cc
scribo_primitive_link_internal_link_several_dmax_base_SOURCES = scribo_primitive_link_internal_link_several_dmax_base.cc
scribo_primitive_link_internal_link_single_dmax_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_base.cc
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base.cc
scribo_primitive_link_internal_link_single_dmax_ratio_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_base.cc
scribo_primitive_link_merge_double_link_SOURCES = scribo_primitive_link_merge_double_link.cc
scribo_primitive_link_with_graph_SOURCES = scribo_primitive_link_with_graph.cc
--
1.5.6.5
1
0
14 Mar '11
Set unit tests dependencies.

* tests/unit_test/Makefile.am: Add new conditional test file.
* tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
* tests/unit_test/cond_tests_qt,
* tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
* tests/unit_test/unit-tests.mk: Regen.
---
scribo/ChangeLog | 12 +++
scribo/tests/unit_test/Makefile.am | 7 +-
..._tiff => cond_tests_magickxx_qt_tesseract_tiff} | 1 +
scribo/tests/unit_test/cond_tests_qt | 2 +
.../tests/unit_test/cond_tests_qt_tesseract_tiff | 2 +-
scribo/tests/unit_test/unit-tests.mk | 96 +++++++++++++++-----
6 files changed, 92 insertions(+), 28 deletions(-)
copy scribo/tests/unit_test/{cond_tests_qt_tesseract_tiff => cond_tests_magickxx_qt_tesseract_tiff} (97%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index cc6e861..60d1277 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Set unit tests dependencies.
+
+ * tests/unit_test/Makefile.am: Add new conditional test file.
+
+ * tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
+ * tests/unit_test/cond_tests_qt,
+ * tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
+
+ * tests/unit_test/unit-tests.mk: Regen.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New debug tools.
* src/Makefile.am,
diff --git a/scribo/tests/unit_test/Makefile.am b/scribo/tests/unit_test/Makefile.am
index c88f553..e4c9e1b 100644
--- a/scribo/tests/unit_test/Makefile.am
+++ b/scribo/tests/unit_test/Makefile.am
@@ -17,9 +17,10 @@
include $(top_srcdir)/scribo/tests/tests.mk
-COND_TESTS = cond_tests_magickxx_tesseract_tiff \
- cond_tests_qt \
- cond_tests_qt_tesseract_tiff \
+COND_TESTS = cond_tests_magickxx_tesseract_tiff \
+ cond_tests_magickxx_qt_tesseract_tiff \
+ cond_tests_qt \
+ cond_tests_qt_tesseract_tiff \
cond_tests_tesseract_tiff
EXTRA_DIST = disabled_tests \
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
similarity index 97%
copy from scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
copy to scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
index 7dfe76d..f5840ef 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
@@ -1 +1,2 @@
scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/cond_tests_qt b/scribo/tests/unit_test/cond_tests_qt
index 577f9e6..f7bc42e 100644
--- a/scribo/tests/unit_test/cond_tests_qt
+++ b/scribo/tests/unit_test/cond_tests_qt
@@ -1,2 +1,4 @@
+scribo/convert/from_base64.hh
scribo/convert/from_qimage.hh
+scribo/io/xml/internal/full_xml_visitor.hh
scribo/io/xml/load.hh
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
index 7dfe76d..8b13789 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
@@ -1 +1 @@
-scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/unit-tests.mk b/scribo/tests/unit_test/unit-tests.mk
index 71b0b5b..b0f5982 100644
--- a/scribo/tests/unit_test/unit-tests.mk
+++ b/scribo/tests/unit_test/unit-tests.mk
@@ -4,12 +4,28 @@ check_PROGRAMS =
# Starting a conditional unit test list.
if HAVE_MAGICKXX
+if HAVE_QT
+if HAVE_TESSERACT
+if HAVE_TIFF
+check_PROGRAMS += \
+scribo_toolchain_nepomuk_text_extraction
+
+scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${QT_CPPFLAGS} -DHAVE_QT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
+scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
+scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
+endif HAVE_TIFF
+endif HAVE_TESSERACT
+endif HAVE_QT
+endif HAVE_MAGICKXX
+
+# Starting a conditional unit test list.
+if HAVE_MAGICKXX
if HAVE_TESSERACT
if HAVE_TIFF
check_PROGRAMS += \
scribo_toolchain_internal_content_in_doc_functor
-scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${MAGICKXX_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
scribo_toolchain_internal_content_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_content_in_doc_functor_SOURCES = scribo_toolchain_internal_content_in_doc_functor.cc
endif HAVE_TIFF
@@ -19,13 +35,21 @@ endif HAVE_MAGICKXX
# Starting a conditional unit test list.
if HAVE_QT
check_PROGRAMS += \
+scribo_convert_from_base64 \
scribo_convert_from_qimage \
+scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_load
-scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_convert_from_base64_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_convert_from_base64_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
+scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_convert_from_qimage_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_convert_from_qimage_SOURCES = scribo_convert_from_qimage.cc
-scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
+scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_io_xml_load_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_io_xml_load_SOURCES = scribo_io_xml_load.cc
endif HAVE_QT
@@ -34,12 +58,8 @@ endif HAVE_QT
if HAVE_QT
if HAVE_TESSERACT
if HAVE_TIFF
-check_PROGRAMS += \
-scribo_toolchain_nepomuk_text_extraction
+check_PROGRAMS +=
-scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${QT_CPPFLAGS} ${AM_CPPFLAGS}
-scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${AM_LDFLAGS}
-scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
endif HAVE_TIFF
endif HAVE_TESSERACT
endif HAVE_QT
@@ -53,16 +73,16 @@ scribo_toolchain_internal_text_in_doc_functor \
scribo_toolchain_content_in_doc \
scribo_toolchain_text_in_doc
-scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_text_recognition_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_text_recognition_SOURCES = scribo_text_recognition.cc
-scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_internal_text_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_text_in_doc_functor_SOURCES = scribo_toolchain_internal_text_in_doc_functor.cc
-scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_content_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_content_in_doc_SOURCES = scribo_toolchain_content_in_doc.cc
-scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_text_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_text_in_doc_SOURCES = scribo_toolchain_text_in_doc.cc
endif HAVE_TIFF
@@ -83,7 +103,6 @@ scribo_binarization_sauvola_ms \
scribo_binarization_sauvola_ms_split \
scribo_binarization_sauvola_threshold_image \
scribo_canvas_integral_browsing \
-scribo_convert_from_base64 \
scribo_convert_to_base64 \
scribo_core_all \
scribo_core_central_sites \
@@ -97,7 +116,7 @@ scribo_core_def_lbl_type \
scribo_core_document \
scribo_core_erase_objects \
scribo_core_init_integral_image \
-scribo_core_internal_doc_xml_serializer \
+scribo_core_internal_doc_serializer \
scribo_core_line_info \
scribo_core_line_links \
scribo_core_line_set \
@@ -112,25 +131,27 @@ scribo_core_tag_line \
scribo_debug_alignment_decision_image \
scribo_debug_all \
scribo_debug_bboxes_enlarged_image \
+scribo_debug_bboxes_image \
scribo_debug_char_space_image \
scribo_debug_decision_image \
scribo_debug_highlight_text_area \
scribo_debug_line_info_image \
+scribo_debug_linked_bboxes_image \
scribo_debug_links_decision_image \
+scribo_debug_links_image \
+scribo_debug_logger \
scribo_debug_looks_like_a_text_line_image \
scribo_debug_mean_and_base_lines_image \
-scribo_debug_save_bboxes_image \
scribo_debug_save_comp_diff \
scribo_debug_save_label_image \
-scribo_debug_save_linked_bboxes_image \
scribo_debug_save_table_image \
-scribo_debug_several_links_decision_image \
scribo_debug_text_areas_image \
scribo_debug_usage \
scribo_draw_all \
scribo_draw_bounding_box_links \
scribo_draw_bounding_boxes \
scribo_draw_groups_bboxes \
+scribo_estim_font_color \
scribo_estim_object_groups_v_thickness \
scribo_filter_all \
scribo_filter_common_objects_photo \
@@ -142,6 +163,7 @@ scribo_filter_object_groups_size_ratio \
scribo_filter_object_groups_small \
scribo_filter_object_groups_v_thickness \
scribo_filter_object_groups_with_holes \
+scribo_filter_object_links_aligned \
scribo_filter_object_links_bbox_h_ratio \
scribo_filter_object_links_bbox_overlap \
scribo_filter_object_links_bbox_ratio \
@@ -165,16 +187,25 @@ scribo_filter_objects_with_holes \
scribo_fun_v2b_label_to_bool \
scribo_fun_v2b_objects_large_filter \
scribo_fun_v2b_objects_small_filter \
+scribo_fun_v2v_highlight \
+scribo_io_img_internal_debug_img_visitor \
+scribo_io_img_internal_draw_edges \
+scribo_io_img_internal_full_img_visitor \
+scribo_io_img_internal_non_text_img_visitor \
+scribo_io_img_internal_text_img_visitor \
+scribo_io_img_save \
scribo_io_text_boxes_save \
scribo_io_xml_internal_extended_page_xml_visitor \
-scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_internal_page_xml_visitor \
scribo_io_xml_internal_print_box_coords \
+scribo_io_xml_internal_print_image_coords \
scribo_io_xml_internal_print_page_preambule \
scribo_io_xml_save \
scribo_make_all \
scribo_make_debug_filename \
scribo_make_influence_zone_graph \
+scribo_make_text_blocks_image \
+scribo_make_text_components_image \
scribo_postprocessing_all \
scribo_postprocessing_fill_object_holes \
scribo_preprocessing_all \
@@ -194,6 +225,7 @@ scribo_primitive_extract_canvas \
scribo_primitive_extract_cells \
scribo_primitive_extract_components \
scribo_primitive_extract_horizontal_separators \
+scribo_primitive_extract_internal_union \
scribo_primitive_extract_lines_discontinued \
scribo_primitive_extract_lines_h_discontinued \
scribo_primitive_extract_lines_h_pattern \
@@ -208,6 +240,7 @@ scribo_primitive_extract_lines_v_single \
scribo_primitive_extract_lines_v_thick \
scribo_primitive_extract_lines_v_thick_and_single \
scribo_primitive_extract_non_text \
+scribo_primitive_extract_non_text_kmean \
scribo_primitive_extract_separators \
scribo_primitive_extract_separators_nonvisible \
scribo_primitive_extract_vertical_separators \
@@ -236,6 +269,7 @@ scribo_primitive_link_internal_find_link \
scribo_primitive_link_internal_link_functor_base \
scribo_primitive_link_internal_link_several_dmax_base \
scribo_primitive_link_internal_link_single_dmax_base \
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base \
scribo_primitive_link_internal_link_single_dmax_ratio_base \
scribo_primitive_link_merge_double_link \
scribo_primitive_link_with_graph \
@@ -300,7 +334,6 @@ scribo_binarization_sauvola_ms_SOURCES = scribo_binarization_sauvola_ms.cc
scribo_binarization_sauvola_ms_split_SOURCES = scribo_binarization_sauvola_ms_split.cc
scribo_binarization_sauvola_threshold_image_SOURCES = scribo_binarization_sauvola_threshold_image.cc
scribo_canvas_integral_browsing_SOURCES = scribo_canvas_integral_browsing.cc
-scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
scribo_convert_to_base64_SOURCES = scribo_convert_to_base64.cc
scribo_core_all_SOURCES = scribo_core_all.cc
scribo_core_central_sites_SOURCES = scribo_core_central_sites.cc
@@ -314,7 +347,7 @@ scribo_core_def_lbl_type_SOURCES = scribo_core_def_lbl_type.cc
scribo_core_document_SOURCES = scribo_core_document.cc
scribo_core_erase_objects_SOURCES = scribo_core_erase_objects.cc
scribo_core_init_integral_image_SOURCES = scribo_core_init_integral_image.cc
-scribo_core_internal_doc_xml_serializer_SOURCES = scribo_core_internal_doc_xml_serializer.cc
+scribo_core_internal_doc_serializer_SOURCES = scribo_core_internal_doc_serializer.cc
scribo_core_line_info_SOURCES = scribo_core_line_info.cc
scribo_core_line_links_SOURCES = scribo_core_line_links.cc
scribo_core_line_set_SOURCES = scribo_core_line_set.cc
@@ -329,25 +362,27 @@ scribo_core_tag_line_SOURCES = scribo_core_tag_line.cc
scribo_debug_alignment_decision_image_SOURCES = scribo_debug_alignment_decision_image.cc
scribo_debug_all_SOURCES = scribo_debug_all.cc
scribo_debug_bboxes_enlarged_image_SOURCES = scribo_debug_bboxes_enlarged_image.cc
+scribo_debug_bboxes_image_SOURCES = scribo_debug_bboxes_image.cc
scribo_debug_char_space_image_SOURCES = scribo_debug_char_space_image.cc
scribo_debug_decision_image_SOURCES = scribo_debug_decision_image.cc
scribo_debug_highlight_text_area_SOURCES = scribo_debug_highlight_text_area.cc
scribo_debug_line_info_image_SOURCES = scribo_debug_line_info_image.cc
+scribo_debug_linked_bboxes_image_SOURCES = scribo_debug_linked_bboxes_image.cc
scribo_debug_links_decision_image_SOURCES = scribo_debug_links_decision_image.cc
+scribo_debug_links_image_SOURCES = scribo_debug_links_image.cc
+scribo_debug_logger_SOURCES = scribo_debug_logger.cc
scribo_debug_looks_like_a_text_line_image_SOURCES = scribo_debug_looks_like_a_text_line_image.cc
scribo_debug_mean_and_base_lines_image_SOURCES = scribo_debug_mean_and_base_lines_image.cc
-scribo_debug_save_bboxes_image_SOURCES = scribo_debug_save_bboxes_image.cc
scribo_debug_save_comp_diff_SOURCES = scribo_debug_save_comp_diff.cc
scribo_debug_save_label_image_SOURCES = scribo_debug_save_label_image.cc
-scribo_debug_save_linked_bboxes_image_SOURCES = scribo_debug_save_linked_bboxes_image.cc
scribo_debug_save_table_image_SOURCES = scribo_debug_save_table_image.cc
-scribo_debug_several_links_decision_image_SOURCES = scribo_debug_several_links_decision_image.cc
scribo_debug_text_areas_image_SOURCES = scribo_debug_text_areas_image.cc
scribo_debug_usage_SOURCES = scribo_debug_usage.cc
scribo_draw_all_SOURCES = scribo_draw_all.cc
scribo_draw_bounding_box_links_SOURCES = scribo_draw_bounding_box_links.cc
scribo_draw_bounding_boxes_SOURCES = scribo_draw_bounding_boxes.cc
scribo_draw_groups_bboxes_SOURCES = scribo_draw_groups_bboxes.cc
+scribo_estim_font_color_SOURCES = scribo_estim_font_color.cc
scribo_estim_object_groups_v_thickness_SOURCES = scribo_estim_object_groups_v_thickness.cc
scribo_filter_all_SOURCES = scribo_filter_all.cc
scribo_filter_common_objects_photo_SOURCES = scribo_filter_common_objects_photo.cc
@@ -359,6 +394,7 @@ scribo_filter_object_groups_size_ratio_SOURCES = scribo_filter_object_groups_siz
scribo_filter_object_groups_small_SOURCES = scribo_filter_object_groups_small.cc
scribo_filter_object_groups_v_thickness_SOURCES = scribo_filter_object_groups_v_thickness.cc
scribo_filter_object_groups_with_holes_SOURCES = scribo_filter_object_groups_with_holes.cc
+scribo_filter_object_links_aligned_SOURCES = scribo_filter_object_links_aligned.cc
scribo_filter_object_links_bbox_h_ratio_SOURCES = scribo_filter_object_links_bbox_h_ratio.cc
scribo_filter_object_links_bbox_overlap_SOURCES = scribo_filter_object_links_bbox_overlap.cc
scribo_filter_object_links_bbox_ratio_SOURCES = scribo_filter_object_links_bbox_ratio.cc
@@ -382,16 +418,25 @@ scribo_filter_objects_with_holes_SOURCES = scribo_filter_objects_with_holes.cc
scribo_fun_v2b_label_to_bool_SOURCES = scribo_fun_v2b_label_to_bool.cc
scribo_fun_v2b_objects_large_filter_SOURCES = scribo_fun_v2b_objects_large_filter.cc
scribo_fun_v2b_objects_small_filter_SOURCES = scribo_fun_v2b_objects_small_filter.cc
+scribo_fun_v2v_highlight_SOURCES = scribo_fun_v2v_highlight.cc
+scribo_io_img_internal_debug_img_visitor_SOURCES = scribo_io_img_internal_debug_img_visitor.cc
+scribo_io_img_internal_draw_edges_SOURCES = scribo_io_img_internal_draw_edges.cc
+scribo_io_img_internal_full_img_visitor_SOURCES = scribo_io_img_internal_full_img_visitor.cc
+scribo_io_img_internal_non_text_img_visitor_SOURCES = scribo_io_img_internal_non_text_img_visitor.cc
+scribo_io_img_internal_text_img_visitor_SOURCES = scribo_io_img_internal_text_img_visitor.cc
+scribo_io_img_save_SOURCES = scribo_io_img_save.cc
scribo_io_text_boxes_save_SOURCES = scribo_io_text_boxes_save.cc
scribo_io_xml_internal_extended_page_xml_visitor_SOURCES = scribo_io_xml_internal_extended_page_xml_visitor.cc
-scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
scribo_io_xml_internal_page_xml_visitor_SOURCES = scribo_io_xml_internal_page_xml_visitor.cc
scribo_io_xml_internal_print_box_coords_SOURCES = scribo_io_xml_internal_print_box_coords.cc
+scribo_io_xml_internal_print_image_coords_SOURCES = scribo_io_xml_internal_print_image_coords.cc
scribo_io_xml_internal_print_page_preambule_SOURCES = scribo_io_xml_internal_print_page_preambule.cc
scribo_io_xml_save_SOURCES = scribo_io_xml_save.cc
scribo_make_all_SOURCES = scribo_make_all.cc
scribo_make_debug_filename_SOURCES = scribo_make_debug_filename.cc
scribo_make_influence_zone_graph_SOURCES = scribo_make_influence_zone_graph.cc
+scribo_make_text_blocks_image_SOURCES = scribo_make_text_blocks_image.cc
+scribo_make_text_components_image_SOURCES = scribo_make_text_components_image.cc
scribo_postprocessing_all_SOURCES = scribo_postprocessing_all.cc
scribo_postprocessing_fill_object_holes_SOURCES = scribo_postprocessing_fill_object_holes.cc
scribo_preprocessing_all_SOURCES = scribo_preprocessing_all.cc
@@ -411,6 +456,7 @@ scribo_primitive_extract_canvas_SOURCES = scribo_primitive_extract_canvas.cc
scribo_primitive_extract_cells_SOURCES = scribo_primitive_extract_cells.cc
scribo_primitive_extract_components_SOURCES = scribo_primitive_extract_components.cc
scribo_primitive_extract_horizontal_separators_SOURCES = scribo_primitive_extract_horizontal_separators.cc
+scribo_primitive_extract_internal_union_SOURCES = scribo_primitive_extract_internal_union.cc
scribo_primitive_extract_lines_discontinued_SOURCES = scribo_primitive_extract_lines_discontinued.cc
scribo_primitive_extract_lines_h_discontinued_SOURCES = scribo_primitive_extract_lines_h_discontinued.cc
scribo_primitive_extract_lines_h_pattern_SOURCES = scribo_primitive_extract_lines_h_pattern.cc
@@ -425,6 +471,7 @@ scribo_primitive_extract_lines_v_single_SOURCES = scribo_primitive_extract_lines
scribo_primitive_extract_lines_v_thick_SOURCES = scribo_primitive_extract_lines_v_thick.cc
scribo_primitive_extract_lines_v_thick_and_single_SOURCES = scribo_primitive_extract_lines_v_thick_and_single.cc
scribo_primitive_extract_non_text_SOURCES = scribo_primitive_extract_non_text.cc
+scribo_primitive_extract_non_text_kmean_SOURCES = scribo_primitive_extract_non_text_kmean.cc
scribo_primitive_extract_separators_SOURCES = scribo_primitive_extract_separators.cc
scribo_primitive_extract_separators_nonvisible_SOURCES = scribo_primitive_extract_separators_nonvisible.cc
scribo_primitive_extract_vertical_separators_SOURCES = scribo_primitive_extract_vertical_separators.cc
@@ -453,6 +500,7 @@ scribo_primitive_link_internal_find_link_SOURCES = scribo_primitive_link_interna
scribo_primitive_link_internal_link_functor_base_SOURCES = scribo_primitive_link_internal_link_functor_base.cc
scribo_primitive_link_internal_link_several_dmax_base_SOURCES = scribo_primitive_link_internal_link_several_dmax_base.cc
scribo_primitive_link_internal_link_single_dmax_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_base.cc
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base.cc
scribo_primitive_link_internal_link_single_dmax_ratio_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_base.cc
scribo_primitive_link_merge_double_link_SOURCES = scribo_primitive_link_merge_double_link.cc
scribo_primitive_link_with_graph_SOURCES = scribo_primitive_link_with_graph.cc
--
1.5.6.5
1
0
* src/Makefile.am,
* src/debug/Makefile.am: Update targets.
* src/debug/non_text_mask.cc,
* src/debug/show_components_bboxes.cc,
* src/debug/show_groups_bboxes.cc,
* src/debug/show_links_bottom_aligned_2angles.cc,
* src/debug/show_links_top_aligned_2angles.cc,
* src/debug/show_paragraph_blocks.cc,
* src/non_text_components.cc: New.
* src/debug/show_links_several_right.cc: Removed. Deprecated.
---
scribo/ChangeLog | 17 ++
scribo/src/Makefile.am | 32 ++++-
scribo/src/debug/Makefile.am | 66 ++++++-
scribo/src/debug/non_text_mask.cc | 26 +++
scribo/src/debug/show_components_bboxes.cc | 59 ++++++
scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++
...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++---
scribo/src/debug/show_links_several_right.cc | 90 ----------
...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++----
scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++
scribo/src/non_text_components.cc | 128 ++++++++++++++
11 files changed, 652 insertions(+), 161 deletions(-)
create mode 100644 scribo/src/debug/non_text_mask.cc
create mode 100644 scribo/src/debug/show_components_bboxes.cc
create mode 100644 scribo/src/debug/show_groups_bboxes.cc
copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%)
delete mode 100644 scribo/src/debug/show_links_several_right.cc
copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%)
create mode 100644 scribo/src/debug/show_paragraph_blocks.cc
create mode 100644 scribo/src/non_text_components.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index a3de9ea..23ecde3 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New debug tools.
+
+ * src/Makefile.am,
+ * src/debug/Makefile.am: Update targets.
+
+ * src/debug/non_text_mask.cc,
+ * src/debug/show_components_bboxes.cc,
+ * src/debug/show_groups_bboxes.cc,
+ * src/debug/show_links_bottom_aligned_2angles.cc,
+ * src/debug/show_links_top_aligned_2angles.cc,
+ * src/debug/show_paragraph_blocks.cc,
+ * src/non_text_components.cc: New.
+
+ * src/debug/show_links_several_right.cc: Removed. Deprecated.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New non-text components extraction routine.
* scribo/make/text_blocks_image.hh,
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d6275fd..3a35528 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -96,18 +96,48 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS)
+if HAVE_QT
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
-lpthread
+ content_in_doc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ utilexec_PROGRAMS += non_text_components
+ non_text_components_SOURCES = non_text_components.cc
+ non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ non_text_components_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ non_text_components_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
endif HAVE_TESSERACT
diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am
index cdb1f30..60d7afa 100644
--- a/scribo/src/debug/Makefile.am
+++ b/scribo/src/debug/Makefile.am
@@ -18,6 +18,8 @@
include $(top_srcdir)/scribo/scribo.mk
noinst_PROGRAMS = \
+ show_components_bboxes \
+ show_groups_bboxes \
show_info_x_height \
show_info_median_inter_characters \
show_separators \
@@ -25,9 +27,9 @@ noinst_PROGRAMS = \
show_links_bbox_h_ratio \
show_links_bbox_overlap \
show_links_bottom_aligned \
+ show_links_bottom_aligned_2angles \
show_links_center_aligned \
show_links_non_h_aligned \
- show_links_several_right \
show_links_several_right_overlap \
show_links_single_down \
show_links_single_down_left_aligned \
@@ -40,15 +42,16 @@ noinst_PROGRAMS = \
show_links_single_up_left_aligned \
show_links_single_up_right_aligned \
show_links_top_aligned \
+ show_links_top_aligned_2angles \
show_objects_large \
show_objects_large_small \
show_objects_small \
show_objects_thick \
- show_objects_thin \
- show_stoppers \
- show_text_lines
+ show_objects_thin
+show_components_bboxes_SOURCES = show_components_bboxes.cc
+show_groups_bboxes_SOURCES = show_groups_bboxes.cc
show_info_x_height_SOURCES = show_info_x_height.cc
show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc
show_separators_SOURCES = show_separators.cc
@@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val
show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc
show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc
show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc
+show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc
show_links_center_aligned_SOURCES = show_links_center_aligned.cc
show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc
-show_links_several_right_SOURCES = show_links_several_right.cc
show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc
show_links_single_down_SOURCES = show_links_single_down.cc
show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc
@@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc
show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc
show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc
show_links_top_aligned_SOURCES = show_links_top_aligned.cc
+show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc
show_objects_large_SOURCES = show_objects_large.cc
show_objects_large_small_SOURCES = show_objects_large_small.cc
show_objects_small_SOURCES = show_objects_small.cc
show_objects_thick_SOURCES = show_objects_thick.cc
show_objects_thin_SOURCES = show_objects_thin.cc
-show_stoppers_SOURCES = show_stoppers.cc
-show_text_lines_SOURCES = show_text_lines.cc
if HAVE_MAGICKXX
+if HAVE_QT
+
+ noinst_PROGRAMS += show_paragraph_blocks
+ show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc
+ show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(QT_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS)
+ show_paragraph_blocks_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+ noinst_PROGRAMS += show_text_lines
+ show_text_lines_SOURCES = show_text_lines.cc
+ show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_text_lines_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_text_lines_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ noinst_PROGRAMS += show_stoppers
+ show_stoppers_SOURCES = show_stoppers.cc
+ show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_stoppers_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_stoppers_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
+
noinst_PROGRAMS += highlight_text_area
highlight_text_area_SOURCES = highlight_text_area.cc
highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \
- `Magick++-config --cppflags`
+ $(MAGICKXX_CPPFLAGS)
highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \
- -lpthread `Magick++-config --libs`
+ $(MAGICKXX_LDFLAGS)
endif HAVE_MAGICKXX
diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc
new file mode 100644
index 0000000..6fce945
--- /dev/null
+++ b/scribo/src/debug/non_text_mask.cc
@@ -0,0 +1,26 @@
+
+
+int main(int argc, char *argv[])
+{
+ // Link text lines
+ on_new_progress_label("Linking text lines");
+ line_links<L> llinks = scribo::text::link_lines(lines);
+
+ // Filter line links.
+ on_new_progress_label("Filter line links");
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ // Construct paragraphs
+ on_new_progress_label("Constructing paragraphs");
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ on_progress();
+
+
+ // Extract other Elements
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::non_text_fast(doc);
+
+}
diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc
new file mode 100644
index 0000000..7eab4ba
--- /dev/null
+++ b/scribo/src/debug/show_components_bboxes.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/primitive/extract/components.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_comps(c, components)
+ if (components(c).is_valid())
+ mln::draw::box(output, components(c).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc
new file mode 100644
index 0000000..eff0eb7
--- /dev/null
+++ b/scribo/src/debug/show_groups_bboxes.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+ using namespace scribo::primitive;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.* out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+
+ object_links<L>
+ left_link = link::with_single_left_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ object_links<L>
+ right_link = primitive::link::with_single_right_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ primitive::link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+
+ object_groups<L>
+ groups = group::from_single_link(merged_links);
+
+ line_set<L> lines(groups);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_lines(l, lines)
+ if (lines(l).is_valid())
+ mln::draw::box(output, lines(l).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_bottom_aligned.cc
copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc
index 634551b..4b0e765 100644
--- a/scribo/src/debug/show_links_bottom_aligned.cc
+++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
-#include <scribo/core/component_set.hh>
-
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object bottoms. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,42 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
- "Show valid or invalid links according the"
+ "Show valid or invalid links according the "
"horizontal alignment (based on bottom line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictBottomCenter);
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter);
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictBottomCenter);
+ object_links<L> output = link::compute(functor, anchor::Bottom);
- io::ppm::save(decision_image, argv[4]);
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc
deleted file mode 100644
index a70b2fb..0000000
--- a/scribo/src/debug/show_links_several_right.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
-//
-// This file is part of Olena.
-//
-// Olena is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation, version 2 of the License.
-//
-// Olena is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Olena. If not, see <http://www.gnu.org/licenses/>.
-//
-// As a special exception, you may use this file as part of a free
-// software project without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to produce
-// an executable, this file does not by itself cause the resulting
-// executable to be covered by the GNU General Public License. This
-// exception does not however invalidate any other reasons why the
-// executable file might be covered by the GNU General Public License.
-
-#include <iostream>
-
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-
-#include <mln/value/rgb8.hh>
-#include <mln/value/label_16.hh>
-#include <mln/literal/colors.hh>
-
-#include <mln/io/pbm/load.hh>
-#include <mln/io/ppm/save.hh>
-
-#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_several_right_links.hh>
-
-#include <scribo/draw/bounding_boxes.hh>
-
-#include <scribo/debug/several_links_decision_image.hh>
-#include <scribo/debug/usage.hh>
-
-
-
-const char *args_desc[][2] =
-{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_nbh_dist", " Maximum distance for neighborhood search."
- "(common value : 30)" },
- {0, 0}
-};
-
-
-int main(int argc, char* argv[])
-{
- using namespace scribo;
- using namespace scribo::primitive::internal;
- using namespace mln;
-
- if (argc != 4)
- return scribo::debug::usage(argv,
- "Show sucessful/unsuccessful right links between components.",
- "input.pbm max_nbh_dist output.ppm",
- args_desc);
-
- image2d<bool> input;
- io::pbm::load(input, argv[1]);
-
- // Finding objects.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
- component_set<L> comps
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
- // Finding right links.
- object_links<L> right_link
- = primitive::link::with_several_right_links(comps, atoi(argv[2]));
-
- image2d<value::rgb8> decision_image
- = scribo::debug::several_links_decision_image(input,
- right_link,
- right_link);
-
- io::ppm::save(decision_image, argv[3]);
-}
diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_top_aligned.cc
copy to scribo/src/debug/show_links_top_aligned_2angles.cc
index 5ffcb70..48f3a13 100644
--- a/scribo/src/debug/show_links_top_aligned.cc
+++ b/scribo/src/debug/show_links_top_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
-#include <scribo/debug/links_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object tops. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,41 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
"Show valid or invalid links according the "
"horizontal alignment (based on top line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
-
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictTopCenter);
-
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictTopCenter);
- io::ppm::save(decision_image, argv[4]);
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
+
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter);
+
+ object_links<L> output = link::compute(functor, anchor::Top);
+
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc
new file mode 100644
index 0000000..b16a751
--- /dev/null
+++ b/scribo/src/debug/show_paragraph_blocks.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/draw/box_plain.hh>
+
+#include <mln/debug/filename.hh>
+
+#include <mln/util/timer.hh>
+
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/usage.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/paragraph_set.hh>
+#include <scribo/core/line_info.hh>
+
+#include <scribo/text/link_lines.hh>
+#include <scribo/filter/line_links_x_height.hh>
+
+#include <scribo/io/xml/load.hh>
+
+// int i = 0;
+
+const char *args_desc[][2] =
+{
+ {0, 0}
+};
+
+
+
+int main(int argc, char* argv[])
+{
+ using namespace scribo;
+ using namespace mln;
+
+ if (argc != 3)
+ return scribo::debug::usage(argv,
+ "Show paragraph blocks",
+ "lines.xml out_blocks.pbm",
+ args_desc);
+
+ trace::entering("main");
+
+ typedef image2d<scribo::def::lbl_type> L;
+ document<L> doc;
+ scribo::io::xml::load(doc, argv[1]);
+
+ if (! doc.has_text())
+ {
+ std::cout << "ERROR: this XML file does not contain any text information!"
+ << std::endl;
+ return 1;
+ }
+
+
+ // Link text lines
+ line_links<L> llinks = scribo::text::link_lines(doc.lines());
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ util::timer t;
+ t.start();
+
+ image2d<bool> blocks;
+ initialize(blocks, doc.lines().components().labeled_image());
+ data::fill(blocks, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, blocks);
+ // data::fill(log, 0);
+
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= 3)
+ {
+ box2d last_tbox, last_box;
+
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>& line = parset.lines()(parset(p).line_ids()(l));
+
+ if (last_box.is_valid())
+ if (last_box.pmax().row() < line.bbox().pmin().row())
+ {
+ last_tbox = last_box;
+
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ // invalid case:
+ //
+ // =======
+ // ======
+
+ if (pmax.col() > pmin.col())
+ {
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+ else // Handle the case when there are several text boxes on the same line.
+ {
+ if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row())
+ {
+ // Top box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ box2d new_box(pmin, pmax);
+
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 2);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ if (last_box.pmax().col() < line.bbox().pmax().col()
+ && last_box.pmin().col() < line.bbox().pmin().col())
+ {
+ // Left box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(), line.bbox().pmin().col())),
+ pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()),
+ std::max(last_box.pmax().col(), line.bbox().pmin().col()));
+
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 3);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+
+ mln::draw::box_plain(blocks, line.bbox(), true);
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+
+ t.stop();
+ std::cout << t << std::endl;
+
+ mln::io::pbm::save(blocks, argv[2]);
+ // mln::io::pgm::save(log, "log.pgm");
+
+ trace::exiting("main");
+}
diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc
new file mode 100644
index 0000000..0f4cce4
--- /dev/null
+++ b/scribo/src/non_text_components.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+#include <libgen.h>
+#include <fstream>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+
+#include <mln/io/pbm/save.hh>
+#include <mln/io/magick/load.hh>
+
+#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/text_in_doc_preprocess.hh>
+
+#include <scribo/core/document.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/crop_without_localization.hh>
+#include <scribo/preprocessing/crop.hh>
+
+#include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
+
+// { argument name, help text } pairs that main() passes to scribo::debug::usage(); the list is closed by a {0, 0} entry.
+const char *args_desc[][2] =
+{
+ { "input.*", "An image." },
+ { "non_text_comps.pbm", "Non text components mask." },
+ { "enable_debug", "Enable debug image output. Set to 1 or 0." },
+ { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." }, // read by main() into find_whitespace_seps
+ {0, 0}
+};
+// Extract the non-text components mask of the image argv[1] and save it as
+// PBM to argv[2].  Optional argv[3] / argv[4] (integers, non-zero = on) select
+// the debug logger level and whitespace-separator ("tabstops") detection.
+int main(int argc, char* argv[])
+{
+ using namespace scribo;
+ using namespace mln;
+ // Two mandatory arguments followed by at most two optional flags.
+ if (argc != 4 && argc != 3 && argc != 5)
+ return scribo::debug::usage(argv,
+ "Extract non text components mask/",
+ "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ args_desc);
+ // Output prefix: base name of the input minus its extension, whatever the extension length.
+ std::string out_img = basename(argv[1]); // NOTE(review): POSIX basename() may modify argv[1], which is reused below.
+ out_img = out_img.substr(0, out_img.rfind('.')); // no '.': rfind() yields npos and the name is kept whole
+ // Logger level: Special when argv[3] is a non-zero integer, None otherwise.
+ std::string filename_prefix = out_img + "_debug";
+ scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
+ if (argc > 3 && atoi(argv[3]))
+ scribo::debug::logger().set_level(scribo::debug::Special);
+ else
+ scribo::debug::logger().set_level(scribo::debug::None);
+
+ trace::entering("main");
+
+ Magick::InitializeMagick(*argv);
+
+ typedef image2d<scribo::def::lbl_type> L;
+ image2d<value::rgb8> input;
+ mln::io::magick::load(input, argv[1]);
+ // The timer is started after loading and stopped before any output is saved.
+ util::timer t;
+ t.start();
+
+ // Preprocess document
+ image2d<bool>
+ input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+
+ // Fixed toolchain settings; only whitespace-separator detection comes from the command line.
+ bool denoise = true;
+ std::string language = "";
+ bool find_line_seps = true;
+ bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
+
+ std::cout << "Running with the following options :"
+ << " ocr_language = " << language
+ << " | find_lines_seps = " << find_line_seps
+ << " | find_whitespace_seps = " << find_whitespace_seps
+ << " | debug = " << scribo::debug::logger().is_enabled()
+ << std::endl;
+
+ // Run document toolchain.
+
+ // Text
+ std::cout << "Analysing document..." << std::endl;
+ document<L>
+ doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language); // language is "", so this flag is false
+ t.stop();
+ std::cout << t << std::endl;
+ // Save the labeled image of the document elements, converted to bool, as the mask.
+ mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+ // The two debug renderings are written regardless of the enable_debug flag.
+ scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
+ scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+
+ trace::exiting("main");
+}
--
1.5.6.5
1
0
* src/Makefile.am,
* src/debug/Makefile.am: Update targets.
* src/debug/non_text_mask.cc,
* src/debug/show_components_bboxes.cc,
* src/debug/show_groups_bboxes.cc,
* src/debug/show_links_bottom_aligned_2angles.cc,
* src/debug/show_links_top_aligned_2angles.cc,
* src/debug/show_paragraph_blocks.cc,
* src/non_text_components.cc: New.
* src/debug/show_links_several_right.cc: Removed. Deprecated.
---
scribo/ChangeLog | 17 ++
scribo/src/Makefile.am | 32 ++++-
scribo/src/debug/Makefile.am | 66 ++++++-
scribo/src/debug/non_text_mask.cc | 26 +++
scribo/src/debug/show_components_bboxes.cc | 59 ++++++
scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++
...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++---
scribo/src/debug/show_links_several_right.cc | 90 ----------
...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++----
scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++
scribo/src/non_text_components.cc | 128 ++++++++++++++
11 files changed, 652 insertions(+), 161 deletions(-)
create mode 100644 scribo/src/debug/non_text_mask.cc
create mode 100644 scribo/src/debug/show_components_bboxes.cc
create mode 100644 scribo/src/debug/show_groups_bboxes.cc
copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%)
delete mode 100644 scribo/src/debug/show_links_several_right.cc
copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%)
create mode 100644 scribo/src/debug/show_paragraph_blocks.cc
create mode 100644 scribo/src/non_text_components.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 32dda87..cc6e861 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New debug tools.
+
+ * src/Makefile.am,
+ * src/debug/Makefile.am: Update targets.
+
+ * src/debug/non_text_mask.cc,
+ * src/debug/show_components_bboxes.cc,
+ * src/debug/show_groups_bboxes.cc,
+ * src/debug/show_links_bottom_aligned_2angles.cc,
+ * src/debug/show_links_top_aligned_2angles.cc,
+ * src/debug/show_paragraph_blocks.cc,
+ * src/non_text_components.cc: New.
+
+ * src/debug/show_links_several_right.cc: Removed. Deprecated.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New non-text components extraction routine.
* scribo/make/text_blocks_image.hh,
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d6275fd..3a35528 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -96,18 +96,48 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS)
+if HAVE_QT
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
-lpthread
+ content_in_doc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ utilexec_PROGRAMS += non_text_components
+ non_text_components_SOURCES = non_text_components.cc
+ non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ non_text_components_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ non_text_components_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
endif HAVE_TESSERACT
diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am
index cdb1f30..60d7afa 100644
--- a/scribo/src/debug/Makefile.am
+++ b/scribo/src/debug/Makefile.am
@@ -18,6 +18,8 @@
include $(top_srcdir)/scribo/scribo.mk
noinst_PROGRAMS = \
+ show_components_bboxes \
+ show_groups_bboxes \
show_info_x_height \
show_info_median_inter_characters \
show_separators \
@@ -25,9 +27,9 @@ noinst_PROGRAMS = \
show_links_bbox_h_ratio \
show_links_bbox_overlap \
show_links_bottom_aligned \
+ show_links_bottom_aligned_2angles \
show_links_center_aligned \
show_links_non_h_aligned \
- show_links_several_right \
show_links_several_right_overlap \
show_links_single_down \
show_links_single_down_left_aligned \
@@ -40,15 +42,16 @@ noinst_PROGRAMS = \
show_links_single_up_left_aligned \
show_links_single_up_right_aligned \
show_links_top_aligned \
+ show_links_top_aligned_2angles \
show_objects_large \
show_objects_large_small \
show_objects_small \
show_objects_thick \
- show_objects_thin \
- show_stoppers \
- show_text_lines
+ show_objects_thin
+show_components_bboxes_SOURCES = show_components_bboxes.cc
+show_groups_bboxes_SOURCES = show_groups_bboxes.cc
show_info_x_height_SOURCES = show_info_x_height.cc
show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc
show_separators_SOURCES = show_separators.cc
@@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val
show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc
show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc
show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc
+show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc
show_links_center_aligned_SOURCES = show_links_center_aligned.cc
show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc
-show_links_several_right_SOURCES = show_links_several_right.cc
show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc
show_links_single_down_SOURCES = show_links_single_down.cc
show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc
@@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc
show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc
show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc
show_links_top_aligned_SOURCES = show_links_top_aligned.cc
+show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc
show_objects_large_SOURCES = show_objects_large.cc
show_objects_large_small_SOURCES = show_objects_large_small.cc
show_objects_small_SOURCES = show_objects_small.cc
show_objects_thick_SOURCES = show_objects_thick.cc
show_objects_thin_SOURCES = show_objects_thin.cc
-show_stoppers_SOURCES = show_stoppers.cc
-show_text_lines_SOURCES = show_text_lines.cc
if HAVE_MAGICKXX
+if HAVE_QT
+
+ noinst_PROGRAMS += show_paragraph_blocks
+ show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc
+ show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(QT_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS)
+ show_paragraph_blocks_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+ noinst_PROGRAMS += show_text_lines
+ show_text_lines_SOURCES = show_text_lines.cc
+ show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_text_lines_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_text_lines_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ noinst_PROGRAMS += show_stoppers
+ show_stoppers_SOURCES = show_stoppers.cc
+ show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_stoppers_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_stoppers_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
+
noinst_PROGRAMS += highlight_text_area
highlight_text_area_SOURCES = highlight_text_area.cc
highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \
- `Magick++-config --cppflags`
+ $(MAGICKXX_CPPFLAGS)
highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \
- -lpthread `Magick++-config --libs`
+ $(MAGICKXX_LDFLAGS)
endif HAVE_MAGICKXX
diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc
new file mode 100644
index 0000000..6fce945
--- /dev/null
+++ b/scribo/src/debug/non_text_mask.cc
@@ -0,0 +1,26 @@
+// Sequence of steps: link text lines, filter the links, build paragraphs, extract non-text elements.
+// NOTE(review): as added here the file has no #include and never declares L, lines, doc, on_new_progress_label or on_progress.
+int main(int argc, char *argv[])
+{
+ // Link text lines
+ on_new_progress_label("Linking text lines");
+ line_links<L> llinks = scribo::text::link_lines(lines);
+
+ // Filter line links.
+ on_new_progress_label("Filter line links");
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ // Construct paragraphs
+ on_new_progress_label("Constructing paragraphs");
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ on_progress();
+
+ // The result of the extraction below is stored in 'elements' but not used afterwards.
+ // Extract other Elements
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::non_text_fast(doc);
+
+}
diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc
new file mode 100644
index 0000000..7eab4ba
--- /dev/null
+++ b/scribo/src/debug/show_components_bboxes.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/primitive/extract/components.hh>
+// Draw the bounding box of every valid component of the PBM image argv[1] and save the result to argv[2].
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+ // Exactly two arguments are required: input and output PBM paths.
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+ // Component labels are stored as mln::value::int_u<30>.
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+ // Extract components with the c8() neighborhood; ncomponents presumably receives their count.
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+ // Output image initialized from input, then cleared to false.
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+ // Draw the bounding box of each valid component with value true.
+ for_all_comps(c, components)
+ if (components(c).is_valid())
+ mln::draw::box(output, components(c).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc
new file mode 100644
index 0000000..eff0eb7
--- /dev/null
+++ b/scribo/src/debug/show_groups_bboxes.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+// Draw the bounding box of every valid line built from linked components of the PBM image argv[1]; save to argv[2].
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+ using namespace scribo::primitive;
+ // Two arguments; the input is read with io::pbm::load, hence "input.pbm" in the usage text.
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+ // Component labels are stored as mln::value::int_u<30>.
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+ // Extract components with the c8() neighborhood; ncomponents presumably receives their count.
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+ // Compute left links, then right links, with identical settings (dmax_default(1), anchor::MassCenter).
+ object_links<L>
+ left_link = link::with_single_left_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ object_links<L>
+ right_link = primitive::link::with_single_right_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ primitive::link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+ // Turn the merged links into groups, and the groups into a line set.
+ object_groups<L>
+ groups = group::from_single_link(merged_links);
+
+ line_set<L> lines(groups);
+ // Output image initialized from input, then cleared to false.
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+ // Draw the bounding box of each valid line with value true.
+ for_all_lines(l, lines)
+ if (lines(l).is_valid())
+ mln::draw::box(output, lines(l).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_bottom_aligned.cc
copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc
index 634551b..4b0e765 100644
--- a/scribo/src/debug/show_links_bottom_aligned.cc
+++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
-#include <scribo/core/component_set.hh>
-
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object bottoms. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,42 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
- "Show valid or invalid links according the"
+ "Show valid or invalid links according the "
"horizontal alignment (based on bottom line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictBottomCenter);
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter);
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictBottomCenter);
+ object_links<L> output = link::compute(functor, anchor::Bottom);
- io::ppm::save(decision_image, argv[4]);
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc
deleted file mode 100644
index a70b2fb..0000000
--- a/scribo/src/debug/show_links_several_right.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
-//
-// This file is part of Olena.
-//
-// Olena is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation, version 2 of the License.
-//
-// Olena is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Olena. If not, see <http://www.gnu.org/licenses/>.
-//
-// As a special exception, you may use this file as part of a free
-// software project without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to produce
-// an executable, this file does not by itself cause the resulting
-// executable to be covered by the GNU General Public License. This
-// exception does not however invalidate any other reasons why the
-// executable file might be covered by the GNU General Public License.
-
-#include <iostream>
-
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-
-#include <mln/value/rgb8.hh>
-#include <mln/value/label_16.hh>
-#include <mln/literal/colors.hh>
-
-#include <mln/io/pbm/load.hh>
-#include <mln/io/ppm/save.hh>
-
-#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_several_right_links.hh>
-
-#include <scribo/draw/bounding_boxes.hh>
-
-#include <scribo/debug/several_links_decision_image.hh>
-#include <scribo/debug/usage.hh>
-
-
-
-const char *args_desc[][2] =
-{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_nbh_dist", " Maximum distance for neighborhood search."
- "(common value : 30)" },
- {0, 0}
-};
-
-
-int main(int argc, char* argv[])
-{
- using namespace scribo;
- using namespace scribo::primitive::internal;
- using namespace mln;
-
- if (argc != 4)
- return scribo::debug::usage(argv,
- "Show sucessful/unsuccessful right links between components.",
- "input.pbm max_nbh_dist output.ppm",
- args_desc);
-
- image2d<bool> input;
- io::pbm::load(input, argv[1]);
-
- // Finding objects.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
- component_set<L> comps
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
- // Finding right links.
- object_links<L> right_link
- = primitive::link::with_several_right_links(comps, atoi(argv[2]));
-
- image2d<value::rgb8> decision_image
- = scribo::debug::several_links_decision_image(input,
- right_link,
- right_link);
-
- io::ppm::save(decision_image, argv[3]);
-}
diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_top_aligned.cc
copy to scribo/src/debug/show_links_top_aligned_2angles.cc
index 5ffcb70..48f3a13 100644
--- a/scribo/src/debug/show_links_top_aligned.cc
+++ b/scribo/src/debug/show_links_top_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
-#include <scribo/debug/links_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object tops. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,41 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
"Show valid or invalid links according the "
"horizontal alignment (based on top line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
-
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictTopCenter);
-
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictTopCenter);
- io::ppm::save(decision_image, argv[4]);
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
+
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter);
+
+ object_links<L> output = link::compute(functor, anchor::Top);
+
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc
new file mode 100644
index 0000000..b16a751
--- /dev/null
+++ b/scribo/src/debug/show_paragraph_blocks.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/draw/box_plain.hh>
+
+#include <mln/debug/filename.hh>
+
+#include <mln/util/timer.hh>
+
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/usage.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/paragraph_set.hh>
+#include <scribo/core/line_info.hh>
+
+#include <scribo/text/link_lines.hh>
+#include <scribo/filter/line_links_x_height.hh>
+
+#include <scribo/io/xml/load.hh>
+
+// int i = 0;
+// Argument table passed to scribo::debug::usage() by main(); it holds only the {0, 0} entry.
+const char *args_desc[][2] =
+{
+ {0, 0}
+};
+// Load a scribo document from the XML file argv[1], rebuild its paragraphs and,
+// for every paragraph of at least 3 lines, fill the line bounding boxes plus
+// connecting boxes between successive lines; the mask is saved as PBM to argv[2].
+int main(int argc, char* argv[])
+{
+ using namespace scribo;
+ using namespace mln;
+ // Exactly two arguments: input XML and output PBM.
+ if (argc != 3)
+ return scribo::debug::usage(argv,
+ "Show paragraph blocks",
+ "lines.xml out_blocks.pbm",
+ args_desc);
+
+ trace::entering("main");
+
+ typedef image2d<scribo::def::lbl_type> L;
+ document<L> doc;
+ scribo::io::xml::load(doc, argv[1]);
+ // Nothing to draw without text. NOTE(review): this path returns without trace::exiting("main").
+ if (! doc.has_text())
+ {
+ std::cout << "ERROR: this XML file does not contain any text information!"
+ << std::endl;
+ return 1;
+ }
+
+
+ // Link text lines
+ line_links<L> llinks = scribo::text::link_lines(doc.lines());
+ llinks = scribo::filter::line_links_x_height(llinks);
+ // Build paragraphs from the filtered links and store them in the document.
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+ // Only the drawing below is timed (loading and paragraph construction are not).
+ util::timer t;
+ t.start();
+ // Output mask, initialized from the labeled image of the lines' components, then cleared to false.
+ image2d<bool> blocks;
+ initialize(blocks, doc.lines().components().labeled_image());
+ data::fill(blocks, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, blocks);
+ // data::fill(log, 0);
+ // Paragraphs with fewer than 3 lines are skipped.
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= 3)
+ {
+ box2d last_tbox, last_box;
+ // last_box: bbox of the previously visited line; last_tbox: last last_box that ended above the current line's top row.
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>& line = parset.lines()(parset(p).line_ids()(l));
+ // NOTE: the 'else' further down pairs with the inner 'if' (row test), so it only runs when last_box is valid.
+ if (last_box.is_valid())
+ if (last_box.pmax().row() < line.bbox().pmin().row())
+ {
+ last_tbox = last_box;
+ // Box spanning the rows between the previous line's bottom and this line's top, over their common columns.
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ // invalid case:
+ //
+ // =======
+ // ======
+
+ if (pmax.col() > pmin.col())
+ {
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+ else // Handle the case when there are several text boxes on the same line.
+ {
+ if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row())
+ {
+ // Top box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ box2d new_box(pmin, pmax);
+ // NOTE(review): unlike the branch above, there is no pmax.col() > pmin.col() check before drawing.
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 2);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ if (last_box.pmax().col() < line.bbox().pmax().col()
+ && last_box.pmin().col() < line.bbox().pmin().col())
+ {
+ // Left box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(), line.bbox().pmin().col())),
+ pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()),
+ std::max(last_box.pmax().col(), line.bbox().pmin().col()));
+ // NOTE(review): last_tbox is read above without an is_valid() check -- confirm it is always set here.
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 3);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+ // Always fill the line's own bounding box, then remember it for the next iteration.
+ mln::draw::box_plain(blocks, line.bbox(), true);
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+
+ t.stop();
+ std::cout << t << std::endl;
+
+ mln::io::pbm::save(blocks, argv[2]);
+ // mln::io::pgm::save(log, "log.pgm");
+
+ trace::exiting("main");
+}
diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc
new file mode 100644
index 0000000..0f4cce4
--- /dev/null
+++ b/scribo/src/non_text_components.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+#include <libgen.h>
+#include <fstream>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+
+#include <mln/io/pbm/save.hh>
+#include <mln/io/magick/load.hh>
+
+#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/text_in_doc_preprocess.hh>
+
+#include <scribo/core/document.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/crop_without_localization.hh>
+#include <scribo/preprocessing/crop.hh>
+
+#include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
+
+
+ // Name/description pairs for the command-line arguments. The table is
+ // handed to scribo::debug::usage() by main() when the argument count is
+ // wrong.
+ const char *args_desc[][2] =
+ {
+ { "input.*", "An image." },
+ { "non_text_comps.pbm", "Non text components mask." },
+ { "enable_debug", "Enable debug image output. Set to 1 or 0." },
+ { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
+ // Presumably the end-of-table marker expected by usage() -- confirm.
+ {0, 0}
+ };
+
+
+
+ // Entry point: load an image, run the content_in_doc toolchain on it and
+ // save a mask built from the labeled image of the document elements.
+ //
+ // Command line: input.* non_text_comps.pbm [enable_debug] [enable_tabstops]
+ //   argv[1]  input image, loaded through mln::io::magick::load.
+ //   argv[2]  output PBM file.
+ //   argv[3]  optional; a non-zero value selects the Special debug level.
+ //   argv[4]  optional; a non-zero value is forwarded as find_whitespace_seps.
+ //
+ // Two debug renderings named after the input file are also written to the
+ // current directory.
+ int main(int argc, char* argv[])
+ {
+   using namespace scribo;
+   using namespace mln;
+
+   if (argc < 3 || argc > 5)
+     return scribo::debug::usage(argv,
+                                 "Extract non text components mask.",
+                                 "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+                                 args_desc);
+
+   // Base name of the input without its extension. The extension is located
+   // through the last '.', so it may have any length; names without an
+   // extension and dot-files are left untouched instead of being truncated.
+   std::string out_img = basename(argv[1]);
+   const std::string::size_type dot = out_img.rfind('.');
+   if (dot != std::string::npos && dot != 0)
+     out_img.erase(dot);
+
+   std::string filename_prefix = out_img + "_debug";
+   scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
+   if (argc > 3 && atoi(argv[3]))
+     scribo::debug::logger().set_level(scribo::debug::Special);
+   else
+     scribo::debug::logger().set_level(scribo::debug::None);
+
+   trace::entering("main");
+
+   Magick::InitializeMagick(*argv);
+
+   typedef image2d<scribo::def::lbl_type> L;
+   image2d<value::rgb8> input;
+   mln::io::magick::load(input, argv[1]);
+
+   util::timer t;
+   t.start();
+
+   // Preprocess document
+   image2d<bool>
+     input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+
+
+   bool denoise = true;
+   // The language is left empty, so the `!language.empty()` argument passed
+   // to the toolchain below is always false.
+   std::string language = "";
+   bool find_line_seps = true;
+   bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
+
+   std::cout << "Running with the following options :"
+             << " ocr_language = " << language
+             << " | find_lines_seps = " << find_line_seps
+             << " | find_whitespace_seps = " << find_whitespace_seps
+             << " | debug = " << scribo::debug::logger().is_enabled()
+             << std::endl;
+
+   // Run document toolchain.
+
+   // Text
+   std::cout << "Analysing document..." << std::endl;
+   document<L>
+     doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+                                             find_line_seps, find_whitespace_seps,
+                                             !language.empty(), language);
+   t.stop();
+   std::cout << t << std::endl;
+
+   // Any non-zero label becomes `true` in the saved mask.
+   mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+
+   scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
+   scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+
+   trace::exiting("main");
+ }
--
1.5.6.5
1
0
* src/Makefile.am,
* src/debug/Makefile.am: Update targets.
* src/debug/non_text_mask.cc,
* src/debug/show_components_bboxes.cc,
* src/debug/show_groups_bboxes.cc,
* src/debug/show_links_bottom_aligned_2angles.cc,
* src/debug/show_links_top_aligned_2angles.cc,
* src/debug/show_paragraph_blocks.cc,
* src/non_text_components.cc: New.
* src/debug/show_links_several_right.cc: Removed. Deprecated.
---
scribo/ChangeLog | 17 ++
scribo/src/Makefile.am | 32 ++++-
scribo/src/debug/Makefile.am | 66 ++++++-
scribo/src/debug/non_text_mask.cc | 26 +++
scribo/src/debug/show_components_bboxes.cc | 59 ++++++
scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++
...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++---
scribo/src/debug/show_links_several_right.cc | 90 ----------
...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++----
scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++
scribo/src/non_text_components.cc | 128 ++++++++++++++
11 files changed, 652 insertions(+), 161 deletions(-)
create mode 100644 scribo/src/debug/non_text_mask.cc
create mode 100644 scribo/src/debug/show_components_bboxes.cc
create mode 100644 scribo/src/debug/show_groups_bboxes.cc
copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%)
delete mode 100644 scribo/src/debug/show_links_several_right.cc
copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%)
create mode 100644 scribo/src/debug/show_paragraph_blocks.cc
create mode 100644 scribo/src/non_text_components.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 32dda87..cc6e861 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New debug tools.
+
+ * src/Makefile.am,
+ * src/debug/Makefile.am: Update targets.
+
+ * src/debug/non_text_mask.cc,
+ * src/debug/show_components_bboxes.cc,
+ * src/debug/show_groups_bboxes.cc,
+ * src/debug/show_links_bottom_aligned_2angles.cc,
+ * src/debug/show_links_top_aligned_2angles.cc,
+ * src/debug/show_paragraph_blocks.cc,
+ * src/non_text_components.cc: New.
+
+ * src/debug/show_links_several_right.cc: Removed. Deprecated.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New non-text components extraction routine.
* scribo/make/text_blocks_image.hh,
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d6275fd..3a35528 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -96,18 +96,48 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS)
+if HAVE_QT
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
-lpthread
+ content_in_doc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ utilexec_PROGRAMS += non_text_components
+ non_text_components_SOURCES = non_text_components.cc
+ non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ non_text_components_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ non_text_components_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
endif HAVE_TESSERACT
diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am
index cdb1f30..60d7afa 100644
--- a/scribo/src/debug/Makefile.am
+++ b/scribo/src/debug/Makefile.am
@@ -18,6 +18,8 @@
include $(top_srcdir)/scribo/scribo.mk
noinst_PROGRAMS = \
+ show_components_bboxes \
+ show_groups_bboxes \
show_info_x_height \
show_info_median_inter_characters \
show_separators \
@@ -25,9 +27,9 @@ noinst_PROGRAMS = \
show_links_bbox_h_ratio \
show_links_bbox_overlap \
show_links_bottom_aligned \
+ show_links_bottom_aligned_2angles \
show_links_center_aligned \
show_links_non_h_aligned \
- show_links_several_right \
show_links_several_right_overlap \
show_links_single_down \
show_links_single_down_left_aligned \
@@ -40,15 +42,16 @@ noinst_PROGRAMS = \
show_links_single_up_left_aligned \
show_links_single_up_right_aligned \
show_links_top_aligned \
+ show_links_top_aligned_2angles \
show_objects_large \
show_objects_large_small \
show_objects_small \
show_objects_thick \
- show_objects_thin \
- show_stoppers \
- show_text_lines
+ show_objects_thin
+show_components_bboxes_SOURCES = show_components_bboxes.cc
+show_groups_bboxes_SOURCES = show_groups_bboxes.cc
show_info_x_height_SOURCES = show_info_x_height.cc
show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc
show_separators_SOURCES = show_separators.cc
@@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val
show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc
show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc
show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc
+show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc
show_links_center_aligned_SOURCES = show_links_center_aligned.cc
show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc
-show_links_several_right_SOURCES = show_links_several_right.cc
show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc
show_links_single_down_SOURCES = show_links_single_down.cc
show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc
@@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc
show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc
show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc
show_links_top_aligned_SOURCES = show_links_top_aligned.cc
+show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc
show_objects_large_SOURCES = show_objects_large.cc
show_objects_large_small_SOURCES = show_objects_large_small.cc
show_objects_small_SOURCES = show_objects_small.cc
show_objects_thick_SOURCES = show_objects_thick.cc
show_objects_thin_SOURCES = show_objects_thin.cc
-show_stoppers_SOURCES = show_stoppers.cc
-show_text_lines_SOURCES = show_text_lines.cc
if HAVE_MAGICKXX
+if HAVE_QT
+
+ noinst_PROGRAMS += show_paragraph_blocks
+ show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc
+ show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(QT_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS)
+ show_paragraph_blocks_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+ noinst_PROGRAMS += show_text_lines
+ show_text_lines_SOURCES = show_text_lines.cc
+ show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_text_lines_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_text_lines_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ noinst_PROGRAMS += show_stoppers
+ show_stoppers_SOURCES = show_stoppers.cc
+ show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_stoppers_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_stoppers_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
+
noinst_PROGRAMS += highlight_text_area
highlight_text_area_SOURCES = highlight_text_area.cc
highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \
- `Magick++-config --cppflags`
+ $(MAGICKXX_CPPFLAGS)
highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \
- -lpthread `Magick++-config --libs`
+ $(MAGICKXX_LDFLAGS)
endif HAVE_MAGICKXX
diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc
new file mode 100644
index 0000000..6fce945
--- /dev/null
+++ b/scribo/src/debug/non_text_mask.cc
@@ -0,0 +1,26 @@
+
+
+ // NOTE(review): this body uses names that are not declared anywhere in it
+ // (`L`, `lines`, `doc`, `on_new_progress_label`, `on_progress`) and never
+ // reads argc/argv. It looks like an excerpt of a larger toolchain rather
+ // than a standalone program -- confirm before adding it to a build target.
+ int main(int argc, char *argv[])
+ {
+ // Link text lines: build the links from `lines`.
+ on_new_progress_label("Linking text lines");
+ line_links<L> llinks = scribo::text::link_lines(lines);
+
+ // Filter line links; the filtered result replaces `llinks`.
+ on_new_progress_label("Filter line links");
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ // Construct paragraphs from the filtered links and store them in `doc`.
+ on_new_progress_label("Constructing paragraphs");
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ on_progress();
+
+
+ // Extract other elements. The resulting component set is not used
+ // afterwards in this body.
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::non_text_fast(doc);
+
+ }
diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc
new file mode 100644
index 0000000..7eab4ba
--- /dev/null
+++ b/scribo/src/debug/show_components_bboxes.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/primitive/extract/components.hh>
+
+ // Draw the bounding box of every valid component of a binary image.
+ //
+ // Command line: input.pbm out.pbm
+ //   argv[1]  binary image to analyse.
+ //   argv[2]  binary image receiving the box outlines.
+ // Returns 1 when the argument count is wrong.
+ int main(int argc, char *argv[])
+ {
+   using namespace mln;
+   using namespace scribo;
+
+   if (argc != 3)
+   {
+     std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+     return 1;
+   }
+
+   typedef mln::value::int_u<30> label_t;
+   typedef image2d<label_t> label_image_t;
+
+   image2d<bool> binary;
+   io::pbm::load(binary, argv[1]);
+
+   // Components are extracted with 8-connectivity.
+   label_t count;
+   component_set<label_image_t>
+     comps = scribo::primitive::extract::components(binary, c8(), count);
+
+   // Start from an all-false image sharing the geometry of the input.
+   image2d<bool> outlines;
+   initialize(outlines, binary);
+   data::fill(outlines, false);
+
+   for_all_comps(id, comps)
+   {
+     if (comps(id).is_valid())
+     {
+       mln::draw::box(outlines, comps(id).bbox(), true);
+     }
+   }
+
+   io::pbm::save(outlines, argv[2]);
+ }
diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc
new file mode 100644
index 0000000..eff0eb7
--- /dev/null
+++ b/scribo/src/debug/show_groups_bboxes.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+
+ // Draw the bounding boxes of the component groups found in a binary image.
+ //
+ // Components are linked to their left and right neighbours, only the links
+ // kept by merge_double_link() are retained, and the resulting groups are
+ // turned into a line_set whose valid entries are outlined.
+ //
+ // Command line: input.pbm out.pbm
+ //   argv[1]  binary image to analyse; it is read with io::pbm::load, so it
+ //            has to be a PBM file.
+ //   argv[2]  binary image receiving the box outlines.
+ // Returns 1 when the argument count is wrong.
+ int main(int argc, char *argv[])
+ {
+   using namespace mln;
+   using namespace scribo;
+   using namespace scribo::primitive;
+
+   if (argc != 3)
+   {
+     std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+     return 1;
+   }
+
+   typedef mln::value::int_u<30> V;
+   typedef image2d<V> L;
+
+   image2d<bool> input;
+   io::pbm::load(input, argv[1]);
+
+   V ncomponents;
+   component_set<L>
+     components = scribo::primitive::extract::components(input, c8(),
+                                                         ncomponents);
+
+   // Both directions use the same distance functor and anchor.
+   // (primitive::link::internal::dmax_width_and_height(1) was the
+   // alternative distance functor tried here.)
+   object_links<L>
+     left_link = primitive::link::with_single_left_link_dmax_ratio(
+       components,
+       primitive::link::internal::dmax_default(1),
+       anchor::MassCenter);
+
+   object_links<L>
+     right_link = primitive::link::with_single_right_link_dmax_ratio(
+       components,
+       primitive::link::internal::dmax_default(1),
+       anchor::MassCenter);
+
+   // Validating left and right links.
+   object_links<L>
+     merged_links = primitive::link::merge_double_link(left_link,
+                                                       right_link);
+
+   object_groups<L>
+     groups = primitive::group::from_single_link(merged_links);
+
+   line_set<L> lines(groups);
+
+   // Start from an all-false image sharing the geometry of the input.
+   image2d<bool> output;
+   initialize(output, input);
+   data::fill(output, false);
+
+   for_all_lines(l, lines)
+     if (lines(l).is_valid())
+       mln::draw::box(output, lines(l).bbox(), true);
+
+   io::pbm::save(output, argv[2]);
+ }
diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_bottom_aligned.cc
copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc
index 634551b..4b0e765 100644
--- a/scribo/src/debug/show_links_bottom_aligned.cc
+++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
-#include <scribo/core/component_set.hh>
-
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object bottoms. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,42 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
- "Show valid or invalid links according the"
+ "Show valid or invalid links according the "
"horizontal alignment (based on bottom line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictBottomCenter);
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter);
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictBottomCenter);
+ object_links<L> output = link::compute(functor, anchor::Bottom);
- io::ppm::save(decision_image, argv[4]);
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc
deleted file mode 100644
index a70b2fb..0000000
--- a/scribo/src/debug/show_links_several_right.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
-//
-// This file is part of Olena.
-//
-// Olena is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation, version 2 of the License.
-//
-// Olena is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Olena. If not, see <http://www.gnu.org/licenses/>.
-//
-// As a special exception, you may use this file as part of a free
-// software project without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to produce
-// an executable, this file does not by itself cause the resulting
-// executable to be covered by the GNU General Public License. This
-// exception does not however invalidate any other reasons why the
-// executable file might be covered by the GNU General Public License.
-
-#include <iostream>
-
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-
-#include <mln/value/rgb8.hh>
-#include <mln/value/label_16.hh>
-#include <mln/literal/colors.hh>
-
-#include <mln/io/pbm/load.hh>
-#include <mln/io/ppm/save.hh>
-
-#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_several_right_links.hh>
-
-#include <scribo/draw/bounding_boxes.hh>
-
-#include <scribo/debug/several_links_decision_image.hh>
-#include <scribo/debug/usage.hh>
-
-
-
-const char *args_desc[][2] =
-{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_nbh_dist", " Maximum distance for neighborhood search."
- "(common value : 30)" },
- {0, 0}
-};
-
-
-int main(int argc, char* argv[])
-{
- using namespace scribo;
- using namespace scribo::primitive::internal;
- using namespace mln;
-
- if (argc != 4)
- return scribo::debug::usage(argv,
- "Show sucessful/unsuccessful right links between components.",
- "input.pbm max_nbh_dist output.ppm",
- args_desc);
-
- image2d<bool> input;
- io::pbm::load(input, argv[1]);
-
- // Finding objects.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
- component_set<L> comps
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
- // Finding right links.
- object_links<L> right_link
- = primitive::link::with_several_right_links(comps, atoi(argv[2]));
-
- image2d<value::rgb8> decision_image
- = scribo::debug::several_links_decision_image(input,
- right_link,
- right_link);
-
- io::ppm::save(decision_image, argv[3]);
-}
diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_top_aligned.cc
copy to scribo/src/debug/show_links_top_aligned_2angles.cc
index 5ffcb70..48f3a13 100644
--- a/scribo/src/debug/show_links_top_aligned.cc
+++ b/scribo/src/debug/show_links_top_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
-#include <scribo/debug/links_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object tops. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,41 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
"Show valid or invalid links according the "
"horizontal alignment (based on top line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
-
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictTopCenter);
-
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictTopCenter);
- io::ppm::save(decision_image, argv[4]);
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
+
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter);
+
+ object_links<L> output = link::compute(functor, anchor::Top);
+
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc
new file mode 100644
index 0000000..b16a751
--- /dev/null
+++ b/scribo/src/debug/show_paragraph_blocks.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/draw/box_plain.hh>
+
+#include <mln/debug/filename.hh>
+
+#include <mln/util/timer.hh>
+
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/usage.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/paragraph_set.hh>
+#include <scribo/core/line_info.hh>
+
+#include <scribo/text/link_lines.hh>
+#include <scribo/filter/line_links_x_height.hh>
+
+#include <scribo/io/xml/load.hh>
+
+// int i = 0;
+
+ // Name/description pairs for the two command-line arguments named in the
+ // usage summary, so that the help printed by scribo::debug::usage()
+ // describes each of them.
+ const char *args_desc[][2] =
+ {
+   { "lines.xml", "A Scribo XML file containing text information." },
+   { "out_blocks.pbm", "Output binary image of the paragraph blocks." },
+   {0, 0}
+ };
+
+
+
+ // Render the paragraph blocks of a document as a binary image.
+ //
+ // Text lines are loaded from an XML file, linked and grouped into
+ // paragraphs. For every paragraph of at least three lines, the bounding box
+ // of each line is filled, together with boxes filling the gaps between
+ // consecutive lines.
+ //
+ // Command line: lines.xml out_blocks.pbm
+ //   argv[1]  XML file to load.
+ //   argv[2]  PBM file receiving the block mask.
+ // Returns 1 when the XML file holds no text information.
+ int main(int argc, char* argv[])
+ {
+   using namespace scribo;
+   using namespace mln;
+
+   if (argc != 3)
+     return scribo::debug::usage(argv,
+                                 "Show paragraph blocks",
+                                 "lines.xml out_blocks.pbm",
+                                 args_desc);
+
+   trace::entering("main");
+
+   typedef image2d<scribo::def::lbl_type> L;
+   document<L> doc;
+   scribo::io::xml::load(doc, argv[1]);
+
+   if (! doc.has_text())
+   {
+     std::cerr << "ERROR: this XML file does not contain any text information!"
+               << std::endl;
+     // Keep the trace balanced on the error path as well.
+     trace::exiting("main");
+     return 1;
+   }
+
+
+   // Link text lines
+   line_links<L> llinks = scribo::text::link_lines(doc.lines());
+   llinks = scribo::filter::line_links_x_height(llinks);
+
+   scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+   doc.set_paragraphs(parset);
+
+   // Only the drawing below is timed.
+   util::timer t;
+   t.start();
+
+   image2d<bool> blocks;
+   initialize(blocks, doc.lines().components().labeled_image());
+   data::fill(blocks, false);
+
+   // image2d<value::int_u8> log;
+   // initialize(log, blocks);
+   // data::fill(log, 0);
+
+   for_all_paragraphs(p, parset)
+   {
+     if (parset(p).nlines() >= 3)
+     {
+       // last_box:  bounding box of the previously visited line.
+       // last_tbox: value last_box had when the current row started, i.e.
+       //            the box lying above the current row.
+       box2d last_tbox, last_box;
+
+       // For each line in this paragraph.
+       for_all_elements(l, parset(p).line_ids())
+       {
+         const line_info<L>& line = parset.lines()(parset(p).line_ids()(l));
+
+         if (last_box.is_valid())
+         {
+           if (last_box.pmax().row() < line.bbox().pmin().row())
+           {
+             // The current line starts strictly below the previous one.
+             last_tbox = last_box;
+
+             // Vertical gap between both lines, restricted to the columns
+             // they have in common.
+             point2d
+               pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+                    std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+               pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+                    std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+             // invalid case:
+             //
+             // =======
+             //          ======
+
+             if (pmax.col() > pmin.col())
+             {
+               box2d new_box(pmin, pmax);
+               mln::draw::box_plain(blocks, new_box, true);
+               // mln::draw::box_plain(log, new_box, 1);
+
+               // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+             }
+           }
+           else // Handle the case when there are several text boxes on the same line.
+           {
+             if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row())
+             {
+               // Top box
+               // NOTE(review): unlike the branch above, the column overlap
+               // is not checked before the box is built -- confirm this is
+               // intended.
+               point2d
+                 pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+                      std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+                 pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+                      std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+               box2d new_box(pmin, pmax);
+
+               mln::draw::box_plain(blocks, new_box, true);
+               // mln::draw::box_plain(log, new_box, 2);
+               // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+             }
+
+             if (last_box.pmax().col() < line.bbox().pmax().col()
+                 && last_box.pmin().col() < line.bbox().pmin().col())
+             {
+               // Left box
+               // NOTE(review): last_tbox is read here without an is_valid()
+               // check; it is still default-constructed while the first row
+               // of the paragraph is being processed -- confirm this is safe.
+               point2d
+                 pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+                      std::min(last_box.pmax().col(), line.bbox().pmin().col())),
+                 pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()),
+                      std::max(last_box.pmax().col(), line.bbox().pmin().col()));
+
+               box2d new_box(pmin, pmax);
+               mln::draw::box_plain(blocks, new_box, true);
+               // mln::draw::box_plain(log, new_box, 3);
+               // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+             }
+           }
+         }
+
+         // The line itself is always part of the block mask.
+         mln::draw::box_plain(blocks, line.bbox(), true);
+         // mln::draw::box_plain(log, line.bbox(), 255);
+         // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+         last_box = line.bbox();
+       }
+     }
+   }
+
+   t.stop();
+   std::cout << t << std::endl;
+
+   mln::io::pbm::save(blocks, argv[2]);
+   // mln::io::pgm::save(log, "log.pgm");
+
+   trace::exiting("main");
+ }
diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc
new file mode 100644
index 0000000..0f4cce4
--- /dev/null
+++ b/scribo/src/non_text_components.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+#include <libgen.h>
+#include <fstream>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+
+#include <mln/io/pbm/save.hh>
+#include <mln/io/magick/load.hh>
+
+#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/text_in_doc_preprocess.hh>
+
+#include <scribo/core/document.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/crop_without_localization.hh>
+#include <scribo/preprocessing/crop.hh>
+
+#include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
+
+
+// Command-line argument names and their one-line descriptions, handed
+// to scribo::debug::usage() by main() when the argument count is wrong.
+// The trailing {0, 0} pair is presumably the end-of-table sentinel
+// expected by usage() -- TODO confirm against scribo/debug/usage.hh.
+// NOTE(review): main() forwards the 4th argument to the toolchain as
+// its "find_whitespace_seps" flag; it is labelled enable_tabstops here.
+const char *args_desc[][2] =
+{
+ { "input.*", "An image." },
+ { "non_text_comps.pbm", "Non text components mask." },
+ { "enable_debug", "Enable debug image output. Set to 1 or 0." },
+ { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
+ {0, 0}
+};
+
+
+
+// Entry point: run the document toolchain on an image and save a
+// binary mask computed from the labeled image of the document elements.
+//
+// Usage: input.* non_text_comps.pbm [enable_debug] [enable_tabstops]
+//
+//   argv[1]  input image, loaded with mln::io::magick::load.
+//   argv[2]  output PBM mask.
+//   argv[3]  optional; a non-zero integer sets the logger level to
+//            Special, otherwise logging is set to None.
+//   argv[4]  optional; a non-zero integer is forwarded to the toolchain
+//            as the find_whitespace_seps flag.
+//
+// Two debug renderings named after the input base name are also saved:
+// "<name>_debug_wo_image.png" and "<name>_debug_with_image.png".
+int main(int argc, char* argv[])
+{
+  using namespace scribo;
+  using namespace mln;
+
+  if (argc < 3 || argc > 5)
+    return scribo::debug::usage(argv,
+                                "Extract non text components mask.",
+                                "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+                                args_desc);
+
+  // POSIX basename() is allowed to modify its argument, and argv[1] is
+  // still needed below to load the image: work on a private copy.
+  std::string input_path = argv[1];
+  std::string out_img = basename(&input_path[0]);
+
+  // Strip the extension whatever its length.  Blindly erasing the last
+  // four characters threw std::out_of_range on names shorter than four
+  // characters and left a trailing dot for ".jpeg" or ".tiff".  A
+  // leading dot (hidden file, or the "." returned for an empty path) is
+  // not treated as an extension separator.
+  const std::string::size_type dot = out_img.rfind('.');
+  if (dot != std::string::npos && dot != 0)
+    out_img.erase(dot);
+
+  std::string filename_prefix = out_img + "_debug";
+  scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
+  if (argc > 3 && atoi(argv[3]))
+    scribo::debug::logger().set_level(scribo::debug::Special);
+  else
+    scribo::debug::logger().set_level(scribo::debug::None);
+
+  trace::entering("main");
+
+  Magick::InitializeMagick(*argv);
+
+  typedef image2d<scribo::def::lbl_type> L;
+  image2d<value::rgb8> input;
+  mln::io::magick::load(input, argv[1]);
+
+  // Only the preprocessing and the toolchain run are timed; loading and
+  // saving are excluded.
+  util::timer t;
+  t.start();
+
+  // Preprocess document
+  image2d<bool>
+    input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+
+
+  bool denoise = true;
+  std::string language = "";
+  bool find_line_seps = true;
+  bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
+
+  std::cout << "Running with the following options :"
+            << " ocr_language = " << language
+            << " | find_lines_seps = " << find_line_seps
+            << " | find_whitespace_seps = " << find_whitespace_seps
+            << " | debug = " << scribo::debug::logger().is_enabled()
+            << std::endl;
+
+  // Run document toolchain.
+
+  // Text
+  std::cout << "Analysing document..." << std::endl;
+  // language is empty, so the sixth argument (!language.empty()) is
+  // always false here.
+  document<L>
+    doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+                                            find_line_seps, find_whitespace_seps,
+                                            !language.empty(), language);
+  t.stop();
+  std::cout << t << std::endl;
+
+  mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+
+  scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
+  scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+
+  trace::exiting("main");
+}
--
1.5.6.5
1
0
last-svn-commit-809-g21ca187 New non-text components extraction routine.
by Guillaume Lazzara 14 Mar '11
by Guillaume Lazzara 14 Mar '11
14 Mar '11
* scribo/make/text_blocks_image.hh,
* scribo/make/text_components_image.hh,
* scribo/primitive/extract/internal/union.hh: New.
* scribo/primitive/extract/non_text.hh: New implementation.
* scribo/primitive/extract/non_text_kmean.hh: New. Old
implementation is saved here.
---
scribo/ChangeLog | 13 +
scribo/scribo/make/text_blocks_image.hh | 136 ++++++++
scribo/scribo/make/text_components_image.hh | 101 ++++++
scribo/scribo/primitive/extract/internal/union.hh | 246 +++++++++++++
scribo/scribo/primitive/extract/non_text.hh | 366 ++++++++++++++------
.../extract/{non_text.hh => non_text_kmean.hh} | 17 +-
6 files changed, 761 insertions(+), 118 deletions(-)
create mode 100644 scribo/scribo/make/text_blocks_image.hh
create mode 100644 scribo/scribo/make/text_components_image.hh
create mode 100644 scribo/scribo/primitive/extract/internal/union.hh
copy scribo/scribo/primitive/extract/{non_text.hh => non_text_kmean.hh} (91%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 3e77d78..a3de9ea 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New non-text components extraction routine.
+
+ * scribo/make/text_blocks_image.hh,
+ * scribo/make/text_components_image.hh,
+ * scribo/primitive/extract/internal/union.hh: New.
+
+ * scribo/primitive/extract/non_text.hh: New implementation.
+
+ * scribo/primitive/extract/non_text_kmean.hh: New. Old
+ implementation is saved here.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Introduce result image output.
* scribo/fun/v2v/highlight.hh,
diff --git a/scribo/scribo/make/text_blocks_image.hh b/scribo/scribo/make/text_blocks_image.hh
new file mode 100644
index 0000000..fbc16df
--- /dev/null
+++ b/scribo/scribo/make/text_blocks_image.hh
@@ -0,0 +1,136 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+
+/// \file
+///
+/// \brief Create a mask of paragraph blocks.
+
+# include <mln/core/image/image2d.hh>
+# include <mln/draw/box_plain.hh>
+
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a mask of paragraph blocks.
+ /*!
+ For every paragraph of \p doc having at least \p min_nlines lines,
+ the bounding box of each of its lines is drawn in the mask. When a
+ line lies below the previously drawn line of the same paragraph, a
+ box filling the vertical gap between them (restricted to the
+ columns they share) is drawn as well.
+
+ \param[in] doc A document structure.
+ \param[in] min_nlines Minimum number of lines a paragraph must
+ have to be drawn.
+
+ \return A binary image initialized from doc.image(): true inside
+ the drawn boxes, false elsewhere.
+
+ \pre \p doc must be open (doc.is_open()).
+
+ \note The implementation tolerates a document without text: when
+ doc.has_text() is false the returned mask is entirely false.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines);
+
+
+ # ifndef MLN_INCLUDE_ONLY
+
+ // Build the paragraph-block mask of \p doc.
+ //
+ // Paragraphs with fewer than \p min_nlines lines are ignored.  For
+ // the others, each line bounding box is drawn, plus a "bridge" box
+ // filling the vertical gap between a line and the previous one over
+ // the columns both lines share.  The mask is all false when
+ // doc.has_text() is false.
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines)
+ {
+   trace::entering("scribo::make::text_blocks_image");
+
+   mln_precondition(doc.is_open());
+
+   image2d<bool> output;
+   initialize(output, doc.image());
+   data::fill(output, false);
+
+   if (doc.has_text())
+   {
+     const paragraph_set<L>& parset = doc.paragraphs();
+     for_all_paragraphs(p, parset)
+       if (parset(p).nlines() >= min_nlines)
+       {
+         // Bounding box of the previously drawn line of this
+         // paragraph; default-constructed, so is_valid() is relied
+         // upon to be false for the first line.
+         box2d last_box;
+
+         // For each line in this paragraph.
+         for_all_elements(l, parset(p).line_ids())
+         {
+           const line_info<L>&
+             line = parset.lines()(parset(p).line_ids()(l));
+
+           // Bridge the gap only when the current line starts below
+           // the previous one AND their column ranges overlap.  The
+           // original guard rejected a line lying entirely to the
+           // left of the previous box but not one lying entirely to
+           // its right; the latter produced a box with
+           // pmin.col() > pmax.col().  The added test uses <= so
+           // that every box that was valid before is still drawn.
+           // NOTE(review): building such an inverted box2d
+           // presumably trips the box validity precondition in
+           // debug builds -- confirm.
+           if (last_box.is_valid()
+               && last_box.pmax().row() < line.bbox().pmin().row()
+               && last_box.pmin().col() < line.bbox().pmax().col()
+               && line.bbox().pmin().col() <= last_box.pmax().col())
+           {
+             // Rows: from the bottom of the previous line to the top
+             // of the current one.  Columns: intersection of both.
+             point2d
+               pmin(std::min(last_box.pmax().row(),
+                             line.bbox().pmin().row()),
+                    std::max(last_box.pmin().col(),
+                             line.bbox().pmin().col())),
+               pmax(std::max(last_box.pmax().row(),
+                             line.bbox().pmin().row()),
+                    std::min(last_box.pmax().col(),
+                             line.bbox().pmax().col()));
+
+             box2d new_box(pmin, pmax);
+             mln::draw::box_plain(output, new_box, true);
+           }
+
+           mln::draw::box_plain(output, line.bbox(), true);
+           last_box = line.bbox();
+         }
+       }
+   }
+
+
+   trace::exiting("scribo::make::text_blocks_image");
+   return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
diff --git a/scribo/scribo/make/text_components_image.hh b/scribo/scribo/make/text_components_image.hh
new file mode 100644
index 0000000..e7c892e
--- /dev/null
+++ b/scribo/scribo/make/text_components_image.hh
@@ -0,0 +1,101 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+
+/// \file
+///
+/// Create a binary image with text components only.
+
+#include <mln/pw/all.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+#include <mln/util/array.hh>
+#include <mln/data/fill.hh>
+
+#include <scribo/core/document.hh>
+#include <scribo/core/line_set.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a binary image with text components only.
+ /*!
+ \param[in] doc A document structure.
+
+ \return A binary image initialized from doc.image() and filled
+ with false, in which the components of the lines flagged as text
+ lines (is_textline()) are copied from doc.binary_image().
+
+ \pre \p doc must be open (doc.is_open()).
+
+ \note The implementation tolerates a document without text: when
+ doc.has_text() is false the returned image is entirely false.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ // Build a binary image holding only the components that belong to
+ // text lines of \p doc.  The result is all false when
+ // doc.has_text() is false.
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc)
+ {
+ trace::entering("scribo::make::text_components_image");
+
+ mln_precondition(doc.is_open());
+
+ // Same geometry as the document image, everything off by default.
+ mln_ch_value(L,bool) output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+
+ if (doc.has_text())
+ {
+ // 'lines' aliases doc.lines(); the loop below iterates over the
+ // same set it indexes.
+ const scribo::line_set<L>& lines = doc.lines();
+ for_all_lines(l, doc.lines())
+ if (lines(l).is_textline())
+ {
+ const util::array<component_id_t>&
+ comp_ids = lines(l).component_ids();
+ const L& lbl = lines.components().labeled_image();
+ // For each component of the line: inside the component bounding
+ // box, fill the output from the document binary image, the
+ // source being further restricted to the sites labeled with
+ // this component id.
+ // NOTE(review): the source domain (bbox AND label == id) is
+ // smaller than the destination domain (bbox); confirm how
+ // data::fill handles destination sites outside the source.
+ for_all_elements(c, comp_ids)
+ data::fill((output | lines.components()(comp_ids(c)).bbox()).rw(),
+ ((doc.binary_image() | lines.components()(comp_ids(c)).bbox())
+ | (pw::value(lbl) == comp_ids(c))));
+ }
+ }
+
+ trace::exiting("scribo::make::text_components_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
diff --git a/scribo/scribo/primitive/extract/internal/union.hh b/scribo/scribo/primitive/extract/internal/union.hh
new file mode 100644
index 0000000..509a7e4
--- /dev/null
+++ b/scribo/scribo/primitive/extract/internal/union.hh
@@ -0,0 +1,246 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Various utilities for image extraction
+///
+/// \fixme To be cleaned up.
+
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/border/fill.hh>
+
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace extract
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+
+ /// \brief Return the root of element \p x in the forest stored in
+ /// \p parent (a root is an element that is its own parent).
+ ///
+ /// The elements visited on the way have their parent rewritten to
+ /// the root. \p x and the stored parents are buffer indices used
+ /// with image2d::element().
+ unsigned find_root(image2d<unsigned>& parent, unsigned x);
+
+
+ /// \brief Group the pixels of \p input whose value equals \p lab
+ /// into components, looking at the upper and left neighbours only.
+ ///
+ /// \param[in] input The binary image to process.
+ /// \param[in] lab The pixel value to consider.
+ /// \param[out] parent Parent index of every considered pixel; it is
+ /// first filled with 0.
+ /// \param[out] area Pixel count, maintained at the entries that
+ /// are (or were) roots. It is not reset by the function.
+ /// \param[out] max_area Largest count reached while growing or
+ /// merging components; it stays 0 if no pixel ever joins an
+ /// existing component.
+ void union_find(const image2d<bool>& input, bool lab,
+ image2d<unsigned>& parent, image2d<unsigned>& area,
+ unsigned& max_area);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ // Return the representative of element \p x in the forest encoded by
+ // \p parent, where a representative is its own parent.  Every element
+ // met between \p x and the representative is re-attached directly to
+ // it.
+ inline
+ unsigned find_root(image2d<unsigned>& parent, unsigned x)
+ {
+   // First walk: climb up to the representative.
+   unsigned root = x;
+   while (parent.element(root) != root)
+     root = parent.element(root);
+
+   // Second walk: make each element of the path point to it.
+   while (x != root)
+   {
+     const unsigned next = parent.element(x);
+     parent.element(x) = root;
+     x = next;
+   }
+
+   return root;
+ }
+
+
+ // Single raster-scan union-find over the pixels of \p input whose
+ // value equals \p lab.  Only the upper and left neighbours of a pixel
+ // are examined.
+ //
+ // All pixels are addressed through buffer indices computed from
+ // \p input and used as is on \p parent and \p area.
+ // NOTE(review): this assumes the three images share the same domain
+ // and border, and that the domain starts at (0,0) -- confirm against
+ // callers.
+ //
+ // On return:
+ //  - parent holds, for each pixel equal to lab, the index of its
+ //    parent (a root is its own parent); it is first filled with 0.
+ //  - area holds the pixel count at entries that are or were roots;
+ //    it is NOT reset here, other entries keep their previous content.
+ //  - max_area is the largest count reached when a pixel joined an
+ //    existing component or when two components merged.  Creating a
+ //    new single-pixel component does not update it, so it stays 0 if
+ //    every component is a single pixel.
+ inline
+ void union_find(const image2d<bool>& input,
+ bool lab,
+ // output:
+ image2d<unsigned>& parent,
+ image2d<unsigned>& area,
+ unsigned& max_area)
+ {
+ const unsigned nrows = input.nrows(), ncols = input.ncols();
+
+ // op: index of the current pixel; on: index of a neighbour (left on
+ // the first row, upper afterwards); delta: index offset of a move of
+ // one row down, hence "op - delta" is the upper neighbour.
+ unsigned op, on, delta = input.delta_index(dpoint2d(1, 0));
+
+ data::fill(parent, 0);
+ max_area = 0;
+
+ {
+
+ // row == 0 and col == 0
+
+ // No neighbour to look at: the first pixel can only start a component.
+ op = input.index_of_point(point2d(0,0));
+ if (input.element(op) == lab)
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+
+ // row = 0
+
+ // First row: only the left neighbour exists.
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ on = op;
+ ++op;
+
+ if (input.element(op) != lab)
+ continue;
+
+ if (input.element(on) == lab)
+ {
+ // Join the component of the left neighbour.
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ // Start a new component.
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+ }
+ }
+
+
+ for (unsigned row = 1; row < nrows; ++row)
+ {
+
+ {
+ // col == 0
+
+ // First column: only the upper neighbour exists.
+ op = input.index_of_point(point2d(row, 0));
+ on = op - delta;
+
+ if (input.element(op) == lab)
+ {
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+ }
+
+ }
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ // op and on advance together, so on stays right above op.
+ ++op;
+ ++on;
+
+ if (input.element(op) != lab)
+ continue;
+
+ // Set once the current pixel has been attached to a component.
+ bool merge_ = false;
+
+ // up
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+
+
+ // left
+
+ unsigned ol = op - 1;
+
+ if (input.element(ol) == lab)
+ {
+ if (merge_)
+ {
+ // Already attached through the upper neighbour.  When the
+ // upper-left pixel equals lab too, the union is skipped.
+ if (input.element(on - 1) != lab) // not already merged
+ {
+ // parent.element(op) was set to a root in the "up" step.
+ unsigned r_op = parent.element(op), r_ol = find_root(parent, ol);
+ if (r_op != r_ol)
+ {
+ // do-union
+ // The root with the smaller index becomes the root of the
+ // union and receives the sum of both areas.
+ if (r_op < r_ol)
+ {
+ parent.element(r_ol) = r_op;
+ area.element(r_op) += area.element(r_ol);
+ if (area.element(r_op) > max_area)
+ max_area = area.element(r_op);
+ }
+ else
+ {
+ parent.element(r_op) = r_ol;
+ area.element(r_ol) += area.element(r_op);
+ if (area.element(r_ol) > max_area)
+ max_area = area.element(r_ol);
+ }
+ }
+ }
+ } // end of "if (merge_)"
+ else
+ {
+ // Not attached yet: join the component of the left neighbour.
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, ol);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+ }
+
+
+ // finalization
+
+ // Neither neighbour equals lab: the pixel starts a new component.
+ if (merge_ == false)
+ {
+ parent.element(op) = op;
+ area.element(op) = 1;
+ }
+
+ }
+ }
+
+ } // end of 'union_find'
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::primitive::extract::internal
+
+ } // end of namespace scribo::primitive::extract
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text.hh
index 8528782..a456270 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -33,30 +33,19 @@
#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# include <mln/core/image/image2d.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/data/fill.hh>
-# include <mln/util/array.hh>
-# include <mln/labeling/compute.hh>
-# include <mln/labeling/relabel.hh>
-# include <mln/accu/math/count.hh>
-# include <mln/pw/all.hh>
+# include <mln/morpho/elementary/dilation.hh>
-# include <mln/draw/box_plain.hh>
-# include <mln/value/label_8.hh>
-# include <mln/value/rgb.hh>
-# include <mln/value/rgb8.hh>
+# include <scribo/make/text_components_image.hh>
+# include <scribo/make/text_blocks_image.hh>
-# include <scribo/core/macros.hh>
-# include <scribo/core/component_set.hh>
-# include <scribo/core/document.hh>
-# include <scribo/core/line_set.hh>
-# include <scribo/core/def/lbl_type.hh>
-# include <scribo/filter/objects_small.hh>
+# include <scribo/primitive/extract/internal/union.hh>
+# include <scribo/debug/logger.hh>
+
+//DEBUG
+#include <mln/util/timer.hh>
+#include <mln/io/pbm/save.hh>
-# include <mln/clustering/kmean_rgb.hh>
-# include <mln/fun/v2v/rgb8_to_rgbn.hh>
namespace scribo
{
@@ -69,10 +58,24 @@ namespace scribo
using namespace mln;
+ /*! \brief Extract non text components.
+
+ This method takes text localization into account and tries to
+ learn the background colors to deduce the relevant non text
+ components.
+
+ \param[in] doc A document structure. It must have paragraph
+ information.
+
+ \param[in] nlines The number of lines needed in a paragraph to
+ consider the latter during the background color learning.
+
- template <typename L, typename I>
+ \return A component set of non text components.
+ */
+ template <typename L>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text(const document<L>& doc, unsigned nlines);
# ifndef MLN_INCLUDE_ONLY
@@ -82,133 +85,276 @@ namespace scribo
{
template <typename L>
- struct order_bbox
+ image2d<bool>
+ learn(const document<L>& doc,
+ const image2d<bool>& txt,
+ const image2d<bool>& txtblocks,
+ unsigned nbits,
+ float p_cover)
{
- order_bbox(const scribo::component_set<L>& comps)
- : comps_(comps)
+ const image2d<value::rgb8>& input = doc.image();
+ const image2d<bool>&
+ seps = doc.paragraphs().lines().components().separators();
+
+ if (txt.border() != input.border()
+ || txtblocks.border() != input.border()
+ || seps.border() != input.border())
{
+ std::cerr << " txt.border() = " << txt.border()
+ << " - txtblocks.border() = " << txtblocks.border()
+ << " - input.border() = " << input.border()
+ << " - seps.border() = " << seps.border()
+ << std::endl;
+ std::cerr << "different sizes for borders! Resizing..." << std::endl;
+
+
+ border::resize(txt, border::thickness);
+ border::resize(input, border::thickness);
+ border::resize(txtblocks, border::thickness);
+ border::resize(seps, border::thickness);
+ // std::abort();
}
- bool operator()(const unsigned& c1, const unsigned& c2) const
+
+ const unsigned q_div = std::pow(2, 8 - nbits);
+ const unsigned q = unsigned(std::pow(2, nbits));
+ const unsigned nelements = input.nelements();
+
+
+ image3d<unsigned> h_bg(q, q, q);
+ data::fill(h_bg, 0);
+
+ border::fill(txtblocks, false); // so h_bg is not updated by border pixels!
+
+ unsigned n_bg = 0;
{
- if (comps_(c1).bbox().nsites() == comps_(c2).bbox().nsites())
- return c1 > c2;
- return comps_(c1).bbox().nsites() > comps_(c2).bbox().nsites();
+ // compute h_bg
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txtblocks.element(i) == true)
+ {
+ ++n_bg;
+ const value::rgb8& c = input.element(i);
+ ++h_bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
}
- scribo::component_set<L> comps_;
- };
+ typedef std::map<unsigned, unsigned> map_t;
+ map_t ncells_with_nitems;
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ {
+ unsigned nitems_in_c = h_bg(c);
+ ++ncells_with_nitems[ nitems_in_c ];
+ }
+ }
- } // end of namespace scribo::primitive::extract::internal
+ unsigned n_items_min = 0;
+ {
+ map_t::const_reverse_iterator i;
+ unsigned N = 0;
+ for (i = ncells_with_nitems.rbegin(); i != ncells_with_nitems.rend(); ++i)
+ {
+ unsigned nitems = i->first, ncells = i->second;
+ N += nitems * ncells;
+ if (float(N) > p_cover * float(n_bg))
+ {
+ n_items_min = nitems;
+ break;
+ }
+ }
+ }
+ if (n_items_min == 0)
+ n_items_min = 1; // safety
- // FACADE
+ image3d<bool> bg(q, q, q);
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ bg(c) = (h_bg(c) >= n_items_min);
+ }
- template <typename L, typename I>
- component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
- {
- trace::entering("scribo::primitive::extract::non_text");
- const I& input = exact(input_);
- mln_precondition(doc.is_valid());
- mln_precondition(input.is_valid());
+ // outputing
- const line_set<L>& lines = doc.lines();
+ image2d<bool> output;
+ initialize(output, input);
+ {
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txt.element(i) == true || seps.element(i) == true)
+ output.element(i) = false;
+ else
+ {
+ const value::rgb8& c = input.element(i);
+ output.element(i) = ! bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
+ }
- // Element extraction
+ return output;
+ }
- image2d<value::label_8> img_lbl8;
+
+
+
+
+ inline
+ image2d<bool>
+ cleaning(const image2d<bool>& input, unsigned lambda)
{
- image2d<bool> content;
- initialize(content, input);
- data::fill(content, true);
+ const box2d& dom = input.domain();
+
+ image2d<unsigned> area(dom);
+ image2d<unsigned> parent(dom);
+ image2d<bool> output(dom);
+
+ unsigned max_area = 0;
+
+
+ // 1st pass = bg union-find
+
+ {
+ union_find(input, false, // in
+ parent, area, max_area // out
+ );
+ }
+
+
+ // echo
+ // std::cout << "max_area = " << max_area << std::endl;
+
+
+ // 2nd pass = bg biggest component selection
+
+ {
+ const unsigned nelements = input.nelements();
+ const bool* p_i = input.buffer();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ if (*p_i == true)
+ *p_o = true;
+ else
+ {
+ if (*p_par == i)
+ *p_o = (*p_a != max_area);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_i;
+ ++p_o;
+ ++p_a;
+ ++p_par;
+ }
+ }
+
+
+
+ // 3rd pass = fg union-find
- for_all_lines(l, lines)
- if (lines(l).type() == line::Text)
- data::fill((content | lines(l).bbox()).rw(), false);
+ {
+ union_find(output, true, // in
+ parent, area, max_area // out
+ );
+ }
- typedef mln::value::rgb<5> t_rgb5;
- typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
- image2d<t_rgb5>
- img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
- img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
- data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+ // 4th pass = cleaning fg
- mln::util::array<unsigned>
- card = mln::labeling::compute(accu::math::count<value::label_8>(),
- img_lbl8, img_lbl8, 3);
+ {
+ const unsigned nelements = input.nelements();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
- unsigned max = 0, bg_id = 0;
- for_all_ncomponents(c, 3)
- if (card(c) > max)
+ for (unsigned i = 0; i < nelements; ++i)
{
- max = card(c);
- bg_id = c;
+ if (*p_o == true)
+ {
+ if (*p_par == i)
+ *p_o = (*p_a > lambda);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_o;
+ ++p_a;
+ ++p_par;
}
+ }
+
- mln::fun::i2v::array<bool> f(4, true);
- f(0) = false;
- f(bg_id) = false;
- labeling::relabel_inplace(img_lbl8, 4, f);
+ return output;
}
+ } // end of namespace scribo::primitive::extract::internal
- component_set<L> output;
- std::cout << "Removing small elements" << std::endl;
- {
- image2d<bool> elts;
- initialize(elts, img_lbl8);
- data::fill(elts, false);
- data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
- scribo::def::lbl_type nlabels;
- elts = filter::components_small(elts, c8(), nlabels, 40);
+ // FACADE
+
+ template <typename L>
+ component_set<L>
+ non_text(const document<L>& doc, unsigned nlines)
+ {
+ trace::entering("scribo::primitive::extract::non_text");
+
- output = primitive::extract::components(elts, c8(), nlabels);
+ util::timer t;
+ t.start();
+
+ mln_precondition(doc.is_valid());
+
+ mln_precondition(doc.has_line_seps());
+ mln_precondition(doc.has_text());
+
+ // FIXME: Do these images exist elsewhere?
+ image2d<bool>
+ txt = make::text_components_image(doc),
+ txtblocks = make::text_blocks_image(doc, nlines);
+
+ unsigned nbits = 5;
+ float p = 0.9998; // 0.80 <= x < 1.0
+ unsigned lambda = 1000;
+
+ // enlarge the text mask so that "not txt" does not include
+ // any text pixel
+ txt = morpho::elementary::dilation(txt, c8());
+ txt = morpho::elementary::dilation(txt, c4());
+
+ // FIXME: Make it faster?
+ data::fill((txtblocks | pw::value(txt)).rw(), false);
+
+ // Debug
+ {
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txt, "txt_components");
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txtblocks, "txt_blocks");
}
+ image2d<bool>
+ element_image = internal::learn(doc, txt, txtblocks, nbits, p);
+ element_image = internal::cleaning(element_image, lambda);
- std::cout << "Ignoring inner elements" << std::endl;
+ mln_value(L) ncomps;
+ component_set<L>
+ elements = primitive::extract::components(element_image,
+ c8(), ncomps);
+ // Debug
{
- // FIXME: We would like to use the convex hull instead of the bbox.
- internal::order_bbox<L> func(output);
- util::array<unsigned> box_ordered_comps;
- for (unsigned i = 1; i < output.nelements(); ++i)
- box_ordered_comps.append(i);
- std::sort(box_ordered_comps.hook_std_vector_().begin(),
- box_ordered_comps.hook_std_vector_().end(), func);
-
- image2d<bool> merged_elts;
- initialize(merged_elts, img_lbl8);
- data::fill(merged_elts, false);
- for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
- {
- unsigned c = box_ordered_comps(i);
- point2d
- pminright = output(c).bbox().pmin(),
- pmaxleft = output(c).bbox().pmax();
- pminright.col() = output(c).bbox().pmax().col();
- pmaxleft.col() = output(c).bbox().pmin().col();
-
- if (merged_elts(output(c).bbox().pmin())
- && merged_elts(output(c).bbox().pmax())
- && merged_elts(pminright)
- && merged_elts(pmaxleft))
- output(c).update_tag(component::Ignored);
- else
- mln::draw::box_plain(merged_elts, output(c).bbox(), true);
- }
+ debug::logger().log_image(debug::Results,
+ elements.labeled_image(),
+ "non_text_components");
}
trace::exiting("scribo::primitive::extract::non_text");
- return output;
+ return elements;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text_kmean.hh
similarity index 91%
copy from scribo/scribo/primitive/extract/non_text.hh
copy to scribo/scribo/primitive/extract/non_text_kmean.hh
index 8528782..c76ce11 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text_kmean.hh
@@ -30,8 +30,8 @@
///
/// \fixme To be optimized!
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
# include <mln/core/image/image2d.hh>
# include <mln/core/alias/neighb2d.hh>
@@ -72,7 +72,7 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text_kmean(const document<L>& doc, const Image<I>& input);
# ifndef MLN_INCLUDE_ONLY
@@ -107,9 +107,9 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
+ non_text_kmean(const document<L>& doc, const Image<I>& input_)
{
- trace::entering("scribo::primitive::extract::non_text");
+ trace::entering("scribo::primitive::extract::non_text_kmean");
const I& input = exact(input_);
mln_precondition(doc.is_valid());
@@ -136,7 +136,8 @@ namespace scribo
img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
+ mln::clustering::kmean_rgb<double,5>(
+ (img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
mln::util::array<unsigned>
@@ -207,7 +208,7 @@ namespace scribo
}
}
- trace::exiting("scribo::primitive::extract::non_text");
+ trace::exiting("scribo::primitive::extract::non_text_kmean");
return output;
}
@@ -220,4 +221,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
--
1.5.6.5
1
0
last-svn-commit-808-ged1425b New non-text components extraction routine.
by Guillaume Lazzara 14 Mar '11
by Guillaume Lazzara 14 Mar '11
14 Mar '11
* scribo/make/text_blocks_image.hh,
* scribo/make/text_components_image.hh,
* scribo/primitive/extract/internal/union.hh: New.
* scribo/primitive/extract/non_text.hh: New implementation.
* scribo/primitive/extract/non_text_kmean.hh: New. Old
implementation is saved here.
---
scribo/ChangeLog | 13 +
scribo/scribo/make/text_blocks_image.hh | 136 ++++++++
scribo/scribo/make/text_components_image.hh | 101 ++++++
scribo/scribo/primitive/extract/internal/union.hh | 246 +++++++++++++
scribo/scribo/primitive/extract/non_text.hh | 366 ++++++++++++++------
.../extract/{non_text.hh => non_text_kmean.hh} | 17 +-
6 files changed, 761 insertions(+), 118 deletions(-)
create mode 100644 scribo/scribo/make/text_blocks_image.hh
create mode 100644 scribo/scribo/make/text_components_image.hh
create mode 100644 scribo/scribo/primitive/extract/internal/union.hh
copy scribo/scribo/primitive/extract/{non_text.hh => non_text_kmean.hh} (91%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 43041fe..32dda87 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New non-text components extraction routine.
+
+ * scribo/make/text_blocks_image.hh,
+ * scribo/make/text_components_image.hh,
+ * scribo/primitive/extract/internal/union.hh: New.
+
+ * scribo/primitive/extract/non_text.hh: New implementation.
+
+ * scribo/primitive/extract/non_text_kmean.hh: New. Old
+ implementation is saved here.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Introduce result image output.
* scribo/fun/v2v/highlight.hh,
diff --git a/scribo/scribo/make/text_blocks_image.hh b/scribo/scribo/make/text_blocks_image.hh
new file mode 100644
index 0000000..fbc16df
--- /dev/null
+++ b/scribo/scribo/make/text_blocks_image.hh
@@ -0,0 +1,136 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+
+/// \file
+///
+/// \brief Create a mask of paragraph blocks.
+
+# include <mln/core/image/image2d.hh>
+# include <mln/draw/box_plain.hh>
+
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a mask of paragraph blocks.
+ /*!
+ \pre The has_text() method of \p doc MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines);
+
+
+ # ifndef MLN_INCLUDE_ONLY
+
+ template <typename L> // Build a binary mask covering the text lines of the paragraphs of doc.
+ mln_ch_value(L,bool) // NOTE(review): the body returns an image2d<bool>; presumably the same type for the L used here -- confirm.
+ text_blocks_image(const document<L>& doc, unsigned min_nlines) // paragraphs with fewer than min_nlines lines are skipped
+ {
+ trace::entering("scribo::make::text_blocks_image");
+
+ mln_precondition(doc.is_open());
+ // The mask is initialized from doc.image() and starts all-false.
+ image2d<bool> output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, output);
+ // data::fill(log, 0);
+
+ if (doc.has_text()) // without text the all-false mask is returned as is
+ {
+ const paragraph_set<L>& parset = doc.paragraphs();
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= min_nlines)
+ {
+ box2d last_box; // bounding box of the previously drawn line of this paragraph
+
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>&
+ line = parset.lines()(parset(p).line_ids()(l));
+
+ // Avoid invalid case:
+ //
+ // =======
+ // ======
+ if (last_box.is_valid() // false for the first line: last_box is still default-constructed
+ && last_box.pmax().row() < line.bbox().pmin().row() // previous box ends above the top of this line
+ && last_box.pmin().col() < line.bbox().pmax().col()) // previous box starts left of this line's right edge
+ {
+ point2d // corners of the gap between the two boxes: rows between them, columns common to both
+ pmin(std::min(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::max(last_box.pmin().col(),
+ line.bbox().pmin().col())),
+ pmax(std::max(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(),
+ line.bbox().pmax().col()));
+ // NOTE(review): nothing checks last_box.pmax().col() >= line.bbox().pmin().col(); otherwise pmin.col() > pmax.col() -- confirm.
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(output, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ mln::draw::box_plain(output, line.bbox(), true); // the line's own bounding box is always drawn
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+ }
+
+
+ trace::exiting("scribo::make::text_blocks_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
diff --git a/scribo/scribo/make/text_components_image.hh b/scribo/scribo/make/text_components_image.hh
new file mode 100644
index 0000000..e7c892e
--- /dev/null
+++ b/scribo/scribo/make/text_components_image.hh
@@ -0,0 +1,101 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+
+/// \file
+///
+/// Create a binary image with text components only.
+
+#include <mln/pw/all.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+#include <mln/util/array.hh>
+#include <mln/data/fill.hh>
+
+#include <scribo/core/document.hh>
+#include <scribo/core/line_set.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a binary image with text components only.
+ /*!
+ \pre The has_text() method of \p doc MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L> // Build a binary image holding only the components of the text lines of doc.
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc)
+ {
+ trace::entering("scribo::make::text_components_image");
+
+ mln_precondition(doc.is_open());
+ // The result is initialized from doc.image() and starts all-false.
+ mln_ch_value(L,bool) output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+
+ if (doc.has_text()) // without text the all-false image is returned as is
+ {
+ const scribo::line_set<L>& lines = doc.lines();
+ for_all_lines(l, doc.lines())
+ if (lines(l).is_textline()) // lines for which is_textline() is false are ignored
+ {
+ const util::array<component_id_t>&
+ comp_ids = lines(l).component_ids();
+ const L& lbl = lines.components().labeled_image();
+ for_all_elements(c, comp_ids) // one fill per component, restricted to that component's bounding box
+ data::fill((output | lines.components()(comp_ids(c)).bbox()).rw(),
+ ((doc.binary_image() | lines.components()(comp_ids(c)).bbox())
+ | (pw::value(lbl) == comp_ids(c)))); // source: binary image on sites of the box labeled comp_ids(c); presumably only those sites are written -- confirm data::fill semantics
+ }
+ }
+
+ trace::exiting("scribo::make::text_components_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
diff --git a/scribo/scribo/primitive/extract/internal/union.hh b/scribo/scribo/primitive/extract/internal/union.hh
new file mode 100644
index 0000000..509a7e4
--- /dev/null
+++ b/scribo/scribo/primitive/extract/internal/union.hh
@@ -0,0 +1,246 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Various utilities for image extraction
+///
+/// \fixme To be cleaned up.
+
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/border/fill.hh>
+
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace extract
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+
+ unsigned find_root(image2d<unsigned>& parent, unsigned x);
+
+
+ void union_find(const image2d<bool>& input, bool lab,
+ image2d<unsigned>& parent, image2d<unsigned>& area,
+ unsigned& max_area);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ inline // Follow the parent links from element index x up to the root of its tree and return that root.
+ unsigned find_root(image2d<unsigned>& parent, unsigned x)
+ {
+ if (parent.element(x) == x) // an element that is its own parent is a root
+ return x;
+ return parent.element(x) = find_root(parent, parent.element(x)); // the root is stored back into parent[x] while unwinding (path compression)
+ }
+
+
+ inline // Single raster scan grouping the pixels of input equal to lab into components, using the up and left neighbors only.
+ void union_find(const image2d<bool>& input,
+ bool lab,
+ // output: parent = forest of element indices; area = pixel count, kept up to date at roots; max_area = largest count seen
+ image2d<unsigned>& parent,
+ image2d<unsigned>& area,
+ unsigned& max_area)
+ {
+ const unsigned nrows = input.nrows(), ncols = input.ncols();
+ // op = index of the current pixel, on = index of its reference neighbor (left in row 0, up afterwards), delta = index step to the next row.
+ unsigned op, on, delta = input.delta_index(dpoint2d(1, 0));
+
+ data::fill(parent, 0);
+ max_area = 0;
+
+ {
+
+ // row == 0 and col == 0
+ // NOTE(review): point2d(0,0) is used as the first point -- assumes the domain starts at (0,0); confirm.
+ op = input.index_of_point(point2d(0,0));
+ if (input.element(op) == lab)
+ {
+ area.element(op) = 1; // NOTE(review): a new single-pixel root never raises max_area, which stays 0 if no merge ever happens -- confirm intended.
+ parent.element(op) = op;
+ }
+
+
+ // row = 0
+ // Only the left neighbor exists here; 'on' holds its index.
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ on = op;
+ ++op;
+
+ if (input.element(op) != lab)
+ continue;
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on); // attach the pixel directly to the root of its left neighbor
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1; // start a new component rooted at this pixel
+ parent.element(op) = op;
+ }
+
+ }
+ }
+
+
+ for (unsigned row = 1; row < nrows; ++row)
+ {
+
+ {
+ // col == 0
+ // Only the up neighbor exists here.
+ op = input.index_of_point(point2d(row, 0));
+ on = op - delta;
+
+ if (input.element(op) == lab)
+ {
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+ }
+
+ }
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ ++op; // both indices advance together, so 'on' stays the pixel right above 'op'
+ ++on;
+
+ if (input.element(op) != lab)
+ continue;
+
+ bool merge_ = false; // becomes true once the pixel has been attached to a neighbor's component
+
+ // up
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+
+
+ // left
+
+ unsigned ol = op - 1;
+
+ if (input.element(ol) == lab)
+ {
+ if (merge_)
+ {
+ if (input.element(on - 1) != lab) // not already merged: an up-left pixel equal to lab has already joined the up and left pixels
+ {
+ unsigned r_op = parent.element(op), r_ol = find_root(parent, ol); // parent[op] is already a root: it was set from find_root in the "up" step
+ if (r_op != r_ol)
+ {
+ // do-union: the root with the smaller index becomes the root of the merged component
+ if (r_op < r_ol)
+ {
+ parent.element(r_ol) = r_op;
+ area.element(r_op) += area.element(r_ol);
+ if (area.element(r_op) > max_area)
+ max_area = area.element(r_op);
+ }
+ else
+ {
+ parent.element(r_op) = r_ol;
+ area.element(r_ol) += area.element(r_op);
+ if (area.element(r_ol) > max_area)
+ max_area = area.element(r_ol);
+ }
+ }
+ }
+ } // end of "if (merge_)"
+ else
+ {
+ unsigned& par_p = parent.element(op); // no up neighbor in the component: attach to the left neighbor's root
+ par_p = find_root(parent, ol);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+ }
+
+
+ // finalization
+ // Neither neighbor matched: the pixel starts a new component.
+ if (merge_ == false)
+ {
+ parent.element(op) = op;
+ area.element(op) = 1;
+ }
+
+ }
+ }
+
+ } // end of 'union_find'
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::primitive::extract::internal
+
+ } // end of namespace scribo::primitive::extract
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text.hh
index 8528782..a456270 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -33,30 +33,19 @@
#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# include <mln/core/image/image2d.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/data/fill.hh>
-# include <mln/util/array.hh>
-# include <mln/labeling/compute.hh>
-# include <mln/labeling/relabel.hh>
-# include <mln/accu/math/count.hh>
-# include <mln/pw/all.hh>
+# include <mln/morpho/elementary/dilation.hh>
-# include <mln/draw/box_plain.hh>
-# include <mln/value/label_8.hh>
-# include <mln/value/rgb.hh>
-# include <mln/value/rgb8.hh>
+# include <scribo/make/text_components_image.hh>
+# include <scribo/make/text_blocks_image.hh>
-# include <scribo/core/macros.hh>
-# include <scribo/core/component_set.hh>
-# include <scribo/core/document.hh>
-# include <scribo/core/line_set.hh>
-# include <scribo/core/def/lbl_type.hh>
-# include <scribo/filter/objects_small.hh>
+# include <scribo/primitive/extract/internal/union.hh>
+# include <scribo/debug/logger.hh>
+
+//DEBUG
+#include <mln/util/timer.hh>
+#include <mln/io/pbm/save.hh>
-# include <mln/clustering/kmean_rgb.hh>
-# include <mln/fun/v2v/rgb8_to_rgbn.hh>
namespace scribo
{
@@ -69,10 +58,24 @@ namespace scribo
using namespace mln;
+ /*! \brief Extract non text components.
+
+ This method takes text localization into account and tries to
+ learn the background colors to deduce the relevant non text
+ components.
+
+ \param[in] doc A document structure. It must have paragraph
+ information.
+
+ \param[in] nlines The number of lines needed in a paragraph to
+ consider the latter during the background color learning.
+
- template <typename L, typename I>
+ \return A component set of non text components.
+ */
+ template <typename L>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text(const document<L>& doc, unsigned nlines);
# ifndef MLN_INCLUDE_ONLY
@@ -82,133 +85,276 @@ namespace scribo
{
template <typename L>
- struct order_bbox
+ image2d<bool>
+ learn(const document<L>& doc,
+ const image2d<bool>& txt,
+ const image2d<bool>& txtblocks,
+ unsigned nbits,
+ float p_cover)
{
- order_bbox(const scribo::component_set<L>& comps)
- : comps_(comps)
+ const image2d<value::rgb8>& input = doc.image();
+ const image2d<bool>&
+ seps = doc.paragraphs().lines().components().separators();
+
+ if (txt.border() != input.border()
+ || txtblocks.border() != input.border()
+ || seps.border() != input.border())
{
+ std::cerr << " txt.border() = " << txt.border()
+ << " - txtblocks.border() = " << txtblocks.border()
+ << " - input.border() = " << input.border()
+ << " - seps.border() = " << seps.border()
+ << std::endl;
+ std::cerr << "different sizes for borders! Resizing..." << std::endl;
+
+
+ border::resize(txt, border::thickness);
+ border::resize(input, border::thickness);
+ border::resize(txtblocks, border::thickness);
+ border::resize(seps, border::thickness);
+ // std::abort();
}
- bool operator()(const unsigned& c1, const unsigned& c2) const
+
+ const unsigned q_div = std::pow(2, 8 - nbits);
+ const unsigned q = unsigned(std::pow(2, nbits));
+ const unsigned nelements = input.nelements();
+
+
+ image3d<unsigned> h_bg(q, q, q);
+ data::fill(h_bg, 0);
+
+ border::fill(txtblocks, false); // so h_bg is not updated by border pixels!
+
+ unsigned n_bg = 0;
{
- if (comps_(c1).bbox().nsites() == comps_(c2).bbox().nsites())
- return c1 > c2;
- return comps_(c1).bbox().nsites() > comps_(c2).bbox().nsites();
+ // compute h_bg
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txtblocks.element(i) == true)
+ {
+ ++n_bg;
+ const value::rgb8& c = input.element(i);
+ ++h_bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
}
- scribo::component_set<L> comps_;
- };
+ typedef std::map<unsigned, unsigned> map_t;
+ map_t ncells_with_nitems;
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ {
+ unsigned nitems_in_c = h_bg(c);
+ ++ncells_with_nitems[ nitems_in_c ];
+ }
+ }
- } // end of namespace scribo::primitive::extract::internal
+ unsigned n_items_min = 0;
+ {
+ map_t::const_reverse_iterator i;
+ unsigned N = 0;
+ for (i = ncells_with_nitems.rbegin(); i != ncells_with_nitems.rend(); ++i)
+ {
+ unsigned nitems = i->first, ncells = i->second;
+ N += nitems * ncells;
+ if (float(N) > p_cover * float(n_bg))
+ {
+ n_items_min = nitems;
+ break;
+ }
+ }
+ }
+ if (n_items_min == 0)
+ n_items_min = 1; // safety
- // FACADE
+ image3d<bool> bg(q, q, q);
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ bg(c) = (h_bg(c) >= n_items_min);
+ }
- template <typename L, typename I>
- component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
- {
- trace::entering("scribo::primitive::extract::non_text");
- const I& input = exact(input_);
- mln_precondition(doc.is_valid());
- mln_precondition(input.is_valid());
+ // outputting
- const line_set<L>& lines = doc.lines();
+ image2d<bool> output;
+ initialize(output, input);
+ {
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txt.element(i) == true || seps.element(i) == true)
+ output.element(i) = false;
+ else
+ {
+ const value::rgb8& c = input.element(i);
+ output.element(i) = ! bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
+ }
- // Element extraction
+ return output;
+ }
- image2d<value::label_8> img_lbl8;
+
+
+
+
+ inline
+ image2d<bool>
+ cleaning(const image2d<bool>& input, unsigned lambda)
{
- image2d<bool> content;
- initialize(content, input);
- data::fill(content, true);
+ const box2d& dom = input.domain();
+
+ image2d<unsigned> area(dom);
+ image2d<unsigned> parent(dom);
+ image2d<bool> output(dom);
+
+ unsigned max_area = 0;
+
+
+ // 1st pass = bg union-find
+
+ {
+ union_find(input, false, // in
+ parent, area, max_area // out
+ );
+ }
+
+
+ // echo
+ // std::cout << "max_area = " << max_area << std::endl;
+
+
+ // 2nd pass = bg biggest component selection
+
+ {
+ const unsigned nelements = input.nelements();
+ const bool* p_i = input.buffer();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ if (*p_i == true)
+ *p_o = true;
+ else
+ {
+ if (*p_par == i)
+ *p_o = (*p_a != max_area);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_i;
+ ++p_o;
+ ++p_a;
+ ++p_par;
+ }
+ }
+
+
+
+ // 3rd pass = fg union-find
- for_all_lines(l, lines)
- if (lines(l).type() == line::Text)
- data::fill((content | lines(l).bbox()).rw(), false);
+ {
+ union_find(output, true, // in
+ parent, area, max_area // out
+ );
+ }
- typedef mln::value::rgb<5> t_rgb5;
- typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
- image2d<t_rgb5>
- img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
- img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
- data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+ // 4th pass = cleaning fg
- mln::util::array<unsigned>
- card = mln::labeling::compute(accu::math::count<value::label_8>(),
- img_lbl8, img_lbl8, 3);
+ {
+ const unsigned nelements = input.nelements();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
- unsigned max = 0, bg_id = 0;
- for_all_ncomponents(c, 3)
- if (card(c) > max)
+ for (unsigned i = 0; i < nelements; ++i)
{
- max = card(c);
- bg_id = c;
+ if (*p_o == true)
+ {
+ if (*p_par == i)
+ *p_o = (*p_a > lambda);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_o;
+ ++p_a;
+ ++p_par;
}
+ }
+
- mln::fun::i2v::array<bool> f(4, true);
- f(0) = false;
- f(bg_id) = false;
- labeling::relabel_inplace(img_lbl8, 4, f);
+ return output;
}
+ } // end of namespace scribo::primitive::extract::internal
- component_set<L> output;
- std::cout << "Removing small elements" << std::endl;
- {
- image2d<bool> elts;
- initialize(elts, img_lbl8);
- data::fill(elts, false);
- data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
- scribo::def::lbl_type nlabels;
- elts = filter::components_small(elts, c8(), nlabels, 40);
+ // FACADE
+
+ template <typename L>
+ component_set<L>
+ non_text(const document<L>& doc, unsigned nlines)
+ {
+ trace::entering("scribo::primitive::extract::non_text");
+
- output = primitive::extract::components(elts, c8(), nlabels);
+ util::timer t;
+ t.start();
+
+ mln_precondition(doc.is_valid());
+
+ mln_precondition(doc.has_line_seps());
+ mln_precondition(doc.has_text());
+
+ // FIXME: Do these images exist elsewhere?
+ image2d<bool>
+ txt = make::text_components_image(doc),
+ txtblocks = make::text_blocks_image(doc, nlines);
+
+ unsigned nbits = 5;
+ float p = 0.9998; // 0.80 <= x < 1.0
+ unsigned lambda = 1000;
+
+ // enlarge the text mask so that "not txt" does not include
+ // any text pixel
+ txt = morpho::elementary::dilation(txt, c8());
+ txt = morpho::elementary::dilation(txt, c4());
+
+ // FIXME: Make it faster?
+ data::fill((txtblocks | pw::value(txt)).rw(), false);
+
+ // Debug
+ {
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txt, "txt_components");
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txtblocks, "txt_blocks");
}
+ image2d<bool>
+ element_image = internal::learn(doc, txt, txtblocks, nbits, p);
+ element_image = internal::cleaning(element_image, lambda);
- std::cout << "Ignoring inner elements" << std::endl;
+ mln_value(L) ncomps;
+ component_set<L>
+ elements = primitive::extract::components(element_image,
+ c8(), ncomps);
+ // Debug
{
- // FIXME: We would like to use the convex hull instead of the bbox.
- internal::order_bbox<L> func(output);
- util::array<unsigned> box_ordered_comps;
- for (unsigned i = 1; i < output.nelements(); ++i)
- box_ordered_comps.append(i);
- std::sort(box_ordered_comps.hook_std_vector_().begin(),
- box_ordered_comps.hook_std_vector_().end(), func);
-
- image2d<bool> merged_elts;
- initialize(merged_elts, img_lbl8);
- data::fill(merged_elts, false);
- for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
- {
- unsigned c = box_ordered_comps(i);
- point2d
- pminright = output(c).bbox().pmin(),
- pmaxleft = output(c).bbox().pmax();
- pminright.col() = output(c).bbox().pmax().col();
- pmaxleft.col() = output(c).bbox().pmin().col();
-
- if (merged_elts(output(c).bbox().pmin())
- && merged_elts(output(c).bbox().pmax())
- && merged_elts(pminright)
- && merged_elts(pmaxleft))
- output(c).update_tag(component::Ignored);
- else
- mln::draw::box_plain(merged_elts, output(c).bbox(), true);
- }
+ debug::logger().log_image(debug::Results,
+ elements.labeled_image(),
+ "non_text_components");
}
trace::exiting("scribo::primitive::extract::non_text");
- return output;
+ return elements;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text_kmean.hh
similarity index 91%
copy from scribo/scribo/primitive/extract/non_text.hh
copy to scribo/scribo/primitive/extract/non_text_kmean.hh
index 8528782..c76ce11 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text_kmean.hh
@@ -30,8 +30,8 @@
///
/// \fixme To be optimized!
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
# include <mln/core/image/image2d.hh>
# include <mln/core/alias/neighb2d.hh>
@@ -72,7 +72,7 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text_kmean(const document<L>& doc, const Image<I>& input);
# ifndef MLN_INCLUDE_ONLY
@@ -107,9 +107,9 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
+ non_text_kmean(const document<L>& doc, const Image<I>& input_)
{
- trace::entering("scribo::primitive::extract::non_text");
+ trace::entering("scribo::primitive::extract::non_text_kmean");
const I& input = exact(input_);
mln_precondition(doc.is_valid());
@@ -136,7 +136,8 @@ namespace scribo
img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
+ mln::clustering::kmean_rgb<double,5>(
+ (img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
mln::util::array<unsigned>
@@ -207,7 +208,7 @@ namespace scribo
}
}
- trace::exiting("scribo::primitive::extract::non_text");
+ trace::exiting("scribo::primitive::extract::non_text_kmean");
return output;
}
@@ -220,4 +221,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
--
1.5.6.5
1
0
last-svn-commit-808-g3f14376 New non-text components extraction routine.
by Guillaume Lazzara 14 Mar '11
by Guillaume Lazzara 14 Mar '11
14 Mar '11
* scribo/make/text_blocks_image.hh,
* scribo/make/text_components_image.hh,
* scribo/primitive/extract/internal/union.hh: New.
* scribo/primitive/extract/non_text.hh: New implementation.
* scribo/primitive/extract/non_text_kmean.hh: New. Old
implementation is saved here.
---
scribo/ChangeLog | 13 +
scribo/scribo/make/text_blocks_image.hh | 136 ++++++++
scribo/scribo/make/text_components_image.hh | 101 ++++++
scribo/scribo/primitive/extract/internal/union.hh | 246 +++++++++++++
scribo/scribo/primitive/extract/non_text.hh | 366 ++++++++++++++------
.../extract/{non_text.hh => non_text_kmean.hh} | 17 +-
6 files changed, 761 insertions(+), 118 deletions(-)
create mode 100644 scribo/scribo/make/text_blocks_image.hh
create mode 100644 scribo/scribo/make/text_components_image.hh
create mode 100644 scribo/scribo/primitive/extract/internal/union.hh
copy scribo/scribo/primitive/extract/{non_text.hh => non_text_kmean.hh} (91%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 43041fe..32dda87 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New non-text components extraction routine.
+
+ * scribo/make/text_blocks_image.hh,
+ * scribo/make/text_components_image.hh,
+ * scribo/primitive/extract/internal/union.hh: New.
+
+ * scribo/primitive/extract/non_text.hh: New implementation.
+
+ * scribo/primitive/extract/non_text_kmean.hh: New. Old
+ implementation is saved here.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Introduce result image output.
* scribo/fun/v2v/highlight.hh,
diff --git a/scribo/scribo/make/text_blocks_image.hh b/scribo/scribo/make/text_blocks_image.hh
new file mode 100644
index 0000000..fbc16df
--- /dev/null
+++ b/scribo/scribo/make/text_blocks_image.hh
@@ -0,0 +1,136 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+
+/// \file
+///
+/// \brief Create a mask of paragraph blocks.
+
+# include <mln/core/image/image2d.hh>
+# include <mln/draw/box_plain.hh>
+
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a mask of paragraph blocks.
+ /*!
+ \pre The has_text() method of \p doc MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines);
+
+
+ # ifndef MLN_INCLUDE_ONLY
+
+ template <typename L> // Build a binary mask covering the text lines of the paragraphs of doc.
+ mln_ch_value(L,bool) // NOTE(review): the body returns an image2d<bool>; presumably the same type for the L used here -- confirm.
+ text_blocks_image(const document<L>& doc, unsigned min_nlines) // paragraphs with fewer than min_nlines lines are skipped
+ {
+ trace::entering("scribo::make::text_blocks_image");
+
+ mln_precondition(doc.is_open());
+ // The mask is initialized from doc.image() and starts all-false.
+ image2d<bool> output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, output);
+ // data::fill(log, 0);
+
+ if (doc.has_text()) // without text the all-false mask is returned as is
+ {
+ const paragraph_set<L>& parset = doc.paragraphs();
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= min_nlines)
+ {
+ box2d last_box; // bounding box of the previously drawn line of this paragraph
+
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>&
+ line = parset.lines()(parset(p).line_ids()(l));
+
+ // Avoid invalid case:
+ //
+ // =======
+ // ======
+ if (last_box.is_valid() // false for the first line: last_box is still default-constructed
+ && last_box.pmax().row() < line.bbox().pmin().row() // previous box ends above the top of this line
+ && last_box.pmin().col() < line.bbox().pmax().col()) // previous box starts left of this line's right edge
+ {
+ point2d // corners of the gap between the two boxes: rows between them, columns common to both
+ pmin(std::min(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::max(last_box.pmin().col(),
+ line.bbox().pmin().col())),
+ pmax(std::max(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(),
+ line.bbox().pmax().col()));
+ // NOTE(review): nothing checks last_box.pmax().col() >= line.bbox().pmin().col(); otherwise pmin.col() > pmax.col() -- confirm.
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(output, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ mln::draw::box_plain(output, line.bbox(), true); // the line's own bounding box is always drawn
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+ }
+
+
+ trace::exiting("scribo::make::text_blocks_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
diff --git a/scribo/scribo/make/text_components_image.hh b/scribo/scribo/make/text_components_image.hh
new file mode 100644
index 0000000..e7c892e
--- /dev/null
+++ b/scribo/scribo/make/text_components_image.hh
@@ -0,0 +1,101 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+
+/// \file
+///
+/// Create a binary image with text components only.
+
+#include <mln/pw/all.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+#include <mln/util/array.hh>
+#include <mln/data/fill.hh>
+
+#include <scribo/core/document.hh>
+#include <scribo/core/line_set.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a binary image with text components only.
+ /*!
+ \pre The has_text() method of \p doc MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ /// Render the components of every text line of \p doc into a binary
+ /// image; every other pixel of the result is false.
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc)
+ {
+   trace::entering("scribo::make::text_components_image");
+
+   mln_precondition(doc.is_open());
+
+   // Blank canvas initialized from the document image.
+   mln_ch_value(L,bool) result;
+   initialize(result, doc.image());
+   data::fill(result, false);
+
+   if (doc.has_text())
+   {
+     const scribo::line_set<L>& all_lines = doc.lines();
+     for_all_lines(line_id, all_lines)
+     {
+       if (all_lines(line_id).is_textline())
+       {
+         const util::array<component_id_t>&
+           ids = all_lines(line_id).component_ids();
+         const L& labels = all_lines.components().labeled_image();
+
+         // Inside each component bounding box, fill the result from the
+         // binary image restricted to the sites labeled with that component.
+         for_all_elements(k, ids)
+         {
+           data::fill((result | all_lines.components()(ids(k)).bbox()).rw(),
+                      ((doc.binary_image()
+                        | all_lines.components()(ids(k)).bbox())
+                       | (pw::value(labels) == ids(k))));
+         }
+       }
+     }
+   }
+
+   trace::exiting("scribo::make::text_components_image");
+   return result;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
diff --git a/scribo/scribo/primitive/extract/internal/union.hh b/scribo/scribo/primitive/extract/internal/union.hh
new file mode 100644
index 0000000..509a7e4
--- /dev/null
+++ b/scribo/scribo/primitive/extract/internal/union.hh
@@ -0,0 +1,246 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Various utilities for image extraction
+///
+/// \fixme To be cleaned up.
+
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/border/fill.hh>
+
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace extract
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+
+ unsigned find_root(image2d<unsigned>& parent, unsigned x);
+
+
+ void union_find(const image2d<bool>& input, bool lab,
+ image2d<unsigned>& parent, image2d<unsigned>& area,
+ unsigned& max_area);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ /// Return the root of the tree that contains element \p x in \p parent
+ /// (a root is an element that is its own parent).  Every element met on
+ /// the way is re-linked directly to that root.
+ inline
+ unsigned find_root(image2d<unsigned>& parent, unsigned x)
+ {
+   // First walk: climb until reaching the element that is its own parent.
+   unsigned root = x;
+   while (parent.element(root) != root)
+     root = parent.element(root);
+
+   // Second walk: path compression, from x up to (but excluding) the root.
+   while (x != root)
+   {
+     const unsigned next = parent.element(x);
+     parent.element(x) = root;
+     x = next;
+   }
+
+   return root;
+ }
+
+ /// Single raster scan building a union-find forest of the pixels of \p input equal to \p lab, each pixel being linked to its up and left neighbours only.
+ inline
+ void union_find(const image2d<bool>& input, // image to scan
+ bool lab, // pixel value of the components to build
+ // output:
+ image2d<unsigned>& parent, // parent index per pixel; a root is its own parent, pixels != lab keep the fill value 0
+ image2d<unsigned>& area, // pixel count, only written at the index of a (current or former) root
+ unsigned& max_area) // largest area seen when a component grows or merges
+ {
+ const unsigned nrows = input.nrows(), ncols = input.ncols();
+ // op = index of the current pixel, on = index of its neighbour (left one on row 0, up one afterwards).
+ unsigned op, on, delta = input.delta_index(dpoint2d(1, 0)); // delta = index offset of dpoint2d(1, 0), i.e. one row further
+ // NOTE(review): max_area is not raised when a one-pixel component is created, so it stays 0 if no component ever grows -- confirm this is intended.
+ data::fill(parent, 0);
+ max_area = 0;
+
+ {
+
+ // row == 0 and col == 0: first pixel, no neighbour to look at
+
+ op = input.index_of_point(point2d(0,0));
+ if (input.element(op) == lab)
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+
+ // row = 0: only the left neighbour exists
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ on = op; // previous pixel becomes the (left) neighbour
+ ++op;
+
+ if (input.element(op) != lab)
+ continue;
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on); // attach to the neighbour's root
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1; // new one-pixel component
+ parent.element(op) = op;
+ }
+
+ }
+ }
+
+
+ for (unsigned row = 1; row < nrows; ++row)
+ {
+
+ {
+ // col == 0: only the up neighbour exists
+
+ op = input.index_of_point(point2d(row, 0));
+ on = op - delta; // same column, previous row
+
+ if (input.element(op) == lab)
+ {
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+ }
+
+ }
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ ++op;
+ ++on; // keeps pointing at the pixel just above op
+
+ if (input.element(op) != lab)
+ continue;
+
+ bool merge_ = false; // becomes true once op has been attached to a neighbour's component
+
+ // up: attach op to the component of the pixel above, if any
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+
+
+ // left: attach op to the left component, or merge it with the up one
+
+ unsigned ol = op - 1;
+
+ if (input.element(ol) == lab)
+ {
+ if (merge_)
+ {
+ if (input.element(on - 1) != lab) // not already merged (the up-left pixel would have linked up and left)
+ {
+ unsigned r_op = parent.element(op), r_ol = find_root(parent, ol); // parent of op is already the up root
+ if (r_op != r_ol)
+ {
+ // do-union: the root with the smaller index stays root and absorbs the other area
+ if (r_op < r_ol)
+ {
+ parent.element(r_ol) = r_op;
+ area.element(r_op) += area.element(r_ol);
+ if (area.element(r_op) > max_area)
+ max_area = area.element(r_op);
+ }
+ else
+ {
+ parent.element(r_op) = r_ol;
+ area.element(r_ol) += area.element(r_op);
+ if (area.element(r_ol) > max_area)
+ max_area = area.element(r_ol);
+ }
+ }
+ }
+ } // end of "if (merge_)"
+ else
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, ol);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+ }
+
+
+ // finalization: no matching neighbour, op starts a new one-pixel component
+
+ if (merge_ == false)
+ {
+ parent.element(op) = op;
+ area.element(op) = 1;
+ }
+
+ }
+ }
+
+ } // end of 'union_find'
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::primitive::extract::internal
+
+ } // end of namespace scribo::primitive::extract
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text.hh
index 8528782..a456270 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -33,30 +33,19 @@
#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# include <mln/core/image/image2d.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/data/fill.hh>
-# include <mln/util/array.hh>
-# include <mln/labeling/compute.hh>
-# include <mln/labeling/relabel.hh>
-# include <mln/accu/math/count.hh>
-# include <mln/pw/all.hh>
+# include <mln/morpho/elementary/dilation.hh>
-# include <mln/draw/box_plain.hh>
-# include <mln/value/label_8.hh>
-# include <mln/value/rgb.hh>
-# include <mln/value/rgb8.hh>
+# include <scribo/make/text_components_image.hh>
+# include <scribo/make/text_blocks_image.hh>
-# include <scribo/core/macros.hh>
-# include <scribo/core/component_set.hh>
-# include <scribo/core/document.hh>
-# include <scribo/core/line_set.hh>
-# include <scribo/core/def/lbl_type.hh>
-# include <scribo/filter/objects_small.hh>
+# include <scribo/primitive/extract/internal/union.hh>
+# include <scribo/debug/logger.hh>
+
+//DEBUG
+#include <mln/util/timer.hh>
+#include <mln/io/pbm/save.hh>
-# include <mln/clustering/kmean_rgb.hh>
-# include <mln/fun/v2v/rgb8_to_rgbn.hh>
namespace scribo
{
@@ -69,10 +58,24 @@ namespace scribo
using namespace mln;
+ /*! \brief Extract non text components.
+
+ This method takes text localization into account and tries to
+ learn the background colors to deduce the relevant non text
+ components.
+
+ \param[in] doc A document structure. It must have paragraph
+ information.
+
+ \param[in] nlines The minimum number of lines a paragraph must
+ have to be considered during the background color learning.
+
- template <typename L, typename I>
+ \return A component set of non text components.
+ */
+ template <typename L>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text(const document<L>& doc, unsigned nlines);
# ifndef MLN_INCLUDE_ONLY
@@ -82,133 +85,276 @@ namespace scribo
{
template <typename L>
- struct order_bbox
+ image2d<bool> // returned mask: true where the pixel color was not learnt as background
+ learn(const document<L>& doc, // provides the color image and the separator mask
+ const image2d<bool>& txt, // pixels forced to false in the result
+ const image2d<bool>& txtblocks, // pixels whose colors are used as background samples
+ unsigned nbits, // bits kept per color channel in the histogram
+ float p_cover) // fraction of the samples the background cells must cover
{
- order_bbox(const scribo::component_set<L>& comps)
- : comps_(comps)
+ const image2d<value::rgb8>& input = doc.image();
+ const image2d<bool>&
+ seps = doc.paragraphs().lines().components().separators();
+ // The loops below address all four images with the same raw element index; presumably that is why their borders must match.
+ if (txt.border() != input.border()
+ || txtblocks.border() != input.border()
+ || seps.border() != input.border())
{
+ std::cerr << " txt.border() = " << txt.border()
+ << " - txtblocks.border() = " << txtblocks.border()
+ << " - input.border() = " << input.border()
+ << " - seps.border() = " << seps.border()
+ << std::endl;
+ std::cerr << "different sizes for borders! Resizing..." << std::endl;
+
+
+ border::resize(txt, border::thickness);
+ border::resize(input, border::thickness);
+ border::resize(txtblocks, border::thickness);
+ border::resize(seps, border::thickness);
+ // std::abort();
}
- bool operator()(const unsigned& c1, const unsigned& c2) const
+ // NOTE(review): 8 - nbits is an unsigned subtraction; presumably nbits <= 8 is expected -- confirm.
+ const unsigned q_div = std::pow(2, 8 - nbits); // divisor mapping an 8-bit channel value to its histogram cell
+ const unsigned q = unsigned(std::pow(2, nbits)); // number of cells per channel
+ const unsigned nelements = input.nelements();
+
+ // h_bg: 3D histogram (red, green, blue cells) of the sample colors.
+ image3d<unsigned> h_bg(q, q, q);
+ data::fill(h_bg, 0);
+
+ border::fill(txtblocks, false); // so h_bg is not updated by border pixels!
+
+ unsigned n_bg = 0; // number of sample pixels
{
- if (comps_(c1).bbox().nsites() == comps_(c2).bbox().nsites())
- return c1 > c2;
- return comps_(c1).bbox().nsites() > comps_(c2).bbox().nsites();
+ // compute h_bg: count the quantized color of every pixel set in txtblocks
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txtblocks.element(i) == true)
+ {
+ ++n_bg;
+ const value::rgb8& c = input.element(i);
+ ++h_bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
}
- scribo::component_set<L> comps_;
- };
+ typedef std::map<unsigned, unsigned> map_t; // key: item count of a cell, value: number of cells with that count
+ map_t ncells_with_nitems;
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ {
+ unsigned nitems_in_c = h_bg(c);
+ ++ncells_with_nitems[ nitems_in_c ];
+ }
+ }
- } // end of namespace scribo::primitive::extract::internal
+ unsigned n_items_min = 0; // smallest cell count still considered background
+ {
+ map_t::const_reverse_iterator i; // reverse order: most populated cells first
+ unsigned N = 0; // samples covered so far
+ for (i = ncells_with_nitems.rbegin(); i != ncells_with_nitems.rend(); ++i)
+ {
+ unsigned nitems = i->first, ncells = i->second;
+ N += nitems * ncells;
+ if (float(N) > p_cover * float(n_bg))
+ {
+ n_items_min = nitems;
+ break;
+ }
+ }
+ }
+ if (n_items_min == 0)
+ n_items_min = 1; // safety: with 0, every cell (even an empty one) would pass the test below
- // FACADE
+ image3d<bool> bg(q, q, q); // true for the color cells learnt as background
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ bg(c) = (h_bg(c) >= n_items_min);
+ }
- template <typename L, typename I>
- component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
- {
- trace::entering("scribo::primitive::extract::non_text");
- const I& input = exact(input_);
- mln_precondition(doc.is_valid());
- mln_precondition(input.is_valid());
+ // outputting: false on text and separator pixels, otherwise true when the color cell is not background
- const line_set<L>& lines = doc.lines();
+ image2d<bool> output;
+ initialize(output, input);
+ {
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txt.element(i) == true || seps.element(i) == true)
+ output.element(i) = false;
+ else
+ {
+ const value::rgb8& c = input.element(i);
+ output.element(i) = ! bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
+ }
- // Element extraction
+ return output;
+ }
- image2d<value::label_8> img_lbl8;
+
+
+
+ /// Return a copy of \p input where every false component except the largest one is set to true, and then every true component of area <= \p lambda is set to false.
+ inline
+ image2d<bool>
+ cleaning(const image2d<bool>& input, unsigned lambda)
{
- image2d<bool> content;
- initialize(content, input);
- data::fill(content, true);
+ const box2d& dom = input.domain();
+
+ image2d<unsigned> area(dom);
+ image2d<unsigned> parent(dom);
+ image2d<bool> output(dom);
+
+ unsigned max_area = 0;
+
+
+ // 1st pass = bg union-find (components of false pixels)
+
+ {
+ union_find(input, false, // in
+ parent, area, max_area // out
+ );
+ }
+
+
+ // echo
+ // std::cout << "max_area = " << max_area << std::endl;
+
+ // NOTE(review): both scans below run over all nelements; an element union_find never visited keeps parent 0 and so copies element 0 -- confirm this is harmless.
+ // 2nd pass = bg biggest component selection: only roots whose area equals max_area stay false
+
+ {
+ const unsigned nelements = input.nelements();
+ const bool* p_i = input.buffer();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ if (*p_i == true)
+ *p_o = true;
+ else
+ {
+ if (*p_par == i) // i is a root: decide for the whole component
+ *p_o = (*p_a != max_area);
+ else
+ *p_o = output.element(*p_par); // copy the parent's value, expected to be set earlier in this scan
+ }
+ ++p_i;
+ ++p_o;
+ ++p_a;
+ ++p_par;
+ }
+ }
+
+
+
+ // 3rd pass = fg union-find (components of true pixels of output; parent, area and max_area are overwritten)
- for_all_lines(l, lines)
- if (lines(l).type() == line::Text)
- data::fill((content | lines(l).bbox()).rw(), false);
+ {
+ union_find(output, true, // in
+ parent, area, max_area // out
+ );
+ }
- typedef mln::value::rgb<5> t_rgb5;
- typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
- image2d<t_rgb5>
- img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
- img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
- data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+ // 4th pass = cleaning fg: a true component is kept only when its root area is > lambda
- mln::util::array<unsigned>
- card = mln::labeling::compute(accu::math::count<value::label_8>(),
- img_lbl8, img_lbl8, 3);
+ {
+ const unsigned nelements = input.nelements();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
- unsigned max = 0, bg_id = 0;
- for_all_ncomponents(c, 3)
- if (card(c) > max)
+ for (unsigned i = 0; i < nelements; ++i)
{
- max = card(c);
- bg_id = c;
+ if (*p_o == true)
+ {
+ if (*p_par == i) // root: area test for the whole component
+ *p_o = (*p_a > lambda);
+ else
+ *p_o = output.element(*p_par); // copy the parent's value
+ }
+ ++p_o;
+ ++p_a;
+ ++p_par;
}
+ }
+
- mln::fun::i2v::array<bool> f(4, true);
- f(0) = false;
- f(bg_id) = false;
- labeling::relabel_inplace(img_lbl8, 4, f);
+ return output;
}
+ } // end of namespace scribo::primitive::extract::internal
- component_set<L> output;
- std::cout << "Removing small elements" << std::endl;
- {
- image2d<bool> elts;
- initialize(elts, img_lbl8);
- data::fill(elts, false);
- data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
- scribo::def::lbl_type nlabels;
- elts = filter::components_small(elts, c8(), nlabels, 40);
+ // FACADE
+ /// Build the text masks of \p doc, learn the background colors from them, clean the resulting mask and return its 8-connected components.
+ template <typename L>
+ component_set<L>
+ non_text(const document<L>& doc, unsigned nlines)
+ {
+ trace::entering("scribo::primitive::extract::non_text");
+
- output = primitive::extract::components(elts, c8(), nlabels);
+ util::timer t; // NOTE(review): started but never read in this function
+ t.start();
+
+ mln_precondition(doc.is_valid());
+
+ mln_precondition(doc.has_line_seps());
+ mln_precondition(doc.has_text());
+
+ // FIXME: Do these images exist elsewhere?
+ image2d<bool>
+ txt = make::text_components_image(doc),
+ txtblocks = make::text_blocks_image(doc, nlines);
+ // Hard-coded settings passed to internal::learn (nbits, p) and internal::cleaning (lambda).
+ unsigned nbits = 5; // bits kept per color channel
+ float p = 0.9998; // 0.80 <= x < 1.0
+ unsigned lambda = 1000; // components of at most this area are removed by cleaning
+
+ // enlarge the text mask so that "not txt" does not include
+ // any text pixel
+ txt = morpho::elementary::dilation(txt, c8());
+ txt = morpho::elementary::dilation(txt, c4());
+ // Remove the (dilated) text pixels from the text blocks mask.
+ // FIXME: Make it faster?
+ data::fill((txtblocks | pw::value(txt)).rw(), false);
+
+ // Debug
+ {
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txt, "txt_components");
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txtblocks, "txt_blocks");
}
+ image2d<bool>
+ element_image = internal::learn(doc, txt, txtblocks, nbits, p);
+ element_image = internal::cleaning(element_image, lambda);
- std::cout << "Ignoring inner elements" << std::endl;
+ mln_value(L) ncomps;
+ component_set<L>
+ elements = primitive::extract::components(element_image,
+ c8(), ncomps);
+ // Debug
{
- // FIXME: We would like to use the convex hull instead of the bbox.
- internal::order_bbox<L> func(output);
- util::array<unsigned> box_ordered_comps;
- for (unsigned i = 1; i < output.nelements(); ++i)
- box_ordered_comps.append(i);
- std::sort(box_ordered_comps.hook_std_vector_().begin(),
- box_ordered_comps.hook_std_vector_().end(), func);
-
- image2d<bool> merged_elts;
- initialize(merged_elts, img_lbl8);
- data::fill(merged_elts, false);
- for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
- {
- unsigned c = box_ordered_comps(i);
- point2d
- pminright = output(c).bbox().pmin(),
- pmaxleft = output(c).bbox().pmax();
- pminright.col() = output(c).bbox().pmax().col();
- pmaxleft.col() = output(c).bbox().pmin().col();
-
- if (merged_elts(output(c).bbox().pmin())
- && merged_elts(output(c).bbox().pmax())
- && merged_elts(pminright)
- && merged_elts(pmaxleft))
- output(c).update_tag(component::Ignored);
- else
- mln::draw::box_plain(merged_elts, output(c).bbox(), true);
- }
+ debug::logger().log_image(debug::Results,
+ elements.labeled_image(),
+ "non_text_components");
}
trace::exiting("scribo::primitive::extract::non_text");
- return output;
+ return elements;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text_kmean.hh
similarity index 91%
copy from scribo/scribo/primitive/extract/non_text.hh
copy to scribo/scribo/primitive/extract/non_text_kmean.hh
index 8528782..c76ce11 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text_kmean.hh
@@ -30,8 +30,8 @@
///
/// \fixme To be optimized!
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
# include <mln/core/image/image2d.hh>
# include <mln/core/alias/neighb2d.hh>
@@ -72,7 +72,7 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text_kmean(const document<L>& doc, const Image<I>& input);
# ifndef MLN_INCLUDE_ONLY
@@ -107,9 +107,9 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
+ non_text_kmean(const document<L>& doc, const Image<I>& input_)
{
- trace::entering("scribo::primitive::extract::non_text");
+ trace::entering("scribo::primitive::extract::non_text_kmean");
const I& input = exact(input_);
mln_precondition(doc.is_valid());
@@ -136,7 +136,8 @@ namespace scribo
img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
+ mln::clustering::kmean_rgb<double,5>(
+ (img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
mln::util::array<unsigned>
@@ -207,7 +208,7 @@ namespace scribo
}
}
- trace::exiting("scribo::primitive::extract::non_text");
+ trace::exiting("scribo::primitive::extract::non_text_kmean");
return output;
}
@@ -220,4 +221,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
--
1.5.6.5
1
0