Olena-patches
Threads by month
- ----- 2025 -----
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- 9625 discussions

14 Mar '11
* tests/unit_test/Makefile.am: Add new conditional test file.
* tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
* tests/unit_test/cond_tests_qt,
* tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
* tests/unit_test/unit-tests.mk: Regen.
---
scribo/ChangeLog | 12 +++
scribo/tests/unit_test/Makefile.am | 7 +-
..._tiff => cond_tests_magickxx_qt_tesseract_tiff} | 1 +
scribo/tests/unit_test/cond_tests_qt | 2 +
.../tests/unit_test/cond_tests_qt_tesseract_tiff | 2 +-
scribo/tests/unit_test/unit-tests.mk | 96 +++++++++++++++-----
6 files changed, 92 insertions(+), 28 deletions(-)
copy scribo/tests/unit_test/{cond_tests_qt_tesseract_tiff => cond_tests_magickxx_qt_tesseract_tiff} (97%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index cc6e861..60d1277 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Set unit tests dependencies.
+
+ * tests/unit_test/Makefile.am: Add new conditional test file.
+
+ * tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
+ * tests/unit_test/cond_tests_qt,
+ * tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
+
+ * tests/unit_test/unit-tests.mk: Regen.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New debug tools.
* src/Makefile.am,
diff --git a/scribo/tests/unit_test/Makefile.am b/scribo/tests/unit_test/Makefile.am
index c88f553..e4c9e1b 100644
--- a/scribo/tests/unit_test/Makefile.am
+++ b/scribo/tests/unit_test/Makefile.am
@@ -17,9 +17,10 @@
include $(top_srcdir)/scribo/tests/tests.mk
-COND_TESTS = cond_tests_magickxx_tesseract_tiff \
- cond_tests_qt \
- cond_tests_qt_tesseract_tiff \
+COND_TESTS = cond_tests_magickxx_tesseract_tiff \
+ cond_tests_magickxx_qt_tesseract_tiff \
+ cond_tests_qt \
+ cond_tests_qt_tesseract_tiff \
cond_tests_tesseract_tiff
EXTRA_DIST = disabled_tests \
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
similarity index 97%
copy from scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
copy to scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
index 7dfe76d..f5840ef 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
@@ -1 +1,2 @@
scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/cond_tests_qt b/scribo/tests/unit_test/cond_tests_qt
index 577f9e6..f7bc42e 100644
--- a/scribo/tests/unit_test/cond_tests_qt
+++ b/scribo/tests/unit_test/cond_tests_qt
@@ -1,2 +1,4 @@
+scribo/convert/from_base64.hh
scribo/convert/from_qimage.hh
+scribo/io/xml/internal/full_xml_visitor.hh
scribo/io/xml/load.hh
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
index 7dfe76d..8b13789 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
@@ -1 +1 @@
-scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/unit-tests.mk b/scribo/tests/unit_test/unit-tests.mk
index 71b0b5b..b0f5982 100644
--- a/scribo/tests/unit_test/unit-tests.mk
+++ b/scribo/tests/unit_test/unit-tests.mk
@@ -4,12 +4,28 @@ check_PROGRAMS =
# Starting a conditional unit test list.
if HAVE_MAGICKXX
+if HAVE_QT
+if HAVE_TESSERACT
+if HAVE_TIFF
+check_PROGRAMS += \
+scribo_toolchain_nepomuk_text_extraction
+
+scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${QT_CPPFLAGS} -DHAVE_QT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
+scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
+scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
+endif HAVE_TIFF
+endif HAVE_TESSERACT
+endif HAVE_QT
+endif HAVE_MAGICKXX
+
+# Starting a conditional unit test list.
+if HAVE_MAGICKXX
if HAVE_TESSERACT
if HAVE_TIFF
check_PROGRAMS += \
scribo_toolchain_internal_content_in_doc_functor
-scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${MAGICKXX_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
scribo_toolchain_internal_content_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_content_in_doc_functor_SOURCES = scribo_toolchain_internal_content_in_doc_functor.cc
endif HAVE_TIFF
@@ -19,13 +35,21 @@ endif HAVE_MAGICKXX
# Starting a conditional unit test list.
if HAVE_QT
check_PROGRAMS += \
+scribo_convert_from_base64 \
scribo_convert_from_qimage \
+scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_load
-scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_convert_from_base64_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_convert_from_base64_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
+scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_convert_from_qimage_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_convert_from_qimage_SOURCES = scribo_convert_from_qimage.cc
-scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
+scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_io_xml_load_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_io_xml_load_SOURCES = scribo_io_xml_load.cc
endif HAVE_QT
@@ -34,12 +58,8 @@ endif HAVE_QT
if HAVE_QT
if HAVE_TESSERACT
if HAVE_TIFF
-check_PROGRAMS += \
-scribo_toolchain_nepomuk_text_extraction
+check_PROGRAMS +=
-scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${QT_CPPFLAGS} ${AM_CPPFLAGS}
-scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${AM_LDFLAGS}
-scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
endif HAVE_TIFF
endif HAVE_TESSERACT
endif HAVE_QT
@@ -53,16 +73,16 @@ scribo_toolchain_internal_text_in_doc_functor \
scribo_toolchain_content_in_doc \
scribo_toolchain_text_in_doc
-scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_text_recognition_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_text_recognition_SOURCES = scribo_text_recognition.cc
-scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_internal_text_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_text_in_doc_functor_SOURCES = scribo_toolchain_internal_text_in_doc_functor.cc
-scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_content_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_content_in_doc_SOURCES = scribo_toolchain_content_in_doc.cc
-scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_text_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_text_in_doc_SOURCES = scribo_toolchain_text_in_doc.cc
endif HAVE_TIFF
@@ -83,7 +103,6 @@ scribo_binarization_sauvola_ms \
scribo_binarization_sauvola_ms_split \
scribo_binarization_sauvola_threshold_image \
scribo_canvas_integral_browsing \
-scribo_convert_from_base64 \
scribo_convert_to_base64 \
scribo_core_all \
scribo_core_central_sites \
@@ -97,7 +116,7 @@ scribo_core_def_lbl_type \
scribo_core_document \
scribo_core_erase_objects \
scribo_core_init_integral_image \
-scribo_core_internal_doc_xml_serializer \
+scribo_core_internal_doc_serializer \
scribo_core_line_info \
scribo_core_line_links \
scribo_core_line_set \
@@ -112,25 +131,27 @@ scribo_core_tag_line \
scribo_debug_alignment_decision_image \
scribo_debug_all \
scribo_debug_bboxes_enlarged_image \
+scribo_debug_bboxes_image \
scribo_debug_char_space_image \
scribo_debug_decision_image \
scribo_debug_highlight_text_area \
scribo_debug_line_info_image \
+scribo_debug_linked_bboxes_image \
scribo_debug_links_decision_image \
+scribo_debug_links_image \
+scribo_debug_logger \
scribo_debug_looks_like_a_text_line_image \
scribo_debug_mean_and_base_lines_image \
-scribo_debug_save_bboxes_image \
scribo_debug_save_comp_diff \
scribo_debug_save_label_image \
-scribo_debug_save_linked_bboxes_image \
scribo_debug_save_table_image \
-scribo_debug_several_links_decision_image \
scribo_debug_text_areas_image \
scribo_debug_usage \
scribo_draw_all \
scribo_draw_bounding_box_links \
scribo_draw_bounding_boxes \
scribo_draw_groups_bboxes \
+scribo_estim_font_color \
scribo_estim_object_groups_v_thickness \
scribo_filter_all \
scribo_filter_common_objects_photo \
@@ -142,6 +163,7 @@ scribo_filter_object_groups_size_ratio \
scribo_filter_object_groups_small \
scribo_filter_object_groups_v_thickness \
scribo_filter_object_groups_with_holes \
+scribo_filter_object_links_aligned \
scribo_filter_object_links_bbox_h_ratio \
scribo_filter_object_links_bbox_overlap \
scribo_filter_object_links_bbox_ratio \
@@ -165,16 +187,25 @@ scribo_filter_objects_with_holes \
scribo_fun_v2b_label_to_bool \
scribo_fun_v2b_objects_large_filter \
scribo_fun_v2b_objects_small_filter \
+scribo_fun_v2v_highlight \
+scribo_io_img_internal_debug_img_visitor \
+scribo_io_img_internal_draw_edges \
+scribo_io_img_internal_full_img_visitor \
+scribo_io_img_internal_non_text_img_visitor \
+scribo_io_img_internal_text_img_visitor \
+scribo_io_img_save \
scribo_io_text_boxes_save \
scribo_io_xml_internal_extended_page_xml_visitor \
-scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_internal_page_xml_visitor \
scribo_io_xml_internal_print_box_coords \
+scribo_io_xml_internal_print_image_coords \
scribo_io_xml_internal_print_page_preambule \
scribo_io_xml_save \
scribo_make_all \
scribo_make_debug_filename \
scribo_make_influence_zone_graph \
+scribo_make_text_blocks_image \
+scribo_make_text_components_image \
scribo_postprocessing_all \
scribo_postprocessing_fill_object_holes \
scribo_preprocessing_all \
@@ -194,6 +225,7 @@ scribo_primitive_extract_canvas \
scribo_primitive_extract_cells \
scribo_primitive_extract_components \
scribo_primitive_extract_horizontal_separators \
+scribo_primitive_extract_internal_union \
scribo_primitive_extract_lines_discontinued \
scribo_primitive_extract_lines_h_discontinued \
scribo_primitive_extract_lines_h_pattern \
@@ -208,6 +240,7 @@ scribo_primitive_extract_lines_v_single \
scribo_primitive_extract_lines_v_thick \
scribo_primitive_extract_lines_v_thick_and_single \
scribo_primitive_extract_non_text \
+scribo_primitive_extract_non_text_kmean \
scribo_primitive_extract_separators \
scribo_primitive_extract_separators_nonvisible \
scribo_primitive_extract_vertical_separators \
@@ -236,6 +269,7 @@ scribo_primitive_link_internal_find_link \
scribo_primitive_link_internal_link_functor_base \
scribo_primitive_link_internal_link_several_dmax_base \
scribo_primitive_link_internal_link_single_dmax_base \
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base \
scribo_primitive_link_internal_link_single_dmax_ratio_base \
scribo_primitive_link_merge_double_link \
scribo_primitive_link_with_graph \
@@ -300,7 +334,6 @@ scribo_binarization_sauvola_ms_SOURCES = scribo_binarization_sauvola_ms.cc
scribo_binarization_sauvola_ms_split_SOURCES = scribo_binarization_sauvola_ms_split.cc
scribo_binarization_sauvola_threshold_image_SOURCES = scribo_binarization_sauvola_threshold_image.cc
scribo_canvas_integral_browsing_SOURCES = scribo_canvas_integral_browsing.cc
-scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
scribo_convert_to_base64_SOURCES = scribo_convert_to_base64.cc
scribo_core_all_SOURCES = scribo_core_all.cc
scribo_core_central_sites_SOURCES = scribo_core_central_sites.cc
@@ -314,7 +347,7 @@ scribo_core_def_lbl_type_SOURCES = scribo_core_def_lbl_type.cc
scribo_core_document_SOURCES = scribo_core_document.cc
scribo_core_erase_objects_SOURCES = scribo_core_erase_objects.cc
scribo_core_init_integral_image_SOURCES = scribo_core_init_integral_image.cc
-scribo_core_internal_doc_xml_serializer_SOURCES = scribo_core_internal_doc_xml_serializer.cc
+scribo_core_internal_doc_serializer_SOURCES = scribo_core_internal_doc_serializer.cc
scribo_core_line_info_SOURCES = scribo_core_line_info.cc
scribo_core_line_links_SOURCES = scribo_core_line_links.cc
scribo_core_line_set_SOURCES = scribo_core_line_set.cc
@@ -329,25 +362,27 @@ scribo_core_tag_line_SOURCES = scribo_core_tag_line.cc
scribo_debug_alignment_decision_image_SOURCES = scribo_debug_alignment_decision_image.cc
scribo_debug_all_SOURCES = scribo_debug_all.cc
scribo_debug_bboxes_enlarged_image_SOURCES = scribo_debug_bboxes_enlarged_image.cc
+scribo_debug_bboxes_image_SOURCES = scribo_debug_bboxes_image.cc
scribo_debug_char_space_image_SOURCES = scribo_debug_char_space_image.cc
scribo_debug_decision_image_SOURCES = scribo_debug_decision_image.cc
scribo_debug_highlight_text_area_SOURCES = scribo_debug_highlight_text_area.cc
scribo_debug_line_info_image_SOURCES = scribo_debug_line_info_image.cc
+scribo_debug_linked_bboxes_image_SOURCES = scribo_debug_linked_bboxes_image.cc
scribo_debug_links_decision_image_SOURCES = scribo_debug_links_decision_image.cc
+scribo_debug_links_image_SOURCES = scribo_debug_links_image.cc
+scribo_debug_logger_SOURCES = scribo_debug_logger.cc
scribo_debug_looks_like_a_text_line_image_SOURCES = scribo_debug_looks_like_a_text_line_image.cc
scribo_debug_mean_and_base_lines_image_SOURCES = scribo_debug_mean_and_base_lines_image.cc
-scribo_debug_save_bboxes_image_SOURCES = scribo_debug_save_bboxes_image.cc
scribo_debug_save_comp_diff_SOURCES = scribo_debug_save_comp_diff.cc
scribo_debug_save_label_image_SOURCES = scribo_debug_save_label_image.cc
-scribo_debug_save_linked_bboxes_image_SOURCES = scribo_debug_save_linked_bboxes_image.cc
scribo_debug_save_table_image_SOURCES = scribo_debug_save_table_image.cc
-scribo_debug_several_links_decision_image_SOURCES = scribo_debug_several_links_decision_image.cc
scribo_debug_text_areas_image_SOURCES = scribo_debug_text_areas_image.cc
scribo_debug_usage_SOURCES = scribo_debug_usage.cc
scribo_draw_all_SOURCES = scribo_draw_all.cc
scribo_draw_bounding_box_links_SOURCES = scribo_draw_bounding_box_links.cc
scribo_draw_bounding_boxes_SOURCES = scribo_draw_bounding_boxes.cc
scribo_draw_groups_bboxes_SOURCES = scribo_draw_groups_bboxes.cc
+scribo_estim_font_color_SOURCES = scribo_estim_font_color.cc
scribo_estim_object_groups_v_thickness_SOURCES = scribo_estim_object_groups_v_thickness.cc
scribo_filter_all_SOURCES = scribo_filter_all.cc
scribo_filter_common_objects_photo_SOURCES = scribo_filter_common_objects_photo.cc
@@ -359,6 +394,7 @@ scribo_filter_object_groups_size_ratio_SOURCES = scribo_filter_object_groups_siz
scribo_filter_object_groups_small_SOURCES = scribo_filter_object_groups_small.cc
scribo_filter_object_groups_v_thickness_SOURCES = scribo_filter_object_groups_v_thickness.cc
scribo_filter_object_groups_with_holes_SOURCES = scribo_filter_object_groups_with_holes.cc
+scribo_filter_object_links_aligned_SOURCES = scribo_filter_object_links_aligned.cc
scribo_filter_object_links_bbox_h_ratio_SOURCES = scribo_filter_object_links_bbox_h_ratio.cc
scribo_filter_object_links_bbox_overlap_SOURCES = scribo_filter_object_links_bbox_overlap.cc
scribo_filter_object_links_bbox_ratio_SOURCES = scribo_filter_object_links_bbox_ratio.cc
@@ -382,16 +418,25 @@ scribo_filter_objects_with_holes_SOURCES = scribo_filter_objects_with_holes.cc
scribo_fun_v2b_label_to_bool_SOURCES = scribo_fun_v2b_label_to_bool.cc
scribo_fun_v2b_objects_large_filter_SOURCES = scribo_fun_v2b_objects_large_filter.cc
scribo_fun_v2b_objects_small_filter_SOURCES = scribo_fun_v2b_objects_small_filter.cc
+scribo_fun_v2v_highlight_SOURCES = scribo_fun_v2v_highlight.cc
+scribo_io_img_internal_debug_img_visitor_SOURCES = scribo_io_img_internal_debug_img_visitor.cc
+scribo_io_img_internal_draw_edges_SOURCES = scribo_io_img_internal_draw_edges.cc
+scribo_io_img_internal_full_img_visitor_SOURCES = scribo_io_img_internal_full_img_visitor.cc
+scribo_io_img_internal_non_text_img_visitor_SOURCES = scribo_io_img_internal_non_text_img_visitor.cc
+scribo_io_img_internal_text_img_visitor_SOURCES = scribo_io_img_internal_text_img_visitor.cc
+scribo_io_img_save_SOURCES = scribo_io_img_save.cc
scribo_io_text_boxes_save_SOURCES = scribo_io_text_boxes_save.cc
scribo_io_xml_internal_extended_page_xml_visitor_SOURCES = scribo_io_xml_internal_extended_page_xml_visitor.cc
-scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
scribo_io_xml_internal_page_xml_visitor_SOURCES = scribo_io_xml_internal_page_xml_visitor.cc
scribo_io_xml_internal_print_box_coords_SOURCES = scribo_io_xml_internal_print_box_coords.cc
+scribo_io_xml_internal_print_image_coords_SOURCES = scribo_io_xml_internal_print_image_coords.cc
scribo_io_xml_internal_print_page_preambule_SOURCES = scribo_io_xml_internal_print_page_preambule.cc
scribo_io_xml_save_SOURCES = scribo_io_xml_save.cc
scribo_make_all_SOURCES = scribo_make_all.cc
scribo_make_debug_filename_SOURCES = scribo_make_debug_filename.cc
scribo_make_influence_zone_graph_SOURCES = scribo_make_influence_zone_graph.cc
+scribo_make_text_blocks_image_SOURCES = scribo_make_text_blocks_image.cc
+scribo_make_text_components_image_SOURCES = scribo_make_text_components_image.cc
scribo_postprocessing_all_SOURCES = scribo_postprocessing_all.cc
scribo_postprocessing_fill_object_holes_SOURCES = scribo_postprocessing_fill_object_holes.cc
scribo_preprocessing_all_SOURCES = scribo_preprocessing_all.cc
@@ -411,6 +456,7 @@ scribo_primitive_extract_canvas_SOURCES = scribo_primitive_extract_canvas.cc
scribo_primitive_extract_cells_SOURCES = scribo_primitive_extract_cells.cc
scribo_primitive_extract_components_SOURCES = scribo_primitive_extract_components.cc
scribo_primitive_extract_horizontal_separators_SOURCES = scribo_primitive_extract_horizontal_separators.cc
+scribo_primitive_extract_internal_union_SOURCES = scribo_primitive_extract_internal_union.cc
scribo_primitive_extract_lines_discontinued_SOURCES = scribo_primitive_extract_lines_discontinued.cc
scribo_primitive_extract_lines_h_discontinued_SOURCES = scribo_primitive_extract_lines_h_discontinued.cc
scribo_primitive_extract_lines_h_pattern_SOURCES = scribo_primitive_extract_lines_h_pattern.cc
@@ -425,6 +471,7 @@ scribo_primitive_extract_lines_v_single_SOURCES = scribo_primitive_extract_lines
scribo_primitive_extract_lines_v_thick_SOURCES = scribo_primitive_extract_lines_v_thick.cc
scribo_primitive_extract_lines_v_thick_and_single_SOURCES = scribo_primitive_extract_lines_v_thick_and_single.cc
scribo_primitive_extract_non_text_SOURCES = scribo_primitive_extract_non_text.cc
+scribo_primitive_extract_non_text_kmean_SOURCES = scribo_primitive_extract_non_text_kmean.cc
scribo_primitive_extract_separators_SOURCES = scribo_primitive_extract_separators.cc
scribo_primitive_extract_separators_nonvisible_SOURCES = scribo_primitive_extract_separators_nonvisible.cc
scribo_primitive_extract_vertical_separators_SOURCES = scribo_primitive_extract_vertical_separators.cc
@@ -453,6 +500,7 @@ scribo_primitive_link_internal_find_link_SOURCES = scribo_primitive_link_interna
scribo_primitive_link_internal_link_functor_base_SOURCES = scribo_primitive_link_internal_link_functor_base.cc
scribo_primitive_link_internal_link_several_dmax_base_SOURCES = scribo_primitive_link_internal_link_several_dmax_base.cc
scribo_primitive_link_internal_link_single_dmax_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_base.cc
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base.cc
scribo_primitive_link_internal_link_single_dmax_ratio_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_base.cc
scribo_primitive_link_merge_double_link_SOURCES = scribo_primitive_link_merge_double_link.cc
scribo_primitive_link_with_graph_SOURCES = scribo_primitive_link_with_graph.cc
--
1.5.6.5
1
0

14 Mar '11
* tests/unit_test/Makefile.am: Add new conditional test file.
* tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
* tests/unit_test/cond_tests_qt,
* tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
* tests/unit_test/unit-tests.mk: Regen.
---
scribo/ChangeLog | 12 +++
scribo/tests/unit_test/Makefile.am | 7 +-
..._tiff => cond_tests_magickxx_qt_tesseract_tiff} | 1 +
scribo/tests/unit_test/cond_tests_qt | 2 +
.../tests/unit_test/cond_tests_qt_tesseract_tiff | 2 +-
scribo/tests/unit_test/unit-tests.mk | 96 +++++++++++++++-----
6 files changed, 92 insertions(+), 28 deletions(-)
copy scribo/tests/unit_test/{cond_tests_qt_tesseract_tiff => cond_tests_magickxx_qt_tesseract_tiff} (97%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 23ecde3..e79e597 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Set unit tests dependencies.
+
+ * tests/unit_test/Makefile.am: Add new conditional test file.
+
+ * tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff,
+ * tests/unit_test/cond_tests_qt,
+ * tests/unit_test/cond_tests_qt_tesseract_tiff: Update list of files.
+
+ * tests/unit_test/unit-tests.mk: Regen.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New debug tools.
* src/Makefile.am,
diff --git a/scribo/tests/unit_test/Makefile.am b/scribo/tests/unit_test/Makefile.am
index c88f553..e4c9e1b 100644
--- a/scribo/tests/unit_test/Makefile.am
+++ b/scribo/tests/unit_test/Makefile.am
@@ -17,9 +17,10 @@
include $(top_srcdir)/scribo/tests/tests.mk
-COND_TESTS = cond_tests_magickxx_tesseract_tiff \
- cond_tests_qt \
- cond_tests_qt_tesseract_tiff \
+COND_TESTS = cond_tests_magickxx_tesseract_tiff \
+ cond_tests_magickxx_qt_tesseract_tiff \
+ cond_tests_qt \
+ cond_tests_qt_tesseract_tiff \
cond_tests_tesseract_tiff
EXTRA_DIST = disabled_tests \
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
similarity index 97%
copy from scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
copy to scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
index 7dfe76d..f5840ef 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_magickxx_qt_tesseract_tiff
@@ -1 +1,2 @@
scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/cond_tests_qt b/scribo/tests/unit_test/cond_tests_qt
index 577f9e6..f7bc42e 100644
--- a/scribo/tests/unit_test/cond_tests_qt
+++ b/scribo/tests/unit_test/cond_tests_qt
@@ -1,2 +1,4 @@
+scribo/convert/from_base64.hh
scribo/convert/from_qimage.hh
+scribo/io/xml/internal/full_xml_visitor.hh
scribo/io/xml/load.hh
diff --git a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
index 7dfe76d..8b13789 100644
--- a/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
+++ b/scribo/tests/unit_test/cond_tests_qt_tesseract_tiff
@@ -1 +1 @@
-scribo/toolchain/nepomuk/text_extraction.hh
+
diff --git a/scribo/tests/unit_test/unit-tests.mk b/scribo/tests/unit_test/unit-tests.mk
index 71b0b5b..b0f5982 100644
--- a/scribo/tests/unit_test/unit-tests.mk
+++ b/scribo/tests/unit_test/unit-tests.mk
@@ -4,12 +4,28 @@ check_PROGRAMS =
# Starting a conditional unit test list.
if HAVE_MAGICKXX
+if HAVE_QT
+if HAVE_TESSERACT
+if HAVE_TIFF
+check_PROGRAMS += \
+scribo_toolchain_nepomuk_text_extraction
+
+scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${QT_CPPFLAGS} -DHAVE_QT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
+scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
+scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
+endif HAVE_TIFF
+endif HAVE_TESSERACT
+endif HAVE_QT
+endif HAVE_MAGICKXX
+
+# Starting a conditional unit test list.
+if HAVE_MAGICKXX
if HAVE_TESSERACT
if HAVE_TIFF
check_PROGRAMS += \
scribo_toolchain_internal_content_in_doc_functor
-scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${MAGICKXX_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_content_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${MAGICKXX_CPPFLAGS} -DHAVE_MAGICKXX ${AM_CPPFLAGS}
scribo_toolchain_internal_content_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${MAGICKXX_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_content_in_doc_functor_SOURCES = scribo_toolchain_internal_content_in_doc_functor.cc
endif HAVE_TIFF
@@ -19,13 +35,21 @@ endif HAVE_MAGICKXX
# Starting a conditional unit test list.
if HAVE_QT
check_PROGRAMS += \
+scribo_convert_from_base64 \
scribo_convert_from_qimage \
+scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_load
-scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_convert_from_base64_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_convert_from_base64_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
+scribo_convert_from_qimage_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_convert_from_qimage_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_convert_from_qimage_SOURCES = scribo_convert_from_qimage.cc
-scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
+scribo_io_xml_internal_full_xml_visitor_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
+scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
+scribo_io_xml_load_CPPFLAGS= ${QT_CPPFLAGS} -DHAVE_QT ${AM_CPPFLAGS}
scribo_io_xml_load_LDFLAGS= ${QT_LDFLAGS} ${AM_LDFLAGS}
scribo_io_xml_load_SOURCES = scribo_io_xml_load.cc
endif HAVE_QT
@@ -34,12 +58,8 @@ endif HAVE_QT
if HAVE_QT
if HAVE_TESSERACT
if HAVE_TIFF
-check_PROGRAMS += \
-scribo_toolchain_nepomuk_text_extraction
+check_PROGRAMS +=
-scribo_toolchain_nepomuk_text_extraction_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${QT_CPPFLAGS} ${AM_CPPFLAGS}
-scribo_toolchain_nepomuk_text_extraction_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${QT_LDFLAGS} ${AM_LDFLAGS}
-scribo_toolchain_nepomuk_text_extraction_SOURCES = scribo_toolchain_nepomuk_text_extraction.cc
endif HAVE_TIFF
endif HAVE_TESSERACT
endif HAVE_QT
@@ -53,16 +73,16 @@ scribo_toolchain_internal_text_in_doc_functor \
scribo_toolchain_content_in_doc \
scribo_toolchain_text_in_doc
-scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_text_recognition_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_text_recognition_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_text_recognition_SOURCES = scribo_text_recognition.cc
-scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_internal_text_in_doc_functor_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_internal_text_in_doc_functor_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_internal_text_in_doc_functor_SOURCES = scribo_toolchain_internal_text_in_doc_functor.cc
-scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_content_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_content_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_content_in_doc_SOURCES = scribo_toolchain_content_in_doc.cc
-scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} ${TESSERACT_CPPFLAGS} ${AM_CPPFLAGS}
+scribo_toolchain_text_in_doc_CPPFLAGS= ${TIFF_CPPFLAGS} -DHAVE_TIFF ${TESSERACT_CPPFLAGS} -DHAVE_TESSERACT ${AM_CPPFLAGS}
scribo_toolchain_text_in_doc_LDFLAGS= ${TIFF_LDFLAGS} ${TESSERACT_LDFLAGS} ${AM_LDFLAGS}
scribo_toolchain_text_in_doc_SOURCES = scribo_toolchain_text_in_doc.cc
endif HAVE_TIFF
@@ -83,7 +103,6 @@ scribo_binarization_sauvola_ms \
scribo_binarization_sauvola_ms_split \
scribo_binarization_sauvola_threshold_image \
scribo_canvas_integral_browsing \
-scribo_convert_from_base64 \
scribo_convert_to_base64 \
scribo_core_all \
scribo_core_central_sites \
@@ -97,7 +116,7 @@ scribo_core_def_lbl_type \
scribo_core_document \
scribo_core_erase_objects \
scribo_core_init_integral_image \
-scribo_core_internal_doc_xml_serializer \
+scribo_core_internal_doc_serializer \
scribo_core_line_info \
scribo_core_line_links \
scribo_core_line_set \
@@ -112,25 +131,27 @@ scribo_core_tag_line \
scribo_debug_alignment_decision_image \
scribo_debug_all \
scribo_debug_bboxes_enlarged_image \
+scribo_debug_bboxes_image \
scribo_debug_char_space_image \
scribo_debug_decision_image \
scribo_debug_highlight_text_area \
scribo_debug_line_info_image \
+scribo_debug_linked_bboxes_image \
scribo_debug_links_decision_image \
+scribo_debug_links_image \
+scribo_debug_logger \
scribo_debug_looks_like_a_text_line_image \
scribo_debug_mean_and_base_lines_image \
-scribo_debug_save_bboxes_image \
scribo_debug_save_comp_diff \
scribo_debug_save_label_image \
-scribo_debug_save_linked_bboxes_image \
scribo_debug_save_table_image \
-scribo_debug_several_links_decision_image \
scribo_debug_text_areas_image \
scribo_debug_usage \
scribo_draw_all \
scribo_draw_bounding_box_links \
scribo_draw_bounding_boxes \
scribo_draw_groups_bboxes \
+scribo_estim_font_color \
scribo_estim_object_groups_v_thickness \
scribo_filter_all \
scribo_filter_common_objects_photo \
@@ -142,6 +163,7 @@ scribo_filter_object_groups_size_ratio \
scribo_filter_object_groups_small \
scribo_filter_object_groups_v_thickness \
scribo_filter_object_groups_with_holes \
+scribo_filter_object_links_aligned \
scribo_filter_object_links_bbox_h_ratio \
scribo_filter_object_links_bbox_overlap \
scribo_filter_object_links_bbox_ratio \
@@ -165,16 +187,25 @@ scribo_filter_objects_with_holes \
scribo_fun_v2b_label_to_bool \
scribo_fun_v2b_objects_large_filter \
scribo_fun_v2b_objects_small_filter \
+scribo_fun_v2v_highlight \
+scribo_io_img_internal_debug_img_visitor \
+scribo_io_img_internal_draw_edges \
+scribo_io_img_internal_full_img_visitor \
+scribo_io_img_internal_non_text_img_visitor \
+scribo_io_img_internal_text_img_visitor \
+scribo_io_img_save \
scribo_io_text_boxes_save \
scribo_io_xml_internal_extended_page_xml_visitor \
-scribo_io_xml_internal_full_xml_visitor \
scribo_io_xml_internal_page_xml_visitor \
scribo_io_xml_internal_print_box_coords \
+scribo_io_xml_internal_print_image_coords \
scribo_io_xml_internal_print_page_preambule \
scribo_io_xml_save \
scribo_make_all \
scribo_make_debug_filename \
scribo_make_influence_zone_graph \
+scribo_make_text_blocks_image \
+scribo_make_text_components_image \
scribo_postprocessing_all \
scribo_postprocessing_fill_object_holes \
scribo_preprocessing_all \
@@ -194,6 +225,7 @@ scribo_primitive_extract_canvas \
scribo_primitive_extract_cells \
scribo_primitive_extract_components \
scribo_primitive_extract_horizontal_separators \
+scribo_primitive_extract_internal_union \
scribo_primitive_extract_lines_discontinued \
scribo_primitive_extract_lines_h_discontinued \
scribo_primitive_extract_lines_h_pattern \
@@ -208,6 +240,7 @@ scribo_primitive_extract_lines_v_single \
scribo_primitive_extract_lines_v_thick \
scribo_primitive_extract_lines_v_thick_and_single \
scribo_primitive_extract_non_text \
+scribo_primitive_extract_non_text_kmean \
scribo_primitive_extract_separators \
scribo_primitive_extract_separators_nonvisible \
scribo_primitive_extract_vertical_separators \
@@ -236,6 +269,7 @@ scribo_primitive_link_internal_find_link \
scribo_primitive_link_internal_link_functor_base \
scribo_primitive_link_internal_link_several_dmax_base \
scribo_primitive_link_internal_link_single_dmax_base \
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base \
scribo_primitive_link_internal_link_single_dmax_ratio_base \
scribo_primitive_link_merge_double_link \
scribo_primitive_link_with_graph \
@@ -300,7 +334,6 @@ scribo_binarization_sauvola_ms_SOURCES = scribo_binarization_sauvola_ms.cc
scribo_binarization_sauvola_ms_split_SOURCES = scribo_binarization_sauvola_ms_split.cc
scribo_binarization_sauvola_threshold_image_SOURCES = scribo_binarization_sauvola_threshold_image.cc
scribo_canvas_integral_browsing_SOURCES = scribo_canvas_integral_browsing.cc
-scribo_convert_from_base64_SOURCES = scribo_convert_from_base64.cc
scribo_convert_to_base64_SOURCES = scribo_convert_to_base64.cc
scribo_core_all_SOURCES = scribo_core_all.cc
scribo_core_central_sites_SOURCES = scribo_core_central_sites.cc
@@ -314,7 +347,7 @@ scribo_core_def_lbl_type_SOURCES = scribo_core_def_lbl_type.cc
scribo_core_document_SOURCES = scribo_core_document.cc
scribo_core_erase_objects_SOURCES = scribo_core_erase_objects.cc
scribo_core_init_integral_image_SOURCES = scribo_core_init_integral_image.cc
-scribo_core_internal_doc_xml_serializer_SOURCES = scribo_core_internal_doc_xml_serializer.cc
+scribo_core_internal_doc_serializer_SOURCES = scribo_core_internal_doc_serializer.cc
scribo_core_line_info_SOURCES = scribo_core_line_info.cc
scribo_core_line_links_SOURCES = scribo_core_line_links.cc
scribo_core_line_set_SOURCES = scribo_core_line_set.cc
@@ -329,25 +362,27 @@ scribo_core_tag_line_SOURCES = scribo_core_tag_line.cc
scribo_debug_alignment_decision_image_SOURCES = scribo_debug_alignment_decision_image.cc
scribo_debug_all_SOURCES = scribo_debug_all.cc
scribo_debug_bboxes_enlarged_image_SOURCES = scribo_debug_bboxes_enlarged_image.cc
+scribo_debug_bboxes_image_SOURCES = scribo_debug_bboxes_image.cc
scribo_debug_char_space_image_SOURCES = scribo_debug_char_space_image.cc
scribo_debug_decision_image_SOURCES = scribo_debug_decision_image.cc
scribo_debug_highlight_text_area_SOURCES = scribo_debug_highlight_text_area.cc
scribo_debug_line_info_image_SOURCES = scribo_debug_line_info_image.cc
+scribo_debug_linked_bboxes_image_SOURCES = scribo_debug_linked_bboxes_image.cc
scribo_debug_links_decision_image_SOURCES = scribo_debug_links_decision_image.cc
+scribo_debug_links_image_SOURCES = scribo_debug_links_image.cc
+scribo_debug_logger_SOURCES = scribo_debug_logger.cc
scribo_debug_looks_like_a_text_line_image_SOURCES = scribo_debug_looks_like_a_text_line_image.cc
scribo_debug_mean_and_base_lines_image_SOURCES = scribo_debug_mean_and_base_lines_image.cc
-scribo_debug_save_bboxes_image_SOURCES = scribo_debug_save_bboxes_image.cc
scribo_debug_save_comp_diff_SOURCES = scribo_debug_save_comp_diff.cc
scribo_debug_save_label_image_SOURCES = scribo_debug_save_label_image.cc
-scribo_debug_save_linked_bboxes_image_SOURCES = scribo_debug_save_linked_bboxes_image.cc
scribo_debug_save_table_image_SOURCES = scribo_debug_save_table_image.cc
-scribo_debug_several_links_decision_image_SOURCES = scribo_debug_several_links_decision_image.cc
scribo_debug_text_areas_image_SOURCES = scribo_debug_text_areas_image.cc
scribo_debug_usage_SOURCES = scribo_debug_usage.cc
scribo_draw_all_SOURCES = scribo_draw_all.cc
scribo_draw_bounding_box_links_SOURCES = scribo_draw_bounding_box_links.cc
scribo_draw_bounding_boxes_SOURCES = scribo_draw_bounding_boxes.cc
scribo_draw_groups_bboxes_SOURCES = scribo_draw_groups_bboxes.cc
+scribo_estim_font_color_SOURCES = scribo_estim_font_color.cc
scribo_estim_object_groups_v_thickness_SOURCES = scribo_estim_object_groups_v_thickness.cc
scribo_filter_all_SOURCES = scribo_filter_all.cc
scribo_filter_common_objects_photo_SOURCES = scribo_filter_common_objects_photo.cc
@@ -359,6 +394,7 @@ scribo_filter_object_groups_size_ratio_SOURCES = scribo_filter_object_groups_siz
scribo_filter_object_groups_small_SOURCES = scribo_filter_object_groups_small.cc
scribo_filter_object_groups_v_thickness_SOURCES = scribo_filter_object_groups_v_thickness.cc
scribo_filter_object_groups_with_holes_SOURCES = scribo_filter_object_groups_with_holes.cc
+scribo_filter_object_links_aligned_SOURCES = scribo_filter_object_links_aligned.cc
scribo_filter_object_links_bbox_h_ratio_SOURCES = scribo_filter_object_links_bbox_h_ratio.cc
scribo_filter_object_links_bbox_overlap_SOURCES = scribo_filter_object_links_bbox_overlap.cc
scribo_filter_object_links_bbox_ratio_SOURCES = scribo_filter_object_links_bbox_ratio.cc
@@ -382,16 +418,25 @@ scribo_filter_objects_with_holes_SOURCES = scribo_filter_objects_with_holes.cc
scribo_fun_v2b_label_to_bool_SOURCES = scribo_fun_v2b_label_to_bool.cc
scribo_fun_v2b_objects_large_filter_SOURCES = scribo_fun_v2b_objects_large_filter.cc
scribo_fun_v2b_objects_small_filter_SOURCES = scribo_fun_v2b_objects_small_filter.cc
+scribo_fun_v2v_highlight_SOURCES = scribo_fun_v2v_highlight.cc
+scribo_io_img_internal_debug_img_visitor_SOURCES = scribo_io_img_internal_debug_img_visitor.cc
+scribo_io_img_internal_draw_edges_SOURCES = scribo_io_img_internal_draw_edges.cc
+scribo_io_img_internal_full_img_visitor_SOURCES = scribo_io_img_internal_full_img_visitor.cc
+scribo_io_img_internal_non_text_img_visitor_SOURCES = scribo_io_img_internal_non_text_img_visitor.cc
+scribo_io_img_internal_text_img_visitor_SOURCES = scribo_io_img_internal_text_img_visitor.cc
+scribo_io_img_save_SOURCES = scribo_io_img_save.cc
scribo_io_text_boxes_save_SOURCES = scribo_io_text_boxes_save.cc
scribo_io_xml_internal_extended_page_xml_visitor_SOURCES = scribo_io_xml_internal_extended_page_xml_visitor.cc
-scribo_io_xml_internal_full_xml_visitor_SOURCES = scribo_io_xml_internal_full_xml_visitor.cc
scribo_io_xml_internal_page_xml_visitor_SOURCES = scribo_io_xml_internal_page_xml_visitor.cc
scribo_io_xml_internal_print_box_coords_SOURCES = scribo_io_xml_internal_print_box_coords.cc
+scribo_io_xml_internal_print_image_coords_SOURCES = scribo_io_xml_internal_print_image_coords.cc
scribo_io_xml_internal_print_page_preambule_SOURCES = scribo_io_xml_internal_print_page_preambule.cc
scribo_io_xml_save_SOURCES = scribo_io_xml_save.cc
scribo_make_all_SOURCES = scribo_make_all.cc
scribo_make_debug_filename_SOURCES = scribo_make_debug_filename.cc
scribo_make_influence_zone_graph_SOURCES = scribo_make_influence_zone_graph.cc
+scribo_make_text_blocks_image_SOURCES = scribo_make_text_blocks_image.cc
+scribo_make_text_components_image_SOURCES = scribo_make_text_components_image.cc
scribo_postprocessing_all_SOURCES = scribo_postprocessing_all.cc
scribo_postprocessing_fill_object_holes_SOURCES = scribo_postprocessing_fill_object_holes.cc
scribo_preprocessing_all_SOURCES = scribo_preprocessing_all.cc
@@ -411,6 +456,7 @@ scribo_primitive_extract_canvas_SOURCES = scribo_primitive_extract_canvas.cc
scribo_primitive_extract_cells_SOURCES = scribo_primitive_extract_cells.cc
scribo_primitive_extract_components_SOURCES = scribo_primitive_extract_components.cc
scribo_primitive_extract_horizontal_separators_SOURCES = scribo_primitive_extract_horizontal_separators.cc
+scribo_primitive_extract_internal_union_SOURCES = scribo_primitive_extract_internal_union.cc
scribo_primitive_extract_lines_discontinued_SOURCES = scribo_primitive_extract_lines_discontinued.cc
scribo_primitive_extract_lines_h_discontinued_SOURCES = scribo_primitive_extract_lines_h_discontinued.cc
scribo_primitive_extract_lines_h_pattern_SOURCES = scribo_primitive_extract_lines_h_pattern.cc
@@ -425,6 +471,7 @@ scribo_primitive_extract_lines_v_single_SOURCES = scribo_primitive_extract_lines
scribo_primitive_extract_lines_v_thick_SOURCES = scribo_primitive_extract_lines_v_thick.cc
scribo_primitive_extract_lines_v_thick_and_single_SOURCES = scribo_primitive_extract_lines_v_thick_and_single.cc
scribo_primitive_extract_non_text_SOURCES = scribo_primitive_extract_non_text.cc
+scribo_primitive_extract_non_text_kmean_SOURCES = scribo_primitive_extract_non_text_kmean.cc
scribo_primitive_extract_separators_SOURCES = scribo_primitive_extract_separators.cc
scribo_primitive_extract_separators_nonvisible_SOURCES = scribo_primitive_extract_separators_nonvisible.cc
scribo_primitive_extract_vertical_separators_SOURCES = scribo_primitive_extract_vertical_separators.cc
@@ -453,6 +500,7 @@ scribo_primitive_link_internal_find_link_SOURCES = scribo_primitive_link_interna
scribo_primitive_link_internal_link_functor_base_SOURCES = scribo_primitive_link_internal_link_functor_base.cc
scribo_primitive_link_internal_link_several_dmax_base_SOURCES = scribo_primitive_link_internal_link_several_dmax_base.cc
scribo_primitive_link_internal_link_single_dmax_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_base.cc
+scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_aligned_base.cc
scribo_primitive_link_internal_link_single_dmax_ratio_base_SOURCES = scribo_primitive_link_internal_link_single_dmax_ratio_base.cc
scribo_primitive_link_merge_double_link_SOURCES = scribo_primitive_link_merge_double_link.cc
scribo_primitive_link_with_graph_SOURCES = scribo_primitive_link_with_graph.cc
--
1.5.6.5
1
0
* src/Makefile.am,
* src/debug/Makefile.am: Update targets.
* src/debug/non_text_mask.cc,
* src/debug/show_components_bboxes.cc,
* src/debug/show_groups_bboxes.cc,
* src/debug/show_links_bottom_aligned_2angles.cc,
* src/debug/show_links_top_aligned_2angles.cc,
* src/debug/show_paragraph_blocks.cc,
* src/non_text_components.cc: New.
* src/debug/show_links_several_right.cc: Removed. Deprecated.
---
scribo/ChangeLog | 17 ++
scribo/src/Makefile.am | 32 ++++-
scribo/src/debug/Makefile.am | 66 ++++++-
scribo/src/debug/non_text_mask.cc | 26 +++
scribo/src/debug/show_components_bboxes.cc | 59 ++++++
scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++
...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++---
scribo/src/debug/show_links_several_right.cc | 90 ----------
...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++----
scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++
scribo/src/non_text_components.cc | 128 ++++++++++++++
11 files changed, 652 insertions(+), 161 deletions(-)
create mode 100644 scribo/src/debug/non_text_mask.cc
create mode 100644 scribo/src/debug/show_components_bboxes.cc
create mode 100644 scribo/src/debug/show_groups_bboxes.cc
copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%)
delete mode 100644 scribo/src/debug/show_links_several_right.cc
copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%)
create mode 100644 scribo/src/debug/show_paragraph_blocks.cc
create mode 100644 scribo/src/non_text_components.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 32dda87..cc6e861 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New debug tools.
+
+ * src/Makefile.am,
+ * src/debug/Makefile.am: Update targets.
+
+ * src/debug/non_text_mask.cc,
+ * src/debug/show_components_bboxes.cc,
+ * src/debug/show_groups_bboxes.cc,
+ * src/debug/show_links_bottom_aligned_2angles.cc,
+ * src/debug/show_links_top_aligned_2angles.cc,
+ * src/debug/show_paragraph_blocks.cc,
+ * src/non_text_components.cc: New.
+
+ * src/debug/show_links_several_right.cc: Removed. Deprecated.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New non-text components extraction routine.
* scribo/make/text_blocks_image.hh,
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d6275fd..3a35528 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -96,18 +96,48 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS)
+if HAVE_QT
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
-lpthread
+ content_in_doc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ utilexec_PROGRAMS += non_text_components
+ non_text_components_SOURCES = non_text_components.cc
+ non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ non_text_components_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ non_text_components_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
endif HAVE_TESSERACT
diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am
index cdb1f30..60d7afa 100644
--- a/scribo/src/debug/Makefile.am
+++ b/scribo/src/debug/Makefile.am
@@ -18,6 +18,8 @@
include $(top_srcdir)/scribo/scribo.mk
noinst_PROGRAMS = \
+ show_components_bboxes \
+ show_groups_bboxes \
show_info_x_height \
show_info_median_inter_characters \
show_separators \
@@ -25,9 +27,9 @@ noinst_PROGRAMS = \
show_links_bbox_h_ratio \
show_links_bbox_overlap \
show_links_bottom_aligned \
+ show_links_bottom_aligned_2angles \
show_links_center_aligned \
show_links_non_h_aligned \
- show_links_several_right \
show_links_several_right_overlap \
show_links_single_down \
show_links_single_down_left_aligned \
@@ -40,15 +42,16 @@ noinst_PROGRAMS = \
show_links_single_up_left_aligned \
show_links_single_up_right_aligned \
show_links_top_aligned \
+ show_links_top_aligned_2angles \
show_objects_large \
show_objects_large_small \
show_objects_small \
show_objects_thick \
- show_objects_thin \
- show_stoppers \
- show_text_lines
+ show_objects_thin
+show_components_bboxes_SOURCES = show_components_bboxes.cc
+show_groups_bboxes_SOURCES = show_groups_bboxes.cc
show_info_x_height_SOURCES = show_info_x_height.cc
show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc
show_separators_SOURCES = show_separators.cc
@@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val
show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc
show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc
show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc
+show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc
show_links_center_aligned_SOURCES = show_links_center_aligned.cc
show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc
-show_links_several_right_SOURCES = show_links_several_right.cc
show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc
show_links_single_down_SOURCES = show_links_single_down.cc
show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc
@@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc
show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc
show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc
show_links_top_aligned_SOURCES = show_links_top_aligned.cc
+show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc
show_objects_large_SOURCES = show_objects_large.cc
show_objects_large_small_SOURCES = show_objects_large_small.cc
show_objects_small_SOURCES = show_objects_small.cc
show_objects_thick_SOURCES = show_objects_thick.cc
show_objects_thin_SOURCES = show_objects_thin.cc
-show_stoppers_SOURCES = show_stoppers.cc
-show_text_lines_SOURCES = show_text_lines.cc
if HAVE_MAGICKXX
+if HAVE_QT
+
+ noinst_PROGRAMS += show_paragraph_blocks
+ show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc
+ show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(QT_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS)
+ show_paragraph_blocks_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+ noinst_PROGRAMS += show_text_lines
+ show_text_lines_SOURCES = show_text_lines.cc
+ show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_text_lines_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_text_lines_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ noinst_PROGRAMS += show_stoppers
+ show_stoppers_SOURCES = show_stoppers.cc
+ show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_stoppers_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_stoppers_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
+
noinst_PROGRAMS += highlight_text_area
highlight_text_area_SOURCES = highlight_text_area.cc
highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \
- `Magick++-config --cppflags`
+ $(MAGICKXX_CPPFLAGS)
highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \
- -lpthread `Magick++-config --libs`
+ $(MAGICKXX_LDFLAGS)
endif HAVE_MAGICKXX
diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc
new file mode 100644
index 0000000..6fce945
--- /dev/null
+++ b/scribo/src/debug/non_text_mask.cc
@@ -0,0 +1,26 @@
+
+
+int main(int argc, char *argv[])
+{
+ // Link text lines
+ on_new_progress_label("Linking text lines");
+ line_links<L> llinks = scribo::text::link_lines(lines);
+
+ // Filter line links.
+ on_new_progress_label("Filter line links");
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ // Construct paragraphs
+ on_new_progress_label("Constructing paragraphs");
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ on_progress();
+
+
+ // Extract other Elements
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::non_text_fast(doc);
+
+}
diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc
new file mode 100644
index 0000000..7eab4ba
--- /dev/null
+++ b/scribo/src/debug/show_components_bboxes.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/primitive/extract/components.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_comps(c, components)
+ if (components(c).is_valid())
+ mln::draw::box(output, components(c).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc
new file mode 100644
index 0000000..eff0eb7
--- /dev/null
+++ b/scribo/src/debug/show_groups_bboxes.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+ using namespace scribo::primitive;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.* out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+
+ object_links<L>
+ left_link = link::with_single_left_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ object_links<L>
+ right_link = primitive::link::with_single_right_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ primitive::link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+
+ object_groups<L>
+ groups = group::from_single_link(merged_links);
+
+ line_set<L> lines(groups);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_lines(l, lines)
+ if (lines(l).is_valid())
+ mln::draw::box(output, lines(l).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_bottom_aligned.cc
copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc
index 634551b..4b0e765 100644
--- a/scribo/src/debug/show_links_bottom_aligned.cc
+++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
-#include <scribo/core/component_set.hh>
-
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object bottoms. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,42 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
- "Show valid or invalid links according the"
+ "Show valid or invalid links according the "
"horizontal alignment (based on bottom line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictBottomCenter);
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter);
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictBottomCenter);
+ object_links<L> output = link::compute(functor, anchor::Bottom);
- io::ppm::save(decision_image, argv[4]);
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc
deleted file mode 100644
index a70b2fb..0000000
--- a/scribo/src/debug/show_links_several_right.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
-//
-// This file is part of Olena.
-//
-// Olena is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation, version 2 of the License.
-//
-// Olena is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Olena. If not, see <http://www.gnu.org/licenses/>.
-//
-// As a special exception, you may use this file as part of a free
-// software project without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to produce
-// an executable, this file does not by itself cause the resulting
-// executable to be covered by the GNU General Public License. This
-// exception does not however invalidate any other reasons why the
-// executable file might be covered by the GNU General Public License.
-
-#include <iostream>
-
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-
-#include <mln/value/rgb8.hh>
-#include <mln/value/label_16.hh>
-#include <mln/literal/colors.hh>
-
-#include <mln/io/pbm/load.hh>
-#include <mln/io/ppm/save.hh>
-
-#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_several_right_links.hh>
-
-#include <scribo/draw/bounding_boxes.hh>
-
-#include <scribo/debug/several_links_decision_image.hh>
-#include <scribo/debug/usage.hh>
-
-
-
-const char *args_desc[][2] =
-{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_nbh_dist", " Maximum distance for neighborhood search."
- "(common value : 30)" },
- {0, 0}
-};
-
-
-int main(int argc, char* argv[])
-{
- using namespace scribo;
- using namespace scribo::primitive::internal;
- using namespace mln;
-
- if (argc != 4)
- return scribo::debug::usage(argv,
- "Show sucessful/unsuccessful right links between components.",
- "input.pbm max_nbh_dist output.ppm",
- args_desc);
-
- image2d<bool> input;
- io::pbm::load(input, argv[1]);
-
- // Finding objects.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
- component_set<L> comps
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
- // Finding right links.
- object_links<L> right_link
- = primitive::link::with_several_right_links(comps, atoi(argv[2]));
-
- image2d<value::rgb8> decision_image
- = scribo::debug::several_links_decision_image(input,
- right_link,
- right_link);
-
- io::ppm::save(decision_image, argv[3]);
-}
diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_top_aligned.cc
copy to scribo/src/debug/show_links_top_aligned_2angles.cc
index 5ffcb70..48f3a13 100644
--- a/scribo/src/debug/show_links_top_aligned.cc
+++ b/scribo/src/debug/show_links_top_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
-#include <scribo/debug/links_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object tops. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,41 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
"Show valid or invalid links according the "
"horizontal alignment (based on top line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
-
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictTopCenter);
-
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictTopCenter);
- io::ppm::save(decision_image, argv[4]);
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
+
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter);
+
+ object_links<L> output = link::compute(functor, anchor::Top);
+
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc
new file mode 100644
index 0000000..b16a751
--- /dev/null
+++ b/scribo/src/debug/show_paragraph_blocks.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/draw/box_plain.hh>
+
+#include <mln/debug/filename.hh>
+
+#include <mln/util/timer.hh>
+
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/usage.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/paragraph_set.hh>
+#include <scribo/core/line_info.hh>
+
+#include <scribo/text/link_lines.hh>
+#include <scribo/filter/line_links_x_height.hh>
+
+#include <scribo/io/xml/load.hh>
+
+// int i = 0;
+
+const char *args_desc[][2] =
+{
+ {0, 0}
+};
+
+
+
+int main(int argc, char* argv[])
+{
+ using namespace scribo;
+ using namespace mln;
+
+ if (argc != 3)
+ return scribo::debug::usage(argv,
+ "Show paragraph blocks",
+ "lines.xml out_blocks.pbm",
+ args_desc);
+
+ trace::entering("main");
+
+ typedef image2d<scribo::def::lbl_type> L;
+ document<L> doc;
+ scribo::io::xml::load(doc, argv[1]);
+
+ if (! doc.has_text())
+ {
+ std::cout << "ERROR: this XML file does not contain any text information!"
+ << std::endl;
+ return 1;
+ }
+
+
+ // Link text lines
+ line_links<L> llinks = scribo::text::link_lines(doc.lines());
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ util::timer t;
+ t.start();
+
+ image2d<bool> blocks;
+ initialize(blocks, doc.lines().components().labeled_image());
+ data::fill(blocks, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, blocks);
+ // data::fill(log, 0);
+
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= 3)
+ {
+ box2d last_tbox, last_box;
+
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>& line = parset.lines()(parset(p).line_ids()(l));
+
+ if (last_box.is_valid())
+ if (last_box.pmax().row() < line.bbox().pmin().row())
+ {
+ last_tbox = last_box;
+
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ // invalid case:
+ //
+ // =======
+ // ======
+
+ if (pmax.col() > pmin.col())
+ {
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+ else // Handle the case when there are several text boxes on the same line.
+ {
+ if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row())
+ {
+ // Top box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ box2d new_box(pmin, pmax);
+
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 2);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ if (last_box.pmax().col() < line.bbox().pmax().col()
+ && last_box.pmin().col() < line.bbox().pmin().col())
+ {
+ // Left box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(), line.bbox().pmin().col())),
+ pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()),
+ std::max(last_box.pmax().col(), line.bbox().pmin().col()));
+
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 3);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+
+ mln::draw::box_plain(blocks, line.bbox(), true);
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+
+ t.stop();
+ std::cout << t << std::endl;
+
+ mln::io::pbm::save(blocks, argv[2]);
+ // mln::io::pgm::save(log, "log.pgm");
+
+ trace::exiting("main");
+}
diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc
new file mode 100644
index 0000000..0f4cce4
--- /dev/null
+++ b/scribo/src/non_text_components.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+#include <libgen.h>
+#include <fstream>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+
+#include <mln/io/pbm/save.hh>
+#include <mln/io/magick/load.hh>
+
+#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/text_in_doc_preprocess.hh>
+
+#include <scribo/core/document.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/crop_without_localization.hh>
+#include <scribo/preprocessing/crop.hh>
+
+#include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
+
+
+const char *args_desc[][2] =
+{
+ { "input.*", "An image." },
+ { "non_text_comps.pbm", "Non text components mask." },
+ { "enable_debug", "Enable debug image output. Set to 1 or 0." },
+ { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
+ {0, 0}
+};
+
+
+
+int main(int argc, char* argv[])
+{
+ using namespace scribo;
+ using namespace mln;
+
+ if (argc != 4 && argc != 3 && argc != 5)
+ return scribo::debug::usage(argv,
+ "Extract non text components mask/",
+ "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ args_desc);
+
+ std::string out_img = basename(argv[1]);
+ out_img.erase(out_img.size() - 4);
+
+ std::string filename_prefix = out_img + "_debug";
+ scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
+ if (argc > 3 && atoi(argv[3]))
+ scribo::debug::logger().set_level(scribo::debug::Special);
+ else
+ scribo::debug::logger().set_level(scribo::debug::None);
+
+ trace::entering("main");
+
+ Magick::InitializeMagick(*argv);
+
+ typedef image2d<scribo::def::lbl_type> L;
+ image2d<value::rgb8> input;
+ mln::io::magick::load(input, argv[1]);
+
+ util::timer t;
+ t.start();
+
+ // Preprocess document
+ image2d<bool>
+ input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+
+
+ bool denoise = true;
+ std::string language = "";
+ bool find_line_seps = true;
+ bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
+
+ std::cout << "Running with the following options :"
+ << " ocr_language = " << language
+ << " | find_lines_seps = " << find_line_seps
+ << " | find_whitespace_seps = " << find_whitespace_seps
+ << " | debug = " << scribo::debug::logger().is_enabled()
+ << std::endl;
+
+ // Run document toolchain.
+
+ // Text
+ std::cout << "Analysing document..." << std::endl;
+ document<L>
+ doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
+ t.stop();
+ std::cout << t << std::endl;
+
+ mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+
+ scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
+ scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+
+ trace::exiting("main");
+}
--
1.5.6.5
1
0
* src/Makefile.am,
* src/debug/Makefile.am: Update targets.
* src/debug/non_text_mask.cc,
* src/debug/show_components_bboxes.cc,
* src/debug/show_groups_bboxes.cc,
* src/debug/show_links_bottom_aligned_2angles.cc,
* src/debug/show_links_top_aligned_2angles.cc,
* src/debug/show_paragraph_blocks.cc,
* src/non_text_components.cc: New.
* src/debug/show_links_several_right.cc: Removed. Deprecated.
---
scribo/ChangeLog | 17 ++
scribo/src/Makefile.am | 32 ++++-
scribo/src/debug/Makefile.am | 66 ++++++-
scribo/src/debug/non_text_mask.cc | 26 +++
scribo/src/debug/show_components_bboxes.cc | 59 ++++++
scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++
...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++---
scribo/src/debug/show_links_several_right.cc | 90 ----------
...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++----
scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++
scribo/src/non_text_components.cc | 128 ++++++++++++++
11 files changed, 652 insertions(+), 161 deletions(-)
create mode 100644 scribo/src/debug/non_text_mask.cc
create mode 100644 scribo/src/debug/show_components_bboxes.cc
create mode 100644 scribo/src/debug/show_groups_bboxes.cc
copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%)
delete mode 100644 scribo/src/debug/show_links_several_right.cc
copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%)
create mode 100644 scribo/src/debug/show_paragraph_blocks.cc
create mode 100644 scribo/src/non_text_components.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 32dda87..cc6e861 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New debug tools.
+
+ * src/Makefile.am,
+ * src/debug/Makefile.am: Update targets.
+
+ * src/debug/non_text_mask.cc,
+ * src/debug/show_components_bboxes.cc,
+ * src/debug/show_groups_bboxes.cc,
+ * src/debug/show_links_bottom_aligned_2angles.cc,
+ * src/debug/show_links_top_aligned_2angles.cc,
+ * src/debug/show_paragraph_blocks.cc,
+ * src/non_text_components.cc: New.
+
+ * src/debug/show_links_several_right.cc: Removed. Deprecated.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New non-text components extraction routine.
* scribo/make/text_blocks_image.hh,
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d6275fd..3a35528 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -96,18 +96,48 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS)
+if HAVE_QT
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
-lpthread
+ content_in_doc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ utilexec_PROGRAMS += non_text_components
+ non_text_components_SOURCES = non_text_components.cc
+ non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ non_text_components_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ non_text_components_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
endif HAVE_TESSERACT
diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am
index cdb1f30..60d7afa 100644
--- a/scribo/src/debug/Makefile.am
+++ b/scribo/src/debug/Makefile.am
@@ -18,6 +18,8 @@
include $(top_srcdir)/scribo/scribo.mk
noinst_PROGRAMS = \
+ show_components_bboxes \
+ show_groups_bboxes \
show_info_x_height \
show_info_median_inter_characters \
show_separators \
@@ -25,9 +27,9 @@ noinst_PROGRAMS = \
show_links_bbox_h_ratio \
show_links_bbox_overlap \
show_links_bottom_aligned \
+ show_links_bottom_aligned_2angles \
show_links_center_aligned \
show_links_non_h_aligned \
- show_links_several_right \
show_links_several_right_overlap \
show_links_single_down \
show_links_single_down_left_aligned \
@@ -40,15 +42,16 @@ noinst_PROGRAMS = \
show_links_single_up_left_aligned \
show_links_single_up_right_aligned \
show_links_top_aligned \
+ show_links_top_aligned_2angles \
show_objects_large \
show_objects_large_small \
show_objects_small \
show_objects_thick \
- show_objects_thin \
- show_stoppers \
- show_text_lines
+ show_objects_thin
+show_components_bboxes_SOURCES = show_components_bboxes.cc
+show_groups_bboxes_SOURCES = show_groups_bboxes.cc
show_info_x_height_SOURCES = show_info_x_height.cc
show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc
show_separators_SOURCES = show_separators.cc
@@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val
show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc
show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc
show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc
+show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc
show_links_center_aligned_SOURCES = show_links_center_aligned.cc
show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc
-show_links_several_right_SOURCES = show_links_several_right.cc
show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc
show_links_single_down_SOURCES = show_links_single_down.cc
show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc
@@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc
show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc
show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc
show_links_top_aligned_SOURCES = show_links_top_aligned.cc
+show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc
show_objects_large_SOURCES = show_objects_large.cc
show_objects_large_small_SOURCES = show_objects_large_small.cc
show_objects_small_SOURCES = show_objects_small.cc
show_objects_thick_SOURCES = show_objects_thick.cc
show_objects_thin_SOURCES = show_objects_thin.cc
-show_stoppers_SOURCES = show_stoppers.cc
-show_text_lines_SOURCES = show_text_lines.cc
if HAVE_MAGICKXX
+if HAVE_QT
+
+ noinst_PROGRAMS += show_paragraph_blocks
+ show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc
+ show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(QT_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS)
+ show_paragraph_blocks_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+ noinst_PROGRAMS += show_text_lines
+ show_text_lines_SOURCES = show_text_lines.cc
+ show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_text_lines_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_text_lines_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ noinst_PROGRAMS += show_stoppers
+ show_stoppers_SOURCES = show_stoppers.cc
+ show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_stoppers_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_stoppers_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
+
noinst_PROGRAMS += highlight_text_area
highlight_text_area_SOURCES = highlight_text_area.cc
highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \
- `Magick++-config --cppflags`
+ $(MAGICKXX_CPPFLAGS)
highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \
- -lpthread `Magick++-config --libs`
+ $(MAGICKXX_LDFLAGS)
endif HAVE_MAGICKXX
diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc
new file mode 100644
index 0000000..6fce945
--- /dev/null
+++ b/scribo/src/debug/non_text_mask.cc
@@ -0,0 +1,26 @@
+
+
+int main(int argc, char *argv[])
+{
+ // Link text lines
+ on_new_progress_label("Linking text lines");
+ line_links<L> llinks = scribo::text::link_lines(lines);
+
+ // Filter line links.
+ on_new_progress_label("Filter line links");
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ // Construct paragraphs
+ on_new_progress_label("Constructing paragraphs");
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+
+ on_progress();
+
+
+ // Extract other Elements
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::non_text_fast(doc);
+
+}
diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc
new file mode 100644
index 0000000..7eab4ba
--- /dev/null
+++ b/scribo/src/debug/show_components_bboxes.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/primitive/extract/components.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_comps(c, components)
+ if (components(c).is_valid())
+ mln::draw::box(output, components(c).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc
new file mode 100644
index 0000000..eff0eb7
--- /dev/null
+++ b/scribo/src/debug/show_groups_bboxes.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+
+int main(int argc, char *argv[])
+{
+ using namespace mln;
+ using namespace scribo;
+ using namespace scribo::primitive;
+
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.* out.pbm" << std::endl;
+ return 1;
+ }
+
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+
+ object_links<L>
+ left_link = link::with_single_left_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ object_links<L>
+ right_link = primitive::link::with_single_right_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ primitive::link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+
+ object_groups<L>
+ groups = group::from_single_link(merged_links);
+
+ line_set<L> lines(groups);
+
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+
+ for_all_lines(l, lines)
+ if (lines(l).is_valid())
+ mln::draw::box(output, lines(l).bbox(), true);
+
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_bottom_aligned.cc
copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc
index 634551b..4b0e765 100644
--- a/scribo/src/debug/show_links_bottom_aligned.cc
+++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
-#include <scribo/core/component_set.hh>
-
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object bottoms. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,42 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
- "Show valid or invalid links according the"
+ "Show valid or invalid links according the "
"horizontal alignment (based on bottom line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictBottomCenter);
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter);
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictBottomCenter);
+ object_links<L> output = link::compute(functor, anchor::Bottom);
- io::ppm::save(decision_image, argv[4]);
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc
deleted file mode 100644
index a70b2fb..0000000
--- a/scribo/src/debug/show_links_several_right.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
-//
-// This file is part of Olena.
-//
-// Olena is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation, version 2 of the License.
-//
-// Olena is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Olena. If not, see <http://www.gnu.org/licenses/>.
-//
-// As a special exception, you may use this file as part of a free
-// software project without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to produce
-// an executable, this file does not by itself cause the resulting
-// executable to be covered by the GNU General Public License. This
-// exception does not however invalidate any other reasons why the
-// executable file might be covered by the GNU General Public License.
-
-#include <iostream>
-
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-
-#include <mln/value/rgb8.hh>
-#include <mln/value/label_16.hh>
-#include <mln/literal/colors.hh>
-
-#include <mln/io/pbm/load.hh>
-#include <mln/io/ppm/save.hh>
-
-#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_several_right_links.hh>
-
-#include <scribo/draw/bounding_boxes.hh>
-
-#include <scribo/debug/several_links_decision_image.hh>
-#include <scribo/debug/usage.hh>
-
-
-
-const char *args_desc[][2] =
-{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_nbh_dist", " Maximum distance for neighborhood search."
- "(common value : 30)" },
- {0, 0}
-};
-
-
-int main(int argc, char* argv[])
-{
- using namespace scribo;
- using namespace scribo::primitive::internal;
- using namespace mln;
-
- if (argc != 4)
- return scribo::debug::usage(argv,
- "Show sucessful/unsuccessful right links between components.",
- "input.pbm max_nbh_dist output.ppm",
- args_desc);
-
- image2d<bool> input;
- io::pbm::load(input, argv[1]);
-
- // Finding objects.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
- component_set<L> comps
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
- // Finding right links.
- object_links<L> right_link
- = primitive::link::with_several_right_links(comps, atoi(argv[2]));
-
- image2d<value::rgb8> decision_image
- = scribo::debug::several_links_decision_image(input,
- right_link,
- right_link);
-
- io::ppm::save(decision_image, argv[3]);
-}
diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_top_aligned.cc
copy to scribo/src/debug/show_links_top_aligned_2angles.cc
index 5ffcb70..48f3a13 100644
--- a/scribo/src/debug/show_links_top_aligned.cc
+++ b/scribo/src/debug/show_links_top_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
-#include <scribo/debug/links_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object tops. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,41 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
"Show valid or invalid links according the "
"horizontal alignment (based on top line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
-
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictTopCenter);
-
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictTopCenter);
- io::ppm::save(decision_image, argv[4]);
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
+
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter);
+
+ object_links<L> output = link::compute(functor, anchor::Top);
+
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc
new file mode 100644
index 0000000..b16a751
--- /dev/null
+++ b/scribo/src/debug/show_paragraph_blocks.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/draw/box_plain.hh>
+
+#include <mln/debug/filename.hh>
+
+#include <mln/util/timer.hh>
+
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/usage.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/paragraph_set.hh>
+#include <scribo/core/line_info.hh>
+
+#include <scribo/text/link_lines.hh>
+#include <scribo/filter/line_links_x_height.hh>
+
+#include <scribo/io/xml/load.hh>
+
+// int i = 0;
+
+const char *args_desc[][2] = // Table of string pairs that main() hands to scribo::debug::usage().
+{
+ {0, 0} // The only row is a null pair: no argument is described (presumably the table terminator -- confirm against usage()).
+};
+
+
+
+int main(int argc, char* argv[]) // Draws the text-line boxes of paragraphs, plus boxes bridging consecutive lines, into a binary image.
+{
+ using namespace scribo;
+ using namespace mln;
+ // Exactly two arguments are expected: lines.xml and out_blocks.pbm.
+ if (argc != 3)
+ return scribo::debug::usage(argv,
+ "Show paragraph blocks",
+ "lines.xml out_blocks.pbm",
+ args_desc);
+
+ trace::entering("main");
+ // Load the document from the XML file named by argv[1].
+ typedef image2d<scribo::def::lbl_type> L;
+ document<L> doc;
+ scribo::io::xml::load(doc, argv[1]);
+ // Give up with exit status 1 when the document reports no text.
+ if (! doc.has_text())
+ {
+ std::cout << "ERROR: this XML file does not contain any text information!"
+ << std::endl;
+ return 1;
+ }
+
+
+ // Link text lines, then run the links through the x-height filter.
+ line_links<L> llinks = scribo::text::link_lines(doc.lines());
+ llinks = scribo::filter::line_links_x_height(llinks);
+ // Build paragraphs from the filtered links and store them in the document.
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+ // The timer covers only the drawing below; it is stopped and printed after the loop.
+ util::timer t;
+ t.start();
+ // Output image, initialized from the components' labeled image and then filled with false.
+ image2d<bool> blocks;
+ initialize(blocks, doc.lines().components().labeled_image());
+ data::fill(blocks, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, blocks);
+ // data::fill(log, 0);
+ // Only paragraphs made of at least three lines are drawn.
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= 3)
+ {
+ box2d last_tbox, last_box;
+
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>& line = parset.lines()(parset(p).line_ids()(l));
+ // NOTE: no braces follow the next 'if', so the 'else' below pairs with the inner row test; neither branch runs while last_box is invalid.
+ if (last_box.is_valid())
+ if (last_box.pmax().row() < line.bbox().pmin().row())
+ {
+ last_tbox = last_box;
+ // Gap box: rows from last_tbox.pmax().row() to line.bbox().pmin().row(), columns limited to the range both boxes share.
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+ // invalid case:
+ //
+ // =======
+ // ======
+ // Draw only when the shared column range is non-empty.
+ if (pmax.col() > pmin.col())
+ {
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+ else // Handle the case when there are several text boxes on the same line.
+ {
+ if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row())
+ {
+ // Top box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+ pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+ // NOTE(review): unlike the branch above, no pmax.col() > pmin.col() test guards this box.
+ box2d new_box(pmin, pmax);
+
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 2);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ // Taken when last_box has both a smaller min column and a smaller max column than the current line's box.
+ if (last_box.pmax().col() < line.bbox().pmax().col()
+ && last_box.pmin().col() < line.bbox().pmin().col())
+ {
+ // Left box
+ point2d
+ pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(), line.bbox().pmin().col())),
+ pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()),
+ std::max(last_box.pmax().col(), line.bbox().pmin().col()));
+
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(blocks, new_box, true);
+ // mln::draw::box_plain(log, new_box, 3);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+ }
+ // Reached for every line: draw its own bounding box and remember it for the next iteration.
+ mln::draw::box_plain(blocks, line.bbox(), true);
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+
+ t.stop();
+ std::cout << t << std::endl;
+ // Write the resulting image to the path given as argv[2].
+ mln::io::pbm::save(blocks, argv[2]);
+ // mln::io::pgm::save(log, "log.pgm");
+
+ trace::exiting("main");
+}
diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc
new file mode 100644
index 0000000..0f4cce4
--- /dev/null
+++ b/scribo/src/non_text_components.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+#include <libgen.h>
+#include <fstream>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+
+#include <mln/io/pbm/save.hh>
+#include <mln/io/magick/load.hh>
+
+#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/text_in_doc_preprocess.hh>
+
+#include <scribo/core/document.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/crop_without_localization.hh>
+#include <scribo/preprocessing/crop.hh>
+
+#include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
+
+
+const char *args_desc[][2] = // {argument name, description} rows that main() hands to scribo::debug::usage().
+{
+ { "input.*", "An image." },
+ { "non_text_comps.pbm", "Non text components mask." },
+ { "enable_debug", "Enable debug image output. Set to 1 or 0." }, // optional; main() parses argv[3] with atoi()
+ { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." }, // optional; main() derives find_whitespace_seps from argv[4]
+ {0, 0} // null pair closing the table (presumably the terminator expected by usage() -- confirm)
+};
+
+
+
+int main(int argc, char* argv[]) // Runs the content_in_doc toolchain on an image, then saves the elements mask and two renderings of the document.
+{
+ using namespace scribo;
+ using namespace mln;
+ // Two mandatory arguments, optionally followed by one or two 0/1 flags.
+ if (argc != 4 && argc != 3 && argc != 5)
+ return scribo::debug::usage(argv,
+ "Extract non text components mask/",
+ "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ args_desc);
+ // Output prefix = input basename cut at its last '.'; substr() handles any extension length and leaves extension-less names intact.
+ std::string out_img = basename(argv[1]); // NOTE(review): POSIX lets basename() modify its argument, and argv[1] is reused by the loader below -- confirm.
+ out_img = out_img.substr(0, out_img.rfind('.'));
+ // Logger files are prefixed with "<prefix>_debug"; logging is enabled only when argv[3] is given and non-zero.
+ std::string filename_prefix = out_img + "_debug";
+ scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
+ if (argc > 3 && atoi(argv[3]))
+ scribo::debug::logger().set_level(scribo::debug::Special);
+ else
+ scribo::debug::logger().set_level(scribo::debug::None);
+
+ trace::entering("main");
+ // Initialize Magick++ before the input image is loaded through it.
+ Magick::InitializeMagick(*argv);
+
+ typedef image2d<scribo::def::lbl_type> L;
+ image2d<value::rgb8> input;
+ mln::io::magick::load(input, argv[1]);
+ // The timer spans preprocessing and document analysis; it is stopped right after content_in_doc().
+ util::timer t;
+ t.start();
+
+ // Preprocess document
+ image2d<bool>
+ input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+
+ // Fixed toolchain options; only find_whitespace_seps depends on the command line (argv[4]).
+ bool denoise = true;
+ std::string language = "";
+ bool find_line_seps = true;
+ bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
+
+ std::cout << "Running with the following options :"
+ << " ocr_language = " << language
+ << " | find_lines_seps = " << find_line_seps
+ << " | find_whitespace_seps = " << find_whitespace_seps
+ << " | debug = " << scribo::debug::logger().is_enabled()
+ << std::endl;
+
+ // Run document toolchain.
+
+ // Text
+ std::cout << "Analysing document..." << std::endl;
+ document<L>
+ doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
+ t.stop();
+ std::cout << t << std::endl;
+ // Save the labeled image of the document elements, converted to bool, to argv[2].
+ mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+ // Also save two renderings of the document, named after the input prefix.
+ scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
+ scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+
+ trace::exiting("main");
+}
--
1.5.6.5
1
0
* src/Makefile.am,
* src/debug/Makefile.am: Update targets.
* src/debug/non_text_mask.cc,
* src/debug/show_components_bboxes.cc,
* src/debug/show_groups_bboxes.cc,
* src/debug/show_links_bottom_aligned_2angles.cc,
* src/debug/show_links_top_aligned_2angles.cc,
* src/debug/show_paragraph_blocks.cc,
* src/non_text_components.cc: New.
* src/debug/show_links_several_right.cc: Removed. Deprecated.
---
scribo/ChangeLog | 17 ++
scribo/src/Makefile.am | 32 ++++-
scribo/src/debug/Makefile.am | 66 ++++++-
scribo/src/debug/non_text_mask.cc | 26 +++
scribo/src/debug/show_components_bboxes.cc | 59 ++++++
scribo/src/debug/show_groups_bboxes.cc | 91 ++++++++++
...ned.cc => show_links_bottom_aligned_2angles.cc} | 58 +++---
scribo/src/debug/show_links_several_right.cc | 90 ----------
...ligned.cc => show_links_top_aligned_2angles.cc} | 61 +++----
scribo/src/debug/show_paragraph_blocks.cc | 185 ++++++++++++++++++++
scribo/src/non_text_components.cc | 128 ++++++++++++++
11 files changed, 652 insertions(+), 161 deletions(-)
create mode 100644 scribo/src/debug/non_text_mask.cc
create mode 100644 scribo/src/debug/show_components_bboxes.cc
create mode 100644 scribo/src/debug/show_groups_bboxes.cc
copy scribo/src/debug/{show_links_bottom_aligned.cc => show_links_bottom_aligned_2angles.cc} (60%)
delete mode 100644 scribo/src/debug/show_links_several_right.cc
copy scribo/src/debug/{show_links_top_aligned.cc => show_links_top_aligned_2angles.cc} (60%)
create mode 100644 scribo/src/debug/show_paragraph_blocks.cc
create mode 100644 scribo/src/non_text_components.cc
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index a3de9ea..23ecde3 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,22 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New debug tools.
+
+ * src/Makefile.am,
+ * src/debug/Makefile.am: Update targets.
+
+ * src/debug/non_text_mask.cc,
+ * src/debug/show_components_bboxes.cc,
+ * src/debug/show_groups_bboxes.cc,
+ * src/debug/show_links_bottom_aligned_2angles.cc,
+ * src/debug/show_links_top_aligned_2angles.cc,
+ * src/debug/show_paragraph_blocks.cc,
+ * src/non_text_components.cc: New.
+
+ * src/debug/show_links_several_right.cc: Removed. Deprecated.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
New non-text components extraction routine.
* scribo/make/text_blocks_image.hh,
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d6275fd..3a35528 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -96,18 +96,48 @@ if HAVE_TESSERACT
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS)
+if HAVE_QT
utilexec_PROGRAMS += content_in_doc
content_in_doc_SOURCES = content_in_doc.cc
content_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ content_in_doc_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
content_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
-lpthread
+ content_in_doc_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ utilexec_PROGRAMS += non_text_components
+ non_text_components_SOURCES = non_text_components.cc
+ non_text_components_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ $(TIFF_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ non_text_components_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ non_text_components_LDFLAGS = $(AM_LDFLAGS) \
+ $(TESSERACT_LDFLAGS) \
+ $(TIFF_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ -lpthread
+ non_text_components_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
endif HAVE_TESSERACT
diff --git a/scribo/src/debug/Makefile.am b/scribo/src/debug/Makefile.am
index cdb1f30..60d7afa 100644
--- a/scribo/src/debug/Makefile.am
+++ b/scribo/src/debug/Makefile.am
@@ -18,6 +18,8 @@
include $(top_srcdir)/scribo/scribo.mk
noinst_PROGRAMS = \
+ show_components_bboxes \
+ show_groups_bboxes \
show_info_x_height \
show_info_median_inter_characters \
show_separators \
@@ -25,9 +27,9 @@ noinst_PROGRAMS = \
show_links_bbox_h_ratio \
show_links_bbox_overlap \
show_links_bottom_aligned \
+ show_links_bottom_aligned_2angles \
show_links_center_aligned \
show_links_non_h_aligned \
- show_links_several_right \
show_links_several_right_overlap \
show_links_single_down \
show_links_single_down_left_aligned \
@@ -40,15 +42,16 @@ noinst_PROGRAMS = \
show_links_single_up_left_aligned \
show_links_single_up_right_aligned \
show_links_top_aligned \
+ show_links_top_aligned_2angles \
show_objects_large \
show_objects_large_small \
show_objects_small \
show_objects_thick \
- show_objects_thin \
- show_stoppers \
- show_text_lines
+ show_objects_thin
+show_components_bboxes_SOURCES = show_components_bboxes.cc
+show_groups_bboxes_SOURCES = show_groups_bboxes.cc
show_info_x_height_SOURCES = show_info_x_height.cc
show_info_median_inter_characters_SOURCES = show_info_median_inter_characters.cc
show_separators_SOURCES = show_separators.cc
@@ -56,9 +59,9 @@ show_links_left_right_links_validation_SOURCES = show_links_left_right_links_val
show_links_bbox_h_ratio_SOURCES = show_links_bbox_h_ratio.cc
show_links_bbox_overlap_SOURCES = show_links_bbox_overlap.cc
show_links_bottom_aligned_SOURCES = show_links_bottom_aligned.cc
+show_links_bottom_aligned_2angles_SOURCES = show_links_bottom_aligned_2angles.cc
show_links_center_aligned_SOURCES = show_links_center_aligned.cc
show_links_non_h_aligned_SOURCES = show_links_non_h_aligned.cc
-show_links_several_right_SOURCES = show_links_several_right.cc
show_links_several_right_overlap_SOURCES = show_links_several_right_overlap.cc
show_links_single_down_SOURCES = show_links_single_down.cc
show_links_single_down_left_aligned_SOURCES = show_links_single_down_left_aligned.cc
@@ -71,21 +74,66 @@ show_links_single_up_SOURCES = show_links_single_up.cc
show_links_single_up_left_aligned_SOURCES = show_links_single_up_left_aligned.cc
show_links_single_up_right_aligned_SOURCES = show_links_single_up_right_aligned.cc
show_links_top_aligned_SOURCES = show_links_top_aligned.cc
+show_links_top_aligned_2angles_SOURCES = show_links_top_aligned_2angles.cc
show_objects_large_SOURCES = show_objects_large.cc
show_objects_large_small_SOURCES = show_objects_large_small.cc
show_objects_small_SOURCES = show_objects_small.cc
show_objects_thick_SOURCES = show_objects_thick.cc
show_objects_thin_SOURCES = show_objects_thin.cc
-show_stoppers_SOURCES = show_stoppers.cc
-show_text_lines_SOURCES = show_text_lines.cc
if HAVE_MAGICKXX
+if HAVE_QT
+
+ noinst_PROGRAMS += show_paragraph_blocks
+ show_paragraph_blocks_SOURCES = show_paragraph_blocks.cc
+ show_paragraph_blocks_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(QT_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS)
+ show_paragraph_blocks_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_paragraph_blocks_LDFLAGS = $(AM_LDFLAGS) \
+ $(QT_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS)
+ show_paragraph_blocks_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+ noinst_PROGRAMS += show_text_lines
+ show_text_lines_SOURCES = show_text_lines.cc
+ show_text_lines_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS) -DHAVE_QT
+ show_text_lines_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_text_lines_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_text_lines_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+ noinst_PROGRAMS += show_stoppers
+ show_stoppers_SOURCES = show_stoppers.cc
+ show_stoppers_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(MAGICKXX_CPPFLAGS) \
+ $(QT_CPPFLAGS)
+ show_stoppers_CXXFLAGS = $(AM_CXXFLAGS) \
+ $(QT_CXXFLAGS)
+ show_stoppers_LDFLAGS = $(AM_LDFLAGS) \
+ $(MAGICKXX_LDFLAGS) \
+ $(QT_LDFLAGS)
+ show_stoppers_LDADD = $(LDADD) \
+ $(QT_LIBS)
+
+
+endif HAVE_QT
+
+
noinst_PROGRAMS += highlight_text_area
highlight_text_area_SOURCES = highlight_text_area.cc
highlight_text_area_CPPFLAGS = $(AM_CPPFLAGS) \
- `Magick++-config --cppflags`
+ $(MAGICKXX_CPPFLAGS)
highlight_text_area_LDFLAGS = $(AM_LDFLAGS) \
- -lpthread `Magick++-config --libs`
+ $(MAGICKXX_LDFLAGS)
endif HAVE_MAGICKXX
diff --git a/scribo/src/debug/non_text_mask.cc b/scribo/src/debug/non_text_mask.cc
new file mode 100644
index 0000000..6fce945
--- /dev/null
+++ b/scribo/src/debug/non_text_mask.cc
@@ -0,0 +1,26 @@
+
+
+int main(int argc, char *argv[]) // NOTE(review): this file is added without any #include and without declarations for L, lines, doc, on_new_progress_label or on_progress.
+{
+ // Link text lines
+ on_new_progress_label("Linking text lines");
+ line_links<L> llinks = scribo::text::link_lines(lines);
+ // 'lines' above is not declared anywhere in this file.
+ // Filter line links.
+ on_new_progress_label("Filter line links");
+ llinks = scribo::filter::line_links_x_height(llinks);
+
+ // Construct paragraphs
+ on_new_progress_label("Constructing paragraphs");
+ scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+ doc.set_paragraphs(parset);
+ // 'doc' above is not declared anywhere in this file either.
+ on_progress();
+
+
+ // Extract other Elements
+ on_new_progress_label("Extracting Elements");
+ component_set<L>
+ elements = scribo::primitive::extract::non_text_fast(doc);
+ // 'elements' is not used afterwards; argc and argv are never read and main has no return statement.
+}
diff --git a/scribo/src/debug/show_components_bboxes.cc b/scribo/src/debug/show_components_bboxes.cc
new file mode 100644
index 0000000..7eab4ba
--- /dev/null
+++ b/scribo/src/debug/show_components_bboxes.cc
@@ -0,0 +1,59 @@
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/primitive/extract/components.hh>
+
+int main(int argc, char *argv[]) // Draws the bounding box of every valid component of a PBM image into a new PBM image.
+{
+ using namespace mln;
+ using namespace scribo;
+ // Two arguments are required; otherwise print the usage line and return 1.
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+ // Label value type and labeled-image type used for the component set.
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+ // Load the binary input image named by argv[1].
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+ // Extract the components of input with the c8() neighborhood; ncomponents is presumably set to their count -- confirm.
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+ // Output image initialized from input, then filled with false.
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+ // Draw, with value true, the bounding box of each valid component.
+ for_all_comps(c, components)
+ if (components(c).is_valid())
+ mln::draw::box(output, components(c).bbox(), true);
+ // Save the result to the path given as argv[2].
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_groups_bboxes.cc b/scribo/src/debug/show_groups_bboxes.cc
new file mode 100644
index 0000000..eff0eb7
--- /dev/null
+++ b/scribo/src/debug/show_groups_bboxes.cc
@@ -0,0 +1,91 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/value/int_u.hh>
+#include <mln/literal/colors.hh>
+#include <mln/draw/box.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/line_set.hh>
+#include <scribo/primitive/extract/components.hh>
+#include <scribo/primitive/group/from_single_link.hh>
+#include <scribo/primitive/link/with_single_left_link_dmax_ratio.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio.hh>
+#include <scribo/primitive/link/merge_double_link.hh>
+#include <scribo/primitive/link/internal/dmax_width_and_height.hh>
+
+int main(int argc, char *argv[]) // Groups the components of a PBM image through left/right links and draws the bounding box of each resulting line.
+{
+ using namespace mln;
+ using namespace scribo;
+ using namespace scribo::primitive;
+ // Two arguments are required; the input is read with the PBM loader, so the usage line says input.pbm.
+ if (argc != 3)
+ {
+ std::cerr << "Usage : " << argv[0] << " input.pbm out.pbm" << std::endl;
+ return 1;
+ }
+ // Label value type and labeled-image type used for the component set.
+ typedef mln::value::int_u<30> V;
+ typedef image2d<V> L;
+ // Load the binary input image named by argv[1].
+ image2d<bool> input;
+ io::pbm::load(input, argv[1]);
+ // Extract the components of input with the c8() neighborhood; ncomponents is presumably set to their count -- confirm.
+ V ncomponents;
+ component_set<L>
+ components = scribo::primitive::extract::components(input, c8(),
+ ncomponents);
+
+ // Left links, computed with dmax_default(1) and the MassCenter anchor.
+ object_links<L>
+ left_link = link::with_single_left_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ link::internal::dmax_default(1),
+ anchor::MassCenter);
+ // Right links, computed with the same parameters.
+ object_links<L>
+ right_link = primitive::link::with_single_right_link_dmax_ratio(
+ components,
+// primitive::link::internal::dmax_width_and_height(1),
+ primitive::link::internal::dmax_default(1),
+ anchor::MassCenter);
+
+ // Validating left and right links.
+ object_links<L>
+ merged_links = primitive::link::merge_double_link(left_link,
+ right_link);
+
+ // Turn the merged links into groups, then build the line set from those groups.
+ object_groups<L>
+ groups = group::from_single_link(merged_links);
+
+ line_set<L> lines(groups);
+ // Output image initialized from input, then filled with false.
+ image2d<bool> output;
+ initialize(output, input);
+ data::fill(output, false);
+ // Draw, with value true, the bounding box of each valid line.
+ for_all_lines(l, lines)
+ if (lines(l).is_valid())
+ mln::draw::box(output, lines(l).bbox(), true);
+ // Save the result to the path given as argv[2].
+ io::pbm::save(output, argv[2]);
+}
diff --git a/scribo/src/debug/show_links_bottom_aligned.cc b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_bottom_aligned.cc
copy to scribo/src/debug/show_links_bottom_aligned_2angles.cc
index 634551b..4b0e765 100644
--- a/scribo/src/debug/show_links_bottom_aligned.cc
+++ b/scribo/src/debug/show_links_bottom_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2011 EPITA Research and Development Laboratory
-// (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
-#include <scribo/core/component_set.hh>
-
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object bottoms. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,42 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
- "Show valid or invalid links according the"
+ "Show valid or invalid links according the "
"horizontal alignment (based on bottom line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictBottomCenter);
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictBottomCenter);
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictBottomCenter);
+ object_links<L> output = link::compute(functor, anchor::Bottom);
- io::ppm::save(decision_image, argv[4]);
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_links_several_right.cc b/scribo/src/debug/show_links_several_right.cc
deleted file mode 100644
index a70b2fb..0000000
--- a/scribo/src/debug/show_links_several_right.cc
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright (C) 2009, 2010 EPITA Research and Development Laboratory
-// (LRDE)
-//
-// This file is part of Olena.
-//
-// Olena is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free
-// Software Foundation, version 2 of the License.
-//
-// Olena is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-// General Public License for more details.
-//
-// You should have received a copy of the GNU General Public License
-// along with Olena. If not, see <http://www.gnu.org/licenses/>.
-//
-// As a special exception, you may use this file as part of a free
-// software project without restriction. Specifically, if other files
-// instantiate templates or use macros or inline functions from this
-// file, or you compile this file and link it with other files to produce
-// an executable, this file does not by itself cause the resulting
-// executable to be covered by the GNU General Public License. This
-// exception does not however invalidate any other reasons why the
-// executable file might be covered by the GNU General Public License.
-
-#include <iostream>
-
-#include <mln/core/image/image2d.hh>
-#include <mln/core/alias/neighb2d.hh>
-
-#include <mln/value/rgb8.hh>
-#include <mln/value/label_16.hh>
-#include <mln/literal/colors.hh>
-
-#include <mln/io/pbm/load.hh>
-#include <mln/io/ppm/save.hh>
-
-#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_several_right_links.hh>
-
-#include <scribo/draw/bounding_boxes.hh>
-
-#include <scribo/debug/several_links_decision_image.hh>
-#include <scribo/debug/usage.hh>
-
-
-
-const char *args_desc[][2] =
-{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_nbh_dist", " Maximum distance for neighborhood search."
- "(common value : 30)" },
- {0, 0}
-};
-
-
-int main(int argc, char* argv[])
-{
- using namespace scribo;
- using namespace scribo::primitive::internal;
- using namespace mln;
-
- if (argc != 4)
- return scribo::debug::usage(argv,
- "Show sucessful/unsuccessful right links between components.",
- "input.pbm max_nbh_dist output.ppm",
- args_desc);
-
- image2d<bool> input;
- io::pbm::load(input, argv[1]);
-
- // Finding objects.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
- component_set<L> comps
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
- // Finding right links.
- object_links<L> right_link
- = primitive::link::with_several_right_links(comps, atoi(argv[2]));
-
- image2d<value::rgb8> decision_image
- = scribo::debug::several_links_decision_image(input,
- right_link,
- right_link);
-
- io::ppm::save(decision_image, argv[3]);
-}
diff --git a/scribo/src/debug/show_links_top_aligned.cc b/scribo/src/debug/show_links_top_aligned_2angles.cc
similarity index 60%
copy from scribo/src/debug/show_links_top_aligned.cc
copy to scribo/src/debug/show_links_top_aligned_2angles.cc
index 5ffcb70..48f3a13 100644
--- a/scribo/src/debug/show_links_top_aligned.cc
+++ b/scribo/src/debug/show_links_top_aligned_2angles.cc
@@ -1,5 +1,4 @@
-// Copyright (C) 2009, 2010, 2011 EPITA Research and Development
-// Laboratory (LRDE)
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -38,22 +37,22 @@
#include <mln/io/pbm/load.hh>
#include <mln/io/ppm/save.hh>
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/primitive/extract/components.hh>
-#include <scribo/primitive/link/with_single_right_link.hh>
#include <scribo/filter/object_links_aligned.hh>
+#include <scribo/primitive/link/with_single_right_link_dmax_ratio_aligned.hh>
-#include <scribo/debug/alignment_decision_image.hh>
-#include <scribo/debug/links_image.hh>
#include <scribo/debug/usage.hh>
const char *args_desc[][2] =
{
- { "input.pbm", "A binary image. True for objects and False for the "
- "background." },
- { "max_dist", "Maximum distance lookup (common value 45)" },
- { "max_alpha", "Max angle between two object tops. (common value : 5)" },
+ { "input.pbm", "A binary image" },
+ { "dmax_ratio", "Maximum distance lookup (common value 5)" },
+ { "min_alpha", "First angle used for close objects. (common value : 3)" },
+ { "max_alpha", "Second angle used for further objects. (common value : 5)" },
{0, 0}
};
@@ -61,41 +60,41 @@ const char *args_desc[][2] =
int main(int argc, char* argv[])
{
using namespace scribo;
- using namespace scribo::primitive::internal;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc != 5)
+ if (argc != 6)
return scribo::debug::usage(argv,
"Show valid or invalid links according the "
"horizontal alignment (based on top line).",
- "input.pbm max_dist max_alpha output.ppm",
+ "input.pbm dmax_ratio min_angle max_angle "
+ " output.ppm",
args_desc);
image2d<bool> input;
io::pbm::load(input, argv[1]);
// Finding components.
- value::label_16 nbboxes;
- typedef image2d<value::label_16> L;
+ typedef scribo::def::lbl_type V;
+ V nbboxes;
+ typedef image2d<V> L;
component_set<L> components
- = scribo::primitive::extract::components(input, c8(), nbboxes);
-
+ = extract::components(input, c8(), nbboxes);
// Finding right links.
- object_links<L> right_links
- = primitive::link::with_single_right_link(components, atoi(argv[2]));
-
- // Filtering.
- object_links<L> filtered_links
- = filter::object_links_aligned(right_links, atof(argv[3]),
- anchor::StrictTopCenter);
-
- // Debug image.
- image2d<value::rgb8> decision_image
- = scribo::debug::alignment_decision_image(input,
- right_links,
- filtered_links,
- anchor::StrictTopCenter);
- io::ppm::save(decision_image, argv[4]);
+ // object_links<L>
+ // right_links = primitive::link::with_single_right_link_dmax_ratio_aligned(
+ // components, atof(argv[2]), atof(argv[3]), atof(argv[4]));
+
+ scribo::debug::logger().set_level(scribo::debug::All);
+
+ link::internal::single_right_dmax_ratio_aligned_functor<L,link::internal::dmax_default>
+ functor(components, link::internal::dmax_default(atof(argv[2])),
+ atof(argv[3]), atof(argv[4]), anchor::StrictTopCenter);
+
+ object_links<L> output = link::compute(functor, anchor::Top);
+
+ scribo::debug::logger().set_level(scribo::debug::None);
+ io::ppm::save(functor.debug_, argv[5]);
}
diff --git a/scribo/src/debug/show_paragraph_blocks.cc b/scribo/src/debug/show_paragraph_blocks.cc
new file mode 100644
index 0000000..b16a751
--- /dev/null
+++ b/scribo/src/debug/show_paragraph_blocks.cc
@@ -0,0 +1,185 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/draw/box_plain.hh>
+
+#include <mln/debug/filename.hh>
+
+#include <mln/util/timer.hh>
+
+#include <scribo/core/def/lbl_type.hh>
+#include <scribo/debug/usage.hh>
+
+#include <scribo/core/component_set.hh>
+#include <scribo/core/paragraph_set.hh>
+#include <scribo/core/line_info.hh>
+
+#include <scribo/text/link_lines.hh>
+#include <scribo/filter/line_links_x_height.hh>
+
+#include <scribo/io/xml/load.hh>
+
+// int i = 0;
+
+const char *args_desc[][2] = // no per-argument help entries for this tool
+{
+  {0, 0} // presumably the end-of-table marker read by scribo::debug::usage — TODO confirm
+};
+
+
+
+int main(int argc, char* argv[])
+{
+  using namespace scribo;
+  using namespace mln;
+  // Draws the paragraph blocks of the document loaded from argv[1] into a binary image saved to argv[2].
+  if (argc != 3)
+    return scribo::debug::usage(argv,
+				"Show paragraph blocks",
+				"lines.xml out_blocks.pbm",
+				args_desc);
+
+  trace::entering("main");
+
+  typedef image2d<scribo::def::lbl_type> L;
+  document<L> doc;
+  scribo::io::xml::load(doc, argv[1]);
+  // NOTE(review): the early return below leaves main after trace::entering without a matching trace::exiting.
+  if (! doc.has_text())
+  {
+    std::cout << "ERROR: this XML file does not contain any text information!"
+	      << std::endl;
+    return 1;
+  }
+
+  // Link text lines
+  line_links<L> llinks = scribo::text::link_lines(doc.lines());
+  llinks = scribo::filter::line_links_x_height(llinks);
+
+  scribo::paragraph_set<L> parset = scribo::make::paragraph(llinks);
+  doc.set_paragraphs(parset);
+  // The timer covers only the drawing below: started here, stopped after the paragraph loop.
+  util::timer t;
+  t.start();
+
+  image2d<bool> blocks;
+  initialize(blocks, doc.lines().components().labeled_image());
+  data::fill(blocks, false);
+
+  // image2d<value::int_u8> log;
+  // initialize(log, blocks);
+  // data::fill(log, 0);
+
+  for_all_paragraphs(p, parset)
+    if (parset(p).nlines() >= 3) // paragraphs with fewer than 3 lines are not drawn
+    {
+      box2d last_tbox, last_box;
+      // last_box: bbox of the previous line.  last_tbox: copy of last_box taken when a line starts strictly below it.
+      // For each line in this paragraph.
+      for_all_elements(l, parset(p).line_ids())
+      {
+	const line_info<L>& line = parset.lines()(parset(p).line_ids()(l));
+	// The 'else' below pairs with the inner 'if': last_box is valid but this line does not start below it.
+	if (last_box.is_valid())
+	  if (last_box.pmax().row() < line.bbox().pmin().row())
+	  {
+	    last_tbox = last_box;
+
+	    point2d
+	      pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+		   std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+	      pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+		   std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+	    // invalid case:
+	    //
+	    // =======
+	    // ======
+
+	    if (pmax.col() > pmin.col())
+	    {
+	      box2d new_box(pmin, pmax);
+	      mln::draw::box_plain(blocks, new_box, true);
+	      // mln::draw::box_plain(log, new_box, 1);
+
+	      // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+	    }
+	  }
+	  else // Handle the case when there are several text boxes on the same line.
+	  {
+	    if (last_tbox.is_valid() && last_tbox.pmax().row() < line.bbox().pmin().row())
+	    {
+	      // Top box
+	      point2d
+		pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+		     std::max(last_tbox.pmin().col(), line.bbox().pmin().col())),
+		pmax(std::max(last_tbox.pmax().row(), line.bbox().pmin().row()),
+		     std::min(last_tbox.pmax().col(), line.bbox().pmax().col()));
+
+	      box2d new_box(pmin, pmax);
+
+	      mln::draw::box_plain(blocks, new_box, true);
+	      // mln::draw::box_plain(log, new_box, 2);
+	      // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+	    }
+	    // NOTE(review): last_tbox is read below without the is_valid() test used for the
+	    // top box; it is still default-constructed if no line has started below yet.
+	    if (last_box.pmax().col() < line.bbox().pmax().col()
+		&& last_box.pmin().col() < line.bbox().pmin().col())
+	    {
+	      // Left box
+	      point2d
+		pmin(std::min(last_tbox.pmax().row(), line.bbox().pmin().row()),
+		     std::min(last_box.pmax().col(), line.bbox().pmin().col())),
+		pmax(std::min(last_box.pmax().row(), line.bbox().pmax().row()),
+		     std::max(last_box.pmax().col(), line.bbox().pmin().col()));
+
+	      box2d new_box(pmin, pmax);
+	      mln::draw::box_plain(blocks, new_box, true);
+	      // mln::draw::box_plain(log, new_box, 3);
+	      // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+	    }
+	  }
+
+	mln::draw::box_plain(blocks, line.bbox(), true);
+	// mln::draw::box_plain(log, line.bbox(), 255);
+	// mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+	last_box = line.bbox();
+      }
+    }
+
+  t.stop();
+  std::cout << t << std::endl;
+
+  mln::io::pbm::save(blocks, argv[2]);
+  // mln::io::pgm::save(log, "log.pgm");
+
+  trace::exiting("main");
+}
diff --git a/scribo/src/non_text_components.cc b/scribo/src/non_text_components.cc
new file mode 100644
index 0000000..0f4cce4
--- /dev/null
+++ b/scribo/src/non_text_components.cc
@@ -0,0 +1,128 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory
+// (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+#include <libgen.h>
+#include <fstream>
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+
+#include <mln/io/pbm/save.hh>
+#include <mln/io/magick/load.hh>
+
+#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/text_in_doc_preprocess.hh>
+
+#include <scribo/core/document.hh>
+
+#include <scribo/debug/usage.hh>
+
+#include <scribo/preprocessing/crop_without_localization.hh>
+#include <scribo/preprocessing/crop.hh>
+
+#include <scribo/io/xml/save.hh>
+#include <scribo/io/img/save.hh>
+
+
+const char *args_desc[][2] = // {argument name, description} pairs passed to scribo::debug::usage
+{
+  { "input.*", "An image." },
+  { "non_text_comps.pbm", "Non text components mask." },
+  { "enable_debug", "Enable debug image output. Set to 1 or 0." },
+  { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
+  {0, 0} // presumably the end-of-table marker — TODO confirm against scribo::debug::usage
+};
+
+
+
+int main(int argc, char* argv[])
+{
+  // Save to argv[2] the non-text components mask of the image argv[1].
+  using namespace scribo;
+  using namespace mln;
+
+  if (argc < 3 || argc > 5)
+    return scribo::debug::usage(argv,
+				"Extract non text components mask.",
+				"input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+				args_desc);
+
+  // Debug prefix = argv[1] minus directory and extension.  Work on a copy
+  // (POSIX basename() may modify its argument) and cut at the last '.'.
+  std::string out_img = argv[1];
+  const std::string::size_type slash = out_img.rfind('/');
+  if (slash != std::string::npos)
+    out_img.erase(0, slash + 1);
+  const std::string::size_type dot = out_img.rfind('.');
+  if (dot != std::string::npos && dot != 0)
+    out_img.erase(dot);
+
+  std::string filename_prefix = out_img + "_debug";
+  scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
+  if (argc > 3 && atoi(argv[3]))
+    scribo::debug::logger().set_level(scribo::debug::Special);
+  else
+    scribo::debug::logger().set_level(scribo::debug::None);
+
+  trace::entering("main");
+  Magick::InitializeMagick(*argv);
+
+  typedef image2d<scribo::def::lbl_type> L;
+  image2d<value::rgb8> input;
+  mln::io::magick::load(input, argv[1]);
+  util::timer t;
+  t.start();
+
+  // Preprocess document
+  image2d<bool>
+    input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
+
+  bool denoise = true;
+  std::string language = "";
+  bool find_line_seps = true;
+  bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
+
+  std::cout << "Running with the following options :"
+	    << " ocr_language = " << language
+	    << " | find_lines_seps = " << find_line_seps
+	    << " | find_whitespace_seps = " << find_whitespace_seps
+	    << " | debug = " << scribo::debug::logger().is_enabled()
+	    << std::endl;
+  // Run document toolchain.
+  std::cout << "Analysing document..." << std::endl;
+  document<L>
+    doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
+					    find_line_seps, find_whitespace_seps,
+					    !language.empty(), language);
+  t.stop();
+  std::cout << t << std::endl;
+
+  mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+  scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
+  scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+  trace::exiting("main");
+}
--
1.5.6.5
1
0

last-svn-commit-808-g3f14376 New non-text components extraction routine.
by Guillaume Lazzara 14 Mar '11
by Guillaume Lazzara 14 Mar '11
14 Mar '11
* scribo/make/text_blocks_image.hh,
* scribo/make/text_components_image.hh,
* scribo/primitive/extract/internal/union.hh: New.
* scribo/primitive/extract/non_text.hh: New implementation.
* scribo/primitive/extract/non_text_kmean.hh: New. Old
implementation is saved here.
---
scribo/ChangeLog | 13 +
scribo/scribo/make/text_blocks_image.hh | 136 ++++++++
scribo/scribo/make/text_components_image.hh | 101 ++++++
scribo/scribo/primitive/extract/internal/union.hh | 246 +++++++++++++
scribo/scribo/primitive/extract/non_text.hh | 366 ++++++++++++++------
.../extract/{non_text.hh => non_text_kmean.hh} | 17 +-
6 files changed, 761 insertions(+), 118 deletions(-)
create mode 100644 scribo/scribo/make/text_blocks_image.hh
create mode 100644 scribo/scribo/make/text_components_image.hh
create mode 100644 scribo/scribo/primitive/extract/internal/union.hh
copy scribo/scribo/primitive/extract/{non_text.hh => non_text_kmean.hh} (91%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 43041fe..32dda87 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New non-text components extraction routine.
+
+ * scribo/make/text_blocks_image.hh,
+ * scribo/make/text_components_image.hh,
+ * scribo/primitive/extract/internal/union.hh: New.
+
+ * scribo/primitive/extract/non_text.hh: New implementation.
+
+ * scribo/primitive/extract/non_text_kmean.hh: New. Old
+ implementation is saved here.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Introduce result image output.
* scribo/fun/v2v/highlight.hh,
diff --git a/scribo/scribo/make/text_blocks_image.hh b/scribo/scribo/make/text_blocks_image.hh
new file mode 100644
index 0000000..fbc16df
--- /dev/null
+++ b/scribo/scribo/make/text_blocks_image.hh
@@ -0,0 +1,136 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+
+/// \file
+///
+/// \brief Create a mask of paragraph blocks.
+
+# include <mln/core/image/image2d.hh>
+# include <mln/draw/box_plain.hh>
+
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+    /// \brief Create a mask of paragraph blocks.
+    /*! Only paragraphs with at least \p min_nlines lines are drawn.
+      \pre \p doc must be open (is_open()); when \p doc has no text
+      (has_text() is false) the returned mask is entirely false. */
+    template <typename L>
+    mln_ch_value(L,bool)
+    text_blocks_image(const document<L>& doc, unsigned min_nlines);
+
+
+ # ifndef MLN_INCLUDE_ONLY
+
+    template <typename L>
+    mln_ch_value(L,bool)
+    text_blocks_image(const document<L>& doc, unsigned min_nlines)
+    {
+      trace::entering("scribo::make::text_blocks_image");
+
+      mln_precondition(doc.is_open());
+
+      // Same type as the declared return type (cf. text_components_image).
+      mln_ch_value(L,bool) output;
+      initialize(output, doc.image());
+      data::fill(output, false);
+
+      // Without text information the mask is returned entirely false.
+      if (doc.has_text())
+      {
+	const paragraph_set<L>& parset = doc.paragraphs();
+	for_all_paragraphs(p, parset)
+	  if (parset(p).nlines() >= min_nlines)
+	  {
+	    // Bounding box of the previous line of this paragraph; it is
+	    // tested with is_valid() before use and set after each line.
+	    box2d last_box;
+
+	    // For each line in this paragraph.
+	    for_all_elements(l, parset(p).line_ids())
+	    {
+	      const line_info<L>&
+		line = parset.lines()(parset(p).line_ids()(l));
+
+	      // Fill the gap between the previous line and this one, but
+	      // only when this line starts strictly below the previous one
+	      // and their column ranges overlap.  Both sides are tested,
+	      // otherwise a case such as
+	      //
+	      // =======
+	      //          ======
+	      //
+	      // would give pmax.col() < pmin.col(), i.e. an invalid box.
+	      if (last_box.is_valid()
+		  && last_box.pmax().row() < line.bbox().pmin().row()
+		  && last_box.pmin().col() < line.bbox().pmax().col()
+		  && line.bbox().pmin().col() < last_box.pmax().col())
+	      {
+		point2d
+		  pmin(std::min(last_box.pmax().row(),
+				line.bbox().pmin().row()),
+		       std::max(last_box.pmin().col(),
+				line.bbox().pmin().col())),
+		  pmax(std::max(last_box.pmax().row(),
+				line.bbox().pmin().row()),
+		       std::min(last_box.pmax().col(),
+				line.bbox().pmax().col()));
+
+		box2d new_box(pmin, pmax);
+		mln::draw::box_plain(output, new_box, true);
+	      }
+
+	      mln::draw::box_plain(output, line.bbox(), true);
+	      last_box = line.bbox();
+	    }
+	  }
+      }
+
+      trace::exiting("scribo::make::text_blocks_image");
+      return output;
+    }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
diff --git a/scribo/scribo/make/text_components_image.hh b/scribo/scribo/make/text_components_image.hh
new file mode 100644
index 0000000..e7c892e
--- /dev/null
+++ b/scribo/scribo/make/text_components_image.hh
@@ -0,0 +1,101 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+
+/// \file
+///
+/// Create a binary image with text components only.
+
+#include <mln/pw/all.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+#include <mln/util/array.hh>
+#include <mln/data/fill.hh>
+
+#include <scribo/core/document.hh>
+#include <scribo/core/line_set.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+    /// \brief Create a binary image with text components only.
+    /*! Only components of lines reporting is_textline() are filled in.
+      \pre \p doc must be open (is_open()); when \p doc has no text
+      (has_text() is false) the returned image is entirely false. */
+    template <typename L>
+    mln_ch_value(L,bool)
+    text_components_image(const document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+    template <typename L>
+    mln_ch_value(L,bool)
+    text_components_image(const document<L>& doc)
+    {
+      trace::entering("scribo::make::text_components_image");
+      mln_precondition(doc.is_open());
+      // Start from an all-false image initialized from doc.image().
+      mln_ch_value(L,bool) output;
+      initialize(output, doc.image());
+      data::fill(output, false);
+      // Without text information the image is returned entirely false.
+      if (doc.has_text())
+      {
+	const scribo::line_set<L>& lines = doc.lines();
+	for_all_lines(l, doc.lines())
+	  if (lines(l).is_textline())
+	  {
+	    const util::array<component_id_t>&
+	      comp_ids = lines(l).component_ids();
+	    const L& lbl = lines.components().labeled_image();
+	    // Per component: fill output inside its bbox from binary_image(), where lbl equals its id.
+	    for_all_elements(c, comp_ids)
+	      data::fill((output | lines.components()(comp_ids(c)).bbox()).rw(),
+			 ((doc.binary_image() | lines.components()(comp_ids(c)).bbox())
+			  | (pw::value(lbl) == comp_ids(c))));
+	  }
+      }
+
+      trace::exiting("scribo::make::text_components_image");
+      return output;
+    }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
diff --git a/scribo/scribo/primitive/extract/internal/union.hh b/scribo/scribo/primitive/extract/internal/union.hh
new file mode 100644
index 0000000..509a7e4
--- /dev/null
+++ b/scribo/scribo/primitive/extract/internal/union.hh
@@ -0,0 +1,246 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Various utilities for image extraction
+///
+/// \fixme To be cleaned up.
+
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/border/fill.hh>
+
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace extract
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+
+ unsigned find_root(image2d<unsigned>& parent, unsigned x);
+
+
+ void union_find(const image2d<bool>& input, bool lab,
+ image2d<unsigned>& parent, image2d<unsigned>& area,
+ unsigned& max_area);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ inline // Returns the root of x in the forest stored in parent.
+ unsigned find_root(image2d<unsigned>& parent, unsigned x)
+ {
+ if (parent.element(x) == x) // x is its own parent: it is a root
+ return x;
+ return parent.element(x) = find_root(parent, parent.element(x)); // path compression: x is re-linked directly to the root found
+ }
+
+
+ inline
+ void union_find(const image2d<bool>& input,
+ bool lab,
+ // output:
+ image2d<unsigned>& parent,
+ image2d<unsigned>& area,
+ unsigned& max_area)
+ {
+ const unsigned nrows = input.nrows(), ncols = input.ncols();
+ // Raster-scan union-find over the pixels of input equal to lab; each pixel is linked to its up and left neighbours.
+ unsigned op, on, delta = input.delta_index(dpoint2d(1, 0));
+ // op: offset of the current pixel; on: offset of its left (row 0) or upper (other rows) neighbour; delta: offset step between two rows.
+ data::fill(parent, 0);
+ max_area = 0; // NOTE(review): only raised when a set grows; stays 0 if every set is a single pixel
+
+ {
+
+ // row == 0 and col == 0
+
+ op = input.index_of_point(point2d(0,0));
+ if (input.element(op) == lab)
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+
+ // row = 0: only the left neighbour is tested
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ on = op;
+ ++op;
+
+ if (input.element(op) != lab)
+ continue;
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+ }
+ }
+
+
+ for (unsigned row = 1; row < nrows; ++row)
+ {
+
+ {
+ // col == 0: only the upper neighbour is tested
+
+ op = input.index_of_point(point2d(row, 0));
+ on = op - delta;
+
+ if (input.element(op) == lab)
+ {
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+ }
+
+ }
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ ++op;
+ ++on;
+
+ if (input.element(op) != lab)
+ continue;
+
+ bool merge_ = false;
+
+ // up: attach the pixel to the root of its upper neighbour
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+
+
+ // left: attach the pixel to, or merge its set with, the set of the left neighbour
+
+ unsigned ol = op - 1;
+
+ if (input.element(ol) == lab)
+ {
+ if (merge_)
+ {
+ if (input.element(on - 1) != lab) // not already merged (an upper-left pixel equal to lab already links up and left)
+ {
+ unsigned r_op = parent.element(op), r_ol = find_root(parent, ol); // parent(op) is the root set by the "up" step
+ if (r_op != r_ol)
+ {
+ // do-union: the root with the smaller offset becomes the parent and takes the summed area
+ if (r_op < r_ol)
+ {
+ parent.element(r_ol) = r_op;
+ area.element(r_op) += area.element(r_ol);
+ if (area.element(r_op) > max_area)
+ max_area = area.element(r_op);
+ }
+ else
+ {
+ parent.element(r_op) = r_ol;
+ area.element(r_ol) += area.element(r_op);
+ if (area.element(r_ol) > max_area)
+ max_area = area.element(r_ol);
+ }
+ }
+ }
+ } // end of "if (merge_)"
+ else
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, ol);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+ }
+
+
+ // finalization: neither neighbour equals lab, so the pixel starts a new set
+
+ if (merge_ == false)
+ {
+ parent.element(op) = op;
+ area.element(op) = 1;
+ }
+
+ }
+ }
+
+ } // end of 'union_find'
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::primitive::extract::internal
+
+ } // end of namespace scribo::primitive::extract
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text.hh
index 8528782..a456270 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -33,30 +33,19 @@
#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# include <mln/core/image/image2d.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/data/fill.hh>
-# include <mln/util/array.hh>
-# include <mln/labeling/compute.hh>
-# include <mln/labeling/relabel.hh>
-# include <mln/accu/math/count.hh>
-# include <mln/pw/all.hh>
+# include <mln/morpho/elementary/dilation.hh>
-# include <mln/draw/box_plain.hh>
-# include <mln/value/label_8.hh>
-# include <mln/value/rgb.hh>
-# include <mln/value/rgb8.hh>
+# include <scribo/make/text_components_image.hh>
+# include <scribo/make/text_blocks_image.hh>
-# include <scribo/core/macros.hh>
-# include <scribo/core/component_set.hh>
-# include <scribo/core/document.hh>
-# include <scribo/core/line_set.hh>
-# include <scribo/core/def/lbl_type.hh>
-# include <scribo/filter/objects_small.hh>
+# include <scribo/primitive/extract/internal/union.hh>
+# include <scribo/debug/logger.hh>
+
+//DEBUG
+#include <mln/util/timer.hh>
+#include <mln/io/pbm/save.hh>
-# include <mln/clustering/kmean_rgb.hh>
-# include <mln/fun/v2v/rgb8_to_rgbn.hh>
namespace scribo
{
@@ -69,10 +58,24 @@ namespace scribo
using namespace mln;
+ /*! \brief Extract non text components.
+
+ This method takes text localization into account and tries to
+ learn the background colors to deduce the relevant non text
+ components.
+
+ \param[in] doc A document structure. It must have paragraph
+ information.
+
+ \param[in] nlines The number of lines needed in a paragraph to
+ consider the latter during the background color learning.
+
- template <typename L, typename I>
+ \return A component set of non text components.
+ */
+ template <typename L>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text(const document<L>& doc, unsigned nlines);
# ifndef MLN_INCLUDE_ONLY
@@ -82,133 +85,276 @@ namespace scribo
{
template <typename L>
- struct order_bbox
+ image2d<bool>
+ learn(const document<L>& doc,
+ const image2d<bool>& txt,
+ const image2d<bool>& txtblocks,
+ unsigned nbits,
+ float p_cover)
{
- order_bbox(const scribo::component_set<L>& comps)
- : comps_(comps)
+ const image2d<value::rgb8>& input = doc.image();
+ const image2d<bool>&
+ seps = doc.paragraphs().lines().components().separators();
+
+ if (txt.border() != input.border()
+ || txtblocks.border() != input.border()
+ || seps.border() != input.border())
{
+ std::cerr << " txt.border() = " << txt.border()
+ << " - txtblocks.border() = " << txtblocks.border()
+ << " - input.border() = " << input.border()
+ << " - seps.border() = " << seps.border()
+ << std::endl;
+ std::cerr << "different sizes for borders! Resizing..." << std::endl;
+
+
+ border::resize(txt, border::thickness);
+ border::resize(input, border::thickness);
+ border::resize(txtblocks, border::thickness);
+ border::resize(seps, border::thickness);
+ // std::abort();
}
- bool operator()(const unsigned& c1, const unsigned& c2) const
+
+ const unsigned q_div = std::pow(2, 8 - nbits);
+ const unsigned q = unsigned(std::pow(2, nbits));
+ const unsigned nelements = input.nelements();
+
+
+ image3d<unsigned> h_bg(q, q, q);
+ data::fill(h_bg, 0);
+
+ border::fill(txtblocks, false); // so h_bg is not updated by border pixels!
+
+ unsigned n_bg = 0;
{
- if (comps_(c1).bbox().nsites() == comps_(c2).bbox().nsites())
- return c1 > c2;
- return comps_(c1).bbox().nsites() > comps_(c2).bbox().nsites();
+ // compute h_bg
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txtblocks.element(i) == true)
+ {
+ ++n_bg;
+ const value::rgb8& c = input.element(i);
+ ++h_bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
}
- scribo::component_set<L> comps_;
- };
+ typedef std::map<unsigned, unsigned> map_t;
+ map_t ncells_with_nitems;
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ {
+ unsigned nitems_in_c = h_bg(c);
+ ++ncells_with_nitems[ nitems_in_c ];
+ }
+ }
- } // end of namespace scribo::primitive::extract::internal
+ unsigned n_items_min = 0;
+ {
+ map_t::const_reverse_iterator i;
+ unsigned N = 0;
+ for (i = ncells_with_nitems.rbegin(); i != ncells_with_nitems.rend(); ++i)
+ {
+ unsigned nitems = i->first, ncells = i->second;
+ N += nitems * ncells;
+ if (float(N) > p_cover * float(n_bg))
+ {
+ n_items_min = nitems;
+ break;
+ }
+ }
+ }
+ if (n_items_min == 0)
+ n_items_min = 1; // safety
- // FACADE
+ image3d<bool> bg(q, q, q);
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ bg(c) = (h_bg(c) >= n_items_min);
+ }
- template <typename L, typename I>
- component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
- {
- trace::entering("scribo::primitive::extract::non_text");
- const I& input = exact(input_);
- mln_precondition(doc.is_valid());
- mln_precondition(input.is_valid());
+ // outputting
- const line_set<L>& lines = doc.lines();
+ image2d<bool> output;
+ initialize(output, input);
+ {
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txt.element(i) == true || seps.element(i) == true)
+ output.element(i) = false;
+ else
+ {
+ const value::rgb8& c = input.element(i);
+ output.element(i) = ! bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
+ }
- // Element extraction
+ return output;
+ }
- image2d<value::label_8> img_lbl8;
+
+
+
+
+ inline
+ image2d<bool>
+ cleaning(const image2d<bool>& input, unsigned lambda)
{
- image2d<bool> content;
- initialize(content, input);
- data::fill(content, true);
+ const box2d& dom = input.domain();
+
+ image2d<unsigned> area(dom);
+ image2d<unsigned> parent(dom);
+ image2d<bool> output(dom);
+
+ unsigned max_area = 0;
+
+
+ // 1st pass = bg union-find
+
+ {
+ union_find(input, false, // in
+ parent, area, max_area // out
+ );
+ }
+
+
+ // echo
+ // std::cout << "max_area = " << max_area << std::endl;
+
+
+ // 2nd pass = bg biggest component selection
+
+ {
+ const unsigned nelements = input.nelements();
+ const bool* p_i = input.buffer();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ if (*p_i == true)
+ *p_o = true;
+ else
+ {
+ if (*p_par == i)
+ *p_o = (*p_a != max_area);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_i;
+ ++p_o;
+ ++p_a;
+ ++p_par;
+ }
+ }
+
+
+
+ // 3rd pass = fg union-find
- for_all_lines(l, lines)
- if (lines(l).type() == line::Text)
- data::fill((content | lines(l).bbox()).rw(), false);
+ {
+ union_find(output, true, // in
+ parent, area, max_area // out
+ );
+ }
- typedef mln::value::rgb<5> t_rgb5;
- typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
- image2d<t_rgb5>
- img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
- img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
- data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+ // 4th pass = cleaning fg
- mln::util::array<unsigned>
- card = mln::labeling::compute(accu::math::count<value::label_8>(),
- img_lbl8, img_lbl8, 3);
+ {
+ const unsigned nelements = input.nelements();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
- unsigned max = 0, bg_id = 0;
- for_all_ncomponents(c, 3)
- if (card(c) > max)
+ for (unsigned i = 0; i < nelements; ++i)
{
- max = card(c);
- bg_id = c;
+ if (*p_o == true)
+ {
+ if (*p_par == i)
+ *p_o = (*p_a > lambda);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_o;
+ ++p_a;
+ ++p_par;
}
+ }
+
- mln::fun::i2v::array<bool> f(4, true);
- f(0) = false;
- f(bg_id) = false;
- labeling::relabel_inplace(img_lbl8, 4, f);
+ return output;
}
+ } // end of namespace scribo::primitive::extract::internal
- component_set<L> output;
- std::cout << "Removing small elements" << std::endl;
- {
- image2d<bool> elts;
- initialize(elts, img_lbl8);
- data::fill(elts, false);
- data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
- scribo::def::lbl_type nlabels;
- elts = filter::components_small(elts, c8(), nlabels, 40);
+ // FACADE
+
+ template <typename L>
+ component_set<L>
+ non_text(const document<L>& doc, unsigned nlines)
+ {
+ trace::entering("scribo::primitive::extract::non_text");
+
- output = primitive::extract::components(elts, c8(), nlabels);
+ util::timer t;
+ t.start();
+
+ mln_precondition(doc.is_valid());
+
+ mln_precondition(doc.has_line_seps());
+ mln_precondition(doc.has_text());
+
+ // FIXME: Do these images exist elsewhere?
+ image2d<bool>
+ txt = make::text_components_image(doc),
+ txtblocks = make::text_blocks_image(doc, nlines);
+
+ unsigned nbits = 5;
+ float p = 0.9998; // 0.80 <= x < 1.0
+ unsigned lambda = 1000;
+
+ // enlarge the text mask so that "not txt" does not include
+ // any text pixel
+ txt = morpho::elementary::dilation(txt, c8());
+ txt = morpho::elementary::dilation(txt, c4());
+
+ // FIXME: Make it faster?
+ data::fill((txtblocks | pw::value(txt)).rw(), false);
+
+ // Debug
+ {
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txt, "txt_components");
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txtblocks, "txt_blocks");
}
+ image2d<bool>
+ element_image = internal::learn(doc, txt, txtblocks, nbits, p);
+ element_image = internal::cleaning(element_image, lambda);
- std::cout << "Ignoring inner elements" << std::endl;
+ mln_value(L) ncomps;
+ component_set<L>
+ elements = primitive::extract::components(element_image,
+ c8(), ncomps);
+ // Debug
{
- // FIXME: We would like to use the convex hull instead of the bbox.
- internal::order_bbox<L> func(output);
- util::array<unsigned> box_ordered_comps;
- for (unsigned i = 1; i < output.nelements(); ++i)
- box_ordered_comps.append(i);
- std::sort(box_ordered_comps.hook_std_vector_().begin(),
- box_ordered_comps.hook_std_vector_().end(), func);
-
- image2d<bool> merged_elts;
- initialize(merged_elts, img_lbl8);
- data::fill(merged_elts, false);
- for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
- {
- unsigned c = box_ordered_comps(i);
- point2d
- pminright = output(c).bbox().pmin(),
- pmaxleft = output(c).bbox().pmax();
- pminright.col() = output(c).bbox().pmax().col();
- pmaxleft.col() = output(c).bbox().pmin().col();
-
- if (merged_elts(output(c).bbox().pmin())
- && merged_elts(output(c).bbox().pmax())
- && merged_elts(pminright)
- && merged_elts(pmaxleft))
- output(c).update_tag(component::Ignored);
- else
- mln::draw::box_plain(merged_elts, output(c).bbox(), true);
- }
+ debug::logger().log_image(debug::Results,
+ elements.labeled_image(),
+ "non_text_components");
}
trace::exiting("scribo::primitive::extract::non_text");
- return output;
+ return elements;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text_kmean.hh
similarity index 91%
copy from scribo/scribo/primitive/extract/non_text.hh
copy to scribo/scribo/primitive/extract/non_text_kmean.hh
index 8528782..c76ce11 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text_kmean.hh
@@ -30,8 +30,8 @@
///
/// \fixme To be optimized!
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
# include <mln/core/image/image2d.hh>
# include <mln/core/alias/neighb2d.hh>
@@ -72,7 +72,7 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text_kmean(const document<L>& doc, const Image<I>& input);
# ifndef MLN_INCLUDE_ONLY
@@ -107,9 +107,9 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
+ non_text_kmean(const document<L>& doc, const Image<I>& input_)
{
- trace::entering("scribo::primitive::extract::non_text");
+ trace::entering("scribo::primitive::extract::non_text_kmean");
const I& input = exact(input_);
mln_precondition(doc.is_valid());
@@ -136,7 +136,8 @@ namespace scribo
img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
+ mln::clustering::kmean_rgb<double,5>(
+ (img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
mln::util::array<unsigned>
@@ -207,7 +208,7 @@ namespace scribo
}
}
- trace::exiting("scribo::primitive::extract::non_text");
+ trace::exiting("scribo::primitive::extract::non_text_kmean");
return output;
}
@@ -220,4 +221,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
--
1.5.6.5
1
0

last-svn-commit-808-ged1425b New non-text components extraction routine.
by Guillaume Lazzara 14 Mar '11
by Guillaume Lazzara 14 Mar '11
14 Mar '11
* scribo/make/text_blocks_image.hh,
* scribo/make/text_components_image.hh,
* scribo/primitive/extract/internal/union.hh: New.
* scribo/primitive/extract/non_text.hh: New implementation.
* scribo/primitive/extract/non_text_kmean.hh: New. Old
implementation is saved here.
---
scribo/ChangeLog | 13 +
scribo/scribo/make/text_blocks_image.hh | 136 ++++++++
scribo/scribo/make/text_components_image.hh | 101 ++++++
scribo/scribo/primitive/extract/internal/union.hh | 246 +++++++++++++
scribo/scribo/primitive/extract/non_text.hh | 366 ++++++++++++++------
.../extract/{non_text.hh => non_text_kmean.hh} | 17 +-
6 files changed, 761 insertions(+), 118 deletions(-)
create mode 100644 scribo/scribo/make/text_blocks_image.hh
create mode 100644 scribo/scribo/make/text_components_image.hh
create mode 100644 scribo/scribo/primitive/extract/internal/union.hh
copy scribo/scribo/primitive/extract/{non_text.hh => non_text_kmean.hh} (91%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 43041fe..32dda87 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New non-text components extraction routine.
+
+ * scribo/make/text_blocks_image.hh,
+ * scribo/make/text_components_image.hh,
+ * scribo/primitive/extract/internal/union.hh: New.
+
+ * scribo/primitive/extract/non_text.hh: New implementation.
+
+ * scribo/primitive/extract/non_text_kmean.hh: New. Old
+ implementation is saved here.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Introduce result image output.
* scribo/fun/v2v/highlight.hh,
diff --git a/scribo/scribo/make/text_blocks_image.hh b/scribo/scribo/make/text_blocks_image.hh
new file mode 100644
index 0000000..fbc16df
--- /dev/null
+++ b/scribo/scribo/make/text_blocks_image.hh
@@ -0,0 +1,136 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+
+/// \file
+///
+/// \brief Create a mask of paragraph blocks.
+
+# include <mln/core/image/image2d.hh>
+# include <mln/draw/box_plain.hh>
+
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a mask of paragraph blocks.
+ /*!
+ \pre \p doc.has_text() MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines);
+
+
+ # ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines)
+ {
+ trace::entering("scribo::make::text_blocks_image");
+ // Draws, for each paragraph with at least min_nlines lines, its line bboxes and the gaps between consecutive lines.
+ mln_precondition(doc.is_open());
+ // NOTE(review): the return type is mln_ch_value(L,bool) but output is an image2d<bool> -- confirm both agree for the L in use.
+ image2d<bool> output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, output);
+ // data::fill(log, 0);
+ // Without text information the all-false mask is returned as is.
+ if (doc.has_text())
+ {
+ const paragraph_set<L>& parset = doc.paragraphs();
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= min_nlines)
+ {
+ box2d last_box;
+ // last_box holds the bbox of the previously drawn line of this paragraph (default-constructed before the first one).
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>&
+ line = parset.lines()(parset(p).line_ids()(l));
+ // Fill the vertical gap between the previous line and this one, over the columns both boxes share.
+ // Avoid invalid case:
+ //
+ // =======
+ // ======
+ if (last_box.is_valid()
+ && last_box.pmax().row() < line.bbox().pmin().row()
+ && last_box.pmin().col() < line.bbox().pmax().col())
+ {
+ point2d
+ pmin(std::min(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::max(last_box.pmin().col(),
+ line.bbox().pmin().col())),
+ pmax(std::max(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(),
+ line.bbox().pmax().col()));
+ // NOTE(review): only one horizontal ordering is tested above; if line starts right of last_box.pmax().col(), pmin.col() > pmax.col() -- TODO confirm new_box is then handled.
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(output, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ mln::draw::box_plain(output, line.bbox(), true);
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+ }
+
+
+ trace::exiting("scribo::make::text_blocks_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
diff --git a/scribo/scribo/make/text_components_image.hh b/scribo/scribo/make/text_components_image.hh
new file mode 100644
index 0000000..e7c892e
--- /dev/null
+++ b/scribo/scribo/make/text_components_image.hh
@@ -0,0 +1,101 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+
+/// \file
+///
+/// Create a binary image with text components only.
+
+#include <mln/pw/all.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+#include <mln/util/array.hh>
+#include <mln/data/fill.hh>
+
+#include <scribo/core/document.hh>
+#include <scribo/core/line_set.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a binary image with text components only.
+ /*!
+ \pre \p doc.has_text() MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc)
+ {
+ trace::entering("scribo::make::text_components_image");
+ // Builds a boolean image, initialized from doc.image(), meant to keep text components only.
+ mln_precondition(doc.is_open());
+
+ mln_ch_value(L,bool) output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+ // Without text information the all-false image is returned as is.
+ if (doc.has_text())
+ {
+ const scribo::line_set<L>& lines = doc.lines();
+ for_all_lines(l, doc.lines())
+ if (lines(l).is_textline()) // other lines contribute nothing
+ {
+ const util::array<component_id_t>&
+ comp_ids = lines(l).component_ids();
+ const L& lbl = lines.components().labeled_image(); // selects the sites owned by each component below
+ for_all_elements(c, comp_ids) // one fill per component, limited to its bbox
+ data::fill((output | lines.components()(comp_ids(c)).bbox()).rw(),
+ ((doc.binary_image() | lines.components()(comp_ids(c)).bbox()) // source: binary image on the same bbox...
+ | (pw::value(lbl) == comp_ids(c)))); // ...restricted to sites labeled comp_ids(c); presumably copied into output -- TODO confirm data::fill semantics
+ }
+ }
+
+ trace::exiting("scribo::make::text_components_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
diff --git a/scribo/scribo/primitive/extract/internal/union.hh b/scribo/scribo/primitive/extract/internal/union.hh
new file mode 100644
index 0000000..509a7e4
--- /dev/null
+++ b/scribo/scribo/primitive/extract/internal/union.hh
@@ -0,0 +1,246 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Various utilities for image extraction
+///
+/// \fixme To be cleaned up.
+
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/border/fill.hh>
+
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace extract
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+
+ unsigned find_root(image2d<unsigned>& parent, unsigned x);
+
+
+ void union_find(const image2d<bool>& input, bool lab,
+ image2d<unsigned>& parent, image2d<unsigned>& area,
+ unsigned& max_area);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ inline // Returns the root of x in the forest stored in parent.
+ unsigned find_root(image2d<unsigned>& parent, unsigned x)
+ {
+ if (parent.element(x) == x) // x is its own parent: it is a root
+ return x;
+ return parent.element(x) = find_root(parent, parent.element(x)); // path compression: x is re-linked directly to the root found
+ }
+
+
+ inline
+ void union_find(const image2d<bool>& input,
+ bool lab,
+ // output:
+ image2d<unsigned>& parent,
+ image2d<unsigned>& area,
+ unsigned& max_area)
+ {
+ const unsigned nrows = input.nrows(), ncols = input.ncols();
+ // Raster-scan union-find over the pixels of input equal to lab; each pixel is linked to its up and left neighbours.
+ unsigned op, on, delta = input.delta_index(dpoint2d(1, 0));
+ // op: offset of the current pixel; on: offset of its left (row 0) or upper (other rows) neighbour; delta: offset step between two rows.
+ data::fill(parent, 0);
+ max_area = 0; // NOTE(review): only raised when a set grows; stays 0 if every set is a single pixel
+
+ {
+
+ // row == 0 and col == 0
+
+ op = input.index_of_point(point2d(0,0));
+ if (input.element(op) == lab)
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+
+ // row = 0: only the left neighbour is tested
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ on = op;
+ ++op;
+
+ if (input.element(op) != lab)
+ continue;
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+ }
+ }
+
+
+ for (unsigned row = 1; row < nrows; ++row)
+ {
+
+ {
+ // col == 0: only the upper neighbour is tested
+
+ op = input.index_of_point(point2d(row, 0));
+ on = op - delta;
+
+ if (input.element(op) == lab)
+ {
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+ }
+
+ }
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ ++op;
+ ++on;
+
+ if (input.element(op) != lab)
+ continue;
+
+ bool merge_ = false;
+
+ // up: attach the pixel to the root of its upper neighbour
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+
+
+ // left: attach the pixel to, or merge its set with, the set of the left neighbour
+
+ unsigned ol = op - 1;
+
+ if (input.element(ol) == lab)
+ {
+ if (merge_)
+ {
+ if (input.element(on - 1) != lab) // not already merged (an upper-left pixel equal to lab already links up and left)
+ {
+ unsigned r_op = parent.element(op), r_ol = find_root(parent, ol); // parent(op) is the root set by the "up" step
+ if (r_op != r_ol)
+ {
+ // do-union: the root with the smaller offset becomes the parent and takes the summed area
+ if (r_op < r_ol)
+ {
+ parent.element(r_ol) = r_op;
+ area.element(r_op) += area.element(r_ol);
+ if (area.element(r_op) > max_area)
+ max_area = area.element(r_op);
+ }
+ else
+ {
+ parent.element(r_op) = r_ol;
+ area.element(r_ol) += area.element(r_op);
+ if (area.element(r_ol) > max_area)
+ max_area = area.element(r_ol);
+ }
+ }
+ }
+ } // end of "if (merge_)"
+ else
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, ol);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+ }
+
+
+ // finalization: neither neighbour equals lab, so the pixel starts a new set
+
+ if (merge_ == false)
+ {
+ parent.element(op) = op;
+ area.element(op) = 1;
+ }
+
+ }
+ }
+
+ } // end of 'union_find'
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::primitive::extract::internal
+
+ } // end of namespace scribo::primitive::extract
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text.hh
index 8528782..a456270 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -33,30 +33,19 @@
#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# include <mln/core/image/image2d.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/data/fill.hh>
-# include <mln/util/array.hh>
-# include <mln/labeling/compute.hh>
-# include <mln/labeling/relabel.hh>
-# include <mln/accu/math/count.hh>
-# include <mln/pw/all.hh>
+# include <mln/morpho/elementary/dilation.hh>
-# include <mln/draw/box_plain.hh>
-# include <mln/value/label_8.hh>
-# include <mln/value/rgb.hh>
-# include <mln/value/rgb8.hh>
+# include <scribo/make/text_components_image.hh>
+# include <scribo/make/text_blocks_image.hh>
-# include <scribo/core/macros.hh>
-# include <scribo/core/component_set.hh>
-# include <scribo/core/document.hh>
-# include <scribo/core/line_set.hh>
-# include <scribo/core/def/lbl_type.hh>
-# include <scribo/filter/objects_small.hh>
+# include <scribo/primitive/extract/internal/union.hh>
+# include <scribo/debug/logger.hh>
+
+//DEBUG
+#include <mln/util/timer.hh>
+#include <mln/io/pbm/save.hh>
-# include <mln/clustering/kmean_rgb.hh>
-# include <mln/fun/v2v/rgb8_to_rgbn.hh>
namespace scribo
{
@@ -69,10 +58,24 @@ namespace scribo
using namespace mln;
+ /*! \brief Extract non text components.
+
+ This method takes text localization into account and tries to
+ learn the background colors to deduce the relevant non text
+ components.
+
+ \param[in] doc A document structure. It must have paragraph
+ information.
+
+ \param[in] nlines The minimum number of lines a paragraph must
+ have to be considered during the background color learning.
+
- template <typename L, typename I>
+ \return A component set of non text components.
+ */
+ template <typename L>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text(const document<L>& doc, unsigned nlines);
# ifndef MLN_INCLUDE_ONLY
@@ -82,133 +85,276 @@ namespace scribo
{
template <typename L>
- struct order_bbox
+ image2d<bool>
+ learn(const document<L>& doc,
+ const image2d<bool>& txt,
+ const image2d<bool>& txtblocks,
+ unsigned nbits,
+ float p_cover)
{
- order_bbox(const scribo::component_set<L>& comps)
- : comps_(comps)
+ const image2d<value::rgb8>& input = doc.image();
+ const image2d<bool>&
+ seps = doc.paragraphs().lines().components().separators();
+
+ if (txt.border() != input.border()
+ || txtblocks.border() != input.border()
+ || seps.border() != input.border())
{
+ std::cerr << " txt.border() = " << txt.border()
+ << " - txtblocks.border() = " << txtblocks.border()
+ << " - input.border() = " << input.border()
+ << " - seps.border() = " << seps.border()
+ << std::endl;
+ std::cerr << "different sizes for borders! Resizing..." << std::endl;
+
+
+ border::resize(txt, border::thickness);
+ border::resize(input, border::thickness);
+ border::resize(txtblocks, border::thickness);
+ border::resize(seps, border::thickness);
+ // std::abort();
}
- bool operator()(const unsigned& c1, const unsigned& c2) const
+
+ const unsigned q_div = std::pow(2, 8 - nbits);
+ const unsigned q = unsigned(std::pow(2, nbits));
+ const unsigned nelements = input.nelements();
+
+
+ image3d<unsigned> h_bg(q, q, q);
+ data::fill(h_bg, 0);
+
+ border::fill(txtblocks, false); // so h_bg is not updated by border pixels!
+
+ unsigned n_bg = 0;
{
- if (comps_(c1).bbox().nsites() == comps_(c2).bbox().nsites())
- return c1 > c2;
- return comps_(c1).bbox().nsites() > comps_(c2).bbox().nsites();
+ // compute h_bg
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txtblocks.element(i) == true)
+ {
+ ++n_bg;
+ const value::rgb8& c = input.element(i);
+ ++h_bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
}
- scribo::component_set<L> comps_;
- };
+ typedef std::map<unsigned, unsigned> map_t;
+ map_t ncells_with_nitems;
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ {
+ unsigned nitems_in_c = h_bg(c);
+ ++ncells_with_nitems[ nitems_in_c ];
+ }
+ }
- } // end of namespace scribo::primitive::extract::internal
+ unsigned n_items_min = 0;
+ {
+ map_t::const_reverse_iterator i;
+ unsigned N = 0;
+ for (i = ncells_with_nitems.rbegin(); i != ncells_with_nitems.rend(); ++i)
+ {
+ unsigned nitems = i->first, ncells = i->second;
+ N += nitems * ncells;
+ if (float(N) > p_cover * float(n_bg))
+ {
+ n_items_min = nitems;
+ break;
+ }
+ }
+ }
+ if (n_items_min == 0)
+ n_items_min = 1; // safety
- // FACADE
+ image3d<bool> bg(q, q, q);
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ bg(c) = (h_bg(c) >= n_items_min);
+ }
- template <typename L, typename I>
- component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
- {
- trace::entering("scribo::primitive::extract::non_text");
- const I& input = exact(input_);
- mln_precondition(doc.is_valid());
- mln_precondition(input.is_valid());
+ // outputting
- const line_set<L>& lines = doc.lines();
+ image2d<bool> output;
+ initialize(output, input);
+ {
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txt.element(i) == true || seps.element(i) == true)
+ output.element(i) = false;
+ else
+ {
+ const value::rgb8& c = input.element(i);
+ output.element(i) = ! bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
+ }
- // Element extraction
+ return output;
+ }
- image2d<value::label_8> img_lbl8;
+
+
+
+
+ inline
+ image2d<bool>
+ cleaning(const image2d<bool>& input, unsigned lambda)
{
- image2d<bool> content;
- initialize(content, input);
- data::fill(content, true);
+ const box2d& dom = input.domain();
+
+ image2d<unsigned> area(dom);
+ image2d<unsigned> parent(dom);
+ image2d<bool> output(dom);
+
+ unsigned max_area = 0;
+
+
+ // 1st pass = bg union-find
+
+ {
+ union_find(input, false, // in
+ parent, area, max_area // out
+ );
+ }
+
+
+ // echo
+ // std::cout << "max_area = " << max_area << std::endl;
+
+
+ // 2nd pass = bg biggest component selection
+
+ {
+ const unsigned nelements = input.nelements();
+ const bool* p_i = input.buffer();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ if (*p_i == true)
+ *p_o = true;
+ else
+ {
+ if (*p_par == i)
+ *p_o = (*p_a != max_area);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_i;
+ ++p_o;
+ ++p_a;
+ ++p_par;
+ }
+ }
+
+
+
+ // 3rd pass = fg union-find
- for_all_lines(l, lines)
- if (lines(l).type() == line::Text)
- data::fill((content | lines(l).bbox()).rw(), false);
+ {
+ union_find(output, true, // in
+ parent, area, max_area // out
+ );
+ }
- typedef mln::value::rgb<5> t_rgb5;
- typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
- image2d<t_rgb5>
- img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
- img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
- data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+ // 4th pass = cleaning fg
- mln::util::array<unsigned>
- card = mln::labeling::compute(accu::math::count<value::label_8>(),
- img_lbl8, img_lbl8, 3);
+ {
+ const unsigned nelements = input.nelements();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
- unsigned max = 0, bg_id = 0;
- for_all_ncomponents(c, 3)
- if (card(c) > max)
+ for (unsigned i = 0; i < nelements; ++i)
{
- max = card(c);
- bg_id = c;
+ if (*p_o == true)
+ {
+ if (*p_par == i)
+ *p_o = (*p_a > lambda);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_o;
+ ++p_a;
+ ++p_par;
}
+ }
+
- mln::fun::i2v::array<bool> f(4, true);
- f(0) = false;
- f(bg_id) = false;
- labeling::relabel_inplace(img_lbl8, 4, f);
+ return output;
}
+ } // end of namespace scribo::primitive::extract::internal
- component_set<L> output;
- std::cout << "Removing small elements" << std::endl;
- {
- image2d<bool> elts;
- initialize(elts, img_lbl8);
- data::fill(elts, false);
- data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
- scribo::def::lbl_type nlabels;
- elts = filter::components_small(elts, c8(), nlabels, 40);
+ // FACADE
+
+ template <typename L>
+ component_set<L>
+ non_text(const document<L>& doc, unsigned nlines)
+ {
+ trace::entering("scribo::primitive::extract::non_text");
+
- output = primitive::extract::components(elts, c8(), nlabels);
+ util::timer t;
+ t.start();
+
+ mln_precondition(doc.is_valid());
+
+ mln_precondition(doc.has_line_seps());
+ mln_precondition(doc.has_text());
+
+ // FIXME: Do these images exist elsewhere?
+ image2d<bool>
+ txt = make::text_components_image(doc),
+ txtblocks = make::text_blocks_image(doc, nlines);
+
+ unsigned nbits = 5;
+ float p = 0.9998; // 0.80 <= x < 1.0
+ unsigned lambda = 1000;
+
+ // enlarge the text mask so that "not txt" does not include
+ // any text pixel
+ txt = morpho::elementary::dilation(txt, c8());
+ txt = morpho::elementary::dilation(txt, c4());
+
+ // FIXME: Make it faster?
+ data::fill((txtblocks | pw::value(txt)).rw(), false);
+
+ // Debug
+ {
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txt, "txt_components");
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txtblocks, "txt_blocks");
}
+ image2d<bool>
+ element_image = internal::learn(doc, txt, txtblocks, nbits, p);
+ element_image = internal::cleaning(element_image, lambda);
- std::cout << "Ignoring inner elements" << std::endl;
+ mln_value(L) ncomps;
+ component_set<L>
+ elements = primitive::extract::components(element_image,
+ c8(), ncomps);
+ // Debug
{
- // FIXME: We would like to use the convex hull instead of the bbox.
- internal::order_bbox<L> func(output);
- util::array<unsigned> box_ordered_comps;
- for (unsigned i = 1; i < output.nelements(); ++i)
- box_ordered_comps.append(i);
- std::sort(box_ordered_comps.hook_std_vector_().begin(),
- box_ordered_comps.hook_std_vector_().end(), func);
-
- image2d<bool> merged_elts;
- initialize(merged_elts, img_lbl8);
- data::fill(merged_elts, false);
- for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
- {
- unsigned c = box_ordered_comps(i);
- point2d
- pminright = output(c).bbox().pmin(),
- pmaxleft = output(c).bbox().pmax();
- pminright.col() = output(c).bbox().pmax().col();
- pmaxleft.col() = output(c).bbox().pmin().col();
-
- if (merged_elts(output(c).bbox().pmin())
- && merged_elts(output(c).bbox().pmax())
- && merged_elts(pminright)
- && merged_elts(pmaxleft))
- output(c).update_tag(component::Ignored);
- else
- mln::draw::box_plain(merged_elts, output(c).bbox(), true);
- }
+ debug::logger().log_image(debug::Results,
+ elements.labeled_image(),
+ "non_text_components");
}
trace::exiting("scribo::primitive::extract::non_text");
- return output;
+ return elements;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text_kmean.hh
similarity index 91%
copy from scribo/scribo/primitive/extract/non_text.hh
copy to scribo/scribo/primitive/extract/non_text_kmean.hh
index 8528782..c76ce11 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text_kmean.hh
@@ -30,8 +30,8 @@
///
/// \fixme To be optimized!
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
# include <mln/core/image/image2d.hh>
# include <mln/core/alias/neighb2d.hh>
@@ -72,7 +72,7 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text_kmean(const document<L>& doc, const Image<I>& input);
# ifndef MLN_INCLUDE_ONLY
@@ -107,9 +107,9 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
+ non_text_kmean(const document<L>& doc, const Image<I>& input_)
{
- trace::entering("scribo::primitive::extract::non_text");
+ trace::entering("scribo::primitive::extract::non_text_kmean");
const I& input = exact(input_);
mln_precondition(doc.is_valid());
@@ -136,7 +136,8 @@ namespace scribo
img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
+ mln::clustering::kmean_rgb<double,5>(
+ (img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
mln::util::array<unsigned>
@@ -207,7 +208,7 @@ namespace scribo
}
}
- trace::exiting("scribo::primitive::extract::non_text");
+ trace::exiting("scribo::primitive::extract::non_text_kmean");
return output;
}
@@ -220,4 +221,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
--
1.5.6.5
1
0

last-svn-commit-809-g21ca187 New non-text components extraction routine.
by Guillaume Lazzara 14 Mar '11
by Guillaume Lazzara 14 Mar '11
14 Mar '11
* scribo/make/text_blocks_image.hh,
* scribo/make/text_components_image.hh,
* scribo/primitive/extract/internal/union.hh: New.
* scribo/primitive/extract/non_text.hh: New implementation.
* scribo/primitive/extract/non_text_kmean.hh: New. Old
implementation is saved here.
---
scribo/ChangeLog | 13 +
scribo/scribo/make/text_blocks_image.hh | 136 ++++++++
scribo/scribo/make/text_components_image.hh | 101 ++++++
scribo/scribo/primitive/extract/internal/union.hh | 246 +++++++++++++
scribo/scribo/primitive/extract/non_text.hh | 366 ++++++++++++++------
.../extract/{non_text.hh => non_text_kmean.hh} | 17 +-
6 files changed, 761 insertions(+), 118 deletions(-)
create mode 100644 scribo/scribo/make/text_blocks_image.hh
create mode 100644 scribo/scribo/make/text_components_image.hh
create mode 100644 scribo/scribo/primitive/extract/internal/union.hh
copy scribo/scribo/primitive/extract/{non_text.hh => non_text_kmean.hh} (91%)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 3e77d78..a3de9ea 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,18 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ New non-text components extraction routine.
+
+ * scribo/make/text_blocks_image.hh,
+ * scribo/make/text_components_image.hh,
+ * scribo/primitive/extract/internal/union.hh: New.
+
+ * scribo/primitive/extract/non_text.hh: New implementation.
+
+ * scribo/primitive/extract/non_text_kmean.hh: New. Old
+ implementation is saved here.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Introduce result image output.
* scribo/fun/v2v/highlight.hh,
diff --git a/scribo/scribo/make/text_blocks_image.hh b/scribo/scribo/make/text_blocks_image.hh
new file mode 100644
index 0000000..fbc16df
--- /dev/null
+++ b/scribo/scribo/make/text_blocks_image.hh
@@ -0,0 +1,136 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
+
+/// \file
+///
+/// \brief Create a mask of paragraph blocks.
+
+# include <mln/core/image/image2d.hh>
+# include <mln/draw/box_plain.hh>
+
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a mask of paragraph blocks.
+ /*!
+ \pre The has_text() method of \p doc MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines);
+
+
+ # ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_blocks_image(const document<L>& doc, unsigned min_nlines)
+ {
+ trace::entering("scribo::make::text_blocks_image");
+
+ mln_precondition(doc.is_open());
+
+ image2d<bool> output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+
+ // image2d<value::int_u8> log;
+ // initialize(log, output);
+ // data::fill(log, 0);
+
+ if (doc.has_text())
+ {
+ const paragraph_set<L>& parset = doc.paragraphs();
+ for_all_paragraphs(p, parset)
+ if (parset(p).nlines() >= min_nlines)
+ {
+ box2d last_box;
+
+ // For each line in this paragraph.
+ for_all_elements(l, parset(p).line_ids())
+ {
+ const line_info<L>&
+ line = parset.lines()(parset(p).line_ids()(l));
+
+ // Avoid invalid case:
+ //
+ // =======
+ // ======
+ if (last_box.is_valid()
+ && last_box.pmax().row() < line.bbox().pmin().row()
+ && last_box.pmin().col() < line.bbox().pmax().col())
+ {
+ point2d
+ pmin(std::min(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::max(last_box.pmin().col(),
+ line.bbox().pmin().col())),
+ pmax(std::max(last_box.pmax().row(),
+ line.bbox().pmin().row()),
+ std::min(last_box.pmax().col(),
+ line.bbox().pmax().col()));
+
+ box2d new_box(pmin, pmax);
+ mln::draw::box_plain(output, new_box, true);
+ // mln::draw::box_plain(log, new_box, 1);
+
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ }
+
+ mln::draw::box_plain(output, line.bbox(), true);
+ // mln::draw::box_plain(log, line.bbox(), 255);
+ // mln::io::pgm::save(log, mln::debug::filename("log.pgm", i++));
+ last_box = line.bbox();
+ }
+ }
+ }
+
+
+ trace::exiting("scribo::make::text_blocks_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_BLOCKS_IMAGE_HH
diff --git a/scribo/scribo/make/text_components_image.hh b/scribo/scribo/make/text_components_image.hh
new file mode 100644
index 0000000..e7c892e
--- /dev/null
+++ b/scribo/scribo/make/text_components_image.hh
@@ -0,0 +1,101 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+# define SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
+
+/// \file
+///
+/// Create a binary image with text components only.
+
+#include <mln/pw/all.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+#include <mln/util/array.hh>
+#include <mln/data/fill.hh>
+
+#include <scribo/core/document.hh>
+#include <scribo/core/line_set.hh>
+
+
+namespace scribo
+{
+
+ namespace make
+ {
+ using namespace mln;
+
+
+ /// \brief Create a binary image with text components only.
+ /*!
+ \pre The has_text() method of \p doc MUST return true.
+ */
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ template <typename L>
+ mln_ch_value(L,bool)
+ text_components_image(const document<L>& doc)
+ {
+ trace::entering("scribo::make::text_components_image");
+
+ mln_precondition(doc.is_open());
+
+ mln_ch_value(L,bool) output;
+ initialize(output, doc.image());
+ data::fill(output, false);
+
+ if (doc.has_text())
+ {
+ const scribo::line_set<L>& lines = doc.lines();
+ for_all_lines(l, doc.lines())
+ if (lines(l).is_textline())
+ {
+ const util::array<component_id_t>&
+ comp_ids = lines(l).component_ids();
+ const L& lbl = lines.components().labeled_image();
+ for_all_elements(c, comp_ids)
+ data::fill((output | lines.components()(comp_ids(c)).bbox()).rw(),
+ ((doc.binary_image() | lines.components()(comp_ids(c)).bbox())
+ | (pw::value(lbl) == comp_ids(c))));
+ }
+ }
+
+ trace::exiting("scribo::make::text_components_image");
+ return output;
+ }
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::make
+
+} // end of namespace scribo
+
+
+
+#endif // ! SCRIBO_MAKE_TEXT_COMPONENTS_IMAGE_HH
diff --git a/scribo/scribo/primitive/extract/internal/union.hh b/scribo/scribo/primitive/extract/internal/union.hh
new file mode 100644
index 0000000..509a7e4
--- /dev/null
+++ b/scribo/scribo/primitive/extract/internal/union.hh
@@ -0,0 +1,246 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+/// \file
+///
+/// \brief Union-find utilities used for image component extraction.
+///
+/// \fixme To be cleaned up.
+
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
+
+# include <mln/core/image/image2d.hh>
+# include <mln/border/fill.hh>
+
+
+namespace scribo
+{
+
+ namespace primitive
+ {
+
+ namespace extract
+ {
+
+ namespace internal
+ {
+
+ using namespace mln;
+
+
+ unsigned find_root(image2d<unsigned>& parent, unsigned x);
+
+
+ void union_find(const image2d<bool>& input, bool lab,
+ image2d<unsigned>& parent, image2d<unsigned>& area,
+ unsigned& max_area);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ inline
+ unsigned find_root(image2d<unsigned>& parent, unsigned x)
+ {
+ if (parent.element(x) == x)
+ return x;
+ return parent.element(x) = find_root(parent, parent.element(x));
+ }
+
+
+ inline
+ void union_find(const image2d<bool>& input,
+ bool lab,
+ // output:
+ image2d<unsigned>& parent,
+ image2d<unsigned>& area,
+ unsigned& max_area)
+ {
+ const unsigned nrows = input.nrows(), ncols = input.ncols();
+
+ unsigned op, on, delta = input.delta_index(dpoint2d(1, 0));
+
+ data::fill(parent, 0);
+ max_area = 0;
+
+ {
+
+ // row == 0 and col == 0
+
+ op = input.index_of_point(point2d(0,0));
+ if (input.element(op) == lab)
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+
+ // row = 0
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ on = op;
+ ++op;
+
+ if (input.element(op) != lab)
+ continue;
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+
+ }
+ }
+
+
+ for (unsigned row = 1; row < nrows; ++row)
+ {
+
+ {
+ // col == 0
+
+ op = input.index_of_point(point2d(row, 0));
+ on = op - delta;
+
+ if (input.element(op) == lab)
+ {
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ }
+ else
+ {
+ area.element(op) = 1;
+ parent.element(op) = op;
+ }
+ }
+
+ }
+
+ for (unsigned col = 1; col < ncols; ++col)
+ {
+ ++op;
+ ++on;
+
+ if (input.element(op) != lab)
+ continue;
+
+ bool merge_ = false;
+
+ // up
+
+ if (input.element(on) == lab)
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, on);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+
+
+ // left
+
+ unsigned ol = op - 1;
+
+ if (input.element(ol) == lab)
+ {
+ if (merge_)
+ {
+ if (input.element(on - 1) != lab) // not already merged
+ {
+ unsigned r_op = parent.element(op), r_ol = find_root(parent, ol);
+ if (r_op != r_ol)
+ {
+ // do-union
+ if (r_op < r_ol)
+ {
+ parent.element(r_ol) = r_op;
+ area.element(r_op) += area.element(r_ol);
+ if (area.element(r_op) > max_area)
+ max_area = area.element(r_op);
+ }
+ else
+ {
+ parent.element(r_op) = r_ol;
+ area.element(r_ol) += area.element(r_op);
+ if (area.element(r_ol) > max_area)
+ max_area = area.element(r_ol);
+ }
+ }
+ }
+ } // end of "if (merge_)"
+ else
+ {
+ unsigned& par_p = parent.element(op);
+ par_p = find_root(parent, ol);
+ ++area.element(par_p);
+ if (area.element(par_p) > max_area)
+ max_area = area.element(par_p);
+ merge_ = true;
+ }
+ }
+
+
+ // finalization
+
+ if (merge_ == false)
+ {
+ parent.element(op) = op;
+ area.element(op) = 1;
+ }
+
+ }
+ }
+
+ } // end of 'union_find'
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::primitive::extract::internal
+
+ } // end of namespace scribo::primitive::extract
+
+ } // end of namespace scribo::primitive
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_INTERNAL_UNION_HH
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text.hh
index 8528782..a456270 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -33,30 +33,19 @@
#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# include <mln/core/image/image2d.hh>
-# include <mln/core/alias/neighb2d.hh>
-# include <mln/data/fill.hh>
-# include <mln/util/array.hh>
-# include <mln/labeling/compute.hh>
-# include <mln/labeling/relabel.hh>
-# include <mln/accu/math/count.hh>
-# include <mln/pw/all.hh>
+# include <mln/morpho/elementary/dilation.hh>
-# include <mln/draw/box_plain.hh>
-# include <mln/value/label_8.hh>
-# include <mln/value/rgb.hh>
-# include <mln/value/rgb8.hh>
+# include <scribo/make/text_components_image.hh>
+# include <scribo/make/text_blocks_image.hh>
-# include <scribo/core/macros.hh>
-# include <scribo/core/component_set.hh>
-# include <scribo/core/document.hh>
-# include <scribo/core/line_set.hh>
-# include <scribo/core/def/lbl_type.hh>
-# include <scribo/filter/objects_small.hh>
+# include <scribo/primitive/extract/internal/union.hh>
+# include <scribo/debug/logger.hh>
+
+//DEBUG
+#include <mln/util/timer.hh>
+#include <mln/io/pbm/save.hh>
-# include <mln/clustering/kmean_rgb.hh>
-# include <mln/fun/v2v/rgb8_to_rgbn.hh>
namespace scribo
{
@@ -69,10 +58,24 @@ namespace scribo
using namespace mln;
+ /*! \brief Extract non text components.
+
+ This method takes text localization into account and tries to
+ learn the background colors to deduce the relevant non text
+ components.
+
+ \param[in] doc A document structure. It must have paragraph
+ information.
+
+ \param[in] nlines The minimum number of lines a paragraph must
+ have to be considered during the background color learning.
+
- template <typename L, typename I>
+ \return A component set of non text components.
+ */
+ template <typename L>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text(const document<L>& doc, unsigned nlines);
# ifndef MLN_INCLUDE_ONLY
@@ -82,133 +85,276 @@ namespace scribo
{
template <typename L>
- struct order_bbox
+ image2d<bool>
+ learn(const document<L>& doc,
+ const image2d<bool>& txt,
+ const image2d<bool>& txtblocks,
+ unsigned nbits,
+ float p_cover)
{
- order_bbox(const scribo::component_set<L>& comps)
- : comps_(comps)
+ const image2d<value::rgb8>& input = doc.image();
+ const image2d<bool>&
+ seps = doc.paragraphs().lines().components().separators();
+
+ if (txt.border() != input.border()
+ || txtblocks.border() != input.border()
+ || seps.border() != input.border())
{
+ std::cerr << " txt.border() = " << txt.border()
+ << " - txtblocks.border() = " << txtblocks.border()
+ << " - input.border() = " << input.border()
+ << " - seps.border() = " << seps.border()
+ << std::endl;
+ std::cerr << "different sizes for borders! Resizing..." << std::endl;
+
+
+ border::resize(txt, border::thickness);
+ border::resize(input, border::thickness);
+ border::resize(txtblocks, border::thickness);
+ border::resize(seps, border::thickness);
+ // std::abort();
}
- bool operator()(const unsigned& c1, const unsigned& c2) const
+
+ const unsigned q_div = std::pow(2, 8 - nbits);
+ const unsigned q = unsigned(std::pow(2, nbits));
+ const unsigned nelements = input.nelements();
+
+
+ image3d<unsigned> h_bg(q, q, q);
+ data::fill(h_bg, 0);
+
+ border::fill(txtblocks, false); // so h_bg is not updated by border pixels!
+
+ unsigned n_bg = 0;
{
- if (comps_(c1).bbox().nsites() == comps_(c2).bbox().nsites())
- return c1 > c2;
- return comps_(c1).bbox().nsites() > comps_(c2).bbox().nsites();
+ // compute h_bg
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txtblocks.element(i) == true)
+ {
+ ++n_bg;
+ const value::rgb8& c = input.element(i);
+ ++h_bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
}
- scribo::component_set<L> comps_;
- };
+ typedef std::map<unsigned, unsigned> map_t;
+ map_t ncells_with_nitems;
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ {
+ unsigned nitems_in_c = h_bg(c);
+ ++ncells_with_nitems[ nitems_in_c ];
+ }
+ }
- } // end of namespace scribo::primitive::extract::internal
+ unsigned n_items_min = 0;
+ {
+ map_t::const_reverse_iterator i;
+ unsigned N = 0;
+ for (i = ncells_with_nitems.rbegin(); i != ncells_with_nitems.rend(); ++i)
+ {
+ unsigned nitems = i->first, ncells = i->second;
+ N += nitems * ncells;
+ if (float(N) > p_cover * float(n_bg))
+ {
+ n_items_min = nitems;
+ break;
+ }
+ }
+ }
+ if (n_items_min == 0)
+ n_items_min = 1; // safety
- // FACADE
+ image3d<bool> bg(q, q, q);
+ {
+ mln_piter_(box3d) c(h_bg.domain());
+ for_all(c)
+ bg(c) = (h_bg(c) >= n_items_min);
+ }
- template <typename L, typename I>
- component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
- {
- trace::entering("scribo::primitive::extract::non_text");
- const I& input = exact(input_);
- mln_precondition(doc.is_valid());
- mln_precondition(input.is_valid());
+ // outputting
- const line_set<L>& lines = doc.lines();
+ image2d<bool> output;
+ initialize(output, input);
+ {
+ for (unsigned i = 0; i < nelements; ++i)
+ if (txt.element(i) == true || seps.element(i) == true)
+ output.element(i) = false;
+ else
+ {
+ const value::rgb8& c = input.element(i);
+ output.element(i) = ! bg.at_(c.red() / q_div, c.green() / q_div, c.blue() / q_div);
+ }
+ }
- // Element extraction
+ return output;
+ }
- image2d<value::label_8> img_lbl8;
+
+
+
+
+ inline
+ image2d<bool>
+ cleaning(const image2d<bool>& input, unsigned lambda)
{
- image2d<bool> content;
- initialize(content, input);
- data::fill(content, true);
+ const box2d& dom = input.domain();
+
+ image2d<unsigned> area(dom);
+ image2d<unsigned> parent(dom);
+ image2d<bool> output(dom);
+
+ unsigned max_area = 0;
+
+
+ // 1st pass = bg union-find
+
+ {
+ union_find(input, false, // in
+ parent, area, max_area // out
+ );
+ }
+
+
+ // echo
+ // std::cout << "max_area = " << max_area << std::endl;
+
+
+ // 2nd pass = bg biggest component selection
+
+ {
+ const unsigned nelements = input.nelements();
+ const bool* p_i = input.buffer();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ if (*p_i == true)
+ *p_o = true;
+ else
+ {
+ if (*p_par == i)
+ *p_o = (*p_a != max_area);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_i;
+ ++p_o;
+ ++p_a;
+ ++p_par;
+ }
+ }
+
+
+
+ // 3rd pass = fg union-find
- for_all_lines(l, lines)
- if (lines(l).type() == line::Text)
- data::fill((content | lines(l).bbox()).rw(), false);
+ {
+ union_find(output, true, // in
+ parent, area, max_area // out
+ );
+ }
- typedef mln::value::rgb<5> t_rgb5;
- typedef mln::fun::v2v::rgb8_to_rgbn<5> t_rgb8_to_rgb5;
- image2d<t_rgb5>
- img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
- img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
- data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
+ // 4th pass = cleaning fg
- mln::util::array<unsigned>
- card = mln::labeling::compute(accu::math::count<value::label_8>(),
- img_lbl8, img_lbl8, 3);
+ {
+ const unsigned nelements = input.nelements();
+ bool* p_o = output.buffer();
+ const unsigned* p_a = area.buffer();
+ const unsigned* p_par = parent.buffer();
- unsigned max = 0, bg_id = 0;
- for_all_ncomponents(c, 3)
- if (card(c) > max)
+ for (unsigned i = 0; i < nelements; ++i)
{
- max = card(c);
- bg_id = c;
+ if (*p_o == true)
+ {
+ if (*p_par == i)
+ *p_o = (*p_a > lambda);
+ else
+ *p_o = output.element(*p_par);
+ }
+ ++p_o;
+ ++p_a;
+ ++p_par;
}
+ }
+
- mln::fun::i2v::array<bool> f(4, true);
- f(0) = false;
- f(bg_id) = false;
- labeling::relabel_inplace(img_lbl8, 4, f);
+ return output;
}
+ } // end of namespace scribo::primitive::extract::internal
- component_set<L> output;
- std::cout << "Removing small elements" << std::endl;
- {
- image2d<bool> elts;
- initialize(elts, img_lbl8);
- data::fill(elts, false);
- data::fill((elts | (pw::value(img_lbl8) != pw::cst(0))).rw(), true);
- scribo::def::lbl_type nlabels;
- elts = filter::components_small(elts, c8(), nlabels, 40);
+ // FACADE
+
+ template <typename L>
+ component_set<L>
+ non_text(const document<L>& doc, unsigned nlines)
+ {
+ trace::entering("scribo::primitive::extract::non_text");
+
- output = primitive::extract::components(elts, c8(), nlabels);
+ util::timer t;
+ t.start();
+
+ mln_precondition(doc.is_valid());
+
+ mln_precondition(doc.has_line_seps());
+ mln_precondition(doc.has_text());
+
+ // FIXME: Do these images exist elsewhere?
+ image2d<bool>
+ txt = make::text_components_image(doc),
+ txtblocks = make::text_blocks_image(doc, nlines);
+
+ unsigned nbits = 5;
+ float p = 0.9998; // 0.80 <= x < 1.0
+ unsigned lambda = 1000;
+
+ // enlarge the text mask so that "not txt" does not include
+ // any text pixel
+ txt = morpho::elementary::dilation(txt, c8());
+ txt = morpho::elementary::dilation(txt, c4());
+
+ // FIXME: Make it faster?
+ data::fill((txtblocks | pw::value(txt)).rw(), false);
+
+ // Debug
+ {
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txt, "txt_components");
+ debug::logger().log_image(debug::Special,//debug::AuxiliaryResults,
+ txtblocks, "txt_blocks");
}
+ image2d<bool>
+ element_image = internal::learn(doc, txt, txtblocks, nbits, p);
+ element_image = internal::cleaning(element_image, lambda);
- std::cout << "Ignoring inner elements" << std::endl;
+ mln_value(L) ncomps;
+ component_set<L>
+ elements = primitive::extract::components(element_image,
+ c8(), ncomps);
+ // Debug
{
- // FIXME: We would like to use the convex hull instead of the bbox.
- internal::order_bbox<L> func(output);
- util::array<unsigned> box_ordered_comps;
- for (unsigned i = 1; i < output.nelements(); ++i)
- box_ordered_comps.append(i);
- std::sort(box_ordered_comps.hook_std_vector_().begin(),
- box_ordered_comps.hook_std_vector_().end(), func);
-
- image2d<bool> merged_elts;
- initialize(merged_elts, img_lbl8);
- data::fill(merged_elts, false);
- for (unsigned i = 0; i < box_ordered_comps.nelements(); ++i)
- {
- unsigned c = box_ordered_comps(i);
- point2d
- pminright = output(c).bbox().pmin(),
- pmaxleft = output(c).bbox().pmax();
- pminright.col() = output(c).bbox().pmax().col();
- pmaxleft.col() = output(c).bbox().pmin().col();
-
- if (merged_elts(output(c).bbox().pmin())
- && merged_elts(output(c).bbox().pmax())
- && merged_elts(pminright)
- && merged_elts(pmaxleft))
- output(c).update_tag(component::Ignored);
- else
- mln::draw::box_plain(merged_elts, output(c).bbox(), true);
- }
+ debug::logger().log_image(debug::Results,
+ elements.labeled_image(),
+ "non_text_components");
}
trace::exiting("scribo::primitive::extract::non_text");
- return output;
+ return elements;
}
# endif // ! MLN_INCLUDE_ONLY
diff --git a/scribo/scribo/primitive/extract/non_text.hh b/scribo/scribo/primitive/extract/non_text_kmean.hh
similarity index 91%
copy from scribo/scribo/primitive/extract/non_text.hh
copy to scribo/scribo/primitive/extract/non_text_kmean.hh
index 8528782..c76ce11 100644
--- a/scribo/scribo/primitive/extract/non_text.hh
+++ b/scribo/scribo/primitive/extract/non_text_kmean.hh
@@ -30,8 +30,8 @@
///
/// \fixme To be optimized!
-#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
-# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#ifndef SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
+# define SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
# include <mln/core/image/image2d.hh>
# include <mln/core/alias/neighb2d.hh>
@@ -72,7 +72,7 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input);
+ non_text_kmean(const document<L>& doc, const Image<I>& input);
# ifndef MLN_INCLUDE_ONLY
@@ -107,9 +107,9 @@ namespace scribo
template <typename L, typename I>
component_set<L>
- non_text(const document<L>& doc, const Image<I>& input_)
+ non_text_kmean(const document<L>& doc, const Image<I>& input_)
{
- trace::entering("scribo::primitive::extract::non_text");
+ trace::entering("scribo::primitive::extract::non_text_kmean");
const I& input = exact(input_);
mln_precondition(doc.is_valid());
@@ -136,7 +136,8 @@ namespace scribo
img_rgb5 = mln::data::transform(doc.image(), t_rgb8_to_rgb5());
img_lbl8 =
- mln::clustering::kmean_rgb<double,5>((img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
+ mln::clustering::kmean_rgb<double,5>(
+ (img_rgb5 | pw::value(content)), 3, 10, 10).unmorph_();
data::fill((img_lbl8 | !pw::value(content)).rw(), 0u);
mln::util::array<unsigned>
@@ -207,7 +208,7 @@ namespace scribo
}
}
- trace::exiting("scribo::primitive::extract::non_text");
+ trace::exiting("scribo::primitive::extract::non_text_kmean");
return output;
}
@@ -220,4 +221,4 @@ namespace scribo
} // end of namespace scribo
-#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_HH
+#endif // ! SCRIBO_PRIMITIVE_EXTRACT_NON_TEXT_KMEAN_HH
--
1.5.6.5
1
0

14 Mar '11
* scribo/fun/v2v/highlight.hh,
* scribo/io/img/internal/debug_img_visitor.hh,
* scribo/io/img/internal/draw_edges.hh,
* scribo/io/img/internal/full_img_visitor.hh,
* scribo/io/img/internal/non_text_img_visitor.hh,
* scribo/io/img/internal/text_img_visitor.hh,
* scribo/io/img/save.hh: New.
---
scribo/ChangeLog | 12 +
.../serializable.hh => fun/v2v/highlight.hh} | 57 +++--
scribo/scribo/io/img/internal/debug_img_visitor.hh | 263 ++++++++++++++++++++
.../internal/draw_edges.hh} | 62 +++---
scribo/scribo/io/img/internal/full_img_visitor.hh | 194 ++++++++++++++
.../scribo/io/img/internal/non_text_img_visitor.hh | 162 ++++++++++++
scribo/scribo/io/img/internal/text_img_visitor.hh | 164 ++++++++++++
scribo/scribo/io/img/save.hh | 242 ++++++++++++++++++
8 files changed, 1104 insertions(+), 52 deletions(-)
copy scribo/scribo/{core/concept/serializable.hh => fun/v2v/highlight.hh} (66%)
create mode 100644 scribo/scribo/io/img/internal/debug_img_visitor.hh
copy scribo/scribo/io/{xml/internal/print_box_coords.hh => img/internal/draw_edges.hh} (56%)
create mode 100644 scribo/scribo/io/img/internal/full_img_visitor.hh
create mode 100644 scribo/scribo/io/img/internal/non_text_img_visitor.hh
create mode 100644 scribo/scribo/io/img/internal/text_img_visitor.hh
create mode 100644 scribo/scribo/io/img/save.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 5b96508..43041fe 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Introduce result image output.
+
+ * scribo/fun/v2v/highlight.hh,
+ * scribo/io/img/internal/debug_img_visitor.hh,
+ * scribo/io/img/internal/draw_edges.hh,
+ * scribo/io/img/internal/full_img_visitor.hh,
+ * scribo/io/img/internal/non_text_img_visitor.hh,
+ * scribo/io/img/internal/text_img_visitor.hh,
+ * scribo/io/img/save.hh: New.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Save image edge coordinates in XML output.
* scribo/io/xml/internal/extended_page_xml_visitor.hh: Here.
diff --git a/scribo/scribo/core/concept/serializable.hh b/scribo/scribo/fun/v2v/highlight.hh
similarity index 66%
copy from scribo/scribo/core/concept/serializable.hh
copy to scribo/scribo/fun/v2v/highlight.hh
index 6e661a6..835be59 100644
--- a/scribo/scribo/core/concept/serializable.hh
+++ b/scribo/scribo/fun/v2v/highlight.hh
@@ -23,42 +23,55 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
-# define SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
+#ifndef SCRIBO_FUN_V2V_HIGHLIGHT_HH
+# define SCRIBO_FUN_V2V_HIGHLIGHT_HH
-/// \file
-///
-/// Concept for serializer visitors.
+/// \brief Function increasing values to highlight areas.
-# include <mln/core/concept/object.hh>
-# include <scribo/core/concept/serialize_visitor.hh>
+# include <mln/core/concept/function.hh>
namespace scribo
{
- /// \brief Link functor concept.
- template <typename E>
- class Serializable : public mln::Object<E>
+ namespace fun
{
- public:
- template <typename E2>
- void accept(const SerializeVisitor<E2>& visitor) const;
- };
+
+ namespace v2v
+ {
+
+ template <typename R>
+ struct highlight
+ : mln::Function_v2v<highlight<R> >
+ {
+ typedef R result;
+
+ highlight();
+ result operator()(const result& v) const;
+ };
# ifndef MLN_INCLUDE_ONLY
- template <typename E>
- template <typename E2>
- void
- Serializable<E>::accept(const SerializeVisitor<E2>& visitor) const
- {
- exact(visitor).visit(exact(*this));
- }
+
+ template <typename R>
+ highlight<R>::highlight()
+ {
+ }
+
+
+ template <typename R>
+ typename highlight<R>::result
+ highlight<R>::operator()(const result& v) const
+ {
+ return v + v / 2;
+ }
# endif // ! MLN_INCLUDE_ONLY
+ } // end of namespace scribo::fun::v2v
+
+ } // end of namespace scribo::fun
} // end of namespace scribo
-#endif // SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
+#endif // ! SCRIBO_FUN_V2V_HIGHLIGHT_HH
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh
new file mode 100644
index 0000000..f1c689d
--- /dev/null
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -0,0 +1,263 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_DEBUG_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_DEBUG_IMG_VISITOR_HH
+
+/// \file
+///
+/// Save document image analysis to a small image.
+
+# include <fstream>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+# include <mln/subsampling/antialiased.hh>
+# include <mln/morpho/elementary/gradient_external.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+# include <scribo/io/img/internal/draw_edges.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+
+ class debug_img_visitor : public doc_serializer<debug_img_visitor>
+ {
+ public:
+ // Constructor
+ debug_img_visitor(mln::image2d<value::rgb8>& out,
+ unsigned output_ratio);
+
+ // Visit overloads
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ void visit(const component_info& info) const;
+
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+ unsigned output_ratio;
+
+ mutable image2d<scribo::def::lbl_type> elt_edge;
+
+
+ private: // Methods
+ box2d compute_bbox(const box2d& b) const;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ inline
+ box2d
+ debug_img_visitor::compute_bbox(const box2d& b) const
+ {
+ point2d
+ pmin = b.pmin() / output_ratio,
+ pmax = b.pmax() / output_ratio;
+
+ return box2d(pmin, pmax);
+ }
+
+
+ inline
+ debug_img_visitor::debug_img_visitor(mln::image2d<value::rgb8>& out,
+ unsigned output_ratio)
+ : output(out), output_ratio(output_ratio)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+ /// Document
+ //
+ template <typename L>
+ void
+ debug_img_visitor::visit(const document<L>& doc) const
+ {
+ // Text
+ if (doc.has_text())
+ doc.paragraphs().accept(*this);
+
+ // Page elements (Pictures, ...)
+ if (doc.has_elements())
+ {
+ // Prepare element edges
+
+ // FIXME: UGLY! Too slow!
+ scribo::def::lbl_type nlabels;
+ component_set<L> elts = primitive::extract::components(
+ data::convert(bool(), mln::subsampling::antialiased(doc.elements().labeled_image(),
+ output_ratio)),
+ c8(),
+ nlabels);
+
+ // Preserving elements tags
+ if (doc.elements().nelements() != elts.nelements())
+ {
+ std::cerr << "Warnig: could not preserve element type in "
+ << "img debug output." << std::endl;
+ std::cerr << "The number of non text element has changed while "
+ << "subsampling images : "
+ << doc.elements().nelements() << " vs "
+ << elts.nelements() << std::endl;
+ }
+ else
+ for_all_comps(c, doc.elements())
+ elts(c).update_type(doc.elements()(c).type());
+
+ elt_edge = morpho::elementary::gradient_external(elts.labeled_image(), c8());
+
+// const component_set<L>& elts = doc.elements();
+ for_all_comps(e, elts)
+ if (elts(e).is_valid())
+ elts(e).accept(*this);
+ }
+
+
+ // line separators
+ if (doc.has_vline_seps())
+ for_all_comps(c, doc.vline_seps_comps())
+ doc.vline_seps_comps()(c).accept(*this);
+ if (doc.has_hline_seps())
+ for_all_comps(c, doc.hline_seps_comps())
+ doc.hline_seps_comps()(c).accept(*this);
+
+ }
+
+
+ /// Component_info
+ //
+ inline
+ void
+ debug_img_visitor::visit(const component_info& info) const
+ {
+ switch (info.type())
+ {
+ case component::LineSeparator:
+ {
+ mln::draw::box(output, compute_bbox(info.bbox()),
+ literal::cyan);
+ }
+ break;
+
+
+ default:
+ case component::Image:
+ {
+ // The bbox does not need to be readjusted to the
+ // subsampled domain since it has been recomputed while
+ // computing the edge image.
+ //
+ // However, the bbox must be enlarged since only the
+ // _external_ edge is computed.
+ box2d b = info.bbox();
+ b.enlarge(1);
+ b.crop_wrt(output.domain());
+ data::fill(((output | b).rw()
+ | (pw::value(elt_edge)
+ == pw::cst((scribo::def::lbl_type)info.id().to_equiv()))).rw(),
+ literal::orange);
+ }
+ break;
+ }
+ }
+
+ /// Paragraph Set
+ //
+ template <typename L>
+ void
+ debug_img_visitor::visit(const paragraph_set<L>& parset) const
+ {
+ const line_set<L>& lines = parset.lines();
+
+ for_all_paragraphs(p, parset)
+ {
+ const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+ for_all_paragraph_lines(lid, line_ids)
+ {
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
+ }
+
+ box2d b = compute_bbox(parset(p).bbox());
+ b.enlarge(1);
+ b.crop_wrt(output.domain());
+ mln::draw::box(output, b, literal::blue);
+ }
+ }
+
+
+ template <typename L>
+ void
+ debug_img_visitor::visit(const line_info<L>& line) const
+ {
+ point2d
+ pmin = line.bbox().pmin(),
+ pmax = line.bbox().pmax();
+ pmax.row() = line.baseline();
+ pmin.row() = line.baseline();
+
+ pmin = pmin / output_ratio;
+ pmax = pmax / output_ratio;
+
+ mln::draw::line(output, pmin, pmax, literal::red);
+ }
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_DEBUG_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/xml/internal/print_box_coords.hh b/scribo/scribo/io/img/internal/draw_edges.hh
similarity index 56%
copy from scribo/scribo/io/xml/internal/print_box_coords.hh
copy to scribo/scribo/io/img/internal/draw_edges.hh
index ad84709..664a352 100644
--- a/scribo/scribo/io/xml/internal/print_box_coords.hh
+++ b/scribo/scribo/io/img/internal/draw_edges.hh
@@ -23,15 +23,26 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
-# define SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
+#ifndef SCRIBO_IO_IMG_INTERNAL_DRAW_EDGES_HH
+# define SCRIBO_IO_IMG_INTERNAL_DRAW_EDGES_HH
/// \file
///
-/// \brief Prints box2d coordinates to XML data.
+/// \brief Draw object edges.
# include <fstream>
-# include <mln/core/alias/box2d.hh>
+# include <mln/core/image/image2d.hh>
+# include <mln/core/image/dmorph/image_if.hh>
+# include <mln/pw/all.hh>
+# include <mln/data/fill.hh>
+# include <mln/data/convert.hh>
+# include <mln/literal/colors.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/morpho/elementary/gradient_external.hh>
+
+# include <scribo/core/component_info.hh>
+# include <scribo/core/def/lbl_type.hh>
+
namespace scribo
{
@@ -39,7 +50,7 @@ namespace scribo
namespace io
{
- namespace xml
+ namespace img
{
namespace internal
@@ -47,11 +58,12 @@ namespace scribo
using namespace mln;
- /*! \brief Prints box2d coordinates to XML data.
+ /*! \brief Draw component edges.
*/
void
- print_box_coords(std::ofstream& ostr, const box2d& b,
- const char *space);
+ draw_edges(const component_info& info,
+ image2d<value::rgb8>& output, const value::rgb8& value,
+ const image2d<scribo::def::lbl_type>& edges);
# ifndef MLN_INCLUDE_ONLY
@@ -59,37 +71,27 @@ namespace scribo
inline
void
- print_box_coords(std::ofstream& ostr, const box2d& b,
- const char *space)
+ draw_edges(const component_info& info,
+ image2d<value::rgb8>& output, const value::rgb8& value,
+ const image2d<scribo::def::lbl_type>& edges)
{
- std::string sc = space;
- std::string sp = sc + " ";
- ostr << sc << "<coords>" << std::endl
- << sp << "<point x=\"" << b.pmin().col()
- << "\" y=\"" << b.pmin().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmax().col()
- << "\" y=\"" << b.pmin().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmax().col()
- << "\" y=\"" << b.pmax().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmin().col()
- << "\" y=\"" << b.pmax().row() << "\"/>"
- << std::endl
- << sc << "</coords>" << std::endl;
-
+ box2d b = info.bbox();
+ b.enlarge(1);
+ data::fill(((output | b).rw()
+ | (pw::value(edges)
+ == pw::cst((scribo::def::lbl_type)info.id().to_equiv()))).rw(),
+ value);
}
# endif // ! MLN_INCLUDE_ONLY
- } // end of namespace scribo::io::xml::internal
+ } // end of namespace scribo::io::img::internal
- } // end of namespace scribo::io::xml
+ } // end of namespace scribo::io::img
} // end of namespace scribo::io
} // end of namespace scribo
-#endif // ! SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
+#endif // ! SCRIBO_IO_IMG_INTERNAL_DRAW_EDGES_HH
diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh
new file mode 100644
index 0000000..30987db
--- /dev/null
+++ b/scribo/scribo/io/img/internal/full_img_visitor.hh
@@ -0,0 +1,194 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_FULL_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_FULL_IMG_VISITOR_HH
+
+/// \file
+///
+/// Save document image analysis to an image.
+
+# include <fstream>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+# include <scribo/io/img/internal/draw_edges.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+
+ class full_img_visitor : public doc_serializer<full_img_visitor>
+ {
+ public:
+ // Constructor
+ full_img_visitor(mln::image2d<value::rgb8>& out);
+
+ // Visit overloads
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ void visit(const component_info& info) const;
+
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+
+ mutable image2d<scribo::def::lbl_type> elt_edge;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ inline
+ full_img_visitor::full_img_visitor(mln::image2d<value::rgb8>& out)
+ : output(out)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+ /// Document
+ //
+ template <typename L>
+ void
+ full_img_visitor::visit(const document<L>& doc) const
+ {
+ // Text
+ if (doc.has_text())
+ doc.paragraphs().accept(*this);
+
+ // Page elements (Pictures, ...)
+ if (doc.has_elements())
+ {
+ // Prepare element edges
+ elt_edge = morpho::elementary::gradient_external(doc.elements().labeled_image(), c8());
+
+ const component_set<L>& elts = doc.elements();
+ for_all_comps(e, elts)
+ if (elts(e).is_valid())
+ elts(e).accept(*this);
+ }
+
+
+ // line separators
+ if (doc.has_vline_seps())
+ for_all_comps(c, doc.vline_seps_comps())
+ doc.vline_seps_comps()(c).accept(*this);
+ if (doc.has_hline_seps())
+ for_all_comps(c, doc.hline_seps_comps())
+ doc.hline_seps_comps()(c).accept(*this);
+
+ }
+
+
+ /// Component_info
+ //
+ inline
+ void
+ full_img_visitor::visit(const component_info& info) const
+ {
+ switch (info.type())
+ {
+ case component::LineSeparator:
+ {
+ mln::draw::box(output, info.bbox(), literal::cyan);
+ }
+ break;
+
+
+ default:
+ case component::Image:
+ {
+ draw_edges(info, output, literal::orange, elt_edge);
+ }
+ break;
+ }
+ }
+
+ /// Paragraph Set
+ //
+ template <typename L>
+ void
+ full_img_visitor::visit(const paragraph_set<L>& parset) const
+ {
+ const line_set<L>& lines = parset.lines();
+
+ for_all_paragraphs(p, parset)
+ {
+ const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+ for_all_paragraph_lines(lid, line_ids)
+ {
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
+ }
+
+ mln::draw::box(output, parset(p).bbox(), literal::blue);
+ }
+ }
+
+
+ template <typename L>
+ void
+ full_img_visitor::visit(const line_info<L>& line) const
+ {
+ mln::draw::box(output, line.bbox(), literal::red);
+ }
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_FULL_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/img/internal/non_text_img_visitor.hh b/scribo/scribo/io/img/internal/non_text_img_visitor.hh
new file mode 100644
index 0000000..24b027e
--- /dev/null
+++ b/scribo/scribo/io/img/internal/non_text_img_visitor.hh
@@ -0,0 +1,162 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_NON_TEXT_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_NON_TEXT_IMG_VISITOR_HH
+
+/// \file
+///
+/// \brief Save non-text information as an image.
+
+# include <fstream>
+
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/io/img/internal/draw_edges.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+ /*! \brief Save non-text information as an image.
+ */
+ class non_text_img_visitor : public doc_serializer<non_text_img_visitor>
+ {
+ public:
+ // Constructor
+ non_text_img_visitor(mln::image2d<value::rgb8>& out);
+
+ // Visit overloads
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+
+ void visit(const component_info& info) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+
+ mutable image2d<scribo::def::lbl_type> elt_edge;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ inline
+ non_text_img_visitor::non_text_img_visitor(mln::image2d<value::rgb8>& out)
+ : output(out)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+
+ /// Document
+ //
+ template <typename L>
+ void
+ non_text_img_visitor::visit(const document<L>& doc) const
+ {
+ // Page elements (Pictures, ...)
+ if (doc.has_elements())
+ {
+ // Prepare element edges
+ elt_edge = morpho::elementary::gradient_external(doc.elements().labeled_image(), c8());
+
+ doc.elements().accept(*this);
+ }
+
+ // line separators
+ if (doc.has_hline_seps())
+ doc.hline_seps_comps().accept(*this);
+ if (doc.has_vline_seps())
+ doc.vline_seps_comps().accept(*this);
+ }
+
+
+ /// Component Set
+ //
+ template <typename L>
+ void
+ non_text_img_visitor::visit(const component_set<L>& comp_set) const
+ {
+ for_all_comps(c, comp_set)
+ if (comp_set(c).is_valid())
+ comp_set(c).accept(*this);
+ }
+
+
+ /// Component_info
+ //
+ inline
+ void
+ non_text_img_visitor::visit(const component_info& info) const
+ {
+ switch (info.type())
+ {
+ case component::LineSeparator:
+ {
+ mln::draw::box(output, info.bbox(), literal::cyan);
+ }
+ break;
+
+
+ default:
+ case component::Image:
+ {
+ draw_edges(info, output, literal::orange, elt_edge);
+ }
+ break;
+ }
+ }
+
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_NON_TEXT_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/img/internal/text_img_visitor.hh b/scribo/scribo/io/img/internal/text_img_visitor.hh
new file mode 100644
index 0000000..7629bf7
--- /dev/null
+++ b/scribo/scribo/io/img/internal/text_img_visitor.hh
@@ -0,0 +1,164 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_TEXT_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_TEXT_IMG_VISITOR_HH
+
+/// \file
+///
+/// Image output with text related information.
+
+# include <fstream>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+# include <mln/literal/colors.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+
+ class text_img_visitor : public doc_serializer<text_img_visitor>
+ {
+ public:
+ // Constructor
+ text_img_visitor(mln::image2d<value::rgb8>&);
+
+ // Visit overloads
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ inline
+ text_img_visitor::text_img_visitor(mln::image2d<value::rgb8>& out)
+ : output(out)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+
+ /// Document
+ //
+ template <typename L>
+ void
+ text_img_visitor::visit(const document<L>& doc) const
+ {
+ mln_precondition(doc.is_open());
+
+ // Prepare output image.
+ output = duplicate(doc.image());
+
+
+ // Text
+ if (doc.has_text())
+ doc.paragraphs().accept(*this);
+ }
+
+ /// Component Set
+ //
+ template <typename L>
+ void
+ text_img_visitor::visit(const component_set<L>& comp_set) const
+ {
+ for_all_comps(c, comp_set)
+ if (comp_set(c).is_valid())
+ comp_set(c).accept(*this);
+ }
+
+
+ /// Paragraph Set
+ //
+ template <typename L>
+ void
+ text_img_visitor::visit(const paragraph_set<L>& parset) const
+ {
+ const line_set<L>& lines = parset.lines();
+
+ for_all_paragraphs(p, parset)
+ {
+ const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+ for_all_paragraph_lines(lid, line_ids)
+ {
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
+ }
+
+ mln::draw::box(output, parset(p).bbox(), literal::blue);
+ }
+ }
+
+
+ template <typename L>
+ void
+ text_img_visitor::visit(const line_info<L>& line) const
+ {
+ mln::draw::box(output, line.bbox(), literal::red);
+ }
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_TEXT_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/img/save.hh b/scribo/scribo/io/img/save.hh
new file mode 100644
index 0000000..56300d6
--- /dev/null
+++ b/scribo/scribo/io/img/save.hh
@@ -0,0 +1,242 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_SAVE_HH
+# define SCRIBO_IO_IMG_SAVE_HH
+
+/// \file
+///
+/// \brief Save document information as an image.
+
+# include <libgen.h>
+# include <fstream>
+# include <sstream>
+
+# include <map>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/io/magick/save.hh>
+# include <mln/subsampling/antialiased.hh>
+# include <mln/data/transform_inplace.hh>
+
+# include <scribo/core/document.hh>
+
+# include <scribo/io/img/internal/text_img_visitor.hh>
+# include <scribo/io/img/internal/non_text_img_visitor.hh>
+# include <scribo/io/img/internal/full_img_visitor.hh>
+# include <scribo/io/img/internal/debug_img_visitor.hh>
+
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
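+ /*! \brief Supported image formats
+
+ Text : Display text regions.
+ NonText : Display non-text regions.
+ Full : Text and non-text regions.
+ DebugWoImage : Subsampled debug view drawn on a black background.
+ DebugWithImage : Subsampled debug view drawn over the darkened input image.
+ */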
+ enum Format
+ {
+ Text,
+ NonText,
+ Full,
+ DebugWoImage,
+ DebugWithImage,
+ };
+
+
+ /*! \brief Save document information as an image
+
+ */
+ template <typename L>
+ void
+ save(const document<L>& doc, const std::string& output_name,
+ Format format);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ namespace internal
+ {
+
+ struct highlight_mask
+ : Function_v2v<highlight_mask>
+ {
+ typedef value::rgb8 result;
+
+ highlight_mask(float ratio)
+ : ratio(ratio)
+ {
+ }
+
+ result operator()(const result& v) const
+ {
+ result out = v;
+ out.red() = std::min(out.red() * ratio, 255.f);
+ out.green() = std::min(out.green() * ratio, 255.f);
+ out.blue() = std::min(out.blue() * ratio, 255.f);
+ return out;
+ }
+
+ float ratio;
+ };
+
+
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_text(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8> output = duplicate(doc.image());
+ scribo::io::img::internal::text_img_visitor f(output);
+ doc.accept(f);
+ return output;
+ }
+
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_non_text(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8> output = duplicate(doc.image());
+ scribo::io::img::internal::non_text_img_visitor f(output);
+ doc.accept(f);
+ return output;
+ }
+
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_full(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8> output = duplicate(doc.image());
+ scribo::io::img::internal::full_img_visitor f(output);
+ doc.accept(f);
+ return output;
+ }
+
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_debug_without_image(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8>
+ output(box2d(doc.image().domain().pmin() / 4,
+ doc.image().domain().pmax() / 4));
+ data::fill(output, literal::black);
+ scribo::io::img::internal::debug_img_visitor f(output, 4);
+ doc.accept(f);
+ return output;
+ }
+
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_debug_with_image(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8>
+ output = mln::subsampling::antialiased(doc.image(), 4);
+ internal::highlight_mask highlight(0.5f);
+ data::transform_inplace(output, highlight);
+ scribo::io::img::internal::debug_img_visitor f(output, 4);
+ doc.accept(f);
+ return output;
+ }
+
+ } // end of namespace scribo::io::img::internal
+
+
+
+ // FACADE
+
+ template <typename L>
+ void
+ save(const document<L>& doc,
+ const std::string& output_name,
+ Format format)
+ {
+ trace::entering("scribo::io::img::save");
+
+ mln_precondition(doc.is_open());
+
+ mln::image2d<value::rgb8> output;
+
+ // Choose saving method.
+ switch (format)
+ {
+ case Text:
+ output = internal::save_text(doc);
+ break;
+
+ case NonText:
+ output = internal::save_non_text(doc);
+ break;
+
+ case Full:
+ output = internal::save_full(doc);
+ break;
+
+ case DebugWoImage:
+ output = internal::save_debug_without_image(doc);
+ break;
+
+ case DebugWithImage:
+ output = internal::save_debug_with_image(doc);
+ break;
+
+ default:
+ trace::warning("scribo::io::img::save - "
+ "Invalid image format! Skip saving...");
+ trace::exiting("scribo::io::img::save");
+ return;
+ }
+
+ mln::io::magick::save(output, output_name.c_str());
+
+ trace::exiting("scribo::io::img::save");
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_IO_IMG_SAVE_HH
--
1.5.6.5
1
0

14 Mar '11
* scribo/fun/v2v/highlight.hh,
* scribo/io/img/internal/debug_img_visitor.hh,
* scribo/io/img/internal/draw_edges.hh,
* scribo/io/img/internal/full_img_visitor.hh,
* scribo/io/img/internal/non_text_img_visitor.hh,
* scribo/io/img/internal/text_img_visitor.hh,
* scribo/io/img/save.hh: New.
---
scribo/ChangeLog | 12 +
.../serializable.hh => fun/v2v/highlight.hh} | 57 +++--
scribo/scribo/io/img/internal/debug_img_visitor.hh | 263 ++++++++++++++++++++
.../internal/draw_edges.hh} | 62 +++---
scribo/scribo/io/img/internal/full_img_visitor.hh | 194 ++++++++++++++
.../scribo/io/img/internal/non_text_img_visitor.hh | 162 ++++++++++++
scribo/scribo/io/img/internal/text_img_visitor.hh | 164 ++++++++++++
scribo/scribo/io/img/save.hh | 242 ++++++++++++++++++
8 files changed, 1104 insertions(+), 52 deletions(-)
copy scribo/scribo/{core/concept/serializable.hh => fun/v2v/highlight.hh} (66%)
create mode 100644 scribo/scribo/io/img/internal/debug_img_visitor.hh
copy scribo/scribo/io/{xml/internal/print_box_coords.hh => img/internal/draw_edges.hh} (56%)
create mode 100644 scribo/scribo/io/img/internal/full_img_visitor.hh
create mode 100644 scribo/scribo/io/img/internal/non_text_img_visitor.hh
create mode 100644 scribo/scribo/io/img/internal/text_img_visitor.hh
create mode 100644 scribo/scribo/io/img/save.hh
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 5b96508..43041fe 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,17 @@
2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Introduce result image output.
+
+ * scribo/fun/v2v/highlight.hh,
+ * scribo/io/img/internal/debug_img_visitor.hh,
+ * scribo/io/img/internal/draw_edges.hh,
+ * scribo/io/img/internal/full_img_visitor.hh,
+ * scribo/io/img/internal/non_text_img_visitor.hh,
+ * scribo/io/img/internal/text_img_visitor.hh,
+ * scribo/io/img/save.hh: New.
+
+2011-03-14 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Save image edge coordinates in XML output.
* scribo/io/xml/internal/extended_page_xml_visitor.hh: Here.
diff --git a/scribo/scribo/core/concept/serializable.hh b/scribo/scribo/fun/v2v/highlight.hh
similarity index 66%
copy from scribo/scribo/core/concept/serializable.hh
copy to scribo/scribo/fun/v2v/highlight.hh
index 6e661a6..835be59 100644
--- a/scribo/scribo/core/concept/serializable.hh
+++ b/scribo/scribo/fun/v2v/highlight.hh
@@ -23,42 +23,55 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
-# define SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
+#ifndef SCRIBO_FUN_V2V_HIGHLIGHT_HH
+# define SCRIBO_FUN_V2V_HIGHLIGHT_HH
-/// \file
-///
-/// Concept for serializer visitors.
+/// \brief Function increasing values to highlight areas.
-# include <mln/core/concept/object.hh>
-# include <scribo/core/concept/serialize_visitor.hh>
+# include <mln/core/concept/function.hh>
namespace scribo
{
- /// \brief Link functor concept.
- template <typename E>
- class Serializable : public mln::Object<E>
+ namespace fun
{
- public:
- template <typename E2>
- void accept(const SerializeVisitor<E2>& visitor) const;
- };
+
+ namespace v2v
+ {
+
+ template <typename R>
+ struct highlight
+ : mln::Function_v2v<highlight<R> >
+ {
+ typedef R result;
+
+ highlight();
+ result operator()(const result& v) const;
+ };
# ifndef MLN_INCLUDE_ONLY
- template <typename E>
- template <typename E2>
- void
- Serializable<E>::accept(const SerializeVisitor<E2>& visitor) const
- {
- exact(visitor).visit(exact(*this));
- }
+
+ template <typename R>
+ highlight<R>::highlight()
+ {
+ }
+
+
+ template <typename R>
+ typename highlight<R>::result
+ highlight<R>::operator()(const result& v) const
+ {
+ return v + v / 2;
+ }
# endif // ! MLN_INCLUDE_ONLY
+ } // end of namespace scribo::fun::v2v
+
+ } // end of namespace scribo::fun
} // end of namespace scribo
-#endif // SCRIBO_CORE_CONCEPT_SERIALIZABLE_HH
+#endif // ! SCRIBO_FUN_V2V_HIGHLIGHT_HH
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh
new file mode 100644
index 0000000..f1c689d
--- /dev/null
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -0,0 +1,263 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_DEBUG_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_DEBUG_IMG_VISITOR_HH
+
+/// \file
+///
+/// Save document image analysis to a small image.
+
+# include <fstream>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+# include <mln/subsampling/antialiased.hh>
+# include <mln/morpho/elementary/gradient_external.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+# include <scribo/io/img/internal/draw_edges.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+
+ class debug_img_visitor : public doc_serializer<debug_img_visitor>
+ {
+ public:
+ // Constructor
+ debug_img_visitor(mln::image2d<value::rgb8>& out,
+ unsigned output_ratio);
+
+ // Visit overloads
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ void visit(const component_info& info) const;
+
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+ unsigned output_ratio;
+
+ mutable image2d<scribo::def::lbl_type> elt_edge;
+
+
+ private: // Methods
+ box2d compute_bbox(const box2d& b) const;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ inline
+ box2d
+ debug_img_visitor::compute_bbox(const box2d& b) const
+ {
+ point2d
+ pmin = b.pmin() / output_ratio,
+ pmax = b.pmax() / output_ratio;
+
+ return box2d(pmin, pmax);
+ }
+
+
+ inline
+ debug_img_visitor::debug_img_visitor(mln::image2d<value::rgb8>& out,
+ unsigned output_ratio)
+ : output(out), output_ratio(output_ratio)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+ /// Document
+ //
+ template <typename L>
+ void
+ debug_img_visitor::visit(const document<L>& doc) const
+ {
+ // Text
+ if (doc.has_text())
+ doc.paragraphs().accept(*this);
+
+ // Page elements (Pictures, ...)
+ if (doc.has_elements())
+ {
+ // Prepare element edges
+
+ // FIXME: UGLY! Too slow!
+ scribo::def::lbl_type nlabels;
+ component_set<L> elts = primitive::extract::components(
+ data::convert(bool(), mln::subsampling::antialiased(doc.elements().labeled_image(),
+ output_ratio)),
+ c8(),
+ nlabels);
+
+ // Preserving elements tags
+ if (doc.elements().nelements() != elts.nelements())
+ {
+ std::cerr << "Warnig: could not preserve element type in "
+ << "img debug output." << std::endl;
+ std::cerr << "The number of non text element has changed while "
+ << "subsampling images : "
+ << doc.elements().nelements() << " vs "
+ << elts.nelements() << std::endl;
+ }
+ else
+ for_all_comps(c, doc.elements())
+ elts(c).update_type(doc.elements()(c).type());
+
+ elt_edge = morpho::elementary::gradient_external(elts.labeled_image(), c8());
+
+// const component_set<L>& elts = doc.elements();
+ for_all_comps(e, elts)
+ if (elts(e).is_valid())
+ elts(e).accept(*this);
+ }
+
+
+ // line separators
+ if (doc.has_vline_seps())
+ for_all_comps(c, doc.vline_seps_comps())
+ doc.vline_seps_comps()(c).accept(*this);
+ if (doc.has_hline_seps())
+ for_all_comps(c, doc.hline_seps_comps())
+ doc.hline_seps_comps()(c).accept(*this);
+
+ }
+
+
+ /// Component_info
+ //
+ inline
+ void
+ debug_img_visitor::visit(const component_info& info) const
+ {
+ switch (info.type())
+ {
+ case component::LineSeparator:
+ {
+ mln::draw::box(output, compute_bbox(info.bbox()),
+ literal::cyan);
+ }
+ break;
+
+
+ default:
+ case component::Image:
+ {
+ // The bbox does not need to be readjusted to the
+ // subsampled domain since it has been recomputed while
+ // computing the edge image.
+ //
+ // However, the bbox must be enlarged since only the
+ // _external_ edge is computed.
+ box2d b = info.bbox();
+ b.enlarge(1);
+ b.crop_wrt(output.domain());
+ data::fill(((output | b).rw()
+ | (pw::value(elt_edge)
+ == pw::cst((scribo::def::lbl_type)info.id().to_equiv()))).rw(),
+ literal::orange);
+ }
+ break;
+ }
+ }
+
+ /// Paragraph Set
+ //
+ template <typename L>
+ void
+ debug_img_visitor::visit(const paragraph_set<L>& parset) const
+ {
+ const line_set<L>& lines = parset.lines();
+
+ for_all_paragraphs(p, parset)
+ {
+ const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+ for_all_paragraph_lines(lid, line_ids)
+ {
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
+ }
+
+ box2d b = compute_bbox(parset(p).bbox());
+ b.enlarge(1);
+ b.crop_wrt(output.domain());
+ mln::draw::box(output, b, literal::blue);
+ }
+ }
+
+
+ template <typename L>
+ void
+ debug_img_visitor::visit(const line_info<L>& line) const
+ {
+ point2d
+ pmin = line.bbox().pmin(),
+ pmax = line.bbox().pmax();
+ pmax.row() = line.baseline();
+ pmin.row() = line.baseline();
+
+ pmin = pmin / output_ratio;
+ pmax = pmax / output_ratio;
+
+ mln::draw::line(output, pmin, pmax, literal::red);
+ }
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_DEBUG_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/xml/internal/print_box_coords.hh b/scribo/scribo/io/img/internal/draw_edges.hh
similarity index 56%
copy from scribo/scribo/io/xml/internal/print_box_coords.hh
copy to scribo/scribo/io/img/internal/draw_edges.hh
index ad84709..664a352 100644
--- a/scribo/scribo/io/xml/internal/print_box_coords.hh
+++ b/scribo/scribo/io/img/internal/draw_edges.hh
@@ -23,15 +23,26 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
-# define SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
+#ifndef SCRIBO_IO_IMG_INTERNAL_DRAW_EDGES_HH
+# define SCRIBO_IO_IMG_INTERNAL_DRAW_EDGES_HH
/// \file
///
-/// \brief Prints box2d coordinates to XML data.
+/// \brief Draw object edges.
# include <fstream>
-# include <mln/core/alias/box2d.hh>
+# include <mln/core/image/image2d.hh>
+# include <mln/core/image/dmorph/image_if.hh>
+# include <mln/pw/all.hh>
+# include <mln/data/fill.hh>
+# include <mln/data/convert.hh>
+# include <mln/literal/colors.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/morpho/elementary/gradient_external.hh>
+
+# include <scribo/core/component_info.hh>
+# include <scribo/core/def/lbl_type.hh>
+
namespace scribo
{
@@ -39,7 +50,7 @@ namespace scribo
namespace io
{
- namespace xml
+ namespace img
{
namespace internal
@@ -47,11 +58,12 @@ namespace scribo
using namespace mln;
- /*! \brief Prints box2d coordinates to XML data.
+ /*! \brief Draw component edges.
*/
void
- print_box_coords(std::ofstream& ostr, const box2d& b,
- const char *space);
+ draw_edges(const component_info& info,
+ image2d<value::rgb8>& output, const value::rgb8& value,
+ const image2d<scribo::def::lbl_type>& edges);
# ifndef MLN_INCLUDE_ONLY
@@ -59,37 +71,27 @@ namespace scribo
inline
void
- print_box_coords(std::ofstream& ostr, const box2d& b,
- const char *space)
+ draw_edges(const component_info& info,
+ image2d<value::rgb8>& output, const value::rgb8& value,
+ const image2d<scribo::def::lbl_type>& edges)
{
- std::string sc = space;
- std::string sp = sc + " ";
- ostr << sc << "<coords>" << std::endl
- << sp << "<point x=\"" << b.pmin().col()
- << "\" y=\"" << b.pmin().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmax().col()
- << "\" y=\"" << b.pmin().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmax().col()
- << "\" y=\"" << b.pmax().row() << "\"/>"
- << std::endl
- << sp << "<point x=\"" << b.pmin().col()
- << "\" y=\"" << b.pmax().row() << "\"/>"
- << std::endl
- << sc << "</coords>" << std::endl;
-
+ box2d b = info.bbox();
+ b.enlarge(1);
+ data::fill(((output | b).rw()
+ | (pw::value(edges)
+ == pw::cst((scribo::def::lbl_type)info.id().to_equiv()))).rw(),
+ value);
}
# endif // ! MLN_INCLUDE_ONLY
- } // end of namespace scribo::io::xml::internal
+ } // end of namespace scribo::io::img::internal
- } // end of namespace scribo::io::xml
+ } // end of namespace scribo::io::img
} // end of namespace scribo::io
} // end of namespace scribo
-#endif // ! SCRIBO_IO_XML_INTERNAL_PRINT_BOX_COORDS_HH
+#endif // ! SCRIBO_IO_IMG_INTERNAL_DRAW_EDGES_HH
diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh
new file mode 100644
index 0000000..30987db
--- /dev/null
+++ b/scribo/scribo/io/img/internal/full_img_visitor.hh
@@ -0,0 +1,194 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_FULL_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_FULL_IMG_VISITOR_HH
+
+/// \file
+///
+/// Save document image analysis to an image.
+
+# include <fstream>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+# include <scribo/io/img/internal/draw_edges.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+
+ class full_img_visitor : public doc_serializer<full_img_visitor>
+ {
+ public:
+ // Constructor
+ full_img_visitor(mln::image2d<value::rgb8>& out);
+
+ // Visit overloads
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ void visit(const component_info& info) const;
+
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+
+ mutable image2d<scribo::def::lbl_type> elt_edge;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ inline
+ full_img_visitor::full_img_visitor(mln::image2d<value::rgb8>& out)
+ : output(out)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+ /// Document
+ //
+ template <typename L>
+ void
+ full_img_visitor::visit(const document<L>& doc) const
+ {
+ // Text
+ if (doc.has_text())
+ doc.paragraphs().accept(*this);
+
+ // Page elements (Pictures, ...)
+ if (doc.has_elements())
+ {
+ // Prepare element edges
+ elt_edge = morpho::elementary::gradient_external(doc.elements().labeled_image(), c8());
+
+ const component_set<L>& elts = doc.elements();
+ for_all_comps(e, elts)
+ if (elts(e).is_valid())
+ elts(e).accept(*this);
+ }
+
+
+ // line separators
+ if (doc.has_vline_seps())
+ for_all_comps(c, doc.vline_seps_comps())
+ doc.vline_seps_comps()(c).accept(*this);
+ if (doc.has_hline_seps())
+ for_all_comps(c, doc.hline_seps_comps())
+ doc.hline_seps_comps()(c).accept(*this);
+
+ }
+
+
+ /// Component_info
+ //
+ inline
+ void
+ full_img_visitor::visit(const component_info& info) const
+ {
+ switch (info.type())
+ {
+ case component::LineSeparator:
+ {
+ mln::draw::box(output, info.bbox(), literal::cyan);
+ }
+ break;
+
+
+ default:
+ case component::Image:
+ {
+ draw_edges(info, output, literal::orange, elt_edge);
+ }
+ break;
+ }
+ }
+
+ /// Paragraph Set
+ //
+ template <typename L>
+ void
+ full_img_visitor::visit(const paragraph_set<L>& parset) const
+ {
+ const line_set<L>& lines = parset.lines();
+
+ for_all_paragraphs(p, parset)
+ {
+ const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+ for_all_paragraph_lines(lid, line_ids)
+ {
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
+ }
+
+ mln::draw::box(output, parset(p).bbox(), literal::blue);
+ }
+ }
+
+
+ template <typename L>
+ void
+ full_img_visitor::visit(const line_info<L>& line) const
+ {
+ mln::draw::box(output, line.bbox(), literal::red);
+ }
+
+#endif // MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_FULL_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/img/internal/non_text_img_visitor.hh b/scribo/scribo/io/img/internal/non_text_img_visitor.hh
new file mode 100644
index 0000000..24b027e
--- /dev/null
+++ b/scribo/scribo/io/img/internal/non_text_img_visitor.hh
@@ -0,0 +1,162 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_NON_TEXT_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_NON_TEXT_IMG_VISITOR_HH
+
+/// \file
+///
+/// \brief Save non-text information as an image.
+
+# include <fstream>
+
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/io/img/internal/draw_edges.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+ /*! \brief Save non-text information as an image.
+ Page elements and line separators are drawn on an image held by reference. */
+ class non_text_img_visitor : public doc_serializer<non_text_img_visitor>
+ {
+ public:
+ // Constructor: \p out is stored by reference and drawn on in place.
+ non_text_img_visitor(mln::image2d<value::rgb8>& out);
+
+ // Visit overloads (document, component set, single component).
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+
+ void visit(const component_info& info) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+ // Element edge image; mutable because the const visit(document) assigns it.
+ mutable image2d<scribo::def::lbl_type> elt_edge;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ /// Constructor: keep a reference to \p out and assert that it is valid.
+ inline
+ non_text_img_visitor::non_text_img_visitor(mln::image2d<value::rgb8>& out)
+ : output(out)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+
+ /// Document: when present, visit the page elements (after computing
+ /// their edge image) and the horizontal and vertical line separators.
+ template <typename L>
+ void
+ non_text_img_visitor::visit(const document<L>& doc) const
+ {
+ // Page elements (Pictures, ...)
+ if (doc.has_elements())
+ {
+ // Prepare element edges: external gradient (c8) of the elements' labeled image.
+ elt_edge = morpho::elementary::gradient_external(doc.elements().labeled_image(), c8());
+
+ doc.elements().accept(*this);
+ }
+
+ // Line separators
+ if (doc.has_hline_seps())
+ doc.hline_seps_comps().accept(*this);
+ if (doc.has_vline_seps())
+ doc.vline_seps_comps().accept(*this);
+ }
+
+
+ /// Component Set: have every valid component of \p comp_set accept
+ /// this visitor; invalid components are skipped.
+ template <typename L>
+ void
+ non_text_img_visitor::visit(const component_set<L>& comp_set) const
+ {
+ for_all_comps(c, comp_set)
+ if (comp_set(c).is_valid())
+ comp_set(c).accept(*this);
+ }
+
+
+ /// Component_info: a line separator gets its bounding box drawn in
+ /// cyan; any other type (Image included) goes through draw_edges() in orange.
+ inline
+ void
+ non_text_img_visitor::visit(const component_info& info) const
+ {
+ switch (info.type())
+ {
+ case component::LineSeparator:
+ {
+ mln::draw::box(output, info.bbox(), literal::cyan);
+ }
+ break;
+
+
+ default:
+ case component::Image:
+ {
+ draw_edges(info, output, literal::orange, elt_edge);
+ }
+ break;
+ }
+ }
+
+
+#endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_NON_TEXT_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/img/internal/text_img_visitor.hh b/scribo/scribo/io/img/internal/text_img_visitor.hh
new file mode 100644
index 0000000..7629bf7
--- /dev/null
+++ b/scribo/scribo/io/img/internal/text_img_visitor.hh
@@ -0,0 +1,164 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_INTERNAL_TEXT_IMG_VISITOR_HH
+# define SCRIBO_IO_IMG_INTERNAL_TEXT_IMG_VISITOR_HH
+
+/// \file
+///
+/// Image output with text related information.
+
+# include <fstream>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/draw/box.hh>
+# include <mln/literal/colors.hh>
+
+# include <scribo/core/internal/doc_serializer.hh>
+# include <scribo/core/document.hh>
+# include <scribo/core/component_set.hh>
+# include <scribo/core/paragraph_set.hh>
+# include <scribo/core/line_info.hh>
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ namespace internal
+ {
+
+ /// \brief Draw text information (paragraph and line bounding boxes) on an image.
+ class text_img_visitor : public doc_serializer<text_img_visitor>
+ {
+ public:
+ // Constructor: the image is stored by reference and drawn on in place.
+ text_img_visitor(mln::image2d<value::rgb8>&);
+
+ // Visit overloads (document, component set, paragraph set, line).
+ template <typename L>
+ void visit(const document<L>& doc) const;
+
+ template <typename L>
+ void visit(const component_set<L>& comp_set) const;
+
+ template <typename L>
+ void visit(const paragraph_set<L>& parset) const;
+
+ template <typename L>
+ void visit(const line_info<L>& line) const;
+
+ private: // Attributes
+ mln::image2d<value::rgb8>& output;
+ };
+
+
+
+# ifndef MLN_INCLUDE_ONLY
+ /// Constructor: keep a reference to \p out and assert that it is valid.
+ inline
+ text_img_visitor::text_img_visitor(mln::image2d<value::rgb8>& out)
+ : output(out)
+ {
+ mln_assertion(output.is_valid());
+ }
+
+
+
+ /// Document: requires an open document; resets the output to a copy of
+ /// the document image, then visits the paragraphs if the document has text.
+ template <typename L>
+ void
+ text_img_visitor::visit(const document<L>& doc) const
+ {
+ mln_precondition(doc.is_open());
+
+ // Prepare output image. NOTE(review): save_text() in io/img/save.hh already passes a duplicate of doc.image() -- confirm this second copy is needed.
+ output = duplicate(doc.image());
+
+
+ // Text
+ if (doc.has_text())
+ doc.paragraphs().accept(*this);
+ }
+
+ /// Component Set: have every valid component of \p comp_set accept
+ /// this visitor; invalid components are skipped.
+ template <typename L>
+ void
+ text_img_visitor::visit(const component_set<L>& comp_set) const
+ {
+ for_all_comps(c, comp_set)
+ if (comp_set(c).is_valid())
+ comp_set(c).accept(*this);
+ }
+
+
+ /// Paragraph Set: for each paragraph, have all of its lines accept
+ /// this visitor, then draw the paragraph's bounding box in blue.
+ template <typename L>
+ void
+ text_img_visitor::visit(const paragraph_set<L>& parset) const
+ {
+ const line_set<L>& lines = parset.lines();
+
+ for_all_paragraphs(p, parset)
+ {
+ const mln::util::array<line_id_t>& line_ids = parset(p).line_ids();
+
+ for_all_paragraph_lines(lid, line_ids)
+ {
+ line_id_t l = line_ids(lid);
+ lines(l).accept(*this);
+ }
+
+ mln::draw::box(output, parset(p).bbox(), literal::blue);
+ }
+ }
+
+ /// Line: draw the line's bounding box in red on the output image.
+ template <typename L>
+ void
+ text_img_visitor::visit(const line_info<L>& line) const
+ {
+ mln::draw::box(output, line.bbox(), literal::red);
+ }
+
+#endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img::internal
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+#endif // SCRIBO_IO_IMG_INTERNAL_TEXT_IMG_VISITOR_HH
diff --git a/scribo/scribo/io/img/save.hh b/scribo/scribo/io/img/save.hh
new file mode 100644
index 0000000..56300d6
--- /dev/null
+++ b/scribo/scribo/io/img/save.hh
@@ -0,0 +1,242 @@
+// Copyright (C) 2011 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef SCRIBO_IO_IMG_SAVE_HH
+# define SCRIBO_IO_IMG_SAVE_HH
+
+/// \file
+///
+/// \brief Save document information as an image.
+
+# include <libgen.h>
+# include <fstream>
+# include <sstream>
+
+# include <map>
+
+# include <mln/core/image/image2d.hh>
+# include <mln/value/rgb8.hh>
+# include <mln/io/magick/save.hh>
+# include <mln/subsampling/antialiased.hh>
+# include <mln/data/transform_inplace.hh>
+
+# include <scribo/core/document.hh>
+
+# include <scribo/io/img/internal/text_img_visitor.hh>
+# include <scribo/io/img/internal/non_text_img_visitor.hh>
+# include <scribo/io/img/internal/full_img_visitor.hh>
+# include <scribo/io/img/internal/debug_img_visitor.hh>
+
+
+namespace scribo
+{
+
+ namespace io
+ {
+
+ namespace img
+ {
+
+ /*! \brief Supported image formats
+
+ - Text: Display text regions.
+ - NonText: Display non-text regions.
+ - Full: Text and non-text regions.
+ - DebugWoImage: debug_img_visitor output on a black canvas 4 times smaller than the image.
+ - DebugWithImage: Same, over the image subsampled by 4 with its channels halved.
+ */
+ enum Format
+ {
+ Text,
+ NonText,
+ Full,
+ DebugWoImage,
+ DebugWithImage,
+ };
+
+
+ /*! \brief Save document information as an image
+
+ Renders \p doc as \p format and writes it to the file \p output_name; \p doc must be open. */
+ template <typename L>
+ void
+ save(const document<L>& doc, const std::string& output_name,
+ Format format);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+
+ namespace internal
+ {
+ /// Value functor multiplying each rgb8 channel by \p ratio, capped at 255.
+ struct highlight_mask
+ : Function_v2v<highlight_mask>
+ {
+ typedef value::rgb8 result;
+
+ highlight_mask(float ratio)
+ : ratio(ratio)
+ {
+ }
+ /// Return \p v with its red, green and blue components scaled.
+ result operator()(const result& v) const
+ {
+ result out = v;
+ out.red() = std::min(out.red() * ratio, 255.f);
+ out.green() = std::min(out.green() * ratio, 255.f);
+ out.blue() = std::min(out.blue() * ratio, 255.f);
+ return out;
+ }
+ /// Multiplicative factor applied to every channel.
+ float ratio;
+ };
+
+ /// Format Text: run text_img_visitor on a duplicate of the document image.
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_text(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8> output = duplicate(doc.image());
+ scribo::io::img::internal::text_img_visitor f(output);
+ doc.accept(f);
+ return output;
+ }
+ /// Format NonText: run non_text_img_visitor on a duplicate of the document image.
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_non_text(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8> output = duplicate(doc.image());
+ scribo::io::img::internal::non_text_img_visitor f(output);
+ doc.accept(f);
+ return output;
+ }
+ /// Format Full: run full_img_visitor on a duplicate of the document image.
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_full(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8> output = duplicate(doc.image());
+ scribo::io::img::internal::full_img_visitor f(output);
+ doc.accept(f);
+ return output;
+ }
+ /// Format DebugWoImage: run debug_img_visitor(output, 4) on a black image whose domain corners are the document image's divided by 4.
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_debug_without_image(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8>
+ output(box2d(doc.image().domain().pmin() / 4,
+ doc.image().domain().pmax() / 4));
+ data::fill(output, literal::black);
+ scribo::io::img::internal::debug_img_visitor f(output, 4);
+ doc.accept(f);
+ return output;
+ }
+ /// Format DebugWithImage: run debug_img_visitor(output, 4) on the document image subsampled by 4 (antialiased) with every channel scaled by 0.5.
+ template <typename L>
+ mln::image2d<value::rgb8>
+ save_debug_with_image(const document<L>& doc)
+ {
+ mln_precondition(doc.is_valid());
+ mln::image2d<value::rgb8>
+ output = mln::subsampling::antialiased(doc.image(), 4);
+ internal::highlight_mask highlight(0.5f);
+ data::transform_inplace(output, highlight);
+ scribo::io::img::internal::debug_img_visitor f(output, 4);
+ doc.accept(f);
+ return output;
+ }
+
+ } // end of namespace scribo::io::img::internal
+
+
+
+ // FACADE: build the image matching \p format, then write it with mln::io::magick::save.
+ // An unrecognized format only emits a trace warning; nothing is written.
+ template <typename L>
+ void
+ save(const document<L>& doc,
+ const std::string& output_name,
+ Format format)
+ {
+ trace::entering("scribo::io::img::save");
+
+ mln_precondition(doc.is_open());
+
+ mln::image2d<value::rgb8> output;
+
+ // Choose saving method.
+ switch (format)
+ {
+ case Text:
+ output = internal::save_text(doc);
+ break;
+
+ case NonText:
+ output = internal::save_non_text(doc);
+ break;
+
+ case Full:
+ output = internal::save_full(doc);
+ break;
+
+ case DebugWoImage:
+ output = internal::save_debug_without_image(doc);
+ break;
+
+ case DebugWithImage:
+ output = internal::save_debug_with_image(doc);
+ break;
+
+ default:
+ trace::warning("scribo::io::img::save - "
+ "Invalid image format! Skip saving...");
+ trace::exiting("scribo::io::img::save");
+ return;
+ }
+
+ mln::io::magick::save(output, output_name.c_str());
+
+ trace::exiting("scribo::io::img::save");
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace scribo::io::img
+
+ } // end of namespace scribo::io
+
+} // end of namespace scribo
+
+
+#endif // ! SCRIBO_IO_IMG_SAVE_HH
--
1.5.6.5
1
0