olena-2.0-391-g5991f6a [ICDAR] Handle reverse video

--- scribo/sandbox/icdar_13_table/Makefile | 4 +- scribo/sandbox/icdar_13_table/TODO | 2 + scribo/sandbox/icdar_13_table/src/new.cc | 61 ++++++++++++++++++++++-------- 3 files changed, 49 insertions(+), 18 deletions(-) diff --git a/scribo/sandbox/icdar_13_table/Makefile b/scribo/sandbox/icdar_13_table/Makefile index 19743e1..8155a34 100644 --- a/scribo/sandbox/icdar_13_table/Makefile +++ b/scribo/sandbox/icdar_13_table/Makefile @@ -1,6 +1,6 @@ -CCACHE= +CCACHE=ccache CC=g++ -CFLAGS=-Wall -Werror -O3 -DHAVE_TESSERACT_3 -DNDEBUG -g +CFLAGS=-Wall -Werror -O3 -DHAVE_TESSERACT_3 -DNDEBUG CLIBS=-I../../../milena/ -I../../ -I/usr/include/poppler CLEAN=*.o output/* log final.xml diff --git a/scribo/sandbox/icdar_13_table/TODO b/scribo/sandbox/icdar_13_table/TODO index a4aa631..e3c6f52 100644 --- a/scribo/sandbox/icdar_13_table/TODO +++ b/scribo/sandbox/icdar_13_table/TODO @@ -5,6 +5,8 @@ Table location sub-competition : * Find links betwwen pages for mutlipages tables * Get text from reversed-video zones + * *** glibc detected *** ./table: corrupted double-linked list + with the file us-005.pdf from the test set Table structure recognition sub-competition : * All diff --git a/scribo/sandbox/icdar_13_table/src/new.cc b/scribo/sandbox/icdar_13_table/src/new.cc index 714d0c2..aca31bb 100644 --- a/scribo/sandbox/icdar_13_table/src/new.cc +++ b/scribo/sandbox/icdar_13_table/src/new.cc @@ -98,8 +98,8 @@ int main(int argc, char** argv) std::ostringstream path; image2d<value::rgb8> original, ima_links, ima_groups, ima_valid; image2d<value::int_u8> filtered; - image2d<bool> bin, separators, bin_without_separators, whitespaces, comp, denoised; - scribo::component_set< image2d<unsigned> > components; + image2d<bool> bin, reverse, reverse_selection, bin_merged, separators, bin_without_separators, whitespaces, comp, denoised; + scribo::component_set< image2d<unsigned> > components, rcomponents; unsigned dpi = 72; @@ -114,6 +114,25 @@ int main(int argc, char** argv) filtered = data::transform(original, fun::v2v::rgb_to_luma<value::int_u8>()); bin = scribo::binarization::sauvola(filtered, 81, 0.44); + // Reverse selection + reverse = logical::not_(bin); + initialize(reverse_selection, reverse); + data::fill(reverse_selection, false); + + unsigned nrcomponents; + rcomponents = scribo::primitive::extract::components(reverse, c8(), nrcomponents); + + for (unsigned i = 1; i < rcomponents.nelements(); ++i) + { + const box2d& b = rcomponents(i).bbox(); + + if (b.height() < 20 && b.width() < 20) + data::fill((reverse_selection | b).rw(), true); + } + + reverse_selection = logical::and_(reverse, reverse_selection); + reverse_selection = scribo::preprocessing::denoise_fg(reverse_selection, c8(), 4); + // Find separators bin_without_separators = duplicate(bin); separators = separators; @@ -131,11 +150,14 @@ int main(int argc, char** argv) // Denoise denoised = scribo::preprocessing::denoise_fg(bin_without_separators, c8(), 4); + // Bin merged + bin_merged = logical::or_(denoised, reverse_selection); + // Extract components unsigned ncomponents; - components = scribo::primitive::extract::components(denoised, c8(), ncomponents); + components = scribo::primitive::extract::components(bin_merged, c8(), ncomponents); - initialize(comp, denoised); + initialize(comp, bin_merged); data::fill(comp, false); for (unsigned i = 1; i <= components.nelements(); ++i) { @@ -158,9 +180,9 @@ int main(int argc, char** argv) // Filter links scribo::object_links< image2d<unsigned> > hratio_filtered_links = scribo::filter::object_links_bbox_h_ratio(merged_links, 2.5f); - ima_links = data::convert(value::rgb8(), denoised); - ima_groups = data::convert(value::rgb8(), denoised); - ima_valid = data::convert(value::rgb8(), denoised); + ima_links = data::convert(value::rgb8(), bin_merged); + ima_groups = data::convert(value::rgb8(), bin_merged); + ima_valid = data::convert(value::rgb8(), bin_merged); // Write links for (unsigned l = 1; l < merged_links.nelements(); ++l) @@ -213,7 +235,7 @@ int main(int argc, char** argv) unsigned min_height = std::min(b1.height(), b2.height()); if (p1[0] < p2[0] // Avoid redundancy - && max_height * 2 < denoised.ncols() + && max_height * 2 < bin_merged.ncols() && min_height + 3 >= max_height // Same heights && b1.width() < 2 * average_width && b2.width() < 2 * average_width // Regular width && (b1.pmin()[1] == b2.pmin()[1] @@ -258,10 +280,9 @@ int main(int argc, char** argv) } } - // Draw weighted boxes (red < orange < cyan < green) + // Draw weighted boxes (red < orange < cyan < green) (useless ?) for (unsigned i = 0; i < balance.size(); ++i) { - std::cout << balance[i] << " "; if (balance[i] == 1) draw::box(ima_valid, groups(i).bbox(), literal::red); @@ -274,10 +295,9 @@ int main(int argc, char** argv) if (balance[i] > 3) draw::box(ima_valid, groups(i).bbox(), literal::green); } - std::cout << std::endl << std::endl; - // Write images and close XML + // FIXME To externalize path.str(""); path << "output/p" << page << "_0_bin.pbm"; io::pbm::save(bin, path.str()); @@ -287,16 +307,25 @@ int main(int argc, char** argv) path.str(""); path << "output/p" << page << "_2_denoised.pbm"; io::pbm::save(denoised, path.str()); - path.str(""); path << "output/p" << page << "_3_components.pbm"; + path.str(""); path << "output/p" << page << "_3_reverse.pbm"; + io::pbm::save(reverse, path.str()); + + path.str(""); path << "output/p" << page << "_4_reverse_selection.pbm"; + io::pbm::save(reverse_selection, path.str()); + + path.str(""); path << "output/p" << page << "_5_bin_merged.pbm"; + io::pbm::save(bin_merged, path.str()); + + path.str(""); path << "output/p" << page << "_6_components.pbm"; io::pbm::save(comp, path.str()); - path.str(""); path << "output/p" << page << "_4_links.ppm"; + path.str(""); path << "output/p" << page << "_7_links.ppm"; io::ppm::save(ima_links, path.str()); - path.str(""); path << "output/p" << page << "_5_groups.ppm"; + path.str(""); path << "output/p" << page << "_8_groups.ppm"; io::ppm::save(ima_groups, path.str()); - path.str(""); path << "output/p" << page << "_6_valid.ppm"; + path.str(""); path << "output/p" << page << "_9_valid.ppm"; io::ppm::save(ima_valid, path.str()); } -- 1.7.2.5
participants (1)
-
Anthony Seure