---
scribo/sandbox/icdar_13_table/Makefile | 13 ++-
scribo/sandbox/icdar_13_table/src/main.cc | 184 +++++++++++++++++------------
2 files changed, 114 insertions(+), 83 deletions(-)
diff --git a/scribo/sandbox/icdar_13_table/Makefile
b/scribo/sandbox/icdar_13_table/Makefile
index 2420eec..4e552dd 100644
--- a/scribo/sandbox/icdar_13_table/Makefile
+++ b/scribo/sandbox/icdar_13_table/Makefile
@@ -1,8 +1,8 @@
CCACHE=
CC=g++
-CFLAGS=-Wall -Werror -O3
-CLIBS=-I../../../milena/ -I../../
-CLEAN=*.o $(OUTPUT) output/* log final.xml
+CFLAGS=-Wall -Werror -O3 -DHAVE_TESSERACT_3 -DNDEBUG
+CLIBS=-I../../../milena/ -I../../ -I/usr/include/poppler
+CLEAN=*.o output/* log final.xml
SRC=src/main.cc
OUTPUT=table
@@ -10,9 +10,12 @@ OUTPUT=table
all: table
table:
- $(CCACHE) $(CC) $(CFLAGS) -DHAVE_TESSERACT_3 $(CLIBS) $(SRC) -ltesseract -o $(OUTPUT)
+ $(CCACHE) $(CC) $(CFLAGS) $(CLIBS) $(SRC) -ltesseract -lpoppler-cpp -o $(OUTPUT)
clean:
rm -rf $(CLEAN)
-.PHONY: table
+mrproper: clean
+ rm -f $(OUTPUT)
+
+.PHONY: all table clean mrproper # 'all' only aggregates targets; it never names a file
diff --git a/scribo/sandbox/icdar_13_table/src/main.cc
b/scribo/sandbox/icdar_13_table/src/main.cc
index 7151b16..be394ba 100644
--- a/scribo/sandbox/icdar_13_table/src/main.cc
+++ b/scribo/sandbox/icdar_13_table/src/main.cc
@@ -8,6 +8,7 @@
#include <mln/fun/v2v/rgb_to_luma.hh>
#include <mln/io/pbm/all.hh>
+#include <mln/io/pdf/load.hh>
#include <mln/io/ppm/all.hh>
#include <mln/labeling/all.hh>
@@ -180,6 +181,10 @@ void find_borders(image2d<bool>& ima,
bottom = find_bottom(ima);
}
+ /********/
+ /* MAIN */
+ /********/
+
int main(int argc, char** argv)
{
typedef value::label_16 V;
@@ -195,116 +200,139 @@ int main(int argc, char** argv)
V nhlines, nvlines;
L hlines_ima, vlines_ima;
scribo::component_set<L> hlines, vlines;
+ std::ostringstream path;
- // PARAMETERS
+ // Parameters
unsigned av_height = 4;
unsigned av_width = 4;
int delta_prox_h = 5;
unsigned min_height = 71;
unsigned min_width = 31;
+ unsigned dpi = 72;
- // Loadin and binarization
+ // Loading and binarization
std::ofstream xml;
start_xml(xml, "final.xml", argv[1]);
- io::ppm::load(original, argv[1]);
- filtered = data::transform(original,
mln::fun::v2v::rgb_to_luma<value::int_u8>());
+ //io::ppm::load(original, argv[1]);
+ util::array< image2d<value::rgb8> > pdf;
+ io::pdf::load(pdf, argv[1], dpi);
+ for (unsigned page = 0; page < pdf.nelements(); ++page)
+ {
+ original = pdf[page];
+ filtered = data::transform(original,
mln::fun::v2v::rgb_to_luma<value::int_u8>());
- bin = scribo::binarization::sauvola(filtered, 81, 0.44);
- final = data::convert(value::rgb8(), bin);
+ bin = scribo::binarization::sauvola(filtered, 81, 0.44);
+ final = data::convert(value::rgb8(), bin);
- initialize(mask, bin);
- initialize(ima_texts, bin);
- initialize(ima_tables, bin);
- data::fill(ima_tables, false);
+ initialize(mask, bin);
+ initialize(ima_texts, bin);
+ initialize(ima_tables, bin);
+ data::fill(ima_tables, false);
- bin_without_lines = duplicate(bin);
+ bin_without_lines = duplicate(bin);
- // Lines extraction
- hlines = scribo::primitive::extract::lines_h_discontinued(bin, c4(), nhlines,
min_width, 2);
- vlines = scribo::primitive::extract::lines_v_discontinued(bin, c4(), nvlines,
min_height, 2);
+ // Lines extraction
+ hlines = scribo::primitive::extract::lines_h_discontinued(bin, c4(), nhlines,
min_width, 2);
+ vlines = scribo::primitive::extract::lines_v_discontinued(bin, c4(), nvlines,
min_height, 2);
- get_horizontal_lines(hlines, ima_tables, bin_without_lines, av_height);
- get_vertical_lines(vlines, ima_tables, bin_without_lines, av_width, delta_prox_h);
+ get_horizontal_lines(hlines, ima_tables, bin_without_lines, av_height);
+ get_vertical_lines(vlines, ima_tables, bin_without_lines, av_width, delta_prox_h);
- // Denoising
- bin_without_lines_denoised = scribo::preprocessing::denoise_fg(bin_without_lines, c8(),
3);
+ // Denoising
+ bin_without_lines_denoised = scribo::preprocessing::denoise_fg(bin_without_lines,
c8(), 4);
- // Set the tables mask
- unsigned n;
- labeled = labeling::blobs(ima_tables, c8(), n);
- masks = scribo::component_set< image2d<unsigned> >(labeled, n);
- data::fill(mask, false);
+ // Set the tables mask
+ unsigned n;
+ labeled = labeling::blobs(ima_tables, c8(), n);
+ masks = scribo::component_set< image2d<unsigned> >(labeled, n);
+ data::fill(mask, false);
- for (unsigned i = 1; i <= masks.nelements(); ++i)
- data::fill((mask | masks(i).bbox()).rw(), true);
+ for (unsigned i = 1; i <= masks.nelements(); ++i)
+ data::fill((mask | masks(i).bbox()).rw(), true);
- // Compose table zones with bin_without_lines_lines
- ima_texts = logical::and_(bin_without_lines_denoised, mask);
+ // Compose table zones with bin_without_lines_denoised
+ ima_texts = logical::and_(bin_without_lines_denoised, mask);
- // Isolate texts between tables
- for (unsigned i = 1; i <= masks.nelements(); ++i)
- {
- image2d<bool> table_mask, isolated_text;
- std::ostringstream path;
- bool empty = true;
+ // Isolate texts between tables
+ for (unsigned i = 1; i <= masks.nelements(); ++i)
+ {
+ image2d<bool> table_mask, isolated_text;
+ bool empty = true;
- initialize(table_mask, bin);
- data::fill(table_mask, false);
- data::fill((table_mask | masks(i).bbox()).rw(), true);
+ initialize(table_mask, bin);
+ data::fill(table_mask, false);
+ data::fill((table_mask | masks(i).bbox()).rw(), true);
- isolated_text = logical::and_(bin_without_lines_denoised, table_mask);
+ isolated_text = logical::and_(bin_without_lines_denoised, table_mask);
- mln_piter_(image2d<bool>) p(isolated_text.domain());
+ mln_piter_(image2d<bool>) p(isolated_text.domain());
- for_all(p)
- empty = empty && !(isolated_text(p));
+ for_all(p)
+ empty = empty && !(isolated_text(p));
- if (!empty)
- {
- path << "output/8_" << i << "_isolated.pbm";
- io::pbm::save(isolated_text, path.str());
+ if (!empty)
+ {
+ path.str("");
+ path << "output/p" << page << "_8_"
<< i << "_isolated.pbm";
+ io::pbm::save(isolated_text, path.str());
- // Find coordinated
- unsigned left, right, top, bottom;
- find_borders(isolated_text, left, right, top, bottom);
+ // Find coordinates
+ unsigned left, right, top, bottom;
+ find_borders(isolated_text, left, right, top, bottom);
- std::cout << "(" << left << "," << top
<< ") ->"
- << "(" << right << "," <<
bottom << ")" << std::endl;
+ point2d p1, p2, p3, p4;
- point2d p1, p2, p3, p4;
+ p1 = point2d(top, left);
+ p2 = point2d(top, right);
+ p3 = point2d(bottom, right);
+ p4 = point2d(bottom, left);
- p1 = point2d(top, left);
- p2 = point2d(top, right);
- p3 = point2d(bottom, right);
- p4 = point2d(bottom, left);
+ draw::line(final, p1, p2, literal::green);
+ draw::line(final, p2, p3, literal::green);
+ draw::line(final, p3, p4, literal::green);
+ draw::line(final, p4, p1, literal::green);
- draw::line(final, p1, p2, literal::red);
- draw::line(final, p2, p3, literal::red);
- draw::line(final, p3, p4, literal::red);
- draw::line(final, p4, p1, literal::red);
-
- write_table(xml, p1, p3);
+ write_table(xml, p1, p3);
+ }
}
- }
- // Get lines images
- hlines_ima = hlines.labeled_image();
- vlines_ima = vlines.labeled_image();
- ima_hlines = data::convert(bool(), hlines_ima);
- ima_vlines = data::convert(bool(), vlines_ima);
-
- // Write images and close XML
- io::pbm::save(bin, "output/0_bin.pbm");
- io::pbm::save(bin_without_lines, "output/1_bin_without_lines.pbm");
- io::pbm::save(bin_without_lines_denoised,
"output/2_bin_without_lines_denoised.pbm");
- io::pbm::save(ima_hlines, "output/3_hlines.pbm");
- io::pbm::save(ima_vlines, "output/4_vlines.pbm");
- io::pbm::save(ima_tables, "output/5_tables.pbm");
- io::pbm::save(mask, "output/6_mask.pbm");
- io::pbm::save(ima_texts, "output/7_texts.pbm");
- /* Save 8_i_isolated */
- io::ppm::save(final, "output/9_final.ppm");
+ // Get lines images
+ hlines_ima = hlines.labeled_image();
+ vlines_ima = vlines.labeled_image();
+ ima_hlines = data::convert(bool(), hlines_ima);
+ ima_vlines = data::convert(bool(), vlines_ima);
+
+ // Write this page's images (the XML is closed after the page loop)
+ path.str(""); path << "output/p" << page <<
"_0_bin.pbm";
+ io::pbm::save(bin, path.str());
+
+ path.str(""); path << "output/p" << page <<
"_1_bin_without_lines.pbm";
+ io::pbm::save(bin_without_lines, path.str());
+
+ path.str(""); path << "output/p" << page <<
"_2_bin_without_lines_denoised.pbm";
+ io::pbm::save(bin_without_lines_denoised, path.str());
+
+ path.str(""); path << "output/p" << page <<
"_3_hlines.pbm";
+ io::pbm::save(ima_hlines, path.str());
+
+ path.str(""); path << "output/p" << page <<
"_4_vlines.pbm";
+ io::pbm::save(ima_vlines, path.str());
+
+ path.str(""); path << "output/p" << page <<
"_5_tables.pbm";
+ io::pbm::save(ima_tables, path.str());
+
+ path.str(""); path << "output/p" << page <<
"_6_mask.pbm";
+ io::pbm::save(mask, path.str());
+
+ path.str(""); path << "output/p" << page <<
"_7_texts.pbm";
+ io::pbm::save(ima_texts, path.str());
+
+ /* Save 8_i_isolated */
+
+ path.str(""); path << "output/p" << page <<
"_9_final.ppm";
+ io::ppm::save(final, path.str()); // rgb8 image written by the PPM saver, hence ".ppm"
+ }
end_xml(xml);
--
1.7.2.5