
--- scribo/sandbox/raphael/code/makefile | 17 +- scribo/sandbox/raphael/code/my/document/clean.hh | 488 +++++++-- .../sandbox/raphael/code/my/document/document.hh | 1182 ++++++++++++++++++-- scribo/sandbox/raphael/code/my/document/letters.hh | 12 +- .../raphael/code/my/document/recognition.hh | 74 ++ .../sandbox/raphael/code/my/document/separator.hh | 64 +- scribo/sandbox/raphael/code/my/runtime/lib.hh | 180 +++ scribo/sandbox/raphael/code/my/runtime/runtime.hh | 196 ++++ scribo/sandbox/raphael/code/test.cc | 232 +++-- 9 files changed, 2157 insertions(+), 288 deletions(-) create mode 100644 scribo/sandbox/raphael/code/my/document/recognition.hh create mode 100644 scribo/sandbox/raphael/code/my/runtime/lib.hh create mode 100644 scribo/sandbox/raphael/code/my/runtime/runtime.hh diff --git a/scribo/sandbox/raphael/code/makefile b/scribo/sandbox/raphael/code/makefile index 288352e..8eddc7a 100644 --- a/scribo/sandbox/raphael/code/makefile +++ b/scribo/sandbox/raphael/code/makefile @@ -1,13 +1,18 @@ speed: - g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -O3 -fwhole-program - ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin ima.pbm + ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -O3 -fwhole-program + ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm std: - g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena - ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin ima.pbm + ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena + ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm release: - g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -DNDEBUG -O2 - ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin ima.pbm + ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . 
-I ./../../../../milena -DNDEBUG -O2 + ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm +debug: + ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -DNDEBUG -ggdb + ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm +test: + ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm clean: rm -r -f ../bin/* diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh index 56c7445..b996b8a 100644 --- a/scribo/sandbox/raphael/code/my/document/clean.hh +++ b/scribo/sandbox/raphael/code/my/document/clean.hh @@ -144,7 +144,7 @@ namespace mymln if((!doc.contain_line(q))) { // draw::line(out, q,v, mln::literal::blue); - if(doc.allign_V(q,v) && doc.allign_size(q, v) && (doc.allign_proximity_large(q, v) || doc.allign_proximity_large(v, q)) ) + if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity_large(q, v) ) { doc.add_to_line_link(v, q); All_Alone = false; @@ -152,7 +152,7 @@ namespace mymln } else { - if(doc.allign_V(q,v) && doc.allign_size(q, v) && (doc.allign_proximity_large(q, v) || doc.allign_proximity_large(v, q))) + if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity_large(q, v)) { doc.add_to_line_link(q, v); All_Alone = false; @@ -186,7 +186,12 @@ namespace mymln { for_all(q) { - if(!doc.allign_H(q, v) && doc.allign_base_line(v, q)) + if( + !doc.allign_H(q, v) && + doc.allign_base_line_line_strict(v, q) && + doc.allign_proximity(v,q) && + doc.allign_smaller_line(v, q) && + doc.get_line_length(v) > 2) { doc.add_to_line_link(v, q); doc.add_letter_coerce(q); @@ -200,14 +205,21 @@ namespace mymln mln_niter_(nbh_t) q2(nbh, v2); for_all(v2) { - if(doc.contain_line(v2)) + if(doc.contain_line(v2) && !doc.contain_alone_letter(v2)) { for_all(q2) { - if (doc.allign_H_Large(v2, q2) && doc.allign_top(v2, q2)) + if ( + doc.allign_H_large(v2, q2) && + doc.allign_top(v2, q2) 
&& + doc.line_has(v2, q2) && + doc.letter_ratio_XY(q2) < 2 && + doc.allign_size_width_large(v2, q2)) { doc.add_to_line_link(v2, q2); doc.add_letter_coerce(q2); + if(doc.allign_H(v2,q2)) + {doc.merge(v2,q2); doc.tag_label(v2, "i");} } } } @@ -216,12 +228,8 @@ namespace mymln } template<typename L, typename F, typename D> - void clean_quote_items(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s) + void clean_quote_items(mymln::document::document<L,F,D>& doc) { - #ifndef NGRAPHDEBUG - image2d<value::rgb8> out; - mln::initialize(out, s); - #endif typedef vertex_image<point2d,bool> v_ima_g; typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; v_ima_g mask = doc.fun_mask_start_end_lines(); @@ -239,38 +247,40 @@ namespace mymln { if(doc.allign_size(v,q)) { - if(!doc.contain_alone_letter(v) && !doc.contain_alone_letter(q) ) + if(!doc.contain_alone_letter(v) && !doc.contain_alone_letter(q) && doc.allign_proximity_large_left(v,q) ) { if( - (doc.allign_top(v, q) || doc.allign_top(doc.get_beginning_of_line(v), doc.get_label(q))) + (doc.allign_top(v, q) || doc.allign_top(doc.get_beginning_of_line(v), doc.get_label(q)) ) && + doc.allign_smaller_line_letter(v,q) ) { - doc.add_to_line_link(v, q); - draw::line(out, q,v, mln::literal::green); + if(doc.get_line_length(q) < 3 || doc.allign_V_line(v, q)) + { + doc.add_to_line_link(v, q); + doc.tag_label(v, "'"); + } } } - else if(doc.allign_top(v, q) && !doc.allign_H(v, q)) - { - doc.add_to_line_link(v, q); - } - else + else if(doc.allign_top(v, q) && !doc.allign_H(v, q) && doc.allign_proximity_large_left(v,q) && doc.allign_smaller_line_letter(v,q)) { - draw::line(out, q,v, mln::literal::magenta); + if(doc.get_line_length(q) < 3 || doc.allign_V_line(v, q)) + { + doc.add_to_line_link(v, q); + doc.tag_label(v, "'"); + } } } - else if (doc.allign_H_Large(q, v) && doc.allign_top(v, q)) + else if (doc.allign_H_large(q, v) && doc.allign_top(v, q) && doc.allign_size_width_large(v, q)) { 
doc.add_to_line_link(v, q); - draw::line(out, q,v, mln::literal::blue); + if(doc.allign_H(v,q)) + {doc.merge(v,q); doc.tag_label(v, "i");} } } } } } - #ifndef NGRAPHDEBUG - io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); - #endif doc.propage_line_link(); doc.recook_lines(); } @@ -287,6 +297,7 @@ namespace mymln mln_niter_(nbh_t) q(nbh, v); for_all(v) { + if(doc.contain_letter(v)) { if(doc.contain_line(v)) @@ -303,9 +314,37 @@ namespace mymln { doc.add_to_line_link(v, q); } + } } } + } + doc.propage_line_link(); + } + + + template<typename L, typename F, typename D> + void clean_proximity_letters(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_all_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_line(v)) + { + for_all(q) + { + if(doc.contain_line(q) && doc.get_line_length(q) == 1 && doc.line_influence_has(v,q)) + { + doc.add_to_line_link(q,v); + } + } + } } } @@ -326,33 +365,80 @@ namespace mymln { for_all(q) { - if(doc.contain_line(q)) + if(doc.same_line(q,v)){continue;} + if(doc.contain_alone_letter(q)) { - if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v)) + if(doc.allign_V(q,v) && doc.allign_proximity_strict(q, v) && doc.allign_size_height(q, v)) { doc.add_to_line_link(v, q); + + } + + } + else if(doc.contain_line(q)) + { + if(doc.allign_V(q,v) && doc.allign_size_height_line_strict(q, v) && doc.allign_proximity_strict(q,v)) + { + doc.add_to_line_link(v, q); + } else if(doc.allign_size_height_line(q,v)) { - if(doc.allign_proximity_line(q,v) && doc.allign_V_line(q, v)) + if(doc.allign_proximity_line(q,v) && doc.allign_V_line_strict(q, v)) { doc.add_to_line_link(v, q); + 
doc.debug_draw_line_green_buffer(v,q); + } + else if(doc.line_influence_reciprocal(q, v) && doc.allign_V_line_strict(q, v)) + { + doc.add_to_line_link(v, q); + doc.debug_draw_line_red_buffer(v,q); } } + + } } } } doc.propage_line_link(); } - + template<typename L, typename F, typename D> + void clean_between(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_start_end_lines(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + L Start = 0; + L End = 0; + for_all(q) + { + if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v)) + { + if(doc[q] == doc.get_beginning_of_line(q)) + {Start = doc[q]; } + else + {End = doc[q]; } + } + + } + if(Start && End){doc.add_to_line_link(Start, doc[v]);} + } + doc.propage_paragraph_link(); + + } + + + template<typename L, typename F, typename D> - void clean_paragraph_items(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s) + void clean_paragraph_items(mymln::document::document<L,F,D>& doc) { - #ifndef NGRAPHDEBUG - image2d<value::rgb8> out; - mln::initialize(out, s); - #endif typedef vertex_image<point2d,bool> v_ima_g; typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; v_ima_g mask = doc.fun_mask_start_end_lines(); @@ -373,19 +459,22 @@ namespace mymln for_all(q) { - if(doc.allign_H_Large(q,v) && doc.allign_size(q, v) && doc.allign_proximity_V_line(v,q)) + if( + doc.allign_H_large(q,v) && + doc.allign_size_height_line(q, v) && + doc.allign_proximity_V_line(v,q) && + doc.allign_size_width_line(q, v) + ) { if(doc.contain_paragraph(q)) { doc.add_to_paragraph(q); doc.add_to_paragraph_link(q, v); - draw::line(out, q,v, mln::literal::green); } else { 
doc.add_to_paragraph(q); doc.add_to_paragraph_link(v, q); - draw::line(out, q,v, mln::literal::magenta); } } @@ -398,8 +487,9 @@ namespace mymln { if( doc.get_beginning_of_line(q) == doc[q] && - doc.allign_H_Large(q,v) && - doc.allign_size(q, v) && + doc.allign_H_large(q,v) && + doc.allign_size_height_line(q, v) && + doc.allign_size_width_line(q, v) && doc.allign_proximity_V_line(v,q) && doc.allign_bottom_line(q,v) ) @@ -416,7 +506,6 @@ namespace mymln { doc.add_to_paragraph_link(v, q); } - draw::line(out, q,v, mln::literal::blue); } else { @@ -427,7 +516,6 @@ namespace mymln } doc.add_to_paragraph(q); doc.add_to_paragraph_link(v, q); - draw::line(out, q,v, mln::literal::blue); } } } @@ -435,7 +523,6 @@ namespace mymln } } } - io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); doc.propage_paragraph_link(); } @@ -473,10 +560,10 @@ namespace mymln doc.propage_line_link(); } template<typename L, typename F, typename D> - void clean_alone_letters_lines(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s) + void clean_alone_letters_lines(mymln::document::document<L,F,D>& doc) { - image2d<value::rgb8> out; - mln::initialize(out, s); + //image2d<value::rgb8> out; + //mln::initialize(out, s); typedef vertex_image<point2d,bool> v_ima_g; typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; v_ima_g mask = doc.fun_mask_alone_letters(); @@ -490,38 +577,303 @@ namespace mymln { for_all(q) { - draw::line(out, q,v, mln::literal::red); - if(doc.line_has(v,q)) - {doc.add_to_line_link(v, q); draw::line(out, q,v, mln::literal::green);} - + //draw::line(out, q,v, mln::literal::red); + if( + ((doc.line_influence_has(v,q) && doc.is_line_representative(q)) || + doc.line_has(v,q)) && + doc.allign_V(v, q) + ) + {doc.add_to_line_link(v, q);} + else if(doc.line_has(v,q)) + {doc.add_to_line_link(v, q);} } } } doc.propage_line_link(); - io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); + 
//io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); + } + + template<typename L, typename F, typename D> + void remove_alone_letter(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_alone_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + for_all(q) + { + if(doc.in_header(q) || doc.in_footer(q)){continue;} + if(doc.contain_alone_letter(q)) {doc.add_noise(q);} + } + } + } + + template<typename L, typename F, typename D> + void clean_odd_letters(mymln::document::document<L,F,D>& doc) + { + for(unsigned int N = 1;N < doc.count(); N++) + { + if(doc.contain_line(N)) + if (doc.get_letter_middle_height(N) * 3 < doc.get_bbox(N).len(0)) + doc.add_noise(N); + else if(doc.get_letter_middle_width(N) * 4 < doc.get_bbox(N).len(1)) + doc.add_noise(N); + } + } + + template<typename L, typename F, typename D> + void clean_paragraphs_up(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_paragraph(v)) + { + for_all(q) + { + if( + doc.contain_paragraph(q) && + !doc.same_paragraph(v, q) && + doc.allign_top_paragraph(q, v) && + doc.decal_left_paragraph(q, v) && + doc.allign_size_height_line(q, v) && + doc.allign_size_width_paragraph(q, v) && + doc.get_paragraph_length(v) == 1 && + doc.allign_H_paragraph(v, q) && + doc.allign_proximity_V_line(v,q) + ) + { + doc.add_to_paragraph_link(q,v); + } + } + } + 
} + doc.propage_paragraph_link(); + } + + template<typename L, typename F, typename D> + void clean_paragraphs_large(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_paragraph(v) && doc.get_paragraph_length(v) > 1) + { + for_all(q) + { + if( + doc.contain_paragraph(q) && + doc.get_paragraph_length(q) > 1 && + !doc.same_paragraph(v, q) && + doc.allign_top_paragraph(q, v)) + { + + if( + doc.decal_left_paragraph_strong(q, v) && + doc.allign_size_height_line(q, v) && + doc.allign_size_width_paragraph(q, v) && + doc.allign_H_paragraph(v, q) && + doc.allign_proximity_V_line(v,q) + ) + { + doc.add_to_paragraph_link(q,v); + + } + } + } + } + } + doc.propage_paragraph_link(); + + + + } + template<typename L, typename F, typename D> + void clean_included_paragraphs(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_paragraph(v) && doc.get_paragraph_length(v) > 1) + { + for_all(q) + { + if( + doc.contain_paragraph(q) && + doc.get_paragraph_length(q) == 1 && + !doc.same_paragraph(q, v) && + doc.paragraph_included_influence(v, q) && + doc.allign_size_height_line(v, q)) + { + if(doc.line_influence_reciprocal(q, v)) + doc.add_to_paragraph_link(v,q); + } + } + } + } + doc.propage_paragraph_link(); + } + + template<typename L, typename F, typename D> 
+ void clean_lines_space(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s) + { + image2d<value::rgb8> out; + mln::initialize(out, s); + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + for_all(q) + { + if(doc.same_line(q, v)) + { + draw::line(out, q,v, mln::literal::red); + if(doc.in_beginning_of_line(q) || doc.in_end_of_line(q)) + { + draw::line(out, q,v, mln::literal::green); + if(doc.space(q, v) > doc.get_letter_middle_space(q) * 10) + { + draw::line(out, q,v, mln::literal::blue); + /*if(doc[q] == doc.get_beginning_of_line(q)) + doc.add_to_line_link(v, q); */ + //doc.add_noise(q); + } + } + } + } + } + doc.propage_line_link(); + io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); + } + + + template<typename L, typename F, typename D> + void clean_included_letters(mymln::document::document<L,F,D>& doc) + { + + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_all_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + for_all(q) + { + if(doc.letter_included(q, v) && doc.allign_size_large_inside(q, v)) + { + + + doc.merge(q,v); + } + } + } + doc.propage_line_link(); + } + + template<typename L, typename F, typename D> - void remove_alone_letter(mymln::document::document<L,F,D>& doc) + void clean_backward_letters(mymln::document::document<L,F,D>& doc) { - typedef vertex_image<point2d,bool> v_ima_g; - typedef p_vertices<mln::util::graph, 
fun::i2v::array<mln::point2d> > g_vertices_p; - v_ima_g mask = doc.fun_mask_alone_letters(); - mln_piter_(v_ima_g) v(mask.domain()); - typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; - nbh_t nbh(mask); - mln_niter_(nbh_t) q(nbh, v); - for_all(v) - { - for_all(q) - { - if(doc.in_header(q) || doc.in_footer(q)){continue;} - doc.add_noise(q); - } - } + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_start_lines(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(!doc.contain_line(v)) + { + for_all(q) + { + if( + doc.contain_line(q) && + doc.get_line_length(q)> 5 && + doc.allign_V(v, q) && + doc.allign_proximity(v, q) && + doc.allign_size_height(v, q) + ) + { + doc.add_to_line_link(q,v); + doc.debug_draw_line_green_buffer(v, q); + } + } + } + } + doc.propage_line_link(); } - - + + template<typename L, typename F, typename D> + void clean_paragraphs_tab(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_all_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_paragraph(v)) + { + for_all(q) + { + if( + doc.contain_paragraph(q) && + !doc.same_paragraph(q,v) && + !doc.paragraph_start_with_tab(q) && + doc.allign_top_paragraph(q, v) && + doc.get_paragraph_length(q) > 1 && + doc.get_first_line_ID(doc[q]) == doc.get_line_ID(doc[q]) && + doc.allign_size_width_paragraph(q,v) && + doc.allign_proximity_paragraph_up(q,v) && + doc.allign_H_paragraph(q,v) + ) + { + 
doc.add_to_paragraph_link(q,v); + doc.debug_draw_line_green_buffer(v, q); + } + } + } + } + doc.propage_paragraph_link(); + } + } } diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh index 1a84edb..880d4c5 100644 --- a/scribo/sandbox/raphael/code/my/document/document.hh +++ b/scribo/sandbox/raphael/code/my/document/document.hh @@ -40,7 +40,7 @@ namespace mymln // WARNING: Methods on lines like get_end_of_line can be used only after a line cooking. - + document(){} document(image2d<Label>& ima, image2d<Label>& ima_influ,mln::util::array<box2d>& bboxgp, g_vertices_p& area_graph, Label Areas) { img = ima; @@ -56,17 +56,19 @@ namespace mymln alone_letters_mask = fun::i2v::array<bool>(Areas + 1); implicit_separators_left_mask = fun::i2v::array<bool>(Areas + 1); implicit_separators_right_mask = fun::i2v::array<bool>(Areas + 1); + kill_mask = fun::i2v::array<bool>(Areas + 1); CImpSep = 1; NImpSep = 2; lines_union = mymln::util::union_find<Label>(Areas + 1); implicit_separators_union = mymln::util::union_find<Label>(Areas + 1); - + debug_buffer_enable = false; paragraphs_union = mymln::util::union_find<Label>(Areas + 1); tag_lbl = mln::util::array<std::string>(Areas + 1); + Btag_lbl = mln::util::array<bool>(Areas + 1); lines_split = mln::util::array<Label>(Areas + 1); lines_split.fill(0); - + tag_lbl.fill(""); img_influ = ima_influ; CSep = 0; CSepH = 0; @@ -79,6 +81,8 @@ namespace mymln Areas_Number_ = Areas + 1; } + inline unsigned int count() + {return Areas_Number_;} /* OPERATION ON PAGE */ inline bool in_header(const point2d& p) { return p[0] < (img_influ.domain().len(0) / 8);} @@ -93,16 +97,28 @@ namespace mymln /* OPERATION ON PARAGRAPH */ inline bool link_paragraphs() { - for(unsigned int N = 1; N < Areas_Number_; N++) - { + paragraphs_union[0] = 0; + for(unsigned int N = 1; N < Areas_Number_; N++) + { paragraphs_union.invalidate_link(N); - if(start_lines_mask(N)) + if (!contain_paragraph(N) && 
contain_line(N)) { - paragraphs_union.add_self_link(N); + jump_to_paragraph(N); + if(start_lines_mask(N)) + { + add_to_paragraph(N); + paragraphs_union.add_self_link(N); + } } - else if(contain_line(N)) + } + for(unsigned int N = 1; N < Areas_Number_; N++) + { + + if (!contain_paragraph(N) && contain_line(N)) { - if(get_beginning_of_line(N) == 0){std::cout <<"ERROR#\n";} + jump_to_paragraph(get_beginning_of_line(N)); + add_to_paragraph(N); + if(contain_line(N) && get_beginning_of_line(N) != 0) paragraphs_union.add_link(get_beginning_of_line(N), N); } } @@ -200,12 +216,14 @@ namespace mymln inline void cook_line_splitting_exclusive() { + std::cout << "--> start union exclusive" << std::endl; for(unsigned int N = 1; N < Areas_Number_; N++) { lines_union.invalidate_link(N); if(end_lines_mask(N) || implicit_separators_right_mask(N)) split_line_exclusive(N); } + std::cout << "--> start linking" << std::endl; for(unsigned int N = 1; N < Areas_Number_; N++) { if(lines_union.is_self_link(N)) @@ -221,17 +239,19 @@ namespace mymln } lines_union[0] = 0; lines_union.invalidate_link(0); + std::cout << "--> propage union " << std::endl; for(unsigned int N = 1; N < Areas_Number_; N++) { if(!contain_line(N) || lines_union.is_self_link(N)) continue; Label pos = get_end_of_line(N); - while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] > _bboxgp[N].pmin()[1]) - pos = lines_split[pos]; - if(pos != 0) + Label oldpos = pos; + while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] >= _bboxgp[N].pmin()[1]) + {oldpos = pos; pos = lines_split[pos];} + if(pos != 0 && pos != N && pos < Areas_Number_ ) {lines_union[N] = lines_union[pos]; lines_union.add_link(pos,N);} } - + std::cout << "--> end propage union " << std::endl; //lines_union.propage_links();lines_union cook_lines(); } @@ -266,10 +286,9 @@ namespace mymln Label pos = get_beginning_of_line(N); while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] < _bboxgp[N].pmin()[1]) pos = lines_split[pos]; - if(pos != 0) 
+ if(pos != 0 && pos != N && pos < Areas_Number_ ) {lines_union[N] = lines_union[pos]; lines_union.add_link(pos,N);} } - //lines_union.propage_links();lines_union cook_lines(); } @@ -285,6 +304,46 @@ namespace mymln { return same_line(img_influ(A), img_influ(B)); } inline bool same_line(const Label A, const Label B) { return lines_union[A] == lines_union[B]; } + inline bool same_paragraph(const point2d& A, const point2d& B) + { return same_paragraph(img_influ(A), img_influ(B)); } + inline bool same_paragraph(const Label A, const Label B) + { return paragraphs_union[A] == paragraphs_union[B]; } + + inline bool in_beginning_of_line(const point2d& A) + {return in_beginning_of_line(img_influ(A));} + inline bool in_beginning_of_line(const Label A) + {return lines_bbox[lines_union[A]].len(1) / 8 + lines_bbox[lines_union[A]].pmin()[1] > _bboxgp[A].pmax()[1];} + + + inline bool is_line_representative(const point2d& A) + {return is_line_representative(img_influ(A));} + inline bool is_line_representative(const Label A) + {return lines_bbox[lines_union[A]].len(0) < _bboxgp[A].len(0) * 2 ;} + + + inline bool in_end_of_line(const point2d& A) + {return in_end_of_line(img_influ(A));} + inline bool in_end_of_line(const Label A) + {return lines_bbox[lines_union[A]].pmax()[1] - lines_bbox[lines_union[A]].len(1) / 8 < _bboxgp[A].pmax()[1];} + + inline unsigned int space(const point2d& A,const point2d& B) + {return space(img_influ(A), img_influ(B));} + inline unsigned int space(const Label A, const Label B) + { + box2d LB = _bboxgp[A]; + box2d RB = _bboxgp[B]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + return DisA; + } + + + inline void add_new_line(const point2d& point) { add_new_line(img_influ(point)); } @@ -351,6 +410,7 @@ namespace mymln /* SET UP SPECIAL MASK TO FALSE */ implicit_separators_left_mask(lbl) = false; 
implicit_separators_right_mask(lbl) = false; + kill_mask(lbl) = false; } void inline invalid_letter(const point2d& point) {invalid_letter(img_influ(point));} @@ -400,8 +460,9 @@ namespace mymln void add_letter(const Label lbl) { CLet++; - if(label_valid_size_Min_(lbl, 2)) + if(label_valid_size_Min_(lbl, 3) || label_valid_size_Min_Large_(lbl, 2)) { + img_influ(_bboxgp[lbl].pcenter()) = lbl; letters_mask(lbl) = true; all_letters_mask(lbl) = true; separators_mask(lbl) = false; @@ -519,6 +580,15 @@ namespace mymln inline bool allign_top( const point2d& Left, const point2d& Right) {return allign_top(img_influ(Left), img_influ(Right));} + inline bool allign_top_line( const point2d& Left, const point2d& Right) + {return allign_top_line(img_influ(Left), img_influ(Right));} + + inline bool allign_top_line( const Label Left, const Label Right) + { + short int allignV = lines_bbox[lines_union[Left]].pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0]; + return (!allignV < 0) && allignV * 2 > lines_bbox[lines_union[Left]].len(0); + } + inline bool allign_top( const Label Left, const Label Right) { short int allignV = label_allign_(0, Left, Right); @@ -535,6 +605,10 @@ namespace mymln return allignV < label_size_(0, Left) && (_bboxgp[Left].pcenter()[0]) > (_bboxgp[Right].pcenter()[0]); } + + + + inline bool allign_up_line_line( const point2d& Left, const point2d& Right) {return allign_up_line_line(img_influ(Left), img_influ(Right));} @@ -548,10 +622,30 @@ namespace mymln (lines_bbox[lines_union[Left]].pcenter()[0]) > (lines_bbox[lines_union[Left]].pcenter()[0]); } - inline bool allign_H_Large( const point2d& Left, const point2d& Right) - {return allign_H_Large(img_influ(Left), img_influ(Right));} + inline bool allign_left( const point2d& Left, const point2d& Right) + {return allign_left(img_influ(Left), img_influ(Right));} + + inline bool allign_left( const Label Left, const Label Right) + { + + return _bboxgp[Left].pmin()[0] > _bboxgp[Right].pmin()[0]; + } + + inline 
bool allign_right( const point2d& Left, const point2d& Right) + {return allign_right(img_influ(Left), img_influ(Right));} + + inline bool allign_right( const Label Left, const Label Right) + { + + return _bboxgp[Left].pmin()[0] < _bboxgp[Right].pmin()[0]; + } + + - inline bool allign_H_Large( const Label Left, const Label Right) + inline bool allign_H_large( const point2d& Left, const point2d& Right) + {return allign_H_large(img_influ(Left), img_influ(Right));} + + inline bool allign_H_large( const Label Left, const Label Right) { short int allignV = label_allign_(1, Left, Right) * 1.5f; return allignV < label_size_(1, Left); @@ -610,20 +704,80 @@ namespace mymln inline bool allign_proximity( const Label Left, const Label Right) { - short int SizeL0 = label_size_(0, Left); - short int SizeL1 = label_size_(1, Left); - short int Swap = 0; - if(SizeL0 < SizeL1) - { SizeL0 = SizeL1; } - short int Dis = _bboxgp[Left].pmin()[1] - _bboxgp[Right].pmin()[1]; - if(Dis < 0) - Dis = -Dis; - return Dis < SizeL0 * 1.5f; + box2d LB = _bboxgp[Left]; + box2d RB = _bboxgp[Right]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + + unsigned int HA = LB.len(0); + unsigned int HB = LB.len(1); + + if(HB > HA) + { HA = HB; } + return (DisA * 2) < HA * 3; } - inline bool allign_proximity_line( const point2d& Left, const point2d& Right) - {return allign_proximity_line(img_influ(Left), img_influ(Right));} + + + + inline bool allign_proximity_strict( const point2d& Left, const point2d& Right) + {return allign_proximity_strict(img_influ(Left), img_influ(Right));} + + inline bool allign_proximity_strict( const Label Left, const Label Right) + { + box2d LB = _bboxgp[Left]; + box2d RB = _bboxgp[Right]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) 
+ { DisA = DisB; } + + unsigned int HA = LB.len(0); + unsigned int HB = RB.len(0); + unsigned int VA = LB.len(1); + unsigned int VB = RB.len(1); + + if(VA > HA) + { HA = VA; } + if(VB > HB) + { HB = VB; } + return (DisA * 2) < HA && (DisA * 2) < HB; + } + + + inline bool allign_proximity_paragraph_up( const point2d& Left, const point2d& Right) + {return allign_proximity_paragraph_up(img_influ(Left), img_influ(Right));} + + inline bool allign_proximity_paragraph_up( const Label Left, const Label Right) + { + box2d LB = paragraphs_bbox[paragraphs_union[Left]]; + box2d RB = paragraphs_bbox[paragraphs_union[Right]]; + + int DisA = LB.pmax()[0] - RB.pmin()[0]; + int DisB = RB.pmax()[0] - LB.pmin()[0]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + + unsigned int HA = lines_bbox[paragraphs_first_line[paragraphs_union[Left]]].len(0); + unsigned int HB = lines_bbox[paragraphs_first_line[paragraphs_union[Right]]].len(0); + if(HA < HB) + { HA = HB; } + return (DisA * 5) < HA; + } + + inline bool allign_proximity_line_large( const point2d& Left, const point2d& Right) + {return allign_proximity_line_large(img_influ(Left), img_influ(Right));} inline bool allign_size_height_line( const point2d& Left, const point2d& Right) { @@ -637,6 +791,61 @@ namespace mymln return SizeR > (SizeL / 2.2f) && SizeR < (SizeL * 2.2); } + inline bool allign_size_height_line_strict( const point2d& Left, const point2d& Right) + { + return allign_size_height_line_strict(img_influ(Left), img_influ(Right)); + } + + inline bool allign_size_height_line_strict( const Label Left, const Label Right) + { + short int SizeL = lines_bbox[lines_union[Left]].len(0); + short int SizeR = lines_bbox[lines_union[Right]].len(0); + return SizeR > (SizeL / 1.3f) && SizeR < (SizeL * 1.3); + } + + inline bool allign_proximity_line( const point2d& Left, const point2d& Right) + {return allign_proximity_line(img_influ(Left), img_influ(Right));} + + inline bool 
allign_size_width_large( const point2d& Left, const point2d& Right) + { + return allign_size_width_large(img_influ(Left), img_influ(Right)); + } + + inline bool allign_size_width_large( const Label Left, const Label Right) + { + short int SizeL = _bboxgp[lines_union[Left]].len(1); + short int SizeR = _bboxgp[lines_union[Right]].len(1); + return SizeR >= (SizeL / 5) && SizeR <= (SizeL * 5); + } + + + + + inline bool allign_size_width_line( const point2d& Left, const point2d& Right) + { + return allign_size_width_line(img_influ(Left), img_influ(Right)); + } + + inline bool allign_size_width_line( const Label Left, const Label Right) + { + short int SizeL = lines_bbox[lines_union[Left]].len(1); + short int SizeR = lines_bbox[lines_union[Right]].len(1); + return SizeR > (SizeL / 4) && SizeR < (SizeL * 4); + } + + inline bool allign_size_width_paragraph( const point2d& Left, const point2d& Right) + { + return allign_size_width_paragraph(img_influ(Left), img_influ(Right)); + } + + inline bool allign_size_width_paragraph( const Label Left, const Label Right) + { + short int SizeL = paragraphs_bbox[paragraphs_union[Left]].len(1); + short int SizeR = paragraphs_bbox[paragraphs_union[Right]].len(1); + return SizeR > ((SizeL * 2) / 3) && SizeR < ((SizeL * 3) / 2); + } + + inline bool allign_proximity_line( const Label Left, const Label Right) { box2d LB = lines_bbox[lines_union[Left]]; @@ -658,6 +867,27 @@ namespace mymln } + inline bool allign_proximity_line_large( const Label Left, const Label Right) + { + box2d LB = lines_bbox[lines_union[Left]]; + box2d RB = lines_bbox[lines_union[Right]]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + + unsigned int HA = LB.len(0); + unsigned int HB = RB.len(0); + + if(HA < HB) + { HA = HB; } + return (DisA * 2) < HA; + } + + inline bool allign_proximity_V_line( const point2d& Left, const point2d& 
Right) {return allign_proximity_V_line(img_influ(Left), img_influ(Right));} @@ -680,21 +910,93 @@ namespace mymln { HA = HB; } return (DisA * 1.5f) < HA; } + + inline bool allign_H_paragraph( const point2d& Left, const point2d& Right) + {return allign_H_paragraph(img_influ(Left), img_influ(Right));} + inline bool allign_H_paragraph( const Label Left, const Label Right ) + { + short int Dis = paragraphs_bbox[paragraphs_union[Left]].pcenter()[1] - paragraphs_bbox[paragraphs_union[Right]].pcenter()[1]; + if(Dis < 0){Dis = -Dis;} + return + Dis * 2 < paragraphs_bbox[paragraphs_union[Right]].len(1) && + Dis * 2 < paragraphs_bbox[paragraphs_union[Left]].len(1); + } + + inline bool allign_top_paragraph( const point2d& Left, const point2d& Right) + {return allign_top_paragraph(img_influ(Left), img_influ(Right));} + inline bool allign_top_paragraph( const Label Left, const Label Right ) + {return paragraphs_bbox[paragraphs_union[Left]].pmin()[0] > paragraphs_bbox[paragraphs_union[Right]].pmax()[0]; } + + inline bool decal_left_paragraph(const point2d& Left, const point2d& Right) + {return decal_left_paragraph(img_influ(Left), img_influ(Right));} + inline bool decal_left_paragraph( const Label Left, const Label Right ) + { + return paragraphs_bbox[paragraphs_union[Left]].pmin()[1] > paragraphs_bbox[paragraphs_union[Right]].pmin()[1] + + (paragraphs_bbox[paragraphs_union[Right]].len(1) / 40) ; + } + + inline bool decal_left_paragraph_strong(const point2d& Left, const point2d& Right) + {return decal_left_paragraph_strong(img_influ(Left), img_influ(Right));} + inline bool decal_left_paragraph_strong( const Label Left, const Label Right ) + { + return paragraphs_bbox[paragraphs_union[Left]].pmin()[1] > paragraphs_bbox[paragraphs_union[Right]].pmin()[1] + + (paragraphs_bbox[paragraphs_union[Right]].len(1) / 20) ; + } + + + inline bool allign_proximity_large_left( const point2d& Left, const point2d& Right) + {return allign_proximity_large_left(img_influ(Left), img_influ(Right));} 
+ inline bool allign_proximity_large_left( const Label Left, const Label Right) + { + box2d LB = _bboxgp[Left]; + box2d RB = _bboxgp[Right]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + + unsigned int HA = LB.len(0); + unsigned int HB = RB.len(0); + unsigned int VA = LB.len(1); + unsigned int VB = RB.len(1); + + if(VA > HA) + { HA = VA; } + if(VB > HB) + { HB = VB; } + return (DisA) < HA * 2; + } + + inline bool allign_proximity_large( const point2d& Left, const point2d& Right) {return allign_proximity_large(img_influ(Left), img_influ(Right));} inline bool allign_proximity_large( const Label Left, const Label Right) { - short int SizeL0 = label_size_(0, Left); - short int SizeL1 = label_size_(1, Left); - short int Swap = 0; - if(SizeL0 < SizeL1) - { SizeL0 = SizeL1; } - short int Dis = _bboxgp[Left].pmin()[1] - _bboxgp[Right].pmin()[1]; - if(Dis < 0) - Dis = -Dis; - return Dis < SizeL0 * 3; + box2d LB = _bboxgp[Left]; + box2d RB = _bboxgp[Right]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + + unsigned int HA = LB.len(0); + unsigned int HB = RB.len(0); + unsigned int VA = LB.len(1); + unsigned int VB = RB.len(1); + + if(VA > HA) + { HA = VA; } + if(VB > HB) + { HB = VB; } + return (DisA) < HA * 2 && (DisA) < HB * 2; } @@ -705,7 +1007,23 @@ namespace mymln short int SizeR = label_size_(0, Right); return SizeR > (SizeL / 3) && SizeR < (SizeL * 3); } + + inline bool allign_size_strict( const point2d& Left, const point2d& Right) + {return allign_size_strict(img_influ(Left), img_influ(Right));} /* point overload: resolve both labels through img_influ, then apply the strict Label overload (was forwarding to allign_size) */ + inline bool allign_size_strict( const Label Left, const Label Right) + { + short int SizeL0 = label_size_(0, Left); + short int SizeR0 = label_size_(0, Right); + short int SizeL1 = label_size_(1, Left); + short
int SizeR1 = label_size_(1, Right); + short int Swap = 0; + if(SizeL0 < SizeL1) + { SizeL0 = SizeL1; } + if(SizeR0 < SizeR1){SizeR0 = SizeR1;} + return SizeR0 > (SizeL0 / 2) && SizeR0 < (SizeL0 * 2); + } + inline bool allign_size( const point2d& Left, const point2d& Right) {return allign_size(img_influ(Left), img_influ(Right));} @@ -754,6 +1072,18 @@ namespace mymln return allignV < lines_bbox[lines_union[Left]].len(0) && allignV < lines_bbox[lines_union[Right]].len(0); } + inline bool allign_V_line_strict( const point2d& Left, const point2d& Right) + {return allign_V_line_strict(img_influ(Left), img_influ(Right));} + + inline bool allign_V_line_strict( Label Left, Label Right) + { + short int allignV = lines_bbox[lines_union[Left]].pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0]; + if(allignV<0){allignV = -allignV;} + allignV *= 4; + return allignV < lines_bbox[lines_union[Left]].len(0) && allignV < lines_bbox[lines_union[Right]].len(0); + } + + inline bool allign_center_line( const point2d& Left, const point2d& Right) {return allign_center_line(img_influ(Left), img_influ(Right));} inline bool allign_center_line( Label Left, Label Right) @@ -762,6 +1092,7 @@ namespace mymln if(allignC<0){allignC = -allignC;} return allignC * 5 < lines_bbox[lines_union[Left]].len(0); } + inline bool allign_smaller_line( const point2d& Left, const point2d& Right) {return allign_smaller_line(img_influ(Left), img_influ(Right));} @@ -770,6 +1101,12 @@ namespace mymln return lines_bbox[lines_union[Left]].len(0) > (lines_bbox[lines_union[Right]].len(0) * 2); } + inline bool allign_smaller_line_letter( const point2d& Left, const point2d& Right) + {return allign_smaller_line_letter(img_influ(Left), img_influ(Right));} + inline bool allign_smaller_line_letter( Label Left, Label Right) + { + return lines_bbox[lines_union[Left]].len(0) > (_bboxgp[Right].len(0) * 1.5f); + } inline bool allign_V_large( const point2d& Left, const point2d& Right) {return allign_V_large(img_influ(Left), 
img_influ(Right));} @@ -801,6 +1138,18 @@ namespace mymln allignV < lines_bbox[lines_union[Left]].len(0) && lines_bbox[lines_union[Left]].pcenter()[0] < lines_bbox[lines_union[Right]].pcenter()[0]; } + inline bool allign_base_line_line_strict(const point2d& Left, const point2d& Right) + {return allign_base_line_line_strict(img_influ(Left), img_influ(Right));} + inline bool allign_base_line_line_strict(const Label Left, const Label Right) + { + short int allignV = lines_bbox[lines_union[Left]].pcenter()[0] - _bboxgp[Right].pcenter()[0]; + if(allignV<0){allignV = -allignV;} + allignV *= 3; + return + allignV < lines_bbox[lines_union[Left]].len(0) && + lines_bbox[lines_union[Left]].pcenter()[0] < lines_bbox[lines_union[Right]].pcenter()[0]; + } + inline bool allign_bottom(const point2d& Left, const point2d& Right) {return allign_bottom(img_influ(Left), img_influ(Right));} inline bool allign_bottom(const Label Left, const Label Right) @@ -832,7 +1181,15 @@ namespace mymln Float AFactor = label_allign_(1, Left, Right); return AFactor < label_size_(1,Left); } - + inline bool paragraph_start_with_tab(const point2d& Point) + { return paragraph_start_with_tab(img_influ(Point));} + inline bool paragraph_start_with_tab(const Label Paragraph) + { + Label FirstLine = paragraphs_first_line[paragraphs_union[Paragraph]]; + return + lines_bbox[FirstLine].pmin()[1] > paragraphs_bbox[paragraphs_union[Paragraph]].pmin()[1] + + (paragraphs_bbox[paragraphs_union[Paragraph]].len(1) / 20); /* true when the first line starts more than 1/20 of the paragraph width right of the paragraph's left edge; FirstLine is a line id, so its box is read from lines_bbox rather than the per-label _bboxgp */ + } void stat() { @@ -844,38 +1201,158 @@ namespace mymln std::cout << " lines(s) : " << CLine << std::endl; } - void debug_save_all(std::string file, image2d<bool> source) + void debug_set_image(image2d<bool>& source) + {debug_source = source;} + inline void debug_create_buffer() + { + mln::initialize(debug_buffer,img_influ); + debug_buffer_enable = true; + } + + inline void debug_save_buffer(std::string file) + { + debug_buffer_enable = false; + io::ppm::save(mln::debug::superpose(debug_buffer,
debug_source, literal::white) , file); + } + + inline void debug_draw_line_red_buffer(const point2d& A,const point2d& B ) + { + if(debug_buffer_enable) + draw::line(debug_buffer, A, B, mln::literal::red); + } + inline void debug_draw_line_green_buffer(const point2d& A,const point2d& B ) + { + if(debug_buffer_enable) + draw::line(debug_buffer, A, B, mln::literal::green); + } + + void debug_save_union(std::string file) + {debug_save_union(file, debug_source);} + void debug_save_union(std::string file, image2d<bool> source) { image2d<value::rgb8> ima_color; mln::initialize(ima_color,img_influ); - for(unsigned int N = 0; N < lines_bbox.size(); N++) + for(unsigned int N = 1; N < lines_union.size(); N++) { - if(lines_bbox[N].is_valid()) + if(lines_union[N]) { - draw::box(ima_color, lines_bbox[N], mln::literal::blue); + if(N == lines_union.link(N)) + draw::box(ima_color, _bboxgp[N], mln::literal::blue); + else + draw::line(ima_color, _bboxgp[N].pcenter(), _bboxgp[lines_union.link(N)].pcenter(), mln::literal::blue); } } - for(unsigned int N = 0; N < paragraphs_bbox.size(); N++) + for(unsigned int N = 1; N < paragraphs_union.size(); N++) { - if(paragraphs_bbox[N].is_valid()) + if(paragraphs_union[N]) { - draw::box(ima_color, paragraphs_bbox[N], mln::literal::red); + if(N == paragraphs_union.link(N)) + draw::box(ima_color, _bboxgp[N], mln::literal::red); + else + draw::line(ima_color, _bboxgp[N].pcenter(), _bboxgp[paragraphs_union.link(N)].pcenter(), mln::literal::red); } } - for(unsigned int N = 0; N < lines_first_label.size(); N++) + io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file); + } + void debug_save_lines(std::string file) + {debug_save_lines(file, debug_source);} + void debug_save_lines(std::string file, image2d<bool> source) + { + image2d<value::rgb8> ima_color; + mln::initialize(ima_color,img_influ); + + for(unsigned int N = 0; N < lines_bbox.size(); N++) + { + if(lines_bbox[N].is_valid()) + { + draw::box(ima_color, lines_bbox[N], 
mln::literal::blue); + } + } + for(unsigned int N = 0; N < _bboxgp.size(); N++) + { + if(_bboxgp[N].is_valid() && contain_letter(N)) + { + draw::box(ima_color, _bboxgp[N], mln::literal::cyan); + } + } + io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file); + } + + + + + + void debug_save_all(std::string file) + {debug_save_all(file, debug_source);} + void debug_save_all(std::string file, image2d<bool> source) + { + image2d<value::rgb8> ima_color; + mln::initialize(ima_color,img_influ); + + for(unsigned int N = 0; N < _bboxgp.size(); N++) + { + if(_bboxgp[N].is_valid() && contain_letter(N)) + { + draw::box(ima_color, _bboxgp[N], mln::literal::cyan); + } + } + + for(unsigned int N = 0; N < lines_first_label.size(); N++) { if(_bboxgp[lines_first_label[N]].is_valid()) { draw::box(ima_color, _bboxgp[lines_first_label[N]], mln::literal::yellow); } } + + for(unsigned int N = 0; N < lines_last_label.size(); N++) + { + if(_bboxgp[lines_last_label[N]].is_valid()) /* validate the same box that is drawn just below (was testing lines_first_label) */ + { + draw::box(ima_color, _bboxgp[lines_last_label[N]], mln::literal::orange); + } + } + + + for(unsigned int N = 0; N < lines_bbox.size(); N++) + { + if(lines_bbox[N].is_valid()) + { + draw::box(ima_color, lines_bbox[N], mln::literal::blue); + } + } + for(unsigned int N = 0; N < lines_influ_bbox.size(); N++) + { + if(lines_influ_bbox[N].is_valid()) + { + //draw::box(ima_color, lines_influ_bbox[N], mln::literal::cyan); + } + } + for(unsigned int N = 0; N < paragraphs_bbox.size(); N++) + { + if(paragraphs_bbox[N].is_valid()) + { + draw::box(ima_color, paragraphs_bbox[N], mln::literal::red); + if(paragraphs_bbox_influ[N].is_valid()) + { + draw::box(ima_color, paragraphs_bbox_influ[N], mln::literal::orange); + } + } + + } + for(unsigned int N = 0; N < _bboxgp.size(); N++) + { + if(_bboxgp[N].is_valid() && (implicit_separators_left_mask(N) || implicit_separators_right_mask(N))) + { + // draw::box(ima_color, _bboxgp[N], mln::literal::yellow); + } + } +
io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file); } void debug_save_paragraphs(std::string file) { mymln::debug::save_label_image(img, paragraphs_union , file);} - void debug_save_lines(std::string file) - { mymln::debug::save_label_image(img, lines_union , file);} void debug_save_separators(std::string file) { mymln::debug::save_label_image(img, implicit_separators_union , file);} vertex_image<point2d,bool> fun_mask_separators() @@ -944,10 +1421,29 @@ namespace mymln mln::util::array<box2d> bbox_enlarge_mask_noise(short int x, short int y) { return bbox_mask_enlarge_(noise_mask, x, y); } - Label get_label(point2d point) + Label get_label(const point2d& point) { return img_influ(point); } + + inline box2d get_paragraph_bbox(const point2d& point) + { return get_paragraph_bbox(img_influ(point)); } + + inline box2d get_paragraph_bbox(Label L) + { return paragraphs_bbox[paragraphs_union[L]]; } + + + inline box2d get_line_bbox(const point2d& point) + { return get_line_bbox(img_influ(point)); } + + inline box2d get_line_bbox(Label L) + { return lines_bbox[lines_union[L]]; } + + inline unsigned int get_paragraph_length(const point2d& point) + { return get_paragraph_length(img_influ(point)); } - inline unsigned int get_line_length(point2d point) + inline unsigned int get_paragraph_length(Label L) + { return paragraphs_len[paragraphs_union[L]]; } + + inline unsigned int get_line_length(const point2d& point) { return get_line_length(img_influ(point)); } inline unsigned int get_line_length(Label L) @@ -958,6 +1454,102 @@ namespace mymln inline unsigned int get_line_width(Label L) { return lines_bbox[lines_union[L]].len(1); } + + + + inline Float letter_ratio_XY(const point2d& point) + {return letter_ratio_XY(img_influ(point));} + inline Float letter_ratio_XY(Label Letter) + { + return (Float)_bboxgp[Letter].len(1) / (Float)_bboxgp[Letter].len(0); + } + inline bool line_median(const point2d& point) + { return line_median(img_influ(point)); } + + 
inline bool line_median(Label Letter) + { + short int D = _bboxgp[Letter].pcenter()[0] - get_line_bbox(Letter).pcenter()[0]; + if(D<0)D=-D; + return D * 3 < get_line_bbox(Letter).len(0); + } + + inline bool line_size_small(const point2d& point) + { return line_size_small(img_influ(point)); } + inline bool line_size_small(Label Letter) + { + return _bboxgp[Letter].len(0) * 3 < get_line_bbox(Letter).len(0); + } + + inline bool line_base(const point2d& point) + { return line_base(img_influ(point)); } + inline bool line_base(Label Letter) + { + short int D = _bboxgp[Letter].pcenter()[0] - get_line_bbox(Letter).pcenter()[0]; + if(D<0)D=-D; + return + D * 2 < get_line_bbox(Letter).len(0) && + get_line_bbox(Letter).pcenter()[0] + (get_line_bbox(Letter).len(0) / 5) < _bboxgp[Letter].pcenter()[0]; + } + + inline bool letter_included(point2d Par1, point2d Par2) + { return letter_included(img_influ(Par1), img_influ(Par2)); } + inline bool letter_included(Label Par1, Label Par2) + { + return + _bboxgp[Par1].has(_bboxgp[Par2].pmin()) && + _bboxgp[Par1].has(_bboxgp[Par2].pmax()) ; + } + + inline bool paragraph_included_influence(point2d Par1, point2d Par2) + { return paragraph_included_influence(img_influ(Par1), img_influ(Par2)); } + inline bool paragraph_included_influence(Label Par1, Label Par2) + { + return + paragraphs_bbox_influ[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmin()) && + paragraphs_bbox_influ[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmax()) ; + } + + inline bool paragraph_included(point2d Par1, point2d Par2) + { return paragraph_included(img_influ(Par1), img_influ(Par2)); } + inline bool paragraph_included(Label Par1, Label Par2) + { + return + paragraphs_bbox[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmin()) && + paragraphs_bbox[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmax()) ; + } + + inline bool line_influence_reciprocal(const point2d& L1, const point2d& 
L2) + {return line_influence_reciprocal(img_influ(L1), img_influ(L2));} + + inline bool line_influence_reciprocal(Label L1, Label L2) + { + return + lines_influ_bbox[lines_union[L1]].has(lines_influ_bbox[lines_union[L2]].pmin()) || + lines_influ_bbox[lines_union[L1]].has(lines_influ_bbox[lines_union[L2]].pmax()) || + lines_influ_bbox[lines_union[L2]].has(lines_influ_bbox[lines_union[L1]].pmin()) || + lines_influ_bbox[lines_union[L2]].has(lines_influ_bbox[lines_union[L1]].pmax()) ; + } + + inline bool allign_size_large_inside( const point2d& Left, const point2d& Right) + {return allign_size_large_inside(img_influ(Left), img_influ(Right));} + + inline bool allign_size_large_inside( const Label Left, const Label Right) + { + short int SizeL0 = label_size_(0, Left); + short int SizeR0 = label_size_(0, Right); + short int SizeL1 = label_size_(1, Left); + short int SizeR1 = label_size_(1, Right); + short int Swap = 0; + if(SizeL0 < SizeL1) + { SizeL0 = SizeL1; } + if(SizeR0 < SizeR1){SizeR0 = SizeR1;} + return SizeR0 > (SizeL0 / 5) && SizeR0 < (SizeL0); + } + inline bool paragraph_has(point2d Par, point2d Point) + { return paragraph_has(img_influ(Par), Point); } + + inline bool paragraph_has(Label Par, point2d Point) + { return paragraphs_bbox[paragraphs_union[Par]].has(Point); } /* true when Point lies inside the bounding box of Par's paragraph; mirrors line_has, which tests lines_bbox (was subscripting the function name) */ inline bool line_has(point2d Line, point2d Point) { return line_has(img_influ(Line), Point); } @@ -965,6 +1557,13 @@ namespace mymln inline bool line_has(Label Line, point2d Point) { return lines_bbox[lines_union[Line]].has(Point); } + inline bool line_influence_has(point2d Line, point2d Point) + { return line_influence_has(img_influ(Line), Point); } + + inline bool line_influence_has(Label Line, point2d Point) + { return lines_influ_bbox[lines_union[Line]].has(Point); } + + inline unsigned int get_beginning_of_line(point2d point) { return get_beginning_of_line(img_influ(point)); } @@ -991,6 +1590,7 @@ namespace mymln lines_first_label.fill(0); lines_last_label.fill(0); lines_len.fill(0); +
start_lines_mask(0) = false; end_lines_mask(0) = false; @@ -1007,6 +1607,7 @@ namespace mymln end_lines_mask = fun::i2v::array<bool>(Areas_Number_); start_end_lines_mask = fun::i2v::array<bool>(Areas_Number_); lines_bbox = mln::util::array<box2d>(NLine + 1); + lines_influ_bbox = mln::util::array<box2d>(NLine + 1); lines_len.fill(0); start_lines_mask(0) = false; end_lines_mask(0) = false; @@ -1076,7 +1677,68 @@ namespace mymln inline bool contain_implicit_separator(const Label lbl) {return implicit_separators_union[lbl] != 0; } + inline void merge(const point2d& A, const point2d& B) + { + merge(img_influ(A), img_influ(B)); + } + inline void merge(const Label A, const Label B) + { + if( A && B && !kill_mask(A) && !kill_mask(B) && A != B) + { + img_influ(_bboxgp[B].pcenter()) = A; + _bboxgp[A].merge(_bboxgp[B]); + _bboxgp[B] = box2d(); + kill_mask(B) = true; + if(letters_mask(A) && letters_mask(B)) + { + if(lines_union.is_self_link(B)) + { + lines_union.add_self_link(A); + lines_union.add_link(A, B); + } + } + else if(alone_letters_mask(A) && letters_mask(B)) + { + alone_letters_mask(A) = false; + letters_mask(A) = true; + all_letters_mask(A) = true; + if(lines_union.is_self_link(B)) + { + lines_union.add_self_link(A); + lines_union.add_link(A, B); + } + } + else if(letters_mask(B)) + { + add_letter_coerce(A); + lines_union.add_link(B, A); + } + + + + implicit_separators_left_mask(B) = false; + implicit_separators_right_mask(B) = false; + noise_mask(B) = false; + alone_letters_mask(B) = false; + all_letters_mask(B) = false; + letters_mask(B) = false; + separators_mask(B) = false; + containers_mask(B) = false; + Hseparator_mask(B) = false; + Vseparator_mask(B) = false; + if(letters_mask(A) && start_lines_mask(B)){start_lines_mask(A) = true;} + if(letters_mask(A) && end_lines_mask(B)){end_lines_mask(A) = true;} + if(letters_mask(A) && start_end_lines_mask(B)){start_end_lines_mask(A) = true;} /* B's line-boundary flags are copied to A here, so none of them may be cleared before this point */ + start_end_lines_mask(B) = false; + + + start_lines_mask(B) = false; +
end_lines_mask(B) = false; + start_end_lines_mask(B) = false; + + } + } inline void add_to_separator_left(const point2d& point) @@ -1106,6 +1768,23 @@ namespace mymln point2d p = _bboxgp[i].pcenter(); return p; } + inline void reset_tag_bool() + {Btag_lbl.fill(false);} + inline void tag_label_bool(const point2d& point, bool tag) + { tag_label_bool(img_influ(point), tag);} + inline void tag_label_bool(Label lbl, bool tag) + {Btag_lbl[lbl] = tag;} + + inline bool get_tag_bool(const point2d& point) + { return get_tag_bool(img_influ(point));} + inline bool get_tag_bool(Label lbl) + {return Btag_lbl[lbl];} + + inline std::string get_tag(const point2d& point) + { return get_tag(img_influ(point));} + inline std::string get_tag(Label lbl) + {return tag_lbl[lbl];} + inline void tag_label(const point2d& point, std::string tag) { tag_label(img_influ(point), tag);} inline void tag_label(Label lbl, std::string tag) @@ -1125,26 +1804,140 @@ namespace mymln inline void lines_iter_valid() { return SeqP < Areas_Number_; } - + inline void recook_paragraphs() + { + paragraphs_len.fill(0); + cook_paragraphs_(); + } inline void cook_paragraphs() { paragraphs_bbox = mln::util::array<box2d>(NPar + 1); + paragraphs_len = mln::util::array<unsigned int>(NPar + 1); + paragraphs_first_line = mln::util::array<unsigned int>(NPar + 1); + paragraphs_bbox_influ = mln::util::array<box2d>(NPar + 1); cook_paragraphs_(); } + inline void compute_letter_middle_space() + { + lines_space = mln::util::array<unsigned int>(NLine + 1); + lines_space.fill(0); + compute_letter_middle_space_(); + } + inline void compute_letter_middle_height() + { + lines_height = mln::util::array<unsigned int>(NLine + 1); + lines_height.fill(0); + compute_letter_middle_height_(); + } + inline void compute_letter_middle_width() + { + lines_width = mln::util::array<unsigned int>(NLine + 1); + lines_width.fill(0); + compute_letter_middle_width_(); + } + inline void recompute_letter_middle_space() + { + lines_space.fill(0); + 
compute_letter_middle_space_(); /* rebuild lines_space, which was just zeroed (was calling the height routine) */ + } + inline void recompute_letter_middle_height() + { + lines_height.fill(0); + compute_letter_middle_height_(); + } + inline void recompute_letter_middle_width() + { + lines_width.fill(0); + compute_letter_middle_width_(); /* rebuild lines_width, which was just zeroed (was calling the height routine) */ + } + + inline unsigned int get_letter_middle_space(const point2d& point) + {return get_letter_middle_space(img_influ(point));} + inline unsigned int get_letter_middle_space(const Label lbl) + {return lines_space[lines_union[lbl]];} + + + inline unsigned int get_letter_middle_height(const point2d& point) + {return get_letter_middle_height(img_influ(point));} + inline unsigned int get_letter_middle_height(const Label lbl) + {return lines_height[lines_union[lbl]];} + + inline unsigned int get_letter_middle_width(const point2d& point) + {return get_letter_middle_width(img_influ(point));} + inline unsigned int get_letter_middle_width(const Label lbl) + {return lines_width[lines_union[lbl]];} + + inline unsigned int get_line_ID(const Label lbl) + { + return lines_union[lbl]; + } + inline unsigned int get_first_line_ID(const Label lbl) + { + return paragraphs_first_line[paragraphs_union[lbl]]; + } + inline unsigned int get_first_line() + { + return first_line; + } + inline unsigned int get_first_letter(const unsigned int line_ID) + { + return lines_first_label[line_ID]; + } + inline void get_next_line(int& line_ID) + { + if(lines_seq_pos[line_ID] == line_ID){ line_ID = 0; } + line_ID = lines_seq_pos[line_ID]; + } + inline void get_next_line(unsigned int& line_ID) + { + if(lines_seq_pos[line_ID] == line_ID){ line_ID = 0; } + line_ID = lines_seq_pos[line_ID]; + } + inline void get_next_letter(Label& lbl) + { + if(lines_seq[lbl] == lbl){ lbl = 0; } + lbl = lines_seq[lbl]; + } + inline void get_next_letter(int& lbl) + { + if(lines_seq[lbl] == lbl){ lbl = 0; } + lbl = lines_seq[lbl]; + } + inline void get_next_letter(unsigned int& lbl) + { + if(lines_seq[lbl] == lbl){ lbl = 0; } + lbl = lines_seq[lbl]; + } + inline
std::string get_line_string(const unsigned int ID) + { + std::string line = ""; + unsigned int Last = 0; + for(int N = get_first_letter(ID); N != 0; get_next_letter(N)) + { + if(Last) + if(space(Last,N) > get_letter_middle_space(N) * 2) + line += " "; + + if(!get_tag(N).compare("")){line += "?";} + else{line += get_tag(N);} + Last = N; + } + return line; + } private: fun::i2v::array<bool> implicit_separators_left_mask; fun::i2v::array<bool> implicit_separators_right_mask; mln::util::array<unsigned int> separators_len_right; mln::util::array<unsigned int> separators_len_left; mln::util::array<unsigned int> separators_middle; - + mln::util::array<unsigned int> separators_marging; inline void cook_separators_() { implicit_separators_left_mask(0) = false; - for(unsigned int N = 1; N < implicit_separators_union.size(); N++) + for(int N = 1; N < implicit_separators_union.size(); N++) { if(implicit_separators_union[N] != 0) { @@ -1160,11 +1953,8 @@ namespace mymln /* processor */ for(unsigned int N = 1; N < NImpSep + 1; N++) { - if(separators_len_left[N] != 0) - { if(separators_len_left[N] != 0) separators_middle[N] /= separators_len_left[N]; - } } @@ -1177,27 +1967,27 @@ namespace mymln implicit_separators_left_mask(N) = false; } else if ( - _bboxgp[N].pmin()[1] < separators_middle[implicit_separators_union[N]] - 10 || - _bboxgp[N].pmin()[1] > separators_middle[implicit_separators_union[N]] + 10 + _bboxgp[N].pmin()[1] < separators_middle[implicit_separators_union[N]] - _bboxgp[N].len(1) * 2 || + _bboxgp[N].pmin()[1] > separators_middle[implicit_separators_union[N]] + _bboxgp[N].len(1) * 2 ) { - + /* separators_len_left[implicit_separators_union[N]]--; implicit_separators_union[N] = 0; - implicit_separators_left_mask(N) = false; + implicit_separators_left_mask(N) = false;*/ } } for(unsigned int N = 1; N < Areas_Number_; N++) { if(!start_lines_mask(N) || implicit_separators_union[N] == 0) { - if( separators_len_left[implicit_separators_union[N]] > 0) + if( 
separators_len_left[implicit_separators_union[N]] > 0) separators_len_left[implicit_separators_union[N]]--; } } for(unsigned int N = 1; N < Areas_Number_; N++) { - if(separators_len_left[implicit_separators_union[N]] < 2) + if(separators_len_left[implicit_separators_union[N]] < 1) { separators_len_left[implicit_separators_union[N]] = 0; implicit_separators_union[N] = 0; @@ -1225,11 +2015,8 @@ namespace mymln /* processor */ for(unsigned int N = 1; N < NImpSep + 1; N++) { - if(separators_len_right[N] != 0) - { if(separators_len_right[N] != 0) separators_middle[N] /= separators_len_right[N]; - } } @@ -1247,9 +2034,9 @@ namespace mymln ) { - separators_len_right[implicit_separators_union[N]]--; + /*separators_len_right[implicit_separators_union[N]]--; implicit_separators_union[N] = 0; - implicit_separators_right_mask(N) = false; + implicit_separators_right_mask(N) = false;*/ } } for(unsigned int N = 1; N < Areas_Number_; N++) @@ -1262,7 +2049,7 @@ namespace mymln } for(unsigned int N = 1; N < Areas_Number_; N++) { - if(separators_len_right[implicit_separators_union[N]] < 2) + if(separators_len_right[implicit_separators_union[N]] < 1) { separators_len_right[implicit_separators_union[N]] = 0; implicit_separators_union[N] = 0; @@ -1274,57 +2061,150 @@ namespace mymln // PRIVATE DATA ON LINES mln::util::array<unsigned int> lines_len; + mln::util::array<unsigned int> lines_height; + mln::util::array<unsigned int> lines_width; + mln::util::array<unsigned int> lines_space; mln::util::array<unsigned int> lines_first_label; mln::util::array<unsigned int> lines_last_label; mln::util::array<unsigned int> lines_seq; mln::util::array<unsigned int> lines_seq_pos; mln::util::array<box2d> lines_bbox; + mln::util::array<box2d> lines_influ_bbox; mln::util::array<Label> lines_split; fun::i2v::array<bool> start_lines_mask; fun::i2v::array<bool> end_lines_mask; fun::i2v::array<bool> start_end_lines_mask; + unsigned int first_line; unsigned int SeqP; + inline void 
compute_letter_middle_width_() + { + for(unsigned int N = 1; N < Areas_Number_; N++) + { + if(lines_union[N]) + { + lines_width[lines_union[N]] += _bboxgp[N].len(1); + } + } + for(unsigned int N = 1; N < lines_width.size(); N++) /* bound by the array being averaged (was lines_height.size()) */ + { + if(lines_len[N]) + lines_width[N] /= lines_len[N]; + } + } + + inline void compute_letter_middle_height_() + { + + for(unsigned int N = 1; N < Areas_Number_; N++) + { + if(lines_union[N]) + { + lines_height[lines_union[N]] += _bboxgp[N].len(0); + } + } + for(unsigned int N = 1; N < lines_height.size(); N++) + { + if(lines_len[N]) + lines_height[N] /= lines_len[N]; + } + } + + inline void compute_letter_middle_space_() + { + + for(unsigned int N = 1; N < Areas_Number_; N++) + { + if(lines_union[N]) + { + lines_space[lines_union[N]] += _bboxgp[N].len(1); + } + } + for(unsigned int N = 1; N < lines_space.size(); N++) + { + if(lines_len[N] > 1) /* lines_len is unsigned, so "lines_len[N] - 1 > 0" wrapped around and was true for empty lines */ + { + + if(lines_space[N] > lines_bbox[N].len(1)) + lines_space[N] = 0; + else + { + lines_space[N] = (lines_bbox[N].len(1) - lines_space[N]) / (lines_len[N] - 1); + } + } + else + { + lines_space[N] = 0; + } + - + } + } inline void cook_lines_iter_() { + first_line = 0; lines_seq = mln::util::array<unsigned int>(Areas_Number_); lines_seq_pos = mln::util::array<unsigned int>(NLine + 1); lines_seq.fill(0); lines_seq_pos.fill(0); - for(unsigned int N = 0; N < NLine + 1; N++) - { - lines_seq[SeqP] = lines_first_label[N]; - lines_seq_pos[N] = SeqP; - SeqP += lines_len[N]; - } - for(unsigned int N = 1; N < Areas_Number_; N++) + + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) { - if(contain_line(N) && !start_lines_mask(N)) + if(contain_letter(v)) + for_all(q) { - SeqP =
lines_seq_pos[get_beginning_of_line(N)]; - SeqP++; - - while( lines_seq[SeqP] && _bboxgp[lines_seq[SeqP]].pmin()[1] < _bboxgp[N].pmin()[1] ) - SeqP++; - if(!lines_seq[SeqP]) - {lines_seq[SeqP] = N;} - else - { - unsigned int Swap1, Swap2; - Swap1 = lines_seq[SeqP]; - lines_seq[SeqP] = N; - while(lines_seq[SeqP]) + if(contain_letter(q)) + if(same_line(q, v)) { - Swap2 = lines_seq[SeqP]; - lines_seq[SeqP] = Swap1; - Swap1 = Swap2; + if(_bboxgp[img_influ(q)].pmax()[1] < _bboxgp[img_influ(v)].pmin()[1]) + { + if(lines_seq[img_influ(q)]) + { + if(_bboxgp[lines_seq[img_influ(q)]].pmin()[1] > _bboxgp[img_influ(v)].pmin()[1]) + lines_seq[img_influ(q)] = img_influ(v); + } + else + lines_seq[img_influ(q)] = img_influ(v); + } + } + else + { + if(get_line_bbox(q).pmax()[0] < get_line_bbox(v).pmin()[0]) + { + if(lines_seq_pos[lines_union[img_influ(q)]]) + { + if(lines_bbox[lines_seq_pos[lines_union[img_influ(q)]]].pmin()[0] > get_line_bbox(v).pmin()[0]) + lines_seq_pos[lines_union[img_influ(q)]] =lines_union[img_influ(v)]; + } + else + lines_seq_pos[lines_union[img_influ(q)]] = lines_union[img_influ(v)]; + + } } - lines_seq[SeqP] = Swap1; - } } } + std::cout << "end graph cooking"; + unsigned int Last = 0; + int count = 0; + for(unsigned int N = 1; N < lines_seq_pos.size() && N < lines_len.size(); N++) + { + if(lines_len[N] && !first_line) + {first_line = N;} + if(lines_len[N] && Last) + {lines_seq_pos[Last] = N;} + if(lines_len[N]) + {Last = N; std::cout << lines_len[N] << endl; count++;} + + } + std::cout << count << endl; + std::cout << "linear"; } inline void cook_lines_() @@ -1332,7 +2212,7 @@ namespace mymln Cooked_CLine = CLine; for(unsigned int N = 1; N < lines_union.size(); N++) { - if(lines_union[N] != 0) + if(lines_union[N] != 0 && !kill_mask(N)) { /* APPROXIMATE THE NUMBER OF CHAR IN THE LINE */ lines_len[lines_union[N]]++; @@ -1360,6 +2240,7 @@ namespace mymln if( lines_first_label[N] != 0) { lines_bbox[N] = box2d(); + start_lines_mask(lines_first_label[N]) = true; 
end_lines_mask(lines_last_label[N]) = true; start_end_lines_mask(lines_first_label[N]) = true; @@ -1381,11 +2262,25 @@ namespace mymln lines_bbox[lines_union[N]].merge(_bboxgp[N]); } if(lines_len[lines_union[N]] == 1) - { letters_mask(N) = false; alone_letters_mask(N) = true; } + { + letters_mask(N) = false; + alone_letters_mask(N) = true; + all_letters_mask(N) = true; + end_lines_mask(N) = true; + start_lines_mask(N)= true; + start_end_lines_mask(N) = true; + } else if(lines_union[N]) - { letters_mask(N) = true; alone_letters_mask(N) = false; } + { + letters_mask(N) = true; + alone_letters_mask(N) = false; + all_letters_mask(N) = true; + } + } + for(unsigned int N = 1; N < lines_bbox.size(); N++) + { + lines_influ_bbox[N] = lines_bbox[N].to_larger(lines_bbox[N].len(0) / 3); } - } @@ -1430,7 +2325,13 @@ namespace mymln { Data SX = label_size_(0, label); Data SY = label_size_(1, label); - return SX >= Min && SY >= Min; + return SX >= Min && SY >= Min ; + } + inline bool label_valid_size_Min_Large_(Label label, Data Min) + { + Data SX = label_size_(0, label); + Data SY = label_size_(1, label); + return SX >= Min && SY >= Min || SX >= Min * 2 || SY >= Min * 2; } inline bool label_valid_ratio_(Label label, Float Min, Float Max) { @@ -1500,8 +2401,10 @@ namespace mymln fun::i2v::array<bool> all_letters_mask; fun::i2v::array<bool> containers_mask; fun::i2v::array<bool> noise_mask; + fun::i2v::array<bool> kill_mask; mln::util::array<std::string> tag_lbl; + mln::util::array<bool> Btag_lbl; unsigned int Cooked_CLine; unsigned int CLine; @@ -1521,36 +2424,90 @@ namespace mymln mln::util::array<unsigned int> paragraphs_first_label; mln::util::array<unsigned int> paragraphs_last_label; mln::util::array<unsigned int> paragraphs_assoc; + mln::util::array<unsigned int> paragraphs_len; + /* NOTE THESE ARRAYS MUST BE INITIALIZEDD WITH THE NUMBER OF PARAGRAPH */ + + mln::util::array<box2d> paragraphs_bbox; - + mln::util::array<box2d> paragraphs_bbox_influ; + 
mln::util::array<unsigned int> paragraphs_first_line; + inline void first_recognition() + { + + } + + inline void cook_paragraphs_() { - mln::util::array<unsigned int> paragraphs_assoc(lines_union.size()); - for(int N = 0; N < paragraphs_union.size(); N++) + /* mln::util::array<unsigned int> paragraphs_assoc(lines_union.size()); + + for(int N = 1; N < paragraphs_union.size(); N++) { - if(paragraphs_union[N]) + if(paragraphs_union[N] && lines_union[N] && !start_lines_mask) { if(paragraphs_assoc[lines_union[N]]) - { paragraphs_union.add_link(N, paragraphs_assoc[lines_union[N]]); } + { paragraphs_union.add_link(paragraphs_assoc[lines_union[N]], N); } else - {paragraphs_assoc[lines_union[N]] = N;} + {paragraphs_assoc[lines_union[N]] = get_beginning_of_line(N);} } } - paragraphs_union.propage_links(); + paragraphs_union.propage_links();*/ for(int N = 0; N < paragraphs_bbox.size(); N++) { paragraphs_bbox[N] = box2d(); } + + for(int N = 0; N < lines_len.size(); N++) + { + if(lines_len[N] && paragraphs_union[lines_first_label[N]]) + { + paragraphs_len[paragraphs_union[lines_first_label[N]]]++; + if(paragraphs_first_line[paragraphs_union[lines_first_label[N]]]) + { + if( + lines_bbox[paragraphs_first_line[paragraphs_union[lines_first_label[N]]]].pmin()[0] > + lines_bbox[N].pmin()[0] + ) + { + paragraphs_first_line[paragraphs_union[lines_first_label[N]]] = N; + } + } + else + paragraphs_first_line[paragraphs_union[lines_first_label[N]]] = N; + } + } + for(int N = 0; N < paragraphs_union.size(); N++) { - if(paragraphs_union[N]) + if(paragraphs_union[N] && paragraphs_len[paragraphs_union[N]]) + { paragraphs_bbox[paragraphs_union[N]].merge(lines_bbox[lines_union[N]]); + } + else + { + paragraphs_union[N] = 0; + } } + + + for(int N = 0; N < paragraphs_len.size(); N++) + { + if(paragraphs_len[N]) + { + paragraphs_bbox_influ[N] = paragraphs_bbox[N].to_larger(lines_bbox[paragraphs_first_line[N]].len(0) / 10); + } + } } - + template<typename T> void 
debug_assert_array_(mln::util::array<T>& array, int N, const std::string& name) + { + if(N >= array.size()) + { + std::cout << "WARNING : " << name << " " << N << " " << "is invalid" << endl; + } + } @@ -1574,6 +2531,9 @@ namespace mymln g_vertices_p _area_graph; mln::image2d<Label> img; mln::image2d<Label> img_influ; + mln::image2d<bool> debug_source; + mln::image2d<value::rgb8> debug_buffer; + bool debug_buffer_enable; Label Areas_Number_; /* IMPLICIT SEPARATOR DETECTION */ diff --git a/scribo/sandbox/raphael/code/my/document/letters.hh b/scribo/sandbox/raphael/code/my/document/letters.hh index 6701943..f0249ae 100644 --- a/scribo/sandbox/raphael/code/my/document/letters.hh +++ b/scribo/sandbox/raphael/code/my/document/letters.hh @@ -9,9 +9,17 @@ namespace mymln { namespace document { - void clean_letter_aberation() + template<typename L, typename F, typename D> + void clean_letters_aberations_big(document<L,F,D> doc, mln::util::array<box2> bouningbox_letters, mln::util::array<box2> middle_box_lines, mymln::util::union_find<L> letters_union, ) { - + for(int N = 0; N < letters_union.size(); N++) + { + if(letters_union[N]) + { + doc.get_letter_middle_height(N) * 3 < doc.get_bbox(N).len(0); + doc.add_noise(N); + } + } } } } \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/document/recognition.hh b/scribo/sandbox/raphael/code/my/document/recognition.hh new file mode 100644 index 0000000..9c10cd9 --- /dev/null +++ b/scribo/sandbox/raphael/code/my/document/recognition.hh @@ -0,0 +1,74 @@ +#ifndef INC_RECOGNITION_DOC +#define INC_RECOGNITION_DOC +#include<my/document/document.hh> +#include <mln/core/image/graph_elt_neighborhood.hh> +#include <mln/core/image/vertex_image.hh> +using namespace mln; + +namespace mymln +{ + namespace document + { + template<typename L, typename F, typename D> + void recognize_minus(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, 
fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_line(v)) + { + for_all(q) + { + if(doc.contain_line(q) && doc.line_median(q) && doc.letter_ratio_XY(q) >= 3.0f) + { + doc.tag_label(q, "-"); + } + } + } + } + doc.propage_paragraph_link(); + + } + template<typename L, typename F, typename D> + void recognize_dot(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_line(v)) + { + for_all(q) + { + if( doc.contain_line(q) && doc.same_line(q,v) && doc.line_base(q) && doc.line_size_small(q)) + { + if(doc.letter_ratio_XY(q) > 0.7f && doc.letter_ratio_XY(q) < 1.3f) + doc.tag_label(q, "."); + else if(doc.letter_ratio_XY(q) <= 0.7f) + doc.tag_label(q, ","); + else + doc.tag_label(q, "_"); + + } + } + } + } + doc.propage_paragraph_link(); + + } + } +} + + +#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/document/separator.hh b/scribo/sandbox/raphael/code/my/document/separator.hh index 660bbed..2cd98ea 100644 --- a/scribo/sandbox/raphael/code/my/document/separator.hh +++ b/scribo/sandbox/raphael/code/my/document/separator.hh @@ -133,20 +133,72 @@ namespace mymln if(doc.contain_implicit_separator(v)) { bool All_Alone = true; - doc.jump_to_line(v); - if((!doc.contain_line(v))) + + + for_all(q) { - doc.add_to_line(v); - doc.add_to_line_self_link(v); + + if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v) ) + { + // 
draw::line(out, q,v, mln::literal::blue); + if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(v,q)) + { + count[doc[q]]++; + } + + } + else if (doc.contain_implicit_separator(q)) + { + if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(v,q) && doc.allign_proximity_strict(v, q)) + { + count[doc[q]]++; + } + } + } + } + } + for(unsigned int N = 0; N < doc.size();N++) + { + if(count[N] > 0) + doc.invalidate_implicit_separator(N); + } + } + + template<typename L, typename F, typename D> + void separators_make_clean_right(mymln::document::document<L,F,D>& doc) + { + + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + mln::util::array<unsigned> count = mln::util::array<unsigned>(doc.size()); + count.fill(0); + for_all(v) + { + if(doc.contain_implicit_separator(v)) + { + bool All_Alone = true; for_all(q) { - if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v)) + if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v) ) { // draw::line(out, q,v, mln::literal::blue); - if(doc.allign_V(q,v) && doc.allign_size(q, v)) + if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(q,v)) + { + count[doc[q]]++; + } + + } + else if (doc.contain_implicit_separator(q)) + { + if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(q,v) && doc.allign_proximity_strict(v, q)) { count[doc[q]]++; } diff --git a/scribo/sandbox/raphael/code/my/runtime/lib.hh b/scribo/sandbox/raphael/code/my/runtime/lib.hh new file mode 100644 index 0000000..2a36ef3 --- /dev/null +++ b/scribo/sandbox/raphael/code/my/runtime/lib.hh @@ -0,0 +1,180 @@ +#ifndef INC_RUNTIME_LIB +#define INC_RUNTIME_LIB + +using namespace mln; +using 
namespace std; +namespace mymln +{ + namespace runtime + { + template<typename L, typename F, typename D> + void load_separators(runtime<L,F,D>& run) + { + + run.add_function("separators.find_allign_right", &(mymln::document::separators::separators_find_allign_right)); + run.add_function("separators.make_clean_right", &(mymln::document::separators::separators_make_clean_right)); + run.add_function("separators.find_allign_left", &(mymln::document::separators::separators_find_allign)); + run.add_function("separators.make_clean_left", &(mymln::document::separators::separators_find_allign)); + } + + template<typename L, typename F, typename D> + void load_clean(runtime<L,F,D>& run) + { + run.add_function("clean.containers_items", &(mymln::document::clean_containers_items)); + run.add_function("clean.letters_items", &(mymln::document::clean_letters_items)); + run.add_function("clean.get_lines", &(mymln::document::clean_get_lines)); + + run.add_function("clean.letters_alone", &(mymln::document::clean_letters_alone)); + run.add_function("clean.included_letters", &(mymln::document::clean_included_letters)); + run.add_function("clean.dot_items", &(mymln::document::clean_dot_items)); + run.add_function("clean.quote_items", &(mymln::document::clean_quote_items)); + run.add_function("clean.between", &(mymln::document::clean_between)); + + run.add_function("clean.line_link_item", &(mymln::document::clean_line_link_item)); + run.add_function("clean.proximity_lines", &(mymln::document::clean_proximity_lines)); + run.add_function("clean.quote_lines", &(mymln::document::clean_quote_lines)); + run.add_function("clean.alone_letters_lines", &(mymln::document::clean_alone_letters_lines)); + run.add_function("clean.odd_letters", &(mymln::document::clean_odd_letters)); + + + run.add_function("clean.remove_alone_letter", &(mymln::document::remove_alone_letter)); + run.add_function("clean.paragraph_items", &(mymln::document::clean_paragraph_items)); + 
run.add_function("clean.paragraphs_up", &(mymln::document::clean_paragraphs_up)); + run.add_function("clean.paragraphs_large", &(mymln::document::clean_paragraphs_large)); + run.add_function("clean.included_paragraphs", &(mymln::document::clean_included_paragraphs)); + run.add_function("clean.backward_letters", &(mymln::document::clean_backward_letters)); + run.add_function("clean.paragraphs_tab", &(mymln::document::clean_paragraphs_tab)); + run.add_function("clean.proximity_letters", &(mymln::document::clean_proximity_letters)); + + } + + template<typename L, typename F, typename D> + void lib_debug_save_all(mymln::document::document<L,F,D>& doc, std::string file) + { doc.debug_save_all(file); } + template<typename L, typename F, typename D> + void lib_debug_save_buffer(mymln::document::document<L,F,D>& doc, std::string file) + { doc.debug_save_buffer(file); } + template<typename L, typename F, typename D> + void lib_debug_create_buffer(mymln::document::document<L,F,D>& doc) + { doc.debug_create_buffer(); } + + template<typename L, typename F, typename D> + void load_debug(runtime<L,F,D>& run) + { + run.add_function_string("debug.save", &(lib_debug_save_all)); + run.add_function("debug.create_buffer", &(lib_debug_create_buffer)); + run.add_function_string("debug.save_buffer", &(lib_debug_save_buffer)); + } + + + template<typename L, typename F, typename D> + void lib_cook_lines(mymln::document::document<L,F,D>& doc) + { doc.cook_lines(); } + template<typename L, typename F, typename D> + void lib_recook_lines(mymln::document::document<L,F,D>& doc) + { doc.recook_lines(); } + template<typename L, typename F, typename D> + void lib_cook_separators_left(mymln::document::document<L,F,D>& doc) + { doc.cook_separators(); } + template<typename L, typename F, typename D> + void lib_cook_separators_right(mymln::document::document<L,F,D>& doc) + { doc.cook_separators_right(); } + template<typename L, typename F, typename D> + void 
lib_cook_line_splitting(mymln::document::document<L,F,D>& doc) + { doc.cook_line_splitting(); } + template<typename L, typename F, typename D> + void lib_reset_implicit_separators(mymln::document::document<L,F,D>& doc) + { doc.reset_implicit_separators(); } + + template<typename L, typename F, typename D> + void lib_cook_line_splitting_exclusive(mymln::document::document<L,F,D>& doc) + { doc.cook_line_splitting_exclusive(); } + + template<typename L, typename F, typename D> + void lib_cook_paragraphs(mymln::document::document<L,F,D>& doc) + { doc.cook_paragraphs(); } + + template<typename L, typename F, typename D> + void lib_recook_paragraphs(mymln::document::document<L,F,D>& doc) + { doc.recook_paragraphs(); } + + template<typename L, typename F, typename D> + void load_cooking(runtime<L,F,D>& run) + { + run.add_function("cook.lines", &(lib_cook_lines)); + run.add_function("cook.separators_right", &(lib_cook_separators_right)); + run.add_function("cook.separators_left", &(lib_cook_separators_left)); + run.add_function("recook.lines", &(lib_recook_lines)); + run.add_function("cook.line_splitting", &(lib_cook_line_splitting)); + run.add_function("cook.line_splitting_inclusive", &(lib_cook_line_splitting)); + run.add_function("cook.line_splitting_exclusive", &(lib_cook_line_splitting_exclusive)); + run.add_function("cook.reset_implicit_separators", &(lib_reset_implicit_separators)); + run.add_function("cook.paragraphs", &(lib_cook_paragraphs)); + run.add_function("recook.paragraphs", &(lib_recook_paragraphs)); + + } + + + template<typename L, typename F, typename D> + void lib_compute_letter_middle_height(mymln::document::document<L,F,D>& doc) + { doc.compute_letter_middle_height(); } + template<typename L, typename F, typename D> + void lib_compute_letter_middle_width(mymln::document::document<L,F,D>& doc) + { doc.compute_letter_middle_width(); } + template<typename L, typename F, typename D> + void load_compute(runtime<L,F,D>& run) + { + 
run.add_function("compute.letter_middle_height", &(lib_compute_letter_middle_height)); + run.add_function("compute.letter_middle_width", &(lib_compute_letter_middle_width)); + } + + + template<typename L, typename F, typename D> + void lib_string_print(mymln::document::document<L,F,D>& doc, std::string str) + { std::cout << str; } + template<typename L, typename F, typename D> + void lib_string_print_line(mymln::document::document<L,F,D>& doc, std::string str) + { std::cout << str << std::endl; } + template<typename L, typename F, typename D> + void lib_string_concat(runtime<L,F,D>& run, std::string A, std::string B) + { run.add_variable(A, B); } + + template<typename L, typename F, typename D> + void load_string(runtime<L,F,D>& run) + { + run.add_function_string("string.print", &(lib_string_print)); + run.add_function_string("string.print_line", &(lib_string_print_line)); + run.add_function_string_string("string.concat", &(lib_string_concat)); + run.add_function_string_string("string.clone", &(lib_string_concat)); + } + + + template<typename L, typename F, typename D> + void lib_system_set(runtime<L,F,D>& run, std::string A, std::string B) + { run.add_variable(A, B); } + template<typename L, typename F, typename D> + void lib_system_test_equal(runtime<L,F,D>& run, std::string A, std::string B) + { if(!A.compare("true")){run.call_function(B);} } + + template<typename L, typename F, typename D> + void lib_system_test_nequal(runtime<L,F,D>& run, std::string A, std::string B) + { if(!A.compare("false")){run.call_function(B);} } + + template<typename L, typename F, typename D> + void lib_system_not(runtime<L,F,D>& run, std::string A, std::string B) + { + if(!A.compare("false")){run.add_variable(A, "true");} + else{run.add_variable(A, "false");} + } + + template<typename L, typename F, typename D> + void load_system(runtime<L,F,D>& run) + { + run.add_function_string_string("system.set", &(lib_system_set)); + run.add_function_string_string("system.equal", 
&(lib_string_concat)); + run.add_function_string_string("system.nequal", &(lib_string_concat)); + } + + } +} +#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/runtime/runtime.hh b/scribo/sandbox/raphael/code/my/runtime/runtime.hh new file mode 100644 index 0000000..f4057bc --- /dev/null +++ b/scribo/sandbox/raphael/code/my/runtime/runtime.hh @@ -0,0 +1,196 @@ +#ifndef INC_RUNTIME +#define INC_RUNTIME + +using namespace mln; +using namespace std; +namespace mymln +{ + namespace runtime + { + template<typename Label, typename Float, typename Data> + + class runtime + { + typedef document::document<Label, Float, Data> doc; + typedef void (*fun_doc_ptr)(doc); + public: + runtime() + { + program_argument = mln::util::array<string>(0); + program_argument2 = mln::util::array<string>(0); + program_instruction = mln::util::array<string>(0); + doc_fun = map<string, void(*)(doc&) >(); + doc_arg_fun = map<string, void(*)(doc&, string) >(); + doc_arg2_fun = map<string, void(*)(runtime<Label,Float, Data>&, string, string) >(); + doc_local_fun = map<string, int >(); + call_stack = stack<int>(); + CP = 0; + } + void load(const char* file) + { + + fstream filestream(file, fstream::in | fstream::out); + std::string Buffer = ""; + bool flag = !getline(filestream, Buffer).eof(); + int L = 0; + bool remain = flag; + while(remain) + { + if(!flag){remain = false;} + int N = 0; + std::string Instr = ""; + std::string Arg = ""; + std::string Arg2 = ""; + while( N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while( N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while(N < Buffer.length() && Buffer[N] != ' ' && Buffer[N] != '\t' && Buffer[N] != ';'){Instr += Buffer[N]; N++;} + while(N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while(N < Buffer.length() && Buffer[N] != ' ' && Buffer[N] != '\t' && Buffer[N] 
!= ';'){Arg += Buffer[N]; N++;} + while(N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while(N < Buffer.length() && Buffer[N] != ' ' && Buffer[N] != '\t' && Buffer[N] != ';'){Arg2 += Buffer[N]; N++;} + if(!Instr.compare("fun")) + { + doc_local_fun[Arg] = L; + program_instruction.append(""); + program_argument.append(""); + program_argument2.append(""); + } + else if(Instr.length() > 0 && Instr[0] == '$') + { + program_instruction.append("system.set"); + program_argument.append(Instr); + program_argument2.append(Arg); + } + else + { + program_instruction.append(Instr); + program_argument.append(Arg); + program_argument2.append(Arg2); + } + if(flag) + flag = !getline(filestream, Buffer).eof(); + L++; + } + + filestream.close(); + + } + runtime(const char* file) + { + CP = 0; + program_argument2 = mln::util::array<string>(0); + program_argument = mln::util::array<string>(0); + program_instruction = mln::util::array<string>(0); + doc_local_fun = map<string, void(*)(doc&, string) >(); + load(file); + doc_fun = map<string, void(*)(doc&) >(); + doc_arg_fun = map<string, void(*)(doc&, string) >(); + doc_arg2_fun = map<string, void(*)(runtime<Label,Float, Data>&, string, string) >(); + doc_local_fun = map<string, int >(); + call_stack = stack<int>(); + } + + void run() + { + for(CP = 0; CP < program_instruction.size(); CP++) + { + if(!program_instruction[CP].compare("end")) + ret(); + else if(program_argument2[CP].compare("")) + call_function(program_instruction[CP], get_variable(program_argument[CP]), get_variable(program_argument2[CP])); + else if(program_argument[CP].compare("")) + call_function(program_instruction[CP], get_variable(program_argument[CP])); + else if(program_instruction[CP].compare("")) + call_function(program_instruction[CP]); + } + } + inline void add_function(string name, void(*ptr)(doc&) ) + { + doc_fun[name] = ptr; + } + inline void add_function_string(string name, void(*ptr)(doc&, string arg) ) + 
{ + doc_arg_fun[name] = ptr; + } + inline void add_function_string_string(string name, void(*ptr)(runtime<Label,Float, Data>&, string arg, string arg2) ) + { + doc_arg2_fun[name] = ptr; + } + void add_variable(string name, string value) + { + vars["$" + name] = value; + } + string get_variable(string name) + { + + if(name.length() > 0 && name[0] == '$') + { + if(vars.find(name) == vars.end()) + { + std::cout << "#SCRIPT ERROR : The variable " << name << " doesn't exist" << std::endl; + return ""; + } + return vars[name]; + } + else + return name; + } + void ret() + { + if(call_stack.empty()){ CP = program_instruction.size(); } + else{ CP = call_stack.top(); call_stack.pop(); } + } + void call_function(string name) + { + if(doc_local_fun.find(name) != doc_local_fun.end()) + { + call_stack.push(CP); + CP = doc_local_fun[name]; + return; + } + + if(doc_fun.find(name) == doc_fun.end()) + { + std::cout << "#SCRIPT ERROR : The function " << name << " doesn't exist" << std::endl; + return; + } + doc_fun[name](*current); + } + void call_function(string name, string arg) + { + if(doc_arg_fun.find(name) == doc_arg_fun.end()) + { + std::cout << "#SCRIPT ERROR : The function " << name << " doesn't exist" << std::endl; + return; + } + doc_arg_fun[name](*current, arg); + } + + void call_function(string name, string arg, string arg2) + { + if(doc_arg2_fun.find(name) == doc_arg2_fun.end()) + { + std::cout << "#SCRIPT ERROR : The function " << name << " doesn't exist" << std::endl; + return; + } + doc_arg2_fun[name](*this, arg, arg2); + } + void set_current_document(doc* document) + {current = document;} + private: + int CP; + map<string, string > vars; + map<string, void(*)(doc&) > doc_fun; + map<string, void(*)(doc&, string) > doc_arg_fun; + map<string, void(*)(runtime<Label,Float, Data>&, string, string) > doc_arg2_fun; + map<string, int > doc_local_fun; + mln::util::array<std::string> program_instruction; + mln::util::array<std::string> program_argument; + 
mln::util::array<std::string> program_argument2; + + stack<int> call_stack; + doc* current; + }; + } +} +#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc index feaf817..1f8d94d 100644 --- a/scribo/sandbox/raphael/code/test.cc +++ b/scribo/sandbox/raphael/code/test.cc @@ -1,6 +1,7 @@ #include <vector> -#include <mln/io/all.hh> +#include <mln/io/pbm/all.hh> +#include <mln/io/ppm/all.hh> #include <mln/core/site_set/p_vertices.hh> #include <mln/core/image/graph_elt_window.hh> @@ -16,21 +17,20 @@ #include <mln/util/timer.hh> #include <mln/debug/draw_graph.hh> #include <mln/debug/println.hh> -#include <mln/transform/all.hh> +#include <mln/transform/influence_zone_geodesic.hh> #include <mln/make/image2d.hh> #include <mln/core/alias/neighb2d.hh> #include <mln/make/influence_zone_adjacency_graph.hh> #include <mln/make/w_window2d.hh> -#include <mln/labeling/all.hh> +#include <mln/labeling/value_and_compute.hh> #include <mln/make/image.hh> #include <mln/value/rgb8.hh> #include <mln/value/int_u8.hh> #include <mln/value/int_u.hh> -#include <mln/data/all.hh> +#include <mln/labeling/colorize.hh> #include <mln/core/alias/neighb2d.hh> -#include <mln/algebra/all.hh> +#include <mln/algebra/vec.hh> #include <mln/core/image/graph_elt_neighborhood.hh> -#include <mln/literal/all.hh> #include <mln/graph/compute.hh> #include <mln/draw/plot.hh> @@ -42,10 +42,21 @@ #include <my/document/separator.hh> #include <my/document/clean.hh> +#include <my/document/recognition.hh> + +#include <my/runtime/runtime.hh> +#include <my/runtime/lib.hh> + using namespace mln; using namespace std; -void Process(std::string File, std::string Dir) +void Process(std::string File, std::string Dir, mymln::runtime::runtime< value::int_u<16> ,float,short>& runtime) { + // RUNTIME + + runtime.add_variable("FILE", Dir + "/" + File); + runtime.add_variable("DIR", Dir); + runtime.add_variable("DEBUG_FILE", Dir + "/debug_" + File); + std::cout << 
"Processing : " << File << endl; /* CREATE GRAPH */ @@ -61,24 +72,19 @@ void Process(std::string File, std::string Dir) uint16 areas_detected; - timer.start(); - image2d<uint16> ima_blob = labeling::blobs(ima, c8(), areas_detected); - std::cout << "CREATE BLOBS : " << timer.stop() << endl; - timer.restart(); - timer.start(); + mln_VAR( couple , mln::labeling::value_and_compute(ima, true, c8(), areas_detected, accu::shape::bbox<point2d>())); + image2d<uint16> ima_blob = couple.first(); + util::array<box2d> boxes = couple.second().first(); + //image2d<uint16> ima_blob = labeling::blobs(ima, c8(), areas_detected); + image2d<uint16> ima_influ = transform::influence_zone_geodesic(ima_blob, c8()); - std::cout << "CREATE INFLUENCE ZONE GEODESIC : " << timer.stop() << endl; - timer.restart(); - timer.start(); + util::graph grph = make::influence_zone_adjacency_graph(ima_influ, c8(), areas_detected); // mymln::debug::save_label_image(ima_influ, Dir + "/influ_" + File); - std::cout << "CREATE GRAPH : " << timer.stop() << endl; - - + /* COMPUTE GRAPH POINT POSITION */ - timer.restart(); - util::array<box2d> boxes = labeling::compute(accu::meta::shape::bbox(), ima_blob, areas_detected); + //util::array<box2d> boxes = labeling::compute(accu::meta::shape::bbox(), ima_blob, areas_detected); typedef p_vertices<util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; typedef graph_elt_neighborhood<util::graph, g_vertices_p> g_nbh; fun::i2v::array<point2d> graph_points(areas_detected + 1); @@ -87,18 +93,20 @@ void Process(std::string File, std::string Dir) {graph_points(N + 1) = boxes[N + 1].pcenter();} g_vertices_p area_grph(grph, graph_points); - std::cout << "COMPUTE GRAPH POINT POSITION : " << timer.stop() << endl; - /* WORK ON GRAPH */ - - - timer.restart(); + + std::cout << "INITIALIZING : " << timer.stop() << endl; + timer.restart(); + /* WORK ON GRAPH */ mymln::document::document<uint16,float,short> doc(ima_blob, ima_influ, boxes, area_grph, areas_detected); + 
runtime.set_current_document(&doc); + doc.debug_set_image(ima); doc.vertical_separator_ratio_range(0.0f, 0.2f); doc.horizontal_separator_ratio_range(6.0f, 1000.0f); doc.container_volume_range(40, 100); + for (uint16 N = 1; N <= areas_detected; N++) { @@ -114,113 +122,139 @@ void Process(std::string File, std::string Dir) } //mymln::debug::save_label_image(ima_influ, "influ_" + File); - + /* mymln::document::clean_containers_items(doc); mymln::document::clean_letters_items(doc); mymln::document::clean_get_lines(doc); + + + mymln::document::clean_letters_alone(doc); + doc.cook_lines(); + mymln::document::clean_included_letters(doc); + doc.recook_lines(); mymln::document::clean_dot_items(doc); - doc.cook_lines(); - mymln::document::clean_quote_items(doc, Dir + "/" + "quote_graph_" + File, doc.image_mask_letters()); + doc.recook_lines(); + mymln::document::clean_quote_items(doc); + doc.recook_lines(); + + mymln::document::clean_between(doc); + doc.recook_lines(); + doc.compute_letter_middle_height(); + doc.compute_letter_middle_width(); + mymln::document::clean_odd_letters(doc); + doc.recook_lines(); + +*/ + + /*doc.compute_letter_middle_space(); + mymln::document::clean_lines_space(doc, Dir + "/" + "quote_graph_" + File, doc.image_mask_letters()); + doc.recook_lines();*/ + +/* mymln::document::separators::separators_find_allign(doc); mymln::document::separators::separators_make_clean(doc); doc.cook_separators(); - std::cout << "-> compute separator left " << endl; doc.cook_line_splitting(); - + + mymln::document::clean_line_link_item(doc); - mymln::document::clean_proximity_lines(doc); + mymln::document::clean_proximity_lines(doc); mymln::document::clean_quote_lines(doc); + doc.recook_lines(); + + + + doc.reset_implicit_separators(); - std::cout << "-> clean separator right " << endl; mymln::document::separators::separators_find_allign_right(doc); - mymln::document::separators::separators_make_clean(doc); - std::cout << "-> compute separator right " << endl; + 
mymln::document::separators::separators_make_clean_right(doc); doc.cook_separators_right(); doc.cook_line_splitting_exclusive(); - std::cout << "-> clean separator right " << endl; - mymln::document::clean_line_link_item(doc); - mymln::document::clean_proximity_lines(doc); - std::cout << "-> clean " << endl; - mymln::document::clean_quote_lines(doc); - mymln::document::clean_alone_letters_lines(doc, Dir + "/" + "alone_graph_" + File, doc.image_mask_letters()); + mymln::document::clean_line_link_item(doc); + + mymln::document::clean_proximity_lines(doc); + mymln::document::clean_quote_lines(doc); + mymln::document::clean_alone_letters_lines(doc); doc.recook_lines(); + + + doc.compute_letter_middle_height(); + doc.compute_letter_middle_width(); + mymln::document::clean_odd_letters(doc); + doc.recook_lines(); + */ + /* + mymln::document::clean_lines_space(doc, Dir + "/" + "alone_graph_" + File, doc.image_mask_letters()); + doc.recook_lines();*/ + + /* mymln::document::remove_alone_letter(doc); doc.recook_lines(); - mymln::document::clean_paragraph_items(doc, Dir + "/" + "para_graph_" + File, doc.image_mask_letters()); + mymln::document::clean_paragraph_items(doc); doc.cook_paragraphs(); - std::cout << "WORK ON GRAPH : " << timer.stop() << endl; - //io::ppm::save(ima_influ, "separator.ppm"); - //io::pbm::save(doc.image_mask_separators(),"separators"); - // io::pbm::save(doc.image_mask_letters(),Dir + "/" + "letters_" + File); - //io::pbm::save(doc.image_mask_alone_letters(),Dir + "/" + "letters_alone_" + File); - //io::pbm::save(doc.image_mask_separators(),Dir + "/" + "separators_" + File); - //io::pbm::save(doc.image_mask_containers(),Dir + "/" + "containers_" + File); - //io::pbm::save(doc.image_mask_noise(),Dir + "/" + "noise_" + File); - - - - //doc.debug_save_lines(Dir + "/" + "lines_" + File); - doc.debug_save_all(Dir + "/" + "debug_" + File, ima); - //mymln::debug::save_graph_image(doc.fun_mask_implicit_separators_left(), doc.image_mask_letters(), Dir + "/" + 
"graph_imp_sep_line_" + File); - //doc.debug_save_separators(Dir + "/" + "imp_sep_graph_" + File); + - /* typedef vertex_image<point2d,bool> v_ima_g; - v_ima_g mask = doc.fun_mask_letters(); -*/ - /*image2d<bool> out(3500,3500); - + mymln::document::clean_paragraphs_up(doc); + doc.recook_paragraphs(); + mymln::document::clean_paragraphs_large(doc); + doc.recook_paragraphs(); + mymln::document::clean_included_paragraphs(doc); + doc.recook_paragraphs(); + std::cout << "WORK ON GRAPH : " << timer.stop() << endl; + doc.recook_lines(); + */ + runtime.run(); + + + - mln_piter_(v_ima_g) v(mask.domain()); - typedef graph_elt_neighborhood_if<util::graph, g_vertices_p, v_ima_g> nbh_t; - nbh_t nbh(mask); - mln_niter_(nbh_t) q(nbh, v); + /* + doc.cook_lines_iter(); + std::cout << doc.get_first_line(); + + + - unsigned int fnds = 0; - for_all(v) + doc.compute_letter_middle_space(); + + mymln::document::recognize_minus(doc); + mymln::document::recognize_dot(doc); + for(int Line = doc.get_first_line(); Line; doc.get_next_line(Line)) { - unsigned int nds = 0; - for_all(q) - { - nds++; - - draw::line(out, q,v, true); - } - if(nds > 0) - { - std::cout << v << endl; - fnds++; - } - - - }*/ - //mymln::debug::draw_graph(out, mask); - //io::pbm::save(out, "maskltt.dgb"); - //std::cout << "NODES:" << fnds << endl; - // mymln::debug::save_graph_image(area_grph, ima, "graph_" + File); - // mymln::debug::save_graph_image(doc.fun_mask_separators(), ima, "separator_graph_" + File); - //mymln::debug::save_graph_image(area_grph, doc.image_mask_letters(), Dir + "/" + "graph_" + File); - //mymln::debug::save_graph_image(doc.fun_mask_letters(), doc.image_mask_letters(), Dir + "/" + "container_graph_" + File); - mln::util::array<box2d> linebx = doc.bbox_mask_lines(); - mymln::debug::save_boxes_image(linebx, doc.image_mask_letters(), Dir + "/" + "lbox_" + File); - - //mymln::debug::save_boxes_image(doc.bbox_enlarge_mask_letters(10, 0), ima, "linebox_" + File); + std::cout << 
doc.get_line_string(Line) << endl; + } + + //doc.debug_save_lines(Dir + "/" + "lines_" + File); + //doc.debug_save_all(Dir + "/" + "debug_" + File, ima); + */ + } int main( int argc, char** argv) { - if(argc <= 1){Process("ima.pbm", "");} + mymln::runtime::runtime< value::int_u<16> ,float,short> run; + mymln::runtime::load_clean(run); + mymln::runtime::load_debug(run); + mymln::runtime::load_cooking(run); + mymln::runtime::load_string(run); + mymln::runtime::load_system(run); + mymln::runtime::load_separators(run); + mymln::runtime::load_compute(run); + if(argc <= 1){Process("ima.pbm", "", run);} else { bool dir = false; + bool prog = false; std::string Dir = ""; + std::string Prog = ""; for(int N = 1 ; N < argc; N++) { if(dir) @@ -228,12 +262,20 @@ int main( int argc, char** argv) Dir = argv[N]; dir = false; } + else if(prog) + { + Prog = argv[N]; + run.load(Prog. c_str()); + prog = false; + } else { if(!strcmp(argv[N], "-D")) { dir = true;} + else if(!strcmp(argv[N], "-P")) + { prog = true; } else - { Process(argv[N], Dir); } + { Process(argv[N], Dir, run); } } } } -- 1.7.2.5