last-svn-commit-889-gf4a851e Correct some problems with line detection and improve paragraph detection. Add remote debugging for clean.hh.

--- .../code/my/debug/remote/document_remote.hh | 146 ++++++++++++++++++++ scribo/sandbox/raphael/code/my/debug/remote/lib.hh | 34 +++++ scribo/sandbox/raphael/code/my/document/clean.hh | 139 +++++++++++++++++-- .../sandbox/raphael/code/my/document/document.hh | 140 ++++++++++++++++++- .../sandbox/raphael/code/my/document/separator.hh | 45 ++++++- scribo/sandbox/raphael/code/my/runtime/lib.hh | 6 + scribo/sandbox/raphael/code/test.cc | 37 +++++- 7 files changed, 523 insertions(+), 24 deletions(-) create mode 100644 scribo/sandbox/raphael/code/my/debug/remote/document_remote.hh create mode 100644 scribo/sandbox/raphael/code/my/debug/remote/lib.hh diff --git a/scribo/sandbox/raphael/code/my/debug/remote/document_remote.hh b/scribo/sandbox/raphael/code/my/debug/remote/document_remote.hh new file mode 100644 index 0000000..24b0889 --- /dev/null +++ b/scribo/sandbox/raphael/code/my/debug/remote/document_remote.hh @@ -0,0 +1,146 @@ +#ifndef MLN_DEBUG_DOCUMENT_REMOTE +#define MLN_DEBUG_DOCUMENT_REMOTE +#include <my/document/document.hh> +using namespace mln; +namespace mymln +{ + namespace document + { + namespace debug + { + template<typename Label, typename Float, typename Data> + class remote + { + typedef document< Label, Float, Data > doc; + public: + remote() + { + program_node = mln::util::array<string>(0); + program_argument = mln::util::array<string>(0); + program_argument2 = mln::util::array<string>(0); + program_instruction = mln::util::array<string>(0); + doc_b_pp_fun = map<string, bool(*)(const doc&, const point2d&, const point2d&) >(); + doc_b_p_fun = map<string, bool(*)(const doc&, const point2d&) >(); + } + void load(const char* file) + { + + fstream filestream(file, fstream::in | fstream::out); + std::string Buffer = ""; + bool flag = !getline(filestream, Buffer).eof(); + int L = 0; + bool remain = flag; + bool fun_mask_set = false; + + while(remain) + { + if(!flag){remain = false;} + int N = 0; + std::string Node = ""; + std::string Instr = ""; + 
std::string Arg = ""; + std::string Arg2 = ""; + while( N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while(N < Buffer.length() && Buffer[N] != ' ' && Buffer[N] != '\t' && Buffer[N] != ';' && Buffer[N] != ':'){Instr += Buffer[N]; N++;} + if(Buffer[N] != ':') + {std::cout << "#REMOTE DEBUG SCRIPT ERROR : THE NODE HAS NOT BEEN SPECIFIED" << std::endl; return;} + while( N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while(N < Buffer.length() && Buffer[N] != ' ' && Buffer[N] != '\t' && Buffer[N] != ';'){Instr += Buffer[N]; N++;} + while(N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while(N < Buffer.length() && Buffer[N] != ' ' && Buffer[N] != '\t' && Buffer[N] != ';'){Arg += Buffer[N]; N++;} + while(N < Buffer.length() && (Buffer[N] == ' ' || Buffer[N] == '\t') && Buffer[N] != ';'){N++;} // trim + while(N < Buffer.length() && Buffer[N] != ' ' && Buffer[N] != '\t' && Buffer[N] != ';'){Arg2 += Buffer[N]; N++;} + if(Instr.length() != 1 || (Instr[0] != 'v' && Instr[0] != 'q')) + { + if(Instr.length() != 2 || (Instr[0] != 'v' && Instr[0] != 'q') || (Instr[1] != 'v' && Instr[1] != 'q')) + { + if(fun_mask_set) + { + std::cout << "#REMOTE DEBUG SCRIPT ERROR : INVALID NODE" << std::endl; return; + } + else + { + fun_mask_set = true; + fun_mask = Node; + } + } + } + program_node.append(Node); + program_instruction.append(Instr); + program_argument.append(Arg); + program_argument2.append(Arg2); + + } + + filestream.close(); + + } + inline void filter(doc& d) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = + (!fun_mask.compare("all_letters"))?d.fun_mask_all_letters() + :(!fun_mask.compare("letters"))?d.fun_mask_letters() + :(!fun_mask.compare("alone_letters"))?d.fun_mask_letters() + 
:(!fun_mask.compare("start_end_lines"))?d.fun_mask_start_end_lines() + :(!fun_mask.compare("start_lines"))?d.fun_mask_start_lines() + :(!fun_mask.compare("end_lines"))?d.fun_mask_start_lines() + :(!fun_mask.compare("all"))?d.fun_mask_all() + :d.fun_mask_all(); // DEFAULT VALUE + + + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + bool _VTRUE = true; + for(int N = 0; N < program_instruction.size(); N++) + { + if(!program_node[N].compare("v")) + { _VTRUE = _VTRUE && doc_b_p_fun[program_instruction[N]](d, q); } + } + if(!_VTRUE){continue;} + for_all(q) + { + bool _TRUE = true; + for(int N = 0; N < program_instruction.size(); N++) + { + if(!program_node[N].compare("vq")) + { _TRUE = _TRUE && doc_b_pp_fun[program_instruction[N]](d, v, q); } + else if(!program_node[N].compare("qv")) + { _TRUE = _TRUE && doc_b_pp_fun[program_instruction[N]](d, (point2d)q, v); } + else if(!program_node[N].compare("q")) + { _TRUE = _TRUE && doc_b_p_fun[program_instruction[N]](d, (point2d)q); } + } + + if(_TRUE) + {d.debug_draw_line_green_buffer(q,v);} + else + {d.debug_draw_line_red_buffer(q,v);} + } + } + } + inline void add_function_pp(string name, bool(*ptr)(const doc&, const point2d&, const point2d&) ) + { + doc_b_pp_fun[name] = ptr; + } + inline void add_function_p(string name, bool(*ptr)(const doc&, const point2d&) ) + { + doc_b_p_fun[name] = ptr; + } + private: + map<string, bool(*)(const doc&, const point2d&, const point2d&) > doc_b_pp_fun; + map<string, bool(*)(const doc&, const point2d&) > doc_b_p_fun; + mln::util::array<std::string> program_node; + mln::util::array<std::string> program_instruction; + mln::util::array<std::string> program_argument; + mln::util::array<std::string> program_argument2; + std::string fun_mask; + }; + } + } +} +#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/debug/remote/lib.hh 
b/scribo/sandbox/raphael/code/my/debug/remote/lib.hh new file mode 100644 index 0000000..97db857 --- /dev/null +++ b/scribo/sandbox/raphael/code/my/debug/remote/lib.hh @@ -0,0 +1,34 @@ +#ifndef MLN_DEBUG_DOCUMENT_REMOTE_LIB +#define MLN_DEBUG_DOCUMENT_REMOTE_LIB +#include <my/debug/remote/document_remote.hh> +using namespace mln; +namespace mymln +{ + namespace document + { + namespace debug + { + + template<typename L, typename F, typename D> + bool allign_V(const document<L,F,D>& doc, const point2d& A,const point2d& B){return doc.allign_V(A,B);} + template<typename L, typename F, typename D> + bool allign_V_line(const document<L,F,D>& doc, const point2d& A,const point2d& B){return doc.allign_V_line(A,B);} + template<typename L, typename F, typename D> + bool allign_V_line_strict(const document<L,F,D>& doc, const point2d& A,const point2d& B){return doc.allign_V_line_strict(A,B);} + template<typename L, typename F, typename D> + bool allign_V_large(const document<L,F,D>& doc, const point2d& A,const point2d& B){return doc.allign_V_large(A,B);} + + + template<typename L, typename F, typename D> + void load(const remote<L,F,D>& rem) + { + typedef document< L, F, D > doc; + rem.add_function_pp("allign_V_large", &(allign_V_large)); + rem.add_function_pp("allign_V_line_strict", &(allign_V_line_strict)); + rem.add_function_pp("allign_V_line", &(allign_V_line)); + rem.add_function_pp("allign_V", &(allign_V)); + } + } + } +} +#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh index 9304dcc..cfff068 100644 --- a/scribo/sandbox/raphael/code/my/document/clean.hh +++ b/scribo/sandbox/raphael/code/my/document/clean.hh @@ -196,7 +196,20 @@ namespace mymln doc.add_to_line_link(v, q); doc.add_letter_coerce(q); } + else if( + doc.is_start_end_line(v) && + doc.allign_base_line(v, q) && + doc.allign_small_item(v,q) && + !doc.contain_alone_letter(v) && + doc.allign_proximity_large_left(v,q) 
&& + doc.letter_ratio_YX(q) >= 1 + ) + { + doc.debug_draw_line_green_buffer(v,q); + doc.add_to_line_link(v,q); + } } + } } mask = doc.fun_mask_alone_letters(); @@ -380,9 +393,12 @@ namespace mymln { for_all(q) { - if(doc.same_line(q,v)){continue;} + + if(doc.same_line(q,v)){ continue;} + if(doc.contain_alone_letter(q)) { + if(doc.allign_V(q,v) && doc.allign_proximity_strict(q, v) && doc.allign_size_height(q, v)) { doc.add_to_line_link(v, q); @@ -391,7 +407,8 @@ namespace mymln } else if(doc.contain_line(q)) - { + { + if(doc.allign_V(q,v) && doc.allign_size_height_line_strict(q, v) && doc.allign_proximity_strict(q,v)) { doc.add_to_line_link(v, q); @@ -399,12 +416,13 @@ namespace mymln } else if(doc.allign_size_height_line(q,v)) { + if(doc.allign_proximity_line(q,v) && doc.allign_V_line_strict(q, v)) { doc.add_to_line_link(v, q); doc.debug_draw_line_green_buffer(v,q); } - else if(doc.line_influence_reciprocal(q, v) && doc.allign_V_line_strict(q, v)) + else if(doc.line_influence_reciprocal(q, v) && doc.allign_V_line_strict(q, v) && doc.allign_size_height_line(q,v)) { doc.add_to_line_link(v, q); doc.debug_draw_line_red_buffer(v,q); @@ -419,15 +437,22 @@ namespace mymln doc.allign_proximity_line(v,q) ) { - doc.debug_draw_line_orange_buffer(v,q); doc.debug_draw_box_red_buffer(v); doc.debug_draw_box_green_buffer(q); doc.add_to_line_link(v, q); } + + else if(doc.allign_V(q,v) && doc.allign_proximity_strict(q,v)) + { + doc.debug_draw_line_orange_buffer(q,v); + doc.add_to_line_link(v, q); + } } + } + } } } @@ -491,7 +516,7 @@ namespace mymln { if( doc.allign_H_large(q,v) && - doc.allign_size_height_line(q, v) && + doc.allign_size_height_line_medium(q, v) && doc.allign_proximity_V_line(v,q) && doc.allign_size_width_line(q, v) ) @@ -506,7 +531,7 @@ namespace mymln doc.add_to_paragraph(q); doc.add_to_paragraph_link(v, q); } - + doc.debug_draw_line_red_buffer(v,q); } } } @@ -518,7 +543,7 @@ namespace mymln if( doc.get_beginning_of_line(q) == doc[q] && doc.allign_H_large(q,v) && 
- doc.allign_size_height_line(q, v) && + doc.allign_size_height_line_medium(q, v) && doc.allign_size_width_line(q, v) && doc.allign_proximity_V_line(v,q) && doc.allign_bottom_line(q,v) @@ -546,7 +571,11 @@ namespace mymln } doc.add_to_paragraph(q); doc.add_to_paragraph_link(v, q); + + } + + doc.debug_draw_line_green_buffer(v,q); } } @@ -829,9 +858,10 @@ namespace mymln { doc.add_noise(KillMe[Killer]); doc.kill(KillMe[Killer]); + doc.invalidate_line_link(KillMe[Killer]); Killer++; } - + doc.propage_line_link(); } @@ -1000,7 +1030,7 @@ namespace mymln doc.return_next_line(doc.get_line_ID(v)) == doc.return_next_line(doc.get_line_ID(q)) && doc.return_previous_line(doc.get_line_ID(v)) == doc.return_previous_line(doc.get_line_ID(q)) && doc.allign_V_line(v, q) && - doc.allign_size_height_line(v, q) && + doc.allign_size_height_line_medium(v, q) && !doc.killed(doc[q]) ) { @@ -1175,15 +1205,20 @@ namespace mymln { for_all(q) { - if(doc.contain_line(q)) + if(doc.contain_line(q) && doc.same_line(q,v)) { - doc.debug_draw_line_orange_buffer(q,v); + if( + doc.letter_included_center(v,q) && doc.allign_small_item(v,q)) + { + doc.debug_draw_line_orange_buffer(v,q); + doc.merge(q,v); + } + } } } } doc.propage_line_link(); - doc.propage_paragraph_link(); } @@ -1219,6 +1254,11 @@ namespace mymln doc.debug_draw_line_green_buffer(q,v); doc.add_to_paragraph_link(v, q); } + else if(doc.allign_paragraph_center_strict(q,v)) + { + doc.debug_draw_line_orange_buffer(q,v); + doc.add_to_paragraph_link(v, q); + } } else { @@ -1234,9 +1274,78 @@ namespace mymln } - - - + + template<typename L, typename F, typename D> + void clean_paragraphs_first_line(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_all_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t 
nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_paragraph(v) && doc.get_paragraph_length(v) > 1 && doc.contain_start_paragraph(v)) + { + for_all(q) + { + if( + doc.same_paragraph(q,v) && + !doc.same_line(q,v) && + doc.allign_left(v,q) && + doc.allign_V_line(v,q) + ) + { + doc.debug_draw_line_green_buffer(q,v); + doc.debug_draw_box_green_buffer(q); + doc.add_to_line_link(v,q); + } + } + } + } + doc.propage_line_link(); + } + + + template<typename L, typename F, typename D> + void clean_paragraphs_couple(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_all_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_paragraph(v) && doc.get_paragraph_length(v) == 1) + { + for_all(q) + { + if( + doc.contain_paragraph(q) && + doc.get_paragraph_length(q) == 1 && + !doc.same_paragraph(q,v) && + doc.allign_proximity_paragraph_up_medium(q,v) && + doc.compatible_paragraph_middle_width(q,v) && + !doc.decal_left_paragraph(q, v) && + doc.allign_top_paragraph(q,v) && + doc.allign_size_height_paragraph_line(q,v) && + (doc.contain_start_line(q) || doc.contain_start_line(v)) && // THESE TWO LINES CONATIN MAYBE SOME ERROR + doc.allign_H_paragraph(q,v) // FIX THE PROBLEM BEFORE TO START IT + ) + { + doc.debug_draw_line_green_buffer(q,v); + doc.add_to_paragraph_link(q,v); + } + } + } + } + doc.propage_paragraph_link(); + } template<typename L, typename F, typename D> diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh index 219d245..c7fc0c1 100644 --- a/scribo/sandbox/raphael/code/my/document/document.hh +++ b/scribo/sandbox/raphael/code/my/document/document.hh @@ -52,6 +52,7 @@ 
namespace mymln all_letters_mask = fun::i2v::array<bool>(Areas + 1); Hseparator_mask = fun::i2v::array<bool>(Areas + 1); Vseparator_mask = fun::i2v::array<bool>(Areas + 1); + image_mask = fun::i2v::array<bool>(Areas + 1); noise_mask = fun::i2v::array<bool>(Areas + 1); temp_letter = fun::i2v::array<bool>(Areas + 1); alone_letters_mask = fun::i2v::array<bool>(Areas + 1); @@ -312,6 +313,8 @@ namespace mymln inline void add_to_line_link(const point2d& A, const point2d& B) { add_to_line_link(img_influ(A), img_influ(B)); } + inline void invalidate_line_link(const point2d& A) + { invalidate_line_link(img_influ(A)); } inline bool same_line(const point2d& A, const point2d& B) { return same_line(img_influ(A), img_influ(B)); } inline bool same_line(const Label A, const Label B) @@ -362,6 +365,9 @@ namespace mymln inline void jump_to_line(const point2d& point) { jump_to_line(img_influ(point)); } + inline bool contain_start_paragraph(const point2d& point) + { return contain_start_paragraph(img_influ(point)); } + inline bool contain_start_line(const point2d& point) { return contain_start_line(img_influ(point)); } @@ -388,7 +394,8 @@ namespace mymln inline void add_to_line_link(const Label A, const Label B) {lines_union.add_link(A, B);} - + inline void invalidate_line_link(const Label A) + {lines_union.invalidate_link(A);} inline void jump_to_line(const Label lbl) { if(lines_union[lbl] != 0) @@ -451,6 +458,9 @@ namespace mymln inline bool contain_start_line(const Label lbl) { return start_lines_mask(lbl);} + inline bool contain_start_paragraph(const Label lbl) + { return paragraphs_first_line[paragraphs_union[lbl]] == lines_union[lbl];} + inline bool contain_end_line(const Label lbl) { return start_lines_mask(lbl);} @@ -487,6 +497,7 @@ namespace mymln } void inline add(Label lbl, int link) { + image_mask(lbl) = false; all_mask(lbl) = true; if (link == 0){add_noise(lbl);} else if (link > 30){ add_separator(lbl);} @@ -521,6 +532,21 @@ namespace mymln {add_letter(img_influ(point)); 
} void inline add_letter_coerce(const point2d& point) {add_letter_coerce(img_influ(point)); } + + + void add_image(const Label lbl) + { + image_mask(lbl) = true; + separators_mask(lbl) = false; + containers_mask(lbl) = false; + Vseparator_mask(lbl) = false; + Hseparator_mask(lbl) = false; + alone_letters_mask(lbl) = false; + noise_mask(lbl) = false; + all_letters_mask(lbl) = false; + temp_letter = false; + } + void add_alone_letter(const point2d& point) {add_alone_letter(img_influ(point));} void add_alone_letter(const Label lbl) @@ -572,6 +598,41 @@ namespace mymln else add_noise(lbl); } + inline bool is_big_element_V(const point2d& point) + {return is_big_element_V(img_influ(point));} + inline bool is_big_element_V(const Label lbl) + { + return _bboxgp[lbl].len(0) > img_influ.domain().len(0) / 13; + } + inline bool is_big_element_H(const point2d& point) + {return is_big_element_H(img_influ(point));} + inline bool is_big_element_H(const Label lbl) + { + return _bboxgp[lbl].len(1) > img_influ.domain().len(1) / 13; + } + + + + + + inline bool is_very_big_element_V(const point2d& point) + {return is_very_big_element_V(img_influ(point));} + inline bool is_very_big_element_V(const Label lbl) + { + return _bboxgp[lbl].len(0) > img_influ.domain().len(0) / 6; + } + inline bool is_very_big_element_H(const point2d& point) + {return is_very_big_element_H(img_influ(point));} + inline bool is_very_big_element_H(const Label lbl) + { + return _bboxgp[lbl].len(1) > img_influ.domain().len(1) / 6; + } + + + + + + void inline add_container(const point2d& point) {add_container(img_influ(point)); } void add_container(const Label lbl) @@ -736,6 +797,7 @@ namespace mymln return allignV < label_size_(0, Left) && (_bboxgp[Left].pcenter()[0]) > (_bboxgp[Right].pcenter()[0]); } + inline bool allign_paragraph_center(const point2d& Left, const point2d& Right) {return allign_paragraph_center(img_influ(Left), img_influ(Right));} inline bool allign_paragraph_center(const Label Left, const Label 
Right) @@ -744,7 +806,14 @@ namespace mymln if(Diff < 0){Diff = -Diff;} return Diff < paragraphs_bbox[paragraphs_union[Left]].len(1)/ 30 && Diff < paragraphs_bbox[paragraphs_union[Right]].len(1) / 30; } - + inline bool allign_paragraph_center_strict(const point2d& Left, const point2d& Right) + {return allign_paragraph_center_strict(img_influ(Left), img_influ(Right));} + inline bool allign_paragraph_center_strict(const Label Left, const Label Right) + { + short int Diff = paragraphs_bbox[paragraphs_union[Left]].pcenter()[1] - paragraphs_bbox[paragraphs_union[Right]].pcenter()[1]; + if(Diff < 0){Diff = -Diff;} + return Diff < paragraphs_bbox[paragraphs_union[Left]].len(1)/ 60 && Diff < paragraphs_bbox[paragraphs_union[Right]].len(1) / 60; + } inline bool allign_line_center(const point2d& Left, const point2d& Right) {return allign_line_center(img_influ(Left), img_influ(Right));} @@ -1028,10 +1097,13 @@ namespace mymln { HA = HB; } return (DisA) < HA; } + + + inline bool allign_proximity_paragraph_up_medium( const point2d& Left, const point2d& Right) - {return allign_proximity_paragraph_up(img_influ(Left), img_influ(Right));} + {return allign_proximity_paragraph_up_medium(img_influ(Left), img_influ(Right));} inline bool allign_proximity_paragraph_up_medium( const Label Left, const Label Right) { @@ -1097,6 +1169,19 @@ namespace mymln + inline bool allign_size_height_line_medium( const point2d& Left, const point2d& Right) + { + return allign_size_height_line_medium(img_influ(Left), img_influ(Right)); + } + + inline bool allign_size_height_line_medium( const Label Left, const Label Right) + { + short int SizeL = lines_bbox[lines_union[Left]].len(0); + short int SizeR = lines_bbox[lines_union[Right]].len(0); + return SizeR > (SizeL / 1.8f) && SizeR < (SizeL * 1.8f); + } + + inline bool allign_size_height_line( const point2d& Left, const point2d& Right) { return allign_size_height_line(img_influ(Left), img_influ(Right)); @@ -1288,6 +1373,35 @@ namespace mymln } + + + 
inline bool allign_proximity_left( const point2d& Left, const point2d& Right) + {return allign_proximity_left(img_influ(Left), img_influ(Right));} + + inline bool allign_proximity_left( const Label Left, const Label Right) + { + box2d LB = _bboxgp[Left]; + box2d RB = _bboxgp[Right]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + + unsigned int HA = LB.len(0); + unsigned int VA = LB.len(1); + + + if(VA > HA) + { HA = VA; } + return (DisA) * 3 < HA * 2; + } + + + + inline bool allign_proximity_large_left( const point2d& Left, const point2d& Right) {return allign_proximity_large_left(img_influ(Left), img_influ(Right));} @@ -1538,7 +1652,10 @@ namespace mymln allignV < lines_bbox[lines_union[Left]].len(0) && lines_bbox[lines_union[Left]].pcenter()[0] < lines_bbox[lines_union[Right]].pcenter()[0]; } - + inline bool is_start_end_line(const point2d& point) + {return is_start_end_line(img_influ(point));} + inline bool is_start_end_line(const Label lbl) + {return start_end_lines_mask(lbl);} inline bool allign_bottom(const point2d& Left, const point2d& Right) {return allign_bottom(img_influ(Left), img_influ(Right));} inline bool allign_bottom(const Label Left, const Label Right) @@ -2033,6 +2150,8 @@ namespace mymln { mymln::debug::save_label_image(img, implicit_separators_union , file);} vertex_image<point2d,bool> fun_mask_separators() { return fun_mask_(separators_mask); } + vertex_image<point2d,bool> fun_mask_V_separators() + { return fun_mask_(Vseparator_mask); } vertex_image<point2d,bool> fun_mask_containers() { return fun_mask_(containers_mask); } vertex_image<point2d,bool> fun_mask_alone_letters() @@ -2047,6 +2166,8 @@ namespace mymln { return fun_mask_(all_mask); } + vertex_image<point2d,bool> fun_mask_image() + { return fun_mask_(image_mask); } vertex_image<point2d,bool> fun_mask_letters() { return fun_mask_(letters_mask); } 
vertex_image<point2d,bool> fun_mask_start_lines() @@ -2195,6 +2316,15 @@ namespace mymln _bboxgp[Par1].has(_bboxgp[Par2].pmax()) ; } + inline bool letter_included_center(point2d Par1, point2d Par2) + { return letter_included_center(img_influ(Par1), img_influ(Par2)); } + inline bool letter_included_center(Label Par1, Label Par2) + { + return + _bboxgp[Par1].has(_bboxgp[Par2].pcenter()); + } + + inline bool paragraph_included_influence(point2d Par1, point2d Par2) { return paragraph_included_influence(img_influ(Par1), img_influ(Par2)); } inline bool paragraph_included_influence(Label Par1, Label Par2) @@ -3258,7 +3388,7 @@ namespace mymln fun::i2v::array<bool> noise_mask; fun::i2v::array<bool> kill_mask; fun::i2v::array<bool> all_mask; - + fun::i2v::array<bool> image_mask; mln::util::array<std::string> tag_lbl; mln::util::array<bool> Btag_lbl; diff --git a/scribo/sandbox/raphael/code/my/document/separator.hh b/scribo/sandbox/raphael/code/my/document/separator.hh index 9c39f20..aa5ace9 100644 --- a/scribo/sandbox/raphael/code/my/document/separator.hh +++ b/scribo/sandbox/raphael/code/my/document/separator.hh @@ -10,7 +10,37 @@ namespace mymln { namespace separators { - //TODO: CHANGER contain_Hseparator en VSeparator Le H c etait pour le test + + + template<typename L, typename F, typename D> + void separators_find_wrong(mymln::document::document<L,F,D>& doc) + { + + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_V_separators(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + mymln::util::union_find<L> sep_union(doc.size()); + for_all(v) + { + if(doc.contain_letter(v)) + { + for_all(q) + { + if(doc.is_big_element_V(q)){continue;} + if(doc.allign_proximity_strict(q,v) && doc.allign_V(q,v) && doc.allign_size(q,v)) + 
{doc.debug_draw_line_red_buffer(q,v); doc.add_letter_coerce(q);} + else if(doc.allign_V(q,v) && doc.allign_size(q,v) && doc.allign_proximity_left(v,q)) + {doc.debug_draw_line_red_buffer(q,v); doc.add_letter_coerce(q);} + } + } + } + + } + template<typename L, typename F, typename D> void separators_rebuild(mymln::document::document<L,F,D>& doc) { @@ -27,8 +57,11 @@ namespace mymln if(!doc[v]){continue;} if(doc.contain_Vseparator(doc[v]) && !sep_union[doc[v]]) { + sep_union[doc[v]] = sep_union.new_set(); sep_union.add_self_link(doc[v]); + if(doc.is_very_big_element_V(v)) + { continue; } } for_all(q) { @@ -256,6 +289,11 @@ namespace mymln count[doc[q]]++; } } + else if(doc.allign_proximity_strict(q,v) && doc.allign_left(v,q) && doc.allign_V(q,v) && doc.same_line(q, v)) + { + count[doc[q]]++; + doc.debug_draw_line_orange_buffer(v, q); + } } } @@ -308,6 +346,11 @@ namespace mymln doc.debug_draw_line_green_buffer(v, q); } } + else if(doc.allign_proximity_strict(q,v) && doc.allign_right(v,q) && doc.allign_V(q,v) && doc.same_line(q, v)) + { + count[doc[q]]++; + doc.debug_draw_line_orange_buffer(v, q); + } } } diff --git a/scribo/sandbox/raphael/code/my/runtime/lib.hh b/scribo/sandbox/raphael/code/my/runtime/lib.hh index b3100f9..f60a995 100644 --- a/scribo/sandbox/raphael/code/my/runtime/lib.hh +++ b/scribo/sandbox/raphael/code/my/runtime/lib.hh @@ -17,6 +17,7 @@ namespace mymln run.add_function("separators.make_clean_left", &(mymln::document::separators::separators_find_allign)); run.add_function("separators.rebuild", &(mymln::document::separators::separators_rebuild)); run.add_function("separators.merge", &(mymln::document::separators::separators_merge)); + run.add_function("separators.find_wrong", &(mymln::document::separators::separators_find_wrong)); } @@ -60,6 +61,11 @@ namespace mymln run.add_function("clean.finalize_paragraph", &(mymln::document::clean_finalize_paragraph)); run.add_function("clean.center_paragraphs", &(mymln::document::clean_center_paragraphs)); + 
+ + run.add_function("clean.paragraphs_couple", &(mymln::document::clean_paragraphs_couple)); + + run.add_function("clean.paragraphs_first_line", &(mymln::document::clean_paragraphs_first_line)); } template<typename L, typename F, typename D> diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc index 6f33db5..3eab190 100644 --- a/scribo/sandbox/raphael/code/test.cc +++ b/scribo/sandbox/raphael/code/test.cc @@ -1,4 +1,4 @@ - +#define NREMOTE #include <vector> #include <mln/io/pbm/all.hh> #include <mln/io/ppm/all.hh> @@ -51,6 +51,10 @@ #include <my/data/page.hh> #include <my/preprocessing/preprocessing.hh> +#ifndef NREMOTE +#include <my/debug/remote/document_remote.hh> +#include <my/debug/remote/lib.hh> +#endif using namespace mln; using namespace std; @@ -252,11 +256,23 @@ void Process(std::string File, std::string Dir, mymln::runtime::runtime< value:: } - - +#ifndef NREMOTE +// THIS IS USED TO CREATE A FILTER WITHOUT RECOMPILING +mymln::document::debug::remote< value::int_u<16> ,float,short> rem; +template<typename L, typename F, typename D> + void clean_remote(mymln::document::document<L,F,D>& doc) + { + rem.filter(doc); + } +#endif + + int main( int argc, char** argv) { mymln::runtime::runtime< value::int_u<16> ,float,short> run; + #ifndef NREMOTE + rem = mymln::document::debug::remote< value::int_u<16> ,float,short>(); + #endif mymln::runtime::load_clean(run); mymln::runtime::load_debug(run); mymln::runtime::load_cooking(run); @@ -269,6 +285,7 @@ int main( int argc, char** argv) { bool dir = false; bool prog = false; + bool remote = false; std::string Dir = ""; std::string Prog = ""; for(int N = 1 ; N < argc; N++) @@ -287,12 +304,26 @@ int main( int argc, char** argv) prog = false; } + #ifndef NREMOTE + else if(remote) + { + Prog = argv[N]; + rem.load(Prog.c_str()); + remote = false; + run.add_function("@remote", clean_remote); + mymln::document::debug::load(rem); + } + #endif else { if(!strcmp(argv[N], "-D")) { dir = true;} 
else if(!strcmp(argv[N], "-P")) { prog = true; } + #ifndef NREMOTE + else if(!strcmp(argv[N], "--Remote")) + { remote = true; } + #endif else { Process(argv[N], Dir, run); } } -- 1.7.2.5
participants (1)
-
Raphael Boissel