last-svn-commit-890-ga709ecb Fix bug with finalize function. Add functions to recognize pictures.

--- scribo/sandbox/raphael/code/my/document/clean.hh | 98 ++++++++++++++++++-- .../sandbox/raphael/code/my/document/document.hh | 95 ++++++++++++++++++- scribo/sandbox/raphael/code/my/document/image.hh | 81 ++++++++++++++++ scribo/sandbox/raphael/code/my/document/outline.hh | 52 ++++++++++ scribo/sandbox/raphael/code/my/runtime/lib.hh | 8 ++- scribo/sandbox/raphael/code/test.cc | 2 - 6 files changed, 319 insertions(+), 17 deletions(-) diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh index cfff068..cf61a1c 100644 --- a/scribo/sandbox/raphael/code/my/document/clean.hh +++ b/scribo/sandbox/raphael/code/my/document/clean.hh @@ -144,7 +144,7 @@ namespace mymln if((!doc.contain_line(q))) { // draw::line(out, q,v, mln::literal::blue); - if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity_large(q, v) ) + if(doc.allign_V(q,v) && doc.allign_size_medium(q, v) && doc.allign_proximity_large(q, v) ) { doc.add_to_line_link(v, q); All_Alone = false; @@ -152,7 +152,7 @@ namespace mymln } else { - if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity_large(q, v)) + if(doc.allign_V(q,v) && doc.allign_size_medium(q, v) && doc.allign_proximity_large(q, v)) { doc.add_to_line_link(q, v); All_Alone = false; @@ -303,6 +303,53 @@ namespace mymln doc.propage_line_link(); doc.recook_lines(); } + + + + + + template<typename L, typename F, typename D> + void clean_apostrophe_items(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_start_end_lines(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_line(v) ) + { + for_all(q) + { + if( + doc.get_line_length(q) == 1 && + doc.line_reciprocal(q,v) && + doc.allign_top(v,q) && + doc.letter_ratio_YX(q) > 1 && + !doc.allign_H_tube(v,q) && + doc.allign_proximity_strict_left(v,q) && + doc.allign_small_item_large(v,q) + + ) + { + doc.debug_draw_line_green_buffer(q,v); + doc.add_to_line_link(v,q); + } + } + } + } + doc.propage_line_link(); + doc.recook_lines(); + } + + + + + + template<typename L, typename F, typename D> void clean_line_link_item(mymln::document::document<L,F,D>& doc) @@ -474,7 +521,7 @@ namespace mymln L End = 0; for_all(q) { - if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v)) + if(doc.allign_V(q,v) && doc.allign_size_medium(q, v) && doc.allign_proximity(q,v)) { if(doc[q] == doc.get_beginning_of_line(q)) {Start = doc[q]; } @@ -485,7 +532,7 @@ namespace mymln } if(Start && End){doc.add_to_line_link(Start, doc[v]);} } - doc.propage_paragraph_link(); + doc.propage_line_link(); } @@ -1167,10 +1214,14 @@ namespace mymln doc.return_previous_line(doc[q]) == doc.return_previous_line(doc[v]) ) { - if(doc.get_line_length(q) < 4 || doc.get_line_length(v) < 4 ) - doc.debug_draw_line_green_buffer(q,v); - doc.add_to_line_link(v, q); - doc.add_to_paragraph_link(v, q); + //if(doc.get_line_length(q) < 4 || doc.get_line_length(v) < 4 ) + + if(doc.get_paragraph_length(v) < 3 || doc.get_line_length(q) < 3) + { + doc.debug_draw_line_green_buffer(q,v); + doc.add_to_line_link(v, q); + doc.add_to_paragraph_link(v, q); + } } } } @@ -1396,10 +1447,37 @@ namespace mymln doc.propage_paragraph_link(); } + template<typename L, typename F, typename D> + void clean_ellipsis(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_start_end_lines(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + for_all(q) + { + if( + doc.get_line_length(q) == 3 && + doc.allign_base_line_line_strict(v, q) && + doc.allign_smaller_line_strict(v,q) && + doc.allign_proximity_large_left(v,q) + ) + { + doc.debug_draw_line_green_buffer(q,v); + doc.add_to_line_link(v,q); + } + } + } + doc.propage_line_link(); + } - - + } diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh index c7fc0c1..1d50e5c 100644 --- a/scribo/sandbox/raphael/code/my/document/document.hh +++ b/scribo/sandbox/raphael/code/my/document/document.hh @@ -58,6 +58,7 @@ namespace mymln alone_letters_mask = fun::i2v::array<bool>(Areas + 1); implicit_separators_left_mask = fun::i2v::array<bool>(Areas + 1); implicit_separators_right_mask = fun::i2v::array<bool>(Areas + 1); + anomaly_mask = fun::i2v::array<bool>(Areas + 1); kill_mask = fun::i2v::array<bool>(Areas + 1); all_mask = fun::i2v::array<bool>(Areas + 1); CImpSep = 1; @@ -509,6 +510,7 @@ namespace mymln /* SET UP SPECIAL MASK TO FALSE */ implicit_separators_left_mask(lbl) = false; implicit_separators_right_mask(lbl) = false; + anomaly_mask(lbl) = false; kill_mask(lbl) = false; temp_letter(lbl) = false; } @@ -547,6 +549,25 @@ namespace mymln temp_letter = false; } + + + void add_anomaly(const point2d& point) + {add_anomaly(img_influ(point));} + void add_anomaly(const Label lbl) + {anomaly_mask(lbl) = true;} + + + void remove_anomaly(const point2d& point) + {remove_anomaly(img_influ(point));} + void remove_anomaly(const Label lbl) + {anomaly_mask(lbl) = false;} + + + bool contain_anomaly(const point2d& point) + {return contain_anomaly(img_influ(point));} + void contain_anomaly(const Label lbl) + {return anomaly_mask(lbl);} + void add_alone_letter(const point2d& point) {add_alone_letter(img_influ(point));} void add_alone_letter(const Label lbl) @@ -1399,7 +1420,26 @@ namespace mymln return (DisA) * 3 < HA * 2; } - + inline bool allign_proximity_strict_left( const point2d& Left, const point2d& Right) + {return allign_proximity_strict_left(img_influ(Left), img_influ(Right));} + + inline bool allign_proximity_strict_left( const Label Left, const Label Right) + { + box2d LB = _bboxgp[Left]; + box2d RB = _bboxgp[Right]; + + int DisA = LB.pmax()[1] - RB.pmin()[1]; + int DisB = RB.pmax()[1] - LB.pmin()[1]; + if(DisA < 0){DisA = -DisA;} + if(DisB < 0){DisB = -DisB;} + if(DisA > DisB) + { DisA = DisB; } + unsigned int HA = LB.len(0); + unsigned int VA = LB.len(1); + if(VA > HA) + { HA = VA; } + return (DisA) * 2 < HA; + } inline bool allign_proximity_large_left( const point2d& Left, const point2d& Right) @@ -1499,6 +1539,23 @@ namespace mymln } + inline bool allign_size_medium( const point2d& Left, const point2d& Right) + {return allign_size_medium(img_influ(Left), img_influ(Right));} + + inline bool allign_size_medium( const Label Left, const Label Right) + { + short int SizeL0 = label_size_(0, Left); + short int SizeR0 = label_size_(0, Right); + short int SizeL1 = label_size_(1, Left); + short int SizeR1 = label_size_(1, Right); + short int Swap = 0; + if(SizeL0 < SizeL1) + { SizeL0 = SizeL1; } + if(SizeR0 < SizeR1){SizeR0 = SizeR1;} + return SizeR0 > (SizeL0 / 3) && SizeR0 < (SizeL0 * 3); + } + + inline bool allign_size_height_max( const point2d& Left, const point2d& Right) {return allign_size_height_max(img_influ(Left), img_influ(Right));} @@ -1567,7 +1624,14 @@ namespace mymln lines_bbox[lines_union[Left]].len(0) < (_bboxgp[Right].len(0) * 12) && lines_bbox[lines_union[Left]].len(0) > (_bboxgp[Right].len(0) * 2); } - + inline bool allign_small_item_large( const point2d& Left, const point2d& Right) + {return allign_small_item_large(img_influ(Left), img_influ(Right));} + inline bool allign_small_item_large( Label Left, Label Right) + { + return + lines_bbox[lines_union[Left]].len(0) < (_bboxgp[Right].len(0) * 12) && + lines_bbox[lines_union[Left]].len(0)*2 > (_bboxgp[Right].len(0) * 3); + } inline bool allign_small_item_line( const point2d& Left, const point2d& Right) {return allign_small_item(img_influ(Left), img_influ(Right));} inline bool allign_small_item_line( Label Left, Label Right) @@ -1594,7 +1658,12 @@ namespace mymln { return lines_bbox[lines_union[Left]].len(0) > (lines_bbox[lines_union[Right]].len(0) * 2); } - + inline bool allign_smaller_line_strict( const point2d& Left, const point2d& Right) + {return allign_smaller_line_strict(img_influ(Left), img_influ(Right));} + inline bool allign_smaller_line_strict( Label Left, Label Right) + { + return lines_bbox[lines_union[Left]].len(0) > (lines_bbox[lines_union[Right]].len(0) * 3); + } inline bool allign_smaller_line_letter( const point2d& Left, const point2d& Right) {return allign_smaller_line_letter(img_influ(Left), img_influ(Right));} inline bool allign_smaller_line_letter( Label Left, Label Right) @@ -2170,6 +2239,8 @@ namespace mymln { return fun_mask_(image_mask); } vertex_image<point2d,bool> fun_mask_letters() { return fun_mask_(letters_mask); } + vertex_image<point2d,bool> fun_mask_anomalies() + { return fun_mask_(anomaly_mask); } vertex_image<point2d,bool> fun_mask_start_lines() { return fun_mask_(start_lines_mask); } vertex_image<point2d,bool> fun_mask_end_lines() @@ -2353,6 +2424,18 @@ namespace mymln paragraphs_bbox[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmax()) ; } + inline bool line_reciprocal(const point2d& L1, const point2d& L2) + {return line_reciprocal(img_influ(L1), img_influ(L2));} + + inline bool line_reciprocal(Label L1, Label L2) + { + return + lines_bbox[lines_union[L1]].has(lines_bbox[lines_union[L2]].pmin()) || + lines_bbox[lines_union[L1]].has(lines_bbox[lines_union[L2]].pmax()) || + lines_bbox[lines_union[L2]].has(lines_bbox[lines_union[L1]].pmin()) || + lines_bbox[lines_union[L2]].has(lines_bbox[lines_union[L1]].pmax()) ; + } + inline bool line_influence_reciprocal(const point2d& L1, const point2d& L2) {return line_influence_reciprocal(img_influ(L1), img_influ(L2));} @@ -2541,9 +2624,12 @@ namespace mymln } inline bool contain_implicit_separator(const point2d& point) { return contain_implicit_separator(img_influ(point)); } - + inline bool contain_noise(const point2d& point) + { return contain_noise(img_influ(point)); } inline bool contain_implicit_separator(const Label lbl) {return implicit_separators_union[lbl] != 0; } + inline bool contain_noise(const Label lbl) + {return noise_mask(lbl); } inline void merge_separators(const point2d& A, const point2d& B) { @@ -3389,6 +3475,7 @@ namespace mymln fun::i2v::array<bool> kill_mask; fun::i2v::array<bool> all_mask; fun::i2v::array<bool> image_mask; + fun::i2v::array<bool> anomaly_mask; mln::util::array<std::string> tag_lbl; mln::util::array<bool> Btag_lbl; diff --git a/scribo/sandbox/raphael/code/my/document/image.hh b/scribo/sandbox/raphael/code/my/document/image.hh index e69de29..158be87 100644 --- a/scribo/sandbox/raphael/code/my/document/image.hh +++ b/scribo/sandbox/raphael/code/my/document/image.hh @@ -0,0 +1,81 @@ +#ifndef INC_IMAGE_DOC +#define INC_IMAGE_DOC +#include<my/document/document.hh> +#include <mln/core/image/graph_elt_neighborhood.hh> +#include <mln/core/image/vertex_image.hh> +using namespace mln; + +namespace mymln +{ + namespace document + { + template<typename L, typename F, typename D> + void image_bad_paragraph(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_all_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if( + doc.contain_paragraph(v) && + doc.get_paragraph_length(v) == 1 && + doc.get_line_length(v) < 4 + + ) + { + for_all(q) + { + if + ( + doc.contain_noise(q) + + + ) + { + doc.debug_draw_line_green_buffer(q,v); + doc.debug_draw_box_green_buffer(v); + } + } + } + } + doc.propage_line_link(); + } + + template<typename L, typename F, typename D> + void image_anomalies_paragraph(mymln::document::document<L,F,D>& doc) + { + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_all_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if( + doc.contain_paragraph(v) && + doc.get_paragraph_length(v) < 3 && + doc.get_line_length(v) < 4 + ) + { + for_all(q) + { + if(doc.contain_noise(q)) + { + doc.debug_draw_line_green_buffer(q,v); + doc.debug_draw_box_green_buffer(v); + } + } + } + } + doc.propage_line_link(); + } + } +} +#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/document/outline.hh b/scribo/sandbox/raphael/code/my/document/outline.hh index cff52e3..e6244d4 100644 --- a/scribo/sandbox/raphael/code/my/document/outline.hh +++ b/scribo/sandbox/raphael/code/my/document/outline.hh @@ -59,7 +59,59 @@ namespace mymln return out; } + template <typename I, typename N> + image2d<bool> threshold(const Image<I>& iz_, const Neighborhood<N>& nbh_) + { + const I& iz = exact(iz_); + const N& nbh = exact(nbh_); + + + image2d<bool> out(iz.domain()); + + mln_pixter(const I) p(iz); + mln_nixter(const I, N) n(p, nbh); + + for_all(p) + { + unsigned int count = 0; + int variate = 0; + for_all(n) + { + variate += n.val(); + count++; + } + variate /= count; + variate -= p.val(); + if(variate > 10) + { + out.element(p.offset()) = true; + } + } + + + for_all(p) + { + unsigned int count = 0; + int variate = 0; + if(out.element(p.offset())) + { + for_all(n) + { + if(out.element(n.offset())) + count++; + } + if(count == 0) + { + out.element(p.offset()) = false; + } + } + } + + + + return out; + } template <typename I, typename N> image2d<value::int_u8> luminance(const Image<I>& iz_) { diff --git a/scribo/sandbox/raphael/code/my/runtime/lib.hh b/scribo/sandbox/raphael/code/my/runtime/lib.hh index f60a995..951c571 100644 --- a/scribo/sandbox/raphael/code/my/runtime/lib.hh +++ b/scribo/sandbox/raphael/code/my/runtime/lib.hh @@ -1,6 +1,6 @@ #ifndef INC_RUNTIME_LIB #define INC_RUNTIME_LIB - +#include<my/document/image.hh> using namespace mln; using namespace std; namespace mymln @@ -66,6 +66,12 @@ namespace mymln run.add_function("clean.paragraphs_couple", &(mymln::document::clean_paragraphs_couple)); run.add_function("clean.paragraphs_first_line", &(mymln::document::clean_paragraphs_first_line)); + run.add_function("clean.ellipsis", &(mymln::document::clean_ellipsis)); + run.add_function("clean.apostrophe_items", &(mymln::document::clean_apostrophe_items)); + + + run.add_function("image.bad_paragraph", &(mymln::document::image_bad_paragraph)); + run.add_function("image.anomalies_paragraph", &(mymln::document::image_anomalies_paragraph)); } template<typename L, typename F, typename D> diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc index 3eab190..99d6600 100644 --- a/scribo/sandbox/raphael/code/test.cc +++ b/scribo/sandbox/raphael/code/test.cc @@ -44,7 +44,6 @@ #include <my/document/separator.hh> #include <my/document/clean.hh> #include <my/document/recognition.hh> - #include <my/runtime/runtime.hh> #include <my/runtime/lib.hh> @@ -72,7 +71,6 @@ void Process(std::string File, std::string Dir, mymln::runtime::runtime< value:: /* CREATE GRAPH */ typedef value::int_u<16> uint16; image2d<bool> ima; - mln::util::timer timer; timer.start(); io::pbm::load(ima, Dir + "/" + File); -- 1.7.2.5
participants (1)
-
Raphael Boissel