Olena-patches
Threads by month
- ----- 2025 -----
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- 9625 discussions
last-svn-commit-883-gb303289 Add union find class. Increase the quality of the letter detection.
by Raphael Boissel 08 Sep '11
by Raphael Boissel 08 Sep '11
08 Sep '11
---
scribo/sandbox/raphael/code/my/debug/pict.hh | 15 ++
scribo/sandbox/raphael/code/my/document/clean.hh | 35 +++-
.../sandbox/raphael/code/my/document/document.hh | 188 ++++++++++----------
.../raphael/code/my/document/filter/filter.hh | 182 +++++++++++++++++++
.../sandbox/raphael/code/my/document/separator.hh | 79 +++++++--
scribo/sandbox/raphael/code/my/util/union.hh | 86 +++++++++
6 files changed, 474 insertions(+), 111 deletions(-)
create mode 100644 scribo/sandbox/raphael/code/my/document/filter/filter.hh
create mode 100644 scribo/sandbox/raphael/code/my/util/union.hh
diff --git a/scribo/sandbox/raphael/code/my/debug/pict.hh b/scribo/sandbox/raphael/code/my/debug/pict.hh
index 1597d53..56fdca3 100644
--- a/scribo/sandbox/raphael/code/my/debug/pict.hh
+++ b/scribo/sandbox/raphael/code/my/debug/pict.hh
@@ -3,6 +3,7 @@
#include <mln/util/graph.hh>
#include <mln/debug/superpose.hh>
+#include <my/util/union.hh>
using namespace mln;
using namespace std;
namespace mymln
@@ -26,6 +27,20 @@ namespace mymln
image2d<value::rgb8> ima_color = labeling::colorize(value::rgb8(), ima);
io::ppm::save(ima_color, file);
}
+
+ template<typename I1, typename I2> inline void save_label_image(image2d<I1> ima, mymln::util::union_find<I2> trans, std::string file)
+ {
+ mln_piter(image2d<I1>) p(ima.domain());
+ p.start();
+ while(p.is_valid())
+ {
+ ima(p) = trans[ima(p)];
+ p.next();
+ }
+ image2d<value::rgb8> ima_color = labeling::colorize(value::rgb8(), ima);
+ io::ppm::save(ima_color, file);
+ }
+
template<typename p_v> inline void save_graph_image(p_v& pv, unsigned int SizeX, unsigned int SizeY, std::string file)
{
image2d<value::rgb8> ima_graph(SizeY, SizeX);
diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh
index 9028e63..2ce8614 100644
--- a/scribo/sandbox/raphael/code/my/document/clean.hh
+++ b/scribo/sandbox/raphael/code/my/document/clean.hh
@@ -12,31 +12,52 @@ namespace mymln
template<typename L, typename F, typename D>
void clean_containers_items(mymln::document::document<L,F,D>& doc)
{
+
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
- v_ima_g mask = doc.fun_mask_containers();
+ v_ima_g mask = doc.fun_mask_letters();
mln_piter_(v_ima_g) v(mask.domain());
typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
nbh_t nbh(mask);
mln_niter_(nbh_t) q(nbh, v);
+ mln::util::array<unsigned> count = mln::util::array<unsigned>(doc.size());
+ mln::util::array<bool> inside = mln::util::array<bool>(doc.size());
+ count.fill(0);
+ inside.fill(false);
for_all(v)
{
+ unsigned link = 0;
for_all(q)
- {
- if(!doc.contain_container(v) && doc.get_bbox(q).has(v))
+ {
+ if(doc.contain_container(v) && doc.get_bbox(v).has(q))
+ {
+
+ inside[doc[q]] = true;
+ link++;
+ }
+ else if(doc.contain_letter(v))
{
- doc.add_noise(v);
+ count[doc[q]]++;
}
+
- }
+ }
+ }
+ for(unsigned int N = 0; N < doc.size();N++)
+ {
+ if(inside[N])
+ if(count[N] < 3)
+ doc.add_noise(N);
}
+
}
template<typename L, typename F, typename D>
void clean_letters_items(mymln::document::document<L,F,D>& doc)
{
+
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_letters();
@@ -63,7 +84,7 @@ namespace mymln
template<typename L, typename F, typename D>
void clean_get_lines(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
- {
+ {
image2d<value::rgb8> out;
mln::initialize(out, s);
typedef vertex_image<point2d,bool> v_ima_g;
@@ -212,7 +233,7 @@ namespace mymln
draw::line(out, q,v, mln::literal::magenta);
}
}
- else if (doc.allign_H_Large(q, v) && doc.allign_up_line(v, q))
+ else if (doc.allign_H_Large(q, v) && doc.allign_top(v, q))
{
doc.add_to_line_link(v, q);
draw::line(out, q,v, mln::literal::blue);
diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh
index 4e13103..67bda18 100644
--- a/scribo/sandbox/raphael/code/my/document/document.hh
+++ b/scribo/sandbox/raphael/code/my/document/document.hh
@@ -1,7 +1,10 @@
#ifndef INC_DOCUMENT_DOC
#define INC_DOCUMENT_DOC
#include<my/util/vector_bbox_group.hh>
+#include<my/util/union.hh>
#include <mln/util/graph.hh>
+
+
using namespace mln;
namespace mymln
{
@@ -48,11 +51,10 @@ namespace mymln
Vseparator_mask = fun::i2v::array<bool>(Areas + 1);
noise_mask = fun::i2v::array<bool>(Areas + 1);
alone_letters_mask = fun::i2v::array<bool>(Areas + 1);
-
- lines_mark = mln::util::array<unsigned int>(Areas + 1);
- lines_mark_link = mln::util::array<unsigned int>(Areas + 1);
- lines_mark.fill(0);
- lines_mark_link.fill(0);
+ CImpSep = 1;
+ NImpSep = 2;
+ lines_union = mymln::util::union_find<Label>(Areas + 1);
+ implicit_separators_union = mymln::util::union_find<Label>(Areas + 1);
img_influ = ima_influ;
CSep = 0;
CSepH = 0;
@@ -65,9 +67,7 @@ namespace mymln
}
/* OPERATION ON LINES */
inline void add_to_line_self_link(const point2d& point)
- {
- add_to_line_self_link(img_influ(point));
- }
+ { add_to_line_self_link(img_influ(point));}
inline void add_to_line(const point2d& point)
{ add_to_line(img_influ(point)); }
@@ -77,7 +77,7 @@ namespace mymln
inline bool same_line(const point2d& A, const point2d& B)
{ return same_line(img_influ(A), img_influ(B)); }
inline bool same_line(const Label A, const Label B)
- { return lines_mark[A] == lines_mark[B]; }
+ { return lines_union[A] == lines_union[B]; }
inline void add_new_line(const point2d& point)
{ add_new_line(img_influ(point)); }
@@ -88,74 +88,33 @@ namespace mymln
{ return contain_line(img_influ(point)); }
inline void add_to_line(const Label lbl)
- { lines_mark[lbl] = CLine; }
+ { lines_union[lbl] = CLine; }
inline void add_new_line(const Label lbl)
{ CLine = NLine; NLine++; }
+
inline void add_to_line_self_link(const Label A)
- {
- lines_mark_link[A] = A;
- }
+ {lines_union.add_self_link(A);}
+
inline void add_to_line_link(const Label A, const Label B)
- {
-
- unsigned int Pos = find_line_parent(A);
- if(lines_mark_link[B] == 0)
- {
- if(Pos != B)
- {
- if(Pos != 0)
- {
- lines_mark_link[B] = Pos;
- lines_mark_link[A] = Pos;
- }
- else
- {
- lines_mark_link[A] = B;
- }
- }
- }
- else
- {
- unsigned int PosB = find_line_parent(B);
- if(PosB == Pos)
- {
- lines_mark_link[B] = Pos;
- lines_mark_link[A] = Pos;
- }
- else
- {
- lines_mark_link[B] = Pos;
- lines_mark_link[PosB] = Pos;
- }
- }
-
-
-
- }
- inline unsigned int find_line_parent(const Label A)
- {
- unsigned int Pos = A;
- while(Pos != lines_mark_link[Pos] && Pos != 0){Pos = lines_mark_link[Pos];}
- return Pos;
- }
+ {lines_union.add_link(A, B);}
inline void jump_to_line(const Label lbl)
{
- if(lines_mark[lbl] != 0)
- CLine = lines_mark[lbl];
+ if(lines_union[lbl] != 0)
+ CLine = lines_union[lbl];
else
add_new_line(lbl);
}
inline bool contain_line(const Label lbl)
- { return lines_mark[lbl] != 0;}
+ { return lines_union[lbl] != 0;}
- /* LABELS MUST ALLWAYS BE SORTED */
inline void add_noise(const point2d& point)
{add_noise(img_influ(point));}
+ inline unsigned int size(){return Areas_Number_;}
void add_noise(Label lbl)
@@ -463,7 +422,9 @@ namespace mymln
}
void debug_save_lines(std::string file)
- { mymln::debug::save_label_image(img, lines_mark , file);}
+ { mymln::debug::save_label_image(img, lines_union , file);}
+ void debug_save_separators(std::string file)
+ { mymln::debug::save_label_image(img, implicit_separators_union , file);}
vertex_image<point2d,bool> fun_mask_separators()
{ return fun_mask_(separators_mask); }
vertex_image<point2d,bool> fun_mask_containers()
@@ -525,27 +486,27 @@ namespace mymln
{ return get_line_length(img_influ(point)); }
unsigned int get_line_length(Label L)
- { return lines_len[lines_mark[L]]; }
+ { return lines_len[lines_union[L]]; }
unsigned int get_beginning_of_line(point2d point)
{ return get_beginning_of_line(img_influ(point)); }
unsigned int get_beginning_of_line(Label L)
- { return lines_first_label[lines_mark[L]]; }
+ { return lines_first_label[lines_union[L]]; }
unsigned int get_end_of_line(point2d point)
{ return get_end_of_line(img_influ(point)); }
unsigned int get_end_of_line(Label L)
- { return lines_last_label[lines_mark[L]]; }
+ { return lines_last_label[lines_union[L]]; }
unsigned int get_parent_line(point2d point)
- { return lines_mark[img_influ(point)]; }
+ { return lines_union[img_influ(point)]; }
unsigned int get_parent_line(Label L)
- { return lines_mark[L]; }
+ { return lines_union[L]; }
inline void recook_lines()
@@ -569,20 +530,68 @@ namespace mymln
cook_lines_();
}
inline void propage_line_link()
- {
- for(unsigned int N = 1; N < lines_mark_link.size(); N++)
- {
- unsigned int Pos = N;
- while(Pos != lines_mark_link[Pos] && Pos != 0){Pos = lines_mark_link[Pos]; }
- lines_mark[N] = lines_mark[Pos];
- }
- }
+ { lines_union.propage_links(); }
/*image_if<image2d<Label> masked_image_letters()
{return masked_image_(letters_mask); }
image_if<image2d<Label> masked_image_separator()
- {return masked_image_(letters_mask); }*/
+ {return masked_image_(letters_mask); }*/
+
+ /* IMPLICIT SEPARATORS */
+ inline void add_new_separator(const point2d& point)
+ { add_new_separator(img_influ(point));}
+ inline void add_new_separator(const Label lbl)
+ { CImpSep = NImpSep; NImpSep++; }
+
+ inline void add_to_separator_self_link(const point2d& point)
+ { add_to_separator_self_link(img_influ(point));}
+ inline void add_to_separator_self_link(const Label A)
+ {implicit_separators_union.add_self_link(A);}
+
+ inline void add_to_separator_link(const point2d& A, const point2d& B)
+ { add_to_separator_link(img_influ(A), img_influ(B));}
+ inline void add_to_separator_link(const Label A, const Label B)
+ {implicit_separators_union.add_link(A, B);}
+
+ inline bool same_implicit_separator(const point2d& A, const point2d& B)
+ {return same_implicit_separator(img_influ(A), img_influ(B));}
+ inline bool same_implicit_separator(const Label A, const Label B)
+ {return implicit_separators_union[A] == implicit_separators_union[B];}
+
+ inline void propage_separator_link()
+ { implicit_separators_union.propage_links(); }
+
+ inline void jump_to_separator(const point2d& point)
+ { jump_to_separator(img_influ(point)); }
+ inline void jump_to_separator(const Label lbl)
+ {
+ if(implicit_separators_union[lbl] != 0)
+ CImpSep = implicit_separators_union[lbl];
+ else
+ add_new_separator(lbl);
+ }
+ inline bool contain_implicit_separator(const point2d& point)
+ { return contain_implicit_separator(img_influ(point)); }
+ inline bool contain_implicit_separator(const Label lbl)
+ {return implicit_separators_union[lbl] != 0; }
+
+ inline void add_to_separator(const point2d& point)
+ { add_to_separator(img_influ(point)); }
+ inline void add_to_separator(const Label lbl)
+ { implicit_separators_union[lbl] = CImpSep; }
+
+ inline void invalidate_implicit_separator(const point2d& point)
+ { invalidate_implicit_separator(img_influ(point)); }
+ inline void invalidate_implicit_separator(Label lbl)
+ { implicit_separators_union[lbl] = 0; }
+
+ inline Label& operator[](point2d i)
+ { return img_influ(i); }
+
+ inline point2d& operator[](Label i)
+ { return _bboxgp[i].pcenter(); }
private:
+
// PRIVATE DATA ON LINES
mln::util::array<unsigned int> lines_len;
mln::util::array<unsigned int> lines_first_label;
@@ -595,22 +604,22 @@ namespace mymln
inline void cook_lines_()
{
- for(unsigned int N = 1; N < lines_mark.size(); N++)
+ for(unsigned int N = 1; N < lines_union.size(); N++)
{
- if(lines_mark[N] != 0)
+ if(lines_union[N] != 0)
{
/* APPROXIMATE THE NUMBER OF CHAR IN THE LINE */
- lines_len[lines_mark[N]]++;
+ lines_len[lines_union[N]]++;
/* COOK THE FIRST AND THE LAST LABEL OF THE LINE */
- if(lines_first_label[lines_mark[N]] == 0)
- lines_first_label[lines_mark[N]] = N;
- else if(_bboxgp[N].pcenter()[1] < _bboxgp[lines_first_label[lines_mark[N]]].pcenter()[1])
- lines_first_label[lines_mark[N]] = N;
+ if(lines_first_label[lines_union[N]] == 0)
+ lines_first_label[lines_union[N]] = N;
+ else if(_bboxgp[N].pcenter()[1] < _bboxgp[lines_first_label[lines_union[N]]].pcenter()[1])
+ lines_first_label[lines_union[N]] = N;
- if(lines_last_label[lines_mark[N]] == 0)
- lines_last_label[lines_mark[N]] = N;
- else if(_bboxgp[N].pcenter()[1] > _bboxgp[lines_last_label[lines_mark[N]]].pcenter()[1])
- lines_last_label[lines_mark[N]] = N;
+ if(lines_last_label[lines_union[N]] == 0)
+ lines_last_label[lines_union[N]] = N;
+ else if(_bboxgp[N].pcenter()[1] > _bboxgp[lines_last_label[lines_union[N]]].pcenter()[1])
+ lines_last_label[lines_union[N]] = N;
/* FILL THE MASK WITH FALSE:MAYBE USELESS IF THE MASK IS INITIALIZED */
start_lines_mask(N) = false;
@@ -738,10 +747,10 @@ namespace mymln
unsigned int CLine;
unsigned int NLine;
+ unsigned int CImpSep;
+ unsigned int NImpSep;
-
- mln::util::array<unsigned int> lines_mark;
- mln::util::array<unsigned int> lines_mark_link;
+ mymln::util::union_find<Label> lines_union;
unsigned int CLet ;
unsigned int CSep ;
unsigned int CSepH ;
@@ -770,8 +779,7 @@ namespace mymln
Label Areas_Number_;
/* IMPLICIT SEPARATOR DETECTION */
- mln::util::array<unsigned int> implicit_separator_mark;
- mln::util::array<unsigned int> implicit_separator_mark_link;
+ mymln::util::union_find<Label> implicit_separators_union;
};
}
}
diff --git a/scribo/sandbox/raphael/code/my/document/filter/filter.hh b/scribo/sandbox/raphael/code/my/document/filter/filter.hh
new file mode 100644
index 0000000..c3a294c
--- /dev/null
+++ b/scribo/sandbox/raphael/code/my/document/filter/filter.hh
@@ -0,0 +1,182 @@
+#ifndef INC_DOCUMENT_FILTER_GENERIC
+#define INC_DOCUMENT_FILTER_GENERIC
+namespace mymln
+{
+ namespace document
+ {
+ namespace filter
+ {
+ template<typename L, typename F, typename D, typename Left, typename Right>
+ class filter
+ {
+ public:
+ filter(){}
+ filter(document<L,F,D>& doc){ doc_ = doc; }
+ filter(document<L,F,D>& doc, vertex_image<point2d,bool> mask){ doc_ = doc; mask_ = mask; }
+ inline bool link_test(point2d& A, point2d& B){ return true; }
+ inline bool vertex_test(point2d& A){ return true; }
+ inline bool gen_link_test(point2d& A, point2d& B)
+ {
+ return link_test(A, B);
+ }
+ inline bool gen_vertex_test(point2d& A)
+ {
+ return vertex_test(A);
+ }
+ inline void iter_dgb(std::string dgb_out, image2d<bool> s)
+ {
+ image2d<value::rgb8> out;
+ mln::initialize(out, s);
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ mln_piter_(v_ima_g) v(mask_.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask_);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(gen_vertex_test(v))
+ {
+ for_all(q)
+ {
+ if(gen_link_test(v, q))
+ {
+ draw::line(out, q,v, mln::literal::green);
+ }
+ else
+ {
+ draw::line(out, q,v, mln::literal::magenta);
+ }
+ }
+ }
+ else
+ {
+ draw::line(out, q,v, mln::literal::magenta);
+ }
+ }
+ }
+ inline void iter()
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ mln_piter_(v_ima_g) v(mask_.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask_);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(gen_vertex_test(v))
+ {
+ for_all(q)
+ {
+ if(gen_link_test(v, q))
+ {
+
+ }
+ }
+ }
+ }
+ }
+
+ inline filter& operator|(filter& B)
+ {
+ filter<L,F,D> PFilter = filter_or(doc_, mask_);
+ PFilter.sub_filter_A_ = this;
+ PFilter.sub_filter_B_ = B;
+ B.doc_ = doc_;
+ B.mask_ = mask_;
+ return PFilter;
+ }
+
+ inline filter& operator&(filter& B)
+ {
+ filter<L,F,D> PFilter = filter_and(doc_, mask_);
+ PFilter.sub_filter_A_ = this;
+ PFilter.sub_filter_B_ = B;
+ B.doc_ = doc_;
+ B.mask_ = mask_;
+ return PFilter;
+ }
+
+ protected:
+ Left sub_filter_A_;
+ Right sub_filter_B_;
+
+ document<L,F,D> doc_;
+ vertex_image<point2d,bool> mask_;
+
+
+
+ };
+
+
+
+
+
+
+
+
+
+
+ template<typename L, typename F, typename D>
+ class filter_or : filter<L,F,D>
+ {
+ public:
+ inline bool gen_link_test(point2d& A, point2d& B)
+ {
+ return sub_filter_A_.gen_link_test(A, B) || sub_filter_B_.gen_link_test(A, B);
+ }
+ inline bool gen_vertex_test(point2d& A)
+ {
+ return sub_filter_A_.gen_vertex_test(A) || sub_filter_B_.gen_vertex_test(A);
+ }
+
+ protected:
+ filter<L,F,D> sub_filter_A_;
+ filter<L,F,D> sub_filter_B_;
+
+ document<L,F,D> doc_;
+ vertex_image<point2d,bool> mask_;
+ };
+
+ template<typename L, typename F, typename D>
+ class filter_and : filter<L,F,D>
+ {
+ public:
+ inline bool gen_link_test(point2d& A, point2d& B)
+ {
+ return sub_filter_A_.gen_link_test(A, B) || sub_filter_B_.gen_link_test(A, B);
+ }
+ inline bool gen_vertex_test(point2d& A)
+ {
+ return sub_filter_A_.gen_vertex_test(A) || sub_filter_B_.gen_vertex_test(A);
+ }
+
+ protected:
+ filter<L,F,D> sub_filter_A_;
+ filter<L,F,D> sub_filter_B_;
+
+ document<L,F,D> doc_;
+ vertex_image<point2d,bool> mask_;
+ };
+
+ template<typename L, typename F, typename D>
+ class filter_letter : filter<L,F,D>
+ {
+ public:
+ inline bool vertex_test(point2d& A){ return doc_.contain_letter(A); }
+
+ protected:
+ filter<L,F,D> sub_filter_A_;
+ filter<L,F,D> sub_filter_B_;
+
+ document<L,F,D> doc_;
+ vertex_image<point2d,bool> mask_;
+ };
+
+
+
+ }
+ }
+}
+#endif
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/my/document/separator.hh b/scribo/sandbox/raphael/code/my/document/separator.hh
index 756f04b..f5a32db 100644
--- a/scribo/sandbox/raphael/code/my/document/separator.hh
+++ b/scribo/sandbox/raphael/code/my/document/separator.hh
@@ -2,7 +2,7 @@
#define INC_DOCUMENT_SEPARATOR
#include <my/util/vector_bbox_group.hh>
#include <mln/util/graph.hh>
-#include <mln/document/document.hh>
+#include <my/document/document.hh>
using namespace mln;
namespace mymln
{
@@ -11,7 +11,7 @@ namespace mymln
namespace separators
{
template<typename L, typename F, typename D>
- void clean_containers_items(mymln::document::document<L,F,D>& doc, std::string dgb_out, image2d<bool> s)
+ void separators_find_allign(mymln::document::document<L,F,D>& doc, std::string dgb_out, image2d<bool> s)
{
image2d<value::rgb8> out;
mln::initialize(out, s);
@@ -27,42 +27,93 @@ namespace mymln
if(doc.contain_letter(v))
{
+ doc.jump_to_separator(v);
if((!doc.contain_implicit_separator(v)))
{
- doc.add_to_implicit_separator(v);
- doc.add_to_implicit_separator_self_link(v);
+ doc.add_to_separator(v);
+ doc.add_to_separator_self_link(v);
}
-
+ bool All_Alone = true;
for_all(q)
{
- if((!doc.contain_line(q)))
+ if((!doc.contain_implicit_separator(q)))
{
// draw::line(out, q,v, mln::literal::blue);
- if(doc.allign_H(q,v) && doc.allign_size(q, v))
+ if(doc.allign_H_Large(q,v) && doc.allign_size(q, v))
{
- doc.add_to_implicit_separator_link(v, q);
+ doc.add_to_separator_link(v, q);
draw::line(out, q,v, mln::literal::magenta);
- All_Alone = false;
+ All_Alone = false;
}
}
else
{
- if(doc.allign_V(q,v) && doc.allign_size(q, v))
+ if(doc.allign_H_Large(q,v) && doc.allign_size(q, v))
{
- doc.add_to_implicit_separator_link(q, v);
+ doc.add_to_separator_link(q, v);
draw::line(out, q,v, mln::literal::green);
- All_Alone = false;
+ All_Alone = false;
}
}
}
+ if(All_Alone){doc.invalidate_implicit_separator(v);}
}
}
- doc.propage_implicit_separator_link();
+ doc.propage_separator_link();
io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
}
+
+ template<typename L, typename F, typename D>
+ void separators_make_clean(mymln::document::document<L,F,D>& doc)
+ {
+
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ mln::util::array<unsigned> count = mln::util::array<unsigned>(doc.size());
+ count.fill(0);
+ for_all(v)
+ {
+
+ if(doc.contain_implicit_separator(v))
+ {
+ bool All_Alone = true;
+ doc.jump_to_line(v);
+ if((!doc.contain_line(v)))
+ {
+ doc.add_to_line(v);
+ doc.add_to_line_self_link(v);
+ }
+
+ for_all(q)
+ {
+
+ if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v))
+ {
+ // draw::line(out, q,v, mln::literal::blue);
+ if(doc.allign_V(q,v) && doc.allign_size(q, v))
+ {
+ count[doc[q]]++;
+ }
+ }
+
+ }
+ }
+ }
+ for(unsigned int N = 0; N < doc.size();N++)
+ {
+ if(count[N] > 1)
+ doc.invalidate_implicit_separator(N);
+ }
+ }
}
}
-}
\ No newline at end of file
+}
+#endif
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/my/util/union.hh b/scribo/sandbox/raphael/code/my/util/union.hh
new file mode 100644
index 0000000..7f21e87
--- /dev/null
+++ b/scribo/sandbox/raphael/code/my/util/union.hh
@@ -0,0 +1,86 @@
+#ifndef INC_DOCUMENT_UNION
+#define INC_DOCUMENT_UNION
+namespace mymln
+{
+ namespace util
+ {
+ template<typename Label>
+ class union_find
+ {
+ public :
+ union_find()
+ {size_ = 0;}
+ union_find(const unsigned int max_size)
+ {
+ mark = mln::util::array<unsigned int>(max_size);
+ mark_link = mln::util::array<unsigned int>(max_size);
+ mark.fill(0);
+ mark_link.fill(0);
+ size_ = max_size;
+ }
+
+ inline void add_self_link(const Label A)
+ { mark_link[A] = A; }
+ inline void add_link(const Label A, const Label B)
+ {
+
+ unsigned int Pos = find_parent_(A);
+ if(mark_link[B] == 0)
+ {
+ if(Pos != B)
+ {
+ if(Pos != 0)
+ {
+ mark_link[B] = Pos;
+ mark_link[A] = Pos;
+ }
+ else
+ {
+ mark_link[A] = B;
+ }
+ }
+ }
+ else
+ {
+ unsigned int PosB = find_parent_(B);
+ if(PosB == Pos)
+ {
+ mark_link[B] = Pos;
+ mark_link[A] = Pos;
+ }
+ else
+ {
+ mark_link[B] = Pos;
+ mark_link[PosB] = Pos;
+ }
+ }
+ }
+ inline void propage_links()
+ {
+ for(unsigned int N = 1; N < size_; N++)
+ {
+ unsigned int Pos = N;
+ while(Pos != mark_link[Pos] && Pos != 0){Pos = mark_link[Pos]; }
+ mark[N] = mark[Pos];
+ }
+ }
+ inline unsigned int size()
+ {return size_; }
+ inline unsigned int& operator[](unsigned int i)
+ {
+ return mark[i];
+ }
+ private :
+ inline unsigned int find_parent_(const Label A)
+ {
+ unsigned int Pos = A;
+ while(Pos != mark_link[Pos] && Pos != 0){Pos = mark_link[Pos];}
+ return Pos;
+ }
+ mln::util::array<unsigned int> mark;
+ mln::util::array<unsigned int> mark_link;
+ unsigned int size_;
+ };
+ }
+}
+#endif
\ No newline at end of file
--
1.7.2.5
1
0
last-svn-commit-884-gf5c0102 Add the detection of Right implicit separator and improve lines detection
by Raphael Boissel 08 Sep '11
by Raphael Boissel 08 Sep '11
08 Sep '11
---
scribo/sandbox/raphael/code/my/debug/pict.hh | 9 +-
scribo/sandbox/raphael/code/my/document/clean.hh | 227 ++++++-
.../sandbox/raphael/code/my/document/document.hh | 715 +++++++++++++++++++-
.../sandbox/raphael/code/my/document/separator.hh | 118 +++-
scribo/sandbox/raphael/code/my/util/union.hh | 12 +-
scribo/sandbox/raphael/code/test.cc | 57 ++-
6 files changed, 1066 insertions(+), 72 deletions(-)
diff --git a/scribo/sandbox/raphael/code/my/debug/pict.hh b/scribo/sandbox/raphael/code/my/debug/pict.hh
index 56fdca3..23880bc 100644
--- a/scribo/sandbox/raphael/code/my/debug/pict.hh
+++ b/scribo/sandbox/raphael/code/my/debug/pict.hh
@@ -41,6 +41,7 @@ namespace mymln
io::ppm::save(ima_color, file);
}
+
template<typename p_v> inline void save_graph_image(p_v& pv, unsigned int SizeX, unsigned int SizeY, std::string file)
{
image2d<value::rgb8> ima_graph(SizeY, SizeX);
@@ -69,10 +70,14 @@ namespace mymln
}
template<typename I> inline void save_boxes_image(mln::util::array<box2d> boxes, I source, std::string file)
{
- image2d<bool> out(source.domain());
- data::fill(out, false);
+ image2d<bool> out;
+ mln::initialize(out, source);
for(unsigned int N = 0 ; N < boxes.size(); N++)
{
+ if(!boxes[N].is_valid()){continue;}
+ if((boxes[N]).pmin()[0] < 0 || (boxes[N]).pmin()[1] < 0 || (boxes[N]).pmax()[0] < 0 || (boxes[N]).pmax()[1] < 0 ){continue;}
+ if((boxes[N]).pmax()[0] > source.domain().pmax()[0] || (boxes[N]).pmax()[1] > source.domain().pmax()[1] ){continue;}
+ if((boxes[N]).pmin()[0] > source.domain().pmax()[0] || (boxes[N]).pmin()[1] > source.domain().pmax()[1] ){continue;}
data::fill((out | (boxes[N])).rw(), true);
}
io::pbm::save(out , file);
diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh
index 2ce8614..18669d3 100644
--- a/scribo/sandbox/raphael/code/my/document/clean.hh
+++ b/scribo/sandbox/raphael/code/my/document/clean.hh
@@ -83,10 +83,41 @@ namespace mymln
}
template<typename L, typename F, typename D>
- void clean_get_lines(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ void clean_letters_alone(mymln::document::document<L,F,D>& doc)
+ {
+
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_all_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_alone_letter(v))
+ {
+ unsigned int count = 0;
+ unsigned int real_count = 0;
+ for_all(q)
+ {
+ if(doc.contain_alone_letter(q))
+ count++;
+ else if(doc.contain_letter(q))
+ real_count++;
+ }
+ if(real_count < 2 && count > 2)
+ { doc.add_noise(v);}
+ else if(real_count == 0 && count > 1)
+ { doc.add_noise(v);}
+ }
+ }
+
+ }
+
+ template<typename L, typename F, typename D>
+ void clean_get_lines(mymln::document::document<L,F,D>& doc)
{
- image2d<value::rgb8> out;
- mln::initialize(out, s);
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_letters();
@@ -113,19 +144,17 @@ namespace mymln
if((!doc.contain_line(q)))
{
// draw::line(out, q,v, mln::literal::blue);
- if(doc.allign_V(q,v) && doc.allign_size(q, v))
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && (doc.allign_proximity_large(q, v) || doc.allign_proximity_large(v, q)) )
{
doc.add_to_line_link(v, q);
- draw::line(out, q,v, mln::literal::magenta);
All_Alone = false;
}
}
else
{
- if(doc.allign_V(q,v) && doc.allign_size(q, v))
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && (doc.allign_proximity_large(q, v) || doc.allign_proximity_large(v, q)))
{
doc.add_to_line_link(q, v);
- draw::line(out, q,v, mln::literal::green);
All_Alone = false;
}
}
@@ -138,14 +167,12 @@ namespace mymln
}
doc.propage_line_link();
- io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
}
+
template<typename L, typename F, typename D>
- void clean_dot_items(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ void clean_dot_items(mymln::document::document<L,F,D>& doc)
{
- image2d<value::rgb8> out;
- mln::initialize(out, s);
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_alone_letters();
@@ -161,7 +188,6 @@ namespace mymln
{
if(!doc.allign_H(q, v) && doc.allign_base_line(v, q))
{
- draw::line(out, q,v, mln::literal::green);
doc.add_to_line_link(v, q);
doc.add_letter_coerce(q);
}
@@ -180,23 +206,22 @@ namespace mymln
{
if (doc.allign_H_Large(v2, q2) && doc.allign_top(v2, q2))
{
- draw::line(out, q2,v2, mln::literal::magenta);
doc.add_to_line_link(v2, q2);
doc.add_letter_coerce(q2);
}
}
}
}
- io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
doc.propage_line_link();
}
template<typename L, typename F, typename D>
void clean_quote_items(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
{
-
+ #ifndef NGRAPHDEBUG
image2d<value::rgb8> out;
mln::initialize(out, s);
+ #endif
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_start_end_lines();
@@ -243,18 +268,178 @@ namespace mymln
}
}
}
+ #ifndef NGRAPHDEBUG
io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
+ #endif
doc.propage_line_link();
doc.recook_lines();
}
-
-
-
-
+
+ /// Attaches short neighbouring lines to the line of a letter.
+ ///
+ /// Walks the vertices of the mask returned by doc.fun_mask_start_end_lines().
+ /// For each vertex v that is a letter and belongs to a line, a neighbour q is
+ /// linked to it with doc.add_to_line_link(v, q) when all the line-level
+ /// predicates below accept the pair and the line length of q is below 3.
+ template<typename L, typename F, typename D>
+ void clean_line_link_item(mymln::document::document<L,F,D>& doc)
+ {
+   typedef vertex_image<point2d,bool> mask_image_t;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > vertex_set_t;
+   typedef graph_elt_neighborhood_if<mln::util::graph, vertex_set_t, mask_image_t> neighborhood_t;
+
+   mask_image_t mask = doc.fun_mask_start_end_lines();
+   mln_piter_(mask_image_t) v(mask.domain());
+   neighborhood_t nbh(mask);
+   mln_niter_(neighborhood_t) q(nbh, v);
+
+   for_all(v)
+   {
+     // Only a letter that already belongs to a line can absorb a neighbour.
+     if(doc.contain_letter(v) && doc.contain_line(v))
+     {
+       for_all(q)
+       {
+         const bool absorb =
+           doc.allign_V_line(v,q) &&
+           doc.allign_center_line(v, q) &&
+           doc.allign_smaller_line(v,q) &&
+           doc.get_line_length(q) < 3 &&
+           doc.allign_proximity_line(v,q);
+         if(absorb)
+           doc.add_to_line_link(v, q);
+       }
+     }
+   }
+ }
+
+
+ /// Links neighbouring letters that are close to a letter already in a line.
+ ///
+ /// Iterates over the mask returned by doc.fun_mask_letters(). For each vertex
+ /// v that is a letter inside a line, a neighbour q is linked to v when either
+ /// the letter-level test (allign_V, allign_size, allign_proximity) or the
+ /// line-level test (allign_size_height_line, allign_proximity_line,
+ /// allign_V_line) succeeds. Links are propagated once at the end.
+ template<typename L, typename F, typename D>
+ void clean_proximity_lines(mymln::document::document<L,F,D>& doc)
+ {
+   typedef vertex_image<point2d,bool> mask_image_t;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > vertex_set_t;
+   typedef graph_elt_neighborhood_if<mln::util::graph, vertex_set_t, mask_image_t> neighborhood_t;
+
+   mask_image_t mask = doc.fun_mask_letters();
+   mln_piter_(mask_image_t) v(mask.domain());
+   neighborhood_t nbh(mask);
+   mln_niter_(neighborhood_t) q(nbh, v);
+
+   for_all(v)
+   {
+     if(doc.contain_letter(v) && doc.contain_line(v))
+     {
+       for_all(q)
+       {
+         // The line-level test is only evaluated when the letter-level one fails.
+         const bool close_enough =
+           (doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v)) ||
+           (doc.allign_size_height_line(q,v) && doc.allign_proximity_line(q,v) && doc.allign_V_line(q, v));
+         if(close_enough)
+           doc.add_to_line_link(v, q);
+       }
+     }
+   }
+   doc.propage_line_link();
+ }
+
+ /// Groups lines into paragraphs by linking the first letters of lines.
+ ///
+ /// After doc.link_paragraphs(), every vertex v of the start-of-line mask that
+ /// is the beginning of its own line is compared with its neighbours q. When
+ /// allign_H_Large and allign_size accept the pair, q and v are put in the same
+ /// paragraph and linked; links are propagated at the end.
+ ///
+ /// @param doc      document whose paragraph structure is updated.
+ /// @param dgb_out  file the debug picture is saved to; only used when
+ ///                 NGRAPHDEBUG is not defined.
+ /// @param s        image the debug picture is initialised from and superposed
+ ///                 with; only used when NGRAPHDEBUG is not defined.
+ template<typename L, typename F, typename D>
+ void clean_paragraph_items(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ {
+   #ifndef NGRAPHDEBUG
+   image2d<value::rgb8> out;
+   mln::initialize(out, s);
+   #endif
+   typedef vertex_image<point2d,bool> v_ima_g;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+   v_ima_g mask = doc.fun_mask_start_lines();
+   mln_piter_(v_ima_g) v(mask.domain());
+   typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+   nbh_t nbh(mask);
+   doc.link_paragraphs();
+   mln_niter_(nbh_t) q(nbh, v);
+   for_all(v)
+   {
+     if(doc.contain_letter(v))
+     {
+       if(doc.contain_line(v) && doc.get_beginning_of_line(v) == doc[v])
+       {
+         doc.jump_to_paragraph(v);
+         for_all(q)
+         {
+           if(doc.allign_H_Large(q,v) && doc.allign_size(q, v))
+           {
+             if(doc.contain_paragraph(q))
+             {
+               if(!doc.contain_paragraph(v))
+               {
+                 doc.add_to_paragraph(v);
+               }
+               doc.add_to_paragraph_link(q, v);
+               // `out` only exists in graph-debug builds, so the drawing must be guarded too.
+               #ifndef NGRAPHDEBUG
+               draw::line(out, q,v, mln::literal::green);
+               #endif
+             }
+             else
+             {
+               if(!doc.contain_paragraph(v))
+               {
+                 doc.add_to_paragraph(q);
+                 doc.add_to_paragraph(v);
+                 doc.add_to_paragraph_self_link(q);
+                 doc.add_to_paragraph_link(q, v);
+               }
+               else
+               {
+                 doc.add_to_paragraph(q);
+                 doc.add_to_paragraph_link(v, q);
+               }
+               #ifndef NGRAPHDEBUG
+               draw::line(out, q,v, mln::literal::magenta);
+               #endif
+             }
+           }
+         }
+       }
+     }
+   }
+   #ifndef NGRAPHDEBUG
+   io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
+   #endif
+   doc.propage_paragraph_link();
}
-
-
+ /// Links short lines (line length below 5) to a longer neighbouring line.
+ ///
+ /// For each vertex v of the start/end-of-line mask that belongs to a line of
+ /// length above 3, a neighbour q is a candidate when its line is smaller,
+ /// close and vertically aligned. A candidate is linked when it sits on the
+ /// base line of v (and its line length is below 3) or when
+ /// allign_up_line_line accepts it.
+ template<typename L, typename F, typename D>
+ void clean_quote_lines(mymln::document::document<L,F,D>& doc)
+ {
+   typedef vertex_image<point2d,bool> mask_image_t;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > vertex_set_t;
+   typedef graph_elt_neighborhood_if<mln::util::graph, vertex_set_t, mask_image_t> neighborhood_t;
+
+   mask_image_t mask = doc.fun_mask_start_end_lines();
+   mln_piter_(mask_image_t) v(mask.domain());
+   neighborhood_t nbh(mask);
+   mln_niter_(neighborhood_t) q(nbh, v);
+
+   for_all(v)
+   {
+     if(doc.contain_line(v))
+     {
+       for_all(q)
+       {
+         const bool candidate =
+           doc.get_line_length(q) < 5 &&
+           doc.allign_smaller_line(v,q) &&
+           doc.get_line_length(v) > 3 &&
+           doc.allign_proximity_line(v,q) &&
+           doc.allign_V_line(v,q);
+         if(candidate)
+         {
+           const bool on_base_line =
+             doc.allign_base_line_line(v,q) && doc.get_line_length(q) < 3;
+           if(on_base_line || doc.allign_up_line_line(v,q))
+             doc.add_to_line_link(v, q);
+         }
+       }
+     }
+   }
+ }
+ }
}
diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh
index 67bda18..f6204de 100644
--- a/scribo/sandbox/raphael/code/my/document/document.hh
+++ b/scribo/sandbox/raphael/code/my/document/document.hh
@@ -1,7 +1,9 @@
#ifndef INC_DOCUMENT_DOC
#define INC_DOCUMENT_DOC
+#include <mln/accu/shape/bbox.hh>
#include<my/util/vector_bbox_group.hh>
#include<my/util/union.hh>
+#include<my/debug/pict.hh>
#include <mln/util/graph.hh>
@@ -47,14 +49,24 @@ namespace mymln
separators_mask = fun::i2v::array<bool>(Areas + 1);
containers_mask = fun::i2v::array<bool>(Areas + 1);
letters_mask = fun::i2v::array<bool>(Areas + 1);
+ all_letters_mask = fun::i2v::array<bool>(Areas + 1);
Hseparator_mask = fun::i2v::array<bool>(Areas + 1);
Vseparator_mask = fun::i2v::array<bool>(Areas + 1);
noise_mask = fun::i2v::array<bool>(Areas + 1);
alone_letters_mask = fun::i2v::array<bool>(Areas + 1);
+ implicit_separators_left_mask = fun::i2v::array<bool>(Areas + 1);
+ implicit_separators_right_mask = fun::i2v::array<bool>(Areas + 1);
CImpSep = 1;
NImpSep = 2;
lines_union = mymln::util::union_find<Label>(Areas + 1);
implicit_separators_union = mymln::util::union_find<Label>(Areas + 1);
+
+ paragraphs_union = mymln::util::union_find<Label>(Areas + 1);
+
+ tag_lbl = mln::util::array<std::string>(Areas + 1);
+ lines_split = mln::util::array<Label>(Areas + 1);
+ lines_split.fill(0);
+
img_influ = ima_influ;
CSep = 0;
CSepH = 0;
@@ -62,10 +74,190 @@ namespace mymln
CLet = 0;
CLine = 1;
NLine = 2;
+ CPar = 1;
+ NPar = 2;
Areas_Number_ = Areas + 1;
}
+ /* OPERATION ON PARAGRAPH */
+ /// Rebuilds the paragraph union-find from the current lines.
+ ///
+ /// Every label first has its paragraph link invalidated. A label flagged in
+ /// start_lines_mask becomes a self link; any other label that belongs to a
+ /// line is linked to the first label of its line.
+ ///
+ /// @return true when every linked label had a non-null beginning of line,
+ ///         false when at least one "ERROR#" case was reported.
+ inline bool link_paragraphs()
+ {
+   bool consistent = true;
+   for(unsigned int N = 1; N < Areas_Number_; N++)
+   {
+     paragraphs_union.invalidate_link(N);
+     if(start_lines_mask(N))
+     {
+       paragraphs_union.add_self_link(N);
+     }
+     else if(contain_line(N))
+     {
+       // A label inside a line is expected to have a recorded first label.
+       if(get_beginning_of_line(N) == 0){std::cout <<"ERROR#\n"; consistent = false;}
+       paragraphs_union.add_link(get_beginning_of_line(N), N);
+     }
+   }
+   // The function is declared bool: flowing off the end without a value is undefined behaviour.
+   return consistent;
+ }
+ /// True when the label under `point` has a non-null entry in paragraphs_union.
+ inline bool contain_paragraph(const point2d& point)
+ {
+   return contain_paragraph(img_influ(point));
+ }
+ /// True when `lbl` has a non-null entry in paragraphs_union.
+ inline bool contain_paragraph(const Label lbl)
+ {
+   return paragraphs_union[lbl] != 0;
+ }
+ /// Assigns the label under `point` to the current paragraph (CPar).
+ inline void add_to_paragraph(const point2d& point)
+ {
+   add_to_paragraph(img_influ(point));
+ }
+ /// Assigns `lbl` to the current paragraph (CPar).
+ inline void add_to_paragraph(const Label lbl)
+ {
+   paragraphs_union[lbl] = CPar;
+ }
+
+ /// Point overload of add_new_paragraph(Label).
+ inline void add_new_paragraph(const point2d& point)
+ {
+   add_new_paragraph(img_influ(point));
+ }
+ /// Makes the next free paragraph id (NPar) the current one; `lbl` is not used.
+ inline void add_new_paragraph(const Label lbl)
+ {
+   CPar = NPar++;
+ }
+
+ /// Links the labels under A and B in paragraphs_union.
+ inline void add_to_paragraph_link(const point2d& A, const point2d& B)
+ {
+   add_to_paragraph_link(img_influ(A),img_influ(B));
+ }
+ /// Forwards to paragraphs_union.add_link(A, B).
+ inline void add_to_paragraph_link(const Label A, const Label B)
+ {
+   paragraphs_union.add_link(A, B);
+ }
+
+ /// Registers the label under A as a self link in paragraphs_union.
+ inline void add_to_paragraph_self_link(const point2d& A)
+ {
+   add_to_paragraph_self_link(img_influ(A));
+ }
+ /// Forwards to paragraphs_union.add_self_link(A).
+ inline void add_to_paragraph_self_link(const Label A)
+ {
+   paragraphs_union.add_self_link(A);
+ }
+
+ /// Forwards to paragraphs_union.propage_links().
+ inline void propage_paragraph_link()
+ {
+   paragraphs_union.propage_links();
+ }
+
+ /// Point overload: resolves the label through img_influ.
+ inline void jump_to_paragraph(const point2d& point)
+ { jump_to_paragraph(img_influ(point)); }
+ /// Makes the paragraph of `lbl` the current one (CPar); when `lbl` has no
+ /// paragraph yet, a new paragraph id becomes current instead.
+ inline void jump_to_paragraph(const Label lbl)
+ {
+   if(paragraphs_union[lbl] == 0)
+   {
+     add_new_paragraph(lbl);
+     return;
+   }
+   CPar = paragraphs_union[lbl];
+ }
/* OPERATION ON LINES */
+ inline void split_line_exclusive(const point2d& point) // point overload: resolves the label through img_influ
+ {split_line_exclusive(img_influ(point));}
+ inline void split_line_exclusive(const Label lbl) // registers lbl in the lines_split chain that starts at the last label of its line
+ {
+ if(lbl == 0){return;} // label 0 is never registered
+ lines_union.add_self_link(lbl);
+ Label pos = get_end_of_line(lbl); // the chain is walked from the last label recorded for lbl's line
+ if(pos == lbl){return;} // lbl is that last label itself: nothing to chain
+
+ while(lines_split[pos] && lines_split[pos] != lbl && pos != lbl) // stop at the chain tail or when lbl is already chained
+ {
+ if(_bboxgp[lines_split[pos]].pmin()[1] < _bboxgp[lbl].pmin()[1]) // next entry has a smaller pmin()[1]: insert lbl before it
+ {
+ lines_split[lbl] = lines_split[pos];
+ lines_split[pos] = lbl;
+ return;
+ }
+ pos = lines_split[pos];
+ }
+ if(pos == lbl || lines_split[pos] == lbl){return;} // lbl is already in the chain
+ lines_split[pos] = lbl; // otherwise append lbl at the tail
+
+ }
+
+
+ inline void split_line(const point2d& point) // point overload: resolves the label through img_influ
+ {split_line(img_influ(point));}
+ inline void split_line(const Label lbl) // registers lbl in the lines_split chain that starts at the first label of its line
+ {
+ if(lbl == 0){return;} // label 0 is never registered
+ lines_union.add_self_link(lbl);
+ Label pos = get_beginning_of_line(lbl); // the chain is walked from the first label recorded for lbl's line
+ if(pos == lbl){return;} // lbl is that first label itself: nothing to chain
+
+ while(lines_split[pos] && lines_split[pos] != lbl && pos != lbl) // stop at the chain tail or when lbl is already chained
+ {
+ if(_bboxgp[lines_split[pos]].pmin()[1] > _bboxgp[lbl].pmin()[1]) // next entry has a larger pmin()[1]: insert lbl before it
+ {
+ lines_split[lbl] = lines_split[pos];
+ lines_split[pos] = lbl;
+ return;
+ }
+ pos = lines_split[pos];
+ }
+ if(pos == lbl || lines_split[pos] == lbl){return;} // lbl is already in the chain
+ lines_split[pos] = lbl; // otherwise append lbl at the tail
+
+ }
+
+
+ inline void cook_line_splitting_exclusive() // re-splits lines at end-of-line labels and right implicit separators, then re-cooks the lines
+ {
+ for(unsigned int N = 1; N < Areas_Number_; N++) // pass 1: register every split point in lines_split
+ {
+ lines_union.invalidate_link(N);
+ if(end_lines_mask(N) || implicit_separators_right_mask(N))
+ split_line_exclusive(N);
+ }
+ for(unsigned int N = 1; N < Areas_Number_; N++) // pass 2: each self-linked label opens a new line id
+ {
+ if(lines_union.is_self_link(N))
+ {
+ add_new_line(N);
+ add_to_line(N);
+ }
+ else if(end_lines_mask(N))
+ lines_union.add_self_link(N);
+ else
+ {lines_union.invalidate_link(N);}
+ }
+ lines_union[0] = 0;
+ lines_union.invalidate_link(0);
+ for(unsigned int N = 1; N < Areas_Number_; N++) // pass 3: attach the remaining labels to a split point of their line
+ {
+ if(!contain_line(N) || lines_union.is_self_link(N))
+ continue;
+ Label pos = get_end_of_line(N);
+ while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] > _bboxgp[N].pmin()[1]) // follow the chain while the next split point has a larger pmin()[1] than N
+ pos = lines_split[pos];
+ if(pos != 0)
+ {lines_union[N] = lines_union[pos]; lines_union.add_link(pos,N);}
+ }
+
+ //lines_union.propage_links();lines_union
+ cook_lines(); // recompute the per-line data for the new line ids
+ }
+
+ inline void cook_line_splitting() // re-splits lines at start-of-line labels and left implicit separators, then re-cooks the lines
+ {
+ for(unsigned int N = 1; N < Areas_Number_; N++) // pass 1: register every split point in lines_split
+ {
+ lines_union.invalidate_link(N);
+ if(start_lines_mask(N) || implicit_separators_left_mask(N))
+ split_line(N);
+ }
+ for(unsigned int N = 1; N < Areas_Number_; N++) // pass 2: each self-linked label opens a new line id
+ {
+ if(lines_union.is_self_link(N))
+ {
+ add_new_line(N);
+ add_to_line(N);
+ }
+ else if(start_lines_mask(N))
+ lines_union.add_self_link(N);
+ else
+ {lines_union.invalidate_link(N);}
+ }
+ lines_union[0] = 0;
+ lines_union.invalidate_link(0);
+ for(unsigned int N = 1; N < Areas_Number_; N++) // pass 3: attach the remaining labels to a split point of their line
+ {
+ if(!contain_line(N) || lines_union.is_self_link(N))
+ continue;
+ Label pos = get_beginning_of_line(N);
+ while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] < _bboxgp[N].pmin()[1]) // follow the chain while the next split point has a smaller pmin()[1] than N
+ pos = lines_split[pos];
+ if(pos != 0)
+ {lines_union[N] = lines_union[pos]; lines_union.add_link(pos,N);}
+ }
+
+ //lines_union.propage_links();lines_union
+ cook_lines(); // recompute the per-line data for the new line ids
+ }
inline void add_to_line_self_link(const point2d& point)
{ add_to_line_self_link(img_influ(point));}
inline void add_to_line(const point2d& point)
@@ -87,6 +279,11 @@ namespace mymln
inline bool contain_line(const point2d& point)
{ return contain_line(img_influ(point)); }
+ /// Point overload: resolves the label through img_influ.
+ inline bool contain_line_start(const point2d& point)
+ {
+   return contain_line_start(img_influ(point));
+ }
+ /// Returns lines_first_label(lbl) converted to bool.
+ /// NOTE(review): lines_first_label is indexed by line id elsewhere in this
+ /// class; confirm that indexing it with a label is intended here.
+ inline bool contain_line_start(const Label lbl)
+ {
+   return lines_first_label(lbl);
+ }
+
inline void add_to_line(const Label lbl)
{ lines_union[lbl] = CLine; }
@@ -134,6 +331,10 @@ namespace mymln
if (link == 0){add_noise(lbl);}
else if (link > 30){ add_separator(lbl);}
else { add_letter(lbl);}
+
+ /* SET UP SPECIAL MASK TO FALSE */
+ implicit_separators_left_mask(lbl) = false;
+ implicit_separators_right_mask(lbl) = false;
}
void inline invalid_letter(const point2d& point)
{invalid_letter(img_influ(point));}
@@ -166,11 +367,13 @@ namespace mymln
Hseparator_mask(lbl) = false;
alone_letters_mask(lbl) = true;
noise_mask(lbl) = false;
+ all_letters_mask(lbl) = true;
}
void add_letter_coerce(const Label lbl)
{
letters_mask(lbl) = true;
+ all_letters_mask(lbl) = true;
separators_mask(lbl) = false;
containers_mask(lbl) = false;
Vseparator_mask(lbl) = false;
@@ -184,6 +387,7 @@ namespace mymln
if(label_valid_size_Min_(lbl, 2))
{
letters_mask(lbl) = true;
+ all_letters_mask(lbl) = true;
separators_mask(lbl) = false;
containers_mask(lbl) = false;
Vseparator_mask(lbl) = false;
@@ -209,6 +413,7 @@ namespace mymln
separators_mask(lbl) = false;
noise_mask(lbl) = false;
alone_letters_mask(lbl) = false;
+ all_letters_mask(lbl) = false;
}
else
add_noise(lbl);
@@ -223,6 +428,7 @@ namespace mymln
separators_mask(lbl) = true;
alone_letters_mask(lbl) = false;
noise_mask(lbl) = false;
+ all_letters_mask(lbl) = false;
}
void add_Vseparator(const Label lbl)
{
@@ -234,6 +440,7 @@ namespace mymln
separators_mask(lbl) = true;
alone_letters_mask(lbl) = false;
noise_mask(lbl) = false;
+ all_letters_mask(lbl) = false;
}
void inline add_separator(const point2d& point)
{add_letter(img_influ(point)); }
@@ -259,6 +466,8 @@ namespace mymln
bool inline contain_letter(const Label lbl)
{return contain_(lbl, letters_mask);}
+
+
bool inline contain_container(const Label lbl)
{return contain_(lbl, containers_mask);}
@@ -310,6 +519,19 @@ namespace mymln
return allignV < label_size_(0, Left) && (_bboxgp[Left].pcenter()[0]) > (_bboxgp[Right].pcenter()[0]);
}
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_up_line_line( const point2d& Left, const point2d& Right)
+ {return allign_up_line_line(img_influ(Left), img_influ(Right));}
+
+ /// Line-level counterpart of the "up" alignment test.
+ ///
+ /// @return true when 1.4 times the distance between the centres of the two
+ ///         line boxes along coordinate 0 is below the extent of Left's line
+ ///         along that coordinate, and the centre of Left's line has a larger
+ ///         coordinate 0 than the centre of Right's line.
+ inline bool allign_up_line_line( const Label Left, const Label Right)
+ {
+   short int allignV = lines_bbox[lines_union[Left]].pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0];
+   if(allignV < 0){allignV = -allignV; }
+   allignV *= 1.4f;
+   // The second operand must be Right: comparing Left with itself is always false.
+   return
+     allignV < lines_bbox[lines_union[Left]].len(0) &&
+     (lines_bbox[lines_union[Left]].pcenter()[0]) > (lines_bbox[lines_union[Right]].pcenter()[0]);
+ }
+
inline bool allign_H_Large( const point2d& Left, const point2d& Right)
{return allign_H_Large(img_influ(Left), img_influ(Right));}
@@ -328,9 +550,101 @@ namespace mymln
return allignH < label_size_(1, Left) && allignH < label_size_(1, Right);
}
- inline bool allign_size_height( const point2d& Left, const point2d& Right)
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_H_min( const point2d& Left, const point2d& Right)
+ {
+   return allign_H_min(img_influ(Left), img_influ(Right));
+ }
+
+ /// True when twice the pmin() distance along coordinate 1 is below the size
+ /// of both labels along that coordinate.
+ inline bool allign_H_min( const Label Left, const Label Right)
+ {
+   const short int doubled_gap = label_allign_min_(1, Left, Right) * 2;
+   if(!(doubled_gap < label_size_(1, Left)))
+     return false;
+   return doubled_gap < label_size_(1, Right);
+ }
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_H_max( const point2d& Left, const point2d& Right)
+ {
+   return allign_H_max(img_influ(Left), img_influ(Right));
+ }
+
+ /// True when twice the pmax() distance along coordinate 1 is below the size
+ /// of both labels along that coordinate.
+ inline bool allign_H_max( const Label Left, const Label Right)
+ {
+   const short int doubled_gap = label_allign_max_(1, Left, Right) * 2;
+   if(!(doubled_gap < label_size_(1, Left)))
+     return false;
+   return doubled_gap < label_size_(1, Right);
+ }
+
+
+ inline bool allign_size_height( const point2d& Left, const point2d& Right)
{return allign_size_height(img_influ(Left), img_influ(Right));}
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_proximity( const point2d& Left, const point2d& Right)
+ {return allign_proximity(img_influ(Left), img_influ(Right));}
+
+ /// True when the pmin() of both labels are less than 1.5 times the largest
+ /// dimension of Left apart along coordinate 1.
+ /// Only the size of Left is used, so the test is not symmetric.
+ inline bool allign_proximity( const Label Left, const Label Right)
+ {
+   short int SizeL0 = label_size_(0, Left);
+   short int SizeL1 = label_size_(1, Left);
+   // Keep the larger of the two dimensions in SizeL0.
+   if(SizeL0 < SizeL1)
+   { SizeL0 = SizeL1; }
+   short int Dis = _bboxgp[Left].pmin()[1] - _bboxgp[Right].pmin()[1];
+   if(Dis < 0)
+     Dis = -Dis;
+   return Dis < SizeL0 * 1.5f;
+ }
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_proximity_line( const point2d& Left, const point2d& Right)
+ {
+   return allign_proximity_line(img_influ(Left), img_influ(Right));
+ }
+
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_size_height_line( const point2d& Left, const point2d& Right)
+ { return allign_size_height_line(img_influ(Left), img_influ(Right)); }
+
+ /// True when the extent along coordinate 0 of Right's line box lies strictly
+ /// between half and twice that of Left's line box.
+ inline bool allign_size_height_line( const Label Left, const Label Right)
+ {
+   const short int left_extent = lines_bbox[lines_union[Left]].len(0);
+   const short int right_extent = lines_bbox[lines_union[Right]].len(0);
+   if(!(right_extent > (left_extent / 2)))
+     return false;
+   return right_extent < (left_extent * 2);
+ }
+
+ /// True when the two line boxes are close along coordinate 1.
+ ///
+ /// The gap is the smaller of |pmax(Left) - pmin(Right)| and
+ /// |pmax(Right) - pmin(Left)| on coordinate 1; the lines are accepted when
+ /// five times that gap is below the larger len(0) of the two boxes.
+ inline bool allign_proximity_line( const Label Left, const Label Right)
+ {
+   box2d left_box = lines_bbox[lines_union[Left]];
+   box2d right_box = lines_bbox[lines_union[Right]];
+
+   int gap_lr = left_box.pmax()[1] - right_box.pmin()[1];
+   int gap_rl = right_box.pmax()[1] - left_box.pmin()[1];
+   gap_lr = gap_lr < 0 ? -gap_lr : gap_lr;
+   gap_rl = gap_rl < 0 ? -gap_rl : gap_rl;
+   const int nearest = gap_lr > gap_rl ? gap_rl : gap_lr;
+
+   const unsigned int left_extent = left_box.len(0);
+   const unsigned int right_extent = right_box.len(0);
+   const unsigned int largest = left_extent < right_extent ? right_extent : left_extent;
+   return (nearest * 5) < largest;
+ }
+
+
+
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_proximity_large( const point2d& Left, const point2d& Right)
+ {return allign_proximity_large(img_influ(Left), img_influ(Right));}
+
+ /// Looser variant of allign_proximity: true when the pmin() of both labels
+ /// are less than 3 times the largest dimension of Left apart along
+ /// coordinate 1. Only the size of Left is used, so the test is not symmetric.
+ inline bool allign_proximity_large( const Label Left, const Label Right)
+ {
+   short int SizeL0 = label_size_(0, Left);
+   short int SizeL1 = label_size_(1, Left);
+   // Keep the larger of the two dimensions in SizeL0.
+   if(SizeL0 < SizeL1)
+   { SizeL0 = SizeL1; }
+   short int Dis = _bboxgp[Left].pmin()[1] - _bboxgp[Right].pmin()[1];
+   if(Dis < 0)
+     Dis = -Dis;
+   return Dis < SizeL0 * 3;
+ }
+
+
+
inline bool allign_size_height( const Label Left, const Label Right)
{
short int SizeL = label_size_(0, Left);
@@ -374,6 +688,34 @@ namespace mymln
short int allignV = label_allign_(0, Left, Right) * 2;
return allignV < label_size_(0, Left) && allignV < label_size_(0, Right);
}
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_V_line( const point2d& Left, const point2d& Right)
+ {
+   return allign_V_line(img_influ(Left), img_influ(Right));
+ }
+
+ /// True when the centres of the two line boxes are closer along coordinate 0
+ /// than the extent of each box along that coordinate.
+ inline bool allign_V_line( Label Left, Label Right)
+ {
+   box2d& left_line = lines_bbox[lines_union[Left]];
+   box2d& right_line = lines_bbox[lines_union[Right]];
+   short int center_gap = left_line.pcenter()[0] - right_line.pcenter()[0];
+   if(center_gap < 0)
+     center_gap = -center_gap;
+   if(!(center_gap < left_line.len(0)))
+     return false;
+   return center_gap < right_line.len(0);
+ }
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_center_line( const point2d& Left, const point2d& Right)
+ {
+   return allign_center_line(img_influ(Left), img_influ(Right));
+ }
+ /// True when five times the distance between the centres of the two line
+ /// boxes along coordinate 0 is below the extent of Left's line box.
+ inline bool allign_center_line( Label Left, Label Right)
+ {
+   box2d& left_line = lines_bbox[lines_union[Left]];
+   short int center_gap = left_line.pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0];
+   if(center_gap < 0)
+     center_gap = -center_gap;
+   return center_gap * 5 < left_line.len(0);
+ }
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_smaller_line( const point2d& Left, const point2d& Right)
+ {
+   return allign_smaller_line(img_influ(Left), img_influ(Right));
+ }
+ /// True when Left's line box is more than twice as large as Right's along
+ /// coordinate 0.
+ inline bool allign_smaller_line( Label Left, Label Right)
+ {
+   box2d& left_line = lines_bbox[lines_union[Left]];
+   box2d& right_line = lines_bbox[lines_union[Right]];
+   return left_line.len(0) > (right_line.len(0) * 2);
+ }
+
+
inline bool allign_V_large( const point2d& Left, const point2d& Right)
{return allign_V_large(img_influ(Left), img_influ(Right));}
@@ -393,6 +735,18 @@ namespace mymln
return allignV < label_size_(0, Left) && allignV < label_size_(0, Right);
}
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_base_line_line(const point2d& Left, const point2d& Right)
+ {
+   return allign_base_line_line(img_influ(Left), img_influ(Right));
+ }
+ /// True when 1.5 times the distance between the centres of the two line
+ /// boxes along coordinate 0 is below the extent of Left's line box, and the
+ /// centre of Left's line has a smaller coordinate 0 than Right's.
+ inline bool allign_base_line_line(const Label Left, const Label Right)
+ {
+   box2d& left_line = lines_bbox[lines_union[Left]];
+   box2d& right_line = lines_bbox[lines_union[Right]];
+   short int center_gap = left_line.pcenter()[0] - right_line.pcenter()[0];
+   if(center_gap < 0)
+     center_gap = -center_gap;
+   center_gap *= 1.5f;
+   if(!(center_gap < left_line.len(0)))
+     return false;
+   return left_line.pcenter()[0] < right_line.pcenter()[0];
+ }
+
inline bool allign_base_line(const point2d& Left, const point2d& Right)
{return allign_base_line(img_influ(Left), img_influ(Right));}
@@ -421,6 +775,8 @@ namespace mymln
std::cout << " lines(s) : " << CLine << std::endl;
}
+ /// Saves a label image built from paragraphs_union to `file`.
+ void debug_save_paragraphs(std::string file)
+ {
+   mymln::debug::save_label_image(img, paragraphs_union , file);
+ }
void debug_save_lines(std::string file)
{ mymln::debug::save_label_image(img, lines_union , file);}
void debug_save_separators(std::string file)
@@ -431,6 +787,12 @@ namespace mymln
{ return fun_mask_(containers_mask); }
vertex_image<point2d,bool> fun_mask_alone_letters()
{ return fun_mask_(alone_letters_mask); }
+ /// Vertex image built from implicit_separators_left_mask.
+ vertex_image<point2d,bool> fun_mask_implicit_separators_left()
+ {
+   return fun_mask_(implicit_separators_left_mask);
+ }
+ /// Vertex image built from implicit_separators_right_mask.
+ vertex_image<point2d,bool> fun_mask_implicit_separators_right()
+ {
+   return fun_mask_(implicit_separators_right_mask);
+ }
+ /// Vertex image built from all_letters_mask.
+ vertex_image<point2d,bool> fun_mask_all_letters()
+ {
+   return fun_mask_(all_letters_mask);
+ }
vertex_image<point2d,bool> fun_mask_all()
{
typedef vertex_image<point2d,bool> v_ima_g;
@@ -450,6 +812,10 @@ namespace mymln
{ return image_mask_(containers_mask); }
image2d<bool> image_mask_separators()
{ return image_mask_(separators_mask); }
+ /// Boolean image built from implicit_separators_left_mask.
+ image2d<bool> image_mask_implicit_separators_left()
+ {
+   return image_mask_(implicit_separators_left_mask);
+ }
+ /// Boolean image built from implicit_separators_right_mask.
+ image2d<bool> image_mask_implicit_separators_right()
+ {
+   return image_mask_(implicit_separators_right_mask);
+ }
image2d<bool> image_mask_letters()
{ return image_mask_(letters_mask); }
image2d<bool> image_mask_noise()
@@ -461,6 +827,8 @@ namespace mymln
image2d<bool> image_mask_end_lines()
{ return image_mask_(end_lines_mask); }
+ /// Returns a copy of the per-line bounding boxes (lines_bbox).
+ mln::util::array<box2d> bbox_mask_lines()
+ {
+   return lines_bbox;
+ }
mln::util::array<box2d> bbox_mask_containers()
{ return bbox_mask_(containers_mask); }
mln::util::array<box2d> bbox_mask_separators()
@@ -482,30 +850,42 @@ namespace mymln
Label get_label(point2d point)
{ return img_influ(point); }
- unsigned int get_line_length(point2d point)
+ inline unsigned int get_line_length(point2d point)
{ return get_line_length(img_influ(point)); }
- unsigned int get_line_length(Label L)
+ inline unsigned int get_line_length(Label L)
{ return lines_len[lines_union[L]]; }
- unsigned int get_beginning_of_line(point2d point)
+ /// Point overload: resolves the label through img_influ.
+ inline unsigned int get_line_width(point2d point)
+ {
+   return get_line_width(img_influ(point));
+ }
+
+ /// Extent along coordinate 1 of the bounding box of L's line.
+ inline unsigned int get_line_width(Label L)
+ {
+   return lines_bbox[lines_union[L]].len(1);
+ }
+
+ /// Point overload: `Line` is resolved through img_influ, `Point` is tested as is.
+ inline bool line_has(point2d Line, point2d Point)
+ {
+   return line_has(img_influ(Line), Point);
+ }
+
+ /// True when the bounding box of Line's line has `Point`.
+ inline bool line_has(Label Line, point2d Point)
+ {
+   return lines_bbox[lines_union[Line]].has(Point);
+ }
+
+ inline unsigned int get_beginning_of_line(point2d point)
{ return get_beginning_of_line(img_influ(point)); }
- unsigned int get_beginning_of_line(Label L)
+ inline unsigned int get_beginning_of_line(Label L)
{ return lines_first_label[lines_union[L]]; }
- unsigned int get_end_of_line(point2d point)
+ inline unsigned int get_end_of_line(point2d point)
{ return get_end_of_line(img_influ(point)); }
- unsigned int get_end_of_line(Label L)
+ inline unsigned int get_end_of_line(Label L)
{ return lines_last_label[lines_union[L]]; }
- unsigned int get_parent_line(point2d point)
+ inline unsigned int get_parent_line(point2d point)
{ return lines_union[img_influ(point)]; }
- unsigned int get_parent_line(Label L)
+ inline unsigned int get_parent_line(Label L)
{ return lines_union[L]; }
@@ -514,21 +894,46 @@ namespace mymln
lines_first_label.fill(0);
lines_last_label.fill(0);
lines_len.fill(0);
+ start_lines_mask(0) = false;
+ end_lines_mask(0) = false;
+
cook_lines_();
}
+ /// Forwards to implicit_separators_union.reset().
+ inline void reset_implicit_separators()
+ {
+   implicit_separators_union.reset();
+ }
inline void cook_lines()
{
- lines_len = mln::util::array<unsigned int>(CLine + 1);
- lines_first_label = mln::util::array<unsigned int>(CLine + 1);
- lines_last_label = mln::util::array<unsigned int>(CLine + 1);
+ lines_len = mln::util::array<unsigned int>(NLine + 1);
+ lines_first_label = mln::util::array<unsigned int>(NLine + 1);
+ lines_last_label = mln::util::array<unsigned int>(NLine + 1);
start_lines_mask = fun::i2v::array<bool>(Areas_Number_);
end_lines_mask = fun::i2v::array<bool>(Areas_Number_);
start_end_lines_mask = fun::i2v::array<bool>(Areas_Number_);
+ lines_bbox = mln::util::array<box2d>(NLine + 1);
lines_len.fill(0);
start_lines_mask(0) = false;
end_lines_mask(0) = false;
cook_lines_();
}
+
+
+ /// Re-allocates and zeroes the left-separator counters and the middle
+ /// positions (NImpSep + 1 entries each), then runs cook_separators_().
+ inline void cook_separators()
+ {
+   const unsigned int slots = NImpSep + 1;
+   separators_len_left = mln::util::array<unsigned int>(slots);
+   separators_middle = mln::util::array<unsigned int>(slots);
+   separators_len_left.fill(0);
+   separators_middle.fill(0);
+   cook_separators_();
+ }
+
+ /// Re-allocates the right-separator counters, resizes the middle positions
+ /// to NImpSep + 1 entries, zeroes both, then runs cook_separators_right_().
+ inline void cook_separators_right()
+ {
+   const unsigned int slots = NImpSep + 1;
+   separators_len_right = mln::util::array<unsigned int>(slots);
+   separators_middle.resize(slots);
+   separators_len_right.fill(0);
+   separators_middle.fill(0);
+   cook_separators_right_();
+ }
inline void propage_line_link()
{ lines_union.propage_links(); }
/*image_if<image2d<Label> masked_image_letters()
@@ -575,35 +980,251 @@ namespace mymln
inline bool contain_implicit_separator(const Label lbl)
{return implicit_separators_union[lbl] != 0; }
- inline void add_to_separator(const point2d& point)
- { add_to_separator(img_influ(point)); }
- inline void add_to_separator(const Label lbl)
- { implicit_separators_union[lbl] = CImpSep; }
+
+
+ /// Point overload: resolves the label through img_influ.
+ inline void add_to_separator_left(const point2d& point)
+ {
+   add_to_separator_left(img_influ(point));
+ }
+ /// Puts `lbl` in the current implicit separator (CImpSep) and flags it in
+ /// the left mask.
+ inline void add_to_separator_left(const Label lbl)
+ {
+   implicit_separators_union[lbl] = CImpSep;
+   implicit_separators_left_mask(lbl) = true;
+ }
+
+ /// Point overload: resolves the label through img_influ.
+ inline void add_to_separator_right(const point2d& point)
+ {
+   add_to_separator_right(img_influ(point));
+ }
+ /// Puts `lbl` in the current implicit separator (CImpSep) and flags it in
+ /// the right mask.
+ inline void add_to_separator_right(const Label lbl)
+ {
+   implicit_separators_union[lbl] = CImpSep;
+   implicit_separators_right_mask(lbl) = true;
+ }
inline void invalidate_implicit_separator(const point2d& point)
- { invalidate_implicit_separator(img_influ(point)); }
+ { invalidate_implicit_separator(img_influ(point)); }
inline void invalidate_implicit_separator(Label lbl)
- { implicit_separators_union[lbl] = 0; }
+ {
+ implicit_separators_union[lbl] = 0;
+ implicit_separators_left_mask(lbl) = false;
+ implicit_separators_right_mask(lbl) = false;
+ }
inline Label& operator[](point2d i)
- { return img_influ(i); }
+ { return img_influ(i);}
- inline point2d& operator[](Label i)
- { return _bboxgp[i].pcenter(); }
+ inline point2d operator[](Label i)
+ {
+ point2d p = _bboxgp[i].pcenter();
+ return p;
+ }
+ /// Point overload: resolves the label through img_influ.
+ inline void tag_label(const point2d& point, std::string tag)
+ {
+   tag_label(img_influ(point), tag);
+ }
+ /// Stores `tag` as the tag of `lbl` in tag_lbl.
+ inline void tag_label(Label lbl, std::string tag)
+ {
+   tag_lbl[lbl] = tag;
+ }
+ /* ITER ON LINES */
+ /// Builds the data used by the lines_iter_* functions.
+ inline void cook_lines_iter()
+ {
+   cook_lines_iter_();
+ }
+
+ /// Entry of lines_seq at the current position.
+ inline unsigned int lines_iter_value()
+ {
+   return lines_seq[SeqP];
+ }
+ /// Moves the current position back to 0.
+ inline void lines_iter_start()
+ {
+   SeqP = 0;
+ }
+ /// Moves the current position to the start recorded in lines_seq_pos.
+ /// NOTE(review): get_beginning_of_line() receives SeqP, a position in
+ /// lines_seq, whereas its other callers pass a label; confirm the intended index.
+ inline void lines_iter_next_line()
+ {
+   SeqP = lines_seq_pos[get_beginning_of_line(SeqP) + 1];
+ }
+ /// Advances to the next non-null entry of lines_seq, or until the position
+ /// stops being valid.
+ inline void lines_iter_next_letter()
+ {
+   do
+   {
+     SeqP++;
+   }
+   while(lines_iter_valid() && !lines_seq[SeqP]);
+ }
+ /// True while the current position is inside lines_seq (SeqP < Areas_Number_).
+ /// The return type must be bool: the function returns a value and
+ /// lines_iter_next_letter() uses it as a loop condition.
+ inline bool lines_iter_valid()
+ { return SeqP < Areas_Number_; }
private:
+ fun::i2v::array<bool> implicit_separators_left_mask;
+ fun::i2v::array<bool> implicit_separators_right_mask;
+ mln::util::array<unsigned int> separators_len_right;
+ mln::util::array<unsigned int> separators_len_left;
+ mln::util::array<unsigned int> separators_middle;
+
+
+
+ inline void cook_separators_() // filters the left implicit separators using their size and the spread of their pmin()[1] positions
+ {
+ implicit_separators_left_mask(0) = false;
+ for(unsigned int N = 1; N < implicit_separators_union.size(); N++) // count the labels of each separator and sum their pmin()[1]
+ {
+ if(implicit_separators_union[N] != 0)
+ {
+ separators_len_left[implicit_separators_union[N]]++;
+ separators_middle[implicit_separators_union[N]] += _bboxgp[N].pmin()[1];
+ }
+ }
+
+ /* WARNING: the mean position is obtained by summing every position first */
+ /* and dividing afterwards. This is faster than updating the mean while    */
+ /* counting, but the sum is held in an unsigned int and can overflow on    */
+ /* very large images or on a platform where unsigned int is 16 bits wide.  */
+ for(unsigned int N = 1; N < NImpSep + 1; N++) // turn each sum into a mean position
+ {
+ if(separators_len_left[N] != 0)
+ {
+ if(separators_len_left[N] != 0)
+ separators_middle[N] /= separators_len_left[N];
+ }
+ }
+
+
+ for(unsigned int N = 1; N < Areas_Number_; N++) // drop separators with fewer than 3 labels and labels more than 10 away from the mean
+ {
+ if(separators_len_left[implicit_separators_union[N]] < 3)
+ {
+ separators_len_left[implicit_separators_union[N]] = 0;
+ implicit_separators_union[N] = 0;
+ implicit_separators_left_mask(N) = false;
+ }
+ else if (
+ _bboxgp[N].pmin()[1] < separators_middle[implicit_separators_union[N]] - 10 ||
+ _bboxgp[N].pmin()[1] > separators_middle[implicit_separators_union[N]] + 10
+ )
+ {
+
+ separators_len_left[implicit_separators_union[N]]--;
+ implicit_separators_union[N] = 0;
+ implicit_separators_left_mask(N) = false;
+ }
+ }
+ for(unsigned int N = 1; N < Areas_Number_; N++) // labels that do not start a line no longer count towards their separator
+ {
+ if(!start_lines_mask(N) || implicit_separators_union[N] == 0)
+ {
+ if( separators_len_left[implicit_separators_union[N]] > 0)
+ separators_len_left[implicit_separators_union[N]]--;
+ }
+ }
+ for(unsigned int N = 1; N < Areas_Number_; N++) // finally drop separators left with fewer than 2 counted labels
+ {
+ if(separators_len_left[implicit_separators_union[N]] < 2)
+ {
+ separators_len_left[implicit_separators_union[N]] = 0;
+ implicit_separators_union[N] = 0;
+ implicit_separators_left_mask(N) = false;
+ }
+ }
+ }
+
+ inline void cook_separators_right_() // filters the right implicit separators using their size and the spread of their pmax()[1] positions
+ {
+ implicit_separators_right_mask(0) = false;
+ for(unsigned int N = 1; N < implicit_separators_union.size(); N++) // count the labels of each separator and sum their pmax()[1]
+ {
+ if(implicit_separators_union[N] != 0)
+ {
+ separators_len_right[implicit_separators_union[N]]++;
+ separators_middle[implicit_separators_union[N]] += _bboxgp[N].pmax()[1];
+ }
+ }
+
+ /* WARNING: the mean position is obtained by summing every position first */
+ /* and dividing afterwards. This is faster than updating the mean while    */
+ /* counting, but the sum is held in an unsigned int and can overflow on    */
+ /* very large images or on a platform where unsigned int is 16 bits wide.  */
+ for(unsigned int N = 1; N < NImpSep + 1; N++) // turn each sum into a mean position
+ {
+ if(separators_len_right[N] != 0)
+ {
+ if(separators_len_right[N] != 0)
+ separators_middle[N] /= separators_len_right[N];
+ }
+ }
+
+
+ for(unsigned int N = 1; N < Areas_Number_; N++) // drop separators with fewer than 3 labels and labels more than 10 away from the mean
+ {
+ if(separators_len_right[implicit_separators_union[N]] < 3)
+ {
+ separators_len_right[implicit_separators_union[N]] = 0;
+ implicit_separators_union[N] = 0;
+ implicit_separators_right_mask(N) = false;
+ }
+ else if (
+ _bboxgp[N].pmax()[1] < separators_middle[implicit_separators_union[N]] - 10 ||
+ _bboxgp[N].pmax()[1] > separators_middle[implicit_separators_union[N]] + 10
+ )
+ {
+
+ separators_len_right[implicit_separators_union[N]]--;
+ implicit_separators_union[N] = 0;
+ implicit_separators_right_mask(N) = false;
+ }
+ }
+ for(unsigned int N = 1; N < Areas_Number_; N++) // labels that do not end a line no longer count towards their separator
+ {
+ if(!end_lines_mask(N) || implicit_separators_union[N] == 0)
+ {
+ if( separators_len_right[implicit_separators_union[N]] > 0)
+ separators_len_right[implicit_separators_union[N]]--;
+ }
+ }
+ for(unsigned int N = 1; N < Areas_Number_; N++) // finally drop separators left with fewer than 2 counted labels
+ {
+ if(separators_len_right[implicit_separators_union[N]] < 2)
+ {
+ separators_len_right[implicit_separators_union[N]] = 0;
+ implicit_separators_union[N] = 0;
+ implicit_separators_right_mask(N) = false;
+ }
+ }
+ }
+
// PRIVATE DATA ON LINES
mln::util::array<unsigned int> lines_len;
mln::util::array<unsigned int> lines_first_label;
mln::util::array<unsigned int> lines_last_label;
+ mln::util::array<unsigned int> lines_seq;
+ mln::util::array<unsigned int> lines_seq_pos;
+ mln::util::array<box2d> lines_bbox;
+ mln::util::array<Label> lines_split;
fun::i2v::array<bool> start_lines_mask;
fun::i2v::array<bool> end_lines_mask;
fun::i2v::array<bool> start_end_lines_mask;
+ unsigned int SeqP;
-
+ /// Builds lines_seq and lines_seq_pos for the lines_iter_* functions.
+ ///
+ /// lines_seq_pos[line] is the index in lines_seq where the slice of that line
+ /// starts; the slice is lines_len[line] entries long. Its first entry is the
+ /// first label of the line and the other labels of the line follow, ordered
+ /// by increasing pmin()[1].
+ inline void cook_lines_iter_()
+ {
+   lines_seq = mln::util::array<unsigned int>(Areas_Number_);
+   lines_seq_pos = mln::util::array<unsigned int>(NLine + 1);
+
+   lines_seq.fill(0);
+   lines_seq_pos.fill(0);
+
+   // SeqP is a member and may hold a stale position: the layout must start at 0.
+   SeqP = 0;
+   for(unsigned int N = 0; N < NLine + 1; N++)
+   {
+     lines_seq[SeqP] = lines_first_label[N];
+     lines_seq_pos[N] = SeqP;
+     SeqP += lines_len[N];
+   }
+   for(unsigned int N = 1; N < Areas_Number_; N++)
+   {
+     if(contain_line(N) && !start_lines_mask(N))
+     {
+       // lines_seq_pos is sized and filled by line id, so it is indexed with
+       // the line of N; the first slot of the slice holds the first label.
+       SeqP = lines_seq_pos[lines_union[N]] + 1;
+
+       while( lines_seq[SeqP] && _bboxgp[lines_seq[SeqP]].pmin()[1] < _bboxgp[N].pmin()[1] )
+         SeqP++;
+
+       // Insert N here and shift the following entries by one slot, up to the
+       // first free slot. SeqP must advance, otherwise the loop never ends.
+       unsigned int carried = N;
+       while(lines_seq[SeqP])
+       {
+         const unsigned int displaced = lines_seq[SeqP];
+         lines_seq[SeqP] = carried;
+         carried = displaced;
+         SeqP++;
+       }
+       lines_seq[SeqP] = carried;
+     }
+   }
+ }
+
inline void cook_lines_()
{
+ Cooked_CLine = CLine;
for(unsigned int N = 1; N < lines_union.size(); N++)
{
if(lines_union[N] != 0)
@@ -628,18 +1249,34 @@ namespace mymln
}
}
-
/* SECOND STEP OF THE COOKING */
- for(unsigned int N = 0; N < CLine + 1; N++)
+ for(unsigned int N = 0; N < lines_first_label.size(); N++)
{
if( lines_first_label[N] != 0)
{
+ lines_bbox[N] = box2d();
start_lines_mask(lines_first_label[N]) = true;
end_lines_mask(lines_last_label[N]) = true;
start_end_lines_mask(lines_first_label[N]) = true;
start_end_lines_mask(lines_last_label[N]) = true;
}
+ else
+ {
+ // USEFULL ONLY FOR DEBUG WHEN WE NEED TO DRAW ALL THE BOUNDING BOX
+ // NOTE:REMOVE IT FOR THE FINAL RELEASE
+ lines_bbox[N] = box2d();
+ }
+ }
+
+ /* THE LAST STEP OF COOKING */
+ for(unsigned int N = 1; N < lines_union.size(); N++)
+ {
+ if(lines_union[N] && lines_first_label[lines_union[N]])
+ {
+ lines_bbox[lines_union[N]].merge(_bboxgp[N]);
+ }
}
+
}
@@ -655,6 +1292,16 @@ namespace mymln
{
return ((Float)_bboxgp[label].len(0)) / ((Float)_bboxgp[label].len(1));
}
+ inline short int label_allign_min_(const unsigned int N, const Label l1, const Label l2) // Absolute difference of the pmin() coordinate N of the bounding boxes of l1 and l2.
+ {
+ short int AFactor = _bboxgp[l1].pmin()[N] - _bboxgp[l2].pmin()[N]; // signed difference, stored in a short int
+ return AFactor < 0 ? -AFactor : AFactor; // absolute value
+ }
+ inline short int label_allign_max_(const unsigned int N, const Label l1, const Label l2) // Absolute difference of the pmax() coordinate N of the bounding boxes of l1 and l2.
+ {
+ short int AFactor = _bboxgp[l1].pmax()[N] - _bboxgp[l2].pmax()[N]; // signed difference, stored in a short int
+ return AFactor < 0 ? -AFactor : AFactor; // absolute value
+ }
inline short int label_allign_(const unsigned int N, const Label l1, const Label l2)
{
short int AFactor = _bboxgp[l1].pcenter()[N] - _bboxgp[l2].pcenter()[N];
@@ -741,10 +1388,13 @@ namespace mymln
fun::i2v::array<bool> separators_mask;
fun::i2v::array<bool> letters_mask;
fun::i2v::array<bool> alone_letters_mask;
+ fun::i2v::array<bool> all_letters_mask;
fun::i2v::array<bool> containers_mask;
fun::i2v::array<bool> noise_mask;
+ mln::util::array<std::string> tag_lbl;
+ unsigned int Cooked_CLine;
unsigned int CLine;
unsigned int NLine;
unsigned int CImpSep;
@@ -756,6 +1406,22 @@ namespace mymln
unsigned int CSepH ;
unsigned int CSepV ;
+ mymln::util::union_find<Label> paragraphs_union;
+ unsigned int CPar ;
+ unsigned int NPar ;
+ mln::util::array<unsigned int> paragraphs_first_label;
+ mln::util::array<unsigned int> paragraphs_last_label;
+ mln::util::array<box2d> paragraphs_bbox;
+
+ inline void cook_paragraphs_() // Placeholder: the body is empty in this patch.
+ {
+ // intentionally empty for now
+ }
+
+
+
+
+
/* RANGE DATA */
Float _VSepRatio_Min; // The ratio is computed with the bounding box
Float _VSepRatio_Max;
@@ -780,6 +1446,7 @@ namespace mymln
/* IMPLICIT SEPARATOR DETECTION */
mymln::util::union_find<Label> implicit_separators_union;
+ mymln::util::union_find<Label> implicit_separators_union_right;
};
}
}
diff --git a/scribo/sandbox/raphael/code/my/document/separator.hh b/scribo/sandbox/raphael/code/my/document/separator.hh
index f5a32db..660bbed 100644
--- a/scribo/sandbox/raphael/code/my/document/separator.hh
+++ b/scribo/sandbox/raphael/code/my/document/separator.hh
@@ -11,10 +11,8 @@ namespace mymln
namespace separators
{
template<typename L, typename F, typename D>
- void separators_find_allign(mymln::document::document<L,F,D>& doc, std::string dgb_out, image2d<bool> s)
+ void separators_find_allign(mymln::document::document<L,F,D>& doc)
{
- image2d<value::rgb8> out;
- mln::initialize(out, s);
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_letters();
@@ -30,7 +28,7 @@ namespace mymln
doc.jump_to_separator(v);
if((!doc.contain_implicit_separator(v)))
{
- doc.add_to_separator(v);
+ doc.add_to_separator_left(v);
doc.add_to_separator_self_link(v);
}
bool All_Alone = true;
@@ -40,19 +38,17 @@ namespace mymln
if((!doc.contain_implicit_separator(q)))
{
// draw::line(out, q,v, mln::literal::blue);
- if(doc.allign_H_Large(q,v) && doc.allign_size(q, v))
+ if(doc.allign_H_min(q,v) && doc.allign_size(q, v))
{
doc.add_to_separator_link(v, q);
- draw::line(out, q,v, mln::literal::magenta);
All_Alone = false;
}
}
else
{
- if(doc.allign_H_Large(q,v) && doc.allign_size(q, v))
+ if(doc.allign_H_min(q,v) && doc.allign_size(q, v))
{
doc.add_to_separator_link(q, v);
- draw::line(out, q,v, mln::literal::green);
All_Alone = false;
}
}
@@ -63,9 +59,61 @@ namespace mymln
}
doc.propage_separator_link();
- io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
}
+
+ template<typename L, typename F, typename D> // L, F, D are forwarded unchanged to mymln::document::document
+ void separators_find_allign_right(mymln::document::document<L,F,D>& doc) // Links letters into implicit separators using doc.allign_H_max(); same traversal as separators_find_allign, which uses add_to_separator_left / allign_H_min.
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters(); // graph image restricted by the letters mask
+ mln_piter_(v_ima_g) v(mask.domain()); // v: current vertex
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v); // q: neighbours of v in the masked graph
+ for_all(v)
+ {
+ // only vertices for which doc.contain_letter() holds are processed
+ if(doc.contain_letter(v))
+ {
+ doc.jump_to_separator(v);
+ if((!doc.contain_implicit_separator(v))) // v not yet in an implicit separator: register it and self-link it
+ {
+ doc.add_to_separator_right(v);
+ doc.add_to_separator_self_link(v);
+ }
+ bool All_Alone = true; // stays true when no neighbour passes the alignment tests
+ for_all(q)
+ {
+ // the two branches differ only in the argument order of add_to_separator_link
+ if((!doc.contain_implicit_separator(q)))
+ {
+ // draw::line(out, q,v, mln::literal::blue);
+ if(doc.allign_H_max(q,v) && doc.allign_size(q, v))
+ {
+ doc.add_to_separator_link(v, q); // q has no separator yet: link (v, q)
+ All_Alone = false;
+ }
+ }
+ else
+ {
+ if(doc.allign_H_max(q,v) && doc.allign_size(q, v))
+ {
+ doc.add_to_separator_link(q, v); // q already has a separator: link (q, v)
+ All_Alone = false;
+ }
+ }
+ }
+ if(All_Alone){doc.invalidate_implicit_separator(v);} // no aligned neighbour: drop v's implicit separator
+
+ }
+
+ }
+ doc.propage_separator_link(); // called once after the whole traversal
+ }
+
+
template<typename L, typename F, typename D>
void separators_make_clean(mymln::document::document<L,F,D>& doc)
{
@@ -109,10 +157,60 @@ namespace mymln
}
for(unsigned int N = 0; N < doc.size();N++)
{
- if(count[N] > 1)
+ if(count[N] > 0)
doc.invalidate_implicit_separator(N);
}
}
+
+
+ template<typename L, typename F, typename D> // L, F, D are forwarded unchanged to mymln::document::document
+ void separators_final_clean(mymln::document::document<L,F,D>& doc) // Invalidates implicit separators of labels counted as V-aligned neighbours within the same implicit separator.
+ {
+ // traversal setup: letters-masked graph, v = vertex, q = neighbour of v
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ mln::util::array<unsigned> count = mln::util::array<unsigned>(doc.size()); // per-label hit counter, indexed by doc[q]
+ count.fill(0);
+ for_all(v)
+ {
+ // only vertices already inside an implicit separator are examined
+ if(doc.contain_implicit_separator(v))
+ {
+ bool All_Alone = true; // NOTE(review): never read in this function
+ doc.jump_to_line(v);
+ if((!doc.contain_line(v))) // v has no line yet: add it to a line and self-link it
+ {
+ doc.add_to_line(v);
+ doc.add_to_line_self_link(v);
+ }
+ // count neighbours that share v's implicit separator and pass allign_V + allign_size
+ for_all(q)
+ {
+
+ if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v))
+ {
+ // draw::line(out, q,v, mln::literal::blue);
+ if(doc.allign_V(q,v) && doc.allign_size(q, v))
+ {
+ count[doc[q]]++; // the neighbour q is counted, not v
+ }
+ }
+
+ }
+ }
+ }
+ for(unsigned int N = 0; N < doc.size();N++) // second pass: any label counted at least once loses its implicit separator
+ {
+ if(count[N] > 0)
+ doc.invalidate_implicit_separator(N);
+ }
+ }
+
}
}
}
diff --git a/scribo/sandbox/raphael/code/my/util/union.hh b/scribo/sandbox/raphael/code/my/util/union.hh
index 7f21e87..53fcbb3 100644
--- a/scribo/sandbox/raphael/code/my/util/union.hh
+++ b/scribo/sandbox/raphael/code/my/util/union.hh
@@ -18,9 +18,19 @@ namespace mymln
mark_link.fill(0);
size_ = max_size;
}
-
+ inline void reset() // Zero-fills both mark and mark_link.
+ {
+ mark.fill(0);
+ mark_link.fill(0);
+ }
+ inline bool is_self_link(const Label A) // True when the link entry of A is A itself.
+ {return mark_link[A] == A;}
+ inline void invalidate_link(const Label A) // Sets the link entry of A to 0.
+ { mark_link[A] = 0; }
inline void add_self_link(const Label A)
{ mark_link[A] = A; }
+ inline unsigned int link(const unsigned int index) // Returns the raw mark_link entry stored at index.
+ {return mark_link[index]; }
inline void add_link(const Label A, const Label B)
{
diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc
index b33a4eb..b009c2e 100644
--- a/scribo/sandbox/raphael/code/test.cc
+++ b/scribo/sandbox/raphael/code/test.cc
@@ -12,7 +12,6 @@
#include <mln/core/var.hh>
#include <mln/accu/shape/bbox.hh>
#include <mln/fun/i2v/array.hh>
-#include <mln/fun/p2b/all.hh>
#include <mln/util/graph.hh>
#include <mln/util/timer.hh>
#include <mln/debug/draw_graph.hh>
@@ -40,6 +39,7 @@
#include <my/util/vector_bbox.hh>
#include <my/util/vector_bbox_group.hh>
#include <my/document/document.hh>
+#include <my/document/separator.hh>
#include <my/document/clean.hh>
using namespace mln;
@@ -114,24 +114,53 @@ void Process(std::string File, std::string Dir)
}
//mymln::debug::save_label_image(ima_influ, "influ_" + File);
+
mymln::document::clean_containers_items(doc);
mymln::document::clean_letters_items(doc);
- mymln::document::clean_get_lines(doc, Dir + "/" + "line_graph_" + File, doc.image_mask_letters());
- mymln::document::clean_dot_items(doc, Dir + "/" + "dot_graph_" + File, doc.image_mask_letters());
+ mymln::document::clean_get_lines(doc);
+ mymln::document::clean_letters_alone(doc);
+ mymln::document::clean_dot_items(doc);
doc.cook_lines();
mymln::document::clean_quote_items(doc, Dir + "/" + "quote_graph_" + File, doc.image_mask_letters());
-
- doc.stat();
+
+
+ mymln::document::separators::separators_find_allign(doc);
+ mymln::document::separators::separators_make_clean(doc);
+ doc.cook_separators();
+ doc.cook_line_splitting();
+ mymln::document::clean_line_link_item(doc);
+ mymln::document::clean_proximity_lines(doc);
+ mymln::document::clean_quote_lines(doc);
+
+ doc.reset_implicit_separators();
+ mymln::document::separators::separators_find_allign_right(doc);
+ mymln::document::separators::separators_make_clean(doc);
+ doc.cook_separators_right();
+ doc.cook_line_splitting_exclusive();
+ mymln::document::clean_line_link_item(doc);
+ mymln::document::clean_proximity_lines(doc);
+ mymln::document::clean_quote_lines(doc);
+
+
+ doc.recook_lines();
+
+ mymln::document::clean_paragraph_items(doc, Dir + "/" + "para_graph_" + File, doc.image_mask_letters());
+
std::cout << "WORK ON GRAPH : " << timer.stop() << endl;
//io::ppm::save(ima_influ, "separator.ppm");
//io::pbm::save(doc.image_mask_separators(),"separators");
- io::pbm::save(doc.image_mask_letters(),Dir + "/" + "letters_" + File);
- io::pbm::save(doc.image_mask_alone_letters(),Dir + "/" + "letters_alone_" + File);
- io::pbm::save(doc.image_mask_separators(),Dir + "/" + "separators_" + File);
- io::pbm::save(doc.image_mask_containers(),Dir + "/" + "containers_" + File);
- io::pbm::save(doc.image_mask_noise(),Dir + "/" + "noise_" + File);
- io::pbm::save(doc.image_mask_start_lines(), Dir + "/" + "start_line_" + File);
- doc.debug_save_lines(Dir + "/" + "lines_" + File);
+ // io::pbm::save(doc.image_mask_letters(),Dir + "/" + "letters_" + File);
+ //io::pbm::save(doc.image_mask_alone_letters(),Dir + "/" + "letters_alone_" + File);
+ //io::pbm::save(doc.image_mask_separators(),Dir + "/" + "separators_" + File);
+ //io::pbm::save(doc.image_mask_containers(),Dir + "/" + "containers_" + File);
+ //io::pbm::save(doc.image_mask_noise(),Dir + "/" + "noise_" + File);
+
+
+
+ doc.debug_save_paragraphs(Dir + "/" + "lines_" + File);
+ //mymln::debug::save_graph_image(doc.fun_mask_implicit_separators_left(), doc.image_mask_letters(), Dir + "/" + "graph_imp_sep_line_" + File);
+ //doc.debug_save_separators(Dir + "/" + "imp_sep_graph_" + File);
+
/* typedef vertex_image<point2d,bool> v_ima_g;
v_ima_g mask = doc.fun_mask_letters();
*/
@@ -170,8 +199,8 @@ void Process(std::string File, std::string Dir)
// mymln::debug::save_graph_image(doc.fun_mask_separators(), ima, "separator_graph_" + File);
//mymln::debug::save_graph_image(area_grph, doc.image_mask_letters(), Dir + "/" + "graph_" + File);
//mymln::debug::save_graph_image(doc.fun_mask_letters(), doc.image_mask_letters(), Dir + "/" + "container_graph_" + File);
- //mymln::debug::save_graph_image(doc.fun_mask_end_lines(), doc.image_mask_letters(), Dir + "/" + "graph_start_line_" + File);
- //mymln::debug::save_boxes_image(doc.bbox_mask_letters(), ima, "lbox_" + File);
+mln::util::array<box2d> linebx = doc.bbox_mask_lines();
+ mymln::debug::save_boxes_image(linebx, doc.image_mask_letters(), Dir + "/" + "lbox_" + File);
//mymln::debug::save_boxes_image(doc.bbox_enlarge_mask_letters(10, 0), ima, "linebox_" + File);
}
--
1.7.2.5
1
0
last-svn-commit-885-g33e4509 Better paragraph detection and fix some bug
by Raphael Boissel 08 Sep '11
by Raphael Boissel 08 Sep '11
08 Sep '11
---
scribo/sandbox/raphael/code/my/document/clean.hh | 138 +++++++++++++++----
.../sandbox/raphael/code/my/document/document.hh | 148 ++++++++++++++++++-
scribo/sandbox/raphael/code/my/document/letters.hh | 17 +++
scribo/sandbox/raphael/code/my/util/union.hh | 24 +++-
scribo/sandbox/raphael/code/test.cc | 19 ++-
5 files changed, 303 insertions(+), 43 deletions(-)
create mode 100644 scribo/sandbox/raphael/code/my/document/letters.hh
diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh
index 18669d3..56c7445 100644
--- a/scribo/sandbox/raphael/code/my/document/clean.hh
+++ b/scribo/sandbox/raphael/code/my/document/clean.hh
@@ -315,30 +315,33 @@ namespace mymln
{
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
- v_ima_g mask = doc.fun_mask_letters();
+ v_ima_g mask = doc.fun_mask_all_letters();
mln_piter_(v_ima_g) v(mask.domain());
typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
nbh_t nbh(mask);
mln_niter_(nbh_t) q(nbh, v);
for_all(v)
{
- if(doc.contain_letter(v))
- {
if(doc.contain_line(v))
{
for_all(q)
{
- if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v))
- {
- doc.add_to_line_link(v, q);
- }
- else if(doc.allign_size_height_line(q,v) && doc.allign_proximity_line(q,v) && doc.allign_V_line(q, v))
+ if(doc.contain_line(q))
{
- doc.add_to_line_link(v, q);
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v))
+ {
+ doc.add_to_line_link(v, q);
+ }
+ else if(doc.allign_size_height_line(q,v))
+ {
+ if(doc.allign_proximity_line(q,v) && doc.allign_V_line(q, v))
+ {
+ doc.add_to_line_link(v, q);
+ }
+ }
}
}
}
- }
}
doc.propage_line_link();
}
@@ -352,7 +355,7 @@ namespace mymln
#endif
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
- v_ima_g mask = doc.fun_mask_start_lines();
+ v_ima_g mask = doc.fun_mask_start_end_lines();
mln_piter_(v_ima_g) v(mask.domain());
typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
nbh_t nbh(mask);
@@ -364,41 +367,71 @@ namespace mymln
{
if(doc.contain_line(v) && doc.get_beginning_of_line(v) == doc[v])
{
- doc.jump_to_paragraph(v);
+ doc.jump_to_paragraph(v);
+ if(!doc.contain_paragraph(v))
+ { doc.add_to_paragraph(v); }
+
for_all(q)
{
- if(doc.allign_H_Large(q,v) && doc.allign_size(q, v))
+ if(doc.allign_H_Large(q,v) && doc.allign_size(q, v) && doc.allign_proximity_V_line(v,q))
{
if(doc.contain_paragraph(q))
{
- if(!doc.contain_paragraph(v))
- {
- doc.add_to_paragraph(v);
-
- }
- doc.add_to_paragraph_link(q, v);
+ doc.add_to_paragraph(q);
+ doc.add_to_paragraph_link(q, v);
draw::line(out, q,v, mln::literal::green);
}
else
{
-
+ doc.add_to_paragraph(q);
+ doc.add_to_paragraph_link(v, q);
+ draw::line(out, q,v, mln::literal::magenta);
+ }
+
+ }
+ }
+ }
+ else if(doc.contain_line(v) && doc.get_end_of_line(v) == doc[v]){}
+ else if(doc.contain_line(v))
+ {
+ for_all(q)
+ {
+ if(
+ doc.get_beginning_of_line(q) == doc[q] &&
+ doc.allign_H_Large(q,v) &&
+ doc.allign_size(q, v) &&
+ doc.allign_proximity_V_line(v,q) &&
+ doc.allign_bottom_line(q,v)
+ )
+ {
+ if(doc.contain_paragraph(q))
+ {
+ doc.jump_to_paragraph(q);
if(!doc.contain_paragraph(v))
{
- doc.add_to_paragraph(q);
doc.add_to_paragraph(v);
- doc.add_to_paragraph_self_link(q);
doc.add_to_paragraph_link(q, v);
}
else
{
- doc.add_to_paragraph(q);
doc.add_to_paragraph_link(v, q);
}
- draw::line(out, q,v, mln::literal::magenta);
+ draw::line(out, q,v, mln::literal::blue);
+ }
+ else
+ {
+ doc.jump_to_paragraph(v);
+ if(!doc.contain_paragraph(v))
+ {
+ doc.add_to_paragraph(v);
+ }
+ doc.add_to_paragraph(q);
+ doc.add_to_paragraph_link(v, q);
+ draw::line(out, q,v, mln::literal::blue);
}
-
}
}
+
}
}
}
@@ -426,8 +459,7 @@ namespace mymln
doc.get_line_length(q) < 5 &&
doc.allign_smaller_line(v,q) &&
doc.get_line_length(v) > 3 &&
- doc.allign_proximity_line(v,q) &&
- doc.allign_V_line(v,q)
+ doc.allign_proximity_line(v,q)
)
{
if(doc.allign_base_line_line(v,q) && doc.get_line_length(q) < 3)
@@ -437,8 +469,60 @@ namespace mymln
}
}
}
+ }
+ doc.propage_line_link();
+ }
+ template<typename L, typename F, typename D> // L, F, D are forwarded unchanged to mymln::document::document
+ void clean_alone_letters_lines(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s) // Links neighbours for which doc.line_has(v,q) holds into v's line, and saves a debug PPM of the visited edges to dgb_out.
+ {
+ image2d<value::rgb8> out; // debug canvas, initialized from s
+ mln::initialize(out, s);
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_alone_letters(); // traversal is restricted by the alone-letters mask
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_line(v)) // only vertices that are already in a line
+ {
+ for_all(q)
+ {
+ draw::line(out, q,v, mln::literal::red); // every visited edge is drawn in red
+ if(doc.line_has(v,q))
+ {doc.add_to_line_link(v, q); draw::line(out, q,v, mln::literal::green);} // linked edges are redrawn in green
+
+ }
+ }
+ }
+ doc.propage_line_link(); // called once after the whole traversal
+ io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); // always writes the debug image
+ }
+
+ template<typename L, typename F, typename D> // L, F, D are forwarded unchanged to mymln::document::document
+ void remove_alone_letter(mymln::document::document<L,F,D>& doc) // Calls doc.add_noise() on neighbours reached through the alone-letters mask, except those in the header or footer.
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_alone_letters(); // traversal is restricted by the alone-letters mask
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ for_all(q) // NOTE(review): add_noise is applied to the neighbour q, never to v itself -- confirm this is intended
+ {
+ if(doc.in_header(q) || doc.in_footer(q)){continue;} // header/footer items are kept
+ doc.add_noise(q);
+ }
}
}
+
+
+
}
}
diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh
index f6204de..1a84edb 100644
--- a/scribo/sandbox/raphael/code/my/document/document.hh
+++ b/scribo/sandbox/raphael/code/my/document/document.hh
@@ -79,6 +79,17 @@ namespace mymln
Areas_Number_ = Areas + 1;
}
+ /* OPERATION ON PAGE */
+ inline bool in_header(const point2d& p) // True when p[0] is below one eighth of img_influ's domain length along coordinate 0.
+ { return p[0] < (img_influ.domain().len(0) / 8);}
+ inline bool in_header(Label lbl) // Label overload. NOTE(review): forwards _bboxgp[lbl] (a bounding box); no overload taking a box is visible in this hunk -- confirm
+ { return in_header(_bboxgp[lbl]); }
+
+ inline bool in_footer(const point2d& p) // True when p[0] is above (len(0) / 8) * 7 of img_influ's domain; the division by 8 is done first.
+ { return p[0] > ((img_influ.domain().len(0) / 8) * 7);}
+ inline bool in_footer(Label lbl) // Label overload. NOTE(review): forwards _bboxgp[lbl] (a bounding box); no overload taking a box is visible in this hunk -- confirm
+ { return in_footer(_bboxgp[lbl]); }
+
/* OPERATION ON PARAGRAPH */
inline bool link_paragraphs()
{
@@ -140,7 +151,8 @@ namespace mymln
inline void split_line_exclusive(const Label lbl)
{
if(lbl == 0){return;}
- lines_union.add_self_link(lbl);
+ if(!lines_union.is_self_link(lbl))
+ lines_union.add_self_link(lbl);
Label pos = get_end_of_line(lbl);
if(pos == lbl){return;}
@@ -165,7 +177,8 @@ namespace mymln
inline void split_line(const Label lbl)
{
if(lbl == 0){return;}
- lines_union.add_self_link(lbl);
+ if(!lines_union.is_self_link(lbl))
+ lines_union.add_self_link(lbl);
Label pos = get_beginning_of_line(lbl);
if(pos == lbl){return;}
@@ -201,7 +214,8 @@ namespace mymln
add_to_line(N);
}
else if(end_lines_mask(N))
- lines_union.add_self_link(N);
+ if(!lines_union.is_self_link(N))
+ lines_union.add_self_link(N);
else
{lines_union.invalidate_link(N);}
}
@@ -238,6 +252,7 @@ namespace mymln
add_to_line(N);
}
else if(start_lines_mask(N))
+ if(!lines_union.is_self_link(N))
lines_union.add_self_link(N);
else
{lines_union.invalidate_link(N);}
@@ -325,6 +340,7 @@ namespace mymln
alone_letters_mask(lbl) = false;
noise_mask(lbl) = true;
+ lines_union[lbl] = 0;
}
void inline add(Label lbl, int link)
{
@@ -571,7 +587,24 @@ namespace mymln
inline bool allign_size_height( const point2d& Left, const point2d& Right)
{return allign_size_height(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_proximity_V( const point2d& Left, const point2d& Right) // Point overload: looks both points up in img_influ and forwards to the Label overload.
+ {return allign_proximity_V(img_influ(Left), img_influ(Right));}
+ inline bool allign_proximity_V( const Label Left, const Label Right) // True when the pmin()[0] values of the two boxes differ by less than 1.5 times the larger label_size_ of Left.
+ {
+ short int SizeL0 = label_size_(0, Left);
+ short int SizeL1 = label_size_(1, Left);
+ short int Swap = 0; // NOTE(review): never used in this function
+ if(SizeL0 < SizeL1) // keep the larger of the two sizes in SizeL0
+ { SizeL0 = SizeL1; }
+ short int Dis = _bboxgp[Left].pmin()[0] - _bboxgp[Right].pmin()[0]; // signed gap between the pmin()[0] of both boxes
+ if(Dis < 0)
+ Dis = -Dis; // absolute value
+ return Dis < SizeL0 * 1.5f; // only Left's size enters the threshold; Right's size is not used
+ }
+
+
inline bool allign_proximity( const point2d& Left, const point2d& Right)
{return allign_proximity(img_influ(Left), img_influ(Right));}
@@ -601,7 +634,7 @@ namespace mymln
{
short int SizeL = lines_bbox[lines_union[Left]].len(0);
short int SizeR = lines_bbox[lines_union[Right]].len(0);
- return SizeR > (SizeL / 2) && SizeR < (SizeL * 2);
+ return SizeR > (SizeL / 2.2f) && SizeR < (SizeL * 2.2);
}
inline bool allign_proximity_line( const Label Left, const Label Right)
@@ -625,7 +658,28 @@ namespace mymln
}
+ inline bool allign_proximity_V_line( const point2d& Left, const point2d& Right) // Point overload: looks both points up in img_influ and forwards to the Label overload.
+ {return allign_proximity_V_line(img_influ(Left), img_influ(Right));}
+ inline bool allign_proximity_V_line( const Label Left, const Label Right) // True when the smaller gap between the two line boxes along coordinate 0, times 1.5, is below the larger box length along coordinate 0.
+ {
+ box2d LB = lines_bbox[lines_union[Left]]; // bounding box of Left's line
+ box2d RB = lines_bbox[lines_union[Right]]; // bounding box of Right's line
+ // two candidate gaps along coordinate 0: end of LB to start of RB, and end of RB to start of LB
+ int DisA = LB.pmax()[0] - RB.pmin()[0];
+ int DisB = RB.pmax()[0] - LB.pmin()[0];
+ if(DisA < 0){DisA = -DisA;}
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB) // keep the smaller absolute gap in DisA
+ { DisA = DisB; }
+ // lengths of both boxes along coordinate 0
+ unsigned int HA = LB.len(0);
+ unsigned int HB = RB.len(0);
+ // keep the larger length in HA
+ if(HA < HB)
+ { HA = HB; }
+ return (DisA * 1.5f) < HA;
+ }
inline bool allign_proximity_large( const point2d& Left, const point2d& Right)
{return allign_proximity_large(img_influ(Left), img_influ(Right));}
@@ -696,6 +750,7 @@ namespace mymln
{
short int allignV = lines_bbox[lines_union[Left]].pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0];
if(allignV<0){allignV = -allignV;}
+ allignV *= 2;
return allignV < lines_bbox[lines_union[Left]].len(0) && allignV < lines_bbox[lines_union[Right]].len(0);
}
@@ -746,6 +801,20 @@ namespace mymln
allignV < lines_bbox[lines_union[Left]].len(0) &&
lines_bbox[lines_union[Left]].pcenter()[0] < lines_bbox[lines_union[Right]].pcenter()[0];
}
+ inline bool allign_bottom(const point2d& Left, const point2d& Right) // Point overload: looks both points up in img_influ and forwards to the Label overload.
+ {return allign_bottom(img_influ(Left), img_influ(Right));}
+ inline bool allign_bottom(const Label Left, const Label Right) // True when Left's box has a strictly smaller pmin()[0] than Right's box.
+ {
+ return _bboxgp[Left].pmin()[0] < _bboxgp[Right].pmin()[0]; // presumably coordinate 0 is the row, i.e. Left starts above Right -- TODO confirm
+ }
+
+ inline bool allign_bottom_line(const point2d& Left, const point2d& Right) // Point overload: looks both points up in img_influ and forwards to the Label overload.
+ {return allign_bottom_line(img_influ(Left), img_influ(Right));}
+ inline bool allign_bottom_line(const Label Left, const Label Right) // Same comparison as allign_bottom, but on the bounding boxes of the lines containing Left and Right.
+ {
+ return lines_bbox[lines_union[Left]].pmin()[0] < lines_bbox[lines_union[Right]].pmin()[0]; // strict comparison of the two line boxes' pmin()[0]
+ }
+
inline bool allign_base_line(const point2d& Left, const point2d& Right)
@@ -775,6 +844,34 @@ namespace mymln
std::cout << " lines(s) : " << CLine << std::endl;
}
+ void debug_save_all(std::string file, image2d<bool> source) // Draws line boxes (blue), paragraph boxes (red) and first-label boxes (yellow), superposes them on source and saves a PPM to file.
+ {
+ image2d<value::rgb8> ima_color; // drawing canvas, initialized from img_influ
+ mln::initialize(ima_color,img_influ);
+ // 1) every valid line bounding box, in blue
+ for(unsigned int N = 0; N < lines_bbox.size(); N++)
+ {
+ if(lines_bbox[N].is_valid())
+ {
+ draw::box(ima_color, lines_bbox[N], mln::literal::blue);
+ }
+ }
+ for(unsigned int N = 0; N < paragraphs_bbox.size(); N++) // 2) every valid paragraph bounding box, in red
+ {
+ if(paragraphs_bbox[N].is_valid())
+ {
+ draw::box(ima_color, paragraphs_bbox[N], mln::literal::red);
+ }
+ }
+ for(unsigned int N = 0; N < lines_first_label.size(); N++) // 3) the box of each line's first label, in yellow
+ {
+ if(_bboxgp[lines_first_label[N]].is_valid()) // lines_first_label[N] may be 0; the validity test is then applied to _bboxgp[0]
+ {
+ draw::box(ima_color, _bboxgp[lines_first_label[N]], mln::literal::yellow);
+ }
+ }
+ io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file);
+ }
void debug_save_paragraphs(std::string file)
{ mymln::debug::save_label_image(img, paragraphs_union , file);}
void debug_save_lines(std::string file)
@@ -900,7 +997,7 @@ namespace mymln
cook_lines_();
}
inline void reset_implicit_separators()
- { implicit_separators_union.reset(); }
+ { implicit_separators_union.reset(); lines_split.fill(0);}
inline void cook_lines()
{
lines_len = mln::util::array<unsigned int>(NLine + 1);
@@ -1027,6 +1124,14 @@ namespace mymln
{ SeqP++; while(lines_iter_valid() && !lines_seq[SeqP]){SeqP++;} }
inline void lines_iter_valid()
{ return SeqP < Areas_Number_; }
+
+
+ inline void cook_paragraphs() // Reallocates paragraphs_bbox with NPar + 1 entries, then runs cook_paragraphs_().
+ {
+ paragraphs_bbox = mln::util::array<box2d>(NPar + 1); // replaces any previous content of paragraphs_bbox
+ cook_paragraphs_();
+ }
+
private:
fun::i2v::array<bool> implicit_separators_left_mask;
fun::i2v::array<bool> implicit_separators_right_mask;
@@ -1100,7 +1205,7 @@ namespace mymln
}
}
}
-
+
inline void cook_separators_right_()
{
implicit_separators_right_mask(0) = false;
@@ -1234,12 +1339,12 @@ namespace mymln
/* COOK THE FIRST AND THE LAST LABEL OF THE LINE */
if(lines_first_label[lines_union[N]] == 0)
lines_first_label[lines_union[N]] = N;
- else if(_bboxgp[N].pcenter()[1] < _bboxgp[lines_first_label[lines_union[N]]].pcenter()[1])
+ else if(_bboxgp[N].pmin()[1] < _bboxgp[lines_first_label[lines_union[N]]].pmin()[1])
lines_first_label[lines_union[N]] = N;
if(lines_last_label[lines_union[N]] == 0)
lines_last_label[lines_union[N]] = N;
- else if(_bboxgp[N].pcenter()[1] > _bboxgp[lines_last_label[lines_union[N]]].pcenter()[1])
+ else if(_bboxgp[N].pmax()[1] > _bboxgp[lines_last_label[lines_union[N]]].pmax()[1])
lines_last_label[lines_union[N]] = N;
/* FILL THE MASK WITH FALSE:MAYBE USELESS IF THE MASK IS INITIALIZED */
@@ -1275,6 +1380,10 @@ namespace mymln
{
lines_bbox[lines_union[N]].merge(_bboxgp[N]);
}
+ if(lines_len[lines_union[N]] == 1)
+ { letters_mask(N) = false; alone_letters_mask(N) = true; }
+ else if(lines_union[N])
+ { letters_mask(N) = true; alone_letters_mask(N) = false; }
}
}
@@ -1411,11 +1520,34 @@ namespace mymln
unsigned int NPar ;
mln::util::array<unsigned int> paragraphs_first_label;
mln::util::array<unsigned int> paragraphs_last_label;
+ mln::util::array<unsigned int> paragraphs_assoc;
mln::util::array<box2d> paragraphs_bbox;
inline void cook_paragraphs_()
{
+ mln::util::array<unsigned int> paragraphs_assoc(lines_union.size()); // Per-line table: first paragraph-bearing label seen for each line. NOTE(review): this local shadows the paragraphs_assoc member added by the same patch and is not explicitly zero-filled here -- confirm
+ for(int N = 0; N < paragraphs_union.size(); N++) // step 1: link labels of one line that both carry a paragraph. NOTE(review): signed N compared with size()
+ {
+ if(paragraphs_union[N])
+ {
+ if(paragraphs_assoc[lines_union[N]]) // the line already has a recorded label: link N to it
+ { paragraphs_union.add_link(N, paragraphs_assoc[lines_union[N]]); }
+ else // first label seen for this line: record it
+ {paragraphs_assoc[lines_union[N]] = N;}
+ }
+ }
+ paragraphs_union.propage_links(); // apply the links added above
+ for(int N = 0; N < paragraphs_bbox.size(); N++) // step 2: reset every paragraph box to a default-constructed box2d
+ {
+ paragraphs_bbox[N] = box2d();
+ }
+ for(int N = 0; N < paragraphs_union.size(); N++) // step 3: merge each label's line box into its paragraph box
+ {
+ if(paragraphs_union[N])
+ paragraphs_bbox[paragraphs_union[N]].merge(lines_bbox[lines_union[N]]);
+
+ }
}
diff --git a/scribo/sandbox/raphael/code/my/document/letters.hh b/scribo/sandbox/raphael/code/my/document/letters.hh
new file mode 100644
index 0000000..6701943
--- /dev/null
+++ b/scribo/sandbox/raphael/code/my/document/letters.hh
@@ -0,0 +1,17 @@
+#ifndef INC_CLEAN_LETTER_DOC
+#define INC_CLEAN_LETTER_DOC
+#include<my/document/document.hh>
+#include <mln/core/image/graph_elt_neighborhood.hh>
+#include <mln/core/image/vertex_image.hh>
+using namespace mln;
+
+namespace mymln
+{
+ namespace document
+ {
+ void clean_letter_aberation() // Placeholder with an empty body. NOTE(review): non-inline, non-template definition in a header -- confirm it is included from a single translation unit
+ {
+ // intentionally empty for now
+ }
+ }
+}
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/my/util/union.hh b/scribo/sandbox/raphael/code/my/util/union.hh
index 53fcbb3..90a7e68 100644
--- a/scribo/sandbox/raphael/code/my/util/union.hh
+++ b/scribo/sandbox/raphael/code/my/util/union.hh
@@ -28,12 +28,23 @@ namespace mymln
inline void invalidate_link(const Label A)
{ mark_link[A] = 0; }
inline void add_self_link(const Label A)
- { mark_link[A] = A; }
+ {
+ if(!A){return;}
+ if(mark_link[A] == 0)
+ mark_link[A] = A;
+ else
+ {
+ unsigned int Pos = find_parent_(A);
+ if(Pos)
+ mark_link[Pos] = A;
+ mark_link[A] = A;
+ }
+ }
inline unsigned int link(const unsigned int index)
{return mark_link[index]; }
inline void add_link(const Label A, const Label B)
{
-
+ if(!B || !A){return;}
unsigned int Pos = find_parent_(A);
if(mark_link[B] == 0)
{
@@ -84,7 +95,14 @@ namespace mymln
inline unsigned int find_parent_(const Label A)
{
unsigned int Pos = A;
- while(Pos != mark_link[Pos] && Pos != 0){Pos = mark_link[Pos];}
+ unsigned int OldPos = A;
+ while(Pos != mark_link[Pos] && Pos != 0)
+ {
+
+ Pos = mark_link[Pos];
+ mark_link[OldPos] = mark_link[Pos];
+ OldPos = Pos;
+ }
return Pos;
}
mln::util::array<unsigned int> mark;
diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc
index b009c2e..feaf817 100644
--- a/scribo/sandbox/raphael/code/test.cc
+++ b/scribo/sandbox/raphael/code/test.cc
@@ -127,25 +127,32 @@ void Process(std::string File, std::string Dir)
mymln::document::separators::separators_find_allign(doc);
mymln::document::separators::separators_make_clean(doc);
doc.cook_separators();
+ std::cout << "-> compute separator left " << endl;
doc.cook_line_splitting();
+
mymln::document::clean_line_link_item(doc);
mymln::document::clean_proximity_lines(doc);
mymln::document::clean_quote_lines(doc);
-
doc.reset_implicit_separators();
+ std::cout << "-> clean separator right " << endl;
mymln::document::separators::separators_find_allign_right(doc);
mymln::document::separators::separators_make_clean(doc);
+ std::cout << "-> compute separator right " << endl;
doc.cook_separators_right();
doc.cook_line_splitting_exclusive();
+ std::cout << "-> clean separator right " << endl;
mymln::document::clean_line_link_item(doc);
mymln::document::clean_proximity_lines(doc);
+ std::cout << "-> clean " << endl;
mymln::document::clean_quote_lines(doc);
-
+ mymln::document::clean_alone_letters_lines(doc, Dir + "/" + "alone_graph_" + File, doc.image_mask_letters());
+ doc.recook_lines();
+ mymln::document::remove_alone_letter(doc);
doc.recook_lines();
mymln::document::clean_paragraph_items(doc, Dir + "/" + "para_graph_" + File, doc.image_mask_letters());
-
+ doc.cook_paragraphs();
std::cout << "WORK ON GRAPH : " << timer.stop() << endl;
//io::ppm::save(ima_influ, "separator.ppm");
//io::pbm::save(doc.image_mask_separators(),"separators");
@@ -157,7 +164,8 @@ void Process(std::string File, std::string Dir)
- doc.debug_save_paragraphs(Dir + "/" + "lines_" + File);
+ //doc.debug_save_lines(Dir + "/" + "lines_" + File);
+ doc.debug_save_all(Dir + "/" + "debug_" + File, ima);
//mymln::debug::save_graph_image(doc.fun_mask_implicit_separators_left(), doc.image_mask_letters(), Dir + "/" + "graph_imp_sep_line_" + File);
//doc.debug_save_separators(Dir + "/" + "imp_sep_graph_" + File);
@@ -199,8 +207,9 @@ void Process(std::string File, std::string Dir)
// mymln::debug::save_graph_image(doc.fun_mask_separators(), ima, "separator_graph_" + File);
//mymln::debug::save_graph_image(area_grph, doc.image_mask_letters(), Dir + "/" + "graph_" + File);
//mymln::debug::save_graph_image(doc.fun_mask_letters(), doc.image_mask_letters(), Dir + "/" + "container_graph_" + File);
-mln::util::array<box2d> linebx = doc.bbox_mask_lines();
+ mln::util::array<box2d> linebx = doc.bbox_mask_lines();
mymln::debug::save_boxes_image(linebx, doc.image_mask_letters(), Dir + "/" + "lbox_" + File);
+
//mymln::debug::save_boxes_image(doc.bbox_enlarge_mask_letters(10, 0), ima, "linebox_" + File);
}
--
1.7.2.5
1
0
last-svn-commit-886-gbb109db Add script system and recognition system. Fix some bugs
by Raphael Boissel 08 Sep '11
by Raphael Boissel 08 Sep '11
08 Sep '11
---
scribo/sandbox/raphael/code/makefile | 17 +-
scribo/sandbox/raphael/code/my/document/clean.hh | 488 +++++++--
.../sandbox/raphael/code/my/document/document.hh | 1182 ++++++++++++++++++--
scribo/sandbox/raphael/code/my/document/letters.hh | 12 +-
.../raphael/code/my/document/recognition.hh | 74 ++
.../sandbox/raphael/code/my/document/separator.hh | 64 +-
scribo/sandbox/raphael/code/my/runtime/lib.hh | 180 +++
scribo/sandbox/raphael/code/my/runtime/runtime.hh | 196 ++++
scribo/sandbox/raphael/code/test.cc | 232 +++--
9 files changed, 2157 insertions(+), 288 deletions(-)
create mode 100644 scribo/sandbox/raphael/code/my/document/recognition.hh
create mode 100644 scribo/sandbox/raphael/code/my/runtime/lib.hh
create mode 100644 scribo/sandbox/raphael/code/my/runtime/runtime.hh
diff --git a/scribo/sandbox/raphael/code/makefile b/scribo/sandbox/raphael/code/makefile
index 288352e..8eddc7a 100644
--- a/scribo/sandbox/raphael/code/makefile
+++ b/scribo/sandbox/raphael/code/makefile
@@ -1,13 +1,18 @@
speed:
- g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -O3 -fwhole-program
- ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin ima.pbm
+ ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -O3 -fwhole-program
+ ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm
std:
- g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena
- ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin ima.pbm
+ ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena
+ ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm
release:
- g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -DNDEBUG -O2
- ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin ima.pbm
+ ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -DNDEBUG -O2
+ ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm
+debug:
+ ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -DNDEBUG -ggdb
+ ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm
+test:
+ ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm
clean:
rm -r -f ../bin/*
diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh
index 56c7445..b996b8a 100644
--- a/scribo/sandbox/raphael/code/my/document/clean.hh
+++ b/scribo/sandbox/raphael/code/my/document/clean.hh
@@ -144,7 +144,7 @@ namespace mymln
if((!doc.contain_line(q)))
{
// draw::line(out, q,v, mln::literal::blue);
- if(doc.allign_V(q,v) && doc.allign_size(q, v) && (doc.allign_proximity_large(q, v) || doc.allign_proximity_large(v, q)) )
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity_large(q, v) )
{
doc.add_to_line_link(v, q);
All_Alone = false;
@@ -152,7 +152,7 @@ namespace mymln
}
else
{
- if(doc.allign_V(q,v) && doc.allign_size(q, v) && (doc.allign_proximity_large(q, v) || doc.allign_proximity_large(v, q)))
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity_large(q, v))
{
doc.add_to_line_link(q, v);
All_Alone = false;
@@ -186,7 +186,12 @@ namespace mymln
{
for_all(q)
{
- if(!doc.allign_H(q, v) && doc.allign_base_line(v, q))
+ if(
+ !doc.allign_H(q, v) &&
+ doc.allign_base_line_line_strict(v, q) &&
+ doc.allign_proximity(v,q) &&
+ doc.allign_smaller_line(v, q) &&
+ doc.get_line_length(v) > 2)
{
doc.add_to_line_link(v, q);
doc.add_letter_coerce(q);
@@ -200,14 +205,21 @@ namespace mymln
mln_niter_(nbh_t) q2(nbh, v2);
for_all(v2)
{
- if(doc.contain_line(v2))
+ if(doc.contain_line(v2) && !doc.contain_alone_letter(v2))
{
for_all(q2)
{
- if (doc.allign_H_Large(v2, q2) && doc.allign_top(v2, q2))
+ if (
+ doc.allign_H_large(v2, q2) &&
+ doc.allign_top(v2, q2) &&
+ doc.line_has(v2, q2) &&
+ doc.letter_ratio_XY(q2) < 2 &&
+ doc.allign_size_width_large(v2, q2))
{
doc.add_to_line_link(v2, q2);
doc.add_letter_coerce(q2);
+ if(doc.allign_H(v2,q2))
+ {doc.merge(v2,q2); doc.tag_label(v2, "i");}
}
}
}
@@ -216,12 +228,8 @@ namespace mymln
}
template<typename L, typename F, typename D>
- void clean_quote_items(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ void clean_quote_items(mymln::document::document<L,F,D>& doc)
{
- #ifndef NGRAPHDEBUG
- image2d<value::rgb8> out;
- mln::initialize(out, s);
- #endif
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_start_end_lines();
@@ -239,38 +247,40 @@ namespace mymln
{
if(doc.allign_size(v,q))
{
- if(!doc.contain_alone_letter(v) && !doc.contain_alone_letter(q) )
+ if(!doc.contain_alone_letter(v) && !doc.contain_alone_letter(q) && doc.allign_proximity_large_left(v,q) )
{
if(
- (doc.allign_top(v, q) || doc.allign_top(doc.get_beginning_of_line(v), doc.get_label(q)))
+ (doc.allign_top(v, q) || doc.allign_top(doc.get_beginning_of_line(v), doc.get_label(q)) ) &&
+ doc.allign_smaller_line_letter(v,q)
)
{
- doc.add_to_line_link(v, q);
- draw::line(out, q,v, mln::literal::green);
+ if(doc.get_line_length(q) < 3 || doc.allign_V_line(v, q))
+ {
+ doc.add_to_line_link(v, q);
+ doc.tag_label(v, "'");
+ }
}
}
- else if(doc.allign_top(v, q) && !doc.allign_H(v, q))
- {
- doc.add_to_line_link(v, q);
- }
- else
+ else if(doc.allign_top(v, q) && !doc.allign_H(v, q) && doc.allign_proximity_large_left(v,q) && doc.allign_smaller_line_letter(v,q))
{
- draw::line(out, q,v, mln::literal::magenta);
+ if(doc.get_line_length(q) < 3 || doc.allign_V_line(v, q))
+ {
+ doc.add_to_line_link(v, q);
+ doc.tag_label(v, "'");
+ }
}
}
- else if (doc.allign_H_Large(q, v) && doc.allign_top(v, q))
+ else if (doc.allign_H_large(q, v) && doc.allign_top(v, q) && doc.allign_size_width_large(v, q))
{
doc.add_to_line_link(v, q);
- draw::line(out, q,v, mln::literal::blue);
+ if(doc.allign_H(v,q))
+ {doc.merge(v,q); doc.tag_label(v, "i");}
}
}
}
}
}
- #ifndef NGRAPHDEBUG
- io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
- #endif
doc.propage_line_link();
doc.recook_lines();
}
@@ -287,6 +297,7 @@ namespace mymln
mln_niter_(nbh_t) q(nbh, v);
for_all(v)
{
+
if(doc.contain_letter(v))
{
if(doc.contain_line(v))
@@ -303,9 +314,37 @@ namespace mymln
{
doc.add_to_line_link(v, q);
}
+
}
}
}
+ }
+ doc.propage_line_link();
+ }
+
+
+ template<typename L, typename F, typename D> // For each vertex v that belongs to a line, links every neighbouring one-element line q to v when doc.line_influence_has(v,q) holds.
+ void clean_proximity_letters(mymln::document::document<L,F,D>& doc)
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_all_letters(); // mask supplies both the iteration domain and the neighbourhood below
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v); // q walks the neighbours of the current v
+ for_all(v)
+ {
+ if(doc.contain_line(v))
+ {
+ for_all(q)
+ {
+ if(doc.contain_line(q) && doc.get_line_length(q) == 1 && doc.line_influence_has(v,q)) // q must itself be a line of length exactly 1
+ {
+ doc.add_to_line_link(q,v);
+ }
+ }
+ }
+ } // NOTE(review): no propage_line_link() call here, unlike the sibling passes - presumably the caller propagates; confirm
}
@@ -326,33 +365,80 @@ namespace mymln
{
for_all(q)
{
- if(doc.contain_line(q))
+ if(doc.same_line(q,v)){continue;}
+ if(doc.contain_alone_letter(q))
{
- if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v))
+ if(doc.allign_V(q,v) && doc.allign_proximity_strict(q, v) && doc.allign_size_height(q, v))
{
doc.add_to_line_link(v, q);
+
+ }
+
+ }
+ else if(doc.contain_line(q))
+ {
+ if(doc.allign_V(q,v) && doc.allign_size_height_line_strict(q, v) && doc.allign_proximity_strict(q,v))
+ {
+ doc.add_to_line_link(v, q);
+
}
else if(doc.allign_size_height_line(q,v))
{
- if(doc.allign_proximity_line(q,v) && doc.allign_V_line(q, v))
+ if(doc.allign_proximity_line(q,v) && doc.allign_V_line_strict(q, v))
{
doc.add_to_line_link(v, q);
+ doc.debug_draw_line_green_buffer(v,q);
+ }
+ else if(doc.line_influence_reciprocal(q, v) && doc.allign_V_line_strict(q, v))
+ {
+ doc.add_to_line_link(v, q);
+ doc.debug_draw_line_red_buffer(v,q);
}
}
+
+
}
}
}
}
doc.propage_line_link();
}
-
+ template<typename L, typename F, typename D> // For each vertex v of the start/end-of-line mask, links v to a neighbouring line start when v has both a line-start and a non-start neighbour passing the alignment tests.
+ void clean_between(mymln::document::document<L,F,D>& doc)
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_start_end_lines();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ L Start = 0; // 0 doubles as "not found"; see the Start && End test below
+ L End = 0;
+ for_all(q)
+ {
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_proximity(q,v))
+ {
+ if(doc[q] == doc.get_beginning_of_line(q)) // q is the first element of its own line
+ {Start = doc[q]; }
+ else
+ {End = doc[q]; } // any other matching neighbour; the last one visited wins
+ }
+
+ }
+ if(Start && End){doc.add_to_line_link(Start, doc[v]);} // only links when both kinds of neighbour were seen
+ }
+ doc.propage_paragraph_link(); // NOTE(review): line links are added above but paragraph links are propagated here - confirm propage_line_link() was not intended
+
+ }
+
+
+
template<typename L, typename F, typename D>
- void clean_paragraph_items(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ void clean_paragraph_items(mymln::document::document<L,F,D>& doc)
{
- #ifndef NGRAPHDEBUG
- image2d<value::rgb8> out;
- mln::initialize(out, s);
- #endif
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_start_end_lines();
@@ -373,19 +459,22 @@ namespace mymln
for_all(q)
{
- if(doc.allign_H_Large(q,v) && doc.allign_size(q, v) && doc.allign_proximity_V_line(v,q))
+ if(
+ doc.allign_H_large(q,v) &&
+ doc.allign_size_height_line(q, v) &&
+ doc.allign_proximity_V_line(v,q) &&
+ doc.allign_size_width_line(q, v)
+ )
{
if(doc.contain_paragraph(q))
{
doc.add_to_paragraph(q);
doc.add_to_paragraph_link(q, v);
- draw::line(out, q,v, mln::literal::green);
}
else
{
doc.add_to_paragraph(q);
doc.add_to_paragraph_link(v, q);
- draw::line(out, q,v, mln::literal::magenta);
}
}
@@ -398,8 +487,9 @@ namespace mymln
{
if(
doc.get_beginning_of_line(q) == doc[q] &&
- doc.allign_H_Large(q,v) &&
- doc.allign_size(q, v) &&
+ doc.allign_H_large(q,v) &&
+ doc.allign_size_height_line(q, v) &&
+ doc.allign_size_width_line(q, v) &&
doc.allign_proximity_V_line(v,q) &&
doc.allign_bottom_line(q,v)
)
@@ -416,7 +506,6 @@ namespace mymln
{
doc.add_to_paragraph_link(v, q);
}
- draw::line(out, q,v, mln::literal::blue);
}
else
{
@@ -427,7 +516,6 @@ namespace mymln
}
doc.add_to_paragraph(q);
doc.add_to_paragraph_link(v, q);
- draw::line(out, q,v, mln::literal::blue);
}
}
}
@@ -435,7 +523,6 @@ namespace mymln
}
}
}
- io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
doc.propage_paragraph_link();
}
@@ -473,10 +560,10 @@ namespace mymln
doc.propage_line_link();
}
template<typename L, typename F, typename D>
- void clean_alone_letters_lines(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ void clean_alone_letters_lines(mymln::document::document<L,F,D>& doc)
{
- image2d<value::rgb8> out;
- mln::initialize(out, s);
+ //image2d<value::rgb8> out;
+ //mln::initialize(out, s);
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_alone_letters();
@@ -490,38 +577,303 @@ namespace mymln
{
for_all(q)
{
- draw::line(out, q,v, mln::literal::red);
- if(doc.line_has(v,q))
- {doc.add_to_line_link(v, q); draw::line(out, q,v, mln::literal::green);}
-
+ //draw::line(out, q,v, mln::literal::red);
+ if(
+ ((doc.line_influence_has(v,q) && doc.is_line_representative(q)) ||
+ doc.line_has(v,q)) &&
+ doc.allign_V(v, q)
+ )
+ {doc.add_to_line_link(v, q);}
+ else if(doc.line_has(v,q))
+ {doc.add_to_line_link(v, q);}
}
}
}
doc.propage_line_link();
- io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
+ //io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
+ }
+
+ template<typename L, typename F, typename D> // Marks as noise every alone-letter neighbour that is neither in the page header nor in the footer.
+ void remove_alone_letter(mymln::document::document<L,F,D>& doc)
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_alone_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ for_all(q)
+ {
+ if(doc.in_header(q) || doc.in_footer(q)){continue;} // header/footer components are never discarded by this pass
+ if(doc.contain_alone_letter(q)) {doc.add_noise(q);} // only components still flagged as alone letters become noise
+ }
+ }
+ }
+
+ // Marks as noise every label that belongs to a line and whose bounding box is
+ // out of proportion: len(0) above 3x get_letter_middle_height(), or otherwise
+ // len(1) above 4x get_letter_middle_width(). Label 0 is not visited.
+ template<typename L, typename F, typename D>
+ void clean_odd_letters(mymln::document::document<L,F,D>& doc)
+ {
+ for(unsigned int label = 1; label < doc.count(); ++label)
+ {
+ if(!doc.contain_line(label))
+ { continue; }
+ // The width test is evaluated only when the height test fails.
+ const bool too_tall = doc.get_letter_middle_height(label) * 3 < doc.get_bbox(label).len(0);
+ if(too_tall || doc.get_letter_middle_width(label) * 4 < doc.get_bbox(label).len(1))
+ { doc.add_noise(label); }
+ }
+ }
+
+ template<typename L, typename F, typename D> // Links a one-element paragraph (v) to a neighbouring paragraph (q) when every geometric predicate listed below holds.
+ void clean_paragraphs_up(mymln::document::document<L,F,D>& doc)
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_paragraph(v))
+ {
+ for_all(q)
+ {
+ if(
+ doc.contain_paragraph(q) &&
+ !doc.same_paragraph(v, q) && // never link a paragraph to itself
+ doc.allign_top_paragraph(q, v) &&
+ doc.decal_left_paragraph(q, v) &&
+ doc.allign_size_height_line(q, v) &&
+ doc.allign_size_width_paragraph(q, v) &&
+ doc.get_paragraph_length(v) == 1 && // v's paragraph must have length exactly 1
+ doc.allign_H_paragraph(v, q) &&
+ doc.allign_proximity_V_line(v,q)
+ )
+ {
+ doc.add_to_paragraph_link(q,v);
+ }
+ }
+ }
+ }
+ doc.propage_paragraph_link(); // propagate the links recorded above
+ }
+
+ template<typename L, typename F, typename D> // Links two distinct paragraphs that both have length > 1 when the alignment, size and proximity predicates below hold.
+ void clean_paragraphs_large(mymln::document::document<L,F,D>& doc)
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_paragraph(v) && doc.get_paragraph_length(v) > 1) // one-element paragraphs are ignored by this pass
+ {
+ for_all(q)
+ {
+ if(
+ doc.contain_paragraph(q) &&
+ doc.get_paragraph_length(q) > 1 &&
+ !doc.same_paragraph(v, q) &&
+ doc.allign_top_paragraph(q, v))
+ {
+
+ if(
+ doc.decal_left_paragraph_strong(q, v) && // uses the _strong variant, unlike clean_paragraphs_up
+ doc.allign_size_height_line(q, v) &&
+ doc.allign_size_width_paragraph(q, v) &&
+ doc.allign_H_paragraph(v, q) &&
+ doc.allign_proximity_V_line(v,q)
+ )
+ {
+ doc.add_to_paragraph_link(q,v);
+
+ }
+ }
+ }
+ }
+ }
+ doc.propage_paragraph_link(); // propagate the links recorded above
+
+
+
+ }
+ template<typename L, typename F, typename D> // Links a one-element paragraph (q) to a longer neighbouring paragraph (v) when paragraph_included_influence(v,q) and the size/influence tests below hold.
+ void clean_included_paragraphs(mymln::document::document<L,F,D>& doc)
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_paragraph(v) && doc.get_paragraph_length(v) > 1) // v must sit in a paragraph of length > 1
+ {
+ for_all(q)
+ {
+ if(
+ doc.contain_paragraph(q) &&
+ doc.get_paragraph_length(q) == 1 && // q must be a paragraph of length exactly 1
+ !doc.same_paragraph(q, v) &&
+ doc.paragraph_included_influence(v, q) &&
+ doc.allign_size_height_line(v, q))
+ {
+ if(doc.line_influence_reciprocal(q, v)) // final gate before recording the link
+ doc.add_to_paragraph_link(v,q);
+ }
+ }
+ }
+ }
+ doc.propage_paragraph_link(); // propagate the links recorded above
+ }
+
+ template<typename L, typename F, typename D> // Debug pass: draws coloured segments between same-line neighbours into an image and saves it to dgb_out; the link/noise actions are commented out.
+ void clean_lines_space(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ {
+ image2d<value::rgb8> out; // drawing surface, initialized from s below
+ mln::initialize(out, s);
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ for_all(q)
+ {
+ if(doc.same_line(q, v))
+ {
+ draw::line(out, q,v, mln::literal::red); // red: q and v share a line
+ if(doc.in_beginning_of_line(q) || doc.in_end_of_line(q))
+ {
+ draw::line(out, q,v, mln::literal::green); // green, drawn over the red segment: q is near either end of the line
+ if(doc.space(q, v) > doc.get_letter_middle_space(q) * 10)
+ {
+ draw::line(out, q,v, mln::literal::blue); // blue, drawn last: gap exceeds 10x get_letter_middle_space(q)
+ /*if(doc[q] == doc.get_beginning_of_line(q))
+ doc.add_to_line_link(v, q); */
+ //doc.add_noise(q);
+ }
+ }
+ }
+ }
+ }
+ doc.propage_line_link(); // still called although no link is added in this function
+ io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); // writes the overlay superposed with s to the path dgb_out
+ }
+
+
+ template<typename L, typename F, typename D> // Merges neighbouring components (doc.merge(q,v)) when letter_included(q,v) and allign_size_large_inside(q,v) both hold.
+ void clean_included_letters(mymln::document::document<L,F,D>& doc)
+ {
+
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_all_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ for_all(q)
+ {
+ if(doc.letter_included(q, v) && doc.allign_size_large_inside(q, v))
+ {
+
+
+ doc.merge(q,v); // the merge happens while the iteration is still running
+ }
+ }
+ }
+ doc.propage_line_link(); // called after the merges; no add_to_line_link is issued by this function itself
+
+ }
+
+
template<typename L, typename F, typename D>
- void remove_alone_letter(mymln::document::document<L,F,D>& doc)
+ void clean_backward_letters(mymln::document::document<L,F,D>& doc)
{
- typedef vertex_image<point2d,bool> v_ima_g;
- typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
- v_ima_g mask = doc.fun_mask_alone_letters();
- mln_piter_(v_ima_g) v(mask.domain());
- typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
- nbh_t nbh(mask);
- mln_niter_(nbh_t) q(nbh, v);
- for_all(v)
- {
- for_all(q)
- {
- if(doc.in_header(q) || doc.in_footer(q)){continue;}
- doc.add_noise(q);
- }
- }
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_start_lines();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(!doc.contain_line(v))
+ {
+ for_all(q)
+ {
+ if(
+ doc.contain_line(q) &&
+ doc.get_line_length(q)> 5 &&
+ doc.allign_V(v, q) &&
+ doc.allign_proximity(v, q) &&
+ doc.allign_size_height(v, q)
+ )
+ {
+ doc.add_to_line_link(q,v);
+ doc.debug_draw_line_green_buffer(v, q);
+ }
+ }
+ }
+ }
+ doc.propage_line_link();
}
-
-
+
+ template<typename L, typename F, typename D> // Links paragraph q to neighbouring paragraph v when q has length > 1, does not start with a tab, and the alignment/proximity predicates below hold.
+ void clean_paragraphs_tab(mymln::document::document<L,F,D>& doc)
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_all_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_paragraph(v))
+ {
+ for_all(q)
+ {
+ if(
+ doc.contain_paragraph(q) &&
+ !doc.same_paragraph(q,v) &&
+ !doc.paragraph_start_with_tab(q) && // paragraphs reported as starting with a tab are left alone
+ doc.allign_top_paragraph(q, v) &&
+ doc.get_paragraph_length(q) > 1 &&
+ doc.get_first_line_ID(doc[q]) == doc.get_line_ID(doc[q]) && // q's line id must equal the id returned by get_first_line_ID for q
+ doc.allign_size_width_paragraph(q,v) &&
+ doc.allign_proximity_paragraph_up(q,v) &&
+ doc.allign_H_paragraph(q,v)
+ )
+ {
+ doc.add_to_paragraph_link(q,v);
+ doc.debug_draw_line_green_buffer(v, q); // debug trace of the accepted link
+ }
+ }
+ }
+ }
+ doc.propage_paragraph_link(); // propagate the links recorded above
+ }
+
}
}
diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh
index 1a84edb..880d4c5 100644
--- a/scribo/sandbox/raphael/code/my/document/document.hh
+++ b/scribo/sandbox/raphael/code/my/document/document.hh
@@ -40,7 +40,7 @@ namespace mymln
// WARNING: Methods on lines like get_end_of_line can be used only after a line cooking.
-
+ document(){}
document(image2d<Label>& ima, image2d<Label>& ima_influ,mln::util::array<box2d>& bboxgp, g_vertices_p& area_graph, Label Areas)
{
img = ima;
@@ -56,17 +56,19 @@ namespace mymln
alone_letters_mask = fun::i2v::array<bool>(Areas + 1);
implicit_separators_left_mask = fun::i2v::array<bool>(Areas + 1);
implicit_separators_right_mask = fun::i2v::array<bool>(Areas + 1);
+ kill_mask = fun::i2v::array<bool>(Areas + 1);
CImpSep = 1;
NImpSep = 2;
lines_union = mymln::util::union_find<Label>(Areas + 1);
implicit_separators_union = mymln::util::union_find<Label>(Areas + 1);
-
+ debug_buffer_enable = false;
paragraphs_union = mymln::util::union_find<Label>(Areas + 1);
tag_lbl = mln::util::array<std::string>(Areas + 1);
+ Btag_lbl = mln::util::array<bool>(Areas + 1);
lines_split = mln::util::array<Label>(Areas + 1);
lines_split.fill(0);
-
+ tag_lbl.fill("");
img_influ = ima_influ;
CSep = 0;
CSepH = 0;
@@ -79,6 +81,8 @@ namespace mymln
Areas_Number_ = Areas + 1;
}
+ inline unsigned int count() // Returns Areas_Number_ (set to Areas + 1 by the constructor); clean passes use it as the exclusive upper bound when looping over labels.
+ {return Areas_Number_;}
/* OPERATION ON PAGE */
inline bool in_header(const point2d& p)
{ return p[0] < (img_influ.domain().len(0) / 8);}
@@ -93,16 +97,28 @@ namespace mymln
/* OPERATION ON PARAGRAPH */
inline bool link_paragraphs()
{
- for(unsigned int N = 1; N < Areas_Number_; N++)
- {
+ paragraphs_union[0] = 0;
+ for(unsigned int N = 1; N < Areas_Number_; N++)
+ {
paragraphs_union.invalidate_link(N);
- if(start_lines_mask(N))
+ if (!contain_paragraph(N) && contain_line(N))
{
- paragraphs_union.add_self_link(N);
+ jump_to_paragraph(N);
+ if(start_lines_mask(N))
+ {
+ add_to_paragraph(N);
+ paragraphs_union.add_self_link(N);
+ }
}
- else if(contain_line(N))
+ }
+ for(unsigned int N = 1; N < Areas_Number_; N++)
+ {
+
+ if (!contain_paragraph(N) && contain_line(N))
{
- if(get_beginning_of_line(N) == 0){std::cout <<"ERROR#\n";}
+ jump_to_paragraph(get_beginning_of_line(N));
+ add_to_paragraph(N);
+ if(contain_line(N) && get_beginning_of_line(N) != 0)
paragraphs_union.add_link(get_beginning_of_line(N), N);
}
}
@@ -200,12 +216,14 @@ namespace mymln
inline void cook_line_splitting_exclusive()
{
+ std::cout << "--> start union exclusive" << std::endl;
for(unsigned int N = 1; N < Areas_Number_; N++)
{
lines_union.invalidate_link(N);
if(end_lines_mask(N) || implicit_separators_right_mask(N))
split_line_exclusive(N);
}
+ std::cout << "--> start linking" << std::endl;
for(unsigned int N = 1; N < Areas_Number_; N++)
{
if(lines_union.is_self_link(N))
@@ -221,17 +239,19 @@ namespace mymln
}
lines_union[0] = 0;
lines_union.invalidate_link(0);
+ std::cout << "--> propage union " << std::endl;
for(unsigned int N = 1; N < Areas_Number_; N++)
{
if(!contain_line(N) || lines_union.is_self_link(N))
continue;
Label pos = get_end_of_line(N);
- while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] > _bboxgp[N].pmin()[1])
- pos = lines_split[pos];
- if(pos != 0)
+ Label oldpos = pos;
+ while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] >= _bboxgp[N].pmin()[1])
+ {oldpos = pos; pos = lines_split[pos];}
+ if(pos != 0 && pos != N && pos < Areas_Number_ )
{lines_union[N] = lines_union[pos]; lines_union.add_link(pos,N);}
}
-
+ std::cout << "--> end propage union " << std::endl;
//lines_union.propage_links();lines_union
cook_lines();
}
@@ -266,10 +286,9 @@ namespace mymln
Label pos = get_beginning_of_line(N);
while(lines_split[pos] && _bboxgp[lines_split[pos]].pmin()[1] < _bboxgp[N].pmin()[1])
pos = lines_split[pos];
- if(pos != 0)
+ if(pos != 0 && pos != N && pos < Areas_Number_ )
{lines_union[N] = lines_union[pos]; lines_union.add_link(pos,N);}
}
-
//lines_union.propage_links();lines_union
cook_lines();
}
@@ -285,6 +304,46 @@ namespace mymln
{ return same_line(img_influ(A), img_influ(B)); }
inline bool same_line(const Label A, const Label B)
{ return lines_union[A] == lines_union[B]; }
+ inline bool same_paragraph(const point2d& A, const point2d& B) // Point overload: resolves both points to labels through img_influ.
+ { return same_paragraph(img_influ(A), img_influ(B)); }
+ inline bool same_paragraph(const Label A, const Label B) // True when both labels have the same paragraphs_union entry.
+ { return paragraphs_union[A] == paragraphs_union[B]; }
+
+ inline bool in_beginning_of_line(const point2d& A) // Point overload: resolves the point to a label through img_influ.
+ {return in_beginning_of_line(img_influ(A));}
+ inline bool in_beginning_of_line(const Label A) // True when the label's pmax()[1] lies before pmin()[1] + len(1) / 8 of its line's bounding box.
+ {return lines_bbox[lines_union[A]].len(1) / 8 + lines_bbox[lines_union[A]].pmin()[1] > _bboxgp[A].pmax()[1];}
+
+
+ inline bool is_line_representative(const point2d& A) // Point overload: resolves the point to a label through img_influ.
+ {return is_line_representative(img_influ(A));}
+ inline bool is_line_representative(const Label A) // True when the line box's len(0) is less than twice the label box's len(0).
+ {return lines_bbox[lines_union[A]].len(0) < _bboxgp[A].len(0) * 2 ;}
+
+
+ inline bool in_end_of_line(const point2d& A) // Point overload: resolves the point to a label through img_influ.
+ {return in_end_of_line(img_influ(A));}
+ inline bool in_end_of_line(const Label A) // True when the label's pmax()[1] lies beyond pmax()[1] - len(1) / 8 of its line's bounding box.
+ {return lines_bbox[lines_union[A]].pmax()[1] - lines_bbox[lines_union[A]].len(1) / 8 < _bboxgp[A].pmax()[1];}
+
+ inline unsigned int space(const point2d& A,const point2d& B) // Point overload: resolves both points to labels through img_influ.
+ {return space(img_influ(A), img_influ(B));}
+ inline unsigned int space(const Label A, const Label B) // Distance along coordinate 1 between the two label boxes: the smaller of the two opposite-edge gaps.
+ {
+ box2d LB = _bboxgp[A];
+ box2d RB = _bboxgp[B];
+
+ int DisA = LB.pmax()[1] - RB.pmin()[1]; // far edge of A to near edge of B
+ int DisB = RB.pmax()[1] - LB.pmin()[1]; // far edge of B to near edge of A
+ if(DisA < 0){DisA = -DisA;} // absolute values: the order of A and B does not matter
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; } // keep the smaller gap
+ return DisA;
+ }
+
+
+
inline void add_new_line(const point2d& point)
{ add_new_line(img_influ(point)); }
@@ -351,6 +410,7 @@ namespace mymln
/* SET UP SPECIAL MASK TO FALSE */
implicit_separators_left_mask(lbl) = false;
implicit_separators_right_mask(lbl) = false;
+ kill_mask(lbl) = false;
}
void inline invalid_letter(const point2d& point)
{invalid_letter(img_influ(point));}
@@ -400,8 +460,9 @@ namespace mymln
void add_letter(const Label lbl)
{
CLet++;
- if(label_valid_size_Min_(lbl, 2))
+ if(label_valid_size_Min_(lbl, 3) || label_valid_size_Min_Large_(lbl, 2))
{
+ img_influ(_bboxgp[lbl].pcenter()) = lbl;
letters_mask(lbl) = true;
all_letters_mask(lbl) = true;
separators_mask(lbl) = false;
@@ -519,6 +580,15 @@ namespace mymln
inline bool allign_top( const point2d& Left, const point2d& Right)
{return allign_top(img_influ(Left), img_influ(Right));}
+ // Point overload: resolves both points to their labels through img_influ.
+ inline bool allign_top_line( const point2d& Left, const point2d& Right)
+ {return allign_top_line(img_influ(Left), img_influ(Right));}
+
+ // True when the centre of Left's line box is offset from the centre of
+ // Right's line box, along coordinate 0, by a non-negative amount that is
+ // larger than half the len(0) of Left's line box.
+ inline bool allign_top_line( const Label Left, const Label Right)
+ {
+ short int allignV = lines_bbox[lines_union[Left]].pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0];
+ // The test used to read (!allignV < 0): '!' binds tighter than '<', so a 0/1
+ // value was compared with 0 and the whole predicate was always false.
+ return allignV >= 0 && allignV * 2 > lines_bbox[lines_union[Left]].len(0);
+ }
+
inline bool allign_top( const Label Left, const Label Right)
{
short int allignV = label_allign_(0, Left, Right);
@@ -535,6 +605,10 @@ namespace mymln
return allignV < label_size_(0, Left) && (_bboxgp[Left].pcenter()[0]) > (_bboxgp[Right].pcenter()[0]);
}
+
+
+
+
inline bool allign_up_line_line( const point2d& Left, const point2d& Right)
{return allign_up_line_line(img_influ(Left), img_influ(Right));}
@@ -548,10 +622,30 @@ namespace mymln
(lines_bbox[lines_union[Left]].pcenter()[0]) > (lines_bbox[lines_union[Left]].pcenter()[0]);
}
- inline bool allign_H_Large( const point2d& Left, const point2d& Right)
- {return allign_H_Large(img_influ(Left), img_influ(Right));}
+ inline bool allign_left( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {return allign_left(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_left( const Label Left, const Label Right) // True when Left's box starts at a strictly greater coordinate 0 than Right's box.
+ {
+
+ return _bboxgp[Left].pmin()[0] > _bboxgp[Right].pmin()[0]; // NOTE(review): index 0 is the axis in_header() tests against the page's len(0) / 8, so this may be the vertical axis - confirm the name matches the intent
+ }
+
+ inline bool allign_right( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {return allign_right(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_right( const Label Left, const Label Right) // True when Left's box starts at a strictly smaller coordinate 0 than Right's box.
+ {
+
+ return _bboxgp[Left].pmin()[0] < _bboxgp[Right].pmin()[0]; // NOTE(review): compares pmin() on index 0, the axis in_header() tests against the page's len(0) / 8 - confirm that a "right" test should not use pmax() or index 1
+ }
+
+
- inline bool allign_H_Large( const Label Left, const Label Right)
+ inline bool allign_H_large( const point2d& Left, const point2d& Right)
+ {return allign_H_large(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_H_large( const Label Left, const Label Right)
{
short int allignV = label_allign_(1, Left, Right) * 1.5f;
return allignV < label_size_(1, Left);
@@ -610,20 +704,80 @@ namespace mymln
inline bool allign_proximity( const Label Left, const Label Right)
{
- short int SizeL0 = label_size_(0, Left);
- short int SizeL1 = label_size_(1, Left);
- short int Swap = 0;
- if(SizeL0 < SizeL1)
- { SizeL0 = SizeL1; }
- short int Dis = _bboxgp[Left].pmin()[1] - _bboxgp[Right].pmin()[1];
- if(Dis < 0)
- Dis = -Dis;
- return Dis < SizeL0 * 1.5f;
+ box2d LB = _bboxgp[Left];
+ box2d RB = _bboxgp[Right];
+
+ int DisA = LB.pmax()[1] - RB.pmin()[1];
+ int DisB = RB.pmax()[1] - LB.pmin()[1];
+ if(DisA < 0){DisA = -DisA;}
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; }
+
+ unsigned int HA = LB.len(0);
+ unsigned int HB = LB.len(1);
+
+ if(HB > HA)
+ { HA = HB; }
+ return (DisA * 2) < HA * 3;
}
- inline bool allign_proximity_line( const point2d& Left, const point2d& Right)
- {return allign_proximity_line(img_influ(Left), img_influ(Right));}
+
+
+
+ inline bool allign_proximity_strict( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {return allign_proximity_strict(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_proximity_strict( const Label Left, const Label Right) // True when twice the gap along coordinate 1 is smaller than the largest side of each of the two boxes.
+ {
+ box2d LB = _bboxgp[Left];
+ box2d RB = _bboxgp[Right];
+
+ int DisA = LB.pmax()[1] - RB.pmin()[1];
+ int DisB = RB.pmax()[1] - LB.pmin()[1];
+ if(DisA < 0){DisA = -DisA;}
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; } // DisA now holds the smaller absolute gap
+
+ unsigned int HA = LB.len(0);
+ unsigned int HB = RB.len(0);
+ unsigned int VA = LB.len(1);
+ unsigned int VB = RB.len(1);
+
+ if(VA > HA)
+ { HA = VA; } // HA = larger side of Left's box
+ if(VB > HB)
+ { HB = VB; } // HB = larger side of Right's box
+ return (DisA * 2) < HA && (DisA * 2) < HB; // both boxes must pass, unlike allign_proximity which only looks at Left
+ }
+
+
+ inline bool allign_proximity_paragraph_up( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {return allign_proximity_paragraph_up(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_proximity_paragraph_up( const Label Left, const Label Right) // True when five times the gap along coordinate 0 between the two paragraph boxes is smaller than the taller of their first lines.
+ {
+ box2d LB = paragraphs_bbox[paragraphs_union[Left]];
+ box2d RB = paragraphs_bbox[paragraphs_union[Right]];
+
+ int DisA = LB.pmax()[0] - RB.pmin()[0];
+ int DisB = RB.pmax()[0] - LB.pmin()[0];
+ if(DisA < 0){DisA = -DisA;}
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; } // DisA now holds the smaller absolute gap
+
+ unsigned int HA = lines_bbox[paragraphs_first_line[paragraphs_union[Left]]].len(0); // len(0) of the first line of Left's paragraph
+ unsigned int HB = lines_bbox[paragraphs_first_line[paragraphs_union[Right]]].len(0); // len(0) of the first line of Right's paragraph
+ if(HA < HB)
+ { HA = HB; } // keep the larger of the two
+ return (DisA * 5) < HA;
+ }
+
+ inline bool allign_proximity_line_large( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ and forwards to the Label overload.
+ {return allign_proximity_line_large(img_influ(Left), img_influ(Right));}
inline bool allign_size_height_line( const point2d& Left, const point2d& Right)
{
@@ -637,6 +791,61 @@ namespace mymln
return SizeR > (SizeL / 2.2f) && SizeR < (SizeL * 2.2);
}
+ inline bool allign_size_height_line_strict( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {
+ return allign_size_height_line_strict(img_influ(Left), img_influ(Right));
+ }
+
+ inline bool allign_size_height_line_strict( const Label Left, const Label Right) // True when the len(0) of Right's line box is strictly within a factor 1.3 of Left's.
+ {
+ short int SizeL = lines_bbox[lines_union[Left]].len(0);
+ short int SizeR = lines_bbox[lines_union[Right]].len(0);
+ return SizeR > (SizeL / 1.3f) && SizeR < (SizeL * 1.3); // same shape as allign_size_height_line, which uses 2.2 instead of 1.3
+ }
+
+ inline bool allign_proximity_line( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ and forwards to the Label overload.
+ {return allign_proximity_line(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_size_width_large( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {
+ return allign_size_width_large(img_influ(Left), img_influ(Right));
+ }
+
+ inline bool allign_size_width_large( const Label Left, const Label Right) // True when the two len(1) values are within a factor 5 of each other (bounds included).
+ {
+ short int SizeL = _bboxgp[lines_union[Left]].len(1); // NOTE(review): _bboxgp is indexed with lines_union[...] here, while the *_line predicates index lines_bbox with it - confirm this is intended
+ short int SizeR = _bboxgp[lines_union[Right]].len(1);
+ return SizeR >= (SizeL / 5) && SizeR <= (SizeL * 5); // SizeL / 5 is an integer division
+ }
+
+
+
+
+ inline bool allign_size_width_line( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {
+ return allign_size_width_line(img_influ(Left), img_influ(Right));
+ }
+
+ inline bool allign_size_width_line( const Label Left, const Label Right) // True when the len(1) of Right's line box is strictly between a quarter and four times Left's.
+ {
+ short int SizeL = lines_bbox[lines_union[Left]].len(1);
+ short int SizeR = lines_bbox[lines_union[Right]].len(1);
+ return SizeR > (SizeL / 4) && SizeR < (SizeL * 4); // SizeL / 4 is an integer division
+ }
+
+ inline bool allign_size_width_paragraph( const point2d& Left, const point2d& Right) // Point overload: resolves both points to labels through img_influ.
+ {
+ return allign_size_width_paragraph(img_influ(Left), img_influ(Right));
+ }
+
+ inline bool allign_size_width_paragraph( const Label Left, const Label Right) // True when the len(1) of Right's paragraph box is strictly between 2/3 and 3/2 of Left's.
+ {
+ short int SizeL = paragraphs_bbox[paragraphs_union[Left]].len(1);
+ short int SizeR = paragraphs_bbox[paragraphs_union[Right]].len(1);
+ return SizeR > ((SizeL * 2) / 3) && SizeR < ((SizeL * 3) / 2); // integer arithmetic: multiply first, then divide
+ }
+
+
inline bool allign_proximity_line( const Label Left, const Label Right)
{
box2d LB = lines_bbox[lines_union[Left]];
@@ -658,6 +867,27 @@ namespace mymln
}
+ // True when twice the smaller absolute dimension-1 edge gap between the line
+ // boxes of Left and Right is below the larger of their dimension-0 extents.
+ inline bool allign_proximity_line_large( const Label Left, const Label Right)
+ {
+ box2d BoxL = lines_bbox[lines_union[Left]];
+ box2d BoxR = lines_bbox[lines_union[Right]];
+
+ // Gap between the max edge of one box and the min edge of the other, both ways.
+ int GapLR = BoxL.pmax()[1] - BoxR.pmin()[1];
+ int GapRL = BoxR.pmax()[1] - BoxL.pmin()[1];
+ if(GapLR < 0)
+ { GapLR = -GapLR; }
+ if(GapRL < 0)
+ { GapRL = -GapRL; }
+ int Gap = GapLR;
+ if(Gap > GapRL)
+ { Gap = GapRL; }
+
+ unsigned int Tallest = BoxL.len(0);
+ unsigned int HeightR = BoxR.len(0);
+ if(Tallest < HeightR)
+ { Tallest = HeightR; }
+ return (Gap * 2) < Tallest;
+ }
+
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
inline bool allign_proximity_V_line( const point2d& Left, const point2d& Right)
{return allign_proximity_V_line(img_influ(Left), img_influ(Right));}
@@ -680,21 +910,93 @@ namespace mymln
{ HA = HB; }
return (DisA * 1.5f) < HA;
}
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool allign_H_paragraph( const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return allign_H_paragraph(LabelL, LabelR);
+ }
+ // True when twice the absolute dimension-1 distance between the two paragraph
+ // box centers is below the dimension-1 extent of both paragraph boxes.
+ inline bool allign_H_paragraph( const Label Left, const Label Right )
+ {
+ short int Offset = paragraphs_bbox[paragraphs_union[Left]].pcenter()[1] - paragraphs_bbox[paragraphs_union[Right]].pcenter()[1];
+ if(Offset < 0)
+ { Offset = -Offset; }
+ if(!(Offset * 2 < paragraphs_bbox[paragraphs_union[Right]].len(1)))
+ { return false; }
+ return Offset * 2 < paragraphs_bbox[paragraphs_union[Left]].len(1);
+ }
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool allign_top_paragraph( const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return allign_top_paragraph(LabelL, LabelR);
+ }
+ // True when Left's paragraph box starts, along dimension 0, strictly after
+ // the end of Right's paragraph box.
+ inline bool allign_top_paragraph( const Label Left, const Label Right )
+ {
+ return
+ paragraphs_bbox[paragraphs_union[Left]].pmin()[0] >
+ paragraphs_bbox[paragraphs_union[Right]].pmax()[0];
+ }
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool decal_left_paragraph(const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return decal_left_paragraph(LabelL, LabelR);
+ }
+ // True when Left's paragraph box starts, along dimension 1, more than 1/40 of
+ // Right's dimension-1 extent after the start of Right's paragraph box.
+ inline bool decal_left_paragraph( const Label Left, const Label Right )
+ {
+ box2d BoxR = paragraphs_bbox[paragraphs_union[Right]];
+ return paragraphs_bbox[paragraphs_union[Left]].pmin()[1] > BoxR.pmin()[1]
+ + (BoxR.len(1) / 40) ;
+ }
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool decal_left_paragraph_strong(const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return decal_left_paragraph_strong(LabelL, LabelR);
+ }
+ // Same test as decal_left_paragraph with a larger margin: 1/20 of Right's
+ // dimension-1 extent instead of 1/40.
+ inline bool decal_left_paragraph_strong( const Label Left, const Label Right )
+ {
+ box2d BoxR = paragraphs_bbox[paragraphs_union[Right]];
+ return paragraphs_bbox[paragraphs_union[Left]].pmin()[1] > BoxR.pmin()[1]
+ + (BoxR.len(1) / 20) ;
+ }
+
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool allign_proximity_large_left( const point2d& Left, const point2d& Right)
+ {return allign_proximity_large_left(img_influ(Left), img_influ(Right));}
+ // True when the smaller absolute dimension-1 edge gap between the _bboxgp
+ // boxes of Left and Right is below twice the larger side of Left's box.
+ // Only Left's size takes part in the test; the sizes of Right that were
+ // previously computed and never read have been removed.
+ inline bool allign_proximity_large_left( const Label Left, const Label Right)
+ {
+ box2d LB = _bboxgp[Left];
+ box2d RB = _bboxgp[Right];
+
+ int DisA = LB.pmax()[1] - RB.pmin()[1];
+ int DisB = RB.pmax()[1] - LB.pmin()[1];
+ if(DisA < 0){DisA = -DisA;}
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; }
+
+ // HA ends up holding the larger of Left's two extents.
+ unsigned int HA = LB.len(0);
+ unsigned int VA = LB.len(1);
+
+ if(VA > HA)
+ { HA = VA; }
+ return (DisA) < HA * 2;
+ }
+
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
inline bool allign_proximity_large( const point2d& Left, const point2d& Right)
{return allign_proximity_large(img_influ(Left), img_influ(Right));}
+ // True when the smaller absolute dimension-1 edge gap between the _bboxgp
+ // boxes of Left and Right is below twice the larger side of each box.
inline bool allign_proximity_large( const Label Left, const Label Right)
{
- short int SizeL0 = label_size_(0, Left);
- short int SizeL1 = label_size_(1, Left);
- short int Swap = 0;
- if(SizeL0 < SizeL1)
- { SizeL0 = SizeL1; }
- short int Dis = _bboxgp[Left].pmin()[1] - _bboxgp[Right].pmin()[1];
- if(Dis < 0)
- Dis = -Dis;
- return Dis < SizeL0 * 3;
+ box2d LB = _bboxgp[Left];
+ box2d RB = _bboxgp[Right];
+
+ // Absolute gap between the max edge of one box and the min edge of the other.
+ int DisA = LB.pmax()[1] - RB.pmin()[1];
+ int DisB = RB.pmax()[1] - LB.pmin()[1];
+ if(DisA < 0){DisA = -DisA;}
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; }
+
+ unsigned int HA = LB.len(0);
+ unsigned int HB = RB.len(0);
+ unsigned int VA = LB.len(1);
+ unsigned int VB = RB.len(1);
+
+ // HA / HB become the larger of the two extents of each box.
+ if(VA > HA)
+ { HA = VA; }
+ if(VB > HB)
+ { HB = VB; }
+ return (DisA) < HA * 2 && (DisA) < HB * 2;
}
@@ -705,7 +1007,23 @@ namespace mymln
short int SizeR = label_size_(0, Right);
return SizeR > (SizeL / 3) && SizeR < (SizeL * 3);
}
+
+ // Point overload: delegates to the strict Label overload with the labels
+ // stored in img_influ under both points (it previously called allign_size,
+ // so the strict test was never applied to points).
+ inline bool allign_size_strict( const point2d& Left, const point2d& Right)
+ {return allign_size_strict(img_influ(Left), img_influ(Right));}
+ // True when the larger side of Right (per label_size_) is strictly between
+ // half and twice the larger side of Left (integer arithmetic).
+ inline bool allign_size_strict( const Label Left, const Label Right)
+ {
+ short int SizeL0 = label_size_(0, Left);
+ short int SizeR0 = label_size_(0, Right);
+ short int SizeL1 = label_size_(1, Left);
+ short int SizeR1 = label_size_(1, Right);
+ // Keep the larger of the two dimensions for each label.
+ if(SizeL0 < SizeL1)
+ { SizeL0 = SizeL1; }
+ if(SizeR0 < SizeR1){SizeR0 = SizeR1;}
+ return SizeR0 > (SizeL0 / 2) && SizeR0 < (SizeL0 * 2);
+ }
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
inline bool allign_size( const point2d& Left, const point2d& Right)
{return allign_size(img_influ(Left), img_influ(Right));}
@@ -754,6 +1072,18 @@ namespace mymln
return allignV < lines_bbox[lines_union[Left]].len(0) && allignV < lines_bbox[lines_union[Right]].len(0);
}
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool allign_V_line_strict( const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return allign_V_line_strict(LabelL, LabelR);
+ }
+
+ // True when four times the absolute dimension-0 distance between the two
+ // line box centers is below the dimension-0 extent of both line boxes.
+ inline bool allign_V_line_strict( Label Left, Label Right)
+ {
+ short int Offset = lines_bbox[lines_union[Left]].pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0];
+ if(Offset < 0)
+ { Offset = -Offset; }
+ Offset *= 4;
+ if(!(Offset < lines_bbox[lines_union[Left]].len(0)))
+ { return false; }
+ return Offset < lines_bbox[lines_union[Right]].len(0);
+ }
+
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
inline bool allign_center_line( const point2d& Left, const point2d& Right)
{return allign_center_line(img_influ(Left), img_influ(Right));}
inline bool allign_center_line( Label Left, Label Right)
@@ -762,6 +1092,7 @@ namespace mymln
if(allignC<0){allignC = -allignC;}
return allignC * 5 < lines_bbox[lines_union[Left]].len(0);
}
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
inline bool allign_smaller_line( const point2d& Left, const point2d& Right)
{return allign_smaller_line(img_influ(Left), img_influ(Right));}
@@ -770,6 +1101,12 @@ namespace mymln
return lines_bbox[lines_union[Left]].len(0) > (lines_bbox[lines_union[Right]].len(0) * 2);
}
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool allign_smaller_line_letter( const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return allign_smaller_line_letter(LabelL, LabelR);
+ }
+ // True when the dimension-0 extent of Left's line box exceeds 1.5 times the
+ // dimension-0 extent of Right's own box (_bboxgp).
+ inline bool allign_smaller_line_letter( Label Left, Label Right)
+ {
+ return
+ lines_bbox[lines_union[Left]].len(0) >
+ (_bboxgp[Right].len(0) * 1.5f);
+ }
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
inline bool allign_V_large( const point2d& Left, const point2d& Right)
{return allign_V_large(img_influ(Left), img_influ(Right));}
@@ -801,6 +1138,18 @@ namespace mymln
allignV < lines_bbox[lines_union[Left]].len(0) &&
lines_bbox[lines_union[Left]].pcenter()[0] < lines_bbox[lines_union[Right]].pcenter()[0];
}
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
+ inline bool allign_base_line_line_strict(const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return allign_base_line_line_strict(LabelL, LabelR);
+ }
+ // True when (a) three times the absolute dimension-0 distance between the
+ // center of Left's line box and the center of Right's own box is below the
+ // dimension-0 extent of Left's line box, and (b) the center of Left's line
+ // box comes before the center of Right's line box along dimension 0.
+ // NOTE(review): (a) uses _bboxgp[Right] while (b) uses Right's line box --
+ // confirm the mix is intended.
+ inline bool allign_base_line_line_strict(const Label Left, const Label Right)
+ {
+ short int Offset = lines_bbox[lines_union[Left]].pcenter()[0] - _bboxgp[Right].pcenter()[0];
+ if(Offset < 0)
+ { Offset = -Offset; }
+ Offset *= 3;
+ if(!(Offset < lines_bbox[lines_union[Left]].len(0)))
+ { return false; }
+ return lines_bbox[lines_union[Left]].pcenter()[0] < lines_bbox[lines_union[Right]].pcenter()[0];
+ }
+
+ // Point overload: delegates to the Label overload with the labels stored in
+ // img_influ under both points.
inline bool allign_bottom(const point2d& Left, const point2d& Right)
{return allign_bottom(img_influ(Left), img_influ(Right));}
inline bool allign_bottom(const Label Left, const Label Right)
@@ -832,7 +1181,15 @@ namespace mymln
Float AFactor = label_allign_(1, Left, Right);
return AFactor < label_size_(1,Left);
}
-
+ // Point overload: delegates to the Label overload with the label stored in
+ // img_influ under the point.
+ inline bool paragraph_start_with_tab(const point2d& Point)
+ {
+ const Label Paragraph = img_influ(Point);
+ return paragraph_start_with_tab(Paragraph);
+ }
+ // True when the box of the paragraph's first-line entry starts, along
+ // dimension 1, more than 1/20 of the paragraph's dimension-1 extent after
+ // the start of the paragraph box.
+ // NOTE(review): paragraphs_first_line[...] is used to index _bboxgp here --
+ // confirm it stores a label rather than a line ID.
+ inline bool paragraph_start_with_tab(const Label Paragraph)
+ {
+ Label FirstLine = paragraphs_first_line[paragraphs_union[Paragraph]];
+ box2d ParBox = paragraphs_bbox[paragraphs_union[Paragraph]];
+ return
+ _bboxgp[FirstLine].pmin()[1] > ParBox.pmin()[1] +
+ (ParBox.len(1) / 20);
+ }
void stat()
{
@@ -844,38 +1201,158 @@ namespace mymln
std::cout << " lines(s) : " << CLine << std::endl;
}
- void debug_save_all(std::string file, image2d<bool> source)
+ // Stores the given image in debug_source; the debug_save_* overloads that
+ // take only a file name pass it on as their source image.
+ void debug_set_image(image2d<bool>& source)
+ {
+ debug_source = source;
+ }
+ // Initializes debug_buffer from img_influ and switches buffered debug
+ // drawing on.
+ inline void debug_create_buffer()
+ {
+ mln::initialize(debug_buffer, img_influ);
+ debug_buffer_enable = true;
+ }
+
+ // Switches buffered debug drawing off, then saves debug_buffer superposed
+ // with debug_source as a PPM file.
+ inline void debug_save_buffer(std::string file)
+ {
+ debug_buffer_enable = false;
+ io::ppm::save(
+ mln::debug::superpose(debug_buffer, debug_source, literal::white),
+ file);
+ }
+
+ // Draws a red segment from A to B into debug_buffer; does nothing while
+ // buffered debug drawing is disabled.
+ inline void debug_draw_line_red_buffer(const point2d& A,const point2d& B )
+ {
+ if(!debug_buffer_enable)
+ { return; }
+ draw::line(debug_buffer, A, B, mln::literal::red);
+ }
+ // Draws a green segment from A to B into debug_buffer; does nothing while
+ // buffered debug drawing is disabled.
+ inline void debug_draw_line_green_buffer(const point2d& A,const point2d& B )
+ {
+ if(!debug_buffer_enable)
+ { return; }
+ draw::line(debug_buffer, A, B, mln::literal::green);
+ }
+
+ // Convenience overload: saves the union view on top of debug_source.
+ void debug_save_union(std::string file)
+ {debug_save_union(file, debug_source);}
+ // Saves a PPM showing the line links (blue) and paragraph links (red):
+ // a label linked to itself gets its box drawn, any other label gets a
+ // segment from its box center to the center of the label it links to.
+ void debug_save_union(std::string file, image2d<bool> source)
{
image2d<value::rgb8> ima_color;
mln::initialize(ima_color,img_influ);
- for(unsigned int N = 0; N < lines_bbox.size(); N++)
+ for(unsigned int N = 1; N < lines_union.size(); N++)
{
- if(lines_bbox[N].is_valid())
+ if(lines_union[N])
{
- draw::box(ima_color, lines_bbox[N], mln::literal::blue);
+ if(N == lines_union.link(N))
+ draw::box(ima_color, _bboxgp[N], mln::literal::blue);
+ else
+ draw::line(ima_color, _bboxgp[N].pcenter(), _bboxgp[lines_union.link(N)].pcenter(), mln::literal::blue);
}
}
- for(unsigned int N = 0; N < paragraphs_bbox.size(); N++)
+ for(unsigned int N = 1; N < paragraphs_union.size(); N++)
{
- if(paragraphs_bbox[N].is_valid())
+ if(paragraphs_union[N])
{
- draw::box(ima_color, paragraphs_bbox[N], mln::literal::red);
+ if(N == paragraphs_union.link(N))
+ draw::box(ima_color, _bboxgp[N], mln::literal::red);
+ else
+ draw::line(ima_color, _bboxgp[N].pcenter(), _bboxgp[paragraphs_union.link(N)].pcenter(), mln::literal::red);
}
}
- for(unsigned int N = 0; N < lines_first_label.size(); N++)
+ io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file);
+ }
+ // Convenience overload: saves the line view on top of debug_source.
+ void debug_save_lines(std::string file)
+ {
+ debug_save_lines(file, debug_source);
+ }
+ // Saves a PPM with every valid line box in blue and every valid letter box
+ // (per contain_letter) in cyan, superposed with the given source image.
+ void debug_save_lines(std::string file, image2d<bool> source)
+ {
+ image2d<value::rgb8> ima_color;
+ mln::initialize(ima_color,img_influ);
+
+ for(unsigned int Line = 0; Line < lines_bbox.size(); Line++)
+ {
+ if(!lines_bbox[Line].is_valid())
+ { continue; }
+ draw::box(ima_color, lines_bbox[Line], mln::literal::blue);
+ }
+ for(unsigned int Lbl = 0; Lbl < _bboxgp.size(); Lbl++)
+ {
+ if(!(_bboxgp[Lbl].is_valid() && contain_letter(Lbl)))
+ { continue; }
+ draw::box(ima_color, _bboxgp[Lbl], mln::literal::cyan);
+ }
+ io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file);
+ }
+
+
+
+
+
+ // Convenience overload: saves the full debug view on top of debug_source.
+ void debug_save_all(std::string file)
+ {debug_save_all(file, debug_source);}
+ // Saves a PPM superposing on the source image: letter boxes (cyan), the
+ // first label of each line (yellow), the last label of each line (orange),
+ // line boxes (blue), paragraph boxes (red) and paragraph influence boxes
+ // (orange).
+ void debug_save_all(std::string file, image2d<bool> source)
+ {
+ image2d<value::rgb8> ima_color;
+ mln::initialize(ima_color,img_influ);
+
+ for(unsigned int N = 0; N < _bboxgp.size(); N++)
+ {
+ if(_bboxgp[N].is_valid() && contain_letter(N))
+ {
+ draw::box(ima_color, _bboxgp[N], mln::literal::cyan);
+ }
+ }
+
+ for(unsigned int N = 0; N < lines_first_label.size(); N++)
{
if(_bboxgp[lines_first_label[N]].is_valid())
{
draw::box(ima_color, _bboxgp[lines_first_label[N]], mln::literal::yellow);
}
}
+
+ for(unsigned int N = 0; N < lines_last_label.size(); N++)
+ {
+ // Validate the box that is actually drawn (the last label's box); the
+ // check previously looked at the first label's box.
+ if(_bboxgp[lines_last_label[N]].is_valid())
+ {
+ draw::box(ima_color, _bboxgp[lines_last_label[N]], mln::literal::orange);
+ }
+ }
+
+
+ for(unsigned int N = 0; N < lines_bbox.size(); N++)
+ {
+ if(lines_bbox[N].is_valid())
+ {
+ draw::box(ima_color, lines_bbox[N], mln::literal::blue);
+ }
+ }
+ // Drawing of line influence boxes is currently disabled.
+ for(unsigned int N = 0; N < lines_influ_bbox.size(); N++)
+ {
+ if(lines_influ_bbox[N].is_valid())
+ {
+ //draw::box(ima_color, lines_influ_bbox[N], mln::literal::cyan);
+ }
+ }
+ for(unsigned int N = 0; N < paragraphs_bbox.size(); N++)
+ {
+ if(paragraphs_bbox[N].is_valid())
+ {
+ draw::box(ima_color, paragraphs_bbox[N], mln::literal::red);
+ if(paragraphs_bbox_influ[N].is_valid())
+ {
+ draw::box(ima_color, paragraphs_bbox_influ[N], mln::literal::orange);
+ }
+ }
+
+ }
+ // Drawing of implicit-separator labels is currently disabled.
+ for(unsigned int N = 0; N < _bboxgp.size(); N++)
+ {
+ if(_bboxgp[N].is_valid() && (implicit_separators_left_mask(N) || implicit_separators_right_mask(N)))
+ {
+ // draw::box(ima_color, _bboxgp[N], mln::literal::yellow);
+ }
+ }
+
io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file);
}
+ // Saves the label image img, relabelled through paragraphs_union, to file.
void debug_save_paragraphs(std::string file)
{ mymln::debug::save_label_image(img, paragraphs_union , file);}
- void debug_save_lines(std::string file)
- { mymln::debug::save_label_image(img, lines_union , file);}
+ // Saves the label image img, relabelled through implicit_separators_union,
+ // to file.
void debug_save_separators(std::string file)
{ mymln::debug::save_label_image(img, implicit_separators_union , file);}
vertex_image<point2d,bool> fun_mask_separators()
@@ -944,10 +1421,29 @@ namespace mymln
+ // Returns the result of bbox_mask_enlarge_ applied to noise_mask with the
+ // given x / y amounts.
mln::util::array<box2d> bbox_enlarge_mask_noise(short int x, short int y)
{ return bbox_mask_enlarge_(noise_mask, x, y); }
- Label get_label(point2d point)
+ // Returns the label stored in img_influ at the given point.
+ Label get_label(const point2d& point)
{ return img_influ(point); }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline box2d get_paragraph_bbox(const point2d& point)
+ {
+ const Label L = img_influ(point);
+ return get_paragraph_bbox(L);
+ }
+
+ // Returns the bounding box of the paragraph that label L belongs to.
+ inline box2d get_paragraph_bbox(Label L)
+ {
+ return paragraphs_bbox[paragraphs_union[L]];
+ }
+
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline box2d get_line_bbox(const point2d& point)
+ {
+ const Label L = img_influ(point);
+ return get_line_bbox(L);
+ }
+
+ // Returns the bounding box of the line that label L belongs to.
+ inline box2d get_line_bbox(Label L)
+ {
+ return lines_bbox[lines_union[L]];
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline unsigned int get_paragraph_length(const point2d& point)
+ {
+ const Label L = img_influ(point);
+ return get_paragraph_length(L);
+ }
- inline unsigned int get_line_length(point2d point)
+ // Returns the paragraphs_len entry of the paragraph that label L belongs to.
+ inline unsigned int get_paragraph_length(Label L)
+ {
+ return paragraphs_len[paragraphs_union[L]];
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline unsigned int get_line_length(const point2d& point)
{ return get_line_length(img_influ(point)); }
inline unsigned int get_line_length(Label L)
@@ -958,6 +1454,102 @@ namespace mymln
+ // Returns the dimension-1 extent of the bounding box of L's line.
inline unsigned int get_line_width(Label L)
{ return lines_bbox[lines_union[L]].len(1); }
+
+
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline Float letter_ratio_XY(const point2d& point)
+ {
+ const Label Letter = img_influ(point);
+ return letter_ratio_XY(Letter);
+ }
+ // Ratio of the dimension-1 extent to the dimension-0 extent of the letter's
+ // own box, computed in Float.
+ inline Float letter_ratio_XY(Label Letter)
+ {
+ return static_cast<Float>(_bboxgp[Letter].len(1)) / static_cast<Float>(_bboxgp[Letter].len(0));
+ }
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline bool line_median(const point2d& point)
+ {
+ const Label Letter = img_influ(point);
+ return line_median(Letter);
+ }
+
+ // True when three times the absolute dimension-0 distance between the
+ // letter's center and its line's center is below the line's dimension-0
+ // extent.
+ inline bool line_median(Label Letter)
+ {
+ short int Offset = _bboxgp[Letter].pcenter()[0] - get_line_bbox(Letter).pcenter()[0];
+ if(Offset < 0)
+ { Offset = -Offset; }
+ return Offset * 3 < get_line_bbox(Letter).len(0);
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline bool line_size_small(const point2d& point)
+ {
+ const Label Letter = img_influ(point);
+ return line_size_small(Letter);
+ }
+ // True when three times the letter's dimension-0 extent is below the
+ // dimension-0 extent of its line box.
+ inline bool line_size_small(Label Letter)
+ {
+ return
+ _bboxgp[Letter].len(0) * 3 <
+ get_line_bbox(Letter).len(0);
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline bool line_base(const point2d& point)
+ {
+ const Label Letter = img_influ(point);
+ return line_base(Letter);
+ }
+ // True when the letter's center is within half the line's dimension-0
+ // extent of the line's center and lies more than a fifth of that extent
+ // past the line's center along dimension 0.
+ inline bool line_base(Label Letter)
+ {
+ short int Offset = _bboxgp[Letter].pcenter()[0] - get_line_bbox(Letter).pcenter()[0];
+ if(Offset < 0)
+ { Offset = -Offset; }
+ if(!(Offset * 2 < get_line_bbox(Letter).len(0)))
+ { return false; }
+ return get_line_bbox(Letter).pcenter()[0] + (get_line_bbox(Letter).len(0) / 5) < _bboxgp[Letter].pcenter()[0];
+ }
+
+ // Point overload: delegates with the labels stored in img_influ under both
+ // points.
+ inline bool letter_included(point2d Par1, point2d Par2)
+ {
+ const Label Outer = img_influ(Par1);
+ const Label Inner = img_influ(Par2);
+ return letter_included(Outer, Inner);
+ }
+ // True when both corners (pmin and pmax) of Par2's box lie inside Par1's box.
+ inline bool letter_included(Label Par1, Label Par2)
+ {
+ if(!_bboxgp[Par1].has(_bboxgp[Par2].pmin()))
+ { return false; }
+ return _bboxgp[Par1].has(_bboxgp[Par2].pmax());
+ }
+
+ // Point overload: delegates with the labels stored in img_influ under both
+ // points.
+ inline bool paragraph_included_influence(point2d Par1, point2d Par2)
+ {
+ const Label Outer = img_influ(Par1);
+ const Label Inner = img_influ(Par2);
+ return paragraph_included_influence(Outer, Inner);
+ }
+ // True when both corners of Par2's paragraph box lie inside the influence
+ // box of Par1's paragraph.
+ inline bool paragraph_included_influence(Label Par1, Label Par2)
+ {
+ if(!paragraphs_bbox_influ[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmin()))
+ { return false; }
+ return paragraphs_bbox_influ[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmax());
+ }
+
+ // Point overload: delegates with the labels stored in img_influ under both
+ // points.
+ inline bool paragraph_included(point2d Par1, point2d Par2)
+ {
+ const Label Outer = img_influ(Par1);
+ const Label Inner = img_influ(Par2);
+ return paragraph_included(Outer, Inner);
+ }
+ // True when both corners of Par2's paragraph box lie inside Par1's
+ // paragraph box.
+ inline bool paragraph_included(Label Par1, Label Par2)
+ {
+ if(!paragraphs_bbox[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmin()))
+ { return false; }
+ return paragraphs_bbox[paragraphs_union[Par1]].has(paragraphs_bbox[paragraphs_union[Par2]].pmax());
+ }
+
+ // Point overload: delegates with the labels stored in img_influ under both
+ // points.
+ inline bool line_influence_reciprocal(const point2d& L1, const point2d& L2)
+ {
+ const Label First = img_influ(L1);
+ const Label Second = img_influ(L2);
+ return line_influence_reciprocal(First, Second);
+ }
+
+ // True when either line's influence box contains the pmin or the pmax
+ // corner of the other line's influence box.
+ inline bool line_influence_reciprocal(Label L1, Label L2)
+ {
+ if(lines_influ_bbox[lines_union[L1]].has(lines_influ_bbox[lines_union[L2]].pmin()))
+ { return true; }
+ if(lines_influ_bbox[lines_union[L1]].has(lines_influ_bbox[lines_union[L2]].pmax()))
+ { return true; }
+ if(lines_influ_bbox[lines_union[L2]].has(lines_influ_bbox[lines_union[L1]].pmin()))
+ { return true; }
+ return lines_influ_bbox[lines_union[L2]].has(lines_influ_bbox[lines_union[L1]].pmax());
+ }
+
+ // Point overload: delegates with the labels stored in img_influ under both
+ // points.
+ inline bool allign_size_large_inside( const point2d& Left, const point2d& Right)
+ {
+ const Label LabelL = img_influ(Left);
+ const Label LabelR = img_influ(Right);
+ return allign_size_large_inside(LabelL, LabelR);
+ }
+
+ // True when the larger side of Right (per label_size_) is strictly between
+ // a fifth of and the whole of the larger side of Left (integer arithmetic).
+ inline bool allign_size_large_inside( const Label Left, const Label Right)
+ {
+ short int LargestL = label_size_(0, Left);
+ short int LargestR = label_size_(0, Right);
+ short int OtherL = label_size_(1, Left);
+ short int OtherR = label_size_(1, Right);
+ if(LargestL < OtherL)
+ { LargestL = OtherL; }
+ if(LargestR < OtherR)
+ { LargestR = OtherR; }
+ if(LargestR <= (LargestL / 5))
+ { return false; }
+ return LargestR < (LargestL);
+ }
+ // Point overload: resolves Par to its label through img_influ and delegates.
+ inline bool paragraph_has(point2d Par, point2d Point)
+ { return paragraph_has(img_influ(Par), Point); }
+
+ // True when Point lies inside the bounding box of Par's paragraph.
+ // Uses paragraphs_bbox, mirroring line_has which uses lines_bbox; the
+ // previous body indexed the function name itself, which cannot compile.
+ inline bool paragraph_has(Label Par, point2d Point)
+ { return paragraphs_bbox[paragraphs_union[Par]].has(Point); }
+ // Point overload: resolves Line to its label through img_influ and delegates.
inline bool line_has(point2d Line, point2d Point)
{ return line_has(img_influ(Line), Point); }
@@ -965,6 +1557,13 @@ namespace mymln
+ // True when Point lies inside the bounding box of Line's line.
inline bool line_has(Label Line, point2d Point)
{ return lines_bbox[lines_union[Line]].has(Point); }
+ // Point overload: resolves Line to its label through img_influ and delegates.
+ inline bool line_influence_has(point2d Line, point2d Point)
+ {
+ const Label LineLabel = img_influ(Line);
+ return line_influence_has(LineLabel, Point);
+ }
+
+ // True when Point lies inside the influence box of Line's line.
+ inline bool line_influence_has(Label Line, point2d Point)
+ {
+ return lines_influ_bbox[lines_union[Line]].has(Point);
+ }
+
+
+ // Point overload: delegates with the label stored in img_influ at the point.
inline unsigned int get_beginning_of_line(point2d point)
{ return get_beginning_of_line(img_influ(point)); }
@@ -991,6 +1590,7 @@ namespace mymln
lines_first_label.fill(0);
lines_last_label.fill(0);
lines_len.fill(0);
+
start_lines_mask(0) = false;
end_lines_mask(0) = false;
@@ -1007,6 +1607,7 @@ namespace mymln
end_lines_mask = fun::i2v::array<bool>(Areas_Number_);
start_end_lines_mask = fun::i2v::array<bool>(Areas_Number_);
lines_bbox = mln::util::array<box2d>(NLine + 1);
+ lines_influ_bbox = mln::util::array<box2d>(NLine + 1);
lines_len.fill(0);
start_lines_mask(0) = false;
end_lines_mask(0) = false;
@@ -1076,7 +1677,68 @@ namespace mymln
+ // True when lbl has a non-zero entry in implicit_separators_union.
inline bool contain_implicit_separator(const Label lbl)
{return implicit_separators_union[lbl] != 0; }
+ // Point overload: merges the labels stored in img_influ under A and B.
+ inline void merge(const point2d& A, const point2d& B)
+ {
+ merge(img_influ(A), img_influ(B));
+ }
+ // Merges label B into label A. Does nothing when either label is 0, is
+ // already killed, or when A == B. Otherwise: B's center pixel in img_influ
+ // is relabelled to A, A's box absorbs B's box, B is flagged in kill_mask,
+ // the line links are updated, B's start/end-of-line flags are handed over
+ // to A (only when A is a letter), and every mask of B is cleared.
+ inline void merge(const Label A, const Label B)
+ {
+ if( A && B && !kill_mask(A) && !kill_mask(B) && A != B)
+ {
+ img_influ(_bboxgp[B].pcenter()) = A;
+ _bboxgp[A].merge(_bboxgp[B]);
+ _bboxgp[B] = box2d();
+ kill_mask(B) = true;
+ // Line bookkeeping depends on the letter status of A and B.
+ // NOTE(review): add_self_link / add_link semantics are defined outside
+ // this view -- confirm the argument order of the add_link calls.
+ if(letters_mask(A) && letters_mask(B))
+ {
+ if(lines_union.is_self_link(B))
+ {
+ lines_union.add_self_link(A);
+ lines_union.add_link(A, B);
+ }
+ }
+ else if(alone_letters_mask(A) && letters_mask(B))
+ {
+ alone_letters_mask(A) = false;
+ letters_mask(A) = true;
+ all_letters_mask(A) = true;
+ if(lines_union.is_self_link(B))
+ {
+ lines_union.add_self_link(A);
+ lines_union.add_link(A, B);
+ }
+ }
+ else if(letters_mask(B))
+ {
+ add_letter_coerce(A);
+ lines_union.add_link(B, A);
+ }
+
+
+
+ implicit_separators_left_mask(B) = false;
+ implicit_separators_right_mask(B) = false;
+ noise_mask(B) = false;
+ alone_letters_mask(B) = false;
+ all_letters_mask(B) = false;
+ letters_mask(B) = false;
+ separators_mask(B) = false;
+ containers_mask(B) = false;
+ Hseparator_mask(B) = false;
+ Vseparator_mask(B) = false;
+ // Hand B's line-boundary flags over to A before clearing them.
+ // start_end_lines_mask(B) used to be cleared above this point, which
+ // made the third propagation below unreachable.
+ if(letters_mask(A) && start_lines_mask(B)){start_lines_mask(A) = true;}
+ if(letters_mask(A) && end_lines_mask(B)){end_lines_mask(A) = true;}
+ if(letters_mask(A) && start_end_lines_mask(B)){start_end_lines_mask(A) = true;}
+
+
+ start_lines_mask(B) = false;
+ end_lines_mask(B) = false;
+ start_end_lines_mask(B) = false;
+
+ }
+ }
inline void add_to_separator_left(const point2d& point)
@@ -1106,6 +1768,23 @@ namespace mymln
point2d p = _bboxgp[i].pcenter();
return p;
}
+ // Resets every boolean tag in Btag_lbl to false.
+ inline void reset_tag_bool()
+ {
+ Btag_lbl.fill(false);
+ }
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline void tag_label_bool(const point2d& point, bool tag)
+ {
+ const Label lbl = img_influ(point);
+ tag_label_bool(lbl, tag);
+ }
+ // Stores the boolean tag of label lbl in Btag_lbl.
+ inline void tag_label_bool(Label lbl, bool tag)
+ {
+ Btag_lbl[lbl] = tag;
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline bool get_tag_bool(const point2d& point)
+ {
+ const Label lbl = img_influ(point);
+ return get_tag_bool(lbl);
+ }
+ // Returns the boolean tag stored in Btag_lbl for label lbl.
+ inline bool get_tag_bool(Label lbl)
+ {
+ return Btag_lbl[lbl];
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline std::string get_tag(const point2d& point)
+ {
+ const Label lbl = img_influ(point);
+ return get_tag(lbl);
+ }
+ // Returns the string tag stored in tag_lbl for label lbl.
+ inline std::string get_tag(Label lbl)
+ {
+ return tag_lbl[lbl];
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
inline void tag_label(const point2d& point, std::string tag)
{ tag_label(img_influ(point), tag);}
inline void tag_label(Label lbl, std::string tag)
@@ -1125,26 +1804,140 @@ namespace mymln
- inline void lines_iter_valid()
+ // True while the sequence cursor SeqP is below Areas_Number_.
+ // Declared bool: the body returns a value, which a void function cannot do.
+ inline bool lines_iter_valid()
{ return SeqP < Areas_Number_; }
-
+ // Re-runs the paragraph cooking pass on the already allocated arrays:
+ // paragraphs_len is zeroed first, then cook_paragraphs_ is invoked.
+ inline void recook_paragraphs()
+ {
+ paragraphs_len.fill(0);
+
+ cook_paragraphs_();
+ }
+ // Allocates the per-paragraph arrays (NPar + 1 entries each) and runs the
+ // paragraph cooking pass.
inline void cook_paragraphs()
{
paragraphs_bbox = mln::util::array<box2d>(NPar + 1);
+ paragraphs_len = mln::util::array<unsigned int>(NPar + 1);
+ paragraphs_first_line = mln::util::array<unsigned int>(NPar + 1);
+ paragraphs_bbox_influ = mln::util::array<box2d>(NPar + 1);
cook_paragraphs_();
}
+ // Allocates lines_space (NLine + 1 zeroed entries) and fills it through
+ // compute_letter_middle_space_.
+ inline void compute_letter_middle_space()
+ {
+ lines_space = mln::util::array<unsigned int>(NLine + 1);
+ lines_space.fill(0);
+
+ compute_letter_middle_space_();
+ }
+ // Allocates lines_height (NLine + 1 zeroed entries) and fills it through
+ // compute_letter_middle_height_.
+ inline void compute_letter_middle_height()
+ {
+ lines_height = mln::util::array<unsigned int>(NLine + 1);
+ lines_height.fill(0);
+
+ compute_letter_middle_height_();
+ }
+ // Allocates lines_width (NLine + 1 zeroed entries) and fills it through
+ // compute_letter_middle_width_.
+ inline void compute_letter_middle_width()
+ {
+ lines_width = mln::util::array<unsigned int>(NLine + 1);
+ lines_width.fill(0);
+
+ compute_letter_middle_width_();
+ }
+ // Recomputes lines_space in place: zeroes it, then runs the space pass.
+ // (It previously ran the height pass, which left lines_space at zero and
+ // accumulated into lines_height instead.)
+ inline void recompute_letter_middle_space()
+ {
+ lines_space.fill(0);
+ compute_letter_middle_space_();
+ }
+ // Recomputes lines_height in place: zeroes it, then runs the height pass.
+ inline void recompute_letter_middle_height()
+ {
+ lines_height.fill(0);
+
+ compute_letter_middle_height_();
+ }
+ // Recomputes lines_width in place: zeroes it, then runs the width pass.
+ // (It previously ran the height pass, which left lines_width at zero and
+ // accumulated into lines_height instead.)
+ inline void recompute_letter_middle_width()
+ {
+ lines_width.fill(0);
+ compute_letter_middle_width_();
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline unsigned int get_letter_middle_space(const point2d& point)
+ {
+ const Label lbl = img_influ(point);
+ return get_letter_middle_space(lbl);
+ }
+ // Returns the lines_space entry of the line that lbl belongs to.
+ inline unsigned int get_letter_middle_space(const Label lbl)
+ {
+ return lines_space[lines_union[lbl]];
+ }
+
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline unsigned int get_letter_middle_height(const point2d& point)
+ {
+ const Label lbl = img_influ(point);
+ return get_letter_middle_height(lbl);
+ }
+ // Returns the lines_height entry of the line that lbl belongs to.
+ inline unsigned int get_letter_middle_height(const Label lbl)
+ {
+ return lines_height[lines_union[lbl]];
+ }
+
+ // Point overload: delegates with the label stored in img_influ at the point.
+ inline unsigned int get_letter_middle_width(const point2d& point)
+ {
+ const Label lbl = img_influ(point);
+ return get_letter_middle_width(lbl);
+ }
+ // Returns the lines_width entry of the line that lbl belongs to.
+ inline unsigned int get_letter_middle_width(const Label lbl)
+ {
+ return lines_width[lines_union[lbl]];
+ }
+
+ // Returns the lines_union entry of lbl, i.e. the ID of its line.
+ inline unsigned int get_line_ID(const Label lbl)
+ { return lines_union[lbl]; }
+ // Returns the paragraphs_first_line entry of the paragraph lbl belongs to.
+ inline unsigned int get_first_line_ID(const Label lbl)
+ { return paragraphs_first_line[paragraphs_union[lbl]]; }
+ // Returns first_line, the starting point of the line sequence.
+ inline unsigned int get_first_line()
+ { return first_line; }
+ // Returns the first label recorded for the given line ID.
+ inline unsigned int get_first_letter(const unsigned int line_ID)
+ { return lines_first_label[line_ID]; }
+ // Advances line_ID to the entry stored for it in lines_seq_pos. A line whose
+ // entry refers to itself is first reset to 0, so the result is then
+ // lines_seq_pos[0].
+ inline void get_next_line(int& line_ID)
+ {
+ const bool SelfLinked = (lines_seq_pos[line_ID] == line_ID);
+ if(SelfLinked)
+ { line_ID = 0; }
+ line_ID = lines_seq_pos[line_ID];
+ }
+ // Same as above for an unsigned line ID.
+ inline void get_next_line(unsigned int& line_ID)
+ {
+ const bool SelfLinked = (lines_seq_pos[line_ID] == line_ID);
+ if(SelfLinked)
+ { line_ID = 0; }
+ line_ID = lines_seq_pos[line_ID];
+ }
+ // Advances lbl to the entry stored for it in lines_seq. A label whose entry
+ // refers to itself is first reset to 0, so the result is then lines_seq[0].
+ // NOTE(review): if Label is int or unsigned int, this overload duplicates
+ // one of the two below -- confirm the Label type.
+ inline void get_next_letter(Label& lbl)
+ {
+ const bool SelfLinked = (lines_seq[lbl] == lbl);
+ if(SelfLinked)
+ { lbl = 0; }
+ lbl = lines_seq[lbl];
+ }
+ // Same as above for an int label.
+ inline void get_next_letter(int& lbl)
+ {
+ const bool SelfLinked = (lines_seq[lbl] == lbl);
+ if(SelfLinked)
+ { lbl = 0; }
+ lbl = lines_seq[lbl];
+ }
+ // Same as above for an unsigned int label.
+ inline void get_next_letter(unsigned int& lbl)
+ {
+ const bool SelfLinked = (lines_seq[lbl] == lbl);
+ if(SelfLinked)
+ { lbl = 0; }
+ lbl = lines_seq[lbl];
+ }
+ // Builds the text of line ID by walking its letters from
+ // get_first_letter(ID) through get_next_letter until label 0. Each letter
+ // contributes its tag, or "?" when the tag is empty; a " " is inserted
+ // before a letter when its space() to the previous letter exceeds twice the
+ // line's get_letter_middle_space value.
+ inline std::string get_line_string(const unsigned int ID)
+ {
+ std::string Text;
+ unsigned int Previous = 0;
+ for(int N = get_first_letter(ID); N != 0; get_next_letter(N))
+ {
+ if(Previous && space(Previous,N) > get_letter_middle_space(N) * 2)
+ { Text += " "; }
+
+ const std::string Tag = get_tag(N);
+ if(Tag.empty())
+ { Text += "?"; }
+ else
+ { Text += Tag; }
+ Previous = N;
+ }
+ return Text;
+ }
private:
fun::i2v::array<bool> implicit_separators_left_mask;
fun::i2v::array<bool> implicit_separators_right_mask;
mln::util::array<unsigned int> separators_len_right;
mln::util::array<unsigned int> separators_len_left;
mln::util::array<unsigned int> separators_middle;
-
+ mln::util::array<unsigned int> separators_marging;
inline void cook_separators_()
{
implicit_separators_left_mask(0) = false;
- for(unsigned int N = 1; N < implicit_separators_union.size(); N++)
+ for(int N = 1; N < implicit_separators_union.size(); N++)
{
if(implicit_separators_union[N] != 0)
{
@@ -1160,11 +1953,8 @@ namespace mymln
/* processor */
for(unsigned int N = 1; N < NImpSep + 1; N++)
{
- if(separators_len_left[N] != 0)
- {
if(separators_len_left[N] != 0)
separators_middle[N] /= separators_len_left[N];
- }
}
@@ -1177,27 +1967,27 @@ namespace mymln
implicit_separators_left_mask(N) = false;
}
else if (
- _bboxgp[N].pmin()[1] < separators_middle[implicit_separators_union[N]] - 10 ||
- _bboxgp[N].pmin()[1] > separators_middle[implicit_separators_union[N]] + 10
+ _bboxgp[N].pmin()[1] < separators_middle[implicit_separators_union[N]] - _bboxgp[N].len(1) * 2 ||
+ _bboxgp[N].pmin()[1] > separators_middle[implicit_separators_union[N]] + _bboxgp[N].len(1) * 2
)
{
-
+ /*
separators_len_left[implicit_separators_union[N]]--;
implicit_separators_union[N] = 0;
- implicit_separators_left_mask(N) = false;
+ implicit_separators_left_mask(N) = false;*/
}
}
for(unsigned int N = 1; N < Areas_Number_; N++)
{
if(!start_lines_mask(N) || implicit_separators_union[N] == 0)
{
- if( separators_len_left[implicit_separators_union[N]] > 0)
+ if( separators_len_left[implicit_separators_union[N]] > 0)
separators_len_left[implicit_separators_union[N]]--;
}
}
for(unsigned int N = 1; N < Areas_Number_; N++)
{
- if(separators_len_left[implicit_separators_union[N]] < 2)
+ if(separators_len_left[implicit_separators_union[N]] < 1)
{
separators_len_left[implicit_separators_union[N]] = 0;
implicit_separators_union[N] = 0;
@@ -1225,11 +2015,8 @@ namespace mymln
/* processor */
for(unsigned int N = 1; N < NImpSep + 1; N++)
{
- if(separators_len_right[N] != 0)
- {
if(separators_len_right[N] != 0)
separators_middle[N] /= separators_len_right[N];
- }
}
@@ -1247,9 +2034,9 @@ namespace mymln
)
{
- separators_len_right[implicit_separators_union[N]]--;
+ /*separators_len_right[implicit_separators_union[N]]--;
implicit_separators_union[N] = 0;
- implicit_separators_right_mask(N) = false;
+ implicit_separators_right_mask(N) = false;*/
}
}
for(unsigned int N = 1; N < Areas_Number_; N++)
@@ -1262,7 +2049,7 @@ namespace mymln
}
for(unsigned int N = 1; N < Areas_Number_; N++)
{
- if(separators_len_right[implicit_separators_union[N]] < 2)
+ if(separators_len_right[implicit_separators_union[N]] < 1)
{
separators_len_right[implicit_separators_union[N]] = 0;
implicit_separators_union[N] = 0;
@@ -1274,57 +2061,150 @@ namespace mymln
// PRIVATE DATA ON LINES
mln::util::array<unsigned int> lines_len;
+ mln::util::array<unsigned int> lines_height;
+ mln::util::array<unsigned int> lines_width;
+ mln::util::array<unsigned int> lines_space;
mln::util::array<unsigned int> lines_first_label;
mln::util::array<unsigned int> lines_last_label;
mln::util::array<unsigned int> lines_seq;
mln::util::array<unsigned int> lines_seq_pos;
mln::util::array<box2d> lines_bbox;
+ mln::util::array<box2d> lines_influ_bbox;
mln::util::array<Label> lines_split;
fun::i2v::array<bool> start_lines_mask;
fun::i2v::array<bool> end_lines_mask;
fun::i2v::array<bool> start_end_lines_mask;
+ unsigned int first_line;
unsigned int SeqP;
+ // Fills lines_width with the average dimension-1 extent of the labels of
+ // each line: per-label extents are summed per line, then divided by
+ // lines_len. lines_width is expected to be allocated and zeroed by the
+ // caller.
+ inline void compute_letter_middle_width_()
+ {
+ for(unsigned int N = 1; N < Areas_Number_; N++)
+ {
+ if(lines_union[N])
+ {
+ lines_width[lines_union[N]] += _bboxgp[N].len(1);
+ }
+ }
+ // Iterate over lines_width itself; the bound used to be
+ // lines_height.size(), which is unrelated to this array.
+ for(unsigned int N = 1; N < lines_width.size(); N++)
+ {
+ if(lines_len[N])
+ lines_width[N] /= lines_len[N];
+ }
+ }
+
+ // Fills lines_height with the average dimension-0 extent of the labels of
+ // each line: per-label extents are summed per line, then divided by
+ // lines_len. lines_height is expected to be allocated and zeroed by the
+ // caller.
+ inline void compute_letter_middle_height_()
+ {
+ // Accumulate the extent of every label that belongs to a line.
+ for(unsigned int Lbl = 1; Lbl < Areas_Number_; Lbl++)
+ {
+ if(!lines_union[Lbl])
+ { continue; }
+ lines_height[lines_union[Lbl]] += _bboxgp[Lbl].len(0);
+ }
+ // Turn the sums into averages, skipping lines without any counted label.
+ for(unsigned int Line = 1; Line < lines_height.size(); Line++)
+ {
+ if(!lines_len[Line])
+ { continue; }
+ lines_height[Line] /= lines_len[Line];
+ }
+ }
+
+ // Fills lines_space with the average gap between the labels of each line:
+ // the dimension-1 extents of the labels are summed per line, and the part
+ // of the line box extent they leave free is divided by the number of gaps
+ // (lines_len - 1). Lines with fewer than two labels, or whose labels sum to
+ // more than the line box extent, get 0. lines_space is expected to be
+ // allocated and zeroed by the caller.
+ inline void compute_letter_middle_space_()
+ {
+
+ for(unsigned int N = 1; N < Areas_Number_; N++)
+ {
+ if(lines_union[N])
+ {
+ lines_space[lines_union[N]] += _bboxgp[N].len(1);
+ }
+ }
+ for(unsigned int N = 1; N < lines_space.size(); N++)
+ {
+ // lines_len is unsigned: test "> 1" directly, since "len - 1 > 0"
+ // wraps around and holds for an empty line.
+ if(lines_len[N] > 1)
+ {
+
+ if(lines_space[N] > lines_bbox[N].len(1))
+ lines_space[N] = 0;
+ else
+ {
+ lines_space[N] = (lines_bbox[N].len(1) - lines_space[N]) / (lines_len[N] - 1);
+ }
+ }
+ else
+ {
+ lines_space[N] = 0;
+ }
+
-
+ }
+ }
+ // Builds the iteration chains used by get_next_letter / get_next_line:
+ // lines_seq[label] is the next letter on the same line and
+ // lines_seq_pos[line] is the next line. A graph pass over the letter
+ // vertices fills both, then a linear pass rewrites lines_seq_pos so that
+ // non-empty lines are chained in increasing line ID order and first_line
+ // is set. Progress information is printed on std::cout.
inline void cook_lines_iter_()
{
+ first_line = 0;
lines_seq = mln::util::array<unsigned int>(Areas_Number_);
lines_seq_pos = mln::util::array<unsigned int>(NLine + 1);
lines_seq.fill(0);
lines_seq_pos.fill(0);
- for(unsigned int N = 0; N < NLine + 1; N++)
- {
- lines_seq[SeqP] = lines_first_label[N];
- lines_seq_pos[N] = SeqP;
- SeqP += lines_len[N];
- }
- for(unsigned int N = 1; N < Areas_Number_; N++)
+
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
{
- if(contain_line(N) && !start_lines_mask(N))
+ if(contain_letter(v))
+ for_all(q)
{
- SeqP = lines_seq_pos[get_beginning_of_line(N)];
- SeqP++;
-
- while( lines_seq[SeqP] && _bboxgp[lines_seq[SeqP]].pmin()[1] < _bboxgp[N].pmin()[1] )
- SeqP++;
- if(!lines_seq[SeqP])
- {lines_seq[SeqP] = N;}
- else
- {
- unsigned int Swap1, Swap2;
- Swap1 = lines_seq[SeqP];
- lines_seq[SeqP] = N;
- while(lines_seq[SeqP])
+ if(contain_letter(q))
+ if(same_line(q, v))
{
- Swap2 = lines_seq[SeqP];
- lines_seq[SeqP] = Swap1;
- Swap1 = Swap2;
+ // Same line: when q ends before v starts along dimension 1, v becomes
+ // q's successor unless the recorded successor starts no later than v.
+ if(_bboxgp[img_influ(q)].pmax()[1] < _bboxgp[img_influ(v)].pmin()[1])
+ {
+ if(lines_seq[img_influ(q)])
+ {
+ if(_bboxgp[lines_seq[img_influ(q)]].pmin()[1] > _bboxgp[img_influ(v)].pmin()[1])
+ lines_seq[img_influ(q)] = img_influ(v);
+ }
+ else
+ lines_seq[img_influ(q)] = img_influ(v);
+ }
+ }
+ else
+ {
+ // Different lines (this else pairs with the same_line test): when q's
+ // line ends before v's line starts along dimension 0, v's line becomes
+ // the successor of q's line unless the recorded one starts no later.
+ if(get_line_bbox(q).pmax()[0] < get_line_bbox(v).pmin()[0])
+ {
+ if(lines_seq_pos[lines_union[img_influ(q)]])
+ {
+ if(lines_bbox[lines_seq_pos[lines_union[img_influ(q)]]].pmin()[0] > get_line_bbox(v).pmin()[0])
+ lines_seq_pos[lines_union[img_influ(q)]] =lines_union[img_influ(v)];
+ }
+ else
+ lines_seq_pos[lines_union[img_influ(q)]] = lines_union[img_influ(v)];
+
+ }
}
- lines_seq[SeqP] = Swap1;
- }
}
}
+ std::cout << "end graph cooking";
+ // Linear pass: chain every non-empty line to the next non-empty line ID.
+ // NOTE(review): the last non-empty line keeps whatever successor the graph
+ // pass stored for it -- confirm that get_next_line terminates there.
+ unsigned int Last = 0;
+ int count = 0;
+ for(unsigned int N = 1; N < lines_seq_pos.size() && N < lines_len.size(); N++)
+ {
+ if(lines_len[N] && !first_line)
+ {first_line = N;}
+ if(lines_len[N] && Last)
+ {lines_seq_pos[Last] = N;}
+ if(lines_len[N])
+ {Last = N; std::cout << lines_len[N] << endl; count++;}
+
+ }
+ std::cout << count << endl;
+ std::cout << "linear";
}
inline void cook_lines_()
@@ -1332,7 +2212,7 @@ namespace mymln
Cooked_CLine = CLine;
for(unsigned int N = 1; N < lines_union.size(); N++)
{
- if(lines_union[N] != 0)
+ if(lines_union[N] != 0 && !kill_mask(N))
{
/* APPROXIMATE THE NUMBER OF CHAR IN THE LINE */
lines_len[lines_union[N]]++;
@@ -1360,6 +2240,7 @@ namespace mymln
if( lines_first_label[N] != 0)
{
lines_bbox[N] = box2d();
+
start_lines_mask(lines_first_label[N]) = true;
end_lines_mask(lines_last_label[N]) = true;
start_end_lines_mask(lines_first_label[N]) = true;
@@ -1381,11 +2262,25 @@ namespace mymln
lines_bbox[lines_union[N]].merge(_bboxgp[N]);
}
if(lines_len[lines_union[N]] == 1)
- { letters_mask(N) = false; alone_letters_mask(N) = true; }
+ {
+ letters_mask(N) = false;
+ alone_letters_mask(N) = true;
+ all_letters_mask(N) = true;
+ end_lines_mask(N) = true;
+ start_lines_mask(N)= true;
+ start_end_lines_mask(N) = true;
+ }
else if(lines_union[N])
- { letters_mask(N) = true; alone_letters_mask(N) = false; }
+ {
+ letters_mask(N) = true;
+ alone_letters_mask(N) = false;
+ all_letters_mask(N) = true;
+ }
+ }
+ for(unsigned int N = 1; N < lines_bbox.size(); N++)
+ {
+ lines_influ_bbox[N] = lines_bbox[N].to_larger(lines_bbox[N].len(0) / 3);
}
-
}
@@ -1430,7 +2325,13 @@ namespace mymln
{
Data SX = label_size_(0, label);
Data SY = label_size_(1, label);
- return SX >= Min && SY >= Min;
+ return SX >= Min && SY >= Min ;
+ }
+ /* Size filter with a "large" escape hatch: accepts the label when both of
+    its extents reach Min, or when either extent alone reaches twice Min. */
+ inline bool label_valid_size_Min_Large_(Label label, Data Min)
+ {
+   const Data extent0 = label_size_(0, label);
+   const Data extent1 = label_size_(1, label);
+   if (extent0 >= Min && extent1 >= Min)
+     return true;
+   return extent0 >= Min * 2 || extent1 >= Min * 2;
}
inline bool label_valid_ratio_(Label label, Float Min, Float Max)
{
@@ -1500,8 +2401,10 @@ namespace mymln
fun::i2v::array<bool> all_letters_mask;
fun::i2v::array<bool> containers_mask;
fun::i2v::array<bool> noise_mask;
+ fun::i2v::array<bool> kill_mask;
mln::util::array<std::string> tag_lbl;
+ mln::util::array<bool> Btag_lbl;
unsigned int Cooked_CLine;
unsigned int CLine;
@@ -1521,36 +2424,90 @@ namespace mymln
mln::util::array<unsigned int> paragraphs_first_label;
mln::util::array<unsigned int> paragraphs_last_label;
mln::util::array<unsigned int> paragraphs_assoc;
+ mln::util::array<unsigned int> paragraphs_len;
+ /* NOTE THESE ARRAYS MUST BE INITIALIZED WITH THE NUMBER OF PARAGRAPHS */
+
+
mln::util::array<box2d> paragraphs_bbox;
-
+ mln::util::array<box2d> paragraphs_bbox_influ;
+ mln::util::array<unsigned int> paragraphs_first_line;
+ inline void first_recognition()
+ {
+ /* Placeholder: the body is empty, so calling this has no effect. */
+ }
+
+ /* Rebuilds the per-paragraph summaries: clears paragraphs_bbox, counts the lines of each paragraph and tracks its first line, merges line boxes into paragraph boxes, then derives paragraphs_bbox_influ. The former label-linking pass below is commented out. NOTE(review): paragraphs_len and paragraphs_first_line are not reset here - presumably the caller does it; confirm. */
inline void cook_paragraphs_()
{
- mln::util::array<unsigned int> paragraphs_assoc(lines_union.size());
- for(int N = 0; N < paragraphs_union.size(); N++)
+ /* mln::util::array<unsigned int> paragraphs_assoc(lines_union.size());
+
+ for(int N = 1; N < paragraphs_union.size(); N++)
{
- if(paragraphs_union[N])
+ if(paragraphs_union[N] && lines_union[N] && !start_lines_mask)
{
if(paragraphs_assoc[lines_union[N]])
- { paragraphs_union.add_link(N, paragraphs_assoc[lines_union[N]]); }
+ { paragraphs_union.add_link(paragraphs_assoc[lines_union[N]], N); }
else
- {paragraphs_assoc[lines_union[N]] = N;}
+ {paragraphs_assoc[lines_union[N]] = get_beginning_of_line(N);}
}
}
- paragraphs_union.propage_links();
+ paragraphs_union.propage_links();*/
for(int N = 0; N < paragraphs_bbox.size(); N++)
{
paragraphs_bbox[N] = box2d();
}
+ /* For every line with a non-zero lines_len whose first label belongs to a paragraph: count it in paragraphs_len and keep, as that paragraph's first line, the line whose lines_bbox has the smallest pmin()[0]. */
+ for(int N = 0; N < lines_len.size(); N++)
+ {
+ if(lines_len[N] && paragraphs_union[lines_first_label[N]])
+ {
+ paragraphs_len[paragraphs_union[lines_first_label[N]]]++;
+ if(paragraphs_first_line[paragraphs_union[lines_first_label[N]]])
+ {
+ if(
+ lines_bbox[paragraphs_first_line[paragraphs_union[lines_first_label[N]]]].pmin()[0] >
+ lines_bbox[N].pmin()[0]
+ )
+ {
+ paragraphs_first_line[paragraphs_union[lines_first_label[N]]] = N;
+ }
+ }
+ else
+ paragraphs_first_line[paragraphs_union[lines_first_label[N]]] = N;
+ }
+ }
+ /* Merge each label's line box into its paragraph box; labels whose paragraph has no counted line are detached by setting paragraphs_union[N] to 0. */
for(int N = 0; N < paragraphs_union.size(); N++)
{
- if(paragraphs_union[N])
+ if(paragraphs_union[N] && paragraphs_len[paragraphs_union[N]])
+ {
paragraphs_bbox[paragraphs_union[N]].merge(lines_bbox[lines_union[N]]);
+ }
+ else
+ {
+ paragraphs_union[N] = 0;
+ }
}
+ /* Influence box of a counted paragraph: its box passed through to_larger() with a margin of one tenth of len(0) of its first line's box. */
+
+ for(int N = 0; N < paragraphs_len.size(); N++)
+ {
+ if(paragraphs_len[N])
+ {
+ paragraphs_bbox_influ[N] = paragraphs_bbox[N].to_larger(lines_bbox[paragraphs_first_line[N]].len(0) / 10);
+ }
+ }
}
-
+ /* Debug aid: prints a warning on std::cout when index N is not below
+    array.size(); stays silent (and changes nothing) otherwise. */
+ template<typename T>
+ void debug_assert_array_(mln::util::array<T>& array, int N, const std::string& name)
+ {
+   const bool index_in_range = N < array.size();
+   if (index_in_range)
+     return;
+   std::cout << "WARNING : " << name << " " << N << " " << "is invalid" << endl;
+ }
@@ -1574,6 +2531,9 @@ namespace mymln
g_vertices_p _area_graph;
mln::image2d<Label> img;
mln::image2d<Label> img_influ;
+ mln::image2d<bool> debug_source;
+ mln::image2d<value::rgb8> debug_buffer;
+ bool debug_buffer_enable;
Label Areas_Number_;
/* IMPLICIT SEPARATOR DETECTION */
diff --git a/scribo/sandbox/raphael/code/my/document/letters.hh b/scribo/sandbox/raphael/code/my/document/letters.hh
index 6701943..f0249ae 100644
--- a/scribo/sandbox/raphael/code/my/document/letters.hh
+++ b/scribo/sandbox/raphael/code/my/document/letters.hh
@@ -9,9 +9,17 @@ namespace mymln
{
namespace document
{
- void clean_letter_aberation()
+ /* Marks abnormally tall letters as noise.
+  *
+  * For every index N that is linked in letters_union, the element is handed
+  * to doc.add_noise() when its bounding box len(0) exceeds three times
+  * doc.get_letter_middle_height(N).
+  *
+  * Fixes over the previous revision: the comparison was a discarded
+  * expression (so every linked element was flagged), the parameter list
+  * ended with a stray comma, `box2` named no known type (box2d is used
+  * everywhere else), and doc was taken by value so the noise marks were
+  * applied to a temporary copy.
+  *
+  * NOTE(review): bouningbox_letters and middle_box_lines are currently
+  * unused; they are kept so the parameter list stays as declared.
+  */
+ template<typename L, typename F, typename D>
+ void clean_letters_aberations_big(document<L,F,D>& doc, mln::util::array<box2d> bouningbox_letters, mln::util::array<box2d> middle_box_lines, mymln::util::union_find<L> letters_union)
{
-
+   for(int N = 0; N < letters_union.size(); N++)
+   {
+     if(letters_union[N])
+     {
+       /* Only letters much taller than the typical letter height are noise. */
+       if(doc.get_letter_middle_height(N) * 3 < doc.get_bbox(N).len(0))
+         doc.add_noise(N);
+     }
+   }
}
}
}
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/my/document/recognition.hh b/scribo/sandbox/raphael/code/my/document/recognition.hh
new file mode 100644
index 0000000..9c10cd9
--- /dev/null
+++ b/scribo/sandbox/raphael/code/my/document/recognition.hh
@@ -0,0 +1,74 @@
+#ifndef INC_RECOGNITION_DOC
+#define INC_RECOGNITION_DOC
+#include<my/document/document.hh>
+#include <mln/core/image/graph_elt_neighborhood.hh>
+#include <mln/core/image/vertex_image.hh>
+using namespace mln;
+
+namespace mymln
+{
+ namespace document
+ {
+ /* Tags candidate minus signs with "-".
+  *
+  * Walks the letter graph: for every vertex v that belongs to a line, each
+  * neighbour q that also belongs to a line, satisfies doc.line_median(q)
+  * and has letter_ratio_XY(q) >= 3 receives the tag "-". Paragraph links
+  * are propagated once the walk is over.
+  */
+ template<typename L, typename F, typename D>
+ void recognize_minus(mymln::document::document<L,F,D>& doc)
+ {
+   typedef vertex_image<point2d,bool> letters_image;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > vertex_sites;
+   typedef graph_elt_neighborhood_if<mln::util::graph, vertex_sites, letters_image> letters_nbh;
+
+   letters_image letters = doc.fun_mask_letters();
+   mln_piter_(letters_image) v(letters.domain());
+   letters_nbh adjacency(letters);
+   mln_niter_(letters_nbh) q(adjacency, v);
+
+   for_all(v)
+   {
+     if(!doc.contain_line(v))
+       continue;
+
+     for_all(q)
+     {
+       const bool looks_like_minus =
+         doc.contain_line(q) && doc.line_median(q) && doc.letter_ratio_XY(q) >= 3.0f;
+       if(looks_like_minus)
+         doc.tag_label(q, "-");
+     }
+   }
+
+   doc.propage_paragraph_link();
+ }
+ /* Tags small base-line marks as ".", "," or "_".
+  *
+  * For every vertex v that belongs to a line, each neighbour q on the same
+  * line that satisfies doc.line_base(q) and doc.line_size_small(q) is
+  * tagged from its letter_ratio_XY: strictly between 0.7 and 1.3 gives ".",
+  * at most 0.7 gives ",", anything else gives "_". Paragraph links are
+  * propagated once the walk is over.
+  */
+ template<typename L, typename F, typename D>
+ void recognize_dot(mymln::document::document<L,F,D>& doc)
+ {
+   typedef vertex_image<point2d,bool> letters_image;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > vertex_sites;
+   typedef graph_elt_neighborhood_if<mln::util::graph, vertex_sites, letters_image> letters_nbh;
+
+   letters_image letters = doc.fun_mask_letters();
+   mln_piter_(letters_image) v(letters.domain());
+   letters_nbh adjacency(letters);
+   mln_niter_(letters_nbh) q(adjacency, v);
+
+   for_all(v)
+   {
+     if(!doc.contain_line(v))
+       continue;
+
+     for_all(q)
+     {
+       const bool small_mark_on_base =
+         doc.contain_line(q) && doc.same_line(q,v) && doc.line_base(q) && doc.line_size_small(q);
+       if(!small_mark_on_base)
+         continue;
+
+       if(doc.letter_ratio_XY(q) > 0.7f && doc.letter_ratio_XY(q) < 1.3f)
+       {
+         doc.tag_label(q, ".");
+       }
+       else if(doc.letter_ratio_XY(q) <= 0.7f)
+       {
+         doc.tag_label(q, ",");
+       }
+       else
+       {
+         doc.tag_label(q, "_");
+       }
+     }
+   }
+
+   doc.propage_paragraph_link();
+ }
+ }
+}
+
+
+#endif
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/my/document/separator.hh b/scribo/sandbox/raphael/code/my/document/separator.hh
index 660bbed..2cd98ea 100644
--- a/scribo/sandbox/raphael/code/my/document/separator.hh
+++ b/scribo/sandbox/raphael/code/my/document/separator.hh
@@ -133,20 +133,72 @@ namespace mymln
if(doc.contain_implicit_separator(v))
{
bool All_Alone = true;
- doc.jump_to_line(v);
- if((!doc.contain_line(v)))
+
+
+ for_all(q)
{
- doc.add_to_line(v);
- doc.add_to_line_self_link(v);
+
+ if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v) )
+ {
+ // draw::line(out, q,v, mln::literal::blue);
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(v,q))
+ {
+ count[doc[q]]++;
+ }
+
+ }
+ else if (doc.contain_implicit_separator(q))
+ {
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(v,q) && doc.allign_proximity_strict(v, q))
+ {
+ count[doc[q]]++;
+ }
+ }
+
}
+ }
+ }
+ for(unsigned int N = 0; N < doc.size();N++)
+ {
+ if(count[N] > 0)
+ doc.invalidate_implicit_separator(N);
+ }
+ }
+
+ template<typename L, typename F, typename D>
+ void separators_make_clean_right(mymln::document::document<L,F,D>& doc)
+ {
+
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ mln::util::array<unsigned> count = mln::util::array<unsigned>(doc.size());
+ count.fill(0);
+ for_all(v)
+ {
+ if(doc.contain_implicit_separator(v))
+ {
+ bool All_Alone = true;
for_all(q)
{
- if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v))
+ if(doc.contain_implicit_separator(q) && doc.same_implicit_separator(q,v) )
{
// draw::line(out, q,v, mln::literal::blue);
- if(doc.allign_V(q,v) && doc.allign_size(q, v))
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(q,v))
+ {
+ count[doc[q]]++;
+ }
+
+ }
+ else if (doc.contain_implicit_separator(q))
+ {
+ if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(q,v) && doc.allign_proximity_strict(v, q))
{
count[doc[q]]++;
}
diff --git a/scribo/sandbox/raphael/code/my/runtime/lib.hh b/scribo/sandbox/raphael/code/my/runtime/lib.hh
new file mode 100644
index 0000000..2a36ef3
--- /dev/null
+++ b/scribo/sandbox/raphael/code/my/runtime/lib.hh
@@ -0,0 +1,180 @@
+#ifndef INC_RUNTIME_LIB
+#define INC_RUNTIME_LIB
+
+using namespace mln;
+using namespace std;
+namespace mymln
+{
+ namespace runtime
+ {
+ /* Registers the implicit-separator passes under their script names.
+  *
+  * run: the runtime whose function table receives the four bindings.
+  */
+ template<typename L, typename F, typename D>
+ void load_separators(runtime<L,F,D>& run)
+ {
+   run.add_function("separators.find_allign_right", &(mymln::document::separators::separators_find_allign_right));
+   run.add_function("separators.make_clean_right", &(mymln::document::separators::separators_make_clean_right));
+   run.add_function("separators.find_allign_left", &(mymln::document::separators::separators_find_allign));
+   /* Was bound to separators_find_allign by copy-paste, so a script could
+      never run the left-hand cleaning pass. */
+   run.add_function("separators.make_clean_left", &(mymln::document::separators::separators_make_clean));
+ }
+
+ /* Registers every cleaning pass of mymln::document under its "clean.*"
+    script name, in the same order as before. */
+ template<typename L, typename F, typename D>
+ void load_clean(runtime<L,F,D>& run)
+ {
+   namespace md = mymln::document;
+
+   /* item-level passes */
+   run.add_function("clean.containers_items", &md::clean_containers_items);
+   run.add_function("clean.letters_items", &md::clean_letters_items);
+   run.add_function("clean.get_lines", &md::clean_get_lines);
+
+   /* letter-level passes */
+   run.add_function("clean.letters_alone", &md::clean_letters_alone);
+   run.add_function("clean.included_letters", &md::clean_included_letters);
+   run.add_function("clean.dot_items", &md::clean_dot_items);
+   run.add_function("clean.quote_items", &md::clean_quote_items);
+   run.add_function("clean.between", &md::clean_between);
+
+   /* line-level passes */
+   run.add_function("clean.line_link_item", &md::clean_line_link_item);
+   run.add_function("clean.proximity_lines", &md::clean_proximity_lines);
+   run.add_function("clean.quote_lines", &md::clean_quote_lines);
+   run.add_function("clean.alone_letters_lines", &md::clean_alone_letters_lines);
+   run.add_function("clean.odd_letters", &md::clean_odd_letters);
+
+   /* paragraph-level and remaining passes */
+   run.add_function("clean.remove_alone_letter", &md::remove_alone_letter);
+   run.add_function("clean.paragraph_items", &md::clean_paragraph_items);
+   run.add_function("clean.paragraphs_up", &md::clean_paragraphs_up);
+   run.add_function("clean.paragraphs_large", &md::clean_paragraphs_large);
+   run.add_function("clean.included_paragraphs", &md::clean_included_paragraphs);
+   run.add_function("clean.backward_letters", &md::clean_backward_letters);
+   run.add_function("clean.paragraphs_tab", &md::clean_paragraphs_tab);
+   run.add_function("clean.proximity_letters", &md::clean_proximity_letters);
+ }
+
+ /* Script wrapper: forwards to doc.debug_save_all(file). */
+ template<typename L, typename F, typename D>
+ void lib_debug_save_all(mymln::document::document<L,F,D>& doc, std::string file)
+ {
+   doc.debug_save_all(file);
+ }
+
+ /* Script wrapper: forwards to doc.debug_save_buffer(file). */
+ template<typename L, typename F, typename D>
+ void lib_debug_save_buffer(mymln::document::document<L,F,D>& doc, std::string file)
+ {
+   doc.debug_save_buffer(file);
+ }
+
+ /* Script wrapper: forwards to doc.debug_create_buffer(). */
+ template<typename L, typename F, typename D>
+ void lib_debug_create_buffer(mymln::document::document<L,F,D>& doc)
+ {
+   doc.debug_create_buffer();
+ }
+
+ /* Registers the three "debug.*" script functions on run. */
+ template<typename L, typename F, typename D>
+ void load_debug(runtime<L,F,D>& run)
+ {
+   run.add_function_string("debug.save", &lib_debug_save_all);
+   run.add_function("debug.create_buffer", &lib_debug_create_buffer);
+   run.add_function_string("debug.save_buffer", &lib_debug_save_buffer);
+ }
+
+
+ /* Script wrapper: forwards to doc.cook_lines(). */
+ template<typename L, typename F, typename D>
+ void lib_cook_lines(mymln::document::document<L,F,D>& doc)
+ {
+   doc.cook_lines();
+ }
+
+ /* Script wrapper: forwards to doc.recook_lines(). */
+ template<typename L, typename F, typename D>
+ void lib_recook_lines(mymln::document::document<L,F,D>& doc)
+ {
+   doc.recook_lines();
+ }
+
+ /* Script wrapper: the left-hand variant maps to doc.cook_separators(). */
+ template<typename L, typename F, typename D>
+ void lib_cook_separators_left(mymln::document::document<L,F,D>& doc)
+ {
+   doc.cook_separators();
+ }
+
+ /* Script wrapper: forwards to doc.cook_separators_right(). */
+ template<typename L, typename F, typename D>
+ void lib_cook_separators_right(mymln::document::document<L,F,D>& doc)
+ {
+   doc.cook_separators_right();
+ }
+
+ /* Script wrapper: forwards to doc.cook_line_splitting(). */
+ template<typename L, typename F, typename D>
+ void lib_cook_line_splitting(mymln::document::document<L,F,D>& doc)
+ {
+   doc.cook_line_splitting();
+ }
+
+ /* Script wrapper: forwards to doc.reset_implicit_separators(). */
+ template<typename L, typename F, typename D>
+ void lib_reset_implicit_separators(mymln::document::document<L,F,D>& doc)
+ {
+   doc.reset_implicit_separators();
+ }
+
+ /* Script wrapper: forwards to doc.cook_line_splitting_exclusive(). */
+ template<typename L, typename F, typename D>
+ void lib_cook_line_splitting_exclusive(mymln::document::document<L,F,D>& doc)
+ {
+   doc.cook_line_splitting_exclusive();
+ }
+
+ /* Script wrapper: forwards to doc.cook_paragraphs(). */
+ template<typename L, typename F, typename D>
+ void lib_cook_paragraphs(mymln::document::document<L,F,D>& doc)
+ {
+   doc.cook_paragraphs();
+ }
+
+ /* Script wrapper: forwards to doc.recook_paragraphs(). */
+ template<typename L, typename F, typename D>
+ void lib_recook_paragraphs(mymln::document::document<L,F,D>& doc)
+ {
+   doc.recook_paragraphs();
+ }
+
+ /* Registers the "cook.*" / "recook.*" script functions on run.
+    "cook.line_splitting" and "cook.line_splitting_inclusive" share the
+    same wrapper. */
+ template<typename L, typename F, typename D>
+ void load_cooking(runtime<L,F,D>& run)
+ {
+   run.add_function("cook.lines", &lib_cook_lines);
+   run.add_function("cook.separators_right", &lib_cook_separators_right);
+   run.add_function("cook.separators_left", &lib_cook_separators_left);
+   run.add_function("recook.lines", &lib_recook_lines);
+   run.add_function("cook.line_splitting", &lib_cook_line_splitting);
+   run.add_function("cook.line_splitting_inclusive", &lib_cook_line_splitting);
+   run.add_function("cook.line_splitting_exclusive", &lib_cook_line_splitting_exclusive);
+   run.add_function("cook.reset_implicit_separators", &lib_reset_implicit_separators);
+   run.add_function("cook.paragraphs", &lib_cook_paragraphs);
+   run.add_function("recook.paragraphs", &lib_recook_paragraphs);
+ }
+
+
+ /* Script wrapper: forwards to doc.compute_letter_middle_height(). */
+ template<typename L, typename F, typename D>
+ void lib_compute_letter_middle_height(mymln::document::document<L,F,D>& doc)
+ {
+   doc.compute_letter_middle_height();
+ }
+
+ /* Script wrapper: forwards to doc.compute_letter_middle_width(). */
+ template<typename L, typename F, typename D>
+ void lib_compute_letter_middle_width(mymln::document::document<L,F,D>& doc)
+ {
+   doc.compute_letter_middle_width();
+ }
+
+ /* Registers the two "compute.*" script functions on run. */
+ template<typename L, typename F, typename D>
+ void load_compute(runtime<L,F,D>& run)
+ {
+   run.add_function("compute.letter_middle_height", &lib_compute_letter_middle_height);
+   run.add_function("compute.letter_middle_width", &lib_compute_letter_middle_width);
+ }
+
+
+ /* Script function: writes str to std::cout without a newline.
+    The document argument is not used. */
+ template<typename L, typename F, typename D>
+ void lib_string_print(mymln::document::document<L,F,D>& doc, std::string str)
+ {
+   std::cout << str;
+ }
+
+ /* Script function: writes str to std::cout followed by std::endl.
+    The document argument is not used. */
+ template<typename L, typename F, typename D>
+ void lib_string_print_line(mymln::document::document<L,F,D>& doc, std::string str)
+ {
+   std::cout << str << std::endl;
+ }
+
+ /* Script function: stores B in the runtime variable named A.
+    Despite the name, no concatenation takes place here. */
+ template<typename L, typename F, typename D>
+ void lib_string_concat(runtime<L,F,D>& run, std::string A, std::string B)
+ {
+   run.add_variable(A, B);
+ }
+
+ /* Registers the "string.*" script functions on run; "string.concat" and
+    "string.clone" are bound to the same implementation. */
+ template<typename L, typename F, typename D>
+ void load_string(runtime<L,F,D>& run)
+ {
+   run.add_function_string("string.print", &lib_string_print);
+   run.add_function_string("string.print_line", &lib_string_print_line);
+   run.add_function_string_string("string.concat", &lib_string_concat);
+   run.add_function_string_string("string.clone", &lib_string_concat);
+ }
+
+
+ /* Script function "system.set": stores B in the runtime variable named A. */
+ template<typename L, typename F, typename D>
+ void lib_system_set(runtime<L,F,D>& run, std::string A, std::string B)
+ {
+   run.add_variable(A, B);
+ }
+
+ /* Script function "system.equal": calls the script function named B when
+    A is exactly the text "true". */
+ template<typename L, typename F, typename D>
+ void lib_system_test_equal(runtime<L,F,D>& run, std::string A, std::string B)
+ {
+   if(A == "true")
+     run.call_function(B);
+ }
+
+ /* Script function "system.nequal": calls the script function named B when
+    A is exactly the text "false". */
+ template<typename L, typename F, typename D>
+ void lib_system_test_nequal(runtime<L,F,D>& run, std::string A, std::string B)
+ {
+   if(A == "false")
+     run.call_function(B);
+ }
+
+ /* Stores "true" under the variable named A when A is the text "false",
+    and "false" otherwise; B is ignored.
+    NOTE(review): this helper is not registered by load_system, and it
+    writes to a variable named after the tested text itself - confirm the
+    intended contract before exposing it to scripts. */
+ template<typename L, typename F, typename D>
+ void lib_system_not(runtime<L,F,D>& run, std::string A, std::string B)
+ {
+   if(A == "false")
+     run.add_variable(A, "true");
+   else
+     run.add_variable(A, "false");
+ }
+
+ /* Registers the "system.*" script functions on run.
+    "system.equal" and "system.nequal" used to be bound to
+    lib_string_concat by copy-paste, which turned both tests into variable
+    assignments and left the two test helpers above unreachable. */
+ template<typename L, typename F, typename D>
+ void load_system(runtime<L,F,D>& run)
+ {
+   run.add_function_string_string("system.set", &(lib_system_set));
+   run.add_function_string_string("system.equal", &(lib_system_test_equal));
+   run.add_function_string_string("system.nequal", &(lib_system_test_nequal));
+ }
+
+ }
+}
+#endif
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/my/runtime/runtime.hh b/scribo/sandbox/raphael/code/my/runtime/runtime.hh
new file mode 100644
index 0000000..f4057bc
--- /dev/null
+++ b/scribo/sandbox/raphael/code/my/runtime/runtime.hh
@@ -0,0 +1,196 @@
+#ifndef INC_RUNTIME
+#define INC_RUNTIME
+
+using namespace mln;
+using namespace std;
+namespace mymln
+{
+ namespace runtime
+ {
+ /* Minimal line-oriented script interpreter that drives a document.
+  *
+  * Script format (one instruction per line): up to three tokens separated
+  * by spaces or tabs - an instruction name and at most two arguments.
+  * A ';' stops the parsing of the line. Special forms:
+  *   fun NAME     records the line as the entry point of local function NAME
+  *   end          returns from a local function (or stops the program when
+  *                the call stack is empty)
+  *   $NAME VALUE  shorthand for "system.set NAME VALUE"
+  * When an instruction is executed, arguments starting with '$' are
+  * replaced by the value of the corresponding variable.
+  *
+  * Native functions are registered with add_function*(); the document they
+  * operate on is supplied with set_current_document().
+  */
+ template<typename Label, typename Float, typename Data>
+
+ class runtime
+ {
+   typedef document::document<Label, Float, Data> doc;
+   typedef void (*doc_fun_ptr)(doc&);
+   typedef void (*doc_arg_fun_ptr)(doc&, std::string);
+   typedef void (*run_arg2_fun_ptr)(runtime<Label, Float, Data>&, std::string, std::string);
+ public:
+   /* Creates an empty program with no current document. */
+   runtime()
+     : CP(0), current(0)
+   {
+   }
+
+   /* Creates a runtime and loads the script stored in `file`.
+      The tables are empty before load() runs, so the local functions it
+      records are kept (they used to be wiped right after loading). */
+   runtime(const char* file)
+     : CP(0), current(0)
+   {
+     load(file);
+   }
+
+   /* Appends the instructions of the script `file` to the program.
+    *
+    * The file is opened read-only. If it cannot be opened an error is
+    * printed and the program is left untouched; the previous eof()-driven
+    * loop never terminated in that case and also dropped a one-line script
+    * that had no trailing newline.
+    */
+   void load(const char* file)
+   {
+     std::ifstream filestream(file);
+     if(!filestream)
+     {
+       std::cout << "#SCRIPT ERROR : The file " << file << " can't be opened" << std::endl;
+       return;
+     }
+
+     std::string Buffer;
+     while(std::getline(filestream, Buffer))
+     {
+       std::string::size_type pos = 0;
+       skip_blanks_(Buffer, pos);
+       const std::string Instr = read_token_(Buffer, pos);
+       skip_blanks_(Buffer, pos);
+       const std::string Arg = read_token_(Buffer, pos);
+       skip_blanks_(Buffer, pos);
+       const std::string Arg2 = read_token_(Buffer, pos);
+
+       if(Instr == "fun")
+       {
+         /* The entry point is the index this line gets in the program, so
+            it stays correct when several scripts are loaded in a row. */
+         doc_local_fun[Arg] = static_cast<int>(program_instruction.size());
+         program_instruction.append("");
+         program_argument.append("");
+         program_argument2.append("");
+       }
+       else if(!Instr.empty() && Instr[0] == '$')
+       {
+         /* add_variable() prepends the '$' itself and run() expands every
+            argument that starts with '$', so the target name is stored
+            without it; otherwise the assignment would read the variable
+            instead of writing it. */
+         program_instruction.append("system.set");
+         program_argument.append(Instr.substr(1));
+         program_argument2.append(Arg);
+       }
+       else
+       {
+         program_instruction.append(Instr);
+         program_argument.append(Arg);
+         program_argument2.append(Arg2);
+       }
+     }
+   }
+
+   /* Executes the program from its first instruction. The overload of
+      call_function() is chosen from the number of non-empty arguments;
+      empty instructions are skipped. */
+   void run()
+   {
+     for(CP = 0; CP < static_cast<int>(program_instruction.size()); CP++)
+     {
+       if(program_instruction[CP] == "end")
+         ret();
+       else if(!program_argument2[CP].empty())
+         call_function(program_instruction[CP], get_variable(program_argument[CP]), get_variable(program_argument2[CP]));
+       else if(!program_argument[CP].empty())
+         call_function(program_instruction[CP], get_variable(program_argument[CP]));
+       else if(!program_instruction[CP].empty())
+         call_function(program_instruction[CP]);
+     }
+   }
+
+   /* Registers a native function taking only the document. */
+   inline void add_function(std::string name, void(*ptr)(doc&) )
+   {
+     doc_fun[name] = ptr;
+   }
+
+   /* Registers a native function taking the document and one string. */
+   inline void add_function_string(std::string name, void(*ptr)(doc&, std::string arg) )
+   {
+     doc_arg_fun[name] = ptr;
+   }
+
+   /* Registers a native function taking the runtime and two strings. */
+   inline void add_function_string_string(std::string name, void(*ptr)(runtime<Label,Float, Data>&, std::string arg, std::string arg2) )
+   {
+     doc_arg2_fun[name] = ptr;
+   }
+
+   /* Sets variable `name` (given without the leading '$') to `value`. */
+   void add_variable(std::string name, std::string value)
+   {
+     vars["$" + name] = value;
+   }
+
+   /* Returns the value of `name` when it starts with '$' (printing an
+      error and returning "" when the variable is unknown); any other text
+      is returned unchanged. */
+   std::string get_variable(std::string name)
+   {
+     if(name.empty() || name[0] != '$')
+       return name;
+
+     const std::map<std::string, std::string>::const_iterator found = vars.find(name);
+     if(found == vars.end())
+     {
+       std::cout << "#SCRIPT ERROR : The variable " << name << " doesn't exist" << std::endl;
+       return "";
+     }
+     return found->second;
+   }
+
+   /* Returns from a local function: resumes at the saved call site, or
+      moves past the last instruction when there is no caller. */
+   void ret()
+   {
+     if(call_stack.empty())
+     {
+       CP = static_cast<int>(program_instruction.size());
+     }
+     else
+     {
+       CP = call_stack.top();
+       call_stack.pop();
+     }
+   }
+
+   /* Calls a function without argument: a local script function when one
+      is recorded under `name`, a registered native function otherwise. */
+   void call_function(std::string name)
+   {
+     const std::map<std::string, int>::const_iterator local = doc_local_fun.find(name);
+     if(local != doc_local_fun.end())
+     {
+       call_stack.push(CP);
+       CP = local->second;
+       return;
+     }
+
+     const typename std::map<std::string, doc_fun_ptr>::const_iterator native = doc_fun.find(name);
+     if(native == doc_fun.end())
+     {
+       std::cout << "#SCRIPT ERROR : The function " << name << " doesn't exist" << std::endl;
+       return;
+     }
+     if(!has_document_(name))
+       return;
+     native->second(*current);
+   }
+
+   /* Calls a registered native function with one string argument. */
+   void call_function(std::string name, std::string arg)
+   {
+     const typename std::map<std::string, doc_arg_fun_ptr>::const_iterator native = doc_arg_fun.find(name);
+     if(native == doc_arg_fun.end())
+     {
+       std::cout << "#SCRIPT ERROR : The function " << name << " doesn't exist" << std::endl;
+       return;
+     }
+     if(!has_document_(name))
+       return;
+     native->second(*current, arg);
+   }
+
+   /* Calls a registered native function with two string arguments; these
+      functions receive the runtime itself rather than the document. */
+   void call_function(std::string name, std::string arg, std::string arg2)
+   {
+     const typename std::map<std::string, run_arg2_fun_ptr>::const_iterator native = doc_arg2_fun.find(name);
+     if(native == doc_arg2_fun.end())
+     {
+       std::cout << "#SCRIPT ERROR : The function " << name << " doesn't exist" << std::endl;
+       return;
+     }
+     native->second(*this, arg, arg2);
+   }
+
+   /* Selects the document the native functions operate on. The pointer is
+      stored as given; no copy of the document is made. */
+   void set_current_document(doc* document)
+   {
+     current = document;
+   }
+
+ private:
+   /* True for the two separators accepted between tokens. */
+   static bool is_blank_(char c)
+   {
+     return c == ' ' || c == '\t';
+   }
+
+   /* Advances pos past any run of spaces and tabs. */
+   static void skip_blanks_(const std::string& line, std::string::size_type& pos)
+   {
+     while(pos < line.length() && is_blank_(line[pos]))
+       pos++;
+   }
+
+   /* Reads one token starting at pos; stops at a blank, at ';' or at the
+      end of the line and leaves pos on the stopping character. */
+   static std::string read_token_(const std::string& line, std::string::size_type& pos)
+   {
+     std::string token;
+     while(pos < line.length() && !is_blank_(line[pos]) && line[pos] != ';')
+     {
+       token += line[pos];
+       pos++;
+     }
+     return token;
+   }
+
+   /* Reports a native call attempted before set_current_document(). */
+   bool has_document_(const std::string& name) const
+   {
+     if(current != 0)
+       return true;
+     std::cout << "#SCRIPT ERROR : The function " << name << " needs a current document" << std::endl;
+     return false;
+   }
+
+   int CP;                                              /* index of the instruction being executed */
+   std::map<std::string, std::string > vars;            /* variables, keyed by "$name" */
+   std::map<std::string, doc_fun_ptr > doc_fun;         /* natives: (document) */
+   std::map<std::string, doc_arg_fun_ptr > doc_arg_fun; /* natives: (document, string) */
+   std::map<std::string, run_arg2_fun_ptr > doc_arg2_fun; /* natives: (runtime, string, string) */
+   std::map<std::string, int > doc_local_fun;           /* script functions: name -> line index */
+   mln::util::array<std::string> program_instruction;
+   mln::util::array<std::string> program_argument;
+   mln::util::array<std::string> program_argument2;
+
+   std::stack<int> call_stack;                          /* saved CP of pending local calls */
+   doc* current;                                        /* document in use, 0 until set */
+ };
+ }
+}
+#endif
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc
index feaf817..1f8d94d 100644
--- a/scribo/sandbox/raphael/code/test.cc
+++ b/scribo/sandbox/raphael/code/test.cc
@@ -1,6 +1,7 @@
#include <vector>
-#include <mln/io/all.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/io/ppm/all.hh>
#include <mln/core/site_set/p_vertices.hh>
#include <mln/core/image/graph_elt_window.hh>
@@ -16,21 +17,20 @@
#include <mln/util/timer.hh>
#include <mln/debug/draw_graph.hh>
#include <mln/debug/println.hh>
-#include <mln/transform/all.hh>
+#include <mln/transform/influence_zone_geodesic.hh>
#include <mln/make/image2d.hh>
#include <mln/core/alias/neighb2d.hh>
#include <mln/make/influence_zone_adjacency_graph.hh>
#include <mln/make/w_window2d.hh>
-#include <mln/labeling/all.hh>
+#include <mln/labeling/value_and_compute.hh>
#include <mln/make/image.hh>
#include <mln/value/rgb8.hh>
#include <mln/value/int_u8.hh>
#include <mln/value/int_u.hh>
-#include <mln/data/all.hh>
+#include <mln/labeling/colorize.hh>
#include <mln/core/alias/neighb2d.hh>
-#include <mln/algebra/all.hh>
+#include <mln/algebra/vec.hh>
#include <mln/core/image/graph_elt_neighborhood.hh>
-#include <mln/literal/all.hh>
#include <mln/graph/compute.hh>
#include <mln/draw/plot.hh>
@@ -42,10 +42,21 @@
#include <my/document/separator.hh>
#include <my/document/clean.hh>
+#include <my/document/recognition.hh>
+
+#include <my/runtime/runtime.hh>
+#include <my/runtime/lib.hh>
+
using namespace mln;
using namespace std;
-void Process(std::string File, std::string Dir)
+void Process(std::string File, std::string Dir, mymln::runtime::runtime< value::int_u<16> ,float,short>& runtime)
{
+ // RUNTIME
+
+ runtime.add_variable("FILE", Dir + "/" + File);
+ runtime.add_variable("DIR", Dir);
+ runtime.add_variable("DEBUG_FILE", Dir + "/debug_" + File);
+
std::cout << "Processing : " << File << endl;
/* CREATE GRAPH */
@@ -61,24 +72,19 @@ void Process(std::string File, std::string Dir)
uint16 areas_detected;
- timer.start();
- image2d<uint16> ima_blob = labeling::blobs(ima, c8(), areas_detected);
- std::cout << "CREATE BLOBS : " << timer.stop() << endl;
- timer.restart();
- timer.start();
+ mln_VAR( couple , mln::labeling::value_and_compute(ima, true, c8(), areas_detected, accu::shape::bbox<point2d>()));
+ image2d<uint16> ima_blob = couple.first();
+ util::array<box2d> boxes = couple.second().first();
+ //image2d<uint16> ima_blob = labeling::blobs(ima, c8(), areas_detected);
+
image2d<uint16> ima_influ = transform::influence_zone_geodesic(ima_blob, c8());
- std::cout << "CREATE INFLUENCE ZONE GEODESIC : " << timer.stop() << endl;
- timer.restart();
- timer.start();
+
util::graph grph = make::influence_zone_adjacency_graph(ima_influ, c8(), areas_detected);
// mymln::debug::save_label_image(ima_influ, Dir + "/influ_" + File);
- std::cout << "CREATE GRAPH : " << timer.stop() << endl;
-
-
+
/* COMPUTE GRAPH POINT POSITION */
- timer.restart();
- util::array<box2d> boxes = labeling::compute(accu::meta::shape::bbox(), ima_blob, areas_detected);
+ //util::array<box2d> boxes = labeling::compute(accu::meta::shape::bbox(), ima_blob, areas_detected);
typedef p_vertices<util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
typedef graph_elt_neighborhood<util::graph, g_vertices_p> g_nbh;
fun::i2v::array<point2d> graph_points(areas_detected + 1);
@@ -87,18 +93,20 @@ void Process(std::string File, std::string Dir)
{graph_points(N + 1) = boxes[N + 1].pcenter();}
g_vertices_p area_grph(grph, graph_points);
- std::cout << "COMPUTE GRAPH POINT POSITION : " << timer.stop() << endl;
- /* WORK ON GRAPH */
-
-
- timer.restart();
+
+ std::cout << "INITIALIZING : " << timer.stop() << endl;
+ timer.restart();
+ /* WORK ON GRAPH */
mymln::document::document<uint16,float,short> doc(ima_blob, ima_influ, boxes, area_grph, areas_detected);
+ runtime.set_current_document(&doc);
+ doc.debug_set_image(ima);
doc.vertical_separator_ratio_range(0.0f, 0.2f);
doc.horizontal_separator_ratio_range(6.0f, 1000.0f);
doc.container_volume_range(40, 100);
+
for (uint16 N = 1; N <= areas_detected; N++)
{
@@ -114,113 +122,139 @@ void Process(std::string File, std::string Dir)
}
//mymln::debug::save_label_image(ima_influ, "influ_" + File);
-
+ /*
mymln::document::clean_containers_items(doc);
mymln::document::clean_letters_items(doc);
mymln::document::clean_get_lines(doc);
+
+
+
mymln::document::clean_letters_alone(doc);
+ doc.cook_lines();
+ mymln::document::clean_included_letters(doc);
+ doc.recook_lines();
mymln::document::clean_dot_items(doc);
- doc.cook_lines();
- mymln::document::clean_quote_items(doc, Dir + "/" + "quote_graph_" + File, doc.image_mask_letters());
+ doc.recook_lines();
+ mymln::document::clean_quote_items(doc);
+ doc.recook_lines();
+
+ mymln::document::clean_between(doc);
+ doc.recook_lines();
+ doc.compute_letter_middle_height();
+ doc.compute_letter_middle_width();
+ mymln::document::clean_odd_letters(doc);
+ doc.recook_lines();
+
+*/
+
+ /*doc.compute_letter_middle_space();
+ mymln::document::clean_lines_space(doc, Dir + "/" + "quote_graph_" + File, doc.image_mask_letters());
+ doc.recook_lines();*/
+
+/*
mymln::document::separators::separators_find_allign(doc);
mymln::document::separators::separators_make_clean(doc);
doc.cook_separators();
- std::cout << "-> compute separator left " << endl;
doc.cook_line_splitting();
-
+
+
mymln::document::clean_line_link_item(doc);
- mymln::document::clean_proximity_lines(doc);
+ mymln::document::clean_proximity_lines(doc);
mymln::document::clean_quote_lines(doc);
+ doc.recook_lines();
+
+
+
+
doc.reset_implicit_separators();
- std::cout << "-> clean separator right " << endl;
mymln::document::separators::separators_find_allign_right(doc);
- mymln::document::separators::separators_make_clean(doc);
- std::cout << "-> compute separator right " << endl;
+ mymln::document::separators::separators_make_clean_right(doc);
doc.cook_separators_right();
doc.cook_line_splitting_exclusive();
- std::cout << "-> clean separator right " << endl;
- mymln::document::clean_line_link_item(doc);
- mymln::document::clean_proximity_lines(doc);
- std::cout << "-> clean " << endl;
- mymln::document::clean_quote_lines(doc);
- mymln::document::clean_alone_letters_lines(doc, Dir + "/" + "alone_graph_" + File, doc.image_mask_letters());
+ mymln::document::clean_line_link_item(doc);
+
+ mymln::document::clean_proximity_lines(doc);
+ mymln::document::clean_quote_lines(doc);
+ mymln::document::clean_alone_letters_lines(doc);
doc.recook_lines();
+
+
+ doc.compute_letter_middle_height();
+ doc.compute_letter_middle_width();
+ mymln::document::clean_odd_letters(doc);
+ doc.recook_lines();
+ */
+ /*
+ mymln::document::clean_lines_space(doc, Dir + "/" + "alone_graph_" + File, doc.image_mask_letters());
+ doc.recook_lines();*/
+
+ /*
mymln::document::remove_alone_letter(doc);
doc.recook_lines();
- mymln::document::clean_paragraph_items(doc, Dir + "/" + "para_graph_" + File, doc.image_mask_letters());
+ mymln::document::clean_paragraph_items(doc);
doc.cook_paragraphs();
- std::cout << "WORK ON GRAPH : " << timer.stop() << endl;
- //io::ppm::save(ima_influ, "separator.ppm");
- //io::pbm::save(doc.image_mask_separators(),"separators");
- // io::pbm::save(doc.image_mask_letters(),Dir + "/" + "letters_" + File);
- //io::pbm::save(doc.image_mask_alone_letters(),Dir + "/" + "letters_alone_" + File);
- //io::pbm::save(doc.image_mask_separators(),Dir + "/" + "separators_" + File);
- //io::pbm::save(doc.image_mask_containers(),Dir + "/" + "containers_" + File);
- //io::pbm::save(doc.image_mask_noise(),Dir + "/" + "noise_" + File);
-
-
-
- //doc.debug_save_lines(Dir + "/" + "lines_" + File);
- doc.debug_save_all(Dir + "/" + "debug_" + File, ima);
- //mymln::debug::save_graph_image(doc.fun_mask_implicit_separators_left(), doc.image_mask_letters(), Dir + "/" + "graph_imp_sep_line_" + File);
- //doc.debug_save_separators(Dir + "/" + "imp_sep_graph_" + File);
+
- /* typedef vertex_image<point2d,bool> v_ima_g;
- v_ima_g mask = doc.fun_mask_letters();
-*/
- /*image2d<bool> out(3500,3500);
-
+ mymln::document::clean_paragraphs_up(doc);
+ doc.recook_paragraphs();
+ mymln::document::clean_paragraphs_large(doc);
+ doc.recook_paragraphs();
+ mymln::document::clean_included_paragraphs(doc);
+ doc.recook_paragraphs();
+ std::cout << "WORK ON GRAPH : " << timer.stop() << endl;
+ doc.recook_lines();
+ */
+ runtime.run();
+
+
+
- mln_piter_(v_ima_g) v(mask.domain());
- typedef graph_elt_neighborhood_if<util::graph, g_vertices_p, v_ima_g> nbh_t;
- nbh_t nbh(mask);
- mln_niter_(nbh_t) q(nbh, v);
+ /*
+ doc.cook_lines_iter();
+ std::cout << doc.get_first_line();
+
+
+
- unsigned int fnds = 0;
- for_all(v)
+ doc.compute_letter_middle_space();
+
+ mymln::document::recognize_minus(doc);
+ mymln::document::recognize_dot(doc);
+ for(int Line = doc.get_first_line(); Line; doc.get_next_line(Line))
{
- unsigned int nds = 0;
- for_all(q)
- {
- nds++;
-
- draw::line(out, q,v, true);
- }
- if(nds > 0)
- {
- std::cout << v << endl;
- fnds++;
- }
-
-
- }*/
- //mymln::debug::draw_graph(out, mask);
- //io::pbm::save(out, "maskltt.dgb");
- //std::cout << "NODES:" << fnds << endl;
- // mymln::debug::save_graph_image(area_grph, ima, "graph_" + File);
- // mymln::debug::save_graph_image(doc.fun_mask_separators(), ima, "separator_graph_" + File);
- //mymln::debug::save_graph_image(area_grph, doc.image_mask_letters(), Dir + "/" + "graph_" + File);
- //mymln::debug::save_graph_image(doc.fun_mask_letters(), doc.image_mask_letters(), Dir + "/" + "container_graph_" + File);
- mln::util::array<box2d> linebx = doc.bbox_mask_lines();
- mymln::debug::save_boxes_image(linebx, doc.image_mask_letters(), Dir + "/" + "lbox_" + File);
-
- //mymln::debug::save_boxes_image(doc.bbox_enlarge_mask_letters(10, 0), ima, "linebox_" + File);
+ std::cout << doc.get_line_string(Line) << endl;
+ }
+
+ //doc.debug_save_lines(Dir + "/" + "lines_" + File);
+ //doc.debug_save_all(Dir + "/" + "debug_" + File, ima);
+ */
+
}
int main( int argc, char** argv)
{
- if(argc <= 1){Process("ima.pbm", "");}
+ mymln::runtime::runtime< value::int_u<16> ,float,short> run;
+ mymln::runtime::load_clean(run);
+ mymln::runtime::load_debug(run);
+ mymln::runtime::load_cooking(run);
+ mymln::runtime::load_string(run);
+ mymln::runtime::load_system(run);
+ mymln::runtime::load_separators(run);
+ mymln::runtime::load_compute(run);
+ if(argc <= 1){Process("ima.pbm", "", run);}
else
{
bool dir = false;
+ bool prog = false;
std::string Dir = "";
+ std::string Prog = "";
for(int N = 1 ; N < argc; N++)
{
if(dir)
@@ -228,12 +262,20 @@ int main( int argc, char** argv)
Dir = argv[N];
dir = false;
}
+ else if(prog)
+ {
+ Prog = argv[N];
+ run.load(Prog. c_str());
+ prog = false;
+ }
else
{
if(!strcmp(argv[N], "-D"))
{ dir = true;}
+ else if(!strcmp(argv[N], "-P"))
+ { prog = true; }
else
- { Process(argv[N], Dir); }
+ { Process(argv[N], Dir, run); }
}
}
}
--
1.7.2.5
1
0
last-svn-commit-887-ge10fcbe Update everything. Add Breakpoint, Rebuild Separators. Patch some problems with union find. Patch some problems with paragraph detection. Still have some problems with paragraphs
by Raphael Boissel 08 Sep '11
by Raphael Boissel 08 Sep '11
08 Sep '11
---
scribo/sandbox/raphael/code/my/document/clean.hh | 233 ++++++-
.../sandbox/raphael/code/my/document/document.hh | 807 ++++++++++++++++++--
scribo/sandbox/raphael/code/my/document/outline.hh | 87 +++
.../sandbox/raphael/code/my/document/separator.hh | 115 +++-
scribo/sandbox/raphael/code/my/runtime/lib.hh | 50 ++
scribo/sandbox/raphael/code/my/util/union.hh | 15 +-
scribo/sandbox/raphael/code/test.cc | 15 +-
7 files changed, 1251 insertions(+), 71 deletions(-)
create mode 100644 scribo/sandbox/raphael/code/my/document/outline.hh
diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh
index b996b8a..94d0422 100644
--- a/scribo/sandbox/raphael/code/my/document/clean.hh
+++ b/scribo/sandbox/raphael/code/my/document/clean.hh
@@ -15,7 +15,7 @@ namespace mymln
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
- v_ima_g mask = doc.fun_mask_letters();
+ v_ima_g mask = doc.fun_mask_all_letters();
mln_piter_(v_ima_g) v(mask.domain());
typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
nbh_t nbh(mask);
@@ -339,13 +339,22 @@ namespace mymln
{
for_all(q)
{
- if(doc.contain_line(q) && doc.get_line_length(q) == 1 && doc.line_influence_has(v,q))
+ if(
+ doc.contain_line(q) &&
+ !doc.same_line(q,v) &&
+ doc.get_line_length(q) == 1 &&
+ doc.line_influence_has(v,q) &&
+ !doc.allign_H_large(v,q) &&
+ (doc.allign_base_line_strict(v,q) || doc.allign_V(v,q))
+ )
{
+ doc.debug_draw_line_green_buffer(q,v);
doc.add_to_line_link(q,v);
}
}
}
}
+ doc.propage_line_link();
}
@@ -394,6 +403,21 @@ namespace mymln
doc.add_to_line_link(v, q);
doc.debug_draw_line_red_buffer(v,q);
}
+ else if(
+
+ doc.line_influence_reciprocal(q, v) &&
+ !doc.same_line(q,v) &&
+ doc.allign_V(q,v) &&
+ doc.allign_size_x_height(v,q) &&
+ doc.get_line_length(v) > 4 &&
+ doc.allign_proximity_line(v,q)
+ )
+ {
+ doc.debug_draw_line_orange_buffer(v,q);
+ doc.debug_draw_box_red_buffer(v);
+ doc.debug_draw_box_green_buffer(q);
+ doc.add_to_line_link(v, q);
+ }
}
@@ -738,10 +762,12 @@ namespace mymln
}
template<typename L, typename F, typename D>
- void clean_lines_space(mymln::document::document<L,F,D>& doc, std::string dgb_out,image2d<bool> s)
+ void clean_line_space(mymln::document::document<L,F,D>& doc)
{
- image2d<value::rgb8> out;
- mln::initialize(out, s);
+ mln::util::array<L> KillMe = mln::util::array<L>(doc.size());
+ unsigned int Killer = 0;
+ if(doc.size() > 0){KillMe[0] = 0;}
+
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
v_ima_g mask = doc.fun_mask_letters();
@@ -755,23 +781,37 @@ namespace mymln
{
if(doc.same_line(q, v))
{
- draw::line(out, q,v, mln::literal::red);
if(doc.in_beginning_of_line(q) || doc.in_end_of_line(q))
{
- draw::line(out, q,v, mln::literal::green);
- if(doc.space(q, v) > doc.get_letter_middle_space(q) * 10)
+ if(doc.space(q, v) > doc.get_letter_middle_space(q) * 4 && doc.allign_half_line_letter(v,q))
{
- draw::line(out, q,v, mln::literal::blue);
- /*if(doc[q] == doc.get_beginning_of_line(q))
- doc.add_to_line_link(v, q); */
- //doc.add_noise(q);
+ if(doc[q] == doc.get_beginning_of_line(q) && doc.allign_V_side(v,q))
+ {
+ doc.add_to_line_self_link(v);
+ doc.add_to_line_link(v, q);
+ KillMe[Killer++] = doc[q];
+ KillMe[Killer] = 0;
+ doc.debug_draw_line_green_buffer(q,v);
+ doc.debug_draw_box_red_buffer(q);
+ }
+ else
+ {
+ doc.debug_draw_line_red_buffer(q,v);
+ }
}
}
}
}
}
doc.propage_line_link();
- io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out);
+ Killer = 0;
+ while(Killer < doc.size() && KillMe[Killer])
+ {
+ doc.add_noise(KillMe[Killer]);
+ doc.kill(KillMe[Killer]);
+ Killer++;
+ }
+
}
@@ -825,7 +865,8 @@ namespace mymln
doc.get_line_length(q)> 5 &&
doc.allign_V(v, q) &&
doc.allign_proximity(v, q) &&
- doc.allign_size_height(v, q)
+ doc.allign_size_height(v, q) &&
+ !doc.contain_separator(v)
)
{
doc.add_to_line_link(q,v);
@@ -853,6 +894,8 @@ namespace mymln
{
for_all(q)
{
+ if(doc.paragraph_start_with_tab(q) && doc.same_paragraph(q,v))
+ {doc.debug_draw_line_red_buffer(v, q);}
if(
doc.contain_paragraph(q) &&
!doc.same_paragraph(q,v) &&
@@ -873,7 +916,169 @@ namespace mymln
}
doc.propage_paragraph_link();
}
+ template<typename L, typename F, typename D>
+ void find_previous_next_line(mymln::document::document<L,F,D>& doc) // Registers previous/next line relations between neighbouring letters that lie on different lines.
+ {
+ doc.reserve_previous_next_line(); // called once before any add_line_previous / add_line_next
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_all_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v); // q iterates over the graph neighbours of v
+ for_all(v)
+ {
+ if(doc.contain_line(v)) // only letters that already belong to a line
+ {
+ for_all(q)
+ {
+ if(
+ doc.contain_line(q) &&
+ !doc.same_line(q, v) && // the neighbour must sit on another line
+ doc.allign_H_large(q, v) &&
+ doc.allign_size_height_line(q,v))
+ {
+ if(doc.allign_top_large(q,v)) // first matching test wins: "previous" is tried before "next"
+ {
+ doc.debug_draw_line_green_buffer(q,v);
+ doc.add_line_previous(q,v);
+ }
+ else if(doc.allign_bottom_large(q,v))
+ {
+ doc.debug_draw_line_red_buffer(q,v);
+ doc.add_line_next(q,v);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ template<typename L, typename F, typename D>
+ void clean_letter_previous_next_line(mymln::document::document<L,F,D>& doc) // Links neighbouring letters of distinct lines when both lines report the same previous and next line.
+ {
+
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_start_end_lines(); // iteration is restricted to the vertices of this mask
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_line(v) && doc.return_next_line(doc.get_line_ID(v))) // skip v when return_next_line() evaluates to false for its line
+ {
+ for_all(q)
+ {
+ if(
+
+ doc.contain_line(q) &&
+ !doc.same_line(q, v) &&
+ doc.return_next_line(doc.get_line_ID(v)) == doc.return_next_line(doc.get_line_ID(q)) && // same next line
+ doc.return_previous_line(doc.get_line_ID(v)) == doc.return_previous_line(doc.get_line_ID(q)) && // same previous line
+ doc.allign_V_line(v, q) &&
+ doc.allign_size_height_line(v, q) &&
+ !doc.killed(doc[q]) // never link towards a killed label
+ )
+ {
+ doc.debug_draw_line_green_buffer(v,q);
+
+ doc.add_to_line_link(v,q);
+ }
+ }
+ }
+ }
+ doc.propage_line_link(); // applied once, after all links have been queued
+
+ }
+
+ template<typename L, typename F, typename D>
+ void clean_V_lines(mymln::document::document<L,F,D>& doc) // Marks as temporary letters the vertices that pass allign_H and allign_size_width_strict with a neighbour.
+ {
+
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_all_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ for_all(q)
+ {
+ if(
+ doc.allign_H(v,q) &&
+ doc.allign_size_width_strict(v,q)
+ )
+ {
+ doc.add_temp_letter(v); // only v is flagged; q is used for the test and the debug drawing
+ doc.debug_draw_line_green_buffer(v,q);
+ }
+ }
+ }
+ doc.propage_line_link(); // NOTE(review): no add_to_line_link() is issued in this function - confirm this call is intended
+
+ }
+
+
+ template<typename L, typename F, typename D>
+ void clean_paragraphs_end_line(mymln::document::document<L,F,D>& doc) // Links neighbouring paragraphs of length 1 to the paragraph of v when one of two alignment rule sets holds.
+ {
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = doc.fun_mask_all_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for_all(v)
+ {
+ if(doc.contain_paragraph(v))
+ {
+ for_all(q)
+ {
+ if(
+ doc.contain_paragraph(q) &&
+ !doc.same_paragraph(q,v) &&
+ doc.get_paragraph_length(q) == 1) // candidate: q's paragraph has length 1
+ {
+ if( // rule set 1: allign_top_paragraph(q,v) with the "min" horizontal test
+ doc.allign_top_paragraph(q,v) &&
+ doc.allign_H_min_paragraph(q,v) &&
+ doc.allign_size_height_paragraph_line(q,v) &&
+ doc.allign_smaller_paragraph(v, q) &&
+ doc.compatible_paragraph_middle_width(v,q) &&
+ doc.allign_proximity_paragraph_up_large(q,v) &&
+ doc.get_line_length(q) > 3
+ )
+ {
+ doc.debug_draw_line_green_buffer(q,v);
+ doc.add_to_paragraph_link(v, q);
+ }
+ else if( // rule set 2: allign_top_paragraph(v,q), v's paragraph must not start with a tab, "max" horizontal test
+ doc.allign_top_paragraph(v,q) &&
+ !doc.paragraph_start_with_tab(v) &&
+ doc.allign_size_height_paragraph_line(q,v) &&
+ doc.allign_H_max_paragraph(q,v) &&
+ doc.allign_smaller_paragraph(v, q) &&
+ doc.compatible_paragraph_middle_width(v,q) &&
+ doc.allign_proximity_paragraph_up_large(q,v) &&
+ doc.get_line_length(q) > 3
+ )
+ {
+ doc.debug_draw_line_green_buffer(q,v); // same action as rule set 1
+ doc.add_to_paragraph_link(v, q);
+ }
+ }
+ }
+ }
+ }
+ doc.propage_paragraph_link(); // applied once, after all links have been queued
+ }
}
}
diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh
index 880d4c5..b3735df 100644
--- a/scribo/sandbox/raphael/code/my/document/document.hh
+++ b/scribo/sandbox/raphael/code/my/document/document.hh
@@ -45,6 +45,7 @@ namespace mymln
{
img = ima;
_bboxgp = bboxgp;
+ _bboxgp_influ = mln::util::array<box2d>(Areas + 1);
_area_graph = area_graph;
separators_mask = fun::i2v::array<bool>(Areas + 1);
containers_mask = fun::i2v::array<bool>(Areas + 1);
@@ -53,17 +54,18 @@ namespace mymln
Hseparator_mask = fun::i2v::array<bool>(Areas + 1);
Vseparator_mask = fun::i2v::array<bool>(Areas + 1);
noise_mask = fun::i2v::array<bool>(Areas + 1);
+ temp_letter = fun::i2v::array<bool>(Areas + 1);
alone_letters_mask = fun::i2v::array<bool>(Areas + 1);
implicit_separators_left_mask = fun::i2v::array<bool>(Areas + 1);
implicit_separators_right_mask = fun::i2v::array<bool>(Areas + 1);
kill_mask = fun::i2v::array<bool>(Areas + 1);
+ all_mask = fun::i2v::array<bool>(Areas + 1);
CImpSep = 1;
NImpSep = 2;
lines_union = mymln::util::union_find<Label>(Areas + 1);
implicit_separators_union = mymln::util::union_find<Label>(Areas + 1);
debug_buffer_enable = false;
paragraphs_union = mymln::util::union_find<Label>(Areas + 1);
-
tag_lbl = mln::util::array<std::string>(Areas + 1);
Btag_lbl = mln::util::array<bool>(Areas + 1);
lines_split = mln::util::array<Label>(Areas + 1);
@@ -79,8 +81,14 @@ namespace mymln
CPar = 1;
NPar = 2;
Areas_Number_ = Areas + 1;
-
+ sep_right_cooked = false;
+ lines_cooked = false;
+ Enable_Debug_Buffer = false; // Remanant version of debug_buffer_enable
}
+ inline bool killed(const Label lbl) // Returns the kill_mask entry of `lbl`.
+ {return kill_mask(lbl);}
+ inline void kill(const Label lbl) // Sets the kill_mask entry of `lbl` and clears its all_mask entry.
+ {kill_mask(lbl) = true;all_mask(lbl) = false;}
inline unsigned int count()
{return Areas_Number_;}
/* OPERATION ON PAGE */
@@ -123,6 +131,11 @@ namespace mymln
}
}
}
+
+
+
+
+
inline bool contain_paragraph(const point2d& point)
{return contain_paragraph(img_influ(point));}
inline bool contain_paragraph(const Label lbl)
@@ -349,7 +362,13 @@ namespace mymln
inline void jump_to_line(const point2d& point)
{ jump_to_line(img_influ(point)); }
+
+ inline bool contain_start_line(const point2d& point) // Point overload: looks the label up in img_influ and forwards to the Label overload.
+ { return contain_start_line(img_influ(point)); }
+ inline bool contain_end_line(const point2d& point) // Point overload: looks the label up in img_influ and forwards to the Label overload.
+ { return contain_end_line(img_influ(point)); }
+
inline bool contain_line(const point2d& point)
{ return contain_line(img_influ(point)); }
@@ -379,9 +398,61 @@ namespace mymln
add_new_line(lbl);
}
+ inline bool contain_line_self_link(const Label lbl) // Returns is_self_link() of the lines_union entry of `lbl`.
+ { return lines_union[lbl].is_self_link();}
+ inline bool move_line_self_link(const Label lbl) // If `lbl` has a line and is a self-link: links it to another label of that line, marks that label as self-link, returns true. Otherwise returns false.
+ {
+ if(lines_union[lbl] && lines_union[lbl].is_self_link())
+ {
+ if(lines_first_label[lines_union[lbl]] == lbl) // lbl is the first label of its line: use the last label as target
+ {
+ if(lines_union[lines_last_label[lines_union[lbl]]] == 0) // CHECK IF THE LAST LABEL HAS NOT BEEN REMOVED
+ recook_lines(); // presumably refreshes lines_first_label / lines_last_label - confirm
+
+ lines_union.add_link(lines_last_label[lines_union[lbl]], lbl);
+ lines_union.add_self_link(lines_last_label[lines_union[lbl]]);
+ }
+ else if(lines_last_label[lines_union[lbl]] == lbl) // lbl is the last label of its line: use the first label as target
+ {
+ if(lines_union[lines_first_label[lines_union[lbl]]] == 0) // CHECK IF THE FIRST LABEL HAS NOT BEEN REMOVED
+ recook_lines();
+
+ lines_union.add_link(lines_first_label[lines_union[lbl]], lbl);
+ lines_union.add_self_link(lines_first_label[lines_union[lbl]]);
+ }
+ else // lbl is neither the first nor the last label of its line
+ {
+ if(lines_union[lines_first_label[lines_union[lbl]]]) // first label still has a line: use it
+ {
+ lines_union.add_link(lines_first_label[lines_union[lbl]], lbl);
+ lines_union.add_self_link(lines_first_label[lines_union[lbl]]);
+ }
+ else if(lines_first_label[lines_union[lbl]]) // NOTE(review): tests the FIRST label but the branch links the LAST one - possibly meant lines_union[lines_last_label[...]]; confirm
+ {
+ lines_union.add_link(lines_last_label[lines_union[lbl]], lbl);
+ lines_union.add_self_link(lines_last_label[lines_union[lbl]]);
+ }
+ else
+ {
+ recook_lines(); // neither test passed: recook, then use the first label
+ lines_union.add_link(lines_first_label[lines_union[lbl]], lbl);
+ lines_union.add_self_link(lines_first_label[lines_union[lbl]]);
+ }
+ }
+ return true;
+ }
+ return false;
+ }
+
inline bool contain_line(const Label lbl)
{ return lines_union[lbl] != 0;}
+ inline bool contain_start_line(const Label lbl) // Returns the start_lines_mask entry of `lbl`.
+ { return start_lines_mask(lbl);}
+
+ inline bool contain_end_line(const Label lbl) // Returns the end_lines_mask entry of `lbl`.
+ { return end_lines_mask(lbl);} // must read end_lines_mask; reading start_lines_mask made this a duplicate of contain_start_line
+
inline void add_noise(const point2d& point)
{add_noise(img_influ(point));}
@@ -397,20 +468,31 @@ namespace mymln
Hseparator_mask(lbl) = false;
Vseparator_mask(lbl) = false;
alone_letters_mask(lbl) = false;
-
+ all_letters_mask(lbl) = false;
+ if(lines_cooked)
+ {
+ start_end_lines_mask(lbl) = false;
+ end_lines_mask(lbl) = false;
+ start_lines_mask(lbl) = false;
+ }
noise_mask(lbl) = true;
lines_union[lbl] = 0;
}
void inline add(Label lbl, int link)
{
+ all_mask(lbl) = true;
if (link == 0){add_noise(lbl);}
else if (link > 30){ add_separator(lbl);}
- else { add_letter(lbl);}
+ else
+ {
+ add_letter(lbl);
+ }
/* SET UP SPECIAL MASK TO FALSE */
implicit_separators_left_mask(lbl) = false;
implicit_separators_right_mask(lbl) = false;
kill_mask(lbl) = false;
+ temp_letter(lbl) = false;
}
void inline invalid_letter(const point2d& point)
{invalid_letter(img_influ(point));}
@@ -444,6 +526,7 @@ namespace mymln
alone_letters_mask(lbl) = true;
noise_mask(lbl) = false;
all_letters_mask(lbl) = true;
+ temp_letter = false;
}
void add_letter_coerce(const Label lbl)
{
@@ -456,12 +539,18 @@ namespace mymln
Hseparator_mask(lbl) = false;
alone_letters_mask(lbl) = false;
noise_mask(lbl) = false;
+ temp_letter = false;
}
void add_letter(const Label lbl)
{
CLet++;
if(label_valid_size_Min_(lbl, 3) || label_valid_size_Min_Large_(lbl, 2))
{
+ if(letter_ratio_XY(lbl) > 20)
+ {add_separator(lbl); return;}
+ if(letter_ratio_YX(lbl) > 10)
+ {add_separator(lbl); return;}
+
img_influ(_bboxgp[lbl].pcenter()) = lbl;
letters_mask(lbl) = true;
all_letters_mask(lbl) = true;
@@ -470,7 +559,7 @@ namespace mymln
Vseparator_mask(lbl) = false;
Hseparator_mask(lbl) = false;
alone_letters_mask(lbl) = false;
-
+ temp_letter = false;
noise_mask(lbl) = false;
}
else
@@ -491,12 +580,14 @@ namespace mymln
noise_mask(lbl) = false;
alone_letters_mask(lbl) = false;
all_letters_mask(lbl) = false;
+
}
else
add_noise(lbl);
}
void add_Hseparator(const Label lbl)
{
+ _bboxgp_influ[lbl] = _bboxgp[lbl].to_larger(4);
CSep++;
containers_mask(lbl) = false;
Vseparator_mask(lbl) = false;
@@ -509,6 +600,7 @@ namespace mymln
}
void add_Vseparator(const Label lbl)
{
+ _bboxgp_influ[lbl] = _bboxgp[lbl].to_larger(4);
CSep++;
containers_mask(lbl) = false;
Vseparator_mask(lbl) = true;
@@ -519,10 +611,19 @@ namespace mymln
noise_mask(lbl) = false;
all_letters_mask(lbl) = false;
}
+ bool inline separator_has(const point2d& A, const point2d& B) // Point overload: true if the enlarged box of A's label contains point B, or if the Label overload holds for the two labels.
+ {
+ return _bboxgp_influ[img_influ(A)].has(B) || separator_has(img_influ(A), img_influ(B));
+ }
+ bool inline separator_has(const Label A,const Label B) // True if the enlarged box of A contains pmin() or pmax() of B's bounding box.
+ {
+ return _bboxgp_influ[A].has(_bboxgp[B].pmin()) || _bboxgp_influ[A].has(_bboxgp[B].pmax()); // _bboxgp_influ is filled by add_Hseparator / add_Vseparator with to_larger(4)
+ }
void inline add_separator(const point2d& point)
{add_letter(img_influ(point)); }
void add_separator(const Label lbl)
{
+
if(label_valid_ratio_(lbl, _VSepRatio_Min,_VSepRatio_Max))
add_Vseparator(lbl);
else if(label_valid_ratio_(lbl, _HSepRatio_Min,_HSepRatio_Max))
@@ -540,6 +641,11 @@ namespace mymln
bool inline contain_separator(const Label lbl)
{return contain_(lbl, separators_mask);}
+ bool inline contain_Vseparator(const Label lbl) // Tests `lbl` against Vseparator_mask through contain_().
+ {return contain_(lbl, Vseparator_mask);}
+ bool inline contain_Hseparator(const Label lbl) // Tests `lbl` against Hseparator_mask through contain_().
+ {return contain_(lbl, Hseparator_mask);}
+
bool inline contain_letter(const Label lbl)
{return contain_(lbl, letters_mask);}
@@ -588,6 +694,7 @@ namespace mymln
short int allignV = lines_bbox[lines_union[Left]].pcenter()[0] - lines_bbox[lines_union[Right]].pcenter()[0];
return (!allignV < 0) && allignV * 2 > lines_bbox[lines_union[Left]].len(0);
}
+
inline bool allign_top( const Label Left, const Label Right)
{
@@ -595,6 +702,23 @@ namespace mymln
return allignV < label_size_(0, Left) && (_bboxgp[Left].pcenter()[0]) > (_bboxgp[Right].pcenter()[0]);
}
+ inline bool allign_top_large( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_top_large(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_top_large( const Label Left, const Label Right) // True when label_allign_(0,...) is below twice the len(0) of Left's line box and Left's centre has the larger coordinate 0.
+ {
+ short int allignV = label_allign_(0, Left, Right);
+ return allignV < lines_bbox[lines_union[Left]].len(0) * 2 && (_bboxgp[Left].pcenter()[0]) > (_bboxgp[Right].pcenter()[0]);
+ }
+
+ inline bool allign_bottom_large( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_bottom_large(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_bottom_large( const Label Left, const Label Right) // Same distance test as allign_top_large, but Left's centre must have the smaller coordinate 0.
+ {
+ short int allignV = label_allign_(0, Left, Right);
+ return allignV < lines_bbox[lines_union[Left]].len(0) * 2 && (_bboxgp[Left].pcenter()[0]) < (_bboxgp[Right].pcenter()[0]);
+ }
inline bool allign_up_line( const point2d& Left, const point2d& Right)
{return allign_up_line(img_influ(Left), img_influ(Right));}
@@ -628,7 +752,7 @@ namespace mymln
inline bool allign_left( const Label Left, const Label Right)
{
- return _bboxgp[Left].pmin()[0] > _bboxgp[Right].pmin()[0];
+ return _bboxgp[Left].pmin()[1] > _bboxgp[Right].pmin()[1];
}
inline bool allign_right( const point2d& Left, const point2d& Right)
@@ -637,11 +761,20 @@ namespace mymln
inline bool allign_right( const Label Left, const Label Right)
{
- return _bboxgp[Left].pmin()[0] < _bboxgp[Right].pmin()[0];
+ return _bboxgp[Left].pmin()[1] < _bboxgp[Right].pmin()[1];
}
+ inline bool allign_H_large_one( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_H_large_one(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_H_large_one( const Label Left, const Label Right) // True when 1.5 * label_allign_(1,...) is at most label_size_(1, Left) + 2.
+ {
+ short int allignV = label_allign_(1, Left, Right) * 1.5f; // the float product is converted back to short int
+ return allignV <= label_size_(1, Left) + 2;
+ }
+
inline bool allign_H_large( const point2d& Left, const point2d& Right)
{return allign_H_large(img_influ(Left), img_influ(Right));}
@@ -650,7 +783,18 @@ namespace mymln
short int allignV = label_allign_(1, Left, Right) * 1.5f;
return allignV < label_size_(1, Left);
}
+
+
+ inline bool allign_H_strict( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_H_strict(img_influ(Left), img_influ(Right));} // must forward to the strict Label overload, not to allign_H
+ inline bool allign_H_strict( const Label Left, const Label Right) // True when 5 * label_allign_(1,...) is below label_size_(1,...) of both labels.
+ {
+ short int allignH = label_allign_(1, Left, Right) * 5;
+ return allignH < label_size_(1, Left) && allignH < label_size_(1, Right);
+ }
+
+
inline bool allign_H( const point2d& Left, const point2d& Right)
{return allign_H(img_influ(Left), img_influ(Right));}
@@ -660,6 +804,29 @@ namespace mymln
return allignH < label_size_(1, Left) && allignH < label_size_(1, Right);
}
+
+ inline bool allign_H_min_paragraph( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_H_min_paragraph(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_H_min_paragraph( const Label Left, const Label Right) // True when twice the signed pmin()[1] offset between the two paragraph boxes is below len(0) of Left's paragraph box.
+ {
+ short int allignH = paragraphs_bbox[paragraphs_union[Left]].pmin()[1] - paragraphs_bbox[paragraphs_union[Right]].pmin()[1]; // signed difference: any negative offset passes the test below
+ allignH *= 2;
+ return allignH < paragraphs_bbox[paragraphs_union[Left]].len(0); // NOTE(review): offset is taken along dimension 1 but compared with the extent along dimension 0 - confirm
+ }
+
+
+ inline bool allign_H_max_paragraph( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_H_max_paragraph(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_H_max_paragraph( const Label Left, const Label Right) // NOTE(review): body is identical to allign_H_min_paragraph (it reads pmin(), not pmax()) - confirm whether pmax()[1] was intended.
+ {
+ short int allignH = paragraphs_bbox[paragraphs_union[Left]].pmin()[1] - paragraphs_bbox[paragraphs_union[Right]].pmin()[1]; // signed difference of the pmin()[1] coordinates
+ allignH *= 2;
+ return allignH < paragraphs_bbox[paragraphs_union[Left]].len(0);
+ }
+
+
inline bool allign_H_min( const point2d& Left, const point2d& Right)
{return allign_H_min(img_influ(Left), img_influ(Right));}
@@ -681,6 +848,56 @@ namespace mymln
inline bool allign_size_height( const point2d& Left, const point2d& Right)
{return allign_size_height(img_influ(Left), img_influ(Right));}
+
+
+ inline bool allign_proximity_top_strict( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_proximity_top_strict(img_influ(Left), img_influ(Right));} // must forward to the strict Label overload, not to allign_proximity_top
+
+ inline bool allign_proximity_top_strict( const Label Left, const Label Right) // True when twice the smaller gap along dimension 0 is below the larger side of Left's box.
+ {
+ box2d LB = _bboxgp[Left];
+ box2d RB = _bboxgp[Right];
+
+ int DisA = LB.pmax()[0] - RB.pmin()[0];
+ int DisB = RB.pmax()[0] - LB.pmin()[0];
+ if(DisA < 0){DisA = -DisA;} // absolute values of both candidate gaps
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; } // DisA now holds the smaller gap
+
+ unsigned int HA = LB.len(0);
+ unsigned int HB = LB.len(1); // both sizes come from Left's box; Right's box only contributes to the gap
+
+ if(HB > HA)
+ { HA = HB; } // HA now holds the larger side of Left's box
+ return (DisA * 2) < HA;
+ }
+
+
+ inline bool allign_proximity_top( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_proximity_top(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_proximity_top( const Label Left, const Label Right) // True when twice the smaller gap along dimension 0 is below three times the larger side of Left's box.
+ {
+ box2d LB = _bboxgp[Left];
+ box2d RB = _bboxgp[Right];
+
+ int DisA = LB.pmax()[0] - RB.pmin()[0];
+ int DisB = RB.pmax()[0] - LB.pmin()[0];
+ if(DisA < 0){DisA = -DisA;} // absolute values of both candidate gaps
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; } // DisA now holds the smaller gap
+
+ unsigned int HA = LB.len(0);
+ unsigned int HB = LB.len(1); // both sizes come from Left's box; Right's box only contributes to the gap
+
+ if(HB > HA)
+ { HA = HB; } // HA now holds the larger side of Left's box
+ return (DisA * 2) < HA * 3;
+ }
+
+
inline bool allign_proximity_V( const point2d& Left, const point2d& Right)
{return allign_proximity_V(img_influ(Left), img_influ(Right));}
@@ -752,7 +969,33 @@ namespace mymln
return (DisA * 2) < HA && (DisA * 2) < HB;
}
+
+
+ inline bool allign_proximity_paragraph_up_large( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_proximity_paragraph_up_large(img_influ(Left), img_influ(Right));}
+ inline bool allign_proximity_paragraph_up_large( const Label Left, const Label Right) // True when the smaller gap along dimension 0 between the two paragraph boxes is below the larger len(0) of their first lines.
+ {
+ box2d LB = paragraphs_bbox[paragraphs_union[Left]];
+ box2d RB = paragraphs_bbox[paragraphs_union[Right]];
+
+ int DisA = LB.pmax()[0] - RB.pmin()[0];
+ int DisB = RB.pmax()[0] - LB.pmin()[0];
+ if(DisA < 0){DisA = -DisA;} // absolute values of both candidate gaps
+ if(DisB < 0){DisB = -DisB;}
+ if(DisA > DisB)
+ { DisA = DisB; } // DisA now holds the smaller gap
+
+ unsigned int HA = lines_bbox[paragraphs_first_line[paragraphs_union[Left]]].len(0);
+ unsigned int HB = lines_bbox[paragraphs_first_line[paragraphs_union[Right]]].len(0);
+
+ if(HA < HB)
+ { HA = HB; } // HA now holds the larger of the two first-line extents
+ return (DisA) < HA;
+ }
+
+
+
inline bool allign_proximity_paragraph_up( const point2d& Left, const point2d& Right)
{return allign_proximity_paragraph_up(img_influ(Left), img_influ(Right));}
@@ -779,6 +1022,21 @@ namespace mymln
inline bool allign_proximity_line_large( const point2d& Left, const point2d& Right)
{return allign_proximity_line_large(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_size_height_paragraph_line( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {
+ return allign_size_height_paragraph_line(img_influ(Left), img_influ(Right));
+ }
+
+ inline bool allign_size_height_paragraph_line( const Label Left, const Label Right) // True when len(0) of Right's first paragraph line lies strictly between 1/2.2 and 2.2 times that of Left's.
+ {
+ short int SizeL = lines_bbox[paragraphs_first_line[paragraphs_union[Left]]].len(0);
+ short int SizeR = lines_bbox[paragraphs_first_line[paragraphs_union[Right]]].len(0);
+ return SizeR > (SizeL / 2.2f) && SizeR < (SizeL * 2.2);
+ }
+
+
+
inline bool allign_size_height_line( const point2d& Left, const point2d& Right)
{
return allign_size_height_line(img_influ(Left), img_influ(Right));
@@ -817,6 +1075,32 @@ namespace mymln
short int SizeR = _bboxgp[lines_union[Right]].len(1);
return SizeR >= (SizeL / 5) && SizeR <= (SizeL * 5);
}
+
+ inline bool allign_size_width( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {
+ return allign_size_width(img_influ(Left), img_influ(Right));
+ }
+
+ inline bool allign_size_width( const Label Left, const Label Right) // True when len(1) of Right's box is within [SizeL / 2, SizeL * 2] of Left's.
+ {
+ short int SizeL = _bboxgp[Left].len(1);
+ short int SizeR = _bboxgp[Right].len(1);
+ return SizeR >= (SizeL / 2) && SizeR <= (SizeL * 2); // SizeL / 2 is an integer division
+ }
+
+
+
+ inline bool allign_size_width_strict( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {
+ return allign_size_width_strict(img_influ(Left), img_influ(Right));
+ }
+
+ inline bool allign_size_width_strict( const Label Left, const Label Right) // True when len(1) of Right's box is within [SizeL / 1.5, SizeL * 1.5] of Left's.
+ {
+ short int SizeL = _bboxgp[Left].len(1);
+ short int SizeR = _bboxgp[Right].len(1);
+ return SizeR >= (SizeL / 1.5f) && SizeR <= (SizeL * 1.5f);
+ }
@@ -1094,6 +1378,22 @@ namespace mymln
}
+ inline bool allign_half_line_letter( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_half_line_letter(img_influ(Left), img_influ(Right));}
+ inline bool allign_half_line_letter( Label Left, Label Right) // True when len(0) of Left's line box exceeds 8 times len(0) of Right's own box.
+ {
+ return lines_bbox[lines_union[Left]].len(0) > (_bboxgp[Right].len(0) * 8);
+ }
+
+
+ inline bool allign_smaller_paragraph( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_smaller_paragraph(img_influ(Left), img_influ(Right));}
+ inline bool allign_smaller_paragraph( Label Left, Label Right) // True when len(1) of Left's paragraph box is strictly greater than that of Right's.
+ {
+ return paragraphs_bbox[paragraphs_union[Left]].len(1) > (paragraphs_bbox[paragraphs_union[Right]].len(1));
+ }
+
+
inline bool allign_smaller_line( const point2d& Left, const point2d& Right)
{return allign_smaller_line(img_influ(Left), img_influ(Right));}
inline bool allign_smaller_line( Label Left, Label Right)
@@ -1117,6 +1417,15 @@ namespace mymln
return allignV < label_size_(0, Left) && allignV < label_size_(0, Right);
}
+ inline bool allign_V_side( const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_V_side(img_influ(Left), img_influ(Right));}
+
+ inline bool allign_V_side( const Label Left, const Label Right) // True when 4 * label_allign_(0,...) is below label_size_(0, Left).
+ {
+ short int allignV = label_allign_(0, Left, Right);
+ return allignV * 4 < label_size_(0, Left);
+ }
+
inline bool allign_V_extra_large( const point2d& Left, const point2d& Right)
{return allign_V_extra_large(img_influ(Left), img_influ(Right));}
@@ -1164,7 +1473,13 @@ namespace mymln
return lines_bbox[lines_union[Left]].pmin()[0] < lines_bbox[lines_union[Right]].pmin()[0];
}
-
+ inline bool allign_base_line_strict(const point2d& Left, const point2d& Right) // Point overload: forwards the labels found in img_influ.
+ {return allign_base_line_strict(img_influ(Left), img_influ(Right));}
+ inline bool allign_base_line_strict(const Label Left, const Label Right) // True when 1.7 * label_allign_(0,...) is below label_size_(0, Left) and Left's centre has the smaller coordinate 0.
+ {
+ short int allignV = label_allign_(0, Left, Right) * 1.7f; // the float product is converted back to short int
+ return allignV < label_size_(0, Left) && _bboxgp[Left].pcenter()[0] < _bboxgp[Right].pcenter()[0];
+ }
inline bool allign_base_line(const point2d& Left, const point2d& Right)
{return allign_base_line(img_influ(Left), img_influ(Right));}
@@ -1187,8 +1502,8 @@ namespace mymln
{
Label FirstLine = paragraphs_first_line[paragraphs_union[Paragraph]];
return
- _bboxgp[FirstLine].pmin()[1] > paragraphs_bbox[paragraphs_union[Paragraph]].pmin()[1] +
- (paragraphs_bbox[paragraphs_union[Paragraph]].len(1) / 20);
+ lines_bbox[FirstLine].pmin()[1] > paragraphs_bbox[paragraphs_union[Paragraph]].pmin()[1] +
+ (paragraphs_bbox[paragraphs_union[Paragraph]].len(1) / 30);
}
void stat()
@@ -1201,20 +1516,201 @@ namespace mymln
std::cout << " lines(s) : " << CLine << std::endl;
}
+ void debug_breakpoint() // Saves a debug image to "break.ppm" and runs the external command "eog break.ppm" on it.
+ {
+ if(debug_buffer_enable) // buffer active: dump the buffer itself
+ {
+ debug_save_buffer("break.ppm"); // debug_save_buffer() clears both enable flags...
+ std::system("eog break.ppm");
+ debug_buffer_enable = true; // ...so they are switched back on after the viewer command returns
+ Enable_Debug_Buffer = true;
+ }
+ else // no active buffer: fall back to debug_save_all()
+ {
+ debug_save_all("break.ppm");
+ std::system("eog break.ppm");
+ }
+ }
void debug_set_image(image2d<bool>& source)
{debug_source = source;}
+
+ /// ADD TEMP LETTER
+ /// description : adds a label to the letter masks. The label remains a letter until
+ /// reset_temp_letter is called.
+ /// WARNING: the previous type of the label stays active (no other mask is cleared here).
+ inline void add_temp_letter(const point2d& Lbl) // Point overload: forwards the label found in img_influ.
+ {
+ add_temp_letter(img_influ(Lbl));
+ }
+ inline void add_temp_letter(const Label lbl) // No-op when `lbl` is already set in all_letters_mask.
+ {
+ if(!all_letters_mask(lbl))
+ {
+ letters_mask(lbl) = true;
+ all_letters_mask(lbl) = true;
+ temp_letter(lbl) = true; // remembered so reset_temp_letter() can undo the two lines above
+ }
+ }
+ inline void reset_temp_letter() // Clears the letter and line start/end masks of every label flagged in temp_letter.
+ {
+ for(int N = 0; N < Areas_Number_; N++)
+ {
+ if(temp_letter(N)) // NOTE(review): temp_letter(N) itself is not cleared here, so a later call clears the same labels again - confirm
+ {
+ letters_mask(N) = false;
+ all_letters_mask(N) = false;
+ start_end_lines_mask(N) = false; // NOTE(review): add_noise() guards these three masks with lines_cooked; no such guard here - confirm
+ end_lines_mask(N) = false;
+ start_lines_mask(N) = false;
+ }
+ }
+ }
+ inline void debug_disable_buffer() // Clears debug_buffer_enable only; Enable_Debug_Buffer is left untouched.
+ {
+ debug_buffer_enable = false;
+ }
+ inline void debug_enable_buffer() // Re-enables the debug buffer, creating it first when Enable_Debug_Buffer is not set.
+ {
+ if(Enable_Debug_Buffer) // flag is set by debug_create_buffer(): only the switch needs restoring
+ {
+ debug_buffer_enable = true;
+ }
+ else
+ {
+ debug_create_buffer(); // initializes the buffer and sets both flags
+ }
+ }
inline void debug_create_buffer()
{
mln::initialize(debug_buffer,img_influ);
debug_buffer_enable = true;
+ Enable_Debug_Buffer = true;
+ }
+ inline void debug_save_dot_graph(std::string file) // Writes the letter graph to `file` in Graphviz DOT syntax: one box node per label, one edge per (neighbour, vertex) pair.
+ {
+ fstream filestream(file.c_str(), fstream::out | fstream::trunc); // out|trunc creates the file; in|out cannot open a file that does not exist yet
+ filestream << "graph 1 { " << std::endl;
+ typedef vertex_image<point2d,bool> v_ima_g;
+ typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+ v_ima_g mask = fun_mask_all_letters();
+ mln_piter_(v_ima_g) v(mask.domain());
+ typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+ nbh_t nbh(mask);
+ mln_niter_(nbh_t) q(nbh, v);
+ for(int N = 0; N < Areas_Number_; N++) // node section: every label gets a node named _N
+ {
+ filestream
+ << "_"
+ << N
+ << " [pos=\""<<_bboxgp[N].pcenter()[1]
+ << ".0," << img_influ.domain().pmax()[0] - _bboxgp[N].pcenter()[0] // coordinate 0 is flipped against the image's pmax()[0]
+ << ".0\" "
+ << "shape=\"box\" "
+ << "width=\"" << ((Float)_bboxgp[N].len(1)) / 70.0f << "\" " // the quotient is already printed as a decimal; an extra ".0" suffix gave malformed numbers such as "0.31.0"
+ << "height=\"" << ((Float)_bboxgp[N].len(0)) / 70.0f << "\" "
+ << "label=\""<< get_tag(N) <<"\" "
+ << "];"
+
+ << std::endl;
+ }
+ for_all(v) // edge section: every edge statement ends with ';' and a newline
+ {
+ for_all(q)
+ {
+ if(contain_letter(q) && contain_letter(v))
+ {
+ if(contain_alone_letter(q) || contain_alone_letter(v))
+ {
+
+ filestream<< "_" << img_influ(q) << " -- _" << img_influ(v) << " [style=\"dotted\"];" << std::endl;
+ }
+ else
+ {
+ if(same_line(q,v))
+ {filestream<< "_" << img_influ(q) << " -- _" << img_influ(v) << " [color=\"green\"];" << std::endl;}
+ else
+ {filestream<< "_" << img_influ(q) << " -- _" << img_influ(v) << ";" << std::endl;}
+ }
+ }
+ else // at least one end is not a letter
+ {
+ filestream<< "_" << img_influ(q) << " -- _" << img_influ(v) << " [style=\"dotted\" color=\"red\"];" << std::endl;
+ }
+
+ }
+ }
+ filestream<< "}" << std::endl;
+ filestream.close();
}
inline void debug_save_buffer(std::string file)
{
debug_buffer_enable = false;
+ Enable_Debug_Buffer = false;
io::ppm::save(mln::debug::superpose(debug_buffer, debug_source, literal::white) , file);
}
-
+ inline void debug_save_buffer_paragraphs(std::string file) // Draws the valid paragraph boxes into the debug buffer, disables the buffer and saves it superposed on debug_source.
+ {
+ Enable_Debug_Buffer = false;
+ debug_buffer_enable = false;
+ for(unsigned int N = 0; N < paragraphs_bbox.size(); N++)
+ {
+ if(paragraphs_bbox[N].is_valid())
+ {
+ draw::box(debug_buffer, paragraphs_bbox[N], mln::literal::red); // paragraph box in red
+ if(paragraphs_bbox_influ[N].is_valid())
+ {
+ draw::box(debug_buffer, paragraphs_bbox_influ[N], mln::literal::orange); // its influence box in orange
+ }
+ }
+
+ }
+ io::ppm::save(mln::debug::superpose(debug_buffer, debug_source, literal::white) , file);
+ }
+ inline void debug_save_buffer_lines(std::string file) // Draws the valid line boxes into the debug buffer, disables the buffer and saves it superposed on debug_source.
+ {
+ Enable_Debug_Buffer = false;
+ debug_buffer_enable = false;
+ for(unsigned int N = 0; N < lines_bbox.size(); N++) // bound must be the size of the array being indexed (lines_bbox, not paragraphs_bbox)
+ {
+ if(lines_bbox[N].is_valid())
+ {
+ draw::box(debug_buffer, lines_bbox[N], mln::literal::blue); // line box in blue
+ }
+
+ }
+ io::ppm::save(mln::debug::superpose(debug_buffer, debug_source, literal::white) , file);
+ }
+ inline void debug_draw_box_red_buffer(const point2d& L) // Point overload: forwards the label found in img_influ.
+ {debug_draw_box_red_buffer(img_influ(L));}
+ inline void debug_draw_box_green_buffer(const point2d& L) // Point overload: forwards the label found in img_influ.
+ {debug_draw_box_green_buffer(img_influ(L));}
+ inline void debug_draw_box_red_buffer(const Label L) // Draws the bounding box of `L` in red; no-op while the debug buffer is disabled.
+ {
+ if(debug_buffer_enable)
+ draw::box(debug_buffer, _bboxgp[L], mln::literal::red);
+ }
+ inline void debug_draw_box_green_buffer(const Label L) // Draws the bounding box of `L` in green; no-op while the debug buffer is disabled.
+ {
+ if(debug_buffer_enable)
+ draw::box(debug_buffer,_bboxgp[L], mln::literal::green);
+ }
+ inline void debug_draw_box_green_influence_buffer(const Label L) // Draws both the bounding box and the _bboxgp_influ box of `L` in green; no-op while the debug buffer is disabled.
+ {
+ if(debug_buffer_enable)
+ {
+ draw::box(debug_buffer,_bboxgp[L], mln::literal::green);
+ draw::box(debug_buffer,_bboxgp_influ[L], mln::literal::green);
+ }
+ }
+ inline void debug_draw_box_red_influence_buffer(const Label L) // Draws both the bounding box and the _bboxgp_influ box of `L` in red; no-op while the debug buffer is disabled.
+ {
+ if(debug_buffer_enable)
+ {
+ draw::box(debug_buffer,_bboxgp[L], mln::literal::red);
+ draw::box(debug_buffer,_bboxgp_influ[L], mln::literal::red);
+ }
+ }
inline void debug_draw_line_red_buffer(const point2d& A,const point2d& B )
{
if(debug_buffer_enable)
@@ -1225,35 +1721,31 @@ namespace mymln
if(debug_buffer_enable)
draw::line(debug_buffer, A, B, mln::literal::green);
}
-
- void debug_save_union(std::string file)
- {debug_save_union(file, debug_source);}
- void debug_save_union(std::string file, image2d<bool> source)
+ inline void debug_draw_line_orange_buffer(const point2d& A,const point2d& B )
{
- image2d<value::rgb8> ima_color;
- mln::initialize(ima_color,img_influ);
-
- for(unsigned int N = 1; N < lines_union.size(); N++)
- {
- if(lines_union[N])
- {
- if(N == lines_union.link(N))
- draw::box(ima_color, _bboxgp[N], mln::literal::blue);
- else
- draw::line(ima_color, _bboxgp[N].pcenter(), _bboxgp[lines_union.link(N)].pcenter(), mln::literal::blue);
- }
- }
- for(unsigned int N = 1; N < paragraphs_union.size(); N++)
+ if(debug_buffer_enable)
+ draw::line(debug_buffer, A, B, mln::literal::orange);
+ }
+ template<typename UnionData>
+ void debug_union(mymln::util::union_find<UnionData> dat)
+ {
+ if(debug_buffer_enable)
{
- if(paragraphs_union[N])
+ for(unsigned int N = 1; N < dat.size(); N++)
{
- if(N == paragraphs_union.link(N))
- draw::box(ima_color, _bboxgp[N], mln::literal::red);
- else
- draw::line(ima_color, _bboxgp[N].pcenter(), _bboxgp[paragraphs_union.link(N)].pcenter(), mln::literal::red);
+ if(dat[N])
+ {
+ if(N == dat.link(N))
+ draw::box(debug_buffer, _bboxgp[N], mln::literal::blue);
+ else
+ draw::line(debug_buffer, _bboxgp[N].pcenter(), _bboxgp[dat.link(N)].pcenter(), mln::literal::blue);
+ }
+ else if(dat.link(N) != 0)
+ {
+ draw::line(debug_buffer, _bboxgp[N].pcenter(), _bboxgp[dat.link(N)].pcenter(), mln::literal::red);
+ }
}
}
- io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file);
}
void debug_save_lines(std::string file)
{debug_save_lines(file, debug_source);}
@@ -1280,7 +1772,48 @@ namespace mymln
}
-
+ /// Convenience overload: saves the separator view over the stored debug source.
+ void debug_save_all_separators(std::string file)
+ {
+   debug_save_all_separators(file, debug_source);
+ }
+ /// Save a colour-coded view of the components, superposed on `source`, to `file`.
+ ///
+ /// Letter components (contain_letter or contain_alone_letter) are boxed in:
+ ///   red   - flagged in implicit_separators_right_mask,
+ ///   cyan  - flagged in implicit_separators_left_mask,
+ ///   olive - flagged in temp_letter,
+ ///   green - otherwise.
+ /// When a component is flagged both left and right, the right flag wins if
+ /// sep_right_cooked is set, otherwise the left flag wins.
+ /// Separator components are boxed in yellow. Invalid boxes are ignored.
+ void debug_save_all_separators(std::string file, image2d<bool> source)
+ {
+   image2d<value::rgb8> ima_color;
+   mln::initialize(ima_color,img_influ);
+
+   for(unsigned int N = 0; N < _bboxgp.size(); N++)
+   {
+     if(!_bboxgp[N].is_valid())
+       continue;
+
+     if(contain_letter(N) || contain_alone_letter(N))
+     {
+       // Right separators take precedence only once they have been cooked;
+       // before that, a left flag hides the right one.
+       bool draw_red;
+       if(sep_right_cooked)
+         draw_red = implicit_separators_right_mask(N);
+       else
+         draw_red = !implicit_separators_left_mask(N) && implicit_separators_right_mask(N);
+
+       if(draw_red)
+         draw::box(ima_color, _bboxgp[N], mln::literal::red);
+       else if(implicit_separators_left_mask(N))
+         draw::box(ima_color, _bboxgp[N], mln::literal::cyan);
+       else if(temp_letter(N))
+         draw::box(ima_color, _bboxgp[N], mln::literal::olive);
+       else
+         draw::box(ima_color, _bboxgp[N], mln::literal::green);
+     }
+     else if(contain_separator(N))
+     {
+       draw::box(ima_color, _bboxgp[N], mln::literal::yellow);
+     }
+   }
+
+   io::ppm::save(mln::debug::superpose(ima_color, source, literal::white) , file);
+ }
void debug_save_all(std::string file)
@@ -1294,7 +1827,15 @@ namespace mymln
{
if(_bboxgp[N].is_valid() && contain_letter(N))
{
- draw::box(ima_color, _bboxgp[N], mln::literal::cyan);
+ if(temp_letter(N))
+ draw::box(ima_color, _bboxgp[N], mln::literal::teal);
+ else
+ draw::box(ima_color, _bboxgp[N], mln::literal::cyan);
+ }
+ else if(_bboxgp[N].is_valid() && contain_separator(N))
+ {
+ draw::box(ima_color, _bboxgp[N], mln::literal::green);
+ draw::box(ima_color, _bboxgp_influ[N], mln::literal::green);
}
}
@@ -1369,9 +1910,7 @@ namespace mymln
{return fun_mask_(all_letters_mask);}
vertex_image<point2d,bool> fun_mask_all()
{
- typedef vertex_image<point2d,bool> v_ima_g;
- v_ima_g result(_area_graph);
- return result;
+ return fun_mask_(all_mask);
}
vertex_image<point2d,bool> fun_mask_letters()
{ return fun_mask_(letters_mask); }
@@ -1456,7 +1995,12 @@ namespace mymln
{ return lines_bbox[lines_union[L]].len(1); }
-
+ /// Point overload: resolves the label at `point` through img_influ.
+ inline Float letter_ratio_YX(const point2d& point)
+ {
+   const Label lbl = img_influ(point);
+   return letter_ratio_YX(lbl);
+ }
+ /// Ratio of the box extent along dimension 0 to the extent along
+ /// dimension 1 for the bounding box of `Letter`.
+ inline Float letter_ratio_YX(Label Letter)
+ {
+   const Float len0 = (Float)_bboxgp[Letter].len(0);
+   const Float len1 = (Float)_bboxgp[Letter].len(1);
+   return len0 / len1;
+ }
inline Float letter_ratio_XY(const point2d& point)
{return letter_ratio_XY(img_influ(point));}
inline Float letter_ratio_XY(Label Letter)
@@ -1529,7 +2073,22 @@ namespace mymln
lines_influ_bbox[lines_union[L2]].has(lines_influ_bbox[lines_union[L1]].pmin()) ||
lines_influ_bbox[lines_union[L2]].has(lines_influ_bbox[lines_union[L1]].pmax()) ;
}
-
+ /// Point overload: resolves both labels through img_influ.
+ inline bool allign_size_x_height( const point2d& Left, const point2d& Right)
+ {return allign_size_x_height(img_influ(Left), img_influ(Right));}
+
+ /// Compare the largest side of two components.
+ /// For each label the larger of label_size_(0, .) and label_size_(1, .)
+ /// is taken; the result is true when Right's value lies strictly between
+ /// one third of Left's value (integer division) and Left's value.
+ inline bool allign_size_x_height( const Label Left, const Label Right)
+ {
+   const short int left0 = label_size_(0, Left);
+   const short int left1 = label_size_(1, Left);
+   const short int right0 = label_size_(0, Right);
+   const short int right1 = label_size_(1, Right);
+   const short int left_max = (left0 < left1) ? left1 : left0;
+   const short int right_max = (right0 < right1) ? right1 : right0;
+   return right_max > (left_max / 3) && right_max < left_max;
+ }
+
+
inline bool allign_size_large_inside( const point2d& Left, const point2d& Right)
{return allign_size_large_inside(img_influ(Left), img_influ(Right));}
@@ -1632,6 +2191,7 @@ namespace mymln
separators_middle.fill(0);
cook_separators_right_();
}
+
inline void propage_line_link()
{ lines_union.propage_links(); }
/*image_if<image2d<Label> masked_image_letters()
@@ -1677,6 +2237,34 @@ namespace mymln
inline bool contain_implicit_separator(const Label lbl)
{return implicit_separators_union[lbl] != 0; }
+
+ /// Point overload: resolves both labels through img_influ.
+ inline void merge_separators(const point2d& A, const point2d& B)
+ {
+   merge_separators(img_influ(A), img_influ(B));
+ }
+ /// Absorb separator B into separator A.
+ /// Nothing happens when either label is zero, either is already killed,
+ /// or both labels are equal. Otherwise the pixel at the centre of B's box
+ /// is relabelled to A, B's box and influence box are merged into A's and
+ /// then reset, A is flagged as a separator, and B is removed from the
+ /// separator masks and from all_mask and marked as killed.
+ inline void merge_separators(const Label A, const Label B)
+ {
+   if(!(A && B && !kill_mask(A) && !kill_mask(B) && A != B))
+     return;
+
+   // Must happen before B's box is reset: the centre is read from it.
+   img_influ(_bboxgp[B].pcenter()) = A;
+
+   _bboxgp[A].merge(_bboxgp[B]);
+   _bboxgp[B] = box2d();
+   _bboxgp_influ[A].merge(_bboxgp_influ[B]);
+   _bboxgp_influ[B] = box2d();
+
+   separators_mask(A) = true;
+   separators_mask(B) = false;
+   Vseparator_mask(B) = false;
+   Hseparator_mask(B) = false;
+   all_mask(B) = false;
+   kill_mask(B) = true;
+ }
+
+
inline void merge(const point2d& A, const point2d& B)
{
merge(img_influ(A), img_influ(B));
@@ -1689,6 +2277,7 @@ namespace mymln
_bboxgp[A].merge(_bboxgp[B]);
_bboxgp[B] = box2d();
kill_mask(B) = true;
+ all_mask(B) = false;
if(letters_mask(A) && letters_mask(B))
{
if(lines_union.is_self_link(B))
@@ -1760,7 +2349,7 @@ namespace mymln
implicit_separators_right_mask(lbl) = false;
}
- inline Label& operator[](point2d i)
+ inline const Label& operator[](point2d i)
{ return img_influ(i);}
inline point2d operator[](Label i)
@@ -1807,14 +2396,19 @@ namespace mymln
inline void recook_paragraphs()
{
paragraphs_len.fill(0);
+ paragraphs_letter_len.fill(0);
cook_paragraphs_();
}
inline void cook_paragraphs()
{
paragraphs_bbox = mln::util::array<box2d>(NPar + 1);
paragraphs_len = mln::util::array<unsigned int>(NPar + 1);
+ paragraphs_letter_len = mln::util::array<unsigned int>(NPar + 1);
+
paragraphs_first_line = mln::util::array<unsigned int>(NPar + 1);
paragraphs_bbox_influ = mln::util::array<box2d>(NPar + 1);
+ paragraphs_len.fill(0);
+ paragraphs_letter_len.fill(0);
cook_paragraphs_();
}
@@ -1836,6 +2430,18 @@ namespace mymln
lines_width.fill(0);
compute_letter_middle_width_();
}
+ /// Allocate paragraphs_mid_height (NPar + 1 entries), zero it and compute
+ /// the per-paragraph values with compute_paragraph_middle_height_().
+ inline void compute_paragraph_middle_height()
+ {
+   paragraphs_mid_height = mln::util::array<unsigned int>(NPar + 1);
+   // Zero the array that was just allocated; the accumulation in
+   // compute_paragraph_middle_height_() adds on top of these values.
+   paragraphs_mid_height.fill(0);
+   compute_paragraph_middle_height_();
+ }
+ /// Allocate paragraphs_mid_width (NPar + 1 entries), zero it and compute
+ /// the per-paragraph values with compute_paragraph_middle_width_().
+ inline void compute_paragraph_middle_width()
+ {
+   mln::util::array<unsigned int> zeroed(NPar + 1);
+   zeroed.fill(0);
+   paragraphs_mid_width = zeroed;
+   compute_paragraph_middle_width_();
+ }
inline void recompute_letter_middle_space()
{
lines_space.fill(0);
@@ -1867,7 +2473,35 @@ namespace mymln
{return get_letter_middle_width(img_influ(point));}
inline unsigned int get_letter_middle_width(const Label lbl)
{return lines_width[lines_union[lbl]];}
+
+
+ /// Point overload: resolves both labels through img_influ.
+ inline bool compatible_paragraph_middle_width(const point2d& A, const point2d& B)
+ { return compatible_paragraph_middle_width(img_influ(A), img_influ(B));}
+ /// Tell whether the paragraphs of A and B have similar mean letter widths.
+ /// The reference length is the smaller len(0) of the first-line boxes of
+ /// the two paragraphs; the widths are compatible when eight times their
+ /// absolute difference is below that reference.
+ inline bool compatible_paragraph_middle_width(const Label A, const Label B)
+ {
+   short int LA = lines_bbox[paragraphs_first_line[paragraphs_union[A]]].len(0);
+   // Use B's paragraph here: reading A twice made the minimum below a no-op.
+   short int LB = lines_bbox[paragraphs_first_line[paragraphs_union[B]]].len(0);
+   if(LA > LB)
+   {LA = LB;}
+   return get_paragraph_middle_width_diff(A, B) * 8 < LA;
+ }
+ /// Point overload: resolves both labels through img_influ.
+ inline int get_paragraph_middle_width_diff(const point2d& A, const point2d& B)
+ {
+   return get_paragraph_middle_width_diff(img_influ(A), img_influ(B));
+ }
+ /// Absolute difference between the paragraphs_mid_width entries of the
+ /// paragraphs containing A and B.
+ inline int get_paragraph_middle_width_diff(const Label A, const Label B)
+ {
+   const int delta = paragraphs_mid_width[paragraphs_union[A]] - paragraphs_mid_width[paragraphs_union[B]];
+   return (delta < 0) ? -delta : delta;
+ }
+
+ /// Point overload: resolves the label through img_influ and forwards to
+ /// the label overload below (the previous callee name,
+ /// get_letter_paragraph_width, is not defined alongside these accessors).
+ inline unsigned int get_paragraph_middle_width(const point2d& point)
+ {return get_paragraph_middle_width(img_influ(point));}
+ /// paragraphs_mid_width entry of the paragraph containing `lbl`.
+ inline unsigned int get_paragraph_middle_width(const Label lbl)
+ {return paragraphs_mid_width[paragraphs_union[lbl]];}
+
+ /// Point overload: resolves the label at `lbl` through img_influ and
+ /// returns the line identifier of that label.
+ inline unsigned int get_line_ID(const point2d& lbl)
+ {
+   const Label resolved = img_influ(lbl);
+   return get_line_ID(resolved);
+ }
inline unsigned int get_line_ID(const Label lbl)
{
return lines_union[lbl];
@@ -1894,6 +2528,26 @@ namespace mymln
if(lines_seq_pos[line_ID] == line_ID){ line_ID = 0; }
line_ID = lines_seq_pos[line_ID];
}
+ /// Advance `line_ID` to the next line recorded in lines_seq_pos.
+ /// A line linked to itself marks the end of the sequence: `line_ID`
+ /// becomes 0, matching what return_next_line() reports. (Previously the
+ /// code fell through and read lines_seq_pos[0] after resetting the id.)
+ inline void get_next_line(Label& line_ID)
+ {
+   if(lines_seq_pos[line_ID] == line_ID)
+     line_ID = 0;
+   else
+     line_ID = lines_seq_pos[line_ID];
+ }
+ /// Next line recorded in lines_seq_pos for `line_ID`, or 0 when the line
+ /// is linked to itself.
+ inline unsigned int return_next_line(const Label& line_ID)
+ {
+   const unsigned int successor = lines_seq_pos[line_ID];
+   return (successor == line_ID) ? 0 : successor;
+ }
+ /// Move `line_ID` to the previous line recorded in lines_seq_pos_reverse.
+ /// A line linked to itself marks the start of the sequence: `line_ID`
+ /// becomes 0, matching what return_previous_line() reports. (Previously
+ /// the code fell through and read lines_seq_pos_reverse[0].)
+ inline void get_previous_line(Label& line_ID)
+ {
+   if(lines_seq_pos_reverse[line_ID] == line_ID)
+     line_ID = 0;
+   else
+     line_ID = lines_seq_pos_reverse[line_ID];
+ }
+ /// Previous line recorded in lines_seq_pos_reverse for `line_ID`, or 0
+ /// when the line is linked to itself.
+ inline unsigned int return_previous_line(const Label& line_ID)
+ {
+   const unsigned int predecessor = lines_seq_pos_reverse[line_ID];
+   return (predecessor == line_ID) ? 0 : predecessor;
+ }
inline void get_next_letter(Label& lbl)
{
if(lines_seq[lbl] == lbl){ lbl = 0; }
@@ -1925,7 +2579,23 @@ namespace mymln
}
return line;
}
+ /// (Re)allocate the forward and backward line-sequence tables with
+ /// NLine + 1 entries each.
+ inline void reserve_previous_next_line()
+ {
+   const unsigned int table_size = NLine + 1;
+   lines_seq_pos_reverse = mln::util::array<unsigned int>(table_size);
+   lines_seq_pos = mln::util::array<unsigned int>(table_size);
+ }
+ /// Point overload: resolves both labels through img_influ.
+ inline void add_line_previous(const point2d& A,const point2d& Prev)
+ {
+   add_line_previous(img_influ(A), img_influ(Prev));
+ }
+ /// Point overload: resolves both labels through img_influ.
+ inline void add_line_next(const point2d& A, const point2d& Next)
+ {
+   add_line_next(img_influ(A), img_influ(Next));
+ }
+ /// Record the line of `Prev` as the predecessor of the line of `A`
+ /// in lines_seq_pos_reverse.
+ inline void add_line_previous(const Label A,const Label Prev)
+ {
+   const unsigned int line = get_line_ID(A);
+   const unsigned int previous = get_line_ID(Prev);
+   lines_seq_pos_reverse[line] = previous;
+ }
+ /// Record the line of `Next` as the successor of the line of `A`
+ /// in lines_seq_pos.
+ inline void add_line_next(const Label A, const Label Next)
+ {
+   const unsigned int line = get_line_ID(A);
+   const unsigned int following = get_line_ID(Next);
+   lines_seq_pos[line] = following;
+ }
private:
+ bool Enable_Debug_Buffer;
+ fun::i2v::array<bool> temp_letter;
+
fun::i2v::array<bool> implicit_separators_left_mask;
fun::i2v::array<bool> implicit_separators_right_mask;
mln::util::array<unsigned int> separators_len_right;
@@ -1933,7 +2603,8 @@ namespace mymln
mln::util::array<unsigned int> separators_middle;
mln::util::array<unsigned int> separators_marging;
-
+ bool sep_right_cooked;
+ bool lines_cooked;
inline void cook_separators_()
{
implicit_separators_left_mask(0) = false;
@@ -1998,6 +2669,7 @@ namespace mymln
inline void cook_separators_right_()
{
+ sep_right_cooked = true;
implicit_separators_right_mask(0) = false;
for(unsigned int N = 1; N < implicit_separators_union.size(); N++)
{
@@ -2068,6 +2740,7 @@ namespace mymln
mln::util::array<unsigned int> lines_last_label;
mln::util::array<unsigned int> lines_seq;
mln::util::array<unsigned int> lines_seq_pos;
+ mln::util::array<unsigned int> lines_seq_pos_reverse;
mln::util::array<box2d> lines_bbox;
mln::util::array<box2d> lines_influ_bbox;
mln::util::array<Label> lines_split;
@@ -2206,9 +2879,11 @@ namespace mymln
std::cout << count << endl;
std::cout << "linear";
}
-
+ //NOTE: THIS FUNCTION IS ONE VERY COMMON FUNCTION
+ // PLEASE OPTIMIZE ME
inline void cook_lines_()
{
+ lines_cooked = true;
Cooked_CLine = CLine;
for(unsigned int N = 1; N < lines_union.size(); N++)
{
@@ -2402,6 +3077,7 @@ namespace mymln
fun::i2v::array<bool> containers_mask;
fun::i2v::array<bool> noise_mask;
fun::i2v::array<bool> kill_mask;
+ fun::i2v::array<bool> all_mask;
mln::util::array<std::string> tag_lbl;
mln::util::array<bool> Btag_lbl;
@@ -2417,20 +3093,59 @@ namespace mymln
unsigned int CSep ;
unsigned int CSepH ;
unsigned int CSepV ;
+
mymln::util::union_find<Label> paragraphs_union;
unsigned int CPar ;
unsigned int NPar ;
mln::util::array<unsigned int> paragraphs_first_label;
mln::util::array<unsigned int> paragraphs_last_label;
+ mln::util::array<unsigned int> paragraphs_mid_width;
+ mln::util::array<unsigned int> paragraphs_mid_height;
mln::util::array<unsigned int> paragraphs_assoc;
mln::util::array<unsigned int> paragraphs_len;
+ mln::util::array<unsigned int> paragraphs_letter_len;
/* NOTE THESE ARRAYS MUST BE INITIALIZEDD WITH THE NUMBER OF PARAGRAPH */
mln::util::array<box2d> paragraphs_bbox;
mln::util::array<box2d> paragraphs_bbox_influ;
mln::util::array<unsigned int> paragraphs_first_line;
+
+
+ /// Accumulate, per paragraph, len(1) of the box of every component that
+ /// belongs to a paragraph, then divide each sum by the paragraph's
+ /// letter count (paragraphs_letter_len). Paragraphs with a zero letter
+ /// count keep their raw sum.
+ inline void compute_paragraph_middle_width_()
+ {
+   for(unsigned int area = 1; area < Areas_Number_; area++)
+   {
+     if(!paragraphs_union[area])
+       continue;
+     paragraphs_mid_width[paragraphs_union[area]] += _bboxgp[area].len(1);
+   }
+   for(unsigned int par = 1; par < paragraphs_mid_width.size() && par < paragraphs_letter_len.size(); par++)
+   {
+     if(!paragraphs_letter_len[par])
+       continue;
+     paragraphs_mid_width[par] /= paragraphs_letter_len[par];
+   }
+ }
+ /// Accumulate, per paragraph, len(0) of the box of every component that
+ /// belongs to a paragraph, then divide each sum by the paragraph's
+ /// letter count (paragraphs_letter_len). Paragraphs with a zero letter
+ /// count keep their raw sum.
+ inline void compute_paragraph_middle_height_()
+ {
+   for(unsigned int N = 1; N < Areas_Number_; N++)
+   {
+     if(paragraphs_union[N])
+     {
+       paragraphs_mid_height[paragraphs_union[N]] += _bboxgp[N].len(0);
+     }
+   }
+   for(unsigned int N = 1; N < paragraphs_mid_height.size() && N < paragraphs_letter_len.size(); N++)
+   {
+     // Guard on the actual divisor: testing paragraphs_len (line count)
+     // allowed a division by a zero letter count.
+     if(paragraphs_letter_len[N])
+       paragraphs_mid_height[N] /= paragraphs_letter_len[N];
+   }
+ }
inline void first_recognition()
{
@@ -2463,6 +3178,7 @@ namespace mymln
if(lines_len[N] && paragraphs_union[lines_first_label[N]])
{
paragraphs_len[paragraphs_union[lines_first_label[N]]]++;
+ paragraphs_letter_len[paragraphs_union[lines_first_label[N]]] += lines_len[N];
if(paragraphs_first_line[paragraphs_union[lines_first_label[N]]])
{
if(
@@ -2526,6 +3242,7 @@ namespace mymln
mln::util::array<box2d> _bboxgp;
+ mln::util::array<box2d> _bboxgp_influ;
/* DOCUMENT DATA */
g_vertices_p _area_graph;
diff --git a/scribo/sandbox/raphael/code/my/document/outline.hh b/scribo/sandbox/raphael/code/my/document/outline.hh
new file mode 100644
index 0000000..00d62a6
--- /dev/null
+++ b/scribo/sandbox/raphael/code/my/document/outline.hh
@@ -0,0 +1,87 @@
+using namespace mln;
+namespace mymln
+{
+ namespace document
+ {
+
+
+
+ /// Build a binary outline mask of `iz_`.
+ ///
+ /// Pass 1: a pixel is set when the mean value of its neighbours (as
+ /// enumerated by `nbh_`) exceeds the pixel's own value by more than 10.
+ /// Pass 2: a set pixel with no set neighbour is cleared again.
+ ///
+ /// NOTE(review): `out` is addressed with offsets taken from `iz`, so both
+ /// images are assumed to share the same memory layout - confirm.
+ /// NOTE(review): pass 2 reads `out` at neighbour offsets; neighbours that
+ /// fall outside the iterated pixels are never written here - confirm
+ /// whether the border of `out` needs an explicit fill.
+ template <typename I, typename N>
+ image2d<bool> outline(const Image<I>& iz_, const Neighborhood<N>& nbh_)
+ {
+   const I& iz = exact(iz_);
+   const N& nbh = exact(nbh_);
+
+   image2d<bool> out(iz.domain());
+
+   mln_pixter(const I) p(iz);
+   mln_nixter(const I, N) n(p, nbh);
+
+   for_all(p)
+   {
+     unsigned int count = 0;
+     int variate = 0;
+     for_all(n)
+     {
+       variate += n.val();
+       count++;
+     }
+     bool is_outline = false;
+     // An empty neighbourhood would otherwise divide by zero.
+     if(count != 0)
+     {
+       variate /= count;
+       variate -= p.val();
+       is_outline = variate > 10;
+     }
+     // Always write the pixel so that no value of `out` is left unset.
+     out.element(p.offset()) = is_outline;
+   }
+
+   for_all(p)
+   {
+     if(!out.element(p.offset()))
+       continue;
+     unsigned int count = 0;
+     for_all(n)
+     {
+       if(out.element(n.offset()))
+         count++;
+     }
+     if(count == 0)
+     {
+       out.element(p.offset()) = false;
+     }
+   }
+
+   return out;
+ }
+
+ /// Convert a colour image to an 8-bit grey image.
+ /// Each output pixel is ((red + blue + green) / 9) * 3 using integer
+ /// arithmetic, i.e. the channel mean rounded down to a multiple of 3.
+ /// NOTE(review): `out` is addressed with offsets taken from `iz`, so both
+ /// images are assumed to share the same memory layout - confirm.
+ template <typename I>
+ image2d<value::int_u8> luminance(const Image<I>& iz_)
+ {
+   const I& iz = exact(iz_);
+
+   image2d<value::int_u8> out(iz.domain());
+
+   mln_pixter(const I) p(iz);
+   for_all(p)
+   {
+     value::rgb8 V = p.val();
+     out.element(p.offset()) = ( (V.red() + V.blue() + V.green()) / 9 );
+     out.element(p.offset()) *= 3;
+   }
+   return out;
+ }
+
+ /// Legacy two-parameter form, kept for callers that spell out both
+ /// template arguments. `N` is unused and cannot be deduced from the
+ /// argument, so plain `luminance(img)` resolves to the overload above.
+ template <typename I, typename N>
+ image2d<value::int_u8> luminance(const Image<I>& iz_)
+ {
+   return luminance<I>(iz_);
+ }
+
+ /// Binarize a colour image: grey conversion with luminance(), then
+ /// outline() with the c8 neighbourhood.
+ template <typename I>
+ image2d<bool> binarize_outline(const Image<I>& iz_)
+ {
+   // luminance's second template parameter cannot be deduced from the
+   // argument, so both template arguments are given explicitly.
+   return mymln::document::outline(mymln::document::luminance<I, neighb2d>(iz_), c8());
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/scribo/sandbox/raphael/code/my/document/separator.hh b/scribo/sandbox/raphael/code/my/document/separator.hh
index 2cd98ea..de3b4d0 100644
--- a/scribo/sandbox/raphael/code/my/document/separator.hh
+++ b/scribo/sandbox/raphael/code/my/document/separator.hh
@@ -10,12 +10,110 @@ namespace mymln
{
namespace separators
{
+ //TODO: change contain_Hseparator to VSeparator; the H was only there for testing.
+ /// Grow vertical separators over the whole component graph.
+ ///
+ /// Every vertical-separator vertex without a set starts a new union-find
+ /// set. A neighbour that is not yet a vertical separator is either
+ /// promoted to one (when the current vertex is a separator and
+ /// separator_has(v, q) holds) or merely linked to the current vertex
+ /// (when the three geometric tests below hold). After the links are
+ /// propagated, every label that ended up in a set is flagged as a
+ /// vertical separator.
+ template<typename L, typename F, typename D>
+ void separators_rebuild(mymln::document::document<L,F,D>& doc)
+ {
+   typedef vertex_image<point2d,bool> v_ima_g;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+   typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+
+   v_ima_g mask = doc.fun_mask_all();
+   mln_piter_(v_ima_g) v(mask.domain());
+   nbh_t nbh(mask);
+   mln_niter_(nbh_t) q(nbh, v);
+   mymln::util::union_find<L> sep_union(doc.size());
+
+   for_all(v)
+   {
+     if(!doc[v]){continue;}
+
+     // Open a new set for a separator that does not belong to one yet.
+     if(doc.contain_Vseparator(doc[v]))
+     {
+       if(!sep_union[doc[v]])
+       {
+         sep_union[doc[v]] = sep_union.new_set();
+         sep_union.add_self_link(doc[v]);
+         /*doc.debug_union(sep_union);
+         doc.debug_breakpoint();*/
+       }
+     }
+
+     for_all(q)
+     {
+       // Neighbours that are already vertical separators are left alone.
+       if(!doc.contain_Vseparator(doc[q]))
+       {
+         if(doc.contain_Vseparator(doc[v]) && doc.separator_has(v, q))
+         {
+           doc.add_Vseparator(doc[q]);
+           doc.debug_draw_box_red_buffer(doc[q]);
+           sep_union[doc[q]] = sep_union[doc[v]];
+           sep_union.add_link(doc[v], doc[q]);
+         }
+         else if(doc.letter_ratio_XY (q) <= 1 &&
+                 doc.allign_H_large_one(q, v) &&
+                 doc.allign_proximity_top_strict(q,v))
+         {
+           sep_union.add_link(doc[v], doc[q]);
+         }
+       }
+     }
+   }
+
+   doc.debug_union(sep_union);
+   sep_union.propage_links();
+
+   // Promote every label that ended up in a set.
+   for(int N = 0; N < doc.size(); N++)
+   {
+     if(sep_union[N] && !doc.contain_Vseparator(N))
+     {
+       doc.add_Vseparator(N);
+     }
+   }
+ }
+ /// Merge neighbouring components into separators, iterating over the
+ /// graph restricted by fun_mask_separators().
+ ///
+ /// For a separator vertex, every neighbour for which separator_has(v, q)
+ /// holds is merged into it. For a non-separator vertex, the first such
+ /// neighbour is flagged as a vertical separator, merged, and the
+ /// neighbour scan stops.
+ ///
+ /// The union_find instance the original allocated here was never read
+ /// and has been dropped.
+ template<typename L, typename F, typename D>
+ void separators_merge(mymln::document::document<L,F,D>& doc)
+ {
+   typedef vertex_image<point2d,bool> v_ima_g;
+   typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
+   v_ima_g mask = doc.fun_mask_separators();
+   mln_piter_(v_ima_g) v(mask.domain());
+   typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
+   nbh_t nbh(mask);
+   mln_niter_(nbh_t) q(nbh, v);
+   for_all(v)
+   {
+     if(doc.contain_separator(v))
+     {
+       for_all(q)
+       {
+         if(doc.separator_has(v, q))
+         {doc.merge_separators(v,q);}
+       }
+     }
+     else
+     {
+       for_all(q)
+       {
+         if(doc.separator_has(v, q))
+         {
+           doc.add_Vseparator(doc[q]);
+           doc.merge_separators(v,q);
+           break;
+         }
+       }
+     }
+   }
+ }
template<typename L, typename F, typename D>
void separators_find_allign(mymln::document::document<L,F,D>& doc)
{
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
- v_ima_g mask = doc.fun_mask_letters();
+ v_ima_g mask = doc.fun_mask_all_letters();
mln_piter_(v_ima_g) v(mask.domain());
typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
nbh_t nbh(mask);
@@ -67,7 +165,7 @@ namespace mymln
{
typedef vertex_image<point2d,bool> v_ima_g;
typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p;
- v_ima_g mask = doc.fun_mask_letters();
+ v_ima_g mask = doc.fun_mask_all_letters();
mln_piter_(v_ima_g) v(mask.domain());
typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t;
nbh_t nbh(mask);
@@ -75,7 +173,7 @@ namespace mymln
for_all(v)
{
- if(doc.contain_letter(v))
+ if(doc.contain_letter(v) || doc.contain_alone_letter(v))
{
doc.jump_to_separator(v);
if((!doc.contain_implicit_separator(v)))
@@ -92,17 +190,23 @@ namespace mymln
// draw::line(out, q,v, mln::literal::blue);
if(doc.allign_H_max(q,v) && doc.allign_size(q, v))
{
+ doc.debug_draw_line_green_buffer(v, q);
doc.add_to_separator_link(v, q);
All_Alone = false;
}
+ else
+ {doc.debug_draw_line_red_buffer(v, q);}
}
else
{
if(doc.allign_H_max(q,v) && doc.allign_size(q, v))
{
+ doc.debug_draw_line_green_buffer(v, q);
doc.add_to_separator_link(q, v);
All_Alone = false;
}
+ else
+ {doc.debug_draw_line_red_buffer(v, q);}
}
}
if(All_Alone){doc.invalidate_implicit_separator(v);}
@@ -193,14 +297,17 @@ namespace mymln
if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(q,v))
{
count[doc[q]]++;
+ doc.debug_draw_box_red_buffer(q);
+ doc.debug_draw_line_red_buffer(v, q);
}
-
}
else if (doc.contain_implicit_separator(q))
{
if(doc.allign_V(q,v) && doc.allign_size(q, v) && doc.allign_right(q,v) && doc.allign_proximity_strict(v, q))
{
count[doc[q]]++;
+ doc.debug_draw_box_green_buffer(q);
+ doc.debug_draw_line_green_buffer(v, q);
}
}
diff --git a/scribo/sandbox/raphael/code/my/runtime/lib.hh b/scribo/sandbox/raphael/code/my/runtime/lib.hh
index 2a36ef3..b002e63 100644
--- a/scribo/sandbox/raphael/code/my/runtime/lib.hh
+++ b/scribo/sandbox/raphael/code/my/runtime/lib.hh
@@ -15,6 +15,9 @@ namespace mymln
run.add_function("separators.make_clean_right", &(mymln::document::separators::separators_make_clean_right));
run.add_function("separators.find_allign_left", &(mymln::document::separators::separators_find_allign));
run.add_function("separators.make_clean_left", &(mymln::document::separators::separators_find_allign));
+ run.add_function("separators.rebuild", &(mymln::document::separators::separators_rebuild));
+ run.add_function("separators.merge", &(mymln::document::separators::separators_merge));
+
}
template<typename L, typename F, typename D>
@@ -30,6 +33,7 @@ namespace mymln
run.add_function("clean.quote_items", &(mymln::document::clean_quote_items));
run.add_function("clean.between", &(mymln::document::clean_between));
+ run.add_function("clean.line_space", &(mymln::document::clean_line_space));
run.add_function("clean.line_link_item", &(mymln::document::clean_line_link_item));
run.add_function("clean.proximity_lines", &(mymln::document::clean_proximity_lines));
run.add_function("clean.quote_lines", &(mymln::document::clean_quote_lines));
@@ -45,18 +49,45 @@ namespace mymln
run.add_function("clean.backward_letters", &(mymln::document::clean_backward_letters));
run.add_function("clean.paragraphs_tab", &(mymln::document::clean_paragraphs_tab));
run.add_function("clean.proximity_letters", &(mymln::document::clean_proximity_letters));
+ run.add_function("clean.letter_previous_next_line", &(mymln::document::clean_letter_previous_next_line));
+ run.add_function("clean.V_lines", &(mymln::document::clean_V_lines));
+ run.add_function("clean.paragraphs_end_line", &(mymln::document::clean_paragraphs_end_line));
+ run.add_function("find.previous_next_line", &(mymln::document::find_previous_next_line));
}
template<typename L, typename F, typename D>
void lib_debug_save_all(mymln::document::document<L,F,D>& doc, std::string file)
{ doc.debug_save_all(file); }
template<typename L, typename F, typename D>
+ void lib_debug_save_all_separators(mymln::document::document<L,F,D>& doc, std::string file)
+ { doc.debug_save_all_separators(file); }
+ template<typename L, typename F, typename D>
+ void lib_debug_save_buffer_paragraphs(mymln::document::document<L,F,D>& doc, std::string file)
+ { doc.debug_save_buffer_paragraphs(file); }
+ template<typename L, typename F, typename D>
+ void lib_debug_save_buffer_lines(mymln::document::document<L,F,D>& doc, std::string file)
+ { doc.debug_save_buffer_lines(file); }
+ template<typename L, typename F, typename D>
void lib_debug_save_buffer(mymln::document::document<L,F,D>& doc, std::string file)
{ doc.debug_save_buffer(file); }
template<typename L, typename F, typename D>
void lib_debug_create_buffer(mymln::document::document<L,F,D>& doc)
{ doc.debug_create_buffer(); }
+ template<typename L, typename F, typename D>
+ void lib_debug_enable_buffer(mymln::document::document<L,F,D>& doc) // Runtime wrapper (registered as "debug.enable_buffer"): forwards to doc.debug_enable_buffer().
+ { doc.debug_enable_buffer(); }
+ template<typename L, typename F, typename D>
+ void lib_debug_disable_buffer(mymln::document::document<L,F,D>& doc) // Runtime wrapper (registered as "debug.disable_buffer"): forwards to doc.debug_disable_buffer().
+ { doc.debug_disable_buffer(); }
+
+ template<typename L, typename F, typename D>
+ void lib_debug_save_dot_graph(mymln::document::document<L,F,D>& doc, std::string file) // Runtime wrapper (registered as "debug.save_dot_graph"): forwards `file` to doc.debug_save_dot_graph().
+ { doc.debug_save_dot_graph(file); }
+
+ template<typename L, typename F, typename D>
+ void lib_debug_breakpoint(mymln::document::document<L,F,D>& doc) // Runtime wrapper (registered as "break"): forwards to doc.debug_breakpoint().
+ { doc.debug_breakpoint(); }
template<typename L, typename F, typename D>
void load_debug(runtime<L,F,D>& run)
@@ -64,6 +95,13 @@ namespace mymln
run.add_function_string("debug.save", &(lib_debug_save_all));
run.add_function("debug.create_buffer", &(lib_debug_create_buffer));
run.add_function_string("debug.save_buffer", &(lib_debug_save_buffer));
+ run.add_function_string("debug.save_dot_graph", &(lib_debug_save_dot_graph));
+ run.add_function_string("debug.save_separators", &(lib_debug_save_all_separators));
+ run.add_function_string("debug.save_buffer_paragraphs", &(lib_debug_save_buffer_paragraphs));
+ run.add_function_string("debug.save_buffer_lines", &(lib_debug_save_buffer_lines));
+ run.add_function("debug.enable_buffer", &(lib_debug_enable_buffer));
+ run.add_function("debug.disable_buffer", &(lib_debug_disable_buffer));
+ run.add_function("break", &(lib_debug_breakpoint));
}
@@ -122,10 +160,22 @@ namespace mymln
void lib_compute_letter_middle_width(mymln::document::document<L,F,D>& doc)
{ doc.compute_letter_middle_width(); }
template<typename L, typename F, typename D>
+ void lib_compute_paragraph_middle_height(mymln::document::document<L,F,D>& doc)
+ { doc.compute_paragraph_middle_height(); }
+ template<typename L, typename F, typename D>
+ void lib_compute_paragraph_middle_width(mymln::document::document<L,F,D>& doc)
+ { doc.compute_paragraph_middle_width(); }
+ template<typename L, typename F, typename D>
+ void lib_compute_letter_middle_space(mymln::document::document<L,F,D>& doc)
+ { doc.compute_letter_middle_space(); }
+ template<typename L, typename F, typename D>
void load_compute(runtime<L,F,D>& run)
{
run.add_function("compute.letter_middle_height", &(lib_compute_letter_middle_height));
run.add_function("compute.letter_middle_width", &(lib_compute_letter_middle_width));
+ run.add_function("compute.letter_middle_space", &(lib_compute_letter_middle_space));
+ run.add_function("compute.paragraph_middle_width", &(lib_compute_paragraph_middle_width));
+ run.add_function("compute.paragraph_middle_height", &(lib_compute_paragraph_middle_height));
}
diff --git a/scribo/sandbox/raphael/code/my/util/union.hh b/scribo/sandbox/raphael/code/my/util/union.hh
index 90a7e68..d6e39ba 100644
--- a/scribo/sandbox/raphael/code/my/util/union.hh
+++ b/scribo/sandbox/raphael/code/my/util/union.hh
@@ -17,7 +17,12 @@ namespace mymln
mark.fill(0);
mark_link.fill(0);
size_ = max_size;
+ Nset = 2;
+ Cset = 1;
}
+ /// Hand out the next unused set identifier: the current value of Nset
+ /// becomes the current set (Cset) and is returned; Nset is then advanced.
+ inline unsigned int new_set()
+ {
+   const unsigned int fresh = Nset;
+   ++Nset;
+   Cset = fresh;
+   return fresh;
+ }
+
inline void reset()
{
mark.fill(0);
@@ -57,7 +62,7 @@ namespace mymln
}
else
{
- mark_link[A] = B;
+ mark_link[B] = A;
}
}
}
@@ -72,7 +77,11 @@ namespace mymln
else
{
mark_link[B] = Pos;
- mark_link[PosB] = Pos;
+ mark_link[A] = Pos;
+ if(PosB != 0)
+ {
+ mark_link[PosB] = Pos;
+ }
}
}
}
@@ -108,6 +117,8 @@ namespace mymln
mln::util::array<unsigned int> mark;
mln::util::array<unsigned int> mark_link;
unsigned int size_;
+ unsigned int Nset;
+ unsigned int Cset;
};
}
}
diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc
index 1f8d94d..463e598 100644
--- a/scribo/sandbox/raphael/code/test.cc
+++ b/scribo/sandbox/raphael/code/test.cc
@@ -2,6 +2,8 @@
#include <vector>
#include <mln/io/pbm/all.hh>
#include <mln/io/ppm/all.hh>
+#include <mln/io/pgm/all.hh>
+#include <mln/io/magick/all.hh>
#include <mln/core/site_set/p_vertices.hh>
#include <mln/core/image/graph_elt_window.hh>
@@ -40,7 +42,7 @@
#include <my/util/vector_bbox_group.hh>
#include <my/document/document.hh>
#include <my/document/separator.hh>
-
+#include <my/document/outline.hh>
#include <my/document/clean.hh>
#include <my/document/recognition.hh>
@@ -58,6 +60,7 @@ void Process(std::string File, std::string Dir, mymln::runtime::runtime< value::
runtime.add_variable("DEBUG_FILE", Dir + "/debug_" + File);
+
std::cout << "Processing : " << File << endl;
/* CREATE GRAPH */
typedef value::int_u<16> uint16;
@@ -103,8 +106,8 @@ void Process(std::string File, std::string Dir, mymln::runtime::runtime< value::
mymln::document::document<uint16,float,short> doc(ima_blob, ima_influ, boxes, area_grph, areas_detected);
runtime.set_current_document(&doc);
doc.debug_set_image(ima);
- doc.vertical_separator_ratio_range(0.0f, 0.2f);
- doc.horizontal_separator_ratio_range(6.0f, 1000.0f);
+ doc.horizontal_separator_ratio_range(0.0f, 0.2f);
+ doc.vertical_separator_ratio_range(6.0f, 1000.0f);
doc.container_volume_range(40, 100);
for (uint16 N = 1; N <= areas_detected; N++)
@@ -206,12 +209,12 @@ void Process(std::string File, std::string Dir, mymln::runtime::runtime< value::
doc.recook_paragraphs();
mymln::document::clean_included_paragraphs(doc);
doc.recook_paragraphs();
- std::cout << "WORK ON GRAPH : " << timer.stop() << endl;
+
doc.recook_lines();
*/
runtime.run();
-
+ std::cout << "WORK ON GRAPH : " << timer.stop() << endl;
@@ -265,7 +268,7 @@ int main( int argc, char** argv)
else if(prog)
{
Prog = argv[N];
- run.load(Prog. c_str());
+ run.load(Prog.c_str());
prog = false;
}
else
--
1.7.2.5
1
0
last-svn-commit-875-g0321a02 scribo/toolchain/internal/toolchain_functor.hh: Fix a warning with GCC 4.5.
by Guillaume Lazzara 08 Sep '11
by Guillaume Lazzara 08 Sep '11
08 Sep '11
---
scribo/ChangeLog | 5 +++++
.../scribo/toolchain/internal/toolchain_functor.hh | 4 ++++
2 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index fd8acad..b00493a 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1099,6 +1099,11 @@
2011-03-29 Guillaume Lazzara <z(a)lrde.epita.fr>
+ * scribo/toolchain/internal/toolchain_functor.hh: Fix a warning
+ with GCC 4.5.
+
+2011-03-29 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* scribo/toolchain/nepomuk/text_extraction.hh: Remove useless
punctuation in text output.
diff --git a/scribo/scribo/toolchain/internal/toolchain_functor.hh b/scribo/scribo/toolchain/internal/toolchain_functor.hh
index a29dafa..8f5336d 100644
--- a/scribo/scribo/toolchain/internal/toolchain_functor.hh
+++ b/scribo/scribo/toolchain/internal/toolchain_functor.hh
@@ -42,6 +42,7 @@ namespace scribo
public:
Toolchain_Functor();
+ virtual ~Toolchain_Functor();
virtual int nsteps() const = 0;
@@ -66,6 +67,9 @@ namespace scribo
{
}
+ Toolchain_Functor::~Toolchain_Functor() // Out-of-class definition of the virtual destructor declared in the class; it has nothing to release.
+ {
+ }
//==========
// Triggers
--
1.7.2.5
1
0
08 Sep '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch raphael has been created
at 485817ced01f1d00df8a080a1a87c3caacc0cc90 (commit)
- Log -----------------------------------------------------------------
485817c Clean everything and create the function influence_and_graph in milena. influence_and_graph saves 400 ms during the creation of the graph.
36186b0 Optimize the code. Some optimizations have not been tested; if there is a problem, please use the previous version.
e0f8e0e Optimize the code and add debugging options for runtime
f5d237a Patch the Page system. Add demos. Add Finalizer to the cleaning system. Still one problem when a single short line of text is surrounded by a form
fd4c5d3 Update and add examples. Add debug draw string to draw strings on an image
a709ecb Fix bug with finalize function. Add functions to recognize pictures.
f4a851e Correct some problems with lines detection and improve paragraphs detection. Add remote debug for clean.hh
5c727d4 Add viewer test for debug. Add debug option. Add Page and exportation system. Add Finalisation System. Add another method to clean paragraphs. Fix problem with some clean functions.
e10fcbe Update everything. Add Breakpoint, Rebuild Separators. Patch some problems with union find. Patch some problems with paragraph detection. Still have some problems with paragraphs
bb109db Add script system and recognition system. Fix some bugs
33e4509 Better paragraph detection and fix some bug
f5c0102 Add the detection of Right implicit separator and improve lines detection
b303289 Add union find class. Increase the quality of the letter detection.
c2df1b3 update line recognition system. still have problem with document 228
68f091d the program detects ,, . , i and j but still has a problem with "
6689819 this new project is able to analyse .and ,
b7a40cf optimize influence zone adjacency graph
0481b3e adapt the make file
36edc0a add project to sandbox
07c6af5 Ajout du projet et correction d'un problème dans le système de création de vertex_image( mauvais fonctionnement de | )
0321a02 scribo/toolchain/internal/toolchain_functor.hh: Fix a warning with GCC 4.5.
-----------------------------------------------------------------------
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0
last-svn-commit-969-gf6b3eed tests/estim/font_boldness.cc: Make the test deterministic.
by Guillaume Lazzara 07 Sep '11
by Guillaume Lazzara 07 Sep '11
07 Sep '11
---
scribo/ChangeLog | 4 ++++
scribo/tests/estim/font_boldness.cc | 6 ++++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 060bd2a..d667e43 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,7 @@
+2011-09-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ * tests/estim/font_boldness.cc: Make the test deterministic.
+
2011-09-06 Guillaume Lazzara <z(a)lrde.epita.fr>
* tests/img/several_objects.pgm: Add a new test image.
diff --git a/scribo/tests/estim/font_boldness.cc b/scribo/tests/estim/font_boldness.cc
index 9e85795..bc42054 100644
--- a/scribo/tests/estim/font_boldness.cc
+++ b/scribo/tests/estim/font_boldness.cc
@@ -38,9 +38,11 @@ int main()
image2d<bool> input;
io::pbm::load(input, SCRIBO_IMG_DIR "/phillip.pbm");
- float val = scribo::estim::font_boldness(input);
+ volatile float val = scribo::estim::font_boldness(input);
+ val = ((int)(val * 100) / 100.f);
- mln_assertion(((int)(val * 100) / 100.f) == 18.44f);
+ volatile float ref = 18.44f;
+ mln_assertion(val == ref);
return 0;
}
--
1.7.2.5
1
0
---
scribo/sandbox/z/yann.tex | 69 +++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 69 insertions(+), 0 deletions(-)
create mode 100644 scribo/sandbox/z/yann.tex
diff --git a/scribo/sandbox/z/yann.tex b/scribo/sandbox/z/yann.tex
new file mode 100644
index 0000000..2c0a1ff
--- /dev/null
+++ b/scribo/sandbox/z/yann.tex
@@ -0,0 +1,69 @@
+* Identification des éléments non texte
+---------------------------------------
+
+2 possibilités :
+
+
+- K-mean => Classe fond + 2 classes objets => je retiens que les classes objets.
+
+Images pour illustrer le rapport :
+Rejouer /work/lazzara/olena/scribo/sandbox/green/demo/clustering/kmean3d/kmean3d.cc
+
+
+- Regmax =>
+
+Ensemble de fonctions utiles :
+/work/lazzara/olena/scribo/sandbox/green/tools/labeling/
+
+Exemple d'appel :
+/work/lazzara/olena/scribo/sandbox/green/doc/labeling/mp00307c_bis/mp00307c.sh.gz
+
+Images pour illustrer le rapport :
+/work/lazzara/olena/scribo/sandbox/green/doc/labeling/mp00307c (sûr d'avoir toutes les images intermédiaires)
+/work/lazzara/olena/scribo/sandbox/green/doc/labeling/
+
+
+
+* Reconnaissance du type de document
+------------------------------------
+
+/work/lazzara/olena/scribo/sandbox/green/exp/annotating/bench/bench.cc.
+
+=> - Calcul des descripteurs
+ - Calcul des seuils
+
+
+FIXME: faire une routine à partir des seuils qui auront été calculés.
+Warning : ne prend en compte que 2 sous-populations ! -> voir comment gérer
+plus de 2 si besoin.
+
+
+
+Images pour illustrer le rapport :
+Rejouer bench
+
+
+
+Base d'apprentissage :
+/lrde/doc/sip/image/images/c_o_p_y_r_i_g_h_t_e_d/classif_icdar_versus_afp
+
+
+
+* Outils pour histogrammes
+--------------------------
+
+
+- Nouveaux accu histogrammes :
+/work/lazzara/olena/scribo/sandbox/green/mln/accu/stat/*
+
+Warning : la version hsl a des problèmes de quantification =>
+histogramme sur float => Problème!
+
+
+- Nouvelles Routines de calcul sur les histogrammes :
+grep -R ``*_histo(.*)''
+
+ex : /work/lazzara/olena/scribo/sandbox/green/exp/annotating/bench/bench.cc
+
+
+
--
1.7.2.5
1
0
* mln/accu/take.hh,
* mln/border/mirror.hh: Here.
---
milena/ChangeLog | 7 +++++++
milena/mln/accu/take.hh | 2 ++
milena/mln/border/mirror.hh | 10 ++++++++--
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/milena/ChangeLog b/milena/ChangeLog
index 6dd4556..2902026 100644
--- a/milena/ChangeLog
+++ b/milena/ChangeLog
@@ -1,5 +1,12 @@
2011-09-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Add comments.
+
+ * mln/accu/take.hh,
+ * mln/border/mirror.hh: Here.
+
+2011-09-07 Guillaume Lazzara <z(a)lrde.epita.fr>
+
* tests/border/mirror.cc: Revamp test.
2011-09-06 Guillaume Lazzara <z(a)lrde.epita.fr>
diff --git a/milena/mln/accu/take.hh b/milena/mln/accu/take.hh
index 7bafbac..56bdaba 100644
--- a/milena/mln/accu/take.hh
+++ b/milena/mln/accu/take.hh
@@ -34,6 +34,8 @@
# include <mln/core/concept/image.hh>
# include <mln/util/pix.hh>
+// FIXME: to be removed? Seems to duplicate data::compute and
+// set::compute. Moreover it does not work...
namespace mln
{
diff --git a/milena/mln/border/mirror.hh b/milena/mln/border/mirror.hh
index 183c0a2..793ef21 100644
--- a/milena/mln/border/mirror.hh
+++ b/milena/mln/border/mirror.hh
@@ -1,4 +1,5 @@
-// Copyright (C) 2007, 2008, 2009 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2007, 2008, 2009, 2011 EPITA Research and Development
+// Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -31,7 +32,10 @@
/// Define function that fills border using nearer pixels with a
/// mirroring effect.
///
-/// \todo Awful code: rewrite it!
+/// \fixme 2D version is not correct if the border is larger than the
+/// image domain.
+///
+/// \todo Awful code: rewrite it!
# include <mln/core/image/image1d.hh>
# include <mln/core/image/image2d.hh>
@@ -193,6 +197,8 @@ namespace mln
inline
void mirror_(const box3d&, const I& ima)
{
+ trace::warning("border::mirror for 3D image is not implemented,"
+ " so image borders have not been mirrored!");
mln::internal::fixme();
}
--
1.7.2.5
1
0