
--- .../raphael/code/demo/demo_extract_paragraphs.hh | 31 + .../raphael/code/demo/demo_label_to_point.hh | 24 + scribo/sandbox/raphael/code/makefile | 2 +- scribo/sandbox/raphael/code/my/data/image.hh | 26 + scribo/sandbox/raphael/code/my/data/line.hh | 9 +- scribo/sandbox/raphael/code/my/data/page.hh | 67 ++- scribo/sandbox/raphael/code/my/data/paragraph.hh | 18 +- scribo/sandbox/raphael/code/my/data/style.hh | 8 +- scribo/sandbox/raphael/code/my/document/clean.hh | 89 ++- .../sandbox/raphael/code/my/document/document.hh | 188 +++++- .../raphael/code/my/document/filter/filter.hh | 182 ----- scribo/sandbox/raphael/code/my/document/image.hh | 20 +- scribo/sandbox/raphael/code/my/draw/string.hh | 817 ++++++++++++++++++++ scribo/sandbox/raphael/code/my/runtime/lib.hh | 5 +- scribo/sandbox/raphael/code/my/util/union.hh | 5 + scribo/sandbox/raphael/code/test.cc | 133 +--- 16 files changed, 1290 insertions(+), 334 deletions(-) create mode 100644 scribo/sandbox/raphael/code/demo/demo_extract_paragraphs.hh create mode 100644 scribo/sandbox/raphael/code/demo/demo_label_to_point.hh delete mode 100644 scribo/sandbox/raphael/code/my/document/filter/filter.hh create mode 100644 scribo/sandbox/raphael/code/my/draw/string.hh diff --git a/scribo/sandbox/raphael/code/demo/demo_extract_paragraphs.hh b/scribo/sandbox/raphael/code/demo/demo_extract_paragraphs.hh new file mode 100644 index 0000000..f527fa7 --- /dev/null +++ b/scribo/sandbox/raphael/code/demo/demo_extract_paragraphs.hh @@ -0,0 +1,31 @@ +#include <my/document/document.hh> + +// THIS DEMO SHOW YOU HOW TO EXTRACT INFORMATION ON +// A PARAGRAPH WHEN THE DATA HAVE BEEN +// COMPUTED + +template<typename L,typename F,typename D> +void demo_extract_paragraphs(const document<L,F,D>& doc) +{ + + std::cout << "Demo : extract paragraphs" << std::endl; + for(int Id = 0; Id < doc.paragraph_count(); Id++) + { + // get_paragraph_length_direct must be use because here + // we have directly the Id of the paragraph + // NOTE : + // if we 
are working on a node and if we want the length of the paragraph that + // contain the node, the method get_paragraph_length must be called + if(doc.get_paragraph_length_direct(Id) > 0) // Check if the paragraph exists + { + + // here we can directly extract all the info that the document contain on the paragraph + // by using the methods *_direct + std::cout + << "Paragraph : " + << doc.get_paragraph_length_direct(Id) + << doc.get_paragraph_bbox_direct(Id) + std::endl; + } + } +} \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/demo/demo_label_to_point.hh b/scribo/sandbox/raphael/code/demo/demo_label_to_point.hh new file mode 100644 index 0000000..2ff7525 --- /dev/null +++ b/scribo/sandbox/raphael/code/demo/demo_label_to_point.hh @@ -0,0 +1,24 @@ +#include <my/document/document.hh> + +// THIS DEMO SHOW YOU HOW TO OBTAIN THE POINT THAT +// IS THE BOUNDING BOX CENTER OF THE COMPONENT + +template<typename L,typename F,typename D> +void demo_extract_paragraphs(const document<L,F,D>& doc) +{ + + std::cout << "Demo : label to point" << std::endl; + for(int Id = 0; Id < doc.size(); Id++) + { + // Note that if you have got a point and if the want the label + // you can use exactly the same syntax + // label = document[point] + std::cout + << "The center of : " + << Id + << " is : " + << doc[Id] + << std::endl; + } + } +} \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/makefile b/scribo/sandbox/raphael/code/makefile index 2e1e3f6..bad2d2f 100644 --- a/scribo/sandbox/raphael/code/makefile +++ b/scribo/sandbox/raphael/code/makefile @@ -9,7 +9,7 @@ std: ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm release: - ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena $(QT4) $(QT4_LIB) -DNDEBUG -O2 + ccache g++ test.cc -msse3 -o ~/Bureau/test/bin/test.elf -I . 
-I ./../../../../milena $(QT4) $(QT4_LIB) -DNDEBUG -O2 -march=native ~/Bureau/test/bin/test.elf -D ~/Bureau/test/bin -P ~/Bureau/test/bin/script ima.pbm debug: ccache g++ test.cc -o ~/Bureau/test/bin/test.elf -I . -I ./../../../../milena -DNDEBUG -ggdb diff --git a/scribo/sandbox/raphael/code/my/data/image.hh b/scribo/sandbox/raphael/code/my/data/image.hh index e69de29..de5464a 100644 --- a/scribo/sandbox/raphael/code/my/data/image.hh +++ b/scribo/sandbox/raphael/code/my/data/image.hh @@ -0,0 +1,26 @@ +#ifndef MLN_DATA_IMAGE +#define MLN_DATA_IMAGE +#include <stdlib.h> +namespace mymln +{ + namespace data + { + enum image_type + { + Gray, + Color, + Black_And_White + }; + + /// The class image is use to contain images + class image + { + public: + image() + { } + private: + + }; + } +} +#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/data/line.hh b/scribo/sandbox/raphael/code/my/data/line.hh index d5ed237..99e133b 100644 --- a/scribo/sandbox/raphael/code/my/data/line.hh +++ b/scribo/sandbox/raphael/code/my/data/line.hh @@ -8,14 +8,19 @@ namespace mymln { public: line() - {bbox_ = mln::box2d(); ID_ = 0;} + {bbox_ = mln::box2d(); ID_ = 0; value_ = "-*-";} line(box2d bounding_box, int ID) - {bbox_ = bounding_box; ID_ = ID;} + {bbox_ = bounding_box; ID_ = ID;value_ = "-*-";} mln::box2d bbox() {return bbox_;} + std::string get_value() + {return value_;} + void set_value(std::string value) + {value_ = value;} private: int ID_; mln::box2d bbox_; + std::string value_; }; } } diff --git a/scribo/sandbox/raphael/code/my/data/page.hh b/scribo/sandbox/raphael/code/my/data/page.hh index 0b1c0aa..04f4e99 100644 --- a/scribo/sandbox/raphael/code/my/data/page.hh +++ b/scribo/sandbox/raphael/code/my/data/page.hh @@ -3,7 +3,9 @@ #include <my/data/style.hh> #include <my/data/paragraph.hh> #include <my/data/line.hh> +#include <my/data/image.hh> #include <my/document/document.hh> + namespace mymln { namespace data @@ -18,31 +20,68 @@ namespace mymln { 
doc_ = doc; paragraphs = mln::util::array<paragraph>(0); + images = mln::util::array<image>(0); paragraphs.reserve(doc_.paragraph_count()); unsigned int paragraph_count = doc_.paragraph_count(); + + mln::util::array<int> paragraphs_IDS; + paragraphs_IDS.reserve(doc_.paragraph_count()); + for(int N = 0; N < paragraph_count; N++) { + if(doc_.get_paragraph_length_direct(N)) { paragraphs.append(paragraph(doc_.get_paragraph_bbox_direct(N), N)); paragraphs[paragraphs.size() - 1].set_font_size(doc_.get_paragraph_middle_width_direct(N)); paragraphs[paragraphs.size() - 1].set_line_count(doc_.get_paragraph_length_direct(N)); + paragraphs_IDS.append(paragraphs.size() - 1); + } + else + { + paragraphs_IDS.append(-1); } } unsigned int lines_count = doc_.line_count(); lines.reserve(lines_count); for(int N = 0; N < lines_count; N++) { - lines.append(line(doc_.get_line_bbox_direct(N), N)); + if(doc_.get_line_length_direct(N)) + { + lines.append(line(doc_.get_line_bbox_direct(N), N)); + if(paragraphs_IDS[doc_.get_line_parent(N)] >= 0) + { + paragraphs[paragraphs_IDS[doc_.get_line_parent(N)]].add_line(lines[lines.size() - 1]); + } + } } } + void set_source_image(image2d<value::rgb8> source) + { + src_ = source; + } + void add_line(data::line Line) + { + lines.append(Line); + } + void add_paragraph(data::paragraph Paragraph) + { + paragraphs.append(Paragraph); + } + void add_image(data::image Image) + { + images.append(Image); + } + + inline void export_PRima(std::string file, const char qname) + { + + } inline void export_HTML(std::string file) { fstream filestream(file.c_str(), std::fstream::out); - if(filestream.is_open()) - std::cout << "EXPORT : " + file; filestream << "<html>\n"; filestream << "<head>\n"; filestream << "<style type=\"text/css\">\n"; @@ -65,33 +104,27 @@ namespace mymln filestream.close(); - std::cout << "EXPORTED : " + file; } + inline unsigned int paragraph_count() { return paragraphs.size();} inline unsigned int line_count() { return lines.size();} - + inline 
unsigned int image_count() + { return images.size();} + inline paragraph get_paragraph(unsigned int ID) { return paragraphs[ID];} inline line get_line(unsigned int ID) { return lines[ID];} + inline line get_image(unsigned int ID) + { return images[ID];} private: + image2d<value::rgb8> src_; document::document<L,F,D> doc_; mln::util::array<paragraph> paragraphs; mln::util::array<line> lines; - template<typename I> - inline mln::util::array<image2d<I> > extract_lines_(image2d<I>& source) - { - mln::util::array<image2d<I> > array(0); - array.reserve(lines.size()); - - for(int N = 0; N < lines.size();N++) - { - array.append((source | lines[N].bbox()).rw()); - } - return array; - } + mln::util::array<image> images; }; } } diff --git a/scribo/sandbox/raphael/code/my/data/paragraph.hh b/scribo/sandbox/raphael/code/my/data/paragraph.hh index 9e277c9..e11c205 100644 --- a/scribo/sandbox/raphael/code/my/data/paragraph.hh +++ b/scribo/sandbox/raphael/code/my/data/paragraph.hh @@ -16,6 +16,7 @@ namespace mymln bbox_ = mln::box2d(); style_ = style(); LineC = 1; + mln::util::array<line>(); } paragraph(mln::box2d bbox, int ID) { @@ -23,11 +24,18 @@ namespace mymln bbox_ = bbox; style_ = style(); LineC = 1; + mln::util::array<line>(); } void set_line_count(int count) - {LineC = count;} + {LineC = count;lines.reserve(count);} + void add_line(line Line) + {lines.append(Line);} + int line_count(int count) + {return LineC;} void set_font_size(int px) {style_.set_font_size(px);} + int get_font_size() + { style_.get_font_size(); } int ID(){return ID_;} std::string To_HTML_Paragraph_Style() { @@ -38,9 +46,14 @@ namespace mymln { std::string output = ""; output += "<st" + itoa(ID_) + ">\n"; + output += "X:"; + output += itoa(bbox_.pmin()[1]); + output += "Y:"; + output += itoa(bbox_.pmin()[0]); for(int N = 0; N < LineC; N++) { - output += "!!--FIXME--!! 
<br> \n"; + output += lines[N].get_value(); + output += "</br> \n"; } @@ -52,6 +65,7 @@ namespace mymln {return bbox_;} private: mln::box2d bbox_; + mln::util::array<line> lines; style style_; int LineC; int ID_; diff --git a/scribo/sandbox/raphael/code/my/data/style.hh b/scribo/sandbox/raphael/code/my/data/style.hh index 823fbeb..e6eb534 100644 --- a/scribo/sandbox/raphael/code/my/data/style.hh +++ b/scribo/sandbox/raphael/code/my/data/style.hh @@ -9,7 +9,7 @@ namespace mymln { std::string output = ""; if(value < 0){output+="-"; value = -value;} - while(value > 10) + while(value >= 10) { output = (char)('0' + (value % 10)) + output; value /= 10; @@ -43,6 +43,10 @@ namespace mymln { Font_Size_ = px; } + int get_font_size() + { + return Font_Size_; + } // WARNING THIS FUNCTION USE iota // iota is define only in mymln::data // this is not a standart function you can use everywhere @@ -55,7 +59,7 @@ namespace mymln output += "position:absolute;"; output += "left:" + itoa(pmin_X) + ";"; output += "top:" + itoa(pmin_Y) + ";"; - output += "}"; + output += "} \n"; // FIXME return output; } diff --git a/scribo/sandbox/raphael/code/my/document/clean.hh b/scribo/sandbox/raphael/code/my/document/clean.hh index cf61a1c..f2c4992 100644 --- a/scribo/sandbox/raphael/code/my/document/clean.hh +++ b/scribo/sandbox/raphael/code/my/document/clean.hh @@ -728,9 +728,9 @@ namespace mymln { if(doc.contain_line(N)) if (doc.get_letter_middle_height(N) * 3 < doc.get_bbox(N).len(0)) - {doc.move_line_self_link(N); doc.add_noise(N);} + {doc.move_line_self_link(N); doc.add_noise(N); doc.invalidate_line_link(N);} else if(doc.get_letter_middle_width(N) * 4 < doc.get_bbox(N).len(1)) - {doc.move_line_self_link(N); doc.add_noise(N);} + {doc.move_line_self_link(N); doc.add_noise(N); doc.invalidate_line_link(N);} } } @@ -1476,8 +1476,91 @@ namespace mymln doc.propage_line_link(); } - + template<typename L, typename F, typename D> + void clean_finalize_letters(mymln::document::document<L,F,D>& doc) + { 
+ typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + + if(doc.contain_noise(v)) + { + unsigned int Count = 0; + bool HasLink = false; + L line = 0; + for_all(q) + { + if(doc.contain_paragraph(q) && doc.contain_line(q) && doc.paragraph_included_letter(q,v)) + { + if(doc.allign_V_line_artefact(q,v) && doc.allign_size_height_line_artefact(q,v)) + { + HasLink = true; + if(!line) + line = doc[q]; + else if(doc.get_line_distance(q,v) < doc.get_line_distance(doc[line], v)) + line = doc[q]; + } + else if(doc.allign_top_large(q,v) && !doc.allign_V(q,v)) + { + Count++; + } + } + + } + if(Count > 0 && HasLink) + { + doc.debug_draw_box_green_buffer(v); + doc.debug_draw_line_green_buffer(v, doc[line]); + doc.debug_draw_string(v, Count); + doc.add_to_line_link(doc[line],v); + doc.add_to_paragraph_link(doc[line], v); + } + } + } + + doc.propage_line_link(); + doc.propage_paragraph_link(); + + } + + template<typename L, typename F, typename D> + void clean_lines_artefacts(mymln::document::document<L,F,D>& doc) + { + //image2d<value::rgb8> out; + //mln::initialize(out, s); + typedef vertex_image<point2d,bool> v_ima_g; + typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; + v_ima_g mask = doc.fun_mask_letters(); + mln_piter_(v_ima_g) v(mask.domain()); + typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; + nbh_t nbh(mask); + mln_niter_(nbh_t) q(nbh, v); + for_all(v) + { + if(doc.contain_line(v)) + { + for_all(q) + { + //draw::line(out, q,v, mln::literal::red); + if(doc.same_line(v,q) && doc.is_line_artefact(q)) + { + doc.debug_draw_string(q, "ARTEFACT"); + doc.add_noise(q); + } + } + } + } + doc.clean_noise_lines(); 
+ doc.propage_line_link(); + //io::ppm::save(mln::debug::superpose(out, s, literal::white),dgb_out); + } } diff --git a/scribo/sandbox/raphael/code/my/document/document.hh b/scribo/sandbox/raphael/code/my/document/document.hh index 1d50e5c..ae4a51a 100644 --- a/scribo/sandbox/raphael/code/my/document/document.hh +++ b/scribo/sandbox/raphael/code/my/document/document.hh @@ -1,6 +1,25 @@ #ifndef INC_DOCUMENT_DOC #define INC_DOCUMENT_DOC #include <mln/accu/shape/bbox.hh> +#include <mln/fun/i2v/array.hh> +#include <mln/util/graph.hh> +#include <mln/debug/draw_graph.hh> +#include <mln/core/site_set/p_vertices.hh> +#include <mln/transform/influence_zone_geodesic.hh> +#include <mln/make/image2d.hh> +#include <mln/core/alias/neighb2d.hh> +#include <mln/make/influence_zone_adjacency_graph.hh> +#include <mln/make/w_window2d.hh> +#include <mln/labeling/value_and_compute.hh> +#include <mln/make/image.hh> +#include <mln/value/rgb8.hh> +#include <mln/value/int_u8.hh> +#include <mln/value/int_u.hh> +#include <mln/labeling/colorize.hh> +#include <mln/core/alias/neighb2d.hh> +#include <mln/algebra/vec.hh> +#include <mln/core/image/graph_elt_neighborhood.hh> +#include <mln/graph/compute.hh> #include<my/util/vector_bbox_group.hh> #include<my/util/union.hh> #include<my/debug/pict.hh> @@ -9,9 +28,11 @@ using namespace mln; namespace mymln { + namespace document { - + /// THE CLASS DOCUMENT STORE DATA THAT ALLOW THE USER TO PERFORM OPPERATIONS ON A DOCUMENT + /// DESCRIBED BY A GRAPH template<typename Label, typename Float, typename Data> class document { @@ -86,6 +107,10 @@ namespace mymln lines_cooked = false; Enable_Debug_Buffer = false; // Remanant version of debug_buffer_enable } + inline unsigned int height() + {return img_influ.domain().height();} + inline unsigned int width() + {return img_influ.domain().width();} inline bool killed(const Label lbl) {return kill_mask(lbl);} inline void kill(const Label lbl) @@ -102,7 +127,10 @@ namespace mymln { return p[0] > 
((img_influ.domain().len(0) / 8) * 7);} inline bool in_footer(Label lbl) { return in_footer(_bboxgp[lbl]); } - + inline bool is_word(const point2d& p) + { return is_word(img_influ(p)); } + inline bool is_word(const Label lbl) + { return lines_space[lbl] < lines_width[lbl] / 15.0f; } /* OPERATION ON PARAGRAPH */ inline bool link_paragraphs() { @@ -337,6 +365,43 @@ namespace mymln {return lines_bbox[lines_union[A]].len(0) < _bboxgp[A].len(0) * 2 ;} + inline bool is_line_artefact(const point2d& A) + {return is_line_artefact(img_influ(A));} + inline bool is_line_artefact(const Label A) + { + return lines_bbox[lines_union[A]].len(0) > _bboxgp[A].len(0) * 24; + } + + inline void clean_noise_lines() + { + for(int N = 0;N < Areas_Number_; N++) + { + if(noise_mask(N)) + { + if(lines_union[N] || lines_union.link(N)) + { + lines_union.invalidate_link(N); + lines_union[N] = 0; + + } + } + } + for(int N = 0;N < Areas_Number_; N++) + { + if(lines_union[N] && lines_union[lines_first_label[lines_union[N]]]) + { + + lines_union.add_link(lines_first_label[lines_union[N]], N); + } + else if(lines_union[N]) + { + lines_union.add_self_link_coerce(N); + lines_first_label[lines_union[N]] = N; + } + } + } + + inline bool in_end_of_line(const point2d& A) {return in_end_of_line(img_influ(A));} inline bool in_end_of_line(const Label A) @@ -413,19 +478,28 @@ namespace mymln { if(lines_union[lbl] && lines_union.is_self_link(lbl)) { + + if(lines_len[lbl] < 2){ return false; } // THE ITEM IS ALONE ON THE LINE + if(lines_first_label[lines_union[lbl]] == lbl) { + if(lines_union[lines_last_label[lines_union[lbl]]] == 0) // CHECK IF THE LAST LABEL HAS NOT BEEN REMOVED + { + if(lines_len[lbl] < 3){ return false; } // THE LINE HAS TWO ITEM AND ONE HAS BEEN REMOVED recook_lines(); - + } lines_union.add_link(lines_last_label[lines_union[lbl]], lbl); lines_union.add_self_link(lines_last_label[lines_union[lbl]]); } else if(lines_last_label[lines_union[lbl]] == lbl) { + 
if(lines_union[lines_first_label[lines_union[lbl]]] == 0) // CHECK IF THE FIRST LABEL HAS NOT BEEN REMOVED + { + if(lines_len[lbl] < 3){ return false; } // THE LINE HAS TWO ITEM AND ONE HAS BEEN REMOVED recook_lines(); - + } lines_union.add_link(lines_first_label[lines_union[lbl]], lbl); lines_union.add_self_link(lines_first_label[lines_union[lbl]]); } @@ -463,7 +537,7 @@ namespace mymln { return paragraphs_first_line[paragraphs_union[lbl]] == lines_union[lbl];} inline bool contain_end_line(const Label lbl) - { return start_lines_mask(lbl);} + { return end_lines_mask(lbl);} inline void add_noise(const point2d& point) {add_noise(img_influ(point));} @@ -1203,6 +1277,22 @@ namespace mymln } + + inline bool allign_size_height_line_artefact( const point2d& Line, const point2d& Artefact) + { + return allign_size_height_line_artefact(img_influ(Line), img_influ(Artefact)); + } + + inline bool allign_size_height_line_artefact( const Label Line, const Label Artefact) + { + short int SizeL = lines_bbox[lines_union[Line]].len(0); + short int SizeR = _bboxgp[Artefact].len(0); + return SizeR > (SizeL / 2.3f) && SizeR < (SizeL * 1.1f); + } + + + + inline bool allign_size_height_line( const point2d& Left, const point2d& Right) { return allign_size_height_line(img_influ(Left), img_influ(Right)); @@ -1587,6 +1677,21 @@ namespace mymln return allignV < lines_bbox[lines_union[Left]].len(0) && allignV < lines_bbox[lines_union[Right]].len(0); } + + + inline bool allign_V_line_artefact( const point2d& Line, const point2d& Artefact) + {return allign_V_line_artefact(img_influ(Line), img_influ(Artefact));} + + inline bool allign_V_line_artefact( Label Line, Label Artefact) + { + short int allignV = lines_bbox[lines_union[Line]].pcenter()[0] - _bboxgp[Artefact].pcenter()[0]; + if(allignV<0){allignV = -allignV;} + allignV *= 5; + return allignV < lines_bbox[lines_union[Line]].len(0) && allignV < _bboxgp[Artefact].len(0); + } + + + inline bool allign_V_line_strict( const point2d& Left, const 
point2d& Right) {return allign_V_line_strict(img_influ(Left), img_influ(Right));} @@ -1643,7 +1748,6 @@ namespace mymln - inline bool allign_smaller_paragraph( const point2d& Left, const point2d& Right) {return allign_smaller_paragraph(img_influ(Left), img_influ(Right));} inline bool allign_smaller_paragraph( Label Left, Label Right) @@ -2002,6 +2106,26 @@ namespace mymln } io::ppm::save(mln::debug::superpose(debug_buffer, debug_source, literal::white) , file); } + inline void debug_draw_string(const point2d& P, const char* string) + { + if(debug_buffer_enable) + { + mln_VAR(pmin, _bboxgp[img_influ(P)].pmax()); + mln_VAR(pmax, _bboxgp[img_influ(P)].pmax()); + pmin[0] = _bboxgp[img_influ(P)].pmin()[0]; + pmin[1] = pmin[1] + 3; + pmax[0] = pmin[0] + 20; + pmax[1] = pmin[1] + 11; + box2d font_size(pmin, pmax); + mln::draw::string(debug_buffer, string,font_size , mln::literal::red); + } + } + + inline void debug_draw_string(const point2d& P,int value) + { + debug_draw_string(P, itoa(value).c_str()); + } + inline void debug_draw_box_red_buffer(const point2d& L) {debug_draw_box_red_buffer(img_influ(L));} inline void debug_draw_box_green_buffer(const point2d& L) @@ -2326,7 +2450,9 @@ namespace mymln /// USE THIS METHOD ONLY IF YOU KNOW THE LINE ID inline unsigned int get_line_length_direct(unsigned int ID) { return lines_len[ID]; } - + /// USE THIS METHOD ONLY IF YOU KNOW THE LINE ID + inline box2d get_line_bbox_direct(unsigned int ID) + { return lines_bbox[ID]; } inline unsigned int get_line_width(point2d point) { return get_line_width(img_influ(point)); } @@ -2334,6 +2460,12 @@ namespace mymln { return lines_bbox[lines_union[L]].len(1); } + /// USE THIS METHOD ONLY IF YOU KNOW THE LINE ID + inline unsigned int get_line_parent(unsigned int ID) + { return paragraphs_union[lines_first_label[ID]]; } + + + inline Float letter_ratio_YX(const point2d& point) {return letter_ratio_YX(img_influ(point));} inline Float letter_ratio_YX(Label Letter) @@ -2414,6 +2546,14 @@ namespace 
mymln paragraphs_bbox_influ[paragraphs_union[Par1]].has(lines_bbox[lines_union[Line2]].pmax()) ; } + inline bool paragraph_included_letter(point2d Par1, point2d Letter2) + { return paragraph_included_letter(img_influ(Par1), img_influ(Letter2)); } + inline bool paragraph_included_letter(Label Par1, Label Letter2) + { + return + paragraphs_bbox[paragraphs_union[Par1]].has(_bboxgp[Letter2].pmin()) && + paragraphs_bbox[paragraphs_union[Par1]].has(_bboxgp[Letter2].pmax()) ; + } inline bool paragraph_included(point2d Par1, point2d Par2) { return paragraph_included(img_influ(Par1), img_influ(Par2)); } @@ -2857,6 +2997,25 @@ namespace mymln {return lines_space[lines_union[lbl]];} + inline short int get_line_distance(const point2d& Line, const point2d& Item) + { return get_line_distance(img_influ(Line), img_influ(Item));} + inline short int get_line_distance(const Label Line, const Label Item) + { + short int DisLeft = _bboxgp[Item].pmin()[1] - lines_bbox[lines_union[Line]].pmax()[1]; + if(DisLeft < 0){DisLeft = -DisLeft;} + short int DisRight = _bboxgp[Item].pmax()[1] - lines_bbox[lines_union[Line]].pmin()[1]; + if(DisRight < 0){DisRight = -DisRight;} + short int DisUp = _bboxgp[Item].pmin()[0] - lines_bbox[lines_union[Line]].pmax()[0]; + if(DisUp < 0){DisUp = -DisUp;} + short int DisDown = _bboxgp[Item].pmax()[0] - lines_bbox[lines_union[Line]].pmin()[0]; + if(DisDown < 0){DisDown = -DisDown;} + if(DisLeft > DisRight){DisLeft = DisRight;} + if(DisUp > DisDown){DisUp = DisDown;} + if(DisUp > DisLeft){return DisLeft;} + else{return DisUp;} + } + + inline unsigned int get_letter_middle_height(const point2d& point) {return get_letter_middle_height(img_influ(point));} inline unsigned int get_letter_middle_height(const Label lbl) @@ -3404,6 +3563,7 @@ namespace mymln Data SY = label_size_(1, label); return SX >= Min && SY >= Min || SX >= Min * 2 || SY >= Min * 2; } + inline bool label_valid_ratio_(Label label, Float Min, Float Max) { Float Ratio = label_ratio_(label); @@ 
-3653,6 +3813,20 @@ namespace mymln /* IMPLICIT SEPARATOR DETECTION */ mymln::util::union_find<Label> implicit_separators_union; mymln::util::union_find<Label> implicit_separators_union_right; + + + std::string itoa(int value) + { + std::string output = ""; + if(value < 0){output+="-"; value = -value;} + while(value >= 10) + { + output = (char)('0' + (value % 10)) + output; + value /= 10; + } + output = (char)('0' + value) + output; + return output; + } }; } } diff --git a/scribo/sandbox/raphael/code/my/document/filter/filter.hh b/scribo/sandbox/raphael/code/my/document/filter/filter.hh deleted file mode 100644 index c3a294c..0000000 --- a/scribo/sandbox/raphael/code/my/document/filter/filter.hh +++ /dev/null @@ -1,182 +0,0 @@ -#ifndef INC_DOCUMENT_FILTER_GENERIC -#define INC_DOCUMENT_FILTER_GENERIC -namespace mymln -{ - namespace document - { - namespace filter - { - template<typename L, typename F, typename D, typename Left, typename Right> - class filter - { - public: - filter(){} - filter(document<L,F,D>& doc){ doc_ = doc; } - filter(document<L,F,D>& doc, vertex_image<point2d,bool> mask){ doc_ = doc; mask_ = mask; } - inline bool link_test(point2d& A, point2d& B){ return true; } - inline bool vertex_test(point2d& A){ return true; } - inline bool gen_link_test(point2d& A, point2d& B) - { - return link_test(A, B); - } - inline bool gen_vertex_test(point2d& A) - { - return vertex_test(A); - } - inline void iter_dgb(std::string dgb_out, image2d<bool> s) - { - image2d<value::rgb8> out; - mln::initialize(out, s); - typedef vertex_image<point2d,bool> v_ima_g; - typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; - mln_piter_(v_ima_g) v(mask_.domain()); - typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; - nbh_t nbh(mask_); - mln_niter_(nbh_t) q(nbh, v); - for_all(v) - { - if(gen_vertex_test(v)) - { - for_all(q) - { - if(gen_link_test(v, q)) - { - draw::line(out, q,v, mln::literal::green); - } - else - { - 
draw::line(out, q,v, mln::literal::magenta); - } - } - } - else - { - draw::line(out, q,v, mln::literal::magenta); - } - } - } - inline void iter() - { - typedef vertex_image<point2d,bool> v_ima_g; - typedef p_vertices<mln::util::graph, fun::i2v::array<mln::point2d> > g_vertices_p; - mln_piter_(v_ima_g) v(mask_.domain()); - typedef graph_elt_neighborhood_if<mln::util::graph, g_vertices_p, v_ima_g> nbh_t; - nbh_t nbh(mask_); - mln_niter_(nbh_t) q(nbh, v); - for_all(v) - { - if(gen_vertex_test(v)) - { - for_all(q) - { - if(gen_link_test(v, q)) - { - - } - } - } - } - } - - inline filter& operator|(filter& B) - { - filter<L,F,D> PFilter = filter_or(doc_, mask_); - PFilter.sub_filter_A_ = this; - PFilter.sub_filter_B_ = B; - B.doc_ = doc_; - B.mask_ = mask_; - return PFilter; - } - - inline filter& operator&(filter& B) - { - filter<L,F,D> PFilter = filter_and(doc_, mask_); - PFilter.sub_filter_A_ = this; - PFilter.sub_filter_B_ = B; - B.doc_ = doc_; - B.mask_ = mask_; - return PFilter; - } - - protected: - Left sub_filter_A_; - Right sub_filter_B_; - - document<L,F,D> doc_; - vertex_image<point2d,bool> mask_; - - - - }; - - - - - - - - - - - template<typename L, typename F, typename D> - class filter_or : filter<L,F,D> - { - public: - inline bool gen_link_test(point2d& A, point2d& B) - { - return sub_filter_A_.gen_link_test(A, B) || sub_filter_B_.gen_link_test(A, B); - } - inline bool gen_vertex_test(point2d& A) - { - return sub_filter_A_.gen_vertex_test(A) || sub_filter_B_.gen_vertex_test(A); - } - - protected: - filter<L,F,D> sub_filter_A_; - filter<L,F,D> sub_filter_B_; - - document<L,F,D> doc_; - vertex_image<point2d,bool> mask_; - }; - - template<typename L, typename F, typename D> - class filter_and : filter<L,F,D> - { - public: - inline bool gen_link_test(point2d& A, point2d& B) - { - return sub_filter_A_.gen_link_test(A, B) || sub_filter_B_.gen_link_test(A, B); - } - inline bool gen_vertex_test(point2d& A) - { - return sub_filter_A_.gen_vertex_test(A) || 
sub_filter_B_.gen_vertex_test(A); - } - - protected: - filter<L,F,D> sub_filter_A_; - filter<L,F,D> sub_filter_B_; - - document<L,F,D> doc_; - vertex_image<point2d,bool> mask_; - }; - - template<typename L, typename F, typename D> - class filter_letter : filter<L,F,D> - { - public: - inline bool vertex_test(point2d& A){ return doc_.contain_letter(A); } - - protected: - filter<L,F,D> sub_filter_A_; - filter<L,F,D> sub_filter_B_; - - document<L,F,D> doc_; - vertex_image<point2d,bool> mask_; - }; - - - - } - } -} -#endif \ No newline at end of file diff --git a/scribo/sandbox/raphael/code/my/document/image.hh b/scribo/sandbox/raphael/code/my/document/image.hh index 158be87..edec471 100644 --- a/scribo/sandbox/raphael/code/my/document/image.hh +++ b/scribo/sandbox/raphael/code/my/document/image.hh @@ -37,8 +37,10 @@ namespace mymln ) { + doc.move_line_self_link(doc[v]); doc.debug_draw_line_green_buffer(q,v); doc.debug_draw_box_green_buffer(v); + doc.add_noise(v); } } } @@ -58,23 +60,33 @@ namespace mymln mln_niter_(nbh_t) q(nbh, v); for_all(v) { + if(doc.contain_end_line(v)) + { + doc.debug_draw_string(v, doc.get_line_length(v)); + } if( doc.contain_paragraph(v) && doc.get_paragraph_length(v) < 3 && - doc.get_line_length(v) < 4 + doc.get_line_length(v) < 20 ) { + unsigned int noise_count = 0; for_all(q) { if(doc.contain_noise(q)) { - doc.debug_draw_line_green_buffer(q,v); - doc.debug_draw_box_green_buffer(v); + + noise_count++; } } + if(noise_count > 3) + { + doc.move_line_self_link(doc[v]); + doc.debug_draw_box_green_buffer(v); + doc.add_noise(v); + } } } - doc.propage_line_link(); } } } diff --git a/scribo/sandbox/raphael/code/my/draw/string.hh b/scribo/sandbox/raphael/code/my/draw/string.hh new file mode 100644 index 0000000..5eefa66 --- /dev/null +++ b/scribo/sandbox/raphael/code/my/draw/string.hh @@ -0,0 +1,817 @@ +// Copyright (C) 2007, 2008, 2009 EPITA Research and Development +// Laboratory (LRDE) +// +// This file is part of Olena. 
+// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_DRAW_STRING_HH +# define MLN_DRAW_STRING_HH + +/*! \file + * + * \brief Draw a string in an image. + * + * + */ + +# include <mln/core/concept/image.hh> +# include <mln/core/alias/box2d.hh> +# include <mln/data/paste.hh> +# include <mln/draw/line.hh> +# include <mln/draw/box.hh> +# include <mln/pw/image.hh> +# include <mln/pw/cst.hh> +# include <mln/core/var.hh> + +namespace mln +{ + + namespace draw + { + /*! Draw a char at value \p v in image \p ima + * + * \param[in,out] ima The image to be drawn. + * \param[in] c the char draw. + * \param[in] _b the bounding box of the character. + * \param[in] v The value to assign to all drawn pixels. + * + * \pre \p ima has to be initialized. + * \pre \p ima has \p beg. + * \pre \p ima has \p end. 
+ * + */ + template <typename I, typename B> + void character(Image<I>& ima, + const char c, + const Box<B>& b, + const mln_value(I)& v); + + + /*! Draw a string at value \p v in image \p ima + * + * \param[in,out] ima The image to be drawn. + * \param[in] s the string draw. + * \param[in] _b the bounding box of one character. + * \param[in] v The value to assign to all drawn pixels. + * + * \pre \p ima has to be initialized. + * \pre \p ima has \p beg. + * \pre \p ima has \p end. + * + */ + template <typename I, typename B> + void string(Image<I>& ima, + const char* s, + const Box<B>& b, + const mln_value(I)& v); + + +# ifndef MLN_INCLUDE_ONLY + + + + namespace internal + { + template <typename I, typename B> + inline + void string_dot(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pcenter()); + mln_VAR(cp_max, b.pcenter()); + cp_min[1] -= 1; + cp_min[0] -= 1; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + template <typename I, typename B> + inline + void string_box(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmin()); + mln_VAR(cp_max, b.pmax()); + cp_min[0] -= 1; cp_min[1] -= 1; + cp_max[0] -= 1; cp_max[1] -= 1; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + draw::box(ima, b, v); + } + + template <typename I, typename B> + inline + void string_pipe(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pcenter()); + mln_VAR(cp_max, b.pcenter()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_pipe_down(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pcenter()); + mln_VAR(cp_max, b.pcenter()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + cp_min[0] 
= cp_max[0] - b.len(0) / 2; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + template <typename I, typename B> + inline + void string_pipe_top(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pcenter()); + mln_VAR(cp_max, b.pcenter()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + cp_max[0] = cp_min[0] + b.len(0) / 2; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + template <typename I, typename B> + inline + void string_right_pipe(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmax()); + mln_VAR(cp_max, b.pmax()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + + template <typename I, typename B> + inline + void string_right_pipe_top(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmax()); + mln_VAR(cp_max, b.pmax()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + cp_max[0] = cp_min[0] + b.len(0) / 2; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_right_pipe_down(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmax()); + mln_VAR(cp_max, b.pmax()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + cp_min[0] = cp_max[0] - b.len(0) / 2; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_left_pipe(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmin()); + mln_VAR(cp_max, b.pmin()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, 
typename B> + inline + void string_left_pipe_top(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmin()); + mln_VAR(cp_max, b.pmin()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + cp_max[0] = cp_min[0] + b.len(0) / 2; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_down_slash(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pcenter()); + mln_VAR(cp_max, b.pmax()); + cp_min[1] = b.pmin()[1]; + draw::line(ima,cp_min, cp_max, v ); + cp_min[0] -= 1; cp_max[0] -= 1; + draw::line(ima,cp_min, cp_max, v ); + } + + + template <typename I, typename B> + inline + void string_top_slash(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pcenter()); + mln_VAR(cp_max, b.pmin()); + cp_min[1] = b.pmin()[1]; cp_max[1] = b.pmax()[1]; + draw::line(ima,cp_min, cp_max, v ); + cp_min[0] -= 1; cp_max[0] -= 1; + draw::line(ima,cp_min, cp_max, v ); + } + + + template <typename I, typename B> + inline + void string_left_slash(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmax()); + mln_VAR(cp_max, b.pmin()); + draw::line(ima,cp_min, cp_max, v ); + cp_min[0] -= 1; cp_max[0] -= 1; + draw::line(ima,cp_min, cp_max, v ); + } + + template <typename I, typename B> + inline + void string_right_slash(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmin()); + mln_VAR(cp_max, b.pmax()); + cp_min[1] = b.pmax()[1]; cp_max[1] = b.pmin()[1]; + draw::line(ima,cp_min, cp_max, v ); + cp_min[0] -= 1; cp_max[0] -= 1; + draw::line(ima,cp_min, cp_max, v ); + } + + template <typename I, typename B> + inline + void string_left_pipe_down(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, 
b.pmin()); + mln_VAR(cp_max, b.pmin()); + cp_min[1] -= 1; cp_min[0] = b.pmin()[0]; + cp_min[0] -= 1; cp_max[0] = b.pmax()[0]; + cp_min[0] = cp_max[0] - b.len(0) / 2; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_minus(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pcenter()); + mln_VAR(cp_max, b.pcenter()); + cp_min[1] -= 1; cp_min[1] = b.pmin()[1]; + cp_min[0] -= 1; cp_max[1] = b.pmax()[1]; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_underscore(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmax()); + mln_VAR(cp_max, b.pmax()); + cp_min[1] -= 1; cp_min[1] = b.pmin()[1]; + cp_min[0] -= 1; cp_max[1] = b.pmax()[1]; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_topscore(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + B b = exact(_b); + mln_VAR(cp_min, b.pmin()); + mln_VAR(cp_max, b.pmin()); + cp_min[1] -= 1; cp_min[1] = b.pmin()[1]; + cp_min[0] -= 1; cp_max[1] = b.pmax()[1]; + B cb(cp_min, cp_max); + draw::box(ima, cb, v); + } + + template <typename I, typename B> + inline + void string_topbox(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + string_topscore(ima, _b, v); + string_minus(ima, _b, v); + string_left_pipe_top(ima, _b, v); + string_right_pipe_top(ima, _b, v); + } + + template <typename I, typename B> + inline + void string_downbox(Image<I>& ima, + const Box<B>& _b, + const mln_value(I)& v) + { + string_underscore(ima, _b, v); + string_minus(ima, _b, v); + string_left_pipe_down(ima, _b, v); + string_right_pipe_down(ima, _b, v); + } + } // end of namespace mln::draw::internal + + + + template <typename I, typename B> + inline + void character(Image<I>& ima, + const char c, + const Box<B>& _b, + const mln_value(I)& 
v) + { + B b = exact(_b); + switch(c) + { + case 'A': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_topbox(ima, b, v); + break; + case 'a': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_topbox(ima, b, v); + break; + + case 'B': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_downbox(ima, b, v); + break; + case 'b': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_downbox(ima, b, v); + break; + case 'C': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + break; + case 'c': + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + break; + case 'D': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_downbox(ima, b, v); + break; + case 'd': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_downbox(ima, b, v); + break; + + case 'E': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case 'e': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + + case 'F': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case 'f': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + 
mln::draw::internal::string_topscore(ima, b, v); + break; + + + case 'G': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_downbox(ima, b, v); + mln::draw::internal::string_left_pipe_top(ima, b, v); + break; + case 'g': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_downbox(ima, b, v); + mln::draw::internal::string_left_pipe_top(ima, b, v); + break; + + case 'H': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + case 'h': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + break; + + case 'I': + mln::draw::internal::string_pipe(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case 'i': + mln::draw::internal::string_pipe_down(ima, b, v); + break; + + case 'J': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case 'j': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + + + case 'K': + mln::draw::internal::string_top_slash(ima, b, v); + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + break; + case 'k': + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_top_slash(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + break; + + case 'L': + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + break; + case 'l': + 
mln::draw::internal::string_left_pipe(ima, b, v); break; + + case 'M': + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_pipe_top(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + case 'm': + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_pipe_down(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + break; + + + case 'N': + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + case 'n': + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + break; + + case 'O': + mln::draw::internal::string_box(ima, b, v); break; + case 'o': + mln::draw::internal::string_downbox(ima, b, v); break; + + case 'P': + mln::draw::internal::string_topbox(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + break; + case 'p': + mln::draw::internal::string_topbox(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + break; + + case 'Q': + mln::draw::internal::string_topbox(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + case 'q': + mln::draw::internal::string_topbox(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + + case 'R': + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_topbox(ima, b,v); + break; + case 'r': + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_topbox(ima, b,v); + break; + case 'S': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + 
mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + mln::draw::internal::string_left_pipe_top(ima, b, v); + break; + + case 's': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + mln::draw::internal::string_left_pipe_top(ima, b, v); + break; + + case 'T': + mln::draw::internal::string_pipe(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case 't': + mln::draw::internal::string_pipe(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + + case 'U': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + case 'u': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + break; + case 'V': + mln::draw::internal::string_left_pipe_top(ima, b, v); + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + case 'v': + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_down_slash(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + break; + + case 'W': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_pipe_down(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + case 'w': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + mln::draw::internal::string_pipe_down(ima, b, v); + break; + + case 'X': + mln::draw::internal::string_left_slash(ima, b, v); + 
mln::draw::internal::string_right_slash(ima, b, v); + break; + case 'x': + mln::draw::internal::string_left_slash(ima, b, v); + mln::draw::internal::string_right_slash(ima, b, v); + break; + + case 'Y': + mln::draw::internal::string_pipe_top(ima, b, v); + mln::draw::internal::string_right_slash(ima, b, v); + break; + case 'y': + mln::draw::internal::string_pipe_top(ima, b, v); + mln::draw::internal::string_right_slash(ima, b, v); + break; + + case 'Z': + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_right_slash(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + break; + case 'z': + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_down_slash(ima, b, v); + break; + + + case '0': + mln::draw::internal::string_box(ima, b, v); + mln::draw::internal::string_dot(ima, b, v); + break; + case '1': + mln::draw::internal::string_pipe(ima, b, v);break; + case '2': + mln::draw::internal::string_right_pipe_top(ima, b, v); + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case '3': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case '4': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_left_pipe_top(ima, b, v); + break; + case '5': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_right_pipe_down(ima, b, v); + mln::draw::internal::string_left_pipe_top(ima, b, v); + break; + case '6': + 
mln::draw::internal::string_left_pipe(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_downbox(ima, b, v); + mln::draw::internal::string_left_pipe_top(ima, b, v); + break; + + case '7': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + break; + case '8': + mln::draw::internal::string_box(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + break; + case '9': + mln::draw::internal::string_right_pipe(ima, b, v); + mln::draw::internal::string_topbox(ima, b, v); + break; + case '.': + mln::draw::internal::string_dot(ima, b, v);break; + case ' ': + break; + case '-': + mln::draw::internal::string_minus(ima, b, v); + break; + case '_': + mln::draw::internal::string_underscore(ima, b, v); + break; + case '=': + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + break; + case '$': + mln::draw::internal::string_right_pipe_top(ima, b, v); + mln::draw::internal::string_left_pipe_down(ima, b, v); + mln::draw::internal::string_minus(ima, b, v); + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_pipe(ima, b, v); + break; + + case '(': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_left_pipe(ima, b, v); + break; + + case ')': + mln::draw::internal::string_underscore(ima, b, v); + mln::draw::internal::string_topscore(ima, b, v); + mln::draw::internal::string_right_pipe(ima, b, v); + break; + + case '|': + mln::draw::internal::string_pipe(ima, b, v); + break; + case '/': + mln::draw::internal::string_right_slash(ima, b, v); + break; + case '\\': + mln::draw::internal::string_left_slash(ima, b, v); + break; + } + } + + + template <typename I, typename B> + inline + void string(Image<I>& ima, + const char* s, + const Box<B>& _b, + const mln_value(I)& v) + 
{ + B b = exact(_b); + const char* ptr = &(s[0]); + int Decal = 0; + int DecalY = 0; + int DecalX = 0; + while(ptr[Decal]) + { + if(ptr[Decal] == '\n'){DecalY++; DecalX = 0; ptr++; continue;} + if(ptr[Decal] == '\t'){ DecalX += 5; ptr++; continue;} + mln_VAR(cp_min, b.pmin()); + mln_VAR(cp_max, b.pmax()); + cp_min[1] += (b.len(1) + (b.len(1) / 2)) * DecalX; + cp_max[1] += (b.len(1) + (b.len(1) / 2)) * DecalX; + + cp_min[0] += (b.len(0) + (b.len(1) / 2)) * DecalY; + cp_max[0] += (b.len(0) + (b.len(1) / 2)) * DecalY; + + B cb(cp_min, cp_max); + character(ima, ptr[Decal], cb, v); + Decal++; + DecalX++; + } + } + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace mln::draw + +} // end of namespace mln + + +#endif // ! MLN_DRAW_BOX_HH diff --git a/scribo/sandbox/raphael/code/my/runtime/lib.hh b/scribo/sandbox/raphael/code/my/runtime/lib.hh index 951c571..41b2a43 100644 --- a/scribo/sandbox/raphael/code/my/runtime/lib.hh +++ b/scribo/sandbox/raphael/code/my/runtime/lib.hh @@ -68,10 +68,11 @@ namespace mymln run.add_function("clean.paragraphs_first_line", &(mymln::document::clean_paragraphs_first_line)); run.add_function("clean.ellipsis", &(mymln::document::clean_ellipsis)); run.add_function("clean.apostrophe_items", &(mymln::document::clean_apostrophe_items)); - - + run.add_function("clean.finalize_letters", &(mymln::document::clean_finalize_letters)); + run.add_function("clean.lines_artefacts", &(mymln::document::clean_lines_artefacts)); run.add_function("image.bad_paragraph", &(mymln::document::image_bad_paragraph)); run.add_function("image.anomalies_paragraph", &(mymln::document::image_anomalies_paragraph)); + } template<typename L, typename F, typename D> diff --git a/scribo/sandbox/raphael/code/my/util/union.hh b/scribo/sandbox/raphael/code/my/util/union.hh index 81b551d..e86e15f 100644 --- a/scribo/sandbox/raphael/code/my/util/union.hh +++ b/scribo/sandbox/raphael/code/my/util/union.hh @@ -32,6 +32,11 @@ namespace mymln {return mark_link[A] == A;} inline void 
invalidate_link(const Label A) { mark_link[A] = 0; } + inline void add_self_link_coerce(const Label A) + { + if(!A){return;} + mark_link[A] = A; + } inline void add_self_link(const Label A) { if(!A){return;} diff --git a/scribo/sandbox/raphael/code/test.cc b/scribo/sandbox/raphael/code/test.cc index 99d6600..83be0d9 100644 --- a/scribo/sandbox/raphael/code/test.cc +++ b/scribo/sandbox/raphael/code/test.cc @@ -34,6 +34,7 @@ #include <mln/algebra/vec.hh> #include <mln/core/image/graph_elt_neighborhood.hh> #include <mln/graph/compute.hh> +#include <my/draw/string.hh> #include <mln/draw/plot.hh> #include <my/debug/pict.hh> @@ -54,6 +55,16 @@ #include <my/debug/remote/document_remote.hh> #include <my/debug/remote/lib.hh> #endif + + + + +#ifndef NDEMO +#include <demo/demo_extract_paragraphs.hh> +#endif + + + using namespace mln; using namespace std; @@ -133,127 +144,25 @@ void Process(std::string File, std::string Dir, mymln::runtime::runtime< value:: doc.add(N, (uint16)link); } - //mymln::debug::save_label_image(ima_influ, "influ_" + File); - /* - mymln::document::clean_containers_items(doc); - mymln::document::clean_letters_items(doc); - mymln::document::clean_get_lines(doc); - - - mymln::document::clean_letters_alone(doc); - doc.cook_lines(); - mymln::document::clean_included_letters(doc); - doc.recook_lines(); - mymln::document::clean_dot_items(doc); - doc.recook_lines(); - mymln::document::clean_quote_items(doc); - doc.recook_lines(); + runtime.run(); + std::cout << "WORK ON GRAPH : " << timer.stop() << endl; + mymln::data::page<uint16,float,short> page(doc); + page.export_HTML(Dir + "/debug_" + File + ".html"); - - mymln::document::clean_between(doc); - doc.recook_lines(); - doc.compute_letter_middle_height(); - doc.compute_letter_middle_width(); - mymln::document::clean_odd_letters(doc); - doc.recook_lines(); - -*/ - - /*doc.compute_letter_middle_space(); - mymln::document::clean_lines_space(doc, Dir + "/" + "quote_graph_" + File, doc.image_mask_letters()); - 
doc.recook_lines();*/ - -/* - - mymln::document::separators::separators_find_allign(doc); - mymln::document::separators::separators_make_clean(doc); - doc.cook_separators(); - doc.cook_line_splitting(); - - - mymln::document::clean_line_link_item(doc); - mymln::document::clean_proximity_lines(doc); - mymln::document::clean_quote_lines(doc); - doc.recook_lines(); - - - doc.reset_implicit_separators(); - mymln::document::separators::separators_find_allign_right(doc); - mymln::document::separators::separators_make_clean_right(doc); - doc.cook_separators_right(); - doc.cook_line_splitting_exclusive(); - mymln::document::clean_line_link_item(doc); - - mymln::document::clean_proximity_lines(doc); - mymln::document::clean_quote_lines(doc); - mymln::document::clean_alone_letters_lines(doc); - doc.recook_lines(); - - doc.compute_letter_middle_height(); - doc.compute_letter_middle_width(); - mymln::document::clean_odd_letters(doc); - doc.recook_lines(); - */ - /* - mymln::document::clean_lines_space(doc, Dir + "/" + "alone_graph_" + File, doc.image_mask_letters()); - doc.recook_lines();*/ - - /* - mymln::document::remove_alone_letter(doc); - - doc.recook_lines(); - - mymln::document::clean_paragraph_items(doc); - doc.cook_paragraphs(); - - - mymln::document::clean_paragraphs_up(doc); - doc.recook_paragraphs(); - - mymln::document::clean_paragraphs_large(doc); - doc.recook_paragraphs(); - mymln::document::clean_included_paragraphs(doc); - doc.recook_paragraphs(); - - - doc.recook_lines(); - */ - - - runtime.run(); - std::cout << "WORK ON GRAPH : " << timer.stop() << endl; + + // THE DEMOS START HERE +#ifndef NDEMO + demo_extract_paragraphs(doc); +#endif // ! NDEMO - //mymln::data::page<uint16,float,short> page(doc); - //page.export_HTML( Dir + "/" + File + ".html"); - - - /* - doc.cook_lines_iter(); - std::cout << doc.get_first_line(); - - - - - doc.compute_letter_middle_space(); - - mymln::document::recognize_minus(doc); - mymln::document::recognize_dot(doc); - for(int Line = 
doc.get_first_line(); Line; doc.get_next_line(Line)) - { - std::cout << doc.get_line_string(Line) << endl; - } - - //doc.debug_save_lines(Dir + "/" + "lines_" + File); - //doc.debug_save_all(Dir + "/" + "debug_" + File, ima); - */ - } + #ifndef NREMOTE // THIS IS USED TO CREATE A FILTER WITHOUT RECOMPILING mymln::document::debug::remote< value::int_u<16> ,float,short> rem; -- 1.7.2.5