Olena-patches
Threads by month
- ----- 2025 -----
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2024 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
June 2011
- 6 participants
- 92 discussions

02 Jun '11
---
.../toolchain/internal/content_in_doc_functor.hh | 5 +++++
scribo/src/Makefile.am | 3 ++-
scribo/src/contest/DAE-2011/Makefile.am | 11 ++++++++++-
scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc | 4 ++--
4 files changed, 19 insertions(+), 4 deletions(-)
diff --git a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
index 4308056..4f2c074 100644
--- a/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_doc_functor.hh
@@ -57,7 +57,10 @@
# include <scribo/preprocessing/denoise_fg.hh>
+# ifndef SCRIBO_NOCR
# include <scribo/text/recognition.hh>
+# endif // ! SCRIBO_NOCR
+
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
# include <scribo/text/paragraphs.hh>
@@ -437,6 +440,7 @@ namespace scribo
on_progress();
+# ifndef SCRIBO_NOCR
// Text recognition
if (enable_ocr)
{
@@ -446,6 +450,7 @@ namespace scribo
on_progress();
}
+# endif // ! SCRIBO_NOCR
// // Link text lines
// on_new_progress_label("Linking text lines");
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index 6ab5d7d..26f2ade 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -28,7 +28,8 @@ SUBDIRS = \
preprocessing \
table \
text \
- toolchain
+ toolchain \
+ util
diff --git a/scribo/src/contest/DAE-2011/Makefile.am b/scribo/src/contest/DAE-2011/Makefile.am
index 09bccd7..d7d6b8d 100644
--- a/scribo/src/contest/DAE-2011/Makefile.am
+++ b/scribo/src/contest/DAE-2011/Makefile.am
@@ -24,7 +24,7 @@ noinst_PROGRAMS =
if HAVE_MAGICKXX
-noinst_PROGRAMS += content_in_hdoc_dae
+noinst_PROGRAMS += content_in_hdoc_dae content_in_doc_dae
content_in_hdoc_dae_SOURCES = content_in_hdoc_dae.cc
content_in_hdoc_dae_CPPFLAGS = $(AM_CPPFLAGS) \
@@ -34,4 +34,13 @@ content_in_hdoc_dae_LDFLAGS = $(AM_LDFLAGS) \
-lpthread \
$(MAGICKXX_LDFLAGS)
+content_in_doc_dae_SOURCES = content_in_doc_dae.cc
+content_in_doc_dae_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ -I$(top_builddir)
+content_in_doc_dae_LDFLAGS = $(AM_LDFLAGS) \
+ -lpthread \
+ $(MAGICKXX_LDFLAGS)
+
+
endif HAVE_MAGICKXX
\ No newline at end of file
diff --git a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
index f13b8f6..5986142 100644
--- a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
+++ b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -24,7 +24,7 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-# define SCRIBO_NOCR
+#define SCRIBO_NOCR
#include <libgen.h>
#include <fstream>
--
1.5.6.5
1
0

branch fix-leak-remove_holder updated: last-svn-commit-910-g4543e11
by lazzara@lrde.epita.fr 01 Jun '11
01 Jun '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch fix-leak-remove_holder has been updated
via 4543e118e2559cbd15afd3bf26531d1db6dde9d3 (commit)
from 1dec414d7cbe485e888953444197cc645d076540 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
4543e11 Specify output dir.
-----------------------------------------------------------------------
Summary of changes:
scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0
---
scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
index 5309d49..f13b8f6 100644
--- a/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
+++ b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
@@ -60,6 +60,7 @@
const char *args_desc[][2] =
{
{ "input.tif", "An image." },
+ { "output_dir", "Output directory." },
{0, 0}
};
@@ -70,10 +71,10 @@ int main(int argc, char* argv[])
using namespace scribo;
using namespace mln;
- if (argc != 2)
+ if (argc != 3)
return scribo::debug::usage(argv,
"Document Image Analysis in Historical Documents",
- "input.tif",
+ "input.tif output_dir",
args_desc);
trace::entering("main");
@@ -144,7 +145,7 @@ int main(int argc, char* argv[])
}
std::stringstream ss;
- ss << basename(argv[1]) << p << ".pbm";
+ ss << argv[2] << "/" << basename(argv[1]) << "." << p << ".pbm";
mln::io::pbm::save(output, ss.str());
}
--
1.5.6.5
1
0

branch fix-leak-remove_holder created: last-svn-commit-909-g1dec414
by lazzara@lrde.epita.fr 01 Jun '11
01 Jun '11
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "Olena, a generic and efficient image processing platform".
The branch fix-leak-remove_holder has been created
at 1dec414d7cbe485e888953444197cc645d076540 (commit)
- Log -----------------------------------------------------------------
1dec414 Add binaries for ICDAR contests.
b0db028 Small fixes.
a714708 Fix use of uninitialized values.
e6c5288 Fix a serious memory leak.
7142b16 BACKUP
bce6cef BACKUP
-----------------------------------------------------------------------
hooks/post-receive
--
Olena, a generic and efficient image processing platform
1
0

01 Jun '11
---
configure.ac | 3 +
.../toolchain/internal/content_in_hdoc_functor.hh | 6 +-
scribo/scribo/toolchain/text_in_doc_preprocess.hh | 1 +
scribo/src/content_in_hdoc.cc | 20 ++--
.../estim => src/contest/DAE-2011}/Makefile.am | 22 +++--
.../DAE-2011/content_in_hdoc_dae.cc} | 111 ++++++++++++--------
scribo/src/contest/Makefile.am | 7 +-
.../estim => src/contest/hdlac-2011}/Makefile.am | 22 +++--
.../hdlac-2011/content_in_hdoc_hdlac.cc} | 91 +++++++++--------
9 files changed, 172 insertions(+), 111 deletions(-)
copy scribo/{tests/estim => src/contest/DAE-2011}/Makefile.am (64%)
copy scribo/src/{non_text_components.cc => contest/DAE-2011/content_in_hdoc_dae.cc} (51%)
copy scribo/{tests/estim => src/contest/hdlac-2011}/Makefile.am (63%)
copy scribo/src/{non_text_components.cc => contest/hdlac-2011/content_in_hdoc_hdlac.cc} (56%)
diff --git a/configure.ac b/configure.ac
index fdbddd9..66643a4 100644
--- a/configure.ac
+++ b/configure.ac
@@ -323,6 +323,8 @@ AC_CONFIG_FILES([
scribo/src/binarization/Makefile
scribo/src/contest/Makefile
scribo/src/contest/hdibco-2010/Makefile
+ scribo/src/contest/hdlac-2011/Makefile
+ scribo/src/contest/DAE-2011/Makefile
scribo/src/debug/Makefile
scribo/src/filter/Makefile
scribo/src/misc/Makefile
@@ -337,6 +339,7 @@ AC_CONFIG_FILES([
scribo/src/text/Makefile
scribo/src/toolchain/Makefile
scribo/src/toolchain/nepomuk/Makefile
+ scribo/src/util/Makefile
])
AC_CONFIG_FILES([scribo/tests/data.hh])
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index adfcdb3..5e98f3e 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -272,7 +272,8 @@ namespace scribo
{
on_new_progress_label("Denoise...");
- std::cout << ">> min_area = " << min_area << std::endl;
+ if (verbose)
+ std::cout << ">> min_area = " << min_area << std::endl;
input_cleaned = preprocessing::denoise_fg(input_cleaned, c8(), min_area);
@@ -547,7 +548,8 @@ namespace scribo
if (!(closing_size % 2))
closing_size += 1;
- std::cout << ">> CLosing size = " << closing_size << std::endl;
+ if (verbose)
+ std::cout << ">> CLosing size = " << closing_size << std::endl;
component_set<L>
elements = scribo::primitive::extract::non_text_hdoc(doc, closing_size);
diff --git a/scribo/scribo/toolchain/text_in_doc_preprocess.hh b/scribo/scribo/toolchain/text_in_doc_preprocess.hh
index 20fea02..dba5601 100644
--- a/scribo/scribo/toolchain/text_in_doc_preprocess.hh
+++ b/scribo/scribo/toolchain/text_in_doc_preprocess.hh
@@ -159,6 +159,7 @@ namespace scribo
f.sauvola_K = K;
f.enable_fg_extraction = enable_fg_bg;
f.lambda = lambda;
+ f.verbose = false;
// Get results.
mln_ch_value(I,bool) output = f(input);
diff --git a/scribo/src/content_in_hdoc.cc b/scribo/src/content_in_hdoc.cc
index ed15693..737b1d3 100644
--- a/scribo/src/content_in_hdoc.cc
+++ b/scribo/src/content_in_hdoc.cc
@@ -31,7 +31,6 @@
#include <mln/core/image/image2d.hh>
-#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
#include <scribo/toolchain/content_in_hdoc.hh>
@@ -95,8 +94,6 @@ int main(int argc, char* argv[])
scribo::make::internal::debug_filename_prefix = argv[argc - 1];
}
- scribo::debug::logger().set_level(scribo::debug::None);
-
trace::entering("main");
Magick::InitializeMagick(*argv);
@@ -198,16 +195,23 @@ int main(int argc, char* argv[])
find_line_seps, find_whitespace_seps,
!language.empty(), language);
+ doc.set_filename(basename(argv[1]));
+
// Saving results
std::cout << "Saving results..." << std::endl;
-// scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page);
+ scribo::io::xml::save(doc, "page.xml", scribo::io::xml::Page);
std::cout << "End of process - " << t << std::endl;
- scribo::io::xml::save(doc, argv[2], scribo::io::xml::PageExtended);
- scribo::io::img::save(doc, "debug.png", scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, "full.png", scribo::io::img::Full);
- // scribo::io::xml::save(doc, "full.xml", scribo::io::xml::Full);
+// scribo::io::xml::save(doc, argv[2], scribo::io::xml::PageExtended);
+
+ // scribo::io::img::save(doc, "debug.png", scribo::io::img::DebugWoImage);
+ // std::cout << "Debug image saved " << t << std::endl;
+ // scribo::io::img::save(doc, "full.png", scribo::io::img::DebugWithImage);
+ // std::cout << "Full Debug image saved " << t << std::endl;
+ // scribo::io::img::save(doc, "full_hd.png", scribo::io::img::Full);
+
+// sleep(10);
trace::exiting("main");
}
diff --git a/scribo/tests/estim/Makefile.am b/scribo/src/contest/DAE-2011/Makefile.am
similarity index 64%
copy from scribo/tests/estim/Makefile.am
copy to scribo/src/contest/DAE-2011/Makefile.am
index 1c39ab6..09bccd7 100644
--- a/scribo/tests/estim/Makefile.am
+++ b/scribo/src/contest/DAE-2011/Makefile.am
@@ -17,13 +17,21 @@
## Process this file through Automake to create Makefile.in.
-include $(top_srcdir)/scribo/tests/tests.mk
+include $(top_srcdir)/scribo/scribo.mk
-check_PROGRAMS = \
- font_color \
- font_boldness
+noinst_PROGRAMS =
-font_color_SOURCES = font_color.cc
-font_boldness_SOURCES = font_boldness.cc
-TESTS = $(check_PROGRAMS)
+if HAVE_MAGICKXX
+
+noinst_PROGRAMS += content_in_hdoc_dae
+
+content_in_hdoc_dae_SOURCES = content_in_hdoc_dae.cc
+content_in_hdoc_dae_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ -I$(top_builddir)
+content_in_hdoc_dae_LDFLAGS = $(AM_LDFLAGS) \
+ -lpthread \
+ $(MAGICKXX_LDFLAGS)
+
+endif HAVE_MAGICKXX
\ No newline at end of file
diff --git a/scribo/src/non_text_components.cc b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
similarity index 51%
copy from scribo/src/non_text_components.cc
copy to scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
index 0f4cce4..5309d49 100644
--- a/scribo/src/non_text_components.cc
+++ b/scribo/src/contest/DAE-2011/content_in_hdoc_dae.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -24,22 +24,24 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+# define SCRIBO_NOCR
#include <libgen.h>
#include <fstream>
#include <iostream>
+#include <sstream>
#include <mln/core/image/image2d.hh>
-#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
-#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/content_in_hdoc.hh>
#include <scribo/toolchain/text_in_doc_preprocess.hh>
#include <scribo/core/document.hh>
#include <scribo/debug/usage.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/preprocessing/crop_without_localization.hh>
#include <scribo/preprocessing/crop.hh>
@@ -48,12 +50,16 @@
#include <scribo/io/img/save.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/labeling/compute.hh>
+#include <mln/labeling/foreground.hh>
+#include <mln/util/timer.hh>
+
+
+
const char *args_desc[][2] =
{
- { "input.*", "An image." },
- { "non_text_comps.pbm", "Non text components mask." },
- { "enable_debug", "Enable debug image output. Set to 1 or 0." },
- { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
+ { "input.tif", "An image." },
{0, 0}
};
@@ -64,65 +70,84 @@ int main(int argc, char* argv[])
using namespace scribo;
using namespace mln;
- if (argc != 4 && argc != 3 && argc != 5)
+ if (argc != 2)
return scribo::debug::usage(argv,
- "Extract non text components mask/",
- "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ "Document Image Analysis in Historical Documents",
+ "input.tif",
args_desc);
- std::string out_img = basename(argv[1]);
- out_img.erase(out_img.size() - 4);
-
- std::string filename_prefix = out_img + "_debug";
- scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
- if (argc > 3 && atoi(argv[3]))
- scribo::debug::logger().set_level(scribo::debug::Special);
- else
- scribo::debug::logger().set_level(scribo::debug::None);
-
trace::entering("main");
Magick::InitializeMagick(*argv);
+ mln::util::timer t;
+ t.start();
+
typedef image2d<scribo::def::lbl_type> L;
image2d<value::rgb8> input;
mln::io::magick::load(input, argv[1]);
- util::timer t;
- t.start();
-
// Preprocess document
- image2d<bool>
+ image2d<bool> input_preproc;
+ {
input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
-
- bool denoise = true;
+ // Cleanup components on borders
+ {
+ typedef scribo::def::lbl_type V;
+ V nlabels;
+ image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels);
+ mln::util::array<box2d>
+ bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels);
+
+ const box2d& b = input.domain();
+ for_all_ncomponents(e, nlabels)
+ if (bbox(e).pmin().row() == b.pmin().row()
+ || bbox(e).pmax().row() == b.pmax().row()
+ || bbox(e).pmin().col() == b.pmin().col()
+ || bbox(e).pmax().col() == b.pmax().col())
+ data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false);
+ }
+ }
+
+ bool denoise = 1;
std::string language = "";
bool find_line_seps = true;
- bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
-
- std::cout << "Running with the following options :"
- << " ocr_language = " << language
- << " | find_lines_seps = " << find_line_seps
- << " | find_whitespace_seps = " << find_whitespace_seps
- << " | debug = " << scribo::debug::logger().is_enabled()
- << std::endl;
+ bool find_whitespace_seps = true;
// Run document toolchain.
// Text
std::cout << "Analysing document..." << std::endl;
document<L>
- doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
- find_line_seps, find_whitespace_seps,
- !language.empty(), language);
- t.stop();
- std::cout << t << std::endl;
-
- mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+ doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
+
+ doc.set_filename(basename(argv[1]));
+
+ const paragraph_set<L>& par_set = doc.paragraphs();
+ image2d<bool> output(input.domain());
+ for_all_paragraphs(p, par_set)
+ {
+ data::fill(output, true);
+ const paragraph_info<L>& current_par = par_set(p);
+ const mln::util::array<line_id_t>& line_ids = current_par.line_ids();
+ const unsigned nelements = line_ids.nelements();
+
+ for (unsigned i = 0; i < nelements; ++i)
+ {
+ const line_id_t& line_id = line_ids(i);
+ const line_info<L>& current_line = par_set.lines()(line_id);
+
+ scribo::draw::line_components(output, par_set.lines(), current_line, false);
+ }
+
+ std::stringstream ss;
+ ss << basename(argv[1]) << p << ".pbm";
+ mln::io::pbm::save(output, ss.str());
+ }
- scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
trace::exiting("main");
}
diff --git a/scribo/src/contest/Makefile.am b/scribo/src/contest/Makefile.am
index b1d2dfb..ade408a 100644
--- a/scribo/src/contest/Makefile.am
+++ b/scribo/src/contest/Makefile.am
@@ -1,4 +1,5 @@
-# Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE).
+# Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
+# (LRDE).
#
# This file is part of Olena.
#
@@ -20,4 +21,6 @@
include $(top_srcdir)/scribo/scribo.mk
SUBDIRS = \
- hdibco-2010
\ No newline at end of file
+ hdibco-2010 \
+ hdlac-2011 \
+ DAE-2011
\ No newline at end of file
diff --git a/scribo/tests/estim/Makefile.am b/scribo/src/contest/hdlac-2011/Makefile.am
similarity index 63%
copy from scribo/tests/estim/Makefile.am
copy to scribo/src/contest/hdlac-2011/Makefile.am
index 1c39ab6..f5ead52 100644
--- a/scribo/tests/estim/Makefile.am
+++ b/scribo/src/contest/hdlac-2011/Makefile.am
@@ -17,13 +17,21 @@
## Process this file through Automake to create Makefile.in.
-include $(top_srcdir)/scribo/tests/tests.mk
+include $(top_srcdir)/scribo/scribo.mk
-check_PROGRAMS = \
- font_color \
- font_boldness
+noinst_PROGRAMS =
-font_color_SOURCES = font_color.cc
-font_boldness_SOURCES = font_boldness.cc
-TESTS = $(check_PROGRAMS)
+if HAVE_MAGICKXX
+
+noinst_PROGRAMS += content_in_hdoc_hdlac
+
+content_in_hdoc_hdlac_SOURCES = content_in_hdoc_hdlac.cc
+content_in_hdoc_hdlac_CPPFLAGS = $(AM_CPPFLAGS) \
+ $(TESSERACT_CPPFLAGS) \
+ -I$(top_builddir)
+content_in_hdoc_hdlac_LDFLAGS = $(AM_LDFLAGS) \
+ -lpthread \
+ $(MAGICKXX_LDFLAGS)
+
+endif HAVE_MAGICKXX
\ No newline at end of file
diff --git a/scribo/src/non_text_components.cc b/scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc
similarity index 56%
copy from scribo/src/non_text_components.cc
copy to scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc
index 0f4cce4..045803f 100644
--- a/scribo/src/non_text_components.cc
+++ b/scribo/src/contest/hdlac-2011/content_in_hdoc_hdlac.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2011 EPITA Research and Development Laboratory
+// Copyright (C) 2010, 2011 EPITA Research and Development Laboratory
// (LRDE)
//
// This file is part of Olena.
@@ -24,6 +24,7 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+# define SCRIBO_NOCR
#include <libgen.h>
#include <fstream>
@@ -31,15 +32,15 @@
#include <mln/core/image/image2d.hh>
-#include <mln/io/pbm/save.hh>
#include <mln/io/magick/load.hh>
-#include <scribo/toolchain/content_in_doc.hh>
+#include <scribo/toolchain/content_in_hdoc.hh>
#include <scribo/toolchain/text_in_doc_preprocess.hh>
#include <scribo/core/document.hh>
#include <scribo/debug/usage.hh>
+#include <scribo/debug/logger.hh>
#include <scribo/preprocessing/crop_without_localization.hh>
#include <scribo/preprocessing/crop.hh>
@@ -48,12 +49,17 @@
#include <scribo/io/img/save.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/labeling/compute.hh>
+#include <mln/labeling/foreground.hh>
+#include <mln/util/timer.hh>
+
+
+
const char *args_desc[][2] =
{
- { "input.*", "An image." },
- { "non_text_comps.pbm", "Non text components mask." },
- { "enable_debug", "Enable debug image output. Set to 1 or 0." },
- { "enable_tabstops", "Enable tabstops detection. Set to 1 or 0." },
+ { "input.tif", "An image." },
+ { "out.xml", "Result of the document analysis." },
{0, 0}
};
@@ -64,65 +70,66 @@ int main(int argc, char* argv[])
using namespace scribo;
using namespace mln;
- if (argc != 4 && argc != 3 && argc != 5)
+ if (argc != 3)
return scribo::debug::usage(argv,
- "Extract non text components mask/",
- "input.* non_text_comps.pbm [enable_debug] [enable_tabstops]",
+ "Document Image Analysis in Historical Documents",
+ "input.tif out.xml",
args_desc);
- std::string out_img = basename(argv[1]);
- out_img.erase(out_img.size() - 4);
-
- std::string filename_prefix = out_img + "_debug";
- scribo::debug::logger().set_filename_prefix(filename_prefix.c_str());
- if (argc > 3 && atoi(argv[3]))
- scribo::debug::logger().set_level(scribo::debug::Special);
- else
- scribo::debug::logger().set_level(scribo::debug::None);
-
trace::entering("main");
Magick::InitializeMagick(*argv);
+ mln::util::timer t;
+ t.start();
+
typedef image2d<scribo::def::lbl_type> L;
image2d<value::rgb8> input;
mln::io::magick::load(input, argv[1]);
- util::timer t;
- t.start();
-
// Preprocess document
- image2d<bool>
+ image2d<bool> input_preproc;
+ {
input_preproc = toolchain::text_in_doc_preprocess(input, false, 0.34);
-
- bool denoise = true;
+ // Cleanup components on borders
+ {
+ typedef scribo::def::lbl_type V;
+ V nlabels;
+ image2d<V> lbl = labeling::foreground(input_preproc, c8(), nlabels);
+ mln::util::array<box2d>
+ bbox = labeling::compute(accu::shape::bbox<point2d>(), lbl, nlabels);
+
+ const box2d& b = input.domain();
+ for_all_ncomponents(e, nlabels)
+ if (bbox(e).pmin().row() == b.pmin().row()
+ || bbox(e).pmax().row() == b.pmax().row()
+ || bbox(e).pmin().col() == b.pmin().col()
+ || bbox(e).pmax().col() == b.pmax().col())
+ data::fill(((input_preproc | bbox(e)).rw() | (pw::value(lbl) == pw::cst(e))).rw(), false);
+ }
+ }
+
+ bool denoise = 1;
std::string language = "";
bool find_line_seps = true;
- bool find_whitespace_seps = (argc > 4 && atoi(argv[4]));
-
- std::cout << "Running with the following options :"
- << " ocr_language = " << language
- << " | find_lines_seps = " << find_line_seps
- << " | find_whitespace_seps = " << find_whitespace_seps
- << " | debug = " << scribo::debug::logger().is_enabled()
- << std::endl;
+ bool find_whitespace_seps = true;
// Run document toolchain.
// Text
std::cout << "Analysing document..." << std::endl;
document<L>
- doc = scribo::toolchain::content_in_doc(input, input_preproc, denoise,
- find_line_seps, find_whitespace_seps,
- !language.empty(), language);
- t.stop();
- std::cout << t << std::endl;
+ doc = scribo::toolchain::content_in_hdoc(input, input_preproc, denoise,
+ find_line_seps, find_whitespace_seps,
+ !language.empty(), language);
- mln::io::pbm::save(data::convert(bool(), doc.elements().labeled_image()), argv[2]);
+ doc.set_filename(basename(argv[1]));
- scribo::io::img::save(doc, out_img + "_debug_wo_image.png", scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, out_img + "_debug_with_image.png", scribo::io::img::DebugWithImage);
+ // Saving results
+ std::cout << "Saving results..." << std::endl;
+ scribo::io::xml::save(doc, argv[2], scribo::io::xml::Page);
+ std::cout << "End of process - " << t << std::endl;
trace::exiting("main");
}
--
1.5.6.5
1
0
---
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 15 ++++++++-
scribo/scribo/toolchain/content_in_hdoc.hh | 1 +
.../toolchain/internal/content_in_hdoc_functor.hh | 32 +++++++++++--------
scribo/scribo/util/component_precise_outline.hh | 4 --
4 files changed, 32 insertions(+), 20 deletions(-)
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index 8373b02..dab1cce 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -85,6 +85,7 @@ namespace scribo
private: // Attributes
std::ofstream& output;
mutable int base_vertical_line_id_;
+ mutable int base_text_id_;
mutable L lbl_;
};
@@ -113,13 +114,23 @@ namespace scribo
// 0, so vertical and horizontal lines with the same id
// exist.
base_vertical_line_id_ = doc.hline_seps_comps().nelements();
+ base_text_id_ = 0;
// Preambule
print_PAGE_preambule(output, doc, true);
// Text
if (doc.has_text())
+ {
+
+ // FIXME: counting the number of valid lines...
+ for_all_paragraphs(p, doc.paragraphs())
+ if (doc.paragraphs()(p).is_valid())
+ ++base_text_id_;
+ --base_text_id_;
+
doc.paragraphs().accept(*this);
+ }
// Page elements (Pictures, ...)
if (doc.has_elements())
@@ -191,8 +202,8 @@ namespace scribo
case component::DropCapital:
{
- output << " <TextRegion id=\"r" << id << "\" "
- << " Type=\"Drop_Capital\">"
+ output << " <TextRegion id=\"r" << base_text_id_ + id << "\" "
+ << " type=\"drop-capital\">" // FIXME: should not be inline here!
<< std::endl;
internal::print_image_coords(output, par, " ");
diff --git a/scribo/scribo/toolchain/content_in_hdoc.hh b/scribo/scribo/toolchain/content_in_hdoc.hh
index 97233d5..67d8d2a 100644
--- a/scribo/scribo/toolchain/content_in_hdoc.hh
+++ b/scribo/scribo/toolchain/content_in_hdoc.hh
@@ -72,6 +72,7 @@ namespace scribo
f.enable_whitespace_seps = find_whitespace_seps;
f.ocr_language = language;
f.enable_ocr = enable_ocr;
+ f.verbose = false;
document<mln_ch_value(I, def::lbl_type)> doc = f(input, input_preproc);
diff --git a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
index 24d24a3..adfcdb3 100644
--- a/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
+++ b/scribo/scribo/toolchain/internal/content_in_hdoc_functor.hh
@@ -70,7 +70,10 @@
# include <scribo/postprocessing/images_to_drop_capital.hh>
+# ifndef SCRIBO_NOCR
# include <scribo/text/recognition.hh>
+# endif // ! SCRIBO_NOCR
+
# include <scribo/text/merging.hh>
# include <scribo/text/link_lines.hh>
# include <scribo/text/paragraphs.hh>
@@ -505,6 +508,8 @@ namespace scribo
# endif // ! SCRIBO_NDEBUG
//===== END OF DEBUG =====
+
+# ifndef SCRIBO_NOCR
// Text recognition
if (enable_ocr)
{
@@ -514,6 +519,7 @@ namespace scribo
on_progress();
}
+# endif // ! SCRIBO_NOCR
on_new_progress_label("Extracting paragraphs");
@@ -524,11 +530,11 @@ namespace scribo
on_new_progress_label("Filtering paragraphs");
- paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset);
- doc.set_paragraphs(parset_f);
+ // paragraph_set<L> parset_f = filter::paragraphs_bbox_overlap(parset);
+ // doc.set_paragraphs(parset_f);
- // parset = filter::paragraphs_bbox_overlap(parset);
- // doc.set_paragraphs(parset);
+ parset = filter::paragraphs_bbox_overlap(parset);
+ doc.set_paragraphs(parset);
on_progress();
@@ -557,14 +563,14 @@ namespace scribo
on_progress();
-// TEMPORARY DEBUG
- on_new_progress_label("Saving debug data");
- doc.set_paragraphs(parset);
- scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage);
- scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage);
- doc.set_paragraphs(parset_f);
- on_progress();
-// END OF TEMPORARY DEBUG
+// // TEMPORARY DEBUG
+// on_new_progress_label("Saving debug data");
+// doc.set_paragraphs(parset);
+// scribo::io::img::save(doc, "debug_wo_filter.png", scribo::io::img::DebugWoImage);
+// scribo::io::img::save(doc, "full_wo_filter.png", scribo::io::img::DebugWithImage);
+// doc.set_paragraphs(parset_f);
+// on_progress();
+// // END OF TEMPORARY DEBUG
on_new_progress_label("Cleanup miscellaneous false positive");
@@ -604,8 +610,6 @@ namespace scribo
on_end();
- sleep(10);
-
return doc;
}
diff --git a/scribo/scribo/util/component_precise_outline.hh b/scribo/scribo/util/component_precise_outline.hh
index 70fc995..175160c 100644
--- a/scribo/scribo/util/component_precise_outline.hh
+++ b/scribo/scribo/util/component_precise_outline.hh
@@ -386,13 +386,9 @@ namespace scribo
}
}
- std::cout << "Before filter points - " << points.nsites() << std::endl;
-
mln::p_array<P> waypoints;
internal::filter_points(points, waypoints);
- std::cout << "After filter points - " << waypoints.nsites() << std::endl;
-
trace::exiting("scribo::util::component_precise_outline");
return waypoints;
}
--
1.5.6.5
1
0

01 Jun '11
* scribo/primitive/extract/lines_h_thick_and_thin.hh,
* scribo/primitive/remove/separators.hh: Initialize border.
---
scribo/ChangeLog | 7 +++++++
.../primitive/extract/lines_h_thick_and_thin.hh | 1 +
scribo/scribo/primitive/remove/separators.hh | 2 ++
3 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 16fe8c8..59e3e84 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,12 @@
2011-06-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Fix use of uninitialized values.
+
+ * scribo/primitive/extract/lines_h_thick_and_thin.hh,
+ * scribo/primitive/remove/separators.hh: Initialize border.
+
+2011-06-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Fix a serious memory leak.
* scribo/core/component_info.hh,
diff --git a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
index 53a5c32..45b2ad3 100644
--- a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
+++ b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
@@ -156,6 +156,7 @@ namespace scribo
extension::adjust_fill(input, length / 2, 0);
accu::count_value<bool> accu(true);
image2d<unsigned> count = accu::transform_line(accu, input, length, 1);
+ border::fill(count, 0); // FIXME: correct?
image2d<value::int_u8> output;
initialize(output, count);
diff --git a/scribo/scribo/primitive/remove/separators.hh b/scribo/scribo/primitive/remove/separators.hh
index 23d5fd1..1b75494 100644
--- a/scribo/scribo/primitive/remove/separators.hh
+++ b/scribo/scribo/primitive/remove/separators.hh
@@ -88,6 +88,8 @@ namespace scribo
mln_concrete(I) output = duplicate(input);
border::resize(separators, output.border());
+ border::fill(separators, 0); // FIXME: we should rewrite image
+ // browsing below instead.
typedef const mln_value(I)* sep_ptr_t;
sep_ptr_t
--
1.5.6.5
1
0

01 Jun '11
* scribo/primitive/extract/lines_h_thick_and_thin.hh,
* scribo/primitive/remove/separators.hh: Initialize border.
---
scribo/ChangeLog | 7 +++++++
.../primitive/extract/lines_h_thick_and_thin.hh | 1 +
scribo/scribo/primitive/remove/separators.hh | 2 ++
3 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index cc7cefc..1814185 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,12 @@
2011-06-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Fix use of uninitialized values.
+
+ * scribo/primitive/extract/lines_h_thick_and_thin.hh,
+ * scribo/primitive/remove/separators.hh: Initialize border.
+
+2011-06-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Fix a serious memory leak.
* scribo/core/component_info.hh,
diff --git a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
index 53a5c32..45b2ad3 100644
--- a/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
+++ b/scribo/scribo/primitive/extract/lines_h_thick_and_thin.hh
@@ -156,6 +156,7 @@ namespace scribo
extension::adjust_fill(input, length / 2, 0);
accu::count_value<bool> accu(true);
image2d<unsigned> count = accu::transform_line(accu, input, length, 1);
+ border::fill(count, 0); // FIXME: correct?
image2d<value::int_u8> output;
initialize(output, count);
diff --git a/scribo/scribo/primitive/remove/separators.hh b/scribo/scribo/primitive/remove/separators.hh
index 23d5fd1..1b75494 100644
--- a/scribo/scribo/primitive/remove/separators.hh
+++ b/scribo/scribo/primitive/remove/separators.hh
@@ -88,6 +88,8 @@ namespace scribo
mln_concrete(I) output = duplicate(input);
border::resize(separators, output.border());
+ border::fill(separators, 0); // FIXME: we should rewrite image
+ // browsing below instead.
typedef const mln_value(I)* sep_ptr_t;
sep_ptr_t
--
1.5.6.5
1
0
* scribo/core/component_info.hh,
* scribo/core/component_set.hh,
* scribo/core/line_info.hh,
* scribo/draw/line_components.hh: Remove holder data, which prevented
the containers from being freed.
* scribo/io/img/internal/debug_img_visitor.hh,
* scribo/io/img/internal/full_img_visitor.hh,
* scribo/io/img/save.hh,
* scribo/io/xml/internal/page_xml_visitor.hh,
* scribo/io/xml/save.hh,
* scribo/primitive/extract/lines_h_thick_and_thin.hh,
* scribo/primitive/remove/separators.hh,
* scribo/text/merging.hh,
* scribo/text/paragraphs.hh,
* scribo/text/paragraphs_closing.hh,
* scribo/toolchain/internal/content_in_hdoc_functor.hh: Update
code so that holder data is no longer needed.
---
scribo/ChangeLog | 23 ++++++++++
scribo/scribo/core/component_info.hh | 19 +-------
scribo/scribo/core/component_set.hh | 6 +-
scribo/scribo/core/line_info.hh | 44 +++++++------------
scribo/scribo/draw/line_components.hh | 7 ++-
scribo/scribo/io/img/internal/debug_img_visitor.hh | 37 +++++++++------
scribo/scribo/io/img/internal/full_img_visitor.hh | 34 +++++++++------
scribo/scribo/io/img/save.hh | 6 +-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 28 ++++++------
scribo/scribo/io/xml/save.hh | 2 +-
scribo/scribo/text/merging.hh | 46 ++++++++++++-------
scribo/scribo/text/paragraphs.hh | 26 ++++++-----
scribo/scribo/text/paragraphs_closing.hh | 2 +-
13 files changed, 155 insertions(+), 125 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 5cf6f94..16fe8c8 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,26 @@
+2011-06-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Fix a serious memory leak.
+
+ * scribo/core/component_info.hh,
+ * scribo/core/component_set.hh,
+ * scribo/core/line_info.hh,
+ * scribo/draw/line_components.hh: Remove holder data. Prevented
+ the containers from being freed.
+
+ * scribo/io/img/internal/debug_img_visitor.hh,
+ * scribo/io/img/internal/full_img_visitor.hh,
+ * scribo/io/img/save.hh,
+ * scribo/io/xml/internal/page_xml_visitor.hh,
+ * scribo/io/xml/save.hh,
+ * scribo/primitive/extract/lines_h_thick_and_thin.hh,
+ * scribo/primitive/remove/separators.hh,
+ * scribo/text/merging.hh,
+ * scribo/text/paragraphs.hh,
+ * scribo/text/paragraphs_closing.hh,
+ * scribo/toolchain/internal/content_in_hdoc_functor.hh: Update
+ code in order to make holder data useless.
+
2011-06-07 Guillaume Lazzara <z(a)lrde.epita.fr>
* scribo/text/paragraphs_closing.hh: Revamp code.
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 2fa2ad1..b42787f 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -57,8 +57,7 @@ namespace scribo
public:
component_info();
- component_info(const component_set<L>& holder,
- const component_id_t& id,
+ component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
unsigned card,
@@ -86,8 +85,6 @@ namespace scribo
bool is_valid() const;
- const component_set<L>& holder() const;
-
protected:
component_id_t id_;
mln::box2d bbox_;
@@ -98,8 +95,6 @@ namespace scribo
component::Tag tag_;
component::Type type_;
-
- component_set<L> holder_;
};
@@ -125,14 +120,13 @@ namespace scribo
template <typename L>
- component_info<L>::component_info(const component_set<L>& holder,
- const component_id_t& id,
+ component_info<L>::component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
unsigned card,
component::Type type)
: id_(id), bbox_(bbox), mass_center_(mass_center), card_(card),
- type_(type), holder_(holder)
+ type_(type)
{
if (!bbox.is_valid())
tag_ = component::Ignored;
@@ -232,13 +226,6 @@ namespace scribo
}
- template <typename L>
- const component_set<L>&
- component_info<L>::holder() const
- {
- return holder_;
- }
-
template <typename L>
std::ostream&
diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh
index 4f4cd61..ded64ae 100644
--- a/scribo/scribo/core/component_set.hh
+++ b/scribo/scribo/core/component_set.hh
@@ -337,7 +337,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first(),
+ component_info<L> info(i, attribs[i].first(),
attribs[i].second(), attribs[i].second_accu().nsites());
data_->infos_.append(info);
}
@@ -362,7 +362,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first(),
+ component_info<L> info(i, attribs[i].first(),
attribs[i].second(), attribs[i].second_accu().nsites(),
type);
data_->infos_.append(info);
@@ -389,7 +389,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first,
+ component_info<L> info(i, attribs[i].first,
attribs[i].second.first, attribs[i].second.second,
type);
data_->infos_.append(info);
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index 9320416..2913f81 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -140,13 +140,13 @@ namespace scribo
std::string text_;
std::string html_text_;
- // Line set holding this element.
- line_set<L> holder_;
-
// DEBUG
stats< float > meanline_clusters_;
stats< float > baseline_clusters_;
+ component_set<L> components_;
+ object_links<L> links_;
+
private:
void init_();
};
@@ -282,10 +282,6 @@ namespace scribo
/// Force a new computation of statistics.
void force_stats_update();
-
- /// Returns the line set holding this element.
- const line_set<L>& holder() const;
-
/// Returns the delta used to compute the extended bbox.
int delta_of_line() const;
@@ -376,7 +372,7 @@ namespace scribo
line_info_data<L>::line_info_data(const line_set<L>& holder,
const group_info& group)
: hidden_(false), tag_(line::None), component_ids_(group.component_ids()),
- type_(line::Undefined), holder_(holder)
+ type_(line::Undefined), components_(holder.components()), links_(holder.links())
{
init_();
}
@@ -385,7 +381,7 @@ namespace scribo
line_info_data<L>::line_info_data(const line_set<L>& holder,
const mln::util::array<component_id_t>& component_ids)
: hidden_(false), tag_(line::None), component_ids_(component_ids),
- type_(line::Undefined), holder_(holder)
+ type_(line::Undefined), components_(holder.components()), links_(holder.links())
{
init_();
}
@@ -696,7 +692,7 @@ namespace scribo
for_all_elements(i, data_->component_ids_)
{
unsigned c = data_->component_ids_[i];
- data_->holder_.components_()(c).update_type(type);
+ data_->components_(c).update_type(type);
}
}
@@ -855,7 +851,7 @@ namespace scribo
data_->baseline_ + D,
bbox().pmax().col() + delta);
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
@@ -915,7 +911,7 @@ namespace scribo
data_->ebbox_.merge(enlarge(b, d_delta));
}
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
else // /other/ IS NOT a text line.
{
@@ -941,7 +937,7 @@ namespace scribo
data_->bbox_.merge(other.bbox());
// Make sure the ebbox is included in the image domain.
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
@@ -975,13 +971,13 @@ namespace scribo
// Only for the case of two-character words
if (card() == 2)
{
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
const unsigned c1 = data_->component_ids_(0);
const unsigned c2 = data_->component_ids_(1);
- if (data_->holder_.components()(c1).type() == component::Punctuation
- || data_->holder_.components()(c2).type() == component::Punctuation)
+ if (data_->components_(c1).type() == component::Punctuation
+ || data_->components_(c2).type() == component::Punctuation)
return false;
const mln::box2d& bb1 = comp_set(c1).bbox();
@@ -1026,7 +1022,7 @@ namespace scribo
unsigned
line_info<L>::get_first_char_height() const
{
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
const unsigned c1 = data_->components_(0);
const mln::box2d& bb1 = comp_set(c1).bbox();
@@ -1120,7 +1116,7 @@ namespace scribo
line_info<L>::force_stats_update()
{
typedef mln_site(L) P;
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
// Init.
typedef mln::value::int_u<12> median_data_t;
@@ -1222,11 +1218,11 @@ namespace scribo
// (right link) (left link)
// Space between characters.
- if (data_->holder_.links()(c) != c)
+ if (data_->links_(c) != c)
{
int
space = bb.pmin().col()
- - comp_set(data_->holder_.links()(c)).bbox().pmax().col() - 1;
+ - comp_set(data_->links_(c)).bbox().pmax().col() - 1;
// -- Ignore overlapped characters.
if (space > 0)
@@ -1330,14 +1326,6 @@ namespace scribo
template <typename L>
- const line_set<L>&
- line_info<L>::holder() const
- {
- return data_->holder_;
- }
-
-
- template <typename L>
std::ostream&
operator<<(std::ostream& ostr, const line_info<L>& info)
{
diff --git a/scribo/scribo/draw/line_components.hh b/scribo/scribo/draw/line_components.hh
index 878b2c1..12e7489 100644
--- a/scribo/scribo/draw/line_components.hh
+++ b/scribo/scribo/draw/line_components.hh
@@ -46,6 +46,7 @@ namespace scribo
template <typename L, typename I>
void
line_components(Image<I>& input_,
+ const line_set<L>& lines,
const line_info<L>& line,
const mln_value(I)& value);
@@ -56,6 +57,7 @@ namespace scribo
template <typename L, typename I>
void
line_components(Image<I>& input_,
+ const line_set<L>& lines,
const line_info<L>& line,
const mln_value(I)& value)
{
@@ -65,9 +67,8 @@ namespace scribo
mln_precondition(input.is_valid());
- const line_set<L>& holder = line.holder();
- const component_set<L>& comp_set = holder.components();
- const L& labeled_image = holder.components().labeled_image();
+ const component_set<L>& comp_set = lines.components();
+ const L& labeled_image = lines.components().labeled_image();
const mln::util::array<component_id_t>& component_ids = line.component_ids();
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh
index 520a743..cde16ed 100644
--- a/scribo/scribo/io/img/internal/debug_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -61,7 +61,8 @@ namespace scribo
{
- class debug_img_visitor : public doc_serializer<debug_img_visitor>
+ template <typename L>
+ class debug_img_visitor : public doc_serializer<debug_img_visitor<L> >
{
public:
// Constructor
@@ -69,21 +70,18 @@ namespace scribo
unsigned output_ratio);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
- template <typename L>
void visit(const line_info<L>& line) const;
private: // Attributes
mln::image2d<value::rgb8>& output;
unsigned output_ratio;
+ mutable L lbl_;
private: // Methods
box2d compute_bbox(const box2d& b) const;
@@ -94,9 +92,9 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
+ template <typename L>
box2d
- debug_img_visitor::compute_bbox(const box2d& b) const
+ debug_img_visitor<L>::compute_bbox(const box2d& b) const
{
point2d
pmin = b.pmin() / output_ratio,
@@ -106,8 +104,8 @@ namespace scribo
}
- inline
- debug_img_visitor::debug_img_visitor(mln::image2d<value::rgb8>& out,
+ template <typename L>
+ debug_img_visitor<L>::debug_img_visitor(mln::image2d<value::rgb8>& out,
unsigned output_ratio)
: output(out), output_ratio(output_ratio)
{
@@ -119,7 +117,7 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const document<L>& doc) const
+ debug_img_visitor<L>::visit(const document<L>& doc) const
{
// Text
if (doc.has_text())
@@ -129,19 +127,28 @@ namespace scribo
if (doc.has_elements())
{
for_all_comps(e, doc.elements())
+ {
+ lbl_ = doc.elements().labeled_image();
if (doc.elements()(e).is_valid())
doc.elements()(e).accept(*this);
+ }
}
// line seraparators
if (doc.has_vline_seps())
+ {
+ lbl_ = doc.vline_seps_comps().labeled_image();
for_all_comps(c, doc.vline_seps_comps())
if (doc.vline_seps_comps()(c).is_valid())
doc.vline_seps_comps()(c).accept(*this);
+ }
if (doc.has_hline_seps())
+ {
+ lbl_ = doc.hline_seps_comps().labeled_image();
for_all_comps(c, doc.hline_seps_comps())
if (doc.hline_seps_comps()(c).is_valid())
doc.hline_seps_comps()(c).accept(*this);
+ }
}
@@ -150,13 +157,13 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const component_info<L>& info) const
+ debug_img_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -187,7 +194,7 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const paragraph_set<L>& parset) const
+ debug_img_visitor<L>::visit(const paragraph_set<L>& parset) const
{
const line_set<L>& lines = parset.lines();
@@ -216,7 +223,7 @@ namespace scribo
template <typename L>
void
- debug_img_visitor::visit(const line_info<L>& line) const
+ debug_img_visitor<L>::visit(const line_info<L>& line) const
{
point2d
pmin = line.bbox().pmin(),
diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh
index 7b20970..f31eec1 100644
--- a/scribo/scribo/io/img/internal/full_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/full_img_visitor.hh
@@ -58,27 +58,26 @@ namespace scribo
{
- class full_img_visitor : public doc_serializer<full_img_visitor>
+ template <typename L>
+ class full_img_visitor : public doc_serializer<full_img_visitor<L> >
{
public:
// Constructor
full_img_visitor(mln::image2d<value::rgb8>& out);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
- template <typename L>
void visit(const line_info<L>& line) const;
private: // Attributes
mln::image2d<value::rgb8>& output;
+
+ mutable L lbl_;
};
@@ -86,8 +85,8 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
- full_img_visitor::full_img_visitor(mln::image2d<value::rgb8>& out)
+ template <typename L>
+ full_img_visitor<L>::full_img_visitor(mln::image2d<value::rgb8>& out)
: output(out)
{
mln_assertion(output.is_valid());
@@ -98,7 +97,7 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const document<L>& doc) const
+ full_img_visitor<L>::visit(const document<L>& doc) const
{
// Text
if (doc.has_text())
@@ -109,20 +108,29 @@ namespace scribo
{
const component_set<L>& elts = doc.elements();
for_all_comps(e, elts)
+ {
+ lbl_ = elts.labeled_image();
if (elts(e).is_valid())
elts(e).accept(*this);
+ }
}
// line seraparators
if (doc.has_vline_seps())
+ {
+ lbl_ = doc.vline_seps_comps().labeled_image();
for_all_comps(c, doc.vline_seps_comps())
if (doc.vline_seps_comps()(c).is_valid())
doc.vline_seps_comps()(c).accept(*this);
+ }
if (doc.has_hline_seps())
+ {
+ lbl_ = doc.hline_seps_comps().labeled_image();
for_all_comps(c, doc.hline_seps_comps())
if (doc.hline_seps_comps()(c).is_valid())
doc.hline_seps_comps()(c).accept(*this);
+ }
}
@@ -131,13 +139,13 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const component_info<L>& info) const
+ full_img_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -167,7 +175,7 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const paragraph_set<L>& parset) const
+ full_img_visitor<L>::visit(const paragraph_set<L>& parset) const
{
// const line_set<L>& lines = parset.lines();
@@ -187,7 +195,7 @@ namespace scribo
template <typename L>
void
- full_img_visitor::visit(const line_info<L>& line) const
+ full_img_visitor<L>::visit(const line_info<L>& line) const
{
// mln::draw::box(output, line.bbox(), literal::red);
diff --git a/scribo/scribo/io/img/save.hh b/scribo/scribo/io/img/save.hh
index 04f0a3c..a985d07 100644
--- a/scribo/scribo/io/img/save.hh
+++ b/scribo/scribo/io/img/save.hh
@@ -150,7 +150,7 @@ namespace scribo
{
mln_precondition(doc.is_valid());
mln::image2d<value::rgb8> output = duplicate(doc.image());
- scribo::io::img::internal::full_img_visitor f(output);
+ scribo::io::img::internal::full_img_visitor<L> f(output);
doc.accept(f);
return output;
}
@@ -164,7 +164,7 @@ namespace scribo
output(box2d(doc.image().domain().pmin() / 4,
doc.image().domain().pmax() / 4));
data::fill(output, literal::black);
- scribo::io::img::internal::debug_img_visitor f(output, 4);
+ scribo::io::img::internal::debug_img_visitor<L> f(output, 4);
doc.accept(f);
return output;
}
@@ -178,7 +178,7 @@ namespace scribo
output = mln::subsampling::antialiased(doc.image(), 4);
internal::highlight_mask highlight(0.5f);
data::transform_inplace(output, highlight);
- scribo::io::img::internal::debug_img_visitor f(output, 4);
+ scribo::io::img::internal::debug_img_visitor<L> f(output, 4);
doc.accept(f);
return output;
}
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index bbdd3e2..8373b02 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -66,28 +66,27 @@ namespace scribo
Its XSD file is located here:
http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent…
*/
- class page_xml_visitor : public doc_serializer<page_xml_visitor>
+ template <typename L>
+ class page_xml_visitor : public doc_serializer<page_xml_visitor<L> >
{
public:
// Constructor
- page_xml_visitor(std::ofstream& out);
+ page_xml_visitor<L>(std::ofstream& out);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_set<L>& comp_set) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
private: // Attributes
std::ofstream& output;
mutable int base_vertical_line_id_;
+
+ mutable L lbl_;
};
@@ -95,8 +94,8 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
- page_xml_visitor::page_xml_visitor(std::ofstream& out)
+ template <typename L>
+ page_xml_visitor<L>::page_xml_visitor(std::ofstream& out)
: output(out)
{
}
@@ -107,7 +106,7 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const document<L>& doc) const
+ page_xml_visitor<L>::visit(const document<L>& doc) const
{
// Make sure there are no duplicate ids for line separators.
// Vertical and horizontal lines are indexed separately from
@@ -143,8 +142,9 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const component_set<L>& comp_set) const
+ page_xml_visitor<L>::visit(const component_set<L>& comp_set) const
{
+ lbl_ = comp_set.labeled_image();
for_all_comps(c, comp_set)
if (comp_set(c).is_valid())
comp_set(c).accept(*this);
@@ -155,13 +155,13 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const component_info<L>& info) const
+ page_xml_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -223,7 +223,7 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const paragraph_set<L>& parset) const
+ page_xml_visitor<L>::visit(const paragraph_set<L>& parset) const
{
const line_set<L>& lines = parset.lines();
diff --git a/scribo/scribo/io/xml/save.hh b/scribo/scribo/io/xml/save.hh
index 54afa79..cc6905b 100644
--- a/scribo/scribo/io/xml/save.hh
+++ b/scribo/scribo/io/xml/save.hh
@@ -96,7 +96,7 @@ namespace scribo
template <typename L>
void save_page(const document<L>& doc, std::ofstream& output)
{
- scribo::io::xml::internal::page_xml_visitor f(output);
+ scribo::io::xml::internal::page_xml_visitor<L> f(output);
doc.accept(f);
}
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index f1135ed..31a5ed4 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -224,11 +224,15 @@ namespace scribo
template <typename L>
- bool between_separators(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ bool between_separators(const scribo::line_set<L>& lines,
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
+ const scribo::line_info<L>& l1 = lines(l1_);
+ const scribo::line_info<L>& l2 = lines(l2_);
+
// No separators found in image.
- mln_precondition(l1.holder().components().has_separators());
+ mln_precondition(lines.components().has_separators());
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -237,7 +241,7 @@ namespace scribo
col1 = l1_bbox.pcenter().col(),
col2 = l2_bbox.pcenter().col();
const mln_ch_value(L, bool)&
- separators = l1.holder().components().separators();
+ separators = lines.components().separators();
// Checking for separators starting from 1 / 4, 3/ 4 and the
// center of the box
@@ -290,9 +294,13 @@ namespace scribo
*/
template <typename L>
- bool lines_can_merge(scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ bool lines_can_merge(scribo::line_set<L>& lines,
+ const scribo::line_id_t& l1_,
+ const scribo::line_id_t& l2_)
{
+ scribo::line_info<L>& l1 = lines(l1_);
+ scribo::line_info<L>& l2 = lines(l2_);
+
// Parameters.
const float x_ratio_max = 1.7f;
const float baseline_delta_max =
@@ -306,9 +314,9 @@ namespace scribo
const point2d& l1_pmax = l1_bbox.pmax();
const point2d& l2_pmax = l2_bbox.pmax();
- const bool l1_has_separators = l1.holder().components().has_separators();
+ const bool l1_has_separators = lines.components().has_separators();
const bool l1_l2_between_separators = (l1_has_separators) ?
- between_separators(l1, l2) : false;
+ between_separators(lines, l1_, l2_) : false;
const float l_ted_cw = l2.char_width();
const float dx = std::max(l1_pmin.col(), l2_pmin.col())
@@ -424,9 +432,13 @@ namespace scribo
*/
template <typename L>
- bool non_text_and_text_can_merge(scribo::line_info<L>& l_cur, // current
- const scribo::line_info<L>& l_ted) // touched
+ bool non_text_and_text_can_merge(scribo::line_set<L>& lines,
+ const scribo::line_id_t& l_cur_, // current
+ const scribo::line_id_t l_ted_) // touched
{
+ scribo::line_info<L>& l_cur = lines(l_cur_);
+ scribo::line_info<L>& l_ted = lines(l_ted_);
+
if (l_cur.type() == line::Text || l_ted.type() != line::Text)
return false;
// the current object is a NON-textline
@@ -434,8 +446,8 @@ namespace scribo
// Check that there is no separator in between.
- if (l_cur.holder().components().has_separators()
- && between_separators(l_cur, l_ted))
+ if (lines.components().has_separators()
+ && between_separators(lines, l_cur_, l_ted_))
return false;
const box2d& l_cur_bbox = l_cur.bbox();
@@ -744,8 +756,8 @@ namespace scribo
< 5 && std::abs(l_info.meanline() -
mc_info.meanline()) < 5))
&& dx < l_ted_cw && dy < 0
- && not (l_info.holder().components().has_separators()
- && between_separators(l_info, mc_info)))
+ && not (lines.components().has_separators()
+ && between_separators(lines, l, mc)))
l = do_union(lines, l, mc, parent);
// }
@@ -801,7 +813,7 @@ namespace scribo
// could be noise or garbage... So adding new
// criterions could fix this issue.
//
- if (!non_text_and_text_can_merge(lines(l), lines(mc)))
+ if (!non_text_and_text_can_merge(lines, l, mc))
continue;
// Avoid the case when a large title ebbox overlap
@@ -868,7 +880,7 @@ namespace scribo
if (lines(l_).type() == line::Text)
{
// l_ and lcand look like text line chunks.
- if (lines_can_merge(lines(l_), lines(lcand)))
+ if (lines_can_merge(lines, l_, lcand))
{
++count_two_lines_merge;
l_ = do_union(lines, l_, lcand, parent);
@@ -899,7 +911,7 @@ namespace scribo
{
// l_ does NOT looks like a text line chunk.
++count_comp_HITS_txtline;
- if (non_text_and_text_can_merge(lines(l_), lines(lcand)))
+ if (non_text_and_text_can_merge(lines, l_, lcand))
// a petouille merges with a text line?
{
++count_comp_HITS_txtline;
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index 8fd89be..e37f610 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -29,11 +29,15 @@ namespace scribo
template <typename L>
inline
bool
- between_horizontal_separator(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ between_horizontal_separator(const line_set<L>& lines,
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
+ const line_info<L>& l1 = lines(l1_);
+ const line_info<L>& l2 = lines(l2_);
+
// No separators found in image.
- mln_precondition(l1.holder().components().has_separators());
+ mln_precondition(lines.components().has_separators());
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -42,7 +46,7 @@ namespace scribo
row1 = l1_bbox.pcenter().row(),
row2 = l2_bbox.pcenter().row();
const mln_ch_value(L, bool)&
- separators = l1.holder().components().separators();
+ separators = lines.components().separators();
unsigned row;
unsigned col_ptr;
@@ -153,18 +157,18 @@ namespace scribo
line_id_t right_nbh = right(l);
line_id_t lol_nbh = output(left_nbh);
- const line_info<L>& left_line = lines(left_nbh);
- const line_info<L>& current_line = lines(l);
- const line_info<L>& right_line = lines(right_nbh);
+ // const line_info<L>& left_line = lines(left_nbh);
+ // const line_info<L>& current_line = lines(l);
+ // const line_info<L>& right_line = lines(right_nbh);
- if (right_line.holder().components().has_separators() &&
- between_horizontal_separator(right_line, current_line))
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, right_nbh, l))
{
output(right_nbh) = right_nbh;
right_nbh = l;
}
- if (current_line.holder().components().has_separators() &&
- between_horizontal_separator(current_line, left_line))
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, l, left_nbh))
{
output(l) = l;
left_nbh = l;
diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh
index efc5259..2b685df 100644
--- a/scribo/scribo/text/paragraphs_closing.hh
+++ b/scribo/scribo/text/paragraphs_closing.hh
@@ -210,7 +210,7 @@ namespace scribo
const line_id_t& line_id = line_ids(i);
const line_info<L>& current_line = lines(line_id);
- scribo::draw::line_components(debug, current_line, p);
+ scribo::draw::line_components(debug, lines, current_line, p);
// HACK DISCLAIMER : this line is drawn in order to be
// sure that every line will be reduced to a single
--
1.5.6.5
1
0
* scribo/core/component_info.hh,
* scribo/core/component_set.hh,
* scribo/core/line_info.hh,
* scribo/draw/line_components.hh: Remove holder data, which prevented
the containers from being freed.
* scribo/io/img/internal/debug_img_visitor.hh,
* scribo/io/img/internal/full_img_visitor.hh,
* scribo/io/img/save.hh,
* scribo/io/xml/internal/page_xml_visitor.hh,
* scribo/io/xml/save.hh,
* scribo/primitive/extract/lines_h_thick_and_thin.hh,
* scribo/primitive/remove/separators.hh,
* scribo/text/merging.hh,
* scribo/text/paragraphs.hh,
* scribo/text/paragraphs_closing.hh,
* scribo/toolchain/internal/content_in_hdoc_functor.hh: Update
code so that holder data is no longer needed.
---
scribo/ChangeLog | 23 ++++++++++
scribo/scribo/core/component_info.hh | 19 +-------
scribo/scribo/core/component_set.hh | 6 +-
scribo/scribo/core/line_info.hh | 44 +++++++------------
scribo/scribo/draw/line_components.hh | 7 ++-
scribo/scribo/io/img/internal/debug_img_visitor.hh | 37 +++++++++------
scribo/scribo/io/img/internal/full_img_visitor.hh | 34 +++++++++------
scribo/scribo/io/img/save.hh | 6 +-
scribo/scribo/io/xml/internal/page_xml_visitor.hh | 28 ++++++------
scribo/scribo/io/xml/save.hh | 2 +-
scribo/scribo/text/merging.hh | 46 ++++++++++++-------
scribo/scribo/text/paragraphs.hh | 26 ++++++-----
scribo/scribo/text/paragraphs_closing.hh | 2 +-
13 files changed, 155 insertions(+), 125 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 6f44931..cc7cefc 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,26 @@
+2011-06-01 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Fix a serious memory leak.
+
+ * scribo/core/component_info.hh,
+ * scribo/core/component_set.hh,
+ * scribo/core/line_info.hh,
+ * scribo/draw/line_components.hh: Remove holder data, which
+ prevented the containers from being freed.
+
+ * scribo/io/img/internal/debug_img_visitor.hh,
+ * scribo/io/img/internal/full_img_visitor.hh,
+ * scribo/io/img/save.hh,
+ * scribo/io/xml/internal/page_xml_visitor.hh,
+ * scribo/io/xml/save.hh,
+ * scribo/primitive/extract/lines_h_thick_and_thin.hh,
+ * scribo/primitive/remove/separators.hh,
+ * scribo/text/merging.hh,
+ * scribo/text/paragraphs.hh,
+ * scribo/text/paragraphs_closing.hh,
+ * scribo/toolchain/internal/content_in_hdoc_functor.hh: Update
+ code so that holder data is no longer needed.
+
2011-05-26 Guillaume Lazzara <z(a)lrde.epita.fr>
* scribo/util/component_precise_outline.hh: Fix namespace
diff --git a/scribo/scribo/core/component_info.hh b/scribo/scribo/core/component_info.hh
index 2fa2ad1..b42787f 100644
--- a/scribo/scribo/core/component_info.hh
+++ b/scribo/scribo/core/component_info.hh
@@ -57,8 +57,7 @@ namespace scribo
public:
component_info();
- component_info(const component_set<L>& holder,
- const component_id_t& id,
+ component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
unsigned card,
@@ -86,8 +85,6 @@ namespace scribo
bool is_valid() const;
- const component_set<L>& holder() const;
-
protected:
component_id_t id_;
mln::box2d bbox_;
@@ -98,8 +95,6 @@ namespace scribo
component::Tag tag_;
component::Type type_;
-
- component_set<L> holder_;
};
@@ -125,14 +120,13 @@ namespace scribo
template <typename L>
- component_info<L>::component_info(const component_set<L>& holder,
- const component_id_t& id,
+ component_info<L>::component_info(const component_id_t& id,
const mln::box2d& bbox,
const mln::point2d& mass_center,
unsigned card,
component::Type type)
: id_(id), bbox_(bbox), mass_center_(mass_center), card_(card),
- type_(type), holder_(holder)
+ type_(type)
{
if (!bbox.is_valid())
tag_ = component::Ignored;
@@ -232,13 +226,6 @@ namespace scribo
}
- template <typename L>
- const component_set<L>&
- component_info<L>::holder() const
- {
- return holder_;
- }
-
template <typename L>
std::ostream&
diff --git a/scribo/scribo/core/component_set.hh b/scribo/scribo/core/component_set.hh
index 4f4cd61..ded64ae 100644
--- a/scribo/scribo/core/component_set.hh
+++ b/scribo/scribo/core/component_set.hh
@@ -337,7 +337,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first(),
+ component_info<L> info(i, attribs[i].first(),
attribs[i].second(), attribs[i].second_accu().nsites());
data_->infos_.append(info);
}
@@ -362,7 +362,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first(),
+ component_info<L> info(i, attribs[i].first(),
attribs[i].second(), attribs[i].second_accu().nsites(),
type);
data_->infos_.append(info);
@@ -389,7 +389,7 @@ namespace scribo
data_->infos_.append(component_info<L>()); // Component 0, i.e. the background.
for_all_comp_data(i, attribs)
{
- component_info<L> info(*this, i, attribs[i].first,
+ component_info<L> info(i, attribs[i].first,
attribs[i].second.first, attribs[i].second.second,
type);
data_->infos_.append(info);
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index 9320416..2913f81 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -140,13 +140,13 @@ namespace scribo
std::string text_;
std::string html_text_;
- // Line set holding this element.
- line_set<L> holder_;
-
// DEBUG
stats< float > meanline_clusters_;
stats< float > baseline_clusters_;
+ component_set<L> components_;
+ object_links<L> links_;
+
private:
void init_();
};
@@ -282,10 +282,6 @@ namespace scribo
/// Force a new computation of statistics.
void force_stats_update();
-
- /// Returns the line set holding this element.
- const line_set<L>& holder() const;
-
/// Returns the delta used to compute the extended bbox.
int delta_of_line() const;
@@ -376,7 +372,7 @@ namespace scribo
line_info_data<L>::line_info_data(const line_set<L>& holder,
const group_info& group)
: hidden_(false), tag_(line::None), component_ids_(group.component_ids()),
- type_(line::Undefined), holder_(holder)
+ type_(line::Undefined), components_(holder.components()), links_(holder.links())
{
init_();
}
@@ -385,7 +381,7 @@ namespace scribo
line_info_data<L>::line_info_data(const line_set<L>& holder,
const mln::util::array<component_id_t>& component_ids)
: hidden_(false), tag_(line::None), component_ids_(component_ids),
- type_(line::Undefined), holder_(holder)
+ type_(line::Undefined), components_(holder.components()), links_(holder.links())
{
init_();
}
@@ -696,7 +692,7 @@ namespace scribo
for_all_elements(i, data_->component_ids_)
{
unsigned c = data_->component_ids_[i];
- data_->holder_.components_()(c).update_type(type);
+ data_->components_(c).update_type(type);
}
}
@@ -855,7 +851,7 @@ namespace scribo
data_->baseline_ + D,
bbox().pmax().col() + delta);
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
@@ -915,7 +911,7 @@ namespace scribo
data_->ebbox_.merge(enlarge(b, d_delta));
}
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
else // /other/ IS NOT a text line.
{
@@ -941,7 +937,7 @@ namespace scribo
data_->bbox_.merge(other.bbox());
// Make sure the ebbox is included in the image domain.
- data_->ebbox_.crop_wrt(data_->holder_.components().labeled_image().domain());
+ data_->ebbox_.crop_wrt(data_->components_.labeled_image().domain());
}
@@ -975,13 +971,13 @@ namespace scribo
// Only for the case of two-character words
if (card() == 2)
{
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
const unsigned c1 = data_->component_ids_(0);
const unsigned c2 = data_->component_ids_(1);
- if (data_->holder_.components()(c1).type() == component::Punctuation
- || data_->holder_.components()(c2).type() == component::Punctuation)
+ if (data_->components_(c1).type() == component::Punctuation
+ || data_->components_(c2).type() == component::Punctuation)
return false;
const mln::box2d& bb1 = comp_set(c1).bbox();
@@ -1026,7 +1022,7 @@ namespace scribo
unsigned
line_info<L>::get_first_char_height() const
{
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
const unsigned c1 = data_->components_(0);
const mln::box2d& bb1 = comp_set(c1).bbox();
@@ -1120,7 +1116,7 @@ namespace scribo
line_info<L>::force_stats_update()
{
typedef mln_site(L) P;
- const component_set<L>& comp_set = data_->holder_.components();
+ const component_set<L>& comp_set = data_->components_;
// Init.
typedef mln::value::int_u<12> median_data_t;
@@ -1222,11 +1218,11 @@ namespace scribo
// (right link) (left link)
// Space between characters.
- if (data_->holder_.links()(c) != c)
+ if (data_->links_(c) != c)
{
int
space = bb.pmin().col()
- - comp_set(data_->holder_.links()(c)).bbox().pmax().col() - 1;
+ - comp_set(data_->links_(c)).bbox().pmax().col() - 1;
// -- Ignore overlapped characters.
if (space > 0)
@@ -1330,14 +1326,6 @@ namespace scribo
template <typename L>
- const line_set<L>&
- line_info<L>::holder() const
- {
- return data_->holder_;
- }
-
-
- template <typename L>
std::ostream&
operator<<(std::ostream& ostr, const line_info<L>& info)
{
diff --git a/scribo/scribo/draw/line_components.hh b/scribo/scribo/draw/line_components.hh
index 878b2c1..12e7489 100644
--- a/scribo/scribo/draw/line_components.hh
+++ b/scribo/scribo/draw/line_components.hh
@@ -46,6 +46,7 @@ namespace scribo
template <typename L, typename I>
void
line_components(Image<I>& input_,
+ const line_set<L>& lines,
const line_info<L>& line,
const mln_value(I)& value);
@@ -56,6 +57,7 @@ namespace scribo
template <typename L, typename I>
void
line_components(Image<I>& input_,
+ const line_set<L>& lines,
const line_info<L>& line,
const mln_value(I)& value)
{
@@ -65,9 +67,8 @@ namespace scribo
mln_precondition(input.is_valid());
- const line_set<L>& holder = line.holder();
- const component_set<L>& comp_set = holder.components();
- const L& labeled_image = holder.components().labeled_image();
+ const component_set<L>& comp_set = lines.components();
+ const L& labeled_image = lines.components().labeled_image();
const mln::util::array<component_id_t>& component_ids = line.component_ids();
diff --git a/scribo/scribo/io/img/internal/debug_img_visitor.hh b/scribo/scribo/io/img/internal/debug_img_visitor.hh
index 520a743..cde16ed 100644
--- a/scribo/scribo/io/img/internal/debug_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/debug_img_visitor.hh
@@ -61,7 +61,8 @@ namespace scribo
{
- class debug_img_visitor : public doc_serializer<debug_img_visitor>
+ template <typename L>
+ class debug_img_visitor : public doc_serializer<debug_img_visitor<L> >
{
public:
// Constructor
@@ -69,21 +70,18 @@ namespace scribo
unsigned output_ratio);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
- template <typename L>
void visit(const line_info<L>& line) const;
private: // Attributes
mln::image2d<value::rgb8>& output;
unsigned output_ratio;
+ mutable L lbl_;
private: // Methods
box2d compute_bbox(const box2d& b) const;
@@ -94,9 +92,9 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
+ template <typename L>
box2d
- debug_img_visitor::compute_bbox(const box2d& b) const
+ debug_img_visitor<L>::compute_bbox(const box2d& b) const
{
point2d
pmin = b.pmin() / output_ratio,
@@ -106,8 +104,8 @@ namespace scribo
}
- inline
- debug_img_visitor::debug_img_visitor(mln::image2d<value::rgb8>& out,
+ template <typename L>
+ debug_img_visitor<L>::debug_img_visitor(mln::image2d<value::rgb8>& out,
unsigned output_ratio)
: output(out), output_ratio(output_ratio)
{
@@ -119,7 +117,7 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const document<L>& doc) const
+ debug_img_visitor<L>::visit(const document<L>& doc) const
{
// Text
if (doc.has_text())
@@ -129,19 +127,28 @@ namespace scribo
if (doc.has_elements())
{
for_all_comps(e, doc.elements())
+ {
+ lbl_ = doc.elements().labeled_image();
if (doc.elements()(e).is_valid())
doc.elements()(e).accept(*this);
+ }
}
// line separators
if (doc.has_vline_seps())
+ {
+ lbl_ = doc.vline_seps_comps().labeled_image();
for_all_comps(c, doc.vline_seps_comps())
if (doc.vline_seps_comps()(c).is_valid())
doc.vline_seps_comps()(c).accept(*this);
+ }
if (doc.has_hline_seps())
+ {
+ lbl_ = doc.hline_seps_comps().labeled_image();
for_all_comps(c, doc.hline_seps_comps())
if (doc.hline_seps_comps()(c).is_valid())
doc.hline_seps_comps()(c).accept(*this);
+ }
}
@@ -150,13 +157,13 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const component_info<L>& info) const
+ debug_img_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -187,7 +194,7 @@ namespace scribo
//
template <typename L>
void
- debug_img_visitor::visit(const paragraph_set<L>& parset) const
+ debug_img_visitor<L>::visit(const paragraph_set<L>& parset) const
{
const line_set<L>& lines = parset.lines();
@@ -216,7 +223,7 @@ namespace scribo
template <typename L>
void
- debug_img_visitor::visit(const line_info<L>& line) const
+ debug_img_visitor<L>::visit(const line_info<L>& line) const
{
point2d
pmin = line.bbox().pmin(),
diff --git a/scribo/scribo/io/img/internal/full_img_visitor.hh b/scribo/scribo/io/img/internal/full_img_visitor.hh
index 7b20970..f31eec1 100644
--- a/scribo/scribo/io/img/internal/full_img_visitor.hh
+++ b/scribo/scribo/io/img/internal/full_img_visitor.hh
@@ -58,27 +58,26 @@ namespace scribo
{
- class full_img_visitor : public doc_serializer<full_img_visitor>
+ template <typename L>
+ class full_img_visitor : public doc_serializer<full_img_visitor<L> >
{
public:
// Constructor
full_img_visitor(mln::image2d<value::rgb8>& out);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
- template <typename L>
void visit(const line_info<L>& line) const;
private: // Attributes
mln::image2d<value::rgb8>& output;
+
+ mutable L lbl_;
};
@@ -86,8 +85,8 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
- full_img_visitor::full_img_visitor(mln::image2d<value::rgb8>& out)
+ template <typename L>
+ full_img_visitor<L>::full_img_visitor(mln::image2d<value::rgb8>& out)
: output(out)
{
mln_assertion(output.is_valid());
@@ -98,7 +97,7 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const document<L>& doc) const
+ full_img_visitor<L>::visit(const document<L>& doc) const
{
// Text
if (doc.has_text())
@@ -109,20 +108,29 @@ namespace scribo
{
const component_set<L>& elts = doc.elements();
for_all_comps(e, elts)
+ {
+ lbl_ = elts.labeled_image();
if (elts(e).is_valid())
elts(e).accept(*this);
+ }
}
// line separators
if (doc.has_vline_seps())
+ {
+ lbl_ = doc.vline_seps_comps().labeled_image();
for_all_comps(c, doc.vline_seps_comps())
if (doc.vline_seps_comps()(c).is_valid())
doc.vline_seps_comps()(c).accept(*this);
+ }
if (doc.has_hline_seps())
+ {
+ lbl_ = doc.hline_seps_comps().labeled_image();
for_all_comps(c, doc.hline_seps_comps())
if (doc.hline_seps_comps()(c).is_valid())
doc.hline_seps_comps()(c).accept(*this);
+ }
}
@@ -131,13 +139,13 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const component_info<L>& info) const
+ full_img_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -167,7 +175,7 @@ namespace scribo
//
template <typename L>
void
- full_img_visitor::visit(const paragraph_set<L>& parset) const
+ full_img_visitor<L>::visit(const paragraph_set<L>& parset) const
{
// const line_set<L>& lines = parset.lines();
@@ -187,7 +195,7 @@ namespace scribo
template <typename L>
void
- full_img_visitor::visit(const line_info<L>& line) const
+ full_img_visitor<L>::visit(const line_info<L>& line) const
{
// mln::draw::box(output, line.bbox(), literal::red);
diff --git a/scribo/scribo/io/img/save.hh b/scribo/scribo/io/img/save.hh
index 04f0a3c..a985d07 100644
--- a/scribo/scribo/io/img/save.hh
+++ b/scribo/scribo/io/img/save.hh
@@ -150,7 +150,7 @@ namespace scribo
{
mln_precondition(doc.is_valid());
mln::image2d<value::rgb8> output = duplicate(doc.image());
- scribo::io::img::internal::full_img_visitor f(output);
+ scribo::io::img::internal::full_img_visitor<L> f(output);
doc.accept(f);
return output;
}
@@ -164,7 +164,7 @@ namespace scribo
output(box2d(doc.image().domain().pmin() / 4,
doc.image().domain().pmax() / 4));
data::fill(output, literal::black);
- scribo::io::img::internal::debug_img_visitor f(output, 4);
+ scribo::io::img::internal::debug_img_visitor<L> f(output, 4);
doc.accept(f);
return output;
}
@@ -178,7 +178,7 @@ namespace scribo
output = mln::subsampling::antialiased(doc.image(), 4);
internal::highlight_mask highlight(0.5f);
data::transform_inplace(output, highlight);
- scribo::io::img::internal::debug_img_visitor f(output, 4);
+ scribo::io::img::internal::debug_img_visitor<L> f(output, 4);
doc.accept(f);
return output;
}
diff --git a/scribo/scribo/io/xml/internal/page_xml_visitor.hh b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
index bbdd3e2..8373b02 100644
--- a/scribo/scribo/io/xml/internal/page_xml_visitor.hh
+++ b/scribo/scribo/io/xml/internal/page_xml_visitor.hh
@@ -66,28 +66,27 @@ namespace scribo
Its XSD file is located here:
http://schema.primaresearch.org/PAGE/gts/pagecontent/2010-03-19/pagecontent…
*/
- class page_xml_visitor : public doc_serializer<page_xml_visitor>
+ template <typename L>
+ class page_xml_visitor : public doc_serializer<page_xml_visitor<L> >
{
public:
// Constructor
- page_xml_visitor(std::ofstream& out);
+ page_xml_visitor<L>(std::ofstream& out);
// Visit overloads
- template <typename L>
void visit(const document<L>& doc) const;
- template <typename L>
void visit(const component_set<L>& comp_set) const;
- template <typename L>
void visit(const component_info<L>& info) const;
- template <typename L>
void visit(const paragraph_set<L>& parset) const;
private: // Attributes
std::ofstream& output;
mutable int base_vertical_line_id_;
+
+ mutable L lbl_;
};
@@ -95,8 +94,8 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
- inline
- page_xml_visitor::page_xml_visitor(std::ofstream& out)
+ template <typename L>
+ page_xml_visitor<L>::page_xml_visitor(std::ofstream& out)
: output(out)
{
}
@@ -107,7 +106,7 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const document<L>& doc) const
+ page_xml_visitor<L>::visit(const document<L>& doc) const
{
// Make sure there are no duplicate ids for line separators.
// Vertical and horizontal lines are indexed separately from
@@ -143,8 +142,9 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const component_set<L>& comp_set) const
+ page_xml_visitor<L>::visit(const component_set<L>& comp_set) const
{
+ lbl_ = comp_set.labeled_image();
for_all_comps(c, comp_set)
if (comp_set(c).is_valid())
comp_set(c).accept(*this);
@@ -155,13 +155,13 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const component_info<L>& info) const
+ page_xml_visitor<L>::visit(const component_info<L>& info) const
{
// Getting component outline
scribo::def::lbl_type id = (scribo::def::lbl_type)info.id().to_equiv();
- const L& lbl = info.holder().labeled_image();
+ //const L& lbl = info.holder().labeled_image();
p_array<point2d>
- par = scribo::util::component_precise_outline(lbl | info.bbox(), id);
+ par = scribo::util::component_precise_outline(lbl_ | info.bbox(), id);
switch (info.type())
{
@@ -223,7 +223,7 @@ namespace scribo
//
template <typename L>
void
- page_xml_visitor::visit(const paragraph_set<L>& parset) const
+ page_xml_visitor<L>::visit(const paragraph_set<L>& parset) const
{
const line_set<L>& lines = parset.lines();
diff --git a/scribo/scribo/io/xml/save.hh b/scribo/scribo/io/xml/save.hh
index 54afa79..cc6905b 100644
--- a/scribo/scribo/io/xml/save.hh
+++ b/scribo/scribo/io/xml/save.hh
@@ -96,7 +96,7 @@ namespace scribo
template <typename L>
void save_page(const document<L>& doc, std::ofstream& output)
{
- scribo::io::xml::internal::page_xml_visitor f(output);
+ scribo::io::xml::internal::page_xml_visitor<L> f(output);
doc.accept(f);
}
diff --git a/scribo/scribo/text/merging.hh b/scribo/scribo/text/merging.hh
index f1135ed..31a5ed4 100644
--- a/scribo/scribo/text/merging.hh
+++ b/scribo/scribo/text/merging.hh
@@ -224,11 +224,15 @@ namespace scribo
template <typename L>
- bool between_separators(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ bool between_separators(const scribo::line_set<L>& lines,
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
+ const scribo::line_info<L>& l1 = lines(l1_);
+ const scribo::line_info<L>& l2 = lines(l2_);
+
// No separators found in image.
- mln_precondition(l1.holder().components().has_separators());
+ mln_precondition(lines.components().has_separators());
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -237,7 +241,7 @@ namespace scribo
col1 = l1_bbox.pcenter().col(),
col2 = l2_bbox.pcenter().col();
const mln_ch_value(L, bool)&
- separators = l1.holder().components().separators();
+ separators = lines.components().separators();
// Checking for separators starting from 1 / 4, 3/ 4 and the
// center of the box
@@ -290,9 +294,13 @@ namespace scribo
*/
template <typename L>
- bool lines_can_merge(scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ bool lines_can_merge(scribo::line_set<L>& lines,
+ const scribo::line_id_t& l1_,
+ const scribo::line_id_t& l2_)
{
+ scribo::line_info<L>& l1 = lines(l1_);
+ scribo::line_info<L>& l2 = lines(l2_);
+
// Parameters.
const float x_ratio_max = 1.7f;
const float baseline_delta_max =
@@ -306,9 +314,9 @@ namespace scribo
const point2d& l1_pmax = l1_bbox.pmax();
const point2d& l2_pmax = l2_bbox.pmax();
- const bool l1_has_separators = l1.holder().components().has_separators();
+ const bool l1_has_separators = lines.components().has_separators();
const bool l1_l2_between_separators = (l1_has_separators) ?
- between_separators(l1, l2) : false;
+ between_separators(lines, l1_, l2_) : false;
const float l_ted_cw = l2.char_width();
const float dx = std::max(l1_pmin.col(), l2_pmin.col())
@@ -424,9 +432,13 @@ namespace scribo
*/
template <typename L>
- bool non_text_and_text_can_merge(scribo::line_info<L>& l_cur, // current
- const scribo::line_info<L>& l_ted) // touched
+ bool non_text_and_text_can_merge(scribo::line_set<L>& lines,
+ const scribo::line_id_t& l_cur_, // current
+ const scribo::line_id_t l_ted_) // touched
{
+ scribo::line_info<L>& l_cur = lines(l_cur_);
+ scribo::line_info<L>& l_ted = lines(l_ted_);
+
if (l_cur.type() == line::Text || l_ted.type() != line::Text)
return false;
// the current object is a NON-textline
@@ -434,8 +446,8 @@ namespace scribo
// Check that there is no separator in between.
- if (l_cur.holder().components().has_separators()
- && between_separators(l_cur, l_ted))
+ if (lines.components().has_separators()
+ && between_separators(lines, l_cur_, l_ted_))
return false;
const box2d& l_cur_bbox = l_cur.bbox();
@@ -744,8 +756,8 @@ namespace scribo
< 5 && std::abs(l_info.meanline() -
mc_info.meanline()) < 5))
&& dx < l_ted_cw && dy < 0
- && not (l_info.holder().components().has_separators()
- && between_separators(l_info, mc_info)))
+ && not (lines.components().has_separators()
+ && between_separators(lines, l, mc)))
l = do_union(lines, l, mc, parent);
// }
@@ -801,7 +813,7 @@ namespace scribo
// could be noise or garbage... So adding new
// criterions could fix this issue.
//
- if (!non_text_and_text_can_merge(lines(l), lines(mc)))
+ if (!non_text_and_text_can_merge(lines, l, mc))
continue;
// Avoid the case when a large title ebbox overlap
@@ -868,7 +880,7 @@ namespace scribo
if (lines(l_).type() == line::Text)
{
// l_ and lcand look like text line chunks.
- if (lines_can_merge(lines(l_), lines(lcand)))
+ if (lines_can_merge(lines, l_, lcand))
{
++count_two_lines_merge;
l_ = do_union(lines, l_, lcand, parent);
@@ -899,7 +911,7 @@ namespace scribo
{
// l_ does NOT looks like a text line chunk.
++count_comp_HITS_txtline;
- if (non_text_and_text_can_merge(lines(l_), lines(lcand)))
+ if (non_text_and_text_can_merge(lines, l_, lcand))
// a petouille merges with a text line?
{
++count_comp_HITS_txtline;
diff --git a/scribo/scribo/text/paragraphs.hh b/scribo/scribo/text/paragraphs.hh
index 8fd89be..e37f610 100644
--- a/scribo/scribo/text/paragraphs.hh
+++ b/scribo/scribo/text/paragraphs.hh
@@ -29,11 +29,15 @@ namespace scribo
template <typename L>
inline
bool
- between_horizontal_separator(const scribo::line_info<L>& l1,
- const scribo::line_info<L>& l2)
+ between_horizontal_separator(const line_set<L>& lines,
+ const line_id_t& l1_,
+ const line_id_t& l2_)
{
+ const line_info<L>& l1 = lines(l1_);
+ const line_info<L>& l2 = lines(l2_);
+
// No separators found in image.
- mln_precondition(l1.holder().components().has_separators());
+ mln_precondition(lines.components().has_separators());
const box2d& l1_bbox = l1.bbox();
const box2d& l2_bbox = l2.bbox();
@@ -42,7 +46,7 @@ namespace scribo
row1 = l1_bbox.pcenter().row(),
row2 = l2_bbox.pcenter().row();
const mln_ch_value(L, bool)&
- separators = l1.holder().components().separators();
+ separators = lines.components().separators();
unsigned row;
unsigned col_ptr;
@@ -153,18 +157,18 @@ namespace scribo
line_id_t right_nbh = right(l);
line_id_t lol_nbh = output(left_nbh);
- const line_info<L>& left_line = lines(left_nbh);
- const line_info<L>& current_line = lines(l);
- const line_info<L>& right_line = lines(right_nbh);
+ // const line_info<L>& left_line = lines(left_nbh);
+ // const line_info<L>& current_line = lines(l);
+ // const line_info<L>& right_line = lines(right_nbh);
- if (right_line.holder().components().has_separators() &&
- between_horizontal_separator(right_line, current_line))
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, right_nbh, l))
{
output(right_nbh) = right_nbh;
right_nbh = l;
}
- if (current_line.holder().components().has_separators() &&
- between_horizontal_separator(current_line, left_line))
+ if (lines.components().has_separators() &&
+ between_horizontal_separator(lines, l, left_nbh))
{
output(l) = l;
left_nbh = l;
diff --git a/scribo/scribo/text/paragraphs_closing.hh b/scribo/scribo/text/paragraphs_closing.hh
index efc5259..2b685df 100644
--- a/scribo/scribo/text/paragraphs_closing.hh
+++ b/scribo/scribo/text/paragraphs_closing.hh
@@ -210,7 +210,7 @@ namespace scribo
const line_id_t& line_id = line_ids(i);
const line_info<L>& current_line = lines(line_id);
- scribo::draw::line_components(debug, current_line, p);
+ scribo::draw::line_components(debug, lines, current_line, p);
// HACK DISCLAIMER : this line is drawn in order to be
// sure that every line will be reduced to a single
--
1.5.6.5
1
0