* scribo/text/recognition.hh: Use the correct code depending on
the Tesseract version.
* scribo/toolchain/nepomuk/text_extraction.hh: Add a few comments
about how to handle Tesseract versions.
* src/Makefile.am,
* src/text/Makefile.am,
* src/toolchain/nepomuk/Makefile.am,
* tests/toolchain/nepomuk/Makefile.am: Add path to config.h.
* src/pbm_text_in_doc.cc,
* src/text/pbm_lines_recognition.cc,
* src/text/pbm_recognition.cc,
* src/text_recognition_in_picture.cc,
* src/toolchain/nepomuk/text_extraction.cc,
* tests/toolchain/nepomuk/text_extraction.cc: Include config.h.
---
scribo/ChangeLog | 22 +++++++++
scribo/scribo/text/recognition.hh | 50 ++++++++++++++++++-
scribo/scribo/toolchain/nepomuk/text_extraction.hh | 9 +++-
scribo/src/Makefile.am | 6 ++-
scribo/src/pbm_text_in_doc.cc | 5 ++-
scribo/src/text/Makefile.am | 6 ++-
scribo/src/text/pbm_lines_recognition.cc | 4 ++
scribo/src/text/pbm_recognition.cc | 4 ++
scribo/src/text_recognition_in_picture.cc | 4 ++
scribo/src/toolchain/nepomuk/Makefile.am | 3 +-
scribo/src/toolchain/nepomuk/text_extraction.cc | 4 ++
scribo/tests/toolchain/nepomuk/Makefile.am | 3 +-
scribo/tests/toolchain/nepomuk/text_extraction.cc | 4 ++
13 files changed, 112 insertions(+), 12 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 82c02ca..cb05c44 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,27 @@
2010-10-25 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Support for Tesseract 2.x and 3.x in Scribo.
+
+ * scribo/text/recognition.hh: Use the correct code depending on
+ the Tesseract version.
+
+ * scribo/toolchain/nepomuk/text_extraction.hh: Add a few comments
+ about how to handle Tesseract versions.
+
+ * src/Makefile.am,
+ * src/text/Makefile.am,
+ * src/toolchain/nepomuk/Makefile.am,
+ * tests/toolchain/nepomuk/Makefile.am: Add path to config.h.
+
+ * src/pbm_text_in_doc.cc,
+ * src/text/pbm_lines_recognition.cc,
+ * src/text/pbm_recognition.cc,
+ * src/text_recognition_in_picture.cc,
+ * src/toolchain/nepomuk/text_extraction.cc,
+ * tests/toolchain/nepomuk/text_extraction.cc: Include config.h.
+
+2010-10-25 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Cleanup files.
* scribo/debug/usage.hh,
diff --git a/scribo/scribo/text/recognition.hh b/scribo/scribo/text/recognition.hh
index 17cbaa2..07f585d 100644
--- a/scribo/scribo/text/recognition.hh
+++ b/scribo/scribo/text/recognition.hh
@@ -61,6 +61,11 @@
# include <tesseract/baseapi.h>
+# if !defined HAVE_TESSERACT_2 && !defined HAVE_TESSERACT_3
+# define HAVE_TESSERACT_2
+# endif
+
+
namespace scribo
{
@@ -103,8 +108,19 @@ namespace scribo
{
trace::entering("scribo::text::recognition");
+
// Initialize Tesseract.
+# ifdef HAVE_TESSERACT_2
TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL);
+# else // HAVE_TESSERACT_3
+ tesseract::TessBaseAPI tess;
+ if (tess.Init(NULL, language, NULL, 0, false) == -1)
+ {
+ std::cout << "Error: cannot initialize tesseract!" << std::endl;
+ abort();
+ }
+ tess.SetPageSegMode(tesseract::PSM_SINGLE_LINE);
+# endif // ! HAVE_TESSERACT_2
typedef mln_ch_value(L,bool) I;
int vals[] = { 0, 9, 0, 9, 0,
@@ -155,8 +171,8 @@ namespace scribo
data::fill(line_image, false);
data::paste_without_localization(text_ima, line_image);
-
// Recognize characters.
+# ifdef HAVE_TESSERACT_2
char* s = TessBaseAPI::TesseractRect(
(unsigned char*) line_image.buffer(),
sizeof (bool), // Pixel size.
@@ -165,7 +181,16 @@ namespace scribo
0, // Top
line_image.ncols(), // n cols
line_image.nrows()); // n rows
-
+# else // HAVE_TESSERACT_3
+ char* s = tess.TesseractRect(
+ (unsigned char*) line_image.buffer(),
+ sizeof (bool), // Pixel size.
+ line_image.ncols() * sizeof (bool), // Row_offset
+ 0, // Left
+ 0, // Top
+ line_image.ncols(), // n cols
+ line_image.nrows()); // n rows
+# endif // ! HAVE_TESSERACT_2
if (s != 0)
{
@@ -194,7 +219,16 @@ namespace scribo
mln_precondition(line.is_valid());
// Initialize Tesseract.
+# ifdef HAVE_TESSERACT_2
TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL);
+# else // HAVE_TESSERACT_3
+ tesseract::TessBaseAPI tess;
+ if (tess.Init(NULL, language, NULL, 0, false) == -1)
+ {
+ std::cout << "Error: cannot initialize tesseract!" << std::endl;
+ abort();
+ }
+# endif // ! HAVE_TESSERACT_2
std::ofstream file;
if (!output_file.empty())
@@ -213,6 +247,7 @@ namespace scribo
border::resize(text_ima, 0);
// Recognize characters.
+# ifdef HAVE_TESSERACT_2
char* s = TessBaseAPI::TesseractRect(
(unsigned char*) text_ima.buffer(),
sizeof (bool), // Pixel size.
@@ -221,7 +256,16 @@ namespace scribo
0, // Top
text_ima.ncols(), // n cols
text_ima.nrows()); // n rows
-
+# else // HAVE_TESSERACT_3
+ char* s = tess.TesseractRect(
+ (unsigned char*) text_ima.buffer(),
+ sizeof (bool), // Pixel size.
+ text_ima.ncols() * sizeof (bool), // Row_offset
+ 0, // Left
+ 0, // Top
+ text_ima.ncols(), // n cols
+ text_ima.nrows()); // n rows
+# endif // ! HAVE_TESSERACT_2
if (s != 0)
{
diff --git a/scribo/scribo/toolchain/nepomuk/text_extraction.hh
b/scribo/scribo/toolchain/nepomuk/text_extraction.hh
index e16cd56..effb13f 100644
--- a/scribo/scribo/toolchain/nepomuk/text_extraction.hh
+++ b/scribo/scribo/toolchain/nepomuk/text_extraction.hh
@@ -64,12 +64,17 @@ namespace scribo
This is a convenient routine to be used in Nepomuk.
-
-
\param[in] ima A document image. The
\return A set of recognized words.
+
+ Don't forget to define NDEBUG for compilation to disable debug
+ checks.
+
+ Depending on your version of Tesseract (OCR), define either
+ HAVE_TESSERACT_2 or HAVE_TESSERACT_3.
+
*/
QSet<QString>
text_extraction(const QImage& input);
diff --git a/scribo/src/Makefile.am b/scribo/src/Makefile.am
index d3f686b..c99a791 100644
--- a/scribo/src/Makefile.am
+++ b/scribo/src/Makefile.am
@@ -42,7 +42,8 @@ if HAVE_TESSERACT
pbm_text_in_doc_SOURCES = pbm_text_in_doc.cc
pbm_text_in_doc_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
- $(TIFF_CPPFLAGS)
+ $(TIFF_CPPFLAGS) \
+ -I$(top_builddir)
pbm_text_in_doc_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
@@ -77,7 +78,8 @@ if HAVE_TESSERACT
text_recognition_in_picture_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(MAGICKXX_CPPFLAGS)
+ $(MAGICKXX_CPPFLAGS) \
+ -I$(top_builddir)
text_recognition_in_picture_LDFLAGS = $(AM_LDFLAGS) \
-lpthread \
$(TESSERACT_LDFLAGS) \
diff --git a/scribo/src/pbm_text_in_doc.cc b/scribo/src/pbm_text_in_doc.cc
index fa8b5ae..23ed9e7 100644
--- a/scribo/src/pbm_text_in_doc.cc
+++ b/scribo/src/pbm_text_in_doc.cc
@@ -24,6 +24,9 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
#include <libgen.h>
#include <fstream>
@@ -119,7 +122,7 @@ int main(int argc, char* argv[])
typedef image2d<scribo::def::lbl_type> L;
line_set<L>
lines = scribo::toolchain::text_in_doc(input, denoise, debug);
-
+
// Saving results
scribo::io::xml::save(argv[1], lines, "out.xml", true);
diff --git a/scribo/src/text/Makefile.am b/scribo/src/text/Makefile.am
index fc03ae8..dbdb1da 100644
--- a/scribo/src/text/Makefile.am
+++ b/scribo/src/text/Makefile.am
@@ -30,7 +30,8 @@ if HAVE_TIFF
pbm_lines_recognition_SOURCES = pbm_lines_recognition.cc
pbm_lines_recognition_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
- $(TIFF_CPPFLAGS)
+ $(TIFF_CPPFLAGS) \
+ -I$(top_builddir)
pbm_lines_recognition_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
@@ -40,7 +41,8 @@ if HAVE_TIFF
pbm_recognition_SOURCES = pbm_recognition.cc
pbm_recognition_CPPFLAGS = $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
- $(TIFF_CPPFLAGS)
+ $(TIFF_CPPFLAGS) \
+ -I$(top_builddir)
pbm_recognition_LDFLAGS = $(AM_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
diff --git a/scribo/src/text/pbm_lines_recognition.cc
b/scribo/src/text/pbm_lines_recognition.cc
index 5aaa15b..3451449 100644
--- a/scribo/src/text/pbm_lines_recognition.cc
+++ b/scribo/src/text/pbm_lines_recognition.cc
@@ -23,6 +23,10 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
#include <iostream>
#include <mln/core/image/image2d.hh>
diff --git a/scribo/src/text/pbm_recognition.cc b/scribo/src/text/pbm_recognition.cc
index 082faef..b262aab 100644
--- a/scribo/src/text/pbm_recognition.cc
+++ b/scribo/src/text/pbm_recognition.cc
@@ -23,6 +23,10 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
#include <iostream>
#include <mln/core/image/image2d.hh>
diff --git a/scribo/src/text_recognition_in_picture.cc
b/scribo/src/text_recognition_in_picture.cc
index acf8fec..b94ed0b 100644
--- a/scribo/src/text_recognition_in_picture.cc
+++ b/scribo/src/text_recognition_in_picture.cc
@@ -24,6 +24,10 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
#include <libgen.h>
#include <iostream>
diff --git a/scribo/src/toolchain/nepomuk/Makefile.am
b/scribo/src/toolchain/nepomuk/Makefile.am
index 07c2c33..4328be5 100644
--- a/scribo/src/toolchain/nepomuk/Makefile.am
+++ b/scribo/src/toolchain/nepomuk/Makefile.am
@@ -30,7 +30,8 @@ text_extraction_CPPFLAGS = $(AM_CPPFLAGS) \
$(MAGICKXX_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
$(TIFF_CPPFLAGS) \
- $(QT_CPPFLAGS)
+ $(QT_CPPFLAGS) \
+ -I$(top_builddir)
text_extraction_LDFLAGS = $(AM_LDFLAGS) \
$(MAGICKXX_LDFLAGS) \
$(TESSERACT_LDFLAGS) \
diff --git a/scribo/src/toolchain/nepomuk/text_extraction.cc
b/scribo/src/toolchain/nepomuk/text_extraction.cc
index e5e2de4..41e843a 100644
--- a/scribo/src/toolchain/nepomuk/text_extraction.cc
+++ b/scribo/src/toolchain/nepomuk/text_extraction.cc
@@ -23,6 +23,10 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
#include <libgen.h>
#include <iostream>
#include <QtCore/QDebug>
diff --git a/scribo/tests/toolchain/nepomuk/Makefile.am
b/scribo/tests/toolchain/nepomuk/Makefile.am
index 4bce3bd..67a594b 100644
--- a/scribo/tests/toolchain/nepomuk/Makefile.am
+++ b/scribo/tests/toolchain/nepomuk/Makefile.am
@@ -31,7 +31,8 @@ text_extraction_SOURCES = text_extraction.cc
text_extraction_CXXFLAGS = $(QT_CXXFLAGS) $(AM_CXXFLAGS)
text_extraction_CPPFLAGS = $(QT_CPPFLAGS) $(AM_CPPFLAGS) \
$(TESSERACT_CPPFLAGS) \
- $(TIFF_CPPFLAGS)
+ $(TIFF_CPPFLAGS) \
+ -I$(top_builddir)
text_extraction_LDFLAGS = $(QT_LDFLAGS) $(LDFLAGS) \
$(TESSERACT_LDFLAGS) \
$(TIFF_LDFLAGS) \
diff --git a/scribo/tests/toolchain/nepomuk/text_extraction.cc
b/scribo/tests/toolchain/nepomuk/text_extraction.cc
index 027ad99..c475aa9 100644
--- a/scribo/tests/toolchain/nepomuk/text_extraction.cc
+++ b/scribo/tests/toolchain/nepomuk/text_extraction.cc
@@ -27,6 +27,10 @@
///
/// Test of scribo::toolchain::nepomuk::text_extraction
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
#include <QtGui/QImage>
#include <QtCore>
#include <scribo/toolchain/nepomuk/text_extraction.hh>
--
1.5.6.5