* scribo/core/line_info.hh,
* scribo/text/recognition.hh: Here.
---
scribo/ChangeLog | 7 ++
scribo/scribo/core/line_info.hh | 11 ++-
scribo/scribo/text/recognition.hh | 189 +++++++++++++++++++++++++++++--------
3 files changed, 167 insertions(+), 40 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index a105435..e8fe131 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,3 +1,10 @@
+2011-11-29 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Make recognition faster and store recognition confidence.
+
+ * scribo/core/line_info.hh,
+ * scribo/text/recognition.hh: Here.
+
2011-11-23 Guillaume Lazzara <z(a)lrde.epita.fr>
Regenerate mk files.
diff --git a/scribo/scribo/core/line_info.hh b/scribo/scribo/core/line_info.hh
index 2913f81..b121205 100644
--- a/scribo/scribo/core/line_info.hh
+++ b/scribo/scribo/core/line_info.hh
@@ -137,6 +137,7 @@ namespace scribo
// sqrt(Max(VAR(R), VAR(G), VAR(B)))
float color_reliability_;
+ float text_confidence_;
std::string text_;
std::string html_text_;
@@ -237,9 +238,12 @@ namespace scribo
bool indented() const;
bool has_text() const;
+ // Returns the percentage of confidence of the recognized text. If
+ // no text has been recognized, it returns -1.
+ float text_confidence() const;
const std::string& text() const;
const std::string& html_text() const;
- void update_text(const std::string& str);
+ void update_text(const std::string& str, float confidence);
bool is_valid() const;
@@ -400,6 +404,8 @@ namespace scribo
reading_orientation_ = 0.;
indented_ = false;
+
+ text_confidence_ = -1;
}
} // end of namespace scribo::internal
@@ -769,8 +775,9 @@ namespace scribo
template <typename L>
void
- line_info<L>::update_text(const std::string& str)
+ line_info<L>::update_text(const std::string& str, float confidence = 1.0) // NOTE(review): default argument on an out-of-class definition of a class template member -- likely belongs on the in-class declaration; confirm.
{
+ data_->text_confidence_ = confidence; // Confidence is stored together with the text assigned just below.
data_->text_ = str;
data_->html_text_ = scribo::internal::html_markups_replace(str);
}
diff --git a/scribo/scribo/text/recognition.hh b/scribo/scribo/text/recognition.hh
index b7e5c91..358ff24 100644
--- a/scribo/scribo/text/recognition.hh
+++ b/scribo/scribo/text/recognition.hh
@@ -59,6 +59,8 @@
# include <tesseract/baseapi.h>
+# include <tesseract/ocrclass.h>
+# include <tesseract/resultiterator.h>
# if !defined HAVE_TESSERACT_2 && !defined HAVE_TESSERACT_3
@@ -100,6 +102,7 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
+# ifdef HAVE_TESSERACT_2
template <typename L>
void
recognition(line_set<L>& lines, const char *language)
@@ -108,19 +111,8 @@ namespace scribo
// Initialize Tesseract.
-# ifdef HAVE_TESSERACT_2
- TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL);
-# else // HAVE_TESSERACT_3
- tesseract::TessBaseAPI tess;
- if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1)
- {
- std::cout << "Error: cannot initialize tesseract!" << std::endl;
- abort();
- }
- tess.SetPageSegMode(tesseract::PSM_SINGLE_LINE);
-
-# endif // HAVE_TESSERACT_2
+ TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL);
typedef mln_ch_value(L,bool) I;
@@ -148,7 +140,8 @@ namespace scribo
for_all_elements(e, lines(i).component_ids())
{
unsigned comp_id = comps(e);
- data::fill(((text_ima | comp_set(comp_id).bbox()).rw() | (pw::value(lbl) ==
pw::cst(comp_id))).rw(),
+ data::fill(((text_ima | comp_set(comp_id).bbox()).rw()
+ | (pw::value(lbl) == pw::cst(comp_id))).rw(),
true);
}
@@ -170,7 +163,6 @@ namespace scribo
data::paste_without_localization(text_ima, line_image);
// Recognize characters.
-# ifdef HAVE_TESSERACT_2
char* s = TessBaseAPI::TesseractRect(
(unsigned char*) line_image.buffer(),
sizeof (bool), // Pixel size.
@@ -179,15 +171,6 @@ namespace scribo
0, // Top
line_image.ncols(), // n cols
line_image.nrows()); // n rows
-# else // HAVE_TESSERACT_3
- tess.SetImage(
- (unsigned char*) line_image.buffer(),
- line_image.ncols(), // n cols
- line_image.nrows(), // n rows
- sizeof (bool), // Pixel size.
- line_image.ncols() * sizeof (bool)); // Row_offset
- char* s = tess.GetUTF8Text();
-# endif // ! HAVE_TESSERACT_2
if (s != 0)
{
@@ -216,16 +199,7 @@ namespace scribo
mln_precondition(line.is_valid());
// Initialize Tesseract.
-# ifdef HAVE_TESSERACT_2
TessBaseAPI::InitWithLanguage(NULL, NULL, language, NULL, false, 0, NULL);
-# else // HAVE_TESSERACT_3
- tesseract::TessBaseAPI tess;
- if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1)
- {
- std::cout << "Error: cannot initialize tesseract!" << std::endl;
- abort();
- }
-# endif // ! HAVE_TESSERACT_2
std::ofstream file;
if (!output_file.empty())
@@ -244,7 +218,6 @@ namespace scribo
border::resize(text_ima, 0);
// Recognize characters.
-# ifdef HAVE_TESSERACT_2
char* s = TessBaseAPI::TesseractRect(
(unsigned char*) text_ima.buffer(),
sizeof (bool), // Pixel size.
@@ -253,16 +226,152 @@ namespace scribo
0, // Top
text_ima.ncols(), // n cols
text_ima.nrows()); // n rows
+
+ if (s != 0)
+ {
+ if (!output_file.empty())
+ {
+ std::string str(s);
+ str = str.substr(0, str.length() - 1);
+ file << line.domain().bbox().pmin().row()
+ << " "
+ << line.domain().bbox().pmin().col()
+ << " "
+ << line.domain().bbox().pmax().row()
+ << " "
+ << line.domain().bbox().pmax().col()
+ << " "
+ << str;
+ }
+ }
+
+ // The string has been allocated by Tesseract. We must free it.
+ delete [] s;
+
+ if (!output_file.empty())
+ file.close();
+
+ trace::exiting("scribo::text::recognition");
+ }
+
+
+
# else // HAVE_TESSERACT_3
+
+
+
+ /// Recognize the text of every text line of \p lines with Tesseract 3
+ /// and store it, together with its confidence, through update_text().
+ /// \p language is forwarded to TessBaseAPI::Init(); the program
+ /// aborts if the initialization fails.
+ template <typename L>
+ void
+ recognition(line_set<L>& lines, const char *language)
+ {
+ trace::entering("scribo::text::recognition");
+
+ // Initialize Tesseract.
+ tesseract::TessBaseAPI tess;
+ if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1)
+ {
+ std::cout << "Error: cannot initialize tesseract!" << std::endl;
+ abort();
+ }
+ tess.SetPageSegMode(tesseract::PSM_SINGLE_LINE);
+
+ typedef mln_ch_value(L,bool) I;
+
+ /// Use text bboxes with Tesseract
+ for_all_lines(i, lines)
+ {
+ if (! lines(i).is_textline())
+ continue;
+
+ mln_domain(I) box = lines(i).bbox();
+
+ // Make sure characters are isolated from the borders.
+ // Help Tesseract.
+ // FIXME: not needed anymore in tesseract 3 ?
+ box.enlarge(2);
+
+ I text_ima(box);
+ data::fill(text_ima, false);
+
+ // Careful: background is set to 'false'.
+ const component_set<L>& comp_set = lines.components();
+ const L& lbl = comp_set.labeled_image();
+
+ // Extract each character components to create the line image.
+ const mln::util::array<component_id_t>& comps = lines(i).component_ids();
+ for_all_elements(e, lines(i).component_ids())
+ {
+ unsigned comp_id = comps(e);
+ data::fill(((text_ima | comp_set(comp_id).bbox()).rw()
+ | (pw::value(lbl) == pw::cst(comp_id))).rw(),
+ true);
+ }
+
+ /// Improve text quality.
+ text::clean_inplace(lines(i), text_ima);
+
+ // Recognize characters. The image buffer is handed over in place,
+ // hence the row offset adds the border on both sides of a row.
+ tess.SetImage(
+ (unsigned char*) &text_ima(text_ima.domain().pmin()),
+ text_ima.ncols(), // n cols
+ text_ima.nrows(), // n rows
+ sizeof (bool), // Pixel size.
+ text_ima.ncols() * sizeof (bool) + 2 * text_ima.border()); // Row_offset
+
+ char *s = tess.GetUTF8Text();
+ if (s != 0)
+ {
+ // The iterator returned by Tesseract belongs to the caller: read
+ // the line confidence, then release it. -1 marks a confidence
+ // that could not be retrieved.
+ tesseract::ResultIterator *it = tess.GetIterator();
+ float confidence = it ? it->Confidence(tesseract::RIL_TEXTLINE) : -1.f;
+ delete it;
+
+ std::string str(s);
+ str = str.substr(0, str.length() - 2);
+ lines(i).update_text(str, confidence);
+ }
+
+ // The string has been allocated by Tesseract. We must free it.
+ delete[] s;
+ }
+
+ trace::exiting("scribo::text::recognition");
+ }
+
+
+
+ template <typename I>
+ void
+ recognition(const Image<I>& line_,
+ const char *language,
+ const std::string& output_file = std::string())
+ {
+ trace::entering("scribo::text::recognition");
+
+ const I& line = exact(line_);
+ mln_precondition(line.is_valid());
+
+ // Initialize Tesseract.
+ tesseract::TessBaseAPI tess;
+ if (tess.Init(NULL, language, tesseract::OEM_DEFAULT) == -1)
+ {
+ std::cout << "Error: cannot initialize tesseract!" << std::endl;
+ abort();
+ }
+
+ std::ofstream file;
+ if (!output_file.empty())
+ file.open(output_file.c_str());
+
+ mln_domain(I) box = line.domain();
+
+ // Recognize characters.
char* s = tess.TesseractRect(
- (unsigned char*) text_ima.buffer(),
+ (unsigned char*) &line(line.domain().pmin()),
sizeof (bool), // Pixel size.
- text_ima.ncols() * sizeof (bool), // Row_offset
+ line.ncols() * sizeof (bool) + line.border() * 2, // Row_offset
0, // Left
0, // Top
- text_ima.ncols(), // n cols
- text_ima.nrows()); // n rows
-# endif // ! HAVE_TESSERACT_2
+ line.ncols(), // n cols
+ line.nrows()); // n rows
if (s != 0)
{
@@ -292,6 +401,10 @@ namespace scribo
}
+# endif // ! HAVE_TESSERACT_2
+
+
+
# endif // ! MLN_INCLUDE_ONLY
--
1.7.2.5