---
scribo/ChangeLog | 4 +
scribo/scribo/text/recognition.hh | 171 ++++++++++++++++++-------------------
2 files changed, 89 insertions(+), 86 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 412f271..bd66473 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,9 @@
2013-06-25 Roland Levillain <roland(a)lrde.epita.fr>
+ * scribo/text/recognition.hh: Aesthetic changes; move `using namespace mln' from namespace scope into the function bodies.
+
+2013-06-25 Roland Levillain <roland(a)lrde.epita.fr>
+
Restrict workaround-related locale changes to LC_NUMERIC.
* scribo/text/recognition.hh: Here.
diff --git a/scribo/scribo/text/recognition.hh b/scribo/scribo/text/recognition.hh
index d74ae5c..00e18d5 100644
--- a/scribo/scribo/text/recognition.hh
+++ b/scribo/scribo/text/recognition.hh
@@ -38,9 +38,9 @@
# define HAVE_TESSERACT_2
# endif
+# include <clocale>
# include <ostream>
-# include <clocale>
# include <mln/core/image/dmorph/image_if.hh>
# include <mln/core/concept/neighborhood.hh>
@@ -79,13 +79,11 @@ namespace scribo
namespace text
{
- using namespace mln;
-
/*! \brief Passes the text bboxes to Tesseract (OCR).
\param[in] lines The lines of text.
\param[in] language The language which should be recognized by
- Tesseract. (fra, en, ...)
+ Tesseract. (fra, en, ...)
\ingroup grpocr
*/
@@ -98,9 +96,9 @@ namespace scribo
\param[in] line Image of text line.
\param[in] language The language which should be recognized by
- Tesseract. (fra, en, ...)
+ Tesseract. (fra, en, ...)
\param[in] output_file If set, store the recognized text in
- this file.
+ this file.
\ingroup grpocr
*/
@@ -114,12 +112,15 @@ namespace scribo
# ifndef MLN_INCLUDE_ONLY
-
# ifdef HAVE_TESSERACT_2
+
+
template <typename L>
void
recognition(line_set<L>& lines, const char *language)
{
+ using namespace mln;
+
mln_trace("scribo::text::recognition");
/* Tesseract is known to have issues while reading training
@@ -142,8 +143,7 @@ namespace scribo
typedef mln_ch_value(L,bool) I;
-
- /// Use text bboxes with Tesseract
+ // Use text bboxes with Tesseract.
for_all_lines(i, lines)
{
if (! lines(i).is_textline())
@@ -158,12 +158,13 @@ namespace scribo
I text_ima(box);
data::fill(text_ima, false);
- // Careful : background is set to 'False'
+ // Careful: background is set to 'false'.
const component_set<L>& comp_set = lines.components();
const L& lbl = comp_set.labeled_image();
- // Extract each character components to create the line image.
- const mln::util::array<component_id_t>& comps = lines(i).component_ids();
+ // Extract each character component to create the line image.
+ const mln::util::array<component_id_t>& comps =
+ lines(i).component_ids();
for_all_elements(e, lines(i).component_ids())
{
unsigned comp_id = comps(e);
@@ -192,10 +193,10 @@ namespace scribo
// Recognize characters.
char* s = TessBaseAPI::TesseractRect(
(unsigned char*) line_image.buffer(),
- sizeof (bool), // Pixel size.
- line_image.ncols() * sizeof (bool), // Row_offset
- 0, // Left
- 0, // Top
+ sizeof (bool), // pixel size
+ line_image.ncols() * sizeof (bool), // row offset
+ 0, // left
+ 0, // top
line_image.ncols(), // n cols
line_image.nrows()); // n rows
@@ -207,12 +208,11 @@ namespace scribo
}
// The string has been allocated by Tesseract. It must be released.
- delete [] s;
+ delete[] s;
}
// Restore the default locale from the environment.
setlocale(LC_NUMERIC, "");
-
}
@@ -222,6 +222,8 @@ namespace scribo
const char *language,
const std::string& output_file)
{
+ using namespace mln;
+
mln_trace("scribo::text::recognition");
const I& line = exact(line_);
@@ -252,52 +254,51 @@ namespace scribo
// Recognize characters.
char* s = TessBaseAPI::TesseractRect(
(unsigned char*) text_ima.buffer(),
- sizeof (bool), // Pixel size.
- text_ima.ncols() * sizeof (bool), // Row_offset
- 0, // Left
- 0, // Top
+ sizeof (bool), // pixel size
+ text_ima.ncols() * sizeof (bool), // row offset
+ 0, // left
+ 0, // top
text_ima.ncols(), // n cols
text_ima.nrows()); // n rows
- if (s != 0)
+ if (s != 0)
{
if (!output_file.empty())
- {
- std::string str(s);
- str = str.substr(0, str.length() - 1);
- file << line.domain().bbox().pmin().row()
- << " "
- << line.domain().bbox().pmin().col()
- << " "
- << line.domain().bbox().pmax().row()
- << " "
- << line.domain().bbox().pmax().col()
- << " "
- << str;
- }
+ {
+ std::string str(s);
+ str = str.substr(0, str.length() - 1);
+ file << line.domain().bbox().pmin().row()
+ << " "
+ << line.domain().bbox().pmin().col()
+ << " "
+ << line.domain().bbox().pmax().row()
+ << " "
+ << line.domain().bbox().pmax().col()
+ << " "
+ << str;
+ }
}
- // The string has been allocated by Tesseract. We must free it.
- delete [] s;
+ // The string has been allocated by Tesseract. We must free it.
+ delete[] s;
- if (!output_file.empty())
- file.close();
-
- // See the above explanations about setlocale.
- setlocale(LC_NUMERIC, "");
+ if (!output_file.empty())
+ file.close();
+ // See the above explanations about setlocale.
+ setlocale(LC_NUMERIC, "");
}
-
# else // HAVE_TESSERACT_3
-
template <typename L>
void
recognition(line_set<L>& lines, const char *language)
{
+ using namespace mln;
+
mln_trace("scribo::text::recognition");
// See the above explanations about setlocale.
@@ -314,8 +315,7 @@ namespace scribo
typedef mln_ch_value(L,bool) I;
-
- /// Use text bboxes with Tesseract
+ // Use text bboxes with Tesseract.
for_all_lines(i, lines)
{
if (! lines(i).is_textline())
@@ -332,12 +332,13 @@ namespace scribo
I text_ima(box);
data::fill(text_ima, false);
- // Careful : background is set to 'False'
+ // Careful: background is set to 'false'.
const component_set<L>& comp_set = lines.components();
const L& lbl = comp_set.labeled_image();
- // Extract each character components to create the line image.
- const mln::util::array<component_id_t>& comps = lines(i).component_ids();
+ // Extract each character component to create the line image.
+ const mln::util::array<component_id_t>& comps =
+ lines(i).component_ids();
for_all_elements(e, lines(i).component_ids())
{
unsigned comp_id = comps(e);
@@ -352,10 +353,11 @@ namespace scribo
// Recognize characters.
tess.SetImage(
(unsigned char*) &text_ima(text_ima.domain().pmin()),
- text_ima.ncols(), // n cols
- text_ima.nrows(), // n rows
- sizeof (bool), // Pixel size.
- text_ima.ncols() * sizeof (bool) + 2 * text_ima.border()); // Row_offset
+ text_ima.ncols(), // n cols
+ text_ima.nrows(), // n rows
+ sizeof (bool), // pixel size
+ text_ima.ncols() * sizeof (bool)
+ + 2 * text_ima.border()); // row offset
char *s = tess.GetUTF8Text();
if (s != 0)
@@ -374,13 +376,14 @@ namespace scribo
}
-
template <typename I>
void
recognition(const Image<I>& line_,
const char *language,
const std::string& output_file)
{
+ using namespace mln;
+
mln_trace("scribo::text::recognition");
const I& line = exact(line_);
@@ -404,48 +407,44 @@ namespace scribo
// Recognize characters.
char* s = tess.TesseractRect(
(unsigned char*) &line(line.domain().pmin()),
- sizeof (bool), // Pixel size.
- line.ncols() * sizeof (bool) + line.border() * 2, // Row_offset
- 0, // Left
- 0, // Top
- line.ncols(), // n cols
- line.nrows()); // n rows
-
- if (s != 0)
+ sizeof (bool), // pixel size
+ line.ncols() * sizeof (bool) + line.border() * 2, // row offset
+ 0, // left
+ 0, // top
+ line.ncols(), // n cols
+ line.nrows()); // n rows
+
+ if (s != 0)
{
if (!output_file.empty())
- {
- std::string str(s);
- str = str.substr(0, str.length() - 1);
- file << line.domain().bbox().pmin().row()
- << " "
- << line.domain().bbox().pmin().col()
- << " "
- << line.domain().bbox().pmax().row()
- << " "
- << line.domain().bbox().pmax().col()
- << " "
- << str;
- }
+ {
+ std::string str(s);
+ str = str.substr(0, str.length() - 1);
+ file << line.domain().bbox().pmin().row()
+ << " "
+ << line.domain().bbox().pmin().col()
+ << " "
+ << line.domain().bbox().pmax().row()
+ << " "
+ << line.domain().bbox().pmax().col()
+ << " "
+ << str;
+ }
}
- // The string has been allocated by Tesseract. We must free it.
- delete [] s;
+ // The string has been allocated by Tesseract. We must free it.
+ delete[] s;
- if (!output_file.empty())
- file.close();
-
- // Restore the default locale from the environment.
- setlocale(LC_NUMERIC, "");
+ if (!output_file.empty())
+ file.close();
+ // Restore the default locale from the environment.
+ setlocale(LC_NUMERIC, "");
}
# endif // ! HAVE_TESSERACT_2
-
-
-
# endif // ! MLN_INCLUDE_ONLY
} // end of namespace scribo::text
--
1.7.10.4