* src/text_recognition_in_picture.cc: Use new routines. Cleanup
useless comments. Use a height ratio filter to improve OCR
results.
---
scribo/ChangeLog | 6 ++
scribo/src/text_recognition_in_picture.cc | 129 ++++++++++++++---------------
2 files changed, 68 insertions(+), 67 deletions(-)
diff --git a/scribo/ChangeLog b/scribo/ChangeLog
index 54a9d1c..1ce0459 100644
--- a/scribo/ChangeLog
+++ b/scribo/ChangeLog
@@ -1,5 +1,11 @@
2010-08-09 Guillaume Lazzara <z(a)lrde.epita.fr>
+ Cleanup and improve text recognition.
+
+
+
+2010-08-09 Guillaume Lazzara <z(a)lrde.epita.fr>
+
Cleanup and make use of the new routines.
* src/text_in_picture.cc: Here.
diff --git a/scribo/src/text_recognition_in_picture.cc b/scribo/src/text_recognition_in_picture.cc
index 119f757..b69411e 100644
--- a/scribo/src/text_recognition_in_picture.cc
+++ b/scribo/src/text_recognition_in_picture.cc
@@ -74,8 +74,10 @@
#include <scribo/primitive/group/from_single_link.hh>
#include <scribo/primitive/regroup/from_single_left_link.hh>
+#include <scribo/primitive/regroup/from_single_left_link_wrt_h_ratio.hh>
+
+#include <scribo/filter/objects_size_ratio.hh>
-//#include <scribo/filter/objects_with_holes.hh>
#include <scribo/filter/object_groups_with_holes.hh>
#include <scribo/filter/object_links_bbox_h_ratio.hh>
@@ -95,6 +97,7 @@
#include <scribo/debug/usage.hh>
#include <scribo/preprocessing/split_bg_fg.hh>
+#include <scribo/preprocessing/rotate_90.hh>
#include <scribo/make/debug_filename.hh>
@@ -104,7 +107,6 @@
#include <scribo/src/afp/components.hh>
#include <scribo/src/afp/link.hh>
-#include <scribo/src/afp/regroup.hh>
#include <scribo/core/line_set.hh>
#include <scribo/text/recognition.hh>
@@ -116,11 +118,15 @@
const char *args_desc[][2] =
{
- { "input.ppm", "A color image." },
+ { "input.*", "An image." },
{ "ouput.ppm", "A color image where the text is highlighted." },
{ "out.txt", "Text recognized text with its position." },
- { "max_dim_size", "The highest dimension size of the image used for computation. It is used to find a resize factor and impacts on the performance. (1024 by default)" },
- { "lbl.ppm", "A color image with the labeled text components. (Considered as optional debug)" },
+ { "max_dim_size", "The highest dimension size of the image used for "
+ "computation. It is used to find a resize factor and impacts on the"
+ " performance. (1024 by default)" },
+ { "lambda", "Lambda value used for foreground extraction." },
+ { "lbl.ppm", "A color image with the labeled text components. (Considered"
+ " as optional debug)" },
{0, 0}
};
@@ -132,6 +138,7 @@ namespace mln
{
config()
{
+ min_size_ratio = 0.2f;
max_dim_size = 1024;
sauvola_s = 2u; // 3?
@@ -146,6 +153,9 @@ namespace mln
group_min_holes = 3;
}
+ // Component filtering
+ double min_size_ratio;
+
// Image resizing factor
unsigned max_dim_size;
@@ -173,51 +183,6 @@ mln::config conf;
namespace mln
{
- template <typename I, typename L>
- mln_concrete(I)
- compute_text_image(const I& input_rgb,
- const scribo::component_set<L>& grouped_objects)
- {
- unsigned shift = 5;
- float height = 1, width = 0;
- for_all_comps(i, grouped_objects)
- if (grouped_objects(i).is_valid())
- {
- height += grouped_objects(i).bbox().nrows() + shift;
- width = math::max(static_cast<float>(grouped_objects(i).bbox().ncols()),
- width);
- }
- if (width == 0)
- width = 1;
-
- I output(height, width);
- data::fill(output, literal::black);
-
- algebra::vec<2, float> dv;
- dv[0] = 0;
- dv[1] = 0;
- for_all_comps(i, grouped_objects)
- if (grouped_objects(i).is_valid())
- {
- mln_VAR(tmp, duplicate(input_rgb | grouped_objects(i).bbox()));
-
- typedef fun::x2x::translation<mln_site_(I)::dim, float> trans_t;
- trans_t trans(dv - grouped_objects(i).bbox().pmin().to_vec());
-
- mln_domain(I)
- tr_box(grouped_objects(i).bbox().pmin().to_vec() + trans.t(),
- grouped_objects(i).bbox().pmax().to_vec() + trans.t());
-
- tr_image<mln_domain(I), tmp_t, trans_t> tr_ima(tr_box, tmp, trans);
-
- data::paste(tr_ima, output);
- dv[0] += grouped_objects(i).bbox().nrows() + shift;
- }
-
- return output;
- }
-
-
template <typename I>
unsigned get_factor(const I& ima)
{
@@ -238,13 +203,17 @@ namespace mln
int main(int argc, char* argv[])
{
using namespace scribo;
+ using namespace scribo::primitive;
using namespace mln;
- if (argc < 4 || argc > 6)
+ if (argc < 4 || argc > 7)
return scribo::debug::usage(argv,
- "Find text in a photo.\n\n\
-Common usage: ./text_in_photo_fast input.ppm output.ppm out.txt [max_dim_size] [lbl.ppm]",
- "input.ppm output.ppm out.txt [max_dim_size] [lbl.ppm]",
+ "Find text in a photo.\n\n"
+ "Common usage: ./text_recognition_in_picture"
+ " input.* output.ppm out.txt [max_dim_size]"
+ " [lambda] [lbl.ppm]",
+ "input.* output.ppm out.txt [max_dim_size]"
+ " [lambda] [lbl.ppm]",
args_desc);
@@ -266,7 +235,12 @@ Common usage: ./text_in_photo_fast input.ppm output.ppm out.txt [max_dim_size] [
unsigned lambda;
- lambda = 1.2 * (input_rgb.nrows() + input_rgb.ncols());
+ if (argc >= 6)
+ lambda = atoi(argv[5]);
+ else
+ lambda = 1.2 * (input_rgb.nrows() + input_rgb.ncols());
+
+ std::cout << "Using lambda = " << lambda << std::endl;
image2d<value::int_u8> intensity_ima;
@@ -275,6 +249,24 @@ Common usage: ./text_in_photo_fast input.ppm output.ppm out.txt [max_dim_size] [
fg = preprocessing::split_bg_fg(input_rgb, lambda, 32).second();
intensity_ima = data::transform(fg, mln::fun::v2v::rgb_to_int_u<8>());
+// // Perform an initial rotation if needed.
+// // input_rgb = geom::rotate(input_rgb, -45, literal::black);
+// intensity_ima = geom::rotate(intensity_ima, -45);
+
+// // Make sure the domain origin is set to (0,0).
+// box2d rb = intensity_ima.domain();
+// box2d b(geom::nrows(intensity_ima), geom::ncols(intensity_ima));
+// // {
+// // image2d<value::rgb8> tmp(b);
+// // data::paste_without_localization(input_rgb, tmp);
+// // input_rgb = tmp;
+// // }
+// {
+// image2d<value::int_u8> tmp(b);
+// data::paste_without_localization(intensity_ima, tmp);
+// intensity_ima = tmp;
+// }
+
// Binarize foreground to use it in the processing chain.
@@ -292,18 +284,19 @@ Common usage: ./text_in_photo_fast input.ppm output.ppm out.txt [max_dim_size] [
typedef image2d<value::label_16> L;
/// Finding components.
- typedef component_set<L> Obj;
- Obj filtered_components;
-
+ component_set<L> filtered_components;
{
mln::util::array<std::pair<box2d, std::pair<point2d, unsigned> > >
attribs;
value::label_16 ncomponents;
L components = extract_components(input, ncomponents, attribs);
- filtered_components = Obj(components, ncomponents, attribs);
+ filtered_components = component_set<L>(components, ncomponents, attribs);
}
+ filtered_components = filter::objects_size_ratio(filtered_components,
+ conf.min_size_ratio);
+
/// linking potential components
mln::util::couple<object_links<L>, object_links<L> >
links = primitive::link::left_right(filtered_components);
@@ -325,8 +318,6 @@ Common usage: ./text_in_photo_fast input.ppm output.ppm out.txt [max_dim_size] [
-
-
//Remove links if bboxes overlap too much.
object_links<L> overlap_filtered_links
= filter::object_links_bbox_overlap(hratio_filtered_links,
@@ -353,22 +344,26 @@ Common usage: ./text_in_photo_fast input.ppm output.ppm out.txt [max_dim_size] [
// Grouping groups together if possible.
- groups = primitive::regroup::from_single_left_link(filtered_thin_groups,
- conf.regroup_dmax);
+ groups = regroup::from_single_left_link_wrt_h_ratio(filtered_thin_groups,
+ conf.regroup_dmax,
+ conf.bbox_h_ratio);
/// Filter grouped objects not having enough background components.
groups = scribo::filter::object_groups_with_holes(groups,
- conf.group_min_holes);
+ conf.group_min_holes);
component_set<L> comps = primitive::group::apply(groups);
- if (argc > 5)
+ if (argc > 6)
mln::io::ppm::save(mln::labeling::colorize(value::rgb8(),
comps.labeled_image(),
comps.nelements()),
- argv[5]);
+ argv[6]);
+// mln::io::ppm::save(scribo::debug::highlight_text_area_rotated(input_rgb,
+// comps, -45, rb),
+// argv[2]);
mln::io::ppm::save(scribo::debug::highlight_text_area(input_rgb, comps),
- argv[2]);
+ argv[2]);
scribo::line_set<L> lines = scribo::make::line_set(groups);
--
1.5.6.5