URL:
https://svn.lrde.epita.fr/svn/oln/branches/cleanup-2008/milena/sandbox
ChangeLog:
2008-10-28 Matthieu Garrigues <garrigues(a)lrde.epita.fr>
Add some bench tests on ocr preprocessing.
* garrigues/ocr/Makefile: New. Build one OCR program with
preprocessing and another one without.
* garrigues/ocr/check.sh: New, script to bench ocr preprocessing.
* garrigues/ocr/ocr.cc: Rename as...
* garrigues/ocr/ocr_with_preprocess.cc: ...this.
* garrigues/ocr/ocr_without_preprocess.cc: New. Version without
preprocessing.
---
Makefile | 21 +++++++
check.sh | 29 ++++++++++
ocr_with_preprocess.cc | 130 ++++++++++++++++++++++++++++++++++++++++++++++
ocr_without_preprocess.cc | 98 ++++++++++++++++++++++++++++++++++
4 files changed, 278 insertions(+)
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr.cc (deleted)
===================================================================
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc
===================================================================
--- branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc (revision 0)
+++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc (revision 2710)
@@ -0,0 +1,98 @@
+// Copyright (C) 2008 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of the Olena Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License version 2 as published by the
+// Free Software Foundation.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING. If not, write to
+// the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+// Boston, MA 02111-1307, USA.
+//
+// As a special exception, you may use this file as part of a free
+// software library without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License. This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/window2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+
+#include <mln/core/image/cast_image.hh>
+
+#include <mln/value/int_u8.hh>
+
+#include "resize.hh"
+#include "enlarge.hh"
+//#include "skeleton.hh"
+#include <mln/linear/gaussian.hh>
+
+#include <mln/trace/all.hh>
+#include <mln/io/pgm/load.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/io/pbm/load.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/core/alias/w_window2d_float.hh>
+
+#include <mln/debug/println.hh>
+#include <mln/geom/chamfer.hh>
+#include <mln/make/win_chamfer.hh>
+#include <mln/labeling/regional_maxima.hh>
+#include <mln/morpho/dilation.hh>
+
+#include <tesseract/baseapi.h>
+
+// _COMPILATION_
+// g++ -DNDEBUG -O3 -I../../.. ocr_without_preprocess.cc -L/usr/lib -ltesseract_full -lpthread
+
+
+// Run the Tesseract OCR engine on a whole image.
+//
+// lang:  language handed to TessBaseAPI::InitWithLanguage (main
+//        passes "fra").
+// input: image whose pixel buffer is given to Tesseract untouched,
+//        described as sizeof (T) bytes per pixel and
+//        ncols() * sizeof (T) bytes per row.  That row size leaves no
+//        room for a border, which is why main sets
+//        mln::border::thickness to 0 before loading the image.
+//
+// Return whatever TessBaseAPI::TesseractRect gives back; releasing it
+// is left to the caller.
+template <typename T>
+char* tesseract(const char* lang, const mln::image2d<T>& input)
+{
+  TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL);
+
+  // Tesseract reads raw bytes, whatever the value type of the image.
+  unsigned char* pixels = (unsigned char*) input.buffer();
+
+  // Recognize the full image: the rectangle starts at (0, 0) and
+  // spans ncols() x nrows() pixels.
+  return TessBaseAPI::TesseractRect(pixels,
+                                    sizeof (T),
+                                    input.ncols() * sizeof (T),
+                                    0, 0,
+                                    input.ncols(),
+                                    input.nrows());
+}
+
+// Load a binary image and print the text Tesseract reads in it.
+//
+// argv[1]: input image (PBM).  Any further argument is ignored.
+// Exit status: 1 on bad usage or when tesseract() returns a null
+// pointer, 0 otherwise.
+int main(int argc, char** argv)
+{
+  using namespace mln;
+
+  image2d<bool> input;
+
+  if (argc < 2)
+  {
+    std::cerr << "Usage: " << argv[0] << " in.pbm" << std::endl;
+    return 1;
+  }
+
+  // tesseract() describes a row as ncols * sizeof (T) bytes, so the
+  // image has to be loaded without any border.
+  mln::border::thickness = 0;
+
+  io::pbm::load(input, argv[1]);
+
+  char* s = tesseract("fra", input);
+  if (!s)
+  {
+    // Streaming a null char* into std::cout is undefined behavior.
+    std::cerr << argv[0] << ": tesseract returned no result for "
+              << argv[1] << std::endl;
+    return 1;
+  }
+  std::cout << s;
+
+  // NOTE(review): Tesseract's baseapi.h documents the returned text as
+  // to be released with delete [], not free(); confirm against the
+  // installed version.
+  delete[] s;
+}
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh
===================================================================
--- branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh (revision 0)
+++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh (revision 2710)
@@ -0,0 +1,29 @@
+for i in input/*.pbm ; do
+ echo "==========================================="
+ echo "--------- $i"
+ echo "==========================================="
+
+ ref="$i.txt"
+ cat $ref
+ sed -e 's/\(.\)/\1\n/g' $ref > tmp/ref
+ total=`cat tmp/ref | wc -l `
+
+ ./ocr_without_preprocess $i tmp/`basename $i` > tmp/without
+ echo "_______________without preprocessing"
+ cat tmp/without
+ cat tmp/without | sed -e 's/\(.\)/\1\n/g' > tmp/without
+
+ ./ocr_with_preprocess $i tmp/`basename $i` > tmp/with
+ echo "_______________with preprocessing"
+ cat tmp/with
+ cat tmp/with | sed -e 's/\(.\)/\1\n/g' > tmp/with
+
+
+ d_without=`diff ./tmp/without tmp/ref | diffstat | grep insert | sed -r 's/.*,
([0-9]+) insertion.*/\1/g'`
+ echo "$(($d_without * 100 / $total))% missmatch without preprocessing"
+
+ ./ocr_with_preprocess $i tmp/`basename $i` | sed -e 's/\(.\)/\1\n/g' >
tmp/with
+ d_with=`diff ./tmp/with tmp/ref | diffstat | grep insert | sed -r 's/.*,
([0-9]+) insertion.*/\1/g'`
+ echo "$(($d_with * 100 / $total))% missmatch with preprocessing"
+ echo ""
+done
Property changes on: branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh
___________________________________________________________________
Name: svn:executable
+ *
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc
===================================================================
--- branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc (revision 0)
+++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc (revision 2710)
@@ -0,0 +1,130 @@
+// Copyright (C) 2008 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of the Olena Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License version 2 as published by the
+// Free Software Foundation.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING. If not, write to
+// the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+// Boston, MA 02111-1307, USA.
+//
+// As a special exception, you may use this file as part of a free
+// software library without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License. This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/window2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+
+#include <mln/core/image/cast_image.hh>
+
+#include <mln/value/int_u8.hh>
+
+#include "resize.hh"
+#include "enlarge.hh"
+//#include "skeleton.hh"
+#include <mln/linear/gaussian.hh>
+
+#include <mln/trace/all.hh>
+#include <mln/io/pgm/load.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/io/pbm/load.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/core/alias/w_window2d_float.hh>
+
+#include <mln/debug/println.hh>
+#include <mln/geom/chamfer.hh>
+#include <mln/make/win_chamfer.hh>
+#include <mln/labeling/regional_maxima.hh>
+#include <mln/morpho/dilation.hh>
+
+#include <tesseract/baseapi.h>
+
+// _COMPILATION_
+// g++ -DNDEBUG -O3 -I../../.. ocr_with_preprocess.cc -L/usr/lib -ltesseract_full -lpthread
+
+
+// Run the Tesseract OCR engine on a whole image.
+//
+// lang:  language handed to TessBaseAPI::InitWithLanguage (main
+//        passes "fra").
+// input: image whose pixel buffer is given to Tesseract untouched,
+//        described as sizeof (T) bytes per pixel and
+//        ncols() * sizeof (T) bytes per row.  That row size leaves no
+//        room for a border, which is why main sets
+//        mln::border::thickness to 0 before building any image.
+//
+// Return whatever TessBaseAPI::TesseractRect gives back; releasing it
+// is left to the caller.
+template <typename T>
+char* tesseract(const char* lang, const mln::image2d<T>& input)
+{
+  TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL);
+
+  // Tesseract reads raw bytes, whatever the value type of the image.
+  unsigned char* pixels = (unsigned char*) input.buffer();
+
+  // Recognize the full image: the rectangle starts at (0, 0) and
+  // spans ncols() x nrows() pixels.
+  return TessBaseAPI::TesseractRect(pixels,
+                                    sizeof (T),
+                                    input.ncols() * sizeof (T),
+                                    0, 0,
+                                    input.ncols(),
+                                    input.nrows());
+}
+
+// Load a binary image, preprocess it, save the preprocessed image and
+// print the text Tesseract reads in it.
+//
+// argv[1]: input image (PBM).
+// argv[2]: where the preprocessed image is saved (written with
+//          io::pgm::save).
+// Exit status: 1 on bad usage or when tesseract() returns a null
+// pointer, 0 otherwise.
+int main(int argc, char** argv)
+{
+  using namespace mln;
+  using value::int_u8;
+
+  image2d<bool> input;
+
+  // Both arguments are mandatory: argv[2] is read below.
+  if (argc < 3)
+  {
+    std::cerr << "Usage: " << argv[0] << " in.pbm out.pbm" << std::endl;
+    return 1;
+  }
+
+  // tesseract() describes a row as ncols * sizeof (T) bytes, so the
+  // images have to be built without any border.
+  mln::border::thickness = 0;
+
+  io::pbm::load(input, argv[1]);
+
+  // Resize
+  image2d<int_u8> output = enlarge(input, 1);
+
+  // TODO CLEANUP
+#if 0
+  // Blur.
+  output = linear::gaussian(output, 1);
+#endif
+
+#if 0
+  // Threshold
+  mln_piter_(image2d<unsigned>) p(output.domain());
+  for_all(p)
+  {
+    output(p) = output(p) > 127 ? 1 : 0;
+  }
+#endif
+
+#if 0
+  // Compute chamfer distance map.
+  const w_window2d_int& w_win = make::mk_chamfer_3x3_int<8, 0> ();
+  image2d<unsigned> out = geom::chamfer(output, w_win, 255);
+
+  for_all(p)
+  {
+    out(p) = out(p) > 10 ? 255 : 0;
+  }
+#endif
+
+  io::pgm::save(cast_image<int_u8>(output), argv[2]);
+
+  // NOTE(review): this banner goes to stdout along with the recognized
+  // text, so check.sh counts it when diffing against the reference.
+  std::cout << "> with preprocessing." << std::endl;
+  char* s = tesseract("fra", output);
+  if (!s)
+  {
+    // Streaming a null char* into std::cout is undefined behavior.
+    std::cerr << argv[0] << ": tesseract returned no result for "
+              << argv[1] << std::endl;
+    return 1;
+  }
+  std::cout << s;
+
+  // NOTE(review): Tesseract's baseapi.h documents the returned text as
+  // to be released with delete [], not free(); confirm against the
+  // installed version.
+  delete[] s;
+}
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/Makefile
===================================================================
--- branches/cleanup-2008/milena/sandbox/garrigues/ocr/Makefile (revision 0)
+++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/Makefile (revision 2710)
@@ -0,0 +1,21 @@
+# Build the two OCR bench programs and run the comparison script.
+
+# Header search paths: three directories up (presumably the Milena
+# root -- confirm) and a per-user install prefix.
+CXXFLAGS=-I../../.. -I${HOME}/local/include
+LFLAGS=-L${HOME}/local/lib -ltesseract_full -lpthread
+
+# These targets never produce a file bearing their own name.
+.PHONY: all clean check
+
+all: ocr_without_preprocess ocr_with_preprocess
+
+ocr_without_preprocess: ocr_without_preprocess.cc
+	g++ -DNDEBUG -O3 ${CXXFLAGS} $< ${LFLAGS} -o $@
+
+ocr_with_preprocess: ocr_with_preprocess.cc
+	g++ -DNDEBUG -O3 ${CXXFLAGS} $< ${LFLAGS} -o $@
+
+clean:
+	rm -f *.o ocr_without_preprocess ocr_with_preprocess
+
+# Working directories required by `check'.
+logs:
+	mkdir -p logs
+
+tmp:
+	mkdir -p tmp
+
+check: logs tmp ocr_without_preprocess ocr_with_preprocess
+	./check.sh