
URL: https://svn.lrde.epita.fr/svn/oln/branches/cleanup-2008/milena/sandbox ChangeLog: 2008-10-28 Matthieu Garrigues <garrigues@lrde.epita.fr> Add some bench tests on ocr preprocessing. * garrigues/ocr/Makefile: New, compile an ocr with preprocessing and another one without. * garrigues/ocr/check.sh: New, script to bench ocr preprocessing. * garrigues/ocr/ocr.cc: Rename as... * garrigues/ocr/ocr_with_preprocess.cc: ...this. * garrigues/ocr/ocr_without_preprocess.cc: New, version without preprocessing. --- Makefile | 21 +++++++ check.sh | 29 ++++++++++ ocr_with_preprocess.cc | 130 ++++++++++++++++++++++++++++++++++++++++++++++ ocr_without_preprocess.cc | 98 ++++++++++++++++++++++++++++++++++ 4 files changed, 278 insertions(+) Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr.cc (deleted) =================================================================== Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc =================================================================== --- branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc (revision 0) +++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_without_preprocess.cc (revision 2710) @@ -0,0 +1,98 @@ +// Copyright (C) 2008 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of the Olena Library. This library is free +// software; you can redistribute it and/or modify it under the terms +// of the GNU General Public License version 2 as published by the +// Free Software Foundation. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this library; see the file COPYING. 
If not, write to +// the Free Software Foundation, 51 Franklin Street, Fifth Floor, +// Boston, MA 02111-1307, USA. +// +// As a special exception, you may use this file as part of a free +// software library without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to +// produce an executable, this file does not by itself cause the +// resulting executable to be covered by the GNU General Public +// License. This exception does not however invalidate any other +// reasons why the executable file might be covered by the GNU General +// Public License. + +#include <iostream> + +#include <mln/core/image/image2d.hh> +#include <mln/core/alias/window2d.hh> +#include <mln/core/alias/neighb2d.hh> + +#include <mln/core/image/cast_image.hh> + +#include <mln/value/int_u8.hh> + +#include "resize.hh" +#include "enlarge.hh" +//#include "skeleton.hh" +#include <mln/linear/gaussian.hh> + +#include <mln/trace/all.hh> +#include <mln/io/pgm/load.hh> +#include <mln/io/pgm/save.hh> +#include <mln/io/pbm/load.hh> +#include <mln/io/pbm/save.hh> +#include <mln/core/alias/w_window2d_float.hh> + +#include <mln/debug/println.hh> +#include <mln/geom/chamfer.hh> +#include <mln/make/win_chamfer.hh> +#include <mln/labeling/regional_maxima.hh> +#include <mln/morpho/dilation.hh> + +#include <tesseract/baseapi.h> + +// _COMPILATION_ +// g++ -DNDEBUG -O3 -I../../.. 
ocr_without_preprocess.cc -L/usr/lib -ltesseract_full -lpthread
+
+// Run Tesseract on the whole of `input' and return the recognized text.
+// lang: expected language (forwarded to TessBaseAPI::InitWithLanguage).
+// The caller owns the returned buffer and must release it with delete [].
+template <typename T>
+char* tesseract(const char* lang, const mln::image2d<T>& input)
+{
+  TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL);
+  char* s = TessBaseAPI::TesseractRect(
+      (unsigned char*) input.buffer(),
+      sizeof (T),                  // bytes per pixel
+      input.ncols() * sizeof (T),  // bytes per line
+      0, 0,                        // left, top
+      input.ncols(),               // width
+      input.nrows());              // height
+  return s;
+}
+
+int main(int argc, char** argv)
+{
+  using namespace mln;
+  using value::int_u8;
+
+  image2d<bool> input;
+
+  if (argc < 2)
+  {
+    std::cerr << "Usage: " << argv[0] << " in.pbm" << std::endl;
+    return 1;
+  }
+
+  mln::border::thickness = 0;
+
+  io::pbm::load(input, argv[1]);
+
+  char* s = tesseract("fra", input);
+  std::cout << (s ? s : ""); // Streaming a null char* is undefined behavior.
+  delete [] s; // TesseractRect() allocates with new[]; free() is a mismatch.
+}
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh
===================================================================
--- branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh (revision 0)
+++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh (revision 2710)
@@ -0,0 +1,29 @@
+for i in input/*.pbm ; do
+  echo "==========================================="
+  echo "--------- $i"
+  echo "==========================================="
+
+  ref="$i.txt"
+  cat $ref
+  sed -e 's/\(.\)/\1\n/g' $ref > tmp/ref
+  total=`cat tmp/ref | wc -l `
+
+  ./ocr_without_preprocess $i tmp/`basename $i` > tmp/without
+  echo "_______________without preprocessing"
+  cat tmp/without
+  sed -e 's/\(.\)/\1\n/g' tmp/without > tmp/without.chars
+
+  ./ocr_with_preprocess $i tmp/`basename $i` > tmp/with
+  echo "_______________with preprocessing"
+  cat tmp/with
+  sed -e 's/\(.\)/\1\n/g' tmp/with > tmp/with.chars
+
+  # *.chars hold one character per line; never redirect sed onto its own input.
+  d_without=`diff ./tmp/without.chars tmp/ref | diffstat | grep insert | sed -r 's/.*, ([0-9]+) insertion.*/\1/g'`
+  echo "$((${d_without:-0} * 100 / $total))% missmatch without preprocessing"
+
+  # tmp/with.chars was built above, so the OCR does not have to run again.
+  d_with=`diff ./tmp/with.chars tmp/ref | diffstat | grep insert | sed -r 's/.*, ([0-9]+) insertion.*/\1/g'`
+  echo "$((${d_with:-0} * 100 / $total))% missmatch with preprocessing"
+  echo ""
+done
Property changes on: branches/cleanup-2008/milena/sandbox/garrigues/ocr/check.sh
___________________________________________________________________
Name: svn:executable
   + *
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc
===================================================================
--- branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc (revision 0)
+++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/ocr_with_preprocess.cc (revision 2710)
@@ -0,0 +1,130 @@
+// Copyright (C) 2008 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of the Olena Library. This library is free
+// software; you can redistribute it and/or modify it under the terms
+// of the GNU General Public License version 2 as published by the
+// Free Software Foundation.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this library; see the file COPYING. If not, write to
+// the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+// Boston, MA 02111-1307, USA.
+//
+// As a special exception, you may use this file as part of a free
+// software library without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to
+// produce an executable, this file does not by itself cause the
+// resulting executable to be covered by the GNU General Public
+// License. 
This exception does not however invalidate any other
+// reasons why the executable file might be covered by the GNU General
+// Public License.
+
+#include <iostream>
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/window2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+
+#include <mln/core/image/cast_image.hh>
+
+#include <mln/value/int_u8.hh>
+
+#include "resize.hh"
+#include "enlarge.hh"
+//#include "skeleton.hh"
+#include <mln/linear/gaussian.hh>
+
+#include <mln/trace/all.hh>
+#include <mln/io/pgm/load.hh>
+#include <mln/io/pgm/save.hh>
+#include <mln/io/pbm/load.hh>
+#include <mln/io/pbm/save.hh>
+#include <mln/core/alias/w_window2d_float.hh>
+
+#include <mln/debug/println.hh>
+#include <mln/geom/chamfer.hh>
+#include <mln/make/win_chamfer.hh>
+#include <mln/labeling/regional_maxima.hh>
+#include <mln/morpho/dilation.hh>
+
+#include <tesseract/baseapi.h>
+
+// _COMPILATION_
+// g++ -DNDEBUG -O3 -I../../.. ocr_with_preprocess.cc -L/usr/lib -ltesseract_full -lpthread
+
+// Run Tesseract on the whole of `input' and return the recognized text.
+// lang: expected language (forwarded to TessBaseAPI::InitWithLanguage).
+// The caller owns the returned buffer and must release it with delete [].
+template <typename T>
+char* tesseract(const char* lang, const mln::image2d<T>& input)
+{
+  TessBaseAPI::InitWithLanguage(NULL, NULL, lang, NULL, false, 0, NULL);
+  char* s = TessBaseAPI::TesseractRect(
+      (unsigned char*) input.buffer(),
+      sizeof (T),                  // bytes per pixel
+      input.ncols() * sizeof (T),  // bytes per line
+      0, 0,                        // left, top
+      input.ncols(),               // width
+      input.nrows());              // height
+  return s;
+}
+
+int main(int argc, char** argv)
+{
+  using namespace mln;
+  using value::int_u8;
+
+  image2d<bool> input;
+
+  if (argc < 3) // Both in.pbm (argv[1]) and out.pbm (argv[2]) are read below.
+  {
+    std::cerr << "Usage: " << argv[0] << " in.pbm out.pbm" << std::endl;
+    return 1;
+  }
+
+  mln::border::thickness = 0;
+
+  io::pbm::load(input, argv[1]);
+
+  // Resize
+  image2d<int_u8> output = enlarge(input, 1);
+
+  // TODO CLEANUP
+#if 0
+  // Blur.
+  output = linear::gaussian(output, 1);
+#endif
+
+#if 0
+  // Threshold
+  mln_piter_(image2d<unsigned>) p(output.domain());
+  for_all(p)
+  {
+    output(p) = output(p) > 127 ? 1 : 0;
+  }
+#endif
+
+#if 0
+  // Compute chamfer distance map.
+  const w_window2d_int& w_win = make::mk_chamfer_3x3_int<8, 0> ();
+  image2d<unsigned> out = geom::chamfer(output, w_win, 255);
+
+  for_all(p)
+  {
+    out(p) = out(p) > 10 ? 255 : 0;
+  }
+#endif
+
+  io::pgm::save(cast_image<int_u8>(output), argv[2]);
+
+  std::cout << "> with preprocessing." << std::endl;
+  char* s = tesseract("fra", output);
+  std::cout << (s ? s : ""); // Streaming a null char* is undefined behavior.
+  delete [] s; // TesseractRect() allocates with new[]; free() is a mismatch.
+}
Index: branches/cleanup-2008/milena/sandbox/garrigues/ocr/Makefile
===================================================================
--- branches/cleanup-2008/milena/sandbox/garrigues/ocr/Makefile (revision 0)
+++ branches/cleanup-2008/milena/sandbox/garrigues/ocr/Makefile (revision 2710)
@@ -0,0 +1,21 @@
+CXXFLAGS=-I../../.. -I${HOME}/local/include
+LFLAGS=-L${HOME}/local/lib -ltesseract_full -lpthread
+.PHONY: all clean check
+all: ocr_without_preprocess ocr_with_preprocess
+
+ocr_without_preprocess: ocr_without_preprocess.cc
+	g++ -DNDEBUG -O3 ${CXXFLAGS} $< ${LFLAGS} -o $@
+
+ocr_with_preprocess: ocr_with_preprocess.cc
+	g++ -DNDEBUG -O3 ${CXXFLAGS} $< ${LFLAGS} -o $@
+clean:
+	rm -f *.o ocr_without_preprocess ocr_with_preprocess
+
+logs:
+	mkdir logs
+
+tmp:
+	mkdir tmp
+
+check: logs tmp ocr_without_preprocess ocr_with_preprocess
+	./check.sh