4230: Add new programs to extract data from microfilms.

* lazzara/afp/micro_films/Makefile: New. * lazzara/afp/micro_films/demat_mf_afp.sh: New script running various programs. * lazzara/afp/micro_films/keep_background.cc, * lazzara/afp/micro_films/extract_mf.cc, * lazzara/afp/micro_films/split_image.cc: new dedicated programs for microfilms data extraction. --- milena/sandbox/ChangeLog | 14 ++ milena/sandbox/lazzara/afp/micro_films/Makefile | 8 ++ .../lazzara/afp/micro_films/demat_mf_afp.sh | 52 ++++++++ .../sandbox/lazzara/afp/micro_films/extract_mf.cc | 126 ++++++++++++++++++++ .../lazzara/afp/micro_films/keep_background.cc | 85 +++++++++++++ .../sandbox/lazzara/afp/micro_films/split_image.cc | 80 +++++++++++++ 6 files changed, 365 insertions(+), 0 deletions(-) create mode 100644 milena/sandbox/lazzara/afp/micro_films/Makefile create mode 100755 milena/sandbox/lazzara/afp/micro_films/demat_mf_afp.sh create mode 100644 milena/sandbox/lazzara/afp/micro_films/extract_mf.cc create mode 100644 milena/sandbox/lazzara/afp/micro_films/keep_background.cc create mode 100644 milena/sandbox/lazzara/afp/micro_films/split_image.cc diff --git a/milena/sandbox/ChangeLog b/milena/sandbox/ChangeLog index 5ec4781..216b67c 100644 --- a/milena/sandbox/ChangeLog +++ b/milena/sandbox/ChangeLog @@ -1,3 +1,17 @@ +2009-07-01 Guillaume Lazzara <lazzara@lrde.epita.fr> + + Add new programs to extract data from microfilms. + + * lazzara/afp/micro_films/Makefile: New. + + * lazzara/afp/micro_films/demat_mf_afp.sh: New script running various + programs. + + * lazzara/afp/micro_films/keep_background.cc, + * lazzara/afp/micro_films/extract_mf.cc, + * lazzara/afp/micro_films/split_image.cc: new dedicated programs for + microfilms data extraction. + 2009-07-03 Edwin Carlinet <carlinet@lrde.epita.fr> Add some utils to handle objects detection with mean color attribute. 
diff --git a/milena/sandbox/lazzara/afp/micro_films/Makefile b/milena/sandbox/lazzara/afp/micro_films/Makefile
new file mode 100644
index 0000000..4c81cc0
--- /dev/null
+++ b/milena/sandbox/lazzara/afp/micro_films/Makefile
@@ -0,0 +1,8 @@
+CXXFLAGS = -I../../../../ -I. -I../../../../../ -I../../../bin -O3 -DNDEBUG
+# No explicit rule below: each program is built from its .cc file by make's implicit rules.
+PROGRAMS = \
+	extract_mf \
+	split_image \
+	keep_background
+
+all: $(PROGRAMS)
diff --git a/milena/sandbox/lazzara/afp/micro_films/demat_mf_afp.sh b/milena/sandbox/lazzara/afp/micro_films/demat_mf_afp.sh
new file mode 100755
index 0000000..99f71d4
--- /dev/null
+++ b/milena/sandbox/lazzara/afp/micro_films/demat_mf_afp.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+# Split a microfilm scan into documents, then binarize, filter and run text extraction on each.
+PATH_TO_BIN=/lrde/stockholm/lazzara/svn/olena/git/oln/milena/sandbox/bin
+PATH_TO_SCRIBO=/lrde/stockholm/lazzara/svn/olena/git/build/scribo/src
+
+if [ $# -ne 2 ]; then
+  echo "Usage: $0 <input.pgm> <initial rotation>"
+  exit 1
+fi
+
+case "$1" in /*) input="$1" ;; *) input="$PWD/$1" ;; esac # Do not prefix an absolute path with $PWD.
+bname=$(basename "$1" .pgm)
+mkdir -p "$bname"
+cd "$bname" || exit 1 # All outputs are written in a directory named after the input.
+
+
+echo "* Running pre-processing algorithms on $1."
+"${PATH_TO_SCRIBO}/binarization/simple" "$input" "${bname}.pbm"
+"${PATH_TO_BIN}/morpho/closing/structural_rectangle2d" "${bname}.pbm" 21 21 "${bname}_clo.pbm"
+./../keep_background "${bname}_clo.pbm" "${bname}_bg.pbm"
+"${PATH_TO_BIN}/morpho/dilation_rectangle2d" "${bname}_bg.pbm" 70 70 "${bname}_clo_dil.pbm"
+"${PATH_TO_SCRIBO}/negate" "${bname}_clo_dil.pbm" "${bname}_clo_dil_i.pbm"
+
+echo "* Splitting $1 into several documents."
+./../extract_mf "$input" "${bname}.pbm" "${bname}_clo_dil_i.pbm" "$2"
+
+echo "* Processing extracted documents"
+for f in mf_*_.pgm; do
+  [ -e "$f" ] || continue # Unmatched glob: no document was extracted.
+  echo " - Processing $f..."
+  fbname=$(basename "$f" .pgm)
+
+  # Compute two different binary versions.
+  echo " # Binarization."
+  "${PATH_TO_BIN}/level_objects" "$f" 100 "${fbname}_level.pgm"
+  "${PATH_TO_SCRIBO}/binarization/simple" "${fbname}_level.pgm" "${fbname}_level.pbm"
+
+  "${PATH_TO_SCRIBO}/binarization/simple" "$f" "${fbname}.pbm"
+
+  # '_level' or '' according to which binary version you want to use.
+  use_level=''
+
+  "${PATH_TO_BIN}/logical/not" "${fbname}${use_level}.pbm" "${fbname}${use_level}_i.pbm"
+
+  echo " # Filtering."
+  "${PATH_TO_SCRIBO}/filter/small_objects" "${fbname}${use_level}_i.pbm" 3 "${fbname}${use_level}_small.pbm"
+  "${PATH_TO_SCRIBO}/filter/thin_objects" "${fbname}${use_level}_small.pbm" 3 "${fbname}${use_level}_small_thin.pbm"
+
+  echo " # Text extraction."
+  "${PATH_TO_SCRIBO}/extract_text_double_link" "${fbname}${use_level}_small_thin.pbm" 100 100 "${fbname}_text"
+done
+
diff --git a/milena/sandbox/lazzara/afp/micro_films/extract_mf.cc b/milena/sandbox/lazzara/afp/micro_films/extract_mf.cc
new file mode 100644
index 0000000..6587d1d
--- /dev/null
+++ b/milena/sandbox/lazzara/afp/micro_films/extract_mf.cc
@@ -0,0 +1,126 @@
+// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License.
This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+/// \file
+///
+/// Extract, for each object of a mask, the matching gray level cell rotated by the computed angle.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/image/imorph/labeled_image.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+#include <mln/core/alias/neighb2d.hh>
+#include <mln/core/var.hh>
+
+#include <mln/data/fill.hh>
+#include <mln/debug/filename.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/io/pgm/all.hh>
+#include <mln/labeling/blobs.hh>
+#include <mln/labeling/background.hh>
+#include <mln/labeling/n_max.hh>
+
+#include <mln/value/label_16.hh>
+
+#include <mln/geom/rotate.hh>
+#include <mln/pw/all.hh>
+#include <mln/logical/not.hh>
+
+#include <tools/usage.hh>
+#include <scribo/preprocessing/unskew.hh>
+#include <scribo/filter/small_objects.hh>
+
+#include <mln/data/compare.hh>
+
+
+const char *args_desc[][2] =
+{
+  { "input.pgm", "A gray level image." },
+  { "input.pbm", "A binary image." },
+  { "mask.pbm", "A binary image. Objects are set to 'true'. Will be used to split input.pbm into several images." },
+  { "rot", "Initial rotation in degrees." },
+  { 0, 0 }
+};
+
+
+// Usage: extract_mf input.pgm input.pbm mask.pbm rot
+int main(int argc, char *argv[])
+{
+  using namespace mln;
+
+  if (argc != 5)
+    return tools::usage(argv,
+                        "Extract an image for each object in the input image.",
+                        "input.pgm input.pbm mask.pbm rot",
+                        args_desc, "Gray level images extracted from the input.");
+
+  image2d<value::int_u8> input;
+  io::pgm::load(input, argv[1]);
+
+  image2d<bool> input_bw;
+  io::pbm::load(input_bw, argv[2]);
+
+  image2d<bool> mask;
+  io::pbm::load(mask, argv[3]);
+  // Label the 8-connected components of the mask; one output per label.
+  value::label_16 nlabels;
+  typedef image2d<value::label_16> lbl_t;
+  lbl_t lbl_ = labeling::blobs(mask, c8(), nlabels);
+  labeled_image<lbl_t> lbl(lbl_, nlabels);
+
+  debug::internal::filename_prefix = "mf";
+
+  const int rot = atoi(argv[4]); // Initial rotation in degrees, parsed once instead of twice per object.
+  value::label_16 nbglabels;
+  lbl_t lbl_bg = labeling::background(input_bw, c8(), nbglabels);
+
+  // Should be always true...
+  value::label_16 bg = lbl_bg(point2d(0,0));
+  // ... but not as robust as the line below.
+//  value::label_16 bg = labeling::n_max(lbl_bg, nbglabels, 2)[1];
+
+  logical::not_inplace(input_bw);
+
+  for (unsigned i = 1; i <= lbl.nlabels(); ++i)
+    {
+      io::pbm::save(input_bw | lbl.bbox(i), debug::filename(".pbm", i));
+      mln_VAR(tmp_bw, input_bw | lbl.bbox(i));
+      data::fill((tmp_bw | (pw::value(lbl_bg) == pw::cst(bg))).rw(), false);
+
+      mln_VAR(cleaned,
+              scribo::preprocessing::unskew(geom::rotate(tmp_bw, rot)));
+      // The second member of unskew()'s result is added to the initial rotation below.
+      double angle = cleaned.second();
+
+//      io::pbm::save(cleaned.first(), debug::filename("object.pbm"));
+      mln_VAR(cell, input | lbl.bbox(i));
+      data::fill((cell | (pw::value(lbl_bg) == pw::cst(bg))).rw(),
+                 mln_max(value::int_u8));
+      io::pgm::save(geom::rotate(cell, angle + rot,
+                                 mln_max(value::int_u8)),
+                    debug::filename(".pgm", i));
+    }
+
+}
diff --git a/milena/sandbox/lazzara/afp/micro_films/keep_background.cc b/milena/sandbox/lazzara/afp/micro_films/keep_background.cc
new file mode 100644
index 0000000..78373ad
--- /dev/null
+++ b/milena/sandbox/lazzara/afp/micro_films/keep_background.cc
@@ -0,0 +1,85 @@
+// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction.
Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+/// \file
+///
+/// Keep the background from a binary image.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/alias/neighb2d.hh>
+
+#include <mln/pw/all.hh>
+#include <mln/core/image/dmorph/image_if.hh>
+
+#include <mln/data/fill.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/labeling/blobs.hh>
+
+#include <mln/logical/not.hh>
+
+#include <mln/value/label_16.hh>
+
+#include <tools/usage.hh>
+
+
+const char *args_desc[][2] =
+{
+  { "input.pbm", "A binary image. Objects are set to 'true'." },
+  { 0, 0 }
+};
+
+
+// Usage: keep_background input.pbm output.pbm
+int main(int argc, char *argv[])
+{
+  using namespace mln;
+
+  if (argc != 3)
+    return tools::usage(argv,
+                        "Extract the background from a binary image.",
+                        "input.pbm output.pbm",
+                        args_desc, "A binary image. Background is set to 'true'.");
+
+  image2d<bool> input;
+  io::pbm::load(input, argv[1]);
+  // Negate so that background pixels become 'true' and get labeled by blobs().
+  logical::not_inplace(input);
+
+  typedef value::label_16 V;
+  V nlabels;
+  image2d<V> lbl = labeling::blobs(input, c8(), nlabels);
+
+  image2d<bool> output;
+  initialize(output, input);
+  // NOTE(review): assumes the pixel at the origin belongs to the background -- confirm.
+  V bg_lbl = lbl(literal::origin);
+  // Only the component holding the origin is kept ('true'); everything else is 'false'.
+  data::fill(output, false);
+  data::fill((output | (pw::value(lbl) == pw::cst(bg_lbl))).rw(), true);
+
+  io::pbm::save(output, argv[2]);
+}
+
diff --git a/milena/sandbox/lazzara/afp/micro_films/split_image.cc b/milena/sandbox/lazzara/afp/micro_films/split_image.cc
new file mode 100644
index 0000000..b0c71c7
--- /dev/null
+++ b/milena/sandbox/lazzara/afp/micro_films/split_image.cc
@@ -0,0 +1,80 @@
+// Copyright (C) 2009 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+
+/// \file
+///
+/// Split an image into several smaller images based on the objects.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/core/image/imorph/labeled_image.hh>
+#include <mln/core/alias/neighb2d.hh>
+
+#include <mln/data/fill.hh>
+#include <mln/debug/filename.hh>
+#include <mln/io/pbm/all.hh>
+#include <mln/io/pgm/all.hh>
+#include <mln/labeling/blobs.hh>
+#include <mln/value/int_u8.hh>
+
+#include <tools/usage.hh>
+
+
+const char *args_desc[][2] =
+{
+  { "input.pgm", "A gray level image." },
+  { "mask.pbm", "A binary image. Objects are set to 'true'. Will be used to split input.pgm into several images." },
+  { 0, 0 }
+};
+
+
+// Usage: split_image input.pgm mask.pbm
+int main(int argc, char *argv[])
+{
+  using namespace mln;
+
+  if (argc != 3)
+    return tools::usage(argv,
+                        "Extract an image for each object in the input image.",
+                        "input.pgm mask.pbm",
+                        args_desc, "Gray level images extracted from the input.");
+
+  image2d<value::int_u8> input;
+  io::pgm::load(input, argv[1]);
+
+  image2d<bool> mask;
+  io::pbm::load(mask, argv[2]);
+  // NOTE(review): int_u8 labels presumably cap the object count at 255 (extract_mf.cc uses label_16) -- confirm.
+  value::int_u8 nlabels;
+  typedef image2d<value::int_u8> lbl_t;
+  lbl_t lbl_ = labeling::blobs(mask, c8(), nlabels);
+  labeled_image<lbl_t> lbl(lbl_, nlabels);
+
+  debug::internal::filename_prefix = "split";
+  // Labels run from 1 to nlabels() inclusive (same bounds as extract_mf.cc): '<' skipped the last object.
+  for (unsigned i = 1; i <= lbl.nlabels(); ++i)
+    io::pgm::save(input | lbl.bbox(i), debug::filename("object.pgm"));
+
+}
-- 
1.5.6.5
participants (1)
-
Guillaume Lazzara