
* mln/io/pdf/get_header.hh, * mln/io/pdf/load.hh, * tests/io/pdf/Makefile.am, * tests/io/pdf/load.cc: New. * tests/io/Makefile.am: Add pdf/ subdir. --- milena/ChangeLog | 11 + milena/mln/io/{dicom => pdf}/get_header.hh | 85 ++++--- milena/mln/io/pdf/load.hh | 335 ++++++++++++++++++++++++++++ milena/tests/io/Makefile.am | 6 +- milena/tests/io/{pbms => pdf}/Makefile.am | 9 +- milena/tests/io/pdf/load.cc | 88 ++++++++ 6 files changed, 491 insertions(+), 43 deletions(-) copy milena/mln/io/{dicom => pdf}/get_header.hh (53%) create mode 100644 milena/mln/io/pdf/load.hh copy milena/tests/io/{pbms => pdf}/Makefile.am (82%) create mode 100644 milena/tests/io/pdf/load.cc diff --git a/milena/ChangeLog b/milena/ChangeLog index 4d53dd4..2b19aec 100644 --- a/milena/ChangeLog +++ b/milena/ChangeLog @@ -1,3 +1,14 @@ +2013-02-28 Guillaume Lazzara <z@lrde.epita.fr> + + Add support for loading multi-page PDF files using libpoppler. + + * mln/io/pdf/get_header.hh, + * mln/io/pdf/load.hh, + * tests/io/pdf/Makefile.am, + * tests/io/pdf/load.cc: New. + + * tests/io/Makefile.am: Add pdf/ subdir. + 2013-02-26 Guillaume Lazzara <z@lrde.epita.fr> * tests/unit_test/unit-tests.mk: Update. diff --git a/milena/mln/io/dicom/get_header.hh b/milena/mln/io/pdf/get_header.hh similarity index 53% copy from milena/mln/io/dicom/get_header.hh copy to milena/mln/io/pdf/get_header.hh index 8ebe375..1969689 100644 --- a/milena/mln/io/dicom/get_header.hh +++ b/milena/mln/io/pdf/get_header.hh @@ -1,4 +1,4 @@ -// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE) +// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE) // // This file is part of Olena. // @@ -23,22 +23,16 @@ // exception does not however invalidate any other reasons why the // executable file might be covered by the GNU General Public License.
-#ifndef MLN_IO_DICOM_GET_HEADER_HH -# define MLN_IO_DICOM_GET_HEADER_HH +#ifndef MLN_IO_PDF_GET_HEADER_HH +# define MLN_IO_PDF_GET_HEADER_HH /// \file /// -/// Load a DICOM file header. +/// Load PDF header. # include <iostream> # include <fstream> -# include <gdcm-2.0/gdcmReader.h> -# include <gdcm-2.0/gdcmImageReader.h> -# include <gdcm-2.0/gdcmWriter.h> -# include <gdcm-2.0/gdcmDataSet.h> -# include <gdcm-2.0/gdcmAttribute.h> - # include <mln/core/concept/image.hh> # include <mln/core/routine/initialize.hh> # include <mln/core/box_runstart_piter.hh> @@ -52,63 +46,78 @@ namespace mln namespace io { - namespace dicom + namespace pdf { - /// Store dicom file header. - struct dicom_header + /// Store pdf file header. + struct pdf_header { - // The number of dimensions. - unsigned dim; - - // The size in each dimension. - util::array<unsigned> size; + pdf_header(const poppler::document *doc); + + int page_count; + int pdf_version_major; + int pdf_version_minor; + bool is_encrypted; + bool is_linearized; + bool is_locked; + std::string metadata; }; - /// Retrieve header in a dicom file. - dicom_header get_header(const std::string& filename); + /// Retrieve header in a pdf file. + pdf_header get_header(const std::string& filename); # ifndef MLN_INCLUDE_ONLY + // Begin of pdf_header implementation. - dicom_header get_header(const std::string& filename) + pdf_header::pdf_header(const poppler::document *doc) { - trace::entering("mln::io::dicom::get_header"); + page_count = doc->pages(); + doc->get_pdf_version(&pdf_version_major, 0); + doc->get_pdf_version(0, &pdf_version_minor); + is_encrypted = doc->is_encrypted(); + is_linearized = doc->is_linearized(); + is_locked = doc->is_locked(); + metadata = doc->metadata().to_latin1(); + } + + // End of pdf_header implementation. 
- dicom_header header; - gdcm::ImageReader r; - r.SetFileName(filename.c_str()); - if (!r.Read()) + + inline + pdf_header get_header(const std::string& filename) + { + trace::entering("mln::io::pdf::get_header"); + + // Load document + poppler::document *pdf = poppler::document::load_from_file(filename); + if (pdf == 0) { - std::cerr << "error: cannot open file '" << filename << "'!"; + std::cerr << "Error: Cannot load PDF " << filename << std::endl; abort(); } - gdcm::Image& image = r.GetImage(); - - header.dim = image.GetNumberOfDimensions(); - const unsigned int* dims = image.GetDimensions(); + // Initialize structure with pdf information. + pdf_header header(pdf); - for (unsigned i = 2; i < header.dim; ++i) - header.size.append(dims[i]); // sli, ... - for (unsigned i = 0; i < 2; ++i) - header.size.append(dims[i]); // row, col + // Clear pdf document. + delete pdf; - trace::exiting("mln::io::dicom::get_header"); + trace::exiting("mln::io::pdf::get_header"); return header; } # endif // ! MLN_INCLUDE_ONLY - } // end of namespace mln::io::dicom + } // end of namespace mln::io::pdf } // end of namespace mln::io } // end of namespace mln -#endif // ! MLN_IO_DICOM_GET_HEADER_HH +#endif // ! MLN_IO_PDF_GET_HEADER_HH diff --git a/milena/mln/io/pdf/load.hh b/milena/mln/io/pdf/load.hh new file mode 100644 index 0000000..513a61d --- /dev/null +++ b/milena/mln/io/pdf/load.hh @@ -0,0 +1,335 @@ +// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#ifndef MLN_IO_PDF_LOAD_HH +# define MLN_IO_PDF_LOAD_HH + +/// \file +/// +/// \brief Load a multi-page PDF document using libpoppler. + +#include <iostream> +#include <mln/core/image/image2d.hh> +#include <poppler/cpp/poppler-document.h> +#include <poppler/cpp/poppler-page-renderer.h> + + +namespace mln +{ + + namespace io + { + + namespace pdf + { + + /**! Load a multi-page PDF document using libpoppler. + + \param[out] arr An array of images. + \param[in] filename The name of the input file. + \param[in] dpi Document resolution. + + Page numbering starts from 0. + */ + template <typename I> + void load(util::array<I>& arr, const std::string& filename, + float dpi); + + /**! Load a range of pages from a PDF document using libpoppler. + + \param[out] arr An array of images. + \param[in] filename The name of the input file. + \param[in] first_page First page number. + \param[in] last_page Last page number (inclusive). + \param[in] dpi Document resolution. + + Page numbering starts from 0. + */ + template <typename I> + void load(util::array<I>& arr, const std::string& filename, + int first_page, int last_page, float dpi); + + + /**! Load specific pages from a PDF document using libpoppler. + + \param[out] arr An array of images.
+ \param[in] filename The name of the input file. + \param[in] pages An array of page numbers. + \param[in] dpi Document resolution. + + Page numbering starts from 0. + */ + template <typename I> + void load(util::array<I>& arr, const std::string& filename, + util::array<int> pages, float dpi); + + /**! Load a specific page from a PDF document using libpoppler. + + \param[out] ima An image. + \param[in] filename The name of the input file. + \param[in] page The page number. + \param[in] dpi Document resolution. + + Page numbering starts from 0. + */ + template <typename I> + void load(Image<I>& ima, const std::string& filename, + int page, float dpi); + + + /**! \overload Load a multi-page PDF document using libpoppler. + + \param[out] arr An array of images. + \param[in] filename The name of the input file. + + DPI resolution is set to 300. + Page numbering starts from 0. + */ + template <typename I> + void load(util::array<I>& arr, const std::string& filename); + + /**! \overload Load specific pages from a PDF document using + libpoppler. + + \param[out] arr An array of images. + \param[in] filename The name of the input file. + \param[in] pages An array of page numbers. + + DPI resolution is set to 300. + Page numbering starts from 0.
+ */ + template <typename I> + void load(util::array<I>& arr, const std::string& filename, + util::array<int> pages); + + +# ifndef MLN_INCLUDE_ONLY + + namespace internal + { + + template <typename I> + void copy_data_argb32(I& ima, const poppler::image& pima) + { + mln_value(I) *vima = &ima(ima.domain().pmin()); + const unsigned char * + nsites = (unsigned char *)pima.const_data() + + pima.width() * pima.height() * 4; + + unsigned next = 4; + for (const unsigned char *v = (unsigned char *)pima.const_data(); + v < nsites; v += next, ++vima) + { + vima->red() = *(v + 2); + vima->green() = *(v + 1); + vima->blue() = *v; + } + } + + template <typename I> + void copy_data_rgb24(I& ima, const poppler::image& pima) + { + mln_value(I) *vima = &ima(ima.domain().pmin()); + const unsigned char * + nsites = (unsigned char *)pima.const_data() + + pima.width() * pima.height() * 3; + + unsigned next = 3; + for (const unsigned char *v = (unsigned char *)pima.const_data(); + v < nsites; v += next, ++vima) + { + vima->red() = *v; + vima->green() = *(v + 1); + vima->blue() = *(v + 2); + } + } + + + template <typename I> + void copy_data(I& ima, const poppler::image& pima) + { + mln_precondition(ima.is_valid()); + + switch(pima.format()) + { + case poppler::image::format_invalid: + case poppler::image::format_mono: + std::cout << "Loading black and white pdf is not implemented!"; + abort(); + break; + case poppler::image::format_rgb24: + copy_data_rgb24(ima, pima); + break; + case poppler::image::format_argb32: + copy_data_argb32(ima, pima); + break; + } + } + + + template <typename I> + void extract_page(const poppler::document* pdf, int i, + util::array<I>& arr, float dpi) + { + // Read page + poppler::page* p = pdf->create_page(i); + + // Render Page + poppler::page_renderer pr; + poppler::image pima = pr.render_page(p, dpi, dpi); + + // Prepare MLN image. 
+ I ima(make::box2d(pima.height(), pima.width()), 0); + mln_assertion(ima.is_valid()); + + // Copy data + internal::copy_data(ima, pima); + + // Append result + arr.append(ima); + } + + } // end of namespace mln::io::pdf::internal + + + template <typename I> + void load(util::array<I>& arr, const std::string& filename, float dpi) + { + trace::entering("mln::io::pdf::load"); + + // Load document + poppler::document *pdf = poppler::document::load_from_file(filename); + if (pdf == 0) + { + std::cerr << "Error: Cannot load pdf " << filename << std::endl; + abort(); + } + + for (int i = 0; i < pdf->pages(); ++i) + internal::extract_page(pdf, i, arr, dpi); + + delete pdf; + + trace::exiting("mln::io::pdf::load"); + } + + template <typename I> + void load(util::array<I>& arr, const std::string& filename, + int first_page, int last_page, float dpi) + { + trace::entering("mln::io::pdf::load"); + + // Load document + poppler::document *pdf = poppler::document::load_from_file(filename); + if (pdf == 0) + { + std::cerr << "Error: Cannot load PDF " << filename << std::endl; + abort(); + } + + if (first_page < 0 || first_page > (pdf->pages() - 1) + || last_page < 0 || last_page > (pdf->pages() - 1)) + { + std::cout << "Error while loading PDF: page range is not correct!" 
+ << std::endl; + abort(); + } + + for (int i = first_page; i <= last_page; ++i) + internal::extract_page(pdf, i, arr, dpi); + + delete pdf; + + trace::exiting("mln::io::pdf::load"); + } + + + template <typename I> + void load(util::array<I>& arr, const std::string& filename, + util::array<int> pages, float dpi) + { + trace::entering("mln::io::pdf::load"); + + // Load document + poppler::document *pdf = poppler::document::load_from_file(filename); + if (pdf == 0) + { + std::cerr << "Error: Cannot load PDF " << filename << std::endl; + abort(); + } + + for (unsigned i = 0; i < pages.size(); ++i) + { + if (pages[i] < 0 || pages[i] > pdf->pages() - 1) + { + std::cout << "Error while loading PDF: selected page " + << pages[i] << " does not exist!" << std::endl; + abort(); + } + + internal::extract_page(pdf, pages[i], arr, dpi); + } + + delete pdf; + + trace::exiting("mln::io::pdf::load"); + } + + + template <typename I> + void load(Image<I>& ima, const std::string& filename, + int page, float dpi) + { + util::array<I> arr; + load(arr, filename, page, page, dpi); + mln_assertion(arr.size() == 1); + exact(ima) = arr[0]; + } + + template <typename I> + void load(util::array<I>& arr, const std::string& filename) + { + load(arr, filename, 300); + } + + + template <typename I> + void load(util::array<I>& arr, const std::string& filename, + util::array<int> pages) + { + load(arr, filename, pages, 300); + } + + +# endif // ! MLN_INCLUDE_ONLY + + } // end of namespace mln::io::pdf + + } // end of namespace mln::io + +} // end of namespace mln + + +#endif // ! MLN_IO_PDF_LOAD_HH diff --git a/milena/tests/io/Makefile.am b/milena/tests/io/Makefile.am index bd6c0a4..1859b6a 100644 --- a/milena/tests/io/Makefile.am +++ b/milena/tests/io/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (C) 2007, 2008, 2009 EPITA Research and Development +# Copyright (C) 2007, 2008, 2009, 2013 EPITA Research and Development # Laboratory (LRDE). # # This file is part of Olena. 
@@ -53,3 +53,7 @@ endif HAVE_MAGICKXX if HAVE_TIFF SUBDIRS += tiff endif HAVE_TIFF + +if HAVE_POPPLER + SUBDIRS += pdf +endif HAVE_POPPLER \ No newline at end of file diff --git a/milena/tests/io/pbms/Makefile.am b/milena/tests/io/pdf/Makefile.am similarity index 82% copy from milena/tests/io/pbms/Makefile.am copy to milena/tests/io/pdf/Makefile.am index 4d54371..0144098 100644 --- a/milena/tests/io/pbms/Makefile.am +++ b/milena/tests/io/pdf/Makefile.am @@ -1,4 +1,4 @@ -# Copyright (C) 2009, 2010 EPITA Research and Development Laboratory (LRDE). +# Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE). # # This file is part of Olena. # @@ -16,10 +16,11 @@ include $(top_srcdir)/milena/tests/tests.mk +AM_CPPFLAGS += $(POPPLER_CPPFLAGS) +AM_LDFLAGS = $(POPPLER_LDFLAGS) + check_PROGRAMS = \ - load + load TESTS = $(check_PROGRAMS) - -MOSTLYCLEANFILES = pbms-out.pbm diff --git a/milena/tests/io/pdf/load.cc b/milena/tests/io/pdf/load.cc new file mode 100644 index 0000000..c517784 --- /dev/null +++ b/milena/tests/io/pdf/load.cc @@ -0,0 +1,88 @@ +// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE) +// +// This file is part of Olena. +// +// Olena is free software: you can redistribute it and/or modify it under +// the terms of the GNU General Public License as published by the Free +// Software Foundation, version 2 of the License. +// +// Olena is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with Olena. If not, see <http://www.gnu.org/licenses/>. +// +// As a special exception, you may use this file as part of a free +// software project without restriction. 
Specifically, if other files +// instantiate templates or use macros or inline functions from this +// file, or you compile this file and link it with other files to produce +// an executable, this file does not by itself cause the resulting +// executable to be covered by the GNU General Public License. This +// exception does not however invalidate any other reasons why the +// executable file might be covered by the GNU General Public License. + +#include <mln/core/image/image2d.hh> +#include <mln/data/compare.hh> +#include <mln/value/rgb8.hh> +#include <mln/io/ppm/load.hh> +#include <mln/io/pdf/load.hh> +#include "tests/data.hh" + +int main() +{ + using namespace mln; + + image2d<value::rgb8> page0, page1, page2, page3; + + io::ppm::load(page0, MLN_TESTS_IMG_DIR "/example-0.ppm"); + io::ppm::load(page1, MLN_TESTS_IMG_DIR "/example-1.ppm"); + io::ppm::load(page2, MLN_TESTS_IMG_DIR "/example-2.ppm"); + io::ppm::load(page3, MLN_TESTS_IMG_DIR "/example-3.ppm"); + + // Loading full PDF. + { + util::array<image2d<value::rgb8> > arr; + io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", 75); + + mln_assertion(arr.size() == 4); + mln_assertion(arr[0] == page0); + mln_assertion(arr[1] == page1); + mln_assertion(arr[2] == page2); + mln_assertion(arr[3] == page3); + } + + // Loading a page range + { + util::array<image2d<value::rgb8> > arr; + io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", 1, 2, 75); + + mln_assertion(arr.size() == 2); + mln_assertion(arr[0] == page1); + mln_assertion(arr[1] == page2); + } + + // Loading specific pages. + { + util::array<int> pages; + pages.append(1); + pages.append(3); + util::array<image2d<value::rgb8> > arr; + io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", pages, 75); + + mln_assertion(arr.size() == 2); + mln_assertion(arr[0] == page1); + mln_assertion(arr[1] == page3); + } + + // Loading a specific page. 
+ { + image2d<value::rgb8> ima; + io::pdf::load(ima, MLN_TESTS_IMG_DIR "/example.pdf", 3, 75); + + mln_assertion(ima.is_valid()); + mln_assertion(ima == page3); + } + +} -- 1.7.2.5