* mln/io/pdf/get_header.hh,
* mln/io/pdf/load.hh,
* tests/io/pdf/Makefile.am,
* tests/io/pdf/load.cc: New.
* tests/io/Makefile.am: Add pdf/ subdir.
---
milena/ChangeLog | 11 +
milena/mln/io/{dicom => pdf}/get_header.hh | 85 ++++---
milena/mln/io/pdf/load.hh | 335 ++++++++++++++++++++++++++++
milena/tests/io/Makefile.am | 6 +-
milena/tests/io/{pbms => pdf}/Makefile.am | 9 +-
milena/tests/io/pdf/load.cc | 88 ++++++++
6 files changed, 491 insertions(+), 43 deletions(-)
copy milena/mln/io/{dicom => pdf}/get_header.hh (53%)
create mode 100644 milena/mln/io/pdf/load.hh
copy milena/tests/io/{pbms => pdf}/Makefile.am (82%)
create mode 100644 milena/tests/io/pdf/load.cc
diff --git a/milena/ChangeLog b/milena/ChangeLog
index 4d53dd4..2b19aec 100644
--- a/milena/ChangeLog
+++ b/milena/ChangeLog
@@ -1,3 +1,14 @@
+2013-02-28 Guillaume Lazzara <z(a)lrde.epita.fr>
+
+ Add support for loading multi-page PDF files using libpoppler.
+
+ * mln/io/pdf/get_header.hh,
+ * mln/io/pdf/load.hh,
+ * tests/io/pdf/Makefile.am,
+ * tests/io/pdf/load.cc: New.
+
+ * tests/io/Makefile.am: Add pdf/ subdir.
+
2013-02-26 Guillaume Lazzara <z(a)lrde.epita.fr>
* tests/unit_test/unit-tests.mk: Update.
diff --git a/milena/mln/io/dicom/get_header.hh b/milena/mln/io/pdf/get_header.hh
similarity index 53%
copy from milena/mln/io/dicom/get_header.hh
copy to milena/mln/io/pdf/get_header.hh
index 8ebe375..1969689 100644
--- a/milena/mln/io/dicom/get_header.hh
+++ b/milena/mln/io/pdf/get_header.hh
@@ -1,4 +1,4 @@
-// Copyright (C) 2010 EPITA Research and Development Laboratory (LRDE)
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
//
// This file is part of Olena.
//
@@ -23,22 +23,16 @@
// exception does not however invalidate any other reasons why the
// executable file might be covered by the GNU General Public License.
-#ifndef MLN_IO_DICOM_GET_HEADER_HH
-# define MLN_IO_DICOM_GET_HEADER_HH
+#ifndef MLN_IO_PDF_GET_HEADER_HH
+# define MLN_IO_PDF_GET_HEADER_HH
/// \file
///
-/// Load a DICOM file header.
+/// Load PDF header.
# include <iostream>
# include <fstream>
-# include <gdcm-2.0/gdcmReader.h>
-# include <gdcm-2.0/gdcmImageReader.h>
-# include <gdcm-2.0/gdcmWriter.h>
-# include <gdcm-2.0/gdcmDataSet.h>
-# include <gdcm-2.0/gdcmAttribute.h>
-
# include <mln/core/concept/image.hh>
# include <mln/core/routine/initialize.hh>
# include <mln/core/box_runstart_piter.hh>
@@ -52,63 +46,78 @@ namespace mln
namespace io
{
- namespace dicom
+ namespace pdf
{
- /// Store dicom file header.
- struct dicom_header
+ /// Store PDF file header information. NOTE(review): poppler::document is used below but no poppler include is visible in this header -- confirm.
+ struct pdf_header
{
- // The number of dimensions.
- unsigned dim;
-
- // The size in each dimension.
- util::array<unsigned> size;
+ pdf_header(const poppler::document *doc); ///< Read every field below from \a doc.
+
+ int page_count; ///< Number of pages, as returned by doc->pages().
+ int pdf_version_major; ///< Major part of the PDF version.
+ int pdf_version_minor; ///< Minor part of the PDF version.
+ bool is_encrypted; ///< Value of doc->is_encrypted().
+ bool is_linearized; ///< Value of doc->is_linearized().
+ bool is_locked; ///< Value of doc->is_locked().
+ std::string metadata; ///< Document metadata converted with to_latin1().
};
- /// Retrieve header in a dicom file.
- dicom_header get_header(const std::string& filename);
+ /// Retrieve the header of the PDF file \a filename (the program is aborted if it cannot be loaded).
+ pdf_header get_header(const std::string& filename);
# ifndef MLN_INCLUDE_ONLY
+ // Begin of pdf_header implementation.
- dicom_header get_header(const std::string& filename)
+ /// Fill the header from \a doc, which must be a valid (non-null)
+ /// document.  Declared inline because this non-template definition
+ /// lives in a header: without it, every translation unit including
+ /// the file would emit its own definition and linking would fail.
+ inline
+ pdf_header::pdf_header(const poppler::document *doc)
{
- trace::entering("mln::io::dicom::get_header");
+   page_count = doc->pages();
+   // A single call retrieves both parts of the version number.
+   doc->get_pdf_version(&pdf_version_major, &pdf_version_minor);
+   is_encrypted = doc->is_encrypted();
+   is_linearized = doc->is_linearized();
+   is_locked = doc->is_locked();
+   metadata = doc->metadata().to_latin1();
+ }
+
+ // End of pdf_header implementation.
- dicom_header header;
- gdcm::ImageReader r;
- r.SetFileName(filename.c_str());
- if (!r.Read())
+
+ inline
+ pdf_header get_header(const std::string& filename)
+ {
+ trace::entering("mln::io::pdf::get_header");
+
+ // Load the document; a null pointer is treated as a loading failure.
+ poppler::document *pdf = poppler::document::load_from_file(filename);
+ if (pdf == 0)
{
- std::cerr << "error: cannot open file '" << filename
<< "'!";
+ std::cerr << "Error: Cannot load PDF " << filename <<
std::endl;
abort();
}
- gdcm::Image& image = r.GetImage();
-
- header.dim = image.GetNumberOfDimensions();
- const unsigned int* dims = image.GetDimensions();
+ // Copy the document information into the header structure.
+ pdf_header header(pdf);
- for (unsigned i = 2; i < header.dim; ++i)
- header.size.append(dims[i]); // sli, ...
- for (unsigned i = 0; i < 2; ++i)
- header.size.append(dims[i]); // row, col
+ // The header only holds plain values and a std::string, so the document can be released.
+ delete pdf;
- trace::exiting("mln::io::dicom::get_header");
+ trace::exiting("mln::io::pdf::get_header");
return header;
}
# endif // ! MLN_INCLUDE_ONLY
- } // end of namespace mln::io::dicom
+ } // end of namespace mln::io::pdf
} // end of namespace mln::io
} // end of namespace mln
-#endif // ! MLN_IO_DICOM_GET_HEADER_HH
+#endif // ! MLN_IO_PDF_GET_HEADER_HH
diff --git a/milena/mln/io/pdf/load.hh b/milena/mln/io/pdf/load.hh
new file mode 100644
index 0000000..513a61d
--- /dev/null
+++ b/milena/mln/io/pdf/load.hh
@@ -0,0 +1,335 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#ifndef MLN_IO_PDF_LOAD_HH
+# define MLN_IO_PDF_LOAD_HH
+
+/// \file
+///
+/// \brief Load a multiple page PDF document using libpoppler.
+
+#include <cstdlib>
+#include <iostream>
+#include <string>
+
+#include <mln/core/image/image2d.hh>
+
+#include <poppler/cpp/poppler-document.h>
+#include <poppler/cpp/poppler-page.h>
+#include <poppler/cpp/poppler-page-renderer.h>
+
+
+namespace mln
+{
+
+ namespace io
+ {
+
+ namespace pdf
+ {
+
+ /*! Load every page of a multi-page PDF document using libpoppler.
+
+ \param[in,out] arr An array of images; one image per page is appended.
+ \param[in] filename The name of the input file.
+ \param[in] dpi Rendering resolution, used for both axes.
+
+ Page numbering starts from 0.
+ The program is aborted if the document cannot be loaded.
+ */
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename,
+ float dpi);
+
+ /*! Load a range of pages from a PDF document using libpoppler.
+
+ \param[in,out] arr An array of images; one image per page is appended.
+ \param[in] filename The name of the input file.
+ \param[in] first_page First page number.
+ \param[in] last_page Last page number (included in the range).
+ \param[in] dpi Rendering resolution, used for both axes.
+
+ Page numbering starts from 0.
+ The program is aborted if the document cannot be loaded or if
+ one of the bounds is not an existing page number.
+ */
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename,
+ int first_page, int last_page, float dpi);
+
+
+ /*! Load specific pages from a PDF document using libpoppler.
+
+ \param[in,out] arr An array of images; one image per requested page
+ is appended, in the order given by \a pages.
+ \param[in] filename The name of the input file.
+ \param[in] pages An array of page numbers.
+ \param[in] dpi Rendering resolution, used for both axes.
+
+ Page numbering starts from 0.
+ The program is aborted if the document cannot be loaded or if
+ a requested page does not exist.
+ */
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename,
+ util::array<int> pages, float dpi);
+
+ /*! Load a specific page from a PDF document using libpoppler.
+
+ \param[out] ima The image receiving the rendered page.
+ \param[in] filename The name of the input file.
+ \param[in] page The page number.
+ \param[in] dpi Rendering resolution, used for both axes.
+
+ Page numbering starts from 0.
+ The program is aborted if the document cannot be loaded or if
+ \a page does not exist.
+ */
+ template <typename I>
+ void load(Image<I>& ima, const std::string& filename,
+ int page, float dpi);
+
+
+ /*! \overload Load every page of a multi-page PDF document using
+ libpoppler.
+
+ \param[in,out] arr An array of images; one image per page is appended.
+ \param[in] filename The name of the input file.
+
+ DPI resolution is set to 300.
+ Page numbering starts from 0.
+ */
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename);
+
+ /*! \overload Load specific pages from a PDF document using
+ libpoppler.
+
+ \param[in,out] arr An array of images; one image per requested page
+ is appended, in the order given by \a pages.
+ \param[in] filename The name of the input file.
+ \param[in] pages An array of page numbers.
+
+ DPI resolution is set to 300.
+ Page numbering starts from 0.
+ */
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename,
+ util::array<int> pages);
+
+
+# ifndef MLN_INCLUDE_ONLY
+
+ namespace internal
+ {
+
+ /// Copy an ARGB32 poppler image into \a ima.  Each source pixel
+ /// takes 4 bytes, read as blue, green, red; the fourth byte is
+ /// ignored.  Destination values are written through a raw pointer
+ /// starting at the first site of the domain, so \a ima is expected
+ /// to store width * height values contiguously.
+ template <typename I>
+ void copy_data_argb32(I& ima, const poppler::image& pima)
+ {
+   const int bytes_per_pixel = 4;
+
+   mln_value(I) *dst = &ima(ima.domain().pmin());
+
+   const unsigned char *src
+     = reinterpret_cast<const unsigned char *>(pima.const_data());
+   const unsigned char *const src_end
+     = src + pima.width() * pima.height() * bytes_per_pixel;
+
+   while (src < src_end)
+     {
+       dst->red() = src[2];
+       dst->green() = src[1];
+       dst->blue() = src[0];
+       src += bytes_per_pixel;
+       ++dst;
+     }
+ }
+
+ /// Copy an RGB24 poppler image into \a ima.  Each source pixel
+ /// takes 3 bytes, read as red, green, blue.  Destination values are
+ /// written through a raw pointer starting at the first site of the
+ /// domain, so \a ima is expected to store width * height values
+ /// contiguously.
+ template <typename I>
+ void copy_data_rgb24(I& ima, const poppler::image& pima)
+ {
+   const int bytes_per_pixel = 3;
+
+   mln_value(I) *dst = &ima(ima.domain().pmin());
+
+   const unsigned char *src
+     = reinterpret_cast<const unsigned char *>(pima.const_data());
+   const unsigned char *const src_end
+     = src + pima.width() * pima.height() * bytes_per_pixel;
+
+   while (src < src_end)
+     {
+       dst->red() = src[0];
+       dst->green() = src[1];
+       dst->blue() = src[2];
+       src += bytes_per_pixel;
+       ++dst;
+     }
+ }
+
+
+ /// Copy the pixels of the rendered page \a pima into \a ima,
+ /// dispatching on the pixel format of \a pima.  Only the RGB24 and
+ /// ARGB32 formats are handled; any other format aborts the program.
+ ///
+ /// \pre \a ima is valid.
+ template <typename I>
+ void copy_data(I& ima, const poppler::image& pima)
+ {
+   mln_precondition(ima.is_valid());
+
+   switch (pima.format())
+     {
+     case poppler::image::format_rgb24:
+       copy_data_rgb24(ima, pima);
+       break;
+     case poppler::image::format_argb32:
+       copy_data_argb32(ima, pima);
+       break;
+     case poppler::image::format_mono:
+       // Report on std::cerr and flush: abort() is not required to
+       // flush pending output, so the message could be lost otherwise.
+       std::cerr << "Loading black and white pdf is not implemented!"
+                 << std::endl;
+       abort();
+       break;
+     case poppler::image::format_invalid:
+     default:
+       // Invalid image, or a format this loader does not know about:
+       // stop rather than return an image that was never filled.
+       std::cerr << "Error: invalid or unsupported PDF page image format!"
+                 << std::endl;
+       abort();
+       break;
+     }
+ }
+
+
+ /// Render page \a i of \a pdf at resolution \a dpi (used for both
+ /// axes) and append the resulting image to \a arr.  The program is
+ /// aborted if the page cannot be created.
+ template <typename I>
+ void extract_page(const poppler::document* pdf, int i,
+                   util::array<I>& arr, float dpi)
+ {
+   // create_page() hands over a page that the caller must release.
+   poppler::page* p = pdf->create_page(i);
+   if (p == 0)
+     {
+       std::cerr << "Error: Cannot read page " << i << " of PDF"
+                 << std::endl;
+       abort();
+     }
+
+   // Render the page, then release it right away: the page used to
+   // be leaked on every call, and only the image is needed from here.
+   poppler::page_renderer pr;
+   poppler::image pima = pr.render_page(p, dpi, dpi);
+   delete p;
+
+   // Prepare MLN image.  The second argument (0) is presumably the
+   // border size; copy_data writes pixels through a raw pointer and
+   // therefore needs them stored contiguously -- confirm.
+   I ima(make::box2d(pima.height(), pima.width()), 0);
+   mln_assertion(ima.is_valid());
+
+   // Copy data and append result.
+   internal::copy_data(ima, pima);
+   arr.append(ima);
+ }
+
+ } // end of namespace mln::io::pdf::internal
+
+
+ // Load all the pages of FILENAME, in document order, and append
+ // them to ARR.  Aborts if the document cannot be loaded.
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename, float dpi)
+ {
+   trace::entering("mln::io::pdf::load");
+
+   // Open the document; a null pointer means it could not be loaded.
+   poppler::document *doc = poppler::document::load_from_file(filename);
+   if (! doc)
+     {
+       std::cerr << "Error: Cannot load pdf " << filename << std::endl;
+       abort();
+     }
+
+   // Render the pages one by one.
+   int page = 0;
+   while (page < doc->pages())
+     {
+       internal::extract_page(doc, page, arr, dpi);
+       ++page;
+     }
+
+   delete doc;
+
+   trace::exiting("mln::io::pdf::load");
+ }
+
+ // Load the pages FIRST_PAGE to LAST_PAGE (both included) of
+ // FILENAME and append them to ARR.  Aborts if the document cannot
+ // be loaded or if one of the bounds is not an existing page.
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename,
+           int first_page, int last_page, float dpi)
+ {
+   trace::entering("mln::io::pdf::load");
+
+   // Load document
+   poppler::document *pdf = poppler::document::load_from_file(filename);
+   if (pdf == 0)
+     {
+       std::cerr << "Error: Cannot load PDF " << filename << std::endl;
+       abort();
+     }
+
+   // Both bounds must designate existing pages (numbering starts at 0).
+   const int last_valid_page = pdf->pages() - 1;
+   if (first_page < 0 || first_page > last_valid_page
+       || last_page < 0 || last_page > last_valid_page)
+     {
+       // Errors are reported on std::cerr, like the loading error
+       // above, so that they do not end up in the program's output.
+       std::cerr << "Error while loading PDF: page range is not correct!"
+                 << std::endl;
+       abort();
+     }
+
+   // The range is inclusive; nothing is loaded if first_page > last_page.
+   for (int i = first_page; i <= last_page; ++i)
+     internal::extract_page(pdf, i, arr, dpi);
+
+   delete pdf;
+
+   trace::exiting("mln::io::pdf::load");
+ }
+
+
+ // Load the pages of FILENAME listed in PAGES, in that order, and
+ // append them to ARR.  Aborts if the document cannot be loaded or
+ // as soon as a requested page does not exist.
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename,
+           util::array<int> pages, float dpi)
+ {
+   trace::entering("mln::io::pdf::load");
+
+   // Load document
+   poppler::document *pdf = poppler::document::load_from_file(filename);
+   if (pdf == 0)
+     {
+       std::cerr << "Error: Cannot load PDF " << filename << std::endl;
+       abort();
+     }
+
+   const int last_valid_page = pdf->pages() - 1;
+   for (unsigned i = 0; i < pages.size(); ++i)
+     {
+       if (pages[i] < 0 || pages[i] > last_valid_page)
+         {
+           // Errors are reported on std::cerr, like the loading error
+           // above, so that they do not end up in the program's output.
+           std::cerr << "Error while loading PDF: selected page "
+                     << pages[i] << " does not exist!" << std::endl;
+           abort();
+         }
+
+       internal::extract_page(pdf, pages[i], arr, dpi);
+     }
+
+   delete pdf;
+
+   trace::exiting("mln::io::pdf::load");
+ }
+
+
+ template <typename I>
+ void load(Image<I>& ima, const std::string& filename,
+ int page, float dpi)
+ {
+ util::array<I> arr; // Temporary array receiving the single page.
+ load(arr, filename, page, page, dpi); // Range overload with first == last == page.
+ mln_assertion(arr.size() == 1);
+ exact(ima) = arr[0]; // Hand the loaded page over to the caller's image.
+ }
+
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename)
+ {
+ load(arr, filename, 300); // All pages, default resolution of 300 DPI.
+ }
+
+
+ template <typename I>
+ void load(util::array<I>& arr, const std::string& filename,
+ util::array<int> pages)
+ {
+ load(arr, filename, pages, 300); // Selected pages, default resolution of 300 DPI.
+ }
+
+
+# endif // ! MLN_INCLUDE_ONLY
+
+ } // end of namespace mln::io::pdf
+
+ } // end of namespace mln::io
+
+} // end of namespace mln
+
+
+#endif // ! MLN_IO_PDF_LOAD_HH
diff --git a/milena/tests/io/Makefile.am b/milena/tests/io/Makefile.am
index bd6c0a4..1859b6a 100644
--- a/milena/tests/io/Makefile.am
+++ b/milena/tests/io/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright (C) 2007, 2008, 2009 EPITA Research and Development
+# Copyright (C) 2007, 2008, 2009, 2013 EPITA Research and Development
# Laboratory (LRDE).
#
# This file is part of Olena.
@@ -53,3 +53,7 @@ endif HAVE_MAGICKXX
if HAVE_TIFF
SUBDIRS += tiff
endif HAVE_TIFF
+
+if HAVE_POPPLER
+ SUBDIRS += pdf
+endif HAVE_POPPLER
\ No newline at end of file
diff --git a/milena/tests/io/pbms/Makefile.am b/milena/tests/io/pdf/Makefile.am
similarity index 82%
copy from milena/tests/io/pbms/Makefile.am
copy to milena/tests/io/pdf/Makefile.am
index 4d54371..0144098 100644
--- a/milena/tests/io/pbms/Makefile.am
+++ b/milena/tests/io/pdf/Makefile.am
@@ -1,4 +1,4 @@
-# Copyright (C) 2009, 2010 EPITA Research and Development Laboratory (LRDE).
+# Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE).
#
# This file is part of Olena.
#
@@ -16,10 +16,11 @@
include $(top_srcdir)/milena/tests/tests.mk
+AM_CPPFLAGS += $(POPPLER_CPPFLAGS)
+AM_LDFLAGS = $(POPPLER_LDFLAGS)
+
check_PROGRAMS = \
- load
+ load
TESTS = $(check_PROGRAMS)
-
-MOSTLYCLEANFILES = pbms-out.pbm
diff --git a/milena/tests/io/pdf/load.cc b/milena/tests/io/pdf/load.cc
new file mode 100644
index 0000000..c517784
--- /dev/null
+++ b/milena/tests/io/pdf/load.cc
@@ -0,0 +1,88 @@
+// Copyright (C) 2013 EPITA Research and Development Laboratory (LRDE)
+//
+// This file is part of Olena.
+//
+// Olena is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free
+// Software Foundation, version 2 of the License.
+//
+// Olena is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Olena. If not, see <http://www.gnu.org/licenses/>.
+//
+// As a special exception, you may use this file as part of a free
+// software project without restriction. Specifically, if other files
+// instantiate templates or use macros or inline functions from this
+// file, or you compile this file and link it with other files to produce
+// an executable, this file does not by itself cause the resulting
+// executable to be covered by the GNU General Public License. This
+// exception does not however invalidate any other reasons why the
+// executable file might be covered by the GNU General Public License.
+
+#include <mln/core/image/image2d.hh>
+#include <mln/data/compare.hh>
+#include <mln/value/rgb8.hh>
+#include <mln/io/ppm/load.hh>
+#include <mln/io/pdf/load.hh>
+#include "tests/data.hh"
+
+int main() // Check each io::pdf::load overload against reference PPM images.
+{
+ using namespace mln;
+
+ image2d<value::rgb8> page0, page1, page2, page3; // Expected content of pages 0 to 3.
+
+ io::ppm::load(page0, MLN_TESTS_IMG_DIR "/example-0.ppm");
+ io::ppm::load(page1, MLN_TESTS_IMG_DIR "/example-1.ppm");
+ io::ppm::load(page2, MLN_TESTS_IMG_DIR "/example-2.ppm");
+ io::ppm::load(page3, MLN_TESTS_IMG_DIR "/example-3.ppm");
+
+ // Loading the full PDF: 4 pages are expected, in document order.
+ {
+ util::array<image2d<value::rgb8> > arr;
+ io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", 75);
+
+ mln_assertion(arr.size() == 4);
+ mln_assertion(arr[0] == page0);
+ mln_assertion(arr[1] == page1);
+ mln_assertion(arr[2] == page2);
+ mln_assertion(arr[3] == page3);
+ }
+
+ // Loading the page range from 1 to 2 (both included).
+ {
+ util::array<image2d<value::rgb8> > arr;
+ io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", 1, 2, 75);
+
+ mln_assertion(arr.size() == 2);
+ mln_assertion(arr[0] == page1);
+ mln_assertion(arr[1] == page2);
+ }
+
+ // Loading specific pages (1 and 3) given as an array of page numbers.
+ {
+ util::array<int> pages;
+ pages.append(1);
+ pages.append(3);
+ util::array<image2d<value::rgb8> > arr;
+ io::pdf::load(arr, MLN_TESTS_IMG_DIR "/example.pdf", pages, 75);
+
+ mln_assertion(arr.size() == 2);
+ mln_assertion(arr[0] == page1);
+ mln_assertion(arr[1] == page3);
+ }
+
+ // Loading a single page (3) directly into an image.
+ {
+ image2d<value::rgb8> ima;
+ io::pdf::load(ima, MLN_TESTS_IMG_DIR "/example.pdf", 3, 75);
+
+ mln_assertion(ima.is_valid());
+ mln_assertion(ima == page3);
+ }
+
+}
--
1.7.2.5