last-svn-commit-572-ge91a188 Output change in xml_transform.

* arthur/xml_transform/README * arthur/xml_transform/templates/opendoc/xsl.xsl * arthur/xml_transform/templates/pdf/main.xsl * arthur/xml_transform/templates/pdf/regions.xsl * arthur/xml_transform/image_crop.cc, * arthur/xml_transform/image_crop.hh: Change ppm loader to magick loader. * arthur/xml_transform/main.cc: Change man. * arthur/xml_transform/templates/pdf/main_crop_64.xsl, * arthur/xml_transform/templates/pdf/regions_base64.xsl, * arthur/xml_transform/templates/pdf/regions_png.xsl, * arthur/xml_transform/templates/html/html_generator.sh, * arthur/xml_transform/templates/pdf/svg_generator.sh, * arthur/xml_transform/templates/pdf/pdf_generator.sh: Delete. * arthur/xml_transform/templates/html/xsl.xsl: Rename as... * arthur/xml_transform/templates/html/main.xsl: ...This. * arthur/xml_transform/templates/html/xsl_base64.xsl: Rename as... * arthur/xml_transform/templates/html/main_base64.xsl: ...This. * arthur/xml_transform/templates/pdf/regions_svg.xsl: Rename as... * arthur/xml_transform/templates/svg/regions.xsl: ...This. * arthur/xml_transform/templates/xml_transform.sh, * arthur/xml_transform/templates/svg/line.xsl, * arthur/xml_transform/templates/ooconvert, * arthur/xml_transform/templates/pdf/full/line.xsl: New. * arthur/xml_transform/xml_transform.cc, * arthur/xml_transform/xml_transform.hh: Change output. * arthur/xml_transform/xml_transform.pro: Add QMAKE_POST_LINK. 
--- scribo/sandbox/ChangeLog | 44 ++- scribo/sandbox/arthur/xml_transform/README | 31 +- scribo/sandbox/arthur/xml_transform/image_crop.cc | 73 ++- scribo/sandbox/arthur/xml_transform/image_crop.hh | 11 +- scribo/sandbox/arthur/xml_transform/main.cc | 139 ++++-- .../xml_transform/templates/html/html_generator.sh | 3 - .../templates/html/{xsl.xsl => main.xsl} | 46 ++- .../html/{xsl_base64.xsl => main_base64.xsl} | 111 +++-- .../arthur/xml_transform/templates/ooconvert | 517 ++++++++++++++++++++ .../templates/opendoc/opendoc_generator.sh | 24 - .../arthur/xml_transform/templates/opendoc/xsl.xsl | 33 ++- .../arthur/xml_transform/templates/pdf/main.xsl | 22 +- .../arthur/xml_transform/templates/pdf/main64.xsl | 62 --- .../xml_transform/templates/pdf/pdf_generator.sh | 3 - .../pdf/{regions_base64.xsl => regions.xsl} | 0 .../xml_transform/templates/pdf/regions_png.xsl | 59 --- .../xml_transform/templates/pdf/svg_generator.sh | 3 - .../xml_transform/templates/{pdf => svg}/line.xsl | 0 .../templates/{pdf/main_crop.xsl => svg/main.xsl} | 0 .../{pdf/regions_svg.xsl => svg/regions.xsl} | 0 .../xml_transform/templates/xml_transform.sh | 134 +++++ .../sandbox/arthur/xml_transform/xml_transform.cc | 76 +-- .../sandbox/arthur/xml_transform/xml_transform.hh | 2 + .../sandbox/arthur/xml_transform/xml_transform.pro | 8 +- 24 files changed, 1018 insertions(+), 383 deletions(-) delete mode 100644 scribo/sandbox/arthur/xml_transform/templates/html/html_generator.sh rename scribo/sandbox/arthur/xml_transform/templates/html/{xsl.xsl => main.xsl} (89%) rename scribo/sandbox/arthur/xml_transform/templates/html/{xsl_base64.xsl => main_base64.xsl} (75%) create mode 100755 scribo/sandbox/arthur/xml_transform/templates/ooconvert delete mode 100755 scribo/sandbox/arthur/xml_transform/templates/opendoc/opendoc_generator.sh delete mode 100644 scribo/sandbox/arthur/xml_transform/templates/pdf/main64.xsl delete mode 100644 scribo/sandbox/arthur/xml_transform/templates/pdf/pdf_generator.sh rename 
scribo/sandbox/arthur/xml_transform/templates/pdf/{regions_base64.xsl => regions.xsl} (100%) delete mode 100644 scribo/sandbox/arthur/xml_transform/templates/pdf/regions_png.xsl delete mode 100644 scribo/sandbox/arthur/xml_transform/templates/pdf/svg_generator.sh copy scribo/sandbox/arthur/xml_transform/templates/{pdf => svg}/line.xsl (100%) rename scribo/sandbox/arthur/xml_transform/templates/{pdf/main_crop.xsl => svg/main.xsl} (100%) rename scribo/sandbox/arthur/xml_transform/templates/{pdf/regions_svg.xsl => svg/regions.xsl} (100%) create mode 100755 scribo/sandbox/arthur/xml_transform/templates/xml_transform.sh diff --git a/scribo/sandbox/ChangeLog b/scribo/sandbox/ChangeLog index 176e809..dcff59e 100644 --- a/scribo/sandbox/ChangeLog +++ b/scribo/sandbox/ChangeLog @@ -1,4 +1,40 @@ -2010-07-23 Arthur Crepin-Leblond <crepin@ptaouchnok.lrde.epita.fr> +2010-08-05 Arthur Crepin-Leblond <crepin@ptaouchnok.lrde.epita.fr@> + + Output change in xml_tranform. + + * arthur/xml_transform/README + * arthur/xml_transform/templates/opendoc/xsl.xsl + * arthur/xml_transform/templates/pdf/main.xsl + * arthur/xml_transform/templates/pdf/regions.xsl + + * arthur/xml_transform/image_crop.cc, + * arthur/xml_transform/image_crop.hh: Change ppm loader to magick loader. + * arthur/xml_transform/main.cc: Change man. + + * arthur/xml_transform/templates/pdf/main_crop_64.xsl, + * arthur/xml_transform/templates/pdf/regions_base64.xsl, + * arthur/xml_transform/templates/pdf/regions_png.xsl, + * arthur/xml_transform/templates/html/html_generator.sh, + * arthur/xml_transform/templates/pdf/svg_generator.sh, + * arthur/xml_transform/templates/pdf/pdf_generator.sh: Delete. + + * arthur/xml_transform/templates/html/xsl.xsl: Rename as... + * thur/xml_transform/templates/html/main.xsl: ...This. + * arthur/xml_transform/templates/html/xsl_base64.xsl: Rename as... + * arthur/xml_transform/templates/html/main.xsl: ...This. + * arthur/xml_transform/templates/pdf/regions_svg.xsl: Rename as... 
+ * arthur/xml_transform/templates/svg/regions.xsl: ...This. + + * arthur/xml_transform/templates/xml_transform.sh, + * arthur/xml_transform/templates/svg/line.xsl, + * arthur/xml_transform/templates/ooconvert, + * arthur/xml_transform/templates/pdf/full/line.xsl: New. + + * arthur/xml_transform/xml_transform.cc, + * arthur/xml_transform/xml_transform.hh: Change output. + * arthur/xml_transform/xml_transform.pro: Add QMAKE_POST_LINK. + +2010-07-23 Arthur Crepin-Leblond <crepin@lrde.epita.fr> OpenDocument output (odt and odf). @@ -13,7 +49,7 @@ * arthur/xml_transform/xml_transform.cc * arthur/xml_transform/xml_transform.hh -2010-07-22 Arthur Crepin-Leblond <crepin@ptaouchnok.lrde.epita.fr> +2010-07-22 Arthur Crepin-Leblond <crepin@lrde.epita.fr> SVG output. @@ -60,7 +96,7 @@ * arthur/xml_transform/templates/pdf/regions.xsl: Rename as... * arthur/xml_transform/templates/pdf/regions_png.xsl:...this. -2010-07-08 Arthur Crepin-Leblond <crepin@ptaouchnok.lrde.epita.fr> +2010-07-08 Arthur Crepin-Leblond <crepin@lrde.epita.fr> XML transform program. @@ -99,7 +135,7 @@ * arthur/xml_transform/templates/pdf/regions.xsl, * arthur/xml_transform/xml_transform.pro: New. -2010-07-05 Arthur Crepin-Leblond <crepin@stockholm.lrde.epita.fr> +2010-07-05 Arthur Crepin-Leblond <crepin@lrde.epita.fr> ICDAR XML to HTML. diff --git a/scribo/sandbox/arthur/xml_transform/README b/scribo/sandbox/arthur/xml_transform/README index 9ac4973..1733535 100644 --- a/scribo/sandbox/arthur/xml_transform/README +++ b/scribo/sandbox/arthur/xml_transform/README @@ -2,35 +2,31 @@ xml_transform OPTIONS: HTML output: - --html <xml> <ppm> <out_dir> : HTML output with non-text regions cropped into many png files. - --html-base64 <xml> <out_dir> : HTML output from a container XML file. + --html input.xml image output.html Creates HTML file, images are embedded inside in base 64 format. + --html-base64 input.xml output.html Same as --html but input is a XML file with images encoded in base 64 inside. 
+ --html-full input.xml image output_dir Creates HTML file without converting images in base 64. Images + HTML file are placed in output_dir. PDF output: - --pdf <xml> <ppm> <out_dir> : PDF output with non-text regions cropped into many png files. - --pdf-no-crop <xml> <ppm> <out_dir>: PDF output with the entire picture displayed over the text. - --pdf-base64 <xml> <out_dir> : PDF output from a container XML file. - --pdf-base64-no-crop <xml> <ppm> <out_dir> : PDF with the entire picture displayed over the text, the picture is loaded from a container XML file. + --pdf input.xml image output.pdf Creates PDF file, images are embedded inside in base 64 format. + --pdf-base64 input.xml output.pdf Same as --pdf but input is a XML file with images encoded in base 64 inside. -SVG output: - --svg <xml> <out_dir> : SVG output (works only with base 64 xml encoded. - -Base 64 operations: - --to-base64 <xml> <ppm> <out_xml> : Produces a container XML file by converting cropped pictures into base 64 format. +SVG output (experimental, may not work with all files): + --svg input.xml image output.svg Creates SVG file, images are embedded inside in base 64 format. + --svg-base64 input.xml output.svg Same as --svg but input is a XML file with images encoded in base 64 inside. OpenDocument output: - --open <xml> <ppm> <out_dir> : OpenDocument output (odt, odf). + --oo-doc input.xml image output.odt Creates Open Document Writer (odt) file. Base 64 operations: - --to-base64 <xml> <ppm> <out_xml> : Produces a container XML file by converting cropped pictures into base 64 format. - --to-base64-no-crop <xml> <ppm> <out_xml> : Same as to-base64 but only the picture is converted in base 64. - --from-base64 <xml> <out_dir> : Decodes a XML container file to produce original image files. + --to-base64 input.xml image output.xml Creates a container XML file. It will contain regions which are cropped and converted in base 64. 
+ --to-base64-no-crop input.xml mage output.xml Same as --to-base64 but only the full picture will be converted, regions are not cropped + --from-base64 input.xml output_dir Decodes a XML file which has been encoded in base 64, regions will be converted into png files and saved in output_dir. -For PDF/HTML/SVG/OD output, Bourne shell scripts are provided to produce the outputs, just run sh *_generator.sh Requirements: PDF : fop >= 0.95 HTML : xsltproc SVG : trunk version of fop -OpenDocument : ooconvert +OpenDocument : ooconvert (included) BUILD: @@ -38,7 +34,6 @@ Chnage the environment variable QMAKE_CXXFLAGS in xml_transfrom.pro then mkdir _build cd _build -cp -r ../templates . qmake ../xml_transfrom.pro make diff --git a/scribo/sandbox/arthur/xml_transform/image_crop.cc b/scribo/sandbox/arthur/xml_transform/image_crop.cc index ffe2b44..50fadd8 100644 --- a/scribo/sandbox/arthur/xml_transform/image_crop.cc +++ b/scribo/sandbox/arthur/xml_transform/image_crop.cc @@ -34,6 +34,7 @@ #include <mln/core/alias/box2d.hh> #include <mln/core/image/image2d.hh> #include <mln/io/magick/save.hh> +#include <mln/io/magick/load.hh> #include <mln/io/ppm/all.hh> ImageCrop::ImageCrop(QString xml, QString img, QString output) : @@ -47,26 +48,15 @@ ImageCrop::~ImageCrop() { } -/* Save PPM image to PNG format in output_dir/img. */ -void ImageCrop::save_image(QString file) +/* Save image to PNG format in output_dir/img. 
*/ +void ImageCrop::save_image(QString out) { using namespace mln; image2d<value::rgb8> ima; - io::ppm::load(ima, image_.toStdString()); + io::magick::load(ima, image_.toStdString()); - QString filename; - if (file == QString::Null()) - filename = Common::get_file_name(image_); - else - filename = file; - - // std::cout << "Saving " << image_.toStdString() << " to " - // << output_dir_.toStdString() << "img/" - // << filename.toStdString() << ".png" - // << std::endl; - - io::magick::save(ima, output_dir_.toStdString() + "img/" + filename.toStdString() + ".png"); + io::magick::save(ima, out.toStdString()); } /* Return the image in base 64. */ @@ -83,7 +73,7 @@ QString ImageCrop::img_to_base64() } /* Decode the base 64 string str and save into output_dir_/img/img_name. */ -bool ImageCrop::img_from_base64(QString str, QString img_name, QString mime) +bool ImageCrop::img_from_base64(QString str, QString img) { QByteArray ba; @@ -91,7 +81,7 @@ bool ImageCrop::img_from_base64(QString str, QString img_name, QString mime) QByteArray out_ba = QByteArray::fromBase64(ba); QImage ima = QImage::fromData(out_ba); - return ima.save(output_dir_ + "img/" + img_name + "." + mime); + return ima.save(output_dir_ + img); } /* Read all regions of the XML file and save all base 64 data into output_dir/img */ @@ -125,7 +115,8 @@ void ImageCrop::from_base64() { QString data = node.firstChildElement("data").text(); QString mime = node.firstChildElement("mime").text(); - img_from_base64(data, id, mime); + QString img = id + "." 
+ mime; + img_from_base64(data, img); } } child = child.nextSibling(); @@ -138,6 +129,21 @@ void ImageCrop::to_base64(QString out_file, bool no_crop) { QFile file(xml_); file.open(QIODevice::ReadOnly); + + QDomDocument doc; + doc.setContent(&file); + + QDomElement root = doc.documentElement(); + QDomNode child = root.firstChild(); + + while (!child.isNull() && !child.toElement().tagName().contains("page")) + child = child.nextSibling(); + + QString width = child.toElement().attribute("image_width", "0"); + QString height = child.toElement().attribute("image_height", "0"); + + file.close(); + file.open(QIODevice::ReadOnly); QTextStream stream(&file); QFile file2(out_file); @@ -163,9 +169,10 @@ void ImageCrop::to_base64(QString out_file, bool no_crop) stream2 << "\n" << " <container>\n"; stream2 << " <mime>png</mime>\n"; - QString file_name = Common::get_file_name(image_); - save_image(); - QFile img(output_dir_ + "img/" + file_name + ".png"); + QTemporaryFile tmp(output_dir_ + Common::get_file_name(image_) + "_XXXXXX.png"); + tmp.open(); + save_image(tmp.fileName()); + QFile img(tmp.fileName()); img.open(QIODevice::ReadOnly); stream2 << " <data>\n"; @@ -178,7 +185,9 @@ void ImageCrop::to_base64(QString out_file, bool no_crop) stream2 << "\n <coords>\n"; stream2 << " <point x=\"0\" y=\"0\" />\n"; - stream2 << " <point x=\"0\" y=\"0\" />"; + stream2 << " <point x=\"" << width << "\" y=\"0\" />\n"; + stream2 << " <point x=\"" << width << "\" y=\"" << height << "\" />\n"; + stream2 << " <point x=\"0\" y=\"" << height << "\" />"; stream2 << "\n </coords>"; stream2 << "\n" << " </image_region>\n"; @@ -210,7 +219,7 @@ void ImageCrop::to_base64(QString out_file, bool no_crop) QDomElement root = doc.documentElement(); QString id = root.attribute("id", "none"); - QFile img(output_dir_ + "img/" + id + ".png"); + QFile img(region_map_[id]); img.open(QIODevice::ReadOnly); stream2 << " <data>\n"; @@ -220,6 +229,7 @@ void ImageCrop::to_base64(QString out_file, bool no_crop) 
stream2 << " </data>"; stream2 << "\n </container>"; + img.remove(); img.close(); } @@ -233,7 +243,7 @@ void ImageCrop::to_base64(QString out_file, bool no_crop) file.close(); } -bool ImageCrop::crop_regions() +bool ImageCrop::crop_regions(bool temp) { Loader loader; QFile f(xml_); @@ -265,7 +275,7 @@ bool ImageCrop::crop_regions() QDomNode coords = region.firstChild(); QString id = region.toElement().attribute("id", "none"); - qDebug() << region.toElement().tagName(); + // qDebug() << region.toElement().tagName(); while (!coords.isNull() && !coords.toElement().tagName().contains("coords")) coords = coords.nextSibling(); @@ -301,10 +311,19 @@ bool ImageCrop::crop_regions() box2d box = make::box2d(y_min, x_min, y_max, x_max); image2d<value::rgb8> ima; - io::ppm::load(ima, image_.toStdString()); + io::magick::load(ima, image_.toStdString()); ima = scribo::preprocessing::crop(ima, box); - io::magick::save(ima, output_dir_.toStdString() + "img/" + id.toStdString() + ".png"); + if (temp) + { + QTemporaryFile tmp(output_dir_ + id + ".XXXXXX.png"); + tmp.open(); + region_map_[id] = tmp.fileName(); + tmp.setAutoRemove(false); + io::magick::save(ima, tmp.fileName().toStdString()); + } + else + io::magick::save(ima, QString(output_dir_ + id + ".png").toStdString()); } region = region.nextSibling(); } diff --git a/scribo/sandbox/arthur/xml_transform/image_crop.hh b/scribo/sandbox/arthur/xml_transform/image_crop.hh index 9b034cc..ee5ca56 100644 --- a/scribo/sandbox/arthur/xml_transform/image_crop.hh +++ b/scribo/sandbox/arthur/xml_transform/image_crop.hh @@ -36,15 +36,15 @@ class ImageCrop : public QObject Q_OBJECT public: - ImageCrop(QString xml, QString img, QString output); + ImageCrop(QString, QString, QString); ~ImageCrop(); - void save_image(QString file = QString::Null()); - bool crop_regions(); + void save_image(QString); + bool crop_regions(bool temp = false); QString img_to_base64(); - bool img_from_base64(QString str, QString img_name, QString mime); - void 
to_base64(QString out_file, bool no_crop); + bool img_from_base64(QString, QString); + void to_base64(QString, bool); void from_base64(); @@ -52,6 +52,7 @@ private: QString xml_; QString image_; QString output_dir_; + QMap<QString, QString> region_map_; }; #endif /* !IMAGE_CROP_HH */ diff --git a/scribo/sandbox/arthur/xml_transform/main.cc b/scribo/sandbox/arthur/xml_transform/main.cc index 34a1b6e..2270d2e 100644 --- a/scribo/sandbox/arthur/xml_transform/main.cc +++ b/scribo/sandbox/arthur/xml_transform/main.cc @@ -1,5 +1,7 @@ #include <iostream> -#include <QDomDocument> +#include <string> +#include <string> +#include <cstdlib> #include "xml_transform.hh" int main(int argc, char **argv) @@ -8,60 +10,109 @@ int main(int argc, char **argv) std::string man; man = "xml_transform\n" "OPTIONS:\n\n" + "HTML output:\n" - "\t--html <xml> <ppm> <out_dir> : HTML output with non-text regions cropped into many png files.\n" - "\t--html-base64 <xml> <out_dir> : HTML output from a container XML file.\n\n" + "\t\033[01m--html\033[00m\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.html\033[00m" + "\t\t\tCreates HTML file, images are embedded inside in base 64 format.\n" + "\t\033[01m--html-base64\033[00m \033[04minput.xml\033[00m \033[04moutput.html\033[00m" + "\t\t\tSame as --html but input is a XML file with images encoded in base 64 inside.\n" + + "\t\033[01m--html-full\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput_dir\033[00m" + "\t\t\tCreates HTML file without converting images in base 64. 
Images + HTML file are placed in output_dir.\n\n" + "PDF output:\n" - "\t--pdf <xml> <ppm> <out_dir> : PDF output with non-text regions cropped into many png files.\n" - "\t--pdf-no-crop <xml> <ppm> <out_dir>: PDF output with the entire picture displayed over the text.\n" - "\t--pdf-base64 <xml> <out_dir> : PDF output from a container XML file.\n" - "\t--pdf-base64-no-crop <xml> <ppm> <out_dir> : PDF with the entire picture displayed over the text, the picture is loaded from a container XML file.\n\n" - "SVG output:\n" - "\t--svg <xml> <out_dir> : SVG output (works only with base 64 xml encoded.\n\n" - "Base 64 operations:\n" - "\t--to-base64 <xml> <ppm> <out_xml> : Produces a container XML file by converting cropped pictures into base 64 format.\n\n" + "\t\033[01m--pdf\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.pdf\033[00m" + "\t\t\tCreates PDF file, images are embedded inside in base 64 format.\n" + "\t\033[01m--pdf-base64\033[00m \033[04minput.xml\033[00m \033[04moutput.pdf\033[00m" + "\t\t\tSame as --pdf but input is a XML file with images encoded in base 64 inside.\n\n" + + "SVG output (experimental, may not work with all files):\n" + "\t\033[01m--svg\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.svg\033[00m" + "\t\t\tCreates SVG file, images are embedded inside in base 64 format.\n" + "\t\033[01m--svg-base64\033[00m \033[04minput.xml\033[00m \033[04moutput.svg\033[00m" + "\t\t\tSame as --svg but input is a XML file with images encoded in base 64 inside.\n\n" "OpenDocument output:\n" - "\t--open <xml> <ppm> <out_dir> : OpenDocument output (odt, odf).\n\n" + "\t\033[01m--oo-doc\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.odt\033[00m" + "\t\t\tCreates Open Document Writer (odt) file.\n\n" + "Base 64 operations:\n" - "\t--to-base64 <xml> <ppm> <out_xml> : Produces a container XML file by converting cropped pictures into base 64 format.\n" - "\t--to-base64-no-crop <xml> <ppm> <out_xml> 
: Same as to-base64 but only the picture is converted in base 64.\n" - "\t--from-base64 <xml> <out_dir> : Decodes a XML container file to produce original image files.\n\n" - "For PDF/HTML/SVG/OD output, Bourne shell scripts are provided to produce the outputs, just run sh *_generator.sh \n" + "\t\033[01m--to-base64\033[00m \033[04minput.xml\033[00m \033[04mimage\033[00m \033[04moutput.xml\033[00m" + "\t\t\tCreates a container XML file. It will contain regions which are cropped and converted in base 64.\n" + + "\t\033[01m--to-base64-no-crop\033[00m \033[04minput.xml\033[00m \033[04image\033[00m \033[04moutput.xml\033[00m" + "\t\tSame as --to-base64 but only the full picture will be converted, regions are not cropped\n" + + "\t\033[01m--from-base64\033[00m \033[04minput.xml\033[00m \033[04moutput_dir\033[00m" + "\t\t\tDecodes a XML file which has been encoded in base 64, regions will be converted into png files and saved in output_dir.\n\n" + "Requirements:\n" "PDF : fop >= 0.95\n" "HTML : xsltproc\n" "SVG : trunk version of fop\n" - "OpenDocument : ooconvert"; - + "OpenDocument : ooconvert (included)"; if (argc > 4) { std::string html = "--html"; - std::string open = "--open"; + std::string html_full = "--html-full"; + std::string open = "--oo-doc"; std::string pdf = "--pdf"; - std::string pdf_no_crop = "--pdf-no-crop"; + std::string svg = "--svg"; std::string to_base64 = "--to-base64"; std::string to_base64nocrop = "--to-base64-no-crop"; - if (html.compare(argv[1]) == 0) + if (html_full.compare(argv[1]) == 0) { XmlTransform xmlt(argv[2], argv[3], argv[4]); xmlt.createHTML(false); + + QString cmd = "sh templates/xml_transform.sh html " + xmlt.out() + "output.xml " + xmlt.out() + "output.html"; + return system (cmd.toAscii().constData()); + } + else if (html.compare(argv[1]) == 0) + { + QTemporaryFile tmp; + tmp.open(); + tmp.fileName(); + + XmlTransform xmlt(argv[2], argv[3], QString::Null(), tmp.fileName()); + xmlt.toBase64(false); + + QString cmd = "sh 
templates/xml_transform.sh html-base64 " + tmp.fileName() + " " + argv[4]; + return system (cmd.toAscii().constData()); } else if (pdf.compare(argv[1]) == 0) { - XmlTransform xmlt(argv[2], argv[3], argv[4]); - xmlt.createPDF(true, false); + QTemporaryFile tmp; + tmp.open(); + tmp.fileName(); + + XmlTransform xmlt(argv[2], argv[3], QString::Null(), tmp.fileName()); + xmlt.toBase64(false); + + QString cmd = "sh templates/xml_transform.sh pdf " + tmp.fileName() + " " + argv[4]; + return system (cmd.toAscii().constData()); } - else if (open.compare(argv[1]) == 0) + else if (svg.compare(argv[1]) == 0) { - XmlTransform xmlt(argv[2], argv[3], argv[4]); - xmlt.createOpen(); + QTemporaryFile tmp; + tmp.open(); + tmp.fileName(); + + XmlTransform xmlt(argv[2], argv[3], QString::Null(), tmp.fileName()); + xmlt.toBase64(false); + + QString cmd = "sh templates/xml_transform.sh svg " + tmp.fileName() + " " + argv[4]; + return system (cmd.toAscii().constData()); } - else if (pdf_no_crop.compare(argv[1]) == 0) + else if (open.compare(argv[1]) == 0) { - XmlTransform xmlt(argv[2], argv[3], argv[4]); - xmlt.createPDF(false, false); + QString dir = QDir::tempPath() + "/xml_transform." 
+ argv[4]; + XmlTransform xmlt(argv[2], argv[3], dir); + xmlt.createOpen(); + + QString cmd = "sh templates/xml_transform.sh oo-doc " + dir + " " + argv[4]; + return system (cmd.toAscii().constData()); } else if (to_base64.compare(argv[1]) == 0) { @@ -78,31 +129,25 @@ int main(int argc, char **argv) } else if (argc > 3) { - std::string pdfbase64 = "--pdf-base64"; - std::string svg = "--svg"; - std::string pdfbase64nocrop = "--pdf-base64-no-crop"; - std::string htmlbase64 = "--html-base64"; + std::string pdf_base64 = "--pdf-base64"; + std::string svg_base64 = "--svg-base64"; + std::string html_base64 = "--html-base64"; std::string from_base64 = "--from-base64"; - if (pdfbase64.compare(argv[1]) == 0) - { - XmlTransform xmlt(argv[2], QString::Null(), argv[3]); - xmlt.createPDF(true, true); - } - else if (svg.compare(argv[1]) == 0) + if (svg_base64.compare(argv[1]) == 0) { - XmlTransform xmlt(argv[2], QString::Null(), argv[3]); - xmlt.createSVG(); + QString cmd = "sh templates/xml_transform.sh svg " + QString(argv[2]) + " " + QString(argv[3]); + return system (cmd.toAscii().constData()); } - else if (pdfbase64nocrop.compare(argv[1]) == 0) + else if (pdf_base64.compare(argv[1]) == 0) { - XmlTransform xmlt(argv[2], QString::Null(), argv[3]); - xmlt.createPDF(false, true); + QString cmd = "sh templates/xml_transform.sh pdf " + QString(argv[2]) + " " + QString(argv[3]); + return system (cmd.toAscii().constData()); } - else if (htmlbase64.compare(argv[1]) == 0) + else if (html_base64.compare(argv[1]) == 0) { - XmlTransform xmlt(argv[2], QString::Null(), argv[3]); - xmlt.createHTML(true); + QString cmd = "sh templates/xml_transform.sh html-base64 " + QString(argv[2]) + " " + QString(argv[3]); + return system (cmd.toAscii().constData()); } else if (from_base64.compare(argv[1]) == 0) { diff --git a/scribo/sandbox/arthur/xml_transform/templates/html/html_generator.sh b/scribo/sandbox/arthur/xml_transform/templates/html/html_generator.sh deleted file mode 100644 index 
bb24305..0000000 --- a/scribo/sandbox/arthur/xml_transform/templates/html/html_generator.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -xsltproc xsl.xsl output.xml > output.html \ No newline at end of file diff --git a/scribo/sandbox/arthur/xml_transform/templates/html/xsl.xsl b/scribo/sandbox/arthur/xml_transform/templates/html/main.xsl similarity index 89% rename from scribo/sandbox/arthur/xml_transform/templates/html/xsl.xsl rename to scribo/sandbox/arthur/xml_transform/templates/html/main.xsl index 1f29044..9c90970 100644 --- a/scribo/sandbox/arthur/xml_transform/templates/html/xsl.xsl +++ b/scribo/sandbox/arthur/xml_transform/templates/html/main.xsl @@ -6,7 +6,37 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <title>SCRIBO</title> - <link rel="stylesheet" type="text/css" href="css.css" /> + <style type="text/css"> + .line + { + position:absolute; + z-index:7; + display:inline; + white-space:pre; + + <!-- FIXME --> + letter-spacing:-2px; + + padding:0px; + margin:0px; + font-family:"Times New Roman", Times, serif; + } + .para + { + position:absolute; + z-index:6; + } + .region + { + position:absolute; + z-index:5; + } + .image + { + position:absolute; + border:0; + } + </style> </head> <body> @@ -129,10 +159,8 @@ </xsl:choose> </xsl:variable> - <div class="line" onmouseover="this.style.opacity=0.2;this.filters.alpha.opacity=20" - onmouseout="this.style.opacity=1;this.filters.alpha.opacity=100"> + <div class="line"> <xsl:attribute name="style"> - opacity:1; height:auto; font-size:<xsl:value-of select="$a+$d" />px; width:<xsl:value-of select="$x2 - $x1" />px; @@ -146,7 +174,7 @@ <!-- ENF OF TEXT LINE --> <!-- TEXT REGION --> - <xsl:if test="name() = 'text_region'"> +<!-- <xsl:if test="name() = 'text_region'"> <div class="region"> <xsl:attribute name="style"> height:<xsl:value-of select="$y2 - $y1" />px; @@ -155,11 +183,11 @@ top:<xsl:value-of select="$y1" />px; </xsl:attribute> </div> - </xsl:if> + </xsl:if>--> <!-- ENF OF TEXT 
REGION --> <!-- PARAGRAPH --> - <xsl:if test="name() = 'paragraph'"> +<!-- <xsl:if test="name() = 'paragraph'"> <div class="para"> <xsl:attribute name="style"> height:<xsl:value-of select="$y2 - $y1" />px; @@ -168,7 +196,7 @@ top:<xsl:value-of select="$y1" />px; </xsl:attribute> </div> - </xsl:if> + </xsl:if>--> <!-- ENF OF PARAGRAPH --> <!-- NON-TEXT REGIONS --> @@ -209,7 +237,7 @@ <xsl:attribute name="height"> <xsl:value-of select="$y2 - $y1" /> </xsl:attribute> - <xsl:attribute name="src">img/<xsl:value-of select="$id"/>.png</xsl:attribute> + <xsl:attribute name="src"><xsl:value-of select="$id"/>.png</xsl:attribute> </img> </div> diff --git a/scribo/sandbox/arthur/xml_transform/templates/html/xsl_base64.xsl b/scribo/sandbox/arthur/xml_transform/templates/html/main_base64.xsl similarity index 75% rename from scribo/sandbox/arthur/xml_transform/templates/html/xsl_base64.xsl rename to scribo/sandbox/arthur/xml_transform/templates/html/main_base64.xsl index 1c25e42..b24f02d 100644 --- a/scribo/sandbox/arthur/xml_transform/templates/html/xsl_base64.xsl +++ b/scribo/sandbox/arthur/xml_transform/templates/html/main_base64.xsl @@ -6,7 +6,40 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <title>SCRIBO</title> - <link rel="stylesheet" type="text/css" href="css.css" /> + <style type="text/css"> + .line + { + position:absolute; + background-color:white; + z-index:7; + display:inline; + white-space:pre; + + /* FIXME */ + letter-spacing:-2px; + + padding:0px; + margin:0px; + filter:alpha(opacity=100); + font-family:"Times New Roman", Times, serif; + } + .para + { + position:absolute; + z-index:6; + } + .region + { + position:absolute; + z-index:5; + } + .image + { + position:absolute; + border:0; + } + </style> +<!-- <link rel="stylesheet" type="text/css" href="css.css" /> --> </head> <body> @@ -174,46 +207,48 @@ <!-- NON-TEXT REGIONS --> <xsl:if test="name() = 'image_region' or name() = 'separator_region' or name() = 'graphic_region' or 
name() = 'chart_region' or name() = 'table_region'"> - <!-- data --> - <xsl:variable name="data"> - <xsl:value-of select="container/data" /> - </xsl:variable> + <xsl:if test="container"> + <!-- data --> + <xsl:variable name="data"> + <xsl:value-of select="container/data" /> + </xsl:variable> - <!-- depth --> - <xsl:variable name="depth"> - <xsl:choose> - <xsl:when test="name() = 'separator_region'"> - 1 - </xsl:when> - <xsl:otherwise> - 4 - </xsl:otherwise> - </xsl:choose> - </xsl:variable> + <!-- depth --> + <xsl:variable name="depth"> + <xsl:choose> + <xsl:when test="name() = 'separator_region'"> + 1 + </xsl:when> + <xsl:otherwise> + 4 + </xsl:otherwise> + </xsl:choose> + </xsl:variable> - <div class="image"> - <xsl:attribute name="style"> - top:<xsl:value-of select="$y1" />px; - left:<xsl:value-of select="$x1" />px; - width:<xsl:value-of select="$x2 - $x1"/>px; - height:<xsl:value-of select="$y2 - $y1"/>px; - z-index:<xsl:value-of select="$depth"/>; - </xsl:attribute> - <img> - <xsl:attribute name="alt"> - <xsl:value-of select="name()" /> + <div class="image"> + <xsl:attribute name="style"> + top:<xsl:value-of select="$y1" />px; + left:<xsl:value-of select="$x1" />px; + width:<xsl:value-of select="$x2 - $x1"/>px; + height:<xsl:value-of select="$y2 - $y1"/>px; + z-index:<xsl:value-of select="$depth"/>; </xsl:attribute> - <xsl:attribute name="width"> - <xsl:value-of select="$x2 - $x1" /> - </xsl:attribute> - <xsl:attribute name="height"> - <xsl:value-of select="$y2 - $y1" /> - </xsl:attribute> - <xsl:attribute name="src"> - data:image/png;base64,<xsl:value-of select="$data"/> - </xsl:attribute> - </img> - </div> + <img> + <xsl:attribute name="alt"> + <xsl:value-of select="name()" /> + </xsl:attribute> + <xsl:attribute name="width"> + <xsl:value-of select="$x2 - $x1" /> + </xsl:attribute> + <xsl:attribute name="height"> + <xsl:value-of select="$y2 - $y1" /> + </xsl:attribute> + <xsl:attribute name="src"> + data:image/png;base64,<xsl:value-of select="$data"/> + 
</xsl:attribute> + </img> + </div> + </xsl:if> </xsl:if> <!-- END NON-TEXT REGIONS --> diff --git a/scribo/sandbox/arthur/xml_transform/templates/ooconvert b/scribo/sandbox/arthur/xml_transform/templates/ooconvert new file mode 100755 index 0000000..6c81550 --- /dev/null +++ b/scribo/sandbox/arthur/xml_transform/templates/ooconvert @@ -0,0 +1,517 @@ +#!/bin/bash + +# ooconvert: convert openoffice documents to other formats +# Copyright (C) 2007, Metaladder, Inc. + +# Developers: +# +# Nathan Coulter +# Andrey Plotnikov +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +############################################################################## +############################################################################## +# This script should be run with the same python interpreter that openoffice +# uses, which is usually included with openoffice: +# +# /path/to/openoffice-2.0.1/program/python.sh +# +# However, under debian, ubuntu, etc, openoffice uses the system-installed +# python interpreter. 
To find which interpreter is being used, look in +# +# /path/to/openoffice/program/pythonloader.unorc +# +# If openoffice is using an external python interpreter, modify the following +# LD_LIBRARY_PATH to reflect your openoffice environment and to import uno +# without errors + + +# If $DISPLAY does not point to a working X server, or is unset, this program +# tries to start Xvfb. Bypass all this nonsense by having DISPLAY point to a +# working X Server. +# +# +# NOTE! - If OpenOffice has never been started interactively, this program will +# fail silently because the license agreement has not been accepted. + +############################################################################## +############################################################################## + +shopt -s -o nounset + +### site configuration +#OO_LOGLEVEL=20 +#USE_OO_INTERNAL_PYTHON=0 #set to 1 if python is embedded in openoffice +#OOPROGRAM=/usr/lib/openoffice/program #directory containing soffice +#OOPYTHON=${OOPROGRAM}/python.sh" +#OO_EXTERNAL_PYTHON=/usr/bin/python +#OOXVFB="Xvfb" +#OOXWININFO=xwininfo +#OOXVFB_START=0} #first port to try Xvfb on +#OOXVFB_RETRIES:="1000"} #how many successive port to try +#alternatively, put site details in one of these files +[ -r /etc/ooconvert ] && source /etc/ooconvert +[ -r ~/etc/ooconvert ] && source ~/etc/ooconvert +### site configuration end + +### configuration defaults start +# Choose openoffice internal python (1) or external python (0) +: ${OO_LOGLEVEL:=20} +: ${USE_OO_INTERNAL_PYTHON:=0} +: ${OOPROGRAM:="/usr/lib/openoffice/program"} +: ${OOPYTHON:="${OOPROGRAM}/python.sh"} +: ${OO_EXTERNAL_PYTHON:="/usr/bin/python"} +: ${OOXVFB:="Xvfb"} +: ${OOXWININFO:="xwininfo"} +: ${OOXVFB_START:=0} +: ${OOXVFB_RETRIES:="100"} +### configuration defaults end + +LOG_NOTSET=0 +LOG_DEBUG=10 +LOG_INFO=20 +LOG_WARNING=30 +LOG_ERROR=40 +LOG_CRITICAL=50 + +OOXVFBJOB= + +oo_log () { + if [ "$OO_LOGLEVEL" -le "$1" ]; then + shift + printf '%s\n' "$@" >&2 + fi 
+} + +oo_err () { + exit_status="$1" + shift + oo_log $LOG_ERROR "$@" + exit "$exit_status" +} + +### If an X Server is not detected, try to use Xvfb +if [ ! "${DISPLAY:-}" ]; then + oo_log $LOG_INFO "\"DISPLAY\" variable is not set. Attempting Xvfb." + for i in "$OOXVFB" /usr/X11/bin/Xvfb /usr/X11R6/bin/Xvfb; do + oo_log "$LOG_INFO" "Trying to invoke $i" + $i >& /dev/null + if [ $? != 127 ]; then #127 is the bash shell code for "not found" + OOXVFB="$i" + break + fi + OOXVFB= + done + unset i + if [ "$OOXVFB" ]; then + oo_log $LOG_INFO "Xvfb is installed." + for i in "$OOXWININFO" /usr/X11/bin/xwininfo \ + /usr/X11R6/bin/xwininfo "$(dirname "$OOXVFB")/xwininfo" + do + $i >& /dev/null + if [ $? != 127 ]; then #127 is the bash shell code for "not found" + OOXWININFO="$i" + break + fi + OOXWININFO= + done + [ "$OOXWININFO" ] || oo_err 130 "ooxwininfo not found" + unset i + ((last=$OOXVFB_START+$OOXVFB_RETRIES)) + while :; do + "$OOXVFB" :$OOXVFB_START -screen scrn 1280x1024x16 & + OOXVFBJOB=$! + export DISPLAY=":$OOXVFB_START" + sleep 1 + ps "$OOXVFBJOB" && "$OOXWININFO" -root && break + ps "$OOXVFBJOB" && kill "$OOXVFBJOB" + oo_log $LOG_INFO "Failed to start Xvfb on display $OOXVFB_START" + ((OOXVFB_START++)) + [ $OOXVFB_START -ge $last ] && \ + oo_err 129 "Xvfb attempts exceeded $OOXVFB_RETRIES" + done + oo_log $LOG_INFO "Xvfb started. Process id: $OOXVFBJOB" + oo_log $LOG_INFO "Setting \"Display\" to $DISPLAY for Xvfb" + fi +fi + +case "$USE_OO_INTERNAL_PYTHON" in + 0) + OOPYTHON="$OO_EXTERNAL_PYTHON" + export LD_LIBRARY_PATH="$OOPROGRAM${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" + ;; +esac + + +"$OOPYTHON" - "$@" <<EOF +# OpenOffice1.1 comes with its own python interpreter. 
+# This Script needs to be run with the python from OpenOffice.org: +# /opt/OpenOffice.org/program/python or python.sh + +#pythonloader.unorc + +__doc__="Document converter which uses OO for actual converting" + +import sys, os, time, signal +from optparse import OptionParser + +#how long should we wait for openoffice to start +OOCONV_MAX_STARTUP_TIME = os.getenv("OOCONV_MAX_STARTUP_TIME") +if not OOCONV_MAX_STARTUP_TIME: OOCONV_MAX_STARTUP_TIME=60 +OOCONV_MAX_STARTUP_TIME=int(OOCONV_MAX_STARTUP_TIME) + +#what port should openoffice listen on +OOCONV_PORT = os.getenv("OOCONV_PORT") +if not OOCONV_PORT: OOCONV_PORT=2002 +OOCONV_PORT = int(OOCONV_PORT) + +def importUNO(): + """ + Import UNO modules + """ + import uno + globals()['uno'] = uno + from com.sun.star.beans import PropertyValue + globals()['PropertyValue'] = PropertyValue + from com.sun.star.container import NoSuchElementException + globals()['NoSuchElementException'] = NoSuchElementException + + +class OOConverter: + """ + Convert documents by means of Open Office. 
+ """ + + def __init__(self, desktop): + self.desktop = desktop + + def convert(self, fromFile, toFile, format): + """ + fromFile -- source file name + toFile -- destination file name + format -- sequence of property values + """ + desktop = self.desktop + fromFile=os.path.abspath(fromFile) + url = fromFile + #url="file://%s" % fromFile + url=uno.systemPathToFileUrl(fromFile) + context = uno.getComponentContext() + properties=[] + p=PropertyValue() + p.Name="Hidden" + p.Value=True + properties.append(p) + doc=desktop.loadComponentFromURL( + url, "_blank", 0, tuple(properties)); + if not doc: + print "Failed to open '%s'" % type(file) + return False + # Save File + properties=[] + p=PropertyValue() + p.Name="Overwrite" + p.Value=True + properties.append(p) + p=PropertyValue() + p.Name="FilterName" + p.Value=property(format, 'Name').Value + properties.append(p) + p=PropertyValue() + p.Name="Hidden" + p.Value=True + url_save="file://%s" % os.path.abspath(toFile) + try: + doc.storeToURL( + url_save, tuple(properties)) + except: + print "Failed while writing: '%s'" % file + doc.dispose() + return True + + + +class FileIterator: + """ + Iterate over each conversion task.
+ A conversion task consists of source file name, + destination file name and format + """ + def __init__(self, sources, dest, format, ext, keep): + self.sources = sources + self.dest = dest + if not ext: ext = '' + self.ext = ext + self.keep = keep + self.isDestDir = False + if os.path.isdir(dest): + self.isDestDir = True + self.format = format + self.siter = iter(self.sources) + + def __iter__(self): + return self + + def next(self): + src = self.siter.next() + return (src, self._getDestFN(src), self.format) + + def _getDestFN(self, src): + if not self.isDestDir: + return self.dest + base = os.path.basename(src) + if not self.keep: + base, ext = os.path.splitext(base) + return os.path.join(self.dest, base) + self.ext + + def _getFormatExt(self): + return self.format.ext + + + +def expandSources(sources): + result = [] + for src in sources: + if os.path.isdir(src): + result.extend([os.path.join(src,entry) for entry in os.listdir(src) if os.path.isfile(os.path.join(src,entry))]) + elif os.path.isfile(src): + result.append(src) + return result + +def property(properties, which, default=None): + '''value: property p from a set of properties''' + for p in properties: + if p.Name == which: return p + return default + +def format(attribute, value): + for fmt in formats()[0]: + if value == property(fmt, attribute).Value: + return fmt + +def formats(): + resolve = filterFactory.getElementNames() + installed = [] + missing = [] + for name in resolve: + try: + installed.append(filterFactory.getByName(name)) + except NoSuchElementException, e: + print >> sys.stderr, 'Warning: NoSuchElementException: ', + print >> sys.stderr, e.Message + missing.append(name) + return iter(installed), iter(missing) + + +def fileformat(fileName): + url=uno.systemPathToFileUrl(fileName) + type_ = typeDetection.queryTypeByURL(url) + fmt = format('Type', type_) + return fmt + + +def componentContext(): + """ + Start OO in child process and connect to it. + Return OO component context object.
+ """ + pid = os.fork() + if not pid: + pid = os.getpid() + os.setpgid(pid,pid) + os.system("soffice -headless -norestore '-accept=socket,host=localhost,port=%s;urp;' &" % OOCONV_PORT) + while 1: + time.sleep(1) + else: + limit = time.time() + OOCONV_MAX_STARTUP_TIME + context = None + contextproxy = uno.getComponentContext() + resolver=contextproxy.ServiceManager.createInstanceWithContext( + "com.sun.star.bridge.UnoUrlResolver", contextproxy) + + while time.time() < limit: + try: + context = resolver.resolve( + "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % OOCONV_PORT) + break + except: + pass + time.sleep(5) + if context is None: + pgid = os.getpgid(pid) + os.killpg(pgid, signal.SIGTERM) + return None, None + return context, pid + + +def connect(host='localhost', port=OOCONV_PORT): + """ + Try to connect to the running OO instance + """ + context = uno.getComponentContext() + resolver=context.ServiceManager.createInstanceWithContext( + "com.sun.star.bridge.UnoUrlResolver", context) + try: + return resolver.resolve( + "uno:socket,host=%s,port=%s;urp;StarOffice.ComponentContext" % (host, port)) + except: + pass + return None + +def options(): + usage1 = "usage: $(basename $0) [options] <source file> <destination file>" + usage2 = " or: $(basename $0) --format=<format> [options] sources... 
<destination dir>" + usage3 = " or: $(basename $0) --list" + usage4 = " or: $(basename $0) --help" + parser = OptionParser(usage1+'\n'+usage2+'\n'+usage3+'\n'+usage4) + parser.add_option('-e', '--ext', dest='ext', + help='Output extension', metavar='ext') + parser.add_option('-f','--format', dest='format', + help='Output file format', metavar='format') + parser.add_option('-k', '--keep', dest='keep', + help='Keep original extension', metavar='keep') + parser.add_option('-p','--oopath', dest='oopath', metavar='path', + help='Path to OO program directory') + parser.add_option('-l','--list', dest='listFormats', action="store_true", + default=False, + help='List all available formats') + parser.add_option('-a' ,'--laddr', dest='listener', metavar='host[:port]', + help='If this option exists then try to connect to given OO listener address') + return parser + +def listFormats(): + installed, missing = formats() + if installed: + print '\n\nAvailable formats: ' + for i in installed: + print property(i, 'Name').Value + if missing: + print '\n\nFormats not currently installed: ' + for i in missing: + print i + +def services(): + smgr=context.ServiceManager + globals()['desktop'] = \ + smgr.createInstanceWithContext("com.sun.star.frame.Desktop",context) + globals()['typeDetection'] = \ + smgr.createInstance('com.sun.star.document.TypeDetection') + globals()['filterFactory'] = \ + smgr.createInstance('com.sun.star.document.FilterFactory') + +def opts_format(parser, opts, dest, isDestDir): + ## if destination is directory then format option is required + if isDestDir: + if not opts.format: + parser.error('format option is required') + return format('Name', opts.format) + elif not opts.format: + ## try to guess format by the output filename extension + format1 = fileformat(dest) + if not format1: + parser.error("can't guess output format for %s" % dest) + else: + format1 = format('Name', opts.format) + if not format1: + err = 'Unknown format: %s. 
' % opts.format + err += 'Please use --list to see formats' + parser.error(err) + return format1 + + +def main(): + parser = options() + opts, args = parser.parse_args() + pid = None + try: + importUNO() + if opts.listener: + parts = opts.listener.split(':') + host = parts[0] + port=OOCONV_PORT + if len(parts)>1: + port = int(parts[1]) + context = connect(host, port) + else: + context, pid = componentContext() + + if context is None: + print "Could not connect to running openoffice" + sys.exit(1) + globals()['context'] = context + + services() + + if opts.listFormats: + listFormats() + sys.exit(0) + + if len(args)<1: + parser.error('incorrect number of arguments') + + if len(args) == 1: + dest = os.getcwd() + sources = [os.path.abspath(args[0])] + isDestDir = True + else: + dest = os.path.abspath(args[-1]) + sources = [os.path.abspath(src) for src in args[:-1]] + isDestDir = os.path.isdir(dest) + ## if we have more than one input files then last argument must be output directory + if len(args) > 2 and not isDestDir: + parser.error('last argument must be directory') + if not isDestDir and len(args)==2: + src = sources[0] + if not os.path.isfile(src): + parser.error('first argument must be a path to the file in this case') + + format1 = opts_format(parser, opts, dest, isDestDir) + + if opts.oopath: + os.environ['PATH'] = '%s:%s' % (os.environ['PATH'], opts.oopath) + sys.path.append(opts.oopath) + + sources = expandSources(sources) + + converter = OOConverter(desktop) + iterator = FileIterator(sources, dest, format1, opts.ext, opts.keep) + for fromFile, toFile, format1 in iterator: + print 'Converting %s to %s (format: %s)...' % ( + os.path.basename(fromFile), + os.path.basename(toFile), + property(format1, 'Name').Value), + converter.convert(fromFile, toFile, format1) + print 'successfully.' 
+ finally: + if pid: + pgid = os.getpgid(pid) + os.killpg(pgid, signal.SIGTERM) + time.sleep(2) + os.killpg(pgid, signal.SIGKILL) + + +if __name__ == '__main__': + main() + +EOF + +if [ "$OOXVFBJOB" ]; then + oo_log $LOG_INFO "Killing Xvfb, running as process number $OOXVFBJOB" + kill "$OOXVFBJOB" + #necessary when process is headless + kill -s 9 $OOXVFBJOB + rm -f /tmp/.X$OOXVFB_START-lock +fi diff --git a/scribo/sandbox/arthur/xml_transform/templates/opendoc/opendoc_generator.sh b/scribo/sandbox/arthur/xml_transform/templates/opendoc/opendoc_generator.sh deleted file mode 100755 index c2d9e6f..0000000 --- a/scribo/sandbox/arthur/xml_transform/templates/opendoc/opendoc_generator.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -if [ -z "$1" ] -then - echo "opendoc_generator <odt|odf>"; -else - if [ "$1" == "odt" ] - then - echo "saving to output.odt"; - xsltproc xsl.xsl output.xml > output.html - ooconvert output.html output.odt - rm output.html - elif [ "$1" == "odf" ] - then - echo "saving to output.odf"; - xsltproc xsl.xsl output.xml > output.html - ooconvert output.html output.odf; - rm output.html - else - echo "opendoc_generator <odt|odf>"; - fi -fi - -#rm output.html \ No newline at end of file diff --git a/scribo/sandbox/arthur/xml_transform/templates/opendoc/xsl.xsl b/scribo/sandbox/arthur/xml_transform/templates/opendoc/xsl.xsl index bf1cc30..e9266d7 100755 --- a/scribo/sandbox/arthur/xml_transform/templates/opendoc/xsl.xsl +++ b/scribo/sandbox/arthur/xml_transform/templates/opendoc/xsl.xsl @@ -6,7 +6,38 @@ <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /> <title>SCRIBO</title> - <link rel="stylesheet" type="text/css" href="css.css" /> + <style type="text/css"> + .line + { + position:absolute; + z-index:7; + display:inline; + white-space:pre; + + /* FIXME */ + letter-spacing:-2px; + + padding:0px; + margin:0px; + filter:alpha(opacity=100); + font-family:"Times New Roman", Times, serif; + } + .para + { + position:absolute; + 
z-index:6; + } + .region + { + position:absolute; + z-index:5; + } + .image + { + position:absolute; + border:0; + } + </style> </head> <body> diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/main.xsl b/scribo/sandbox/arthur/xml_transform/templates/pdf/main.xsl index 065831e..0dbb673 100644 --- a/scribo/sandbox/arthur/xml_transform/templates/pdf/main.xsl +++ b/scribo/sandbox/arthur/xml_transform/templates/pdf/main.xsl @@ -4,6 +4,7 @@ <!-- FILE: main.xsl DESCRIPTION: produce pdf output --> + <xsl:import href="regions.xsl"/> <xsl:import href="line.xsl"/> <xsl:template match="/"> @@ -31,28 +32,7 @@ <fo:page-sequence master-reference="page"> <fo:flow flow-name="xsl-region-body"> - <xsl:apply-imports /> - - <fo:block-container position="absolute" z-index="-3"> - - <xsl:attribute name="left"> - 0px - </xsl:attribute> - <xsl:attribute name="top"> - 0px - </xsl:attribute> - - <fo:block> - <fo:external-graphic> - <xsl:attribute name="src"> - url('img/image.png') - </xsl:attribute> - </fo:external-graphic> - </fo:block> - - </fo:block-container> - </fo:flow> </fo:page-sequence> diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/main64.xsl b/scribo/sandbox/arthur/xml_transform/templates/pdf/main64.xsl deleted file mode 100644 index ecc7443..0000000 --- a/scribo/sandbox/arthur/xml_transform/templates/pdf/main64.xsl +++ /dev/null @@ -1,62 +0,0 @@ -<?xml version="1.0"?> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:fo="http://www.w3.org/1999/XSL/Format" version="1.0"> - - <!-- FILE: main.xsl - DESCRIPTION: produce pdf output --> - - <xsl:import href="line.xsl"/> - - <xsl:template match="/"> - <fo:root> - - <fo:layout-master-set> - <fo:simple-page-master master-name="page" margin-left="0cm" margin-right="0cm" margin-bottom="0cm" margin-top="0cm"> - - <!-- Output will be sized with image size --> - <xsl:attribute name="page-width"> - <xsl:value-of select="pcGts/page/@image_width" />px - </xsl:attribute> - <xsl:attribute 
name="page-height"> - <xsl:value-of select="pcGts/page/@image_height" />px - </xsl:attribute> - - <fo:region-body - margin-top="0mm" margin-bottom="0mm" - margin-left="0mm" margin-right="0mm"/> - <fo:region-after extent="0mm"/> - - </fo:simple-page-master> - </fo:layout-master-set> - - <fo:page-sequence master-reference="page"> - - <fo:flow flow-name="xsl-region-body"> - - <xsl:apply-imports /> - - <fo:block-container position="absolute" z-index="-3"> - - <xsl:attribute name="left"> - 0px - </xsl:attribute> - <xsl:attribute name="top"> - 0px - </xsl:attribute> - - <fo:block> - <fo:external-graphic> - <xsl:attribute name="src"> - data:image/png;base64,<xsl:value-of select="pcGts/page/image_region/container/data" /> - </xsl:attribute> - </fo:external-graphic> - </fo:block> - - </fo:block-container> - - </fo:flow> - - </fo:page-sequence> - </fo:root> - </xsl:template> - -</xsl:stylesheet> diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/pdf_generator.sh b/scribo/sandbox/arthur/xml_transform/templates/pdf/pdf_generator.sh deleted file mode 100644 index c6339a5..0000000 --- a/scribo/sandbox/arthur/xml_transform/templates/pdf/pdf_generator.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -fop -xsl main.xsl -xml output.xml -pdf output.pdf \ No newline at end of file diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/regions_base64.xsl b/scribo/sandbox/arthur/xml_transform/templates/pdf/regions.xsl similarity index 100% rename from scribo/sandbox/arthur/xml_transform/templates/pdf/regions_base64.xsl rename to scribo/sandbox/arthur/xml_transform/templates/pdf/regions.xsl diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/regions_png.xsl b/scribo/sandbox/arthur/xml_transform/templates/pdf/regions_png.xsl deleted file mode 100644 index 65a8c79..0000000 --- a/scribo/sandbox/arthur/xml_transform/templates/pdf/regions_png.xsl +++ /dev/null @@ -1,59 +0,0 @@ -<?xml version="1.0"?> -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
xmlns:fo="http://www.w3.org/1999/XSL/Format" version="1.0"> - - <!-- FILE: regions.xsl - DESCRIPTION: Match all regions that are not text to display them --> - - <xsl:template match="pcGts/page/image_region| - pcGts/page/graphic_region| - pcGts/page/chart_region| - pcGts/page/table_region| - pcGts/page/separator_region"> - - <!-- ID of the region, used to display id.png --> - <xsl:variable name="id"> - <xsl:value-of select="@id" /> - </xsl:variable> - - <!-- Regions coordinates --> - <xsl:variable name="y1"> - <xsl:for-each select="coords/point"> - <xsl:sort select="@y" order="ascending" data-type="number"/> - <xsl:if test="position() = 1"> - <xsl:value-of select="@y" /> - </xsl:if> - </xsl:for-each> - </xsl:variable> - - <xsl:variable name="x1"> - <xsl:for-each select="coords/point"> - <xsl:sort select="@x" order="ascending" data-type="number"/> - <xsl:if test="position() = 1"> - <xsl:value-of select="@x" /> - </xsl:if> - </xsl:for-each> - </xsl:variable> - <!-- END OF regions coordinates --> - - <fo:block-container position="absolute"> - - <xsl:attribute name="left"> - <xsl:value-of select="$x1" />px - </xsl:attribute> - <xsl:attribute name="top"> - <xsl:value-of select="$y1" />px - </xsl:attribute> - - <fo:block> - <fo:external-graphic> - <xsl:attribute name="src"> - url('img/<xsl:value-of select="$id" />.png') - </xsl:attribute> - </fo:external-graphic> - </fo:block> - - </fo:block-container> - - </xsl:template> - -</xsl:stylesheet> diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/svg_generator.sh b/scribo/sandbox/arthur/xml_transform/templates/pdf/svg_generator.sh deleted file mode 100644 index 0d4907c..0000000 --- a/scribo/sandbox/arthur/xml_transform/templates/pdf/svg_generator.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -fop -xsl main.xsl -xml output.xml -svg output.svg \ No newline at end of file diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/line.xsl b/scribo/sandbox/arthur/xml_transform/templates/svg/line.xsl similarity 
index 100% copy from scribo/sandbox/arthur/xml_transform/templates/pdf/line.xsl copy to scribo/sandbox/arthur/xml_transform/templates/svg/line.xsl diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/main_crop.xsl b/scribo/sandbox/arthur/xml_transform/templates/svg/main.xsl similarity index 100% rename from scribo/sandbox/arthur/xml_transform/templates/pdf/main_crop.xsl rename to scribo/sandbox/arthur/xml_transform/templates/svg/main.xsl diff --git a/scribo/sandbox/arthur/xml_transform/templates/pdf/regions_svg.xsl b/scribo/sandbox/arthur/xml_transform/templates/svg/regions.xsl similarity index 100% rename from scribo/sandbox/arthur/xml_transform/templates/pdf/regions_svg.xsl rename to scribo/sandbox/arthur/xml_transform/templates/svg/regions.xsl diff --git a/scribo/sandbox/arthur/xml_transform/templates/xml_transform.sh b/scribo/sandbox/arthur/xml_transform/templates/xml_transform.sh new file mode 100755 index 0000000..05e9569 --- /dev/null +++ b/scribo/sandbox/arthur/xml_transform/templates/xml_transform.sh @@ -0,0 +1,134 @@ +#!/bin/sh + +SRC_DIR=`dirname $0`; + +if [ -z "$1" ] +then + echo "Please give an output type !" +else + if [ -z "$2" ] + then + echo "Please give input XML file !" + else + +# HTML + if [ "$1" = "html" ] + then + which xsltproc > /dev/null; + if [ $? -eq 0 ] + then + out="$3"; + if [ -z "$out" ] + then + out="output.html" + fi + out=`readlink -f $out` + + echo "HTML output : $out" + xsltproc "$SRC_DIR/html/main.xsl" "$2" > "$out" + + rm -f $2; + fi +# HTML BASE 64 + elif [ "$1" = "html-base64" ] + then + which xsltproc > /dev/null; + if [ $? -eq 0 ] + then + out="$3"; + if [ -z "$out" ] + then + out="output.html" + fi + out=`readlink -f $out` + + echo "HTML output : $out" + xsltproc "$SRC_DIR/html/main_base64.xsl" "$2" > "$out" + fi + +# PDF/SVG + elif [ -n "`echo $1 | grep -E 'pdf(\-base64)?|svg'`" ] + then + which fop > /dev/null; + if [ $? 
-eq 0 ] + then + out="$3"; + +# PDF + if [ "$1" = "pdf" ] + then + if [ -z "$out" ] + then + out="output.pdf" + fi + out=`readlink -f $out` + + echo "PDF output : $out" + fop -xsl "$SRC_DIR/pdf/main.xsl" -xml "$2" -pdf "$out"; + +# SVG + elif [ "$1" = "svg" ] + then + if [ -z "$out" ] + then + out="output.svg" + fi + out=`readlink -f $out` + + echo "SVG output : $out" + fop -xsl "$SRC_DIR/svg/main.xsl" -xml "$2" -svg "$out" + else + echo "Wrong output format !"; + fi + else + echo "fop >= 0.95 required !" + fi + +# Open Document + elif [ "$1" = "oo-doc" ] + then + which xsltproc > /dev/null; + if [ $? -eq 0 ] + then + + out="$3"; + if [ -z "$out" ] + then + out="output.odt" + fi + out=`readlink -f $out` + + xsltproc "$SRC_DIR/html/main.xsl" "$2/output.xml" > "$2/out.html" + + echo "Open Document output : $out" + "$SRC_DIR/ooconvert" "$2/out.html" "$2/out.odt" > /dev/null + + cur_dir=`pwd`; + tmp_dir="$2/oo_tmp" + + mkdir $tmp_dir + unzip $tmp_dir/../out.odt -d $tmp_dir > /dev/null + + cat $tmp_dir/content.xml | sed -re 's!\.\./([a-zA-Z0-9])!Pictures/\1!g' >> $tmp_dir/tmp.xml + + mkdir $tmp_dir/Pictures + + mv $2/*.png $tmp_dir/Pictures/ + + mv $tmp_dir/tmp.xml $tmp_dir/content.xml + + cd $tmp_dir + zip zip.odt -r * > /dev/null + cp zip.odt "$out" + cd $cur_dir; + + rm -r $2; + else + echo "xsltproc required !" 
+ fi + + else + echo "Wrong output format !"; + fi + fi +fi \ No newline at end of file diff --git a/scribo/sandbox/arthur/xml_transform/xml_transform.cc b/scribo/sandbox/arthur/xml_transform/xml_transform.cc index 90af5a5..be5a137 100644 --- a/scribo/sandbox/arthur/xml_transform/xml_transform.cc +++ b/scribo/sandbox/arthur/xml_transform/xml_transform.cc @@ -39,9 +39,15 @@ XmlTransform::XmlTransform(QString xml_file, QString image_file, QString output, QFile fx(xml_file_); if (file_ != QString::Null()) - output_dir_ = "/tmp/xml_transform"; + { + output_dir_ = QDir::tempPath() + "/xml_transform/"; + tmp_ = true; + } else - output_dir_ = output; + { + tmp_ = false; + output_dir_ = output; + } if (output_dir_ != QString::Null() && !loader_->set_output(output_dir_)) { @@ -68,29 +74,15 @@ XmlTransform::~XmlTransform() void XmlTransform::fromBase64() { - QString output = output_dir_; - output.append("img"); - - if (loader_->set_output(output)) - crop_->from_base64(); - else - abort(); + crop_->from_base64(); } -void XmlTransform::toBase64(bool crop) +void XmlTransform::toBase64(bool nocrop) { - QString output = output_dir_; - output.append("img"); - - if (loader_->set_output(output)) - { - if (!crop) - crop_->crop_regions(); + if (!nocrop) + crop_->crop_regions(tmp_); - crop_->to_base64(file_, crop); - } - else - abort(); + crop_->to_base64(file_, nocrop); } void XmlTransform::createPDF (bool crop, bool base64) @@ -99,16 +91,10 @@ void XmlTransform::createPDF (bool crop, bool base64) { if (!base64) { - QString output = output_dir_; - output.append("img"); - - if (loader_->set_output(output)) - { - if (crop) - crop_->crop_regions(); - else - crop_->save_image("image"); - } + if (crop) + crop_->crop_regions(tmp_); + else + crop_->save_image(output_dir_ + "image.png"); } loader_->add_pdf_templates(crop, base64, output_dir_); @@ -120,18 +106,7 @@ void XmlTransform::createPDF (bool crop, bool base64) void XmlTransform::createOpen () { if (loader_->xml_output(xml_file_, false, 
output_dir_)) - { - QString output = output_dir_; - output.append("img"); - - if (loader_->set_output(output)) - { - crop_->crop_regions(); - loader_->add_open_templates(output_dir_); - } - else - abort(); - } + crop_->crop_regions(tmp_); else abort(); } @@ -139,9 +114,7 @@ void XmlTransform::createOpen () void XmlTransform::createSVG () { if (loader_->xml_output(xml_file_, false, output_dir_)) - { - loader_->add_svg_templates(output_dir_); - } + loader_->add_svg_templates(output_dir_); else abort(); } @@ -151,15 +124,8 @@ void XmlTransform::createHTML(bool base64) if (loader_->xml_output(xml_file_, true, output_dir_)) { if (!base64) - { - QString output = output_dir_; - output.append("img"); - if (loader_->set_output(output)) - crop_->crop_regions(); - else - abort(); - } - loader_->add_html_templates(base64, output_dir_); + crop_->crop_regions(tmp_); + // loader_->add_html_templates(base64, output_dir_); } else abort(); diff --git a/scribo/sandbox/arthur/xml_transform/xml_transform.hh b/scribo/sandbox/arthur/xml_transform/xml_transform.hh index ad83fd5..ee06907 100644 --- a/scribo/sandbox/arthur/xml_transform/xml_transform.hh +++ b/scribo/sandbox/arthur/xml_transform/xml_transform.hh @@ -40,6 +40,7 @@ class XmlTransform : public QObject XmlTransform(QString xml_file, QString image_file, QString output, QString file = QString::Null()); ~XmlTransform(); + QString out() { return output_dir_; } void createHTML(bool base64); void createPDF(bool crop, bool base64); void createSVG(); @@ -54,6 +55,7 @@ private: Loader* loader_; ImageCrop* crop_; QString file_; + bool tmp_; }; #endif // !XML_TRANFORM_HH diff --git a/scribo/sandbox/arthur/xml_transform/xml_transform.pro b/scribo/sandbox/arthur/xml_transform/xml_transform.pro index c038cf6..608ce64 100644 --- a/scribo/sandbox/arthur/xml_transform/xml_transform.pro +++ b/scribo/sandbox/arthur/xml_transform/xml_transform.pro @@ -3,14 +3,14 @@ ###################################################################### TEMPLATE = 
app -TARGET = +TARGET = DEPENDPATH += . INCLUDEPATH += . QMAKE_CXXFLAGS += -I/amd/beyrouth/home/lrde/stage/crepin/git/olena/milena -I/amd/beyrouth/home/lrde/stage/crepin/git/olena/ QT += xml LIBS += `Magick++-config --libs` - - -# Input HEADERS += common.hh image_crop.hh loader.hh xml_transform.hh SOURCES += common.cc image_crop.cc loader.cc main.cc xml_transform.cc +PWD += + +QMAKE_POST_LINK += cp -r $$PWD/templates $$OUT_PWD -- 1.5.6.5
participants (1)
-
Guillaume Lazzara