feat: PDF form extraction and filling using pdftk

Signed-off-by: Luka Trovic <luka@nextcloud.com>
This commit is contained in:
Luka Trovic 2024-07-31 14:13:41 +02:00 committed by Julius Härtl
parent be2b856862
commit 2aaeac4f7c
No known key found for this signature in database
GPG key ID: 4C614C6ED2CDE6DF
11 changed files with 317 additions and 57 deletions

View file

@ -78,6 +78,6 @@ return [
['name' => 'Target#getPreview', 'url' => '/api/v1/targets/preview', 'verb' => 'GET'],
['name' => 'TemplateField#extractFields', 'url' => '/api/v1/template/fields/extract/{fileId}', 'verb' => 'GET'],
['name' => 'TemplateField#fillFields', 'url' => '/api/v1/template/fields/fill', 'verb' => 'POST'],
['name' => 'TemplateField#fillFields', 'url' => '/api/v1/template/fields/fill/{fileId}', 'verb' => 'POST'],
],
];

View file

@ -10,7 +10,8 @@
},
"require": {
"ext-json": "*",
"ext-simplexml": "*"
"ext-simplexml": "*",
"mikehaertl/php-pdftk": "^0.13.1"
},
"require-dev": {
"roave/security-advisories": "dev-master",

146
composer.lock generated
View file

@ -4,8 +4,148 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "f118a358162577fab7ee75cad4ef1383",
"packages": [],
"content-hash": "f00345e82d316ea492b86fb74fb09c84",
"packages": [
{
"name": "mikehaertl/php-pdftk",
"version": "0.13.1",
"source": {
"type": "git",
"url": "https://github.com/mikehaertl/php-pdftk.git",
"reference": "3851b08c1027489e48387d7c14c27bc295d98239"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mikehaertl/php-pdftk/zipball/3851b08c1027489e48387d7c14c27bc295d98239",
"reference": "3851b08c1027489e48387d7c14c27bc295d98239",
"shasum": ""
},
"require": {
"mikehaertl/php-shellcommand": "^1.6.3",
"mikehaertl/php-tmpfile": "^1.1.0",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": ">4.0 <9.4"
},
"type": "library",
"autoload": {
"psr-4": {
"mikehaertl\\pdftk\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Michael Haertl",
"email": "haertl.mike@gmail.com"
}
],
"description": "A PDF conversion and form utility based on pdftk.",
"keywords": [
"pdf",
"pdftk"
],
"support": {
"issues": "https://github.com/mikehaertl/php-pdftk/issues",
"source": "https://github.com/mikehaertl/php-pdftk/tree/0.13.1"
},
"time": "2023-11-03T16:06:08+00:00"
},
{
"name": "mikehaertl/php-shellcommand",
"version": "1.7.0",
"source": {
"type": "git",
"url": "https://github.com/mikehaertl/php-shellcommand.git",
"reference": "e79ea528be155ffdec6f3bf1a4a46307bb49e545"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mikehaertl/php-shellcommand/zipball/e79ea528be155ffdec6f3bf1a4a46307bb49e545",
"reference": "e79ea528be155ffdec6f3bf1a4a46307bb49e545",
"shasum": ""
},
"require": {
"php": ">= 5.3.0"
},
"require-dev": {
"phpunit/phpunit": ">4.0 <=9.4"
},
"type": "library",
"autoload": {
"psr-4": {
"mikehaertl\\shellcommand\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Michael Härtl",
"email": "haertl.mike@gmail.com"
}
],
"description": "An object oriented interface to shell commands",
"keywords": [
"shell"
],
"support": {
"issues": "https://github.com/mikehaertl/php-shellcommand/issues",
"source": "https://github.com/mikehaertl/php-shellcommand/tree/1.7.0"
},
"time": "2023-04-19T08:25:22+00:00"
},
{
"name": "mikehaertl/php-tmpfile",
"version": "1.2.1",
"source": {
"type": "git",
"url": "https://github.com/mikehaertl/php-tmpfile.git",
"reference": "70a5b70b17bc0d9666388e6a551ecc93d0b40a10"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/mikehaertl/php-tmpfile/zipball/70a5b70b17bc0d9666388e6a551ecc93d0b40a10",
"reference": "70a5b70b17bc0d9666388e6a551ecc93d0b40a10",
"shasum": ""
},
"require-dev": {
"php": ">=5.3.0",
"phpunit/phpunit": ">4.0 <=9.4"
},
"type": "library",
"autoload": {
"psr-4": {
"mikehaertl\\tmp\\": "src/"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Michael Härtl",
"email": "haertl.mike@gmail.com"
}
],
"description": "A convenience class for temporary files",
"keywords": [
"files"
],
"support": {
"issues": "https://github.com/mikehaertl/php-tmpfile/issues",
"source": "https://github.com/mikehaertl/php-tmpfile/tree/1.2.1"
},
"time": "2021-03-01T18:26:25+00:00"
}
],
"packages-dev": [
{
"name": "doctrine/instantiator",
@ -2981,5 +3121,5 @@
"platform-overrides": {
"php": "8.0"
},
"plugin-api-version": "2.6.0"
"plugin-api-version": "2.3.0"
}

View file

@ -6,7 +6,9 @@
},
"autoload" : {
"psr-4": {
"OCA\\Richdocuments\\": "../lib/"
"OCA\\Richdocuments\\": "../lib/",
"mikehaertl\\pdftk\\": "../vendor/mikehaertl/php-pdftk/src/",
"mikehaertl\\shellcommand\\": "../vendor/mikehaertl/php-shellcommand/src/"
}
}
}

View file

@ -45,34 +45,35 @@ class ClassLoader
/** @var \Closure(string):void */
private static $includeFile;
/** @var string|null */
/** @var ?string */
private $vendorDir;
// PSR-4
/**
* @var array<string, array<string, int>>
* @var array[]
* @psalm-var array<string, array<string, int>>
*/
private $prefixLengthsPsr4 = array();
/**
* @var array<string, list<string>>
* @var array[]
* @psalm-var array<string, array<int, string>>
*/
private $prefixDirsPsr4 = array();
/**
* @var list<string>
* @var array[]
* @psalm-var array<string, string>
*/
private $fallbackDirsPsr4 = array();
// PSR-0
/**
* List of PSR-0 prefixes
*
* Structured as array('F (first letter)' => array('Foo\Bar (full prefix)' => array('path', 'path2')))
*
* @var array<string, array<string, list<string>>>
* @var array[]
* @psalm-var array<string, array<string, string[]>>
*/
private $prefixesPsr0 = array();
/**
* @var list<string>
* @var array[]
* @psalm-var array<string, string>
*/
private $fallbackDirsPsr0 = array();
@ -80,7 +81,8 @@ class ClassLoader
private $useIncludePath = false;
/**
* @var array<string, string>
* @var string[]
* @psalm-var array<string, string>
*/
private $classMap = array();
@ -88,20 +90,21 @@ class ClassLoader
private $classMapAuthoritative = false;
/**
* @var array<string, bool>
* @var bool[]
* @psalm-var array<string, bool>
*/
private $missingClasses = array();
/** @var string|null */
/** @var ?string */
private $apcuPrefix;
/**
* @var array<string, self>
* @var self[]
*/
private static $registeredLoaders = array();
/**
* @param string|null $vendorDir
* @param ?string $vendorDir
*/
public function __construct($vendorDir = null)
{
@ -110,7 +113,7 @@ class ClassLoader
}
/**
* @return array<string, list<string>>
* @return string[]
*/
public function getPrefixes()
{
@ -122,7 +125,8 @@ class ClassLoader
}
/**
* @return array<string, list<string>>
* @return array[]
* @psalm-return array<string, array<int, string>>
*/
public function getPrefixesPsr4()
{
@ -130,7 +134,8 @@ class ClassLoader
}
/**
* @return list<string>
* @return array[]
* @psalm-return array<string, string>
*/
public function getFallbackDirs()
{
@ -138,7 +143,8 @@ class ClassLoader
}
/**
* @return list<string>
* @return array[]
* @psalm-return array<string, string>
*/
public function getFallbackDirsPsr4()
{
@ -146,7 +152,8 @@ class ClassLoader
}
/**
* @return array<string, string> Array of classname => path
* @return string[] Array of classname => path
* @psalm-return array<string, string>
*/
public function getClassMap()
{
@ -154,7 +161,8 @@ class ClassLoader
}
/**
* @param array<string, string> $classMap Class to filename map
* @param string[] $classMap Class to filename map
* @psalm-param array<string, string> $classMap
*
* @return void
*/
@ -171,25 +179,24 @@ class ClassLoader
* Registers a set of PSR-0 directories for a given prefix, either
* appending or prepending to the ones previously set for this prefix.
*
* @param string $prefix The prefix
* @param list<string>|string $paths The PSR-0 root directories
* @param bool $prepend Whether to prepend the directories
* @param string $prefix The prefix
* @param string[]|string $paths The PSR-0 root directories
* @param bool $prepend Whether to prepend the directories
*
* @return void
*/
public function add($prefix, $paths, $prepend = false)
{
$paths = (array) $paths;
if (!$prefix) {
if ($prepend) {
$this->fallbackDirsPsr0 = array_merge(
$paths,
(array) $paths,
$this->fallbackDirsPsr0
);
} else {
$this->fallbackDirsPsr0 = array_merge(
$this->fallbackDirsPsr0,
$paths
(array) $paths
);
}
@ -198,19 +205,19 @@ class ClassLoader
$first = $prefix[0];
if (!isset($this->prefixesPsr0[$first][$prefix])) {
$this->prefixesPsr0[$first][$prefix] = $paths;
$this->prefixesPsr0[$first][$prefix] = (array) $paths;
return;
}
if ($prepend) {
$this->prefixesPsr0[$first][$prefix] = array_merge(
$paths,
(array) $paths,
$this->prefixesPsr0[$first][$prefix]
);
} else {
$this->prefixesPsr0[$first][$prefix] = array_merge(
$this->prefixesPsr0[$first][$prefix],
$paths
(array) $paths
);
}
}
@ -219,9 +226,9 @@ class ClassLoader
* Registers a set of PSR-4 directories for a given namespace, either
* appending or prepending to the ones previously set for this namespace.
*
* @param string $prefix The prefix/namespace, with trailing '\\'
* @param list<string>|string $paths The PSR-4 base directories
* @param bool $prepend Whether to prepend the directories
* @param string $prefix The prefix/namespace, with trailing '\\'
* @param string[]|string $paths The PSR-4 base directories
* @param bool $prepend Whether to prepend the directories
*
* @throws \InvalidArgumentException
*
@ -229,18 +236,17 @@ class ClassLoader
*/
public function addPsr4($prefix, $paths, $prepend = false)
{
$paths = (array) $paths;
if (!$prefix) {
// Register directories for the root namespace.
if ($prepend) {
$this->fallbackDirsPsr4 = array_merge(
$paths,
(array) $paths,
$this->fallbackDirsPsr4
);
} else {
$this->fallbackDirsPsr4 = array_merge(
$this->fallbackDirsPsr4,
$paths
(array) $paths
);
}
} elseif (!isset($this->prefixDirsPsr4[$prefix])) {
@ -250,18 +256,18 @@ class ClassLoader
throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
}
$this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
$this->prefixDirsPsr4[$prefix] = $paths;
$this->prefixDirsPsr4[$prefix] = (array) $paths;
} elseif ($prepend) {
// Prepend directories for an already registered namespace.
$this->prefixDirsPsr4[$prefix] = array_merge(
$paths,
(array) $paths,
$this->prefixDirsPsr4[$prefix]
);
} else {
// Append directories for an already registered namespace.
$this->prefixDirsPsr4[$prefix] = array_merge(
$this->prefixDirsPsr4[$prefix],
$paths
(array) $paths
);
}
}
@ -270,8 +276,8 @@ class ClassLoader
* Registers a set of PSR-0 directories for a given prefix,
* replacing any others previously set for this prefix.
*
* @param string $prefix The prefix
* @param list<string>|string $paths The PSR-0 base directories
* @param string $prefix The prefix
* @param string[]|string $paths The PSR-0 base directories
*
* @return void
*/
@ -288,8 +294,8 @@ class ClassLoader
* Registers a set of PSR-4 directories for a given namespace,
* replacing any others previously set for this namespace.
*
* @param string $prefix The prefix/namespace, with trailing '\\'
* @param list<string>|string $paths The PSR-4 base directories
* @param string $prefix The prefix/namespace, with trailing '\\'
* @param string[]|string $paths The PSR-4 base directories
*
* @throws \InvalidArgumentException
*
@ -475,9 +481,9 @@ class ClassLoader
}
/**
* Returns the currently registered loaders keyed by their corresponding vendor directories.
* Returns the currently registered loaders indexed by their corresponding vendor directories.
*
* @return array<string, self>
* @return self[]
*/
public static function getRegisteredLoaders()
{

View file

@ -78,6 +78,7 @@ return array(
'OCA\\Richdocuments\\Service\\FontService' => $baseDir . '/../lib/Service/FontService.php',
'OCA\\Richdocuments\\Service\\InitialStateService' => $baseDir . '/../lib/Service/InitialStateService.php',
'OCA\\Richdocuments\\Service\\RemoteOptionsService' => $baseDir . '/../lib/Service/RemoteOptionsService.php',
'OCA\\Richdocuments\\Service\\PdfService' => $baseDir . '/../lib/Service/PdfService.php',
'OCA\\Richdocuments\\Service\\RemoteService' => $baseDir . '/../lib/Service/RemoteService.php',
'OCA\\Richdocuments\\Service\\TemplateFieldService' => $baseDir . '/../lib/Service/TemplateFieldService.php',
'OCA\\Richdocuments\\Service\\UserScopeService' => $baseDir . '/../lib/Service/UserScopeService.php',
@ -89,4 +90,12 @@ return array(
'OCA\\Richdocuments\\TokenManager' => $baseDir . '/../lib/TokenManager.php',
'OCA\\Richdocuments\\UploadException' => $baseDir . '/../lib/UploadException.php',
'OCA\\Richdocuments\\WOPI\\Parser' => $baseDir . '/../lib/WOPI/Parser.php',
'mikehaertl\\pdftk\\Command' => $baseDir . '/../vendor/mikehaertl/php-pdftk/src/Command.php',
'mikehaertl\\pdftk\\DataFields' => $baseDir . '/../vendor/mikehaertl/php-pdftk/src/DataFields.php',
'mikehaertl\\pdftk\\FdfFile' => $baseDir . '/../vendor/mikehaertl/php-pdftk/src/FdfFile.php',
'mikehaertl\\pdftk\\InfoFields' => $baseDir . '/../vendor/mikehaertl/php-pdftk/src/InfoFields.php',
'mikehaertl\\pdftk\\InfoFile' => $baseDir . '/../vendor/mikehaertl/php-pdftk/src/InfoFile.php',
'mikehaertl\\pdftk\\Pdf' => $baseDir . '/../vendor/mikehaertl/php-pdftk/src/Pdf.php',
'mikehaertl\\pdftk\\XfdfFile' => $baseDir . '/../vendor/mikehaertl/php-pdftk/src/XfdfFile.php',
'mikehaertl\\shellcommand\\Command' => $baseDir . '/../vendor/mikehaertl/php-shellcommand/src/Command.php',
);

View file

@ -6,5 +6,7 @@ $vendorDir = dirname(__DIR__);
$baseDir = $vendorDir;
return array(
'mikehaertl\\shellcommand\\' => array($baseDir . '/../vendor/mikehaertl/php-shellcommand/src'),
'mikehaertl\\pdftk\\' => array($baseDir . '/../vendor/mikehaertl/php-pdftk/src'),
'OCA\\Richdocuments\\' => array($baseDir . '/../lib'),
);

View file

@ -7,14 +7,27 @@ namespace Composer\Autoload;
class ComposerStaticInitRichdocuments
{
public static $prefixLengthsPsr4 = array (
'O' =>
'm' =>
array (
'mikehaertl\\shellcommand\\' => 24,
'mikehaertl\\pdftk\\' => 17,
),
'O' =>
array (
'OCA\\Richdocuments\\' => 18,
),
);
public static $prefixDirsPsr4 = array (
'OCA\\Richdocuments\\' =>
'mikehaertl\\shellcommand\\' =>
array (
0 => __DIR__ . '/..' . '/../vendor/mikehaertl/php-shellcommand/src',
),
'mikehaertl\\pdftk\\' =>
array (
0 => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src',
),
'OCA\\Richdocuments\\' =>
array (
0 => __DIR__ . '/..' . '/../lib',
),
@ -93,6 +106,7 @@ class ComposerStaticInitRichdocuments
'OCA\\Richdocuments\\Service\\FontService' => __DIR__ . '/..' . '/../lib/Service/FontService.php',
'OCA\\Richdocuments\\Service\\InitialStateService' => __DIR__ . '/..' . '/../lib/Service/InitialStateService.php',
'OCA\\Richdocuments\\Service\\RemoteOptionsService' => __DIR__ . '/..' . '/../lib/Service/RemoteOptionsService.php',
'OCA\\Richdocuments\\Service\\PdfService' => __DIR__ . '/..' . '/../lib/Service/PdfService.php',
'OCA\\Richdocuments\\Service\\RemoteService' => __DIR__ . '/..' . '/../lib/Service/RemoteService.php',
'OCA\\Richdocuments\\Service\\TemplateFieldService' => __DIR__ . '/..' . '/../lib/Service/TemplateFieldService.php',
'OCA\\Richdocuments\\Service\\UserScopeService' => __DIR__ . '/..' . '/../lib/Service/UserScopeService.php',
@ -104,6 +118,14 @@ class ComposerStaticInitRichdocuments
'OCA\\Richdocuments\\TokenManager' => __DIR__ . '/..' . '/../lib/TokenManager.php',
'OCA\\Richdocuments\\UploadException' => __DIR__ . '/..' . '/../lib/UploadException.php',
'OCA\\Richdocuments\\WOPI\\Parser' => __DIR__ . '/..' . '/../lib/WOPI/Parser.php',
'mikehaertl\\pdftk\\Command' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src/Command.php',
'mikehaertl\\pdftk\\DataFields' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src/DataFields.php',
'mikehaertl\\pdftk\\FdfFile' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src/FdfFile.php',
'mikehaertl\\pdftk\\InfoFields' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src/InfoFields.php',
'mikehaertl\\pdftk\\InfoFile' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src/InfoFile.php',
'mikehaertl\\pdftk\\Pdf' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src/Pdf.php',
'mikehaertl\\pdftk\\XfdfFile' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-pdftk/src/XfdfFile.php',
'mikehaertl\\shellcommand\\Command' => __DIR__ . '/..' . '/../vendor/mikehaertl/php-shellcommand/src/Command.php',
);
public static function getInitializer(ClassLoader $loader)

View file

@ -63,7 +63,7 @@ class TemplateFieldController extends OCSController {
public function fillFields(int $fileId, array $fields): DataResponse {
try {
$this->templateFieldService->fillFields($fileId, $fields);
return new DataResponse([], Http::STATUS_OK);
} catch (\Exception $e) {
return new DataResponse(["Unable to fill fields into the given file"], Http::STATUS_INTERNAL_SERVER_ERROR);

View file

@ -0,0 +1,64 @@
<?php
/**
* SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
namespace OCA\Richdocuments\Service;
use mikehaertl\pdftk\Pdf;
use OCP\Files\Node;
use OCP\Files\Template\Field;
use OCP\Files\Template\FieldType;
use Psr\Log\LoggerInterface;
/**
 * Extracts and fills PDF form fields by running pdftk through the
 * mikehaertl\pdftk\Pdf wrapper.
 *
 * The wrapper signals failure by returning false (with details available
 * from getError()) instead of throwing, so every result is checked here and
 * converted into an exception that the existing catch blocks can handle.
 */
class PdfService {
	public function __construct(
		private LoggerInterface $logger
	) {
	}

	/**
	 * Read the form fields of a PDF and convert them to template fields.
	 *
	 * Fields whose pdftk "FieldType" value has no matching FieldType case are
	 * skipped; the remaining fields are numbered consecutively from 0.
	 *
	 * @param Node $file The PDF file node to inspect
	 * @return Field[] The recognised fields, or an empty array when extraction fails
	 */
	public function extractFields(Node $file): array {
		$filePath = $file->getStorage()->getLocalFile($file->getInternalPath());

		try {
			$this->assertLocalPath($filePath);

			$pdf = new Pdf($filePath);
			$fields = $pdf->getDataFields();
			if ($fields === false) {
				// pdftk could not be run or could not parse the file.
				throw new \RuntimeException($pdf->getError());
			}

			$templateFields = [];
			$index = 0;
			foreach ($fields as $field) {
				$fieldType = FieldType::tryFrom($field['FieldType']);
				if ($fieldType === null) {
					continue;
				}

				$templateFields[] = new Field(
					$index,
					$field['FieldName'],
					$fieldType,
				);
				$index++;
			}

			return $templateFields;
		} catch (\Exception $e) {
			$this->logger->error('Failed to extract fields from PDF: {error}', ['error' => $e->getMessage(), 'exception' => $e]);
			return [];
		}
	}

	/**
	 * Fill the form fields of a PDF, flatten the form and overwrite the file.
	 *
	 * @param Node $file The PDF file node to modify
	 * @param array $fieldValues Values handed to Pdf::fillForm() unchanged
	 * @throws \Exception When no local file is available or pdftk fails; the
	 *                    error is logged before it is rethrown
	 */
	public function fillFields(Node $file, array $fieldValues): void {
		$filePath = $file->getStorage()->getLocalFile($file->getInternalPath());

		try {
			$this->assertLocalPath($filePath);

			$pdf = new Pdf($filePath);
			$pdf->fillForm($fieldValues);
			$pdf->flatten();

			// NOTE(review): this writes straight to the storage's local file;
			// presumably the file cache (size/mtime/etag) is not updated by
			// this path - confirm whether a cache update is needed.
			if ($pdf->saveAs($filePath) === false) {
				throw new \RuntimeException($pdf->getError());
			}
		} catch (\Exception $e) {
			$this->logger->error('Failed to fill fields in PDF: {error}', ['error' => $e->getMessage(), 'exception' => $e]);
			throw $e;
		}
	}

	/**
	 * Ensure the storage returned a usable local file path.
	 *
	 * @param mixed $filePath Value returned by getLocalFile()
	 * @throws \RuntimeException When the value is not a non-empty string
	 */
	private function assertLocalPath(mixed $filePath): void {
		if (!is_string($filePath) || $filePath === '') {
			throw new \RuntimeException('No local copy of the PDF file is available');
		}
	}
}

View file

@ -8,6 +8,8 @@
namespace OCA\Richdocuments\Service;
use OCA\Richdocuments\AppConfig;
use OCA\Richdocuments\Capabilities;
use OCP\Files\File;
use OCP\Files\IRootFolder;
use OCP\Files\Node;
use OCP\Files\NotFoundException;
@ -27,7 +29,8 @@ class TemplateFieldService {
private AppConfig $appConfig,
private IRootFolder $rootFolder,
private LoggerInterface $logger,
private ICacheFactory $cacheFactory
private ICacheFactory $cacheFactory,
private PdfService $pdfService,
) {
}
@ -45,7 +48,7 @@ class TemplateFieldService {
}
try {
if (!$file) {
if (!$file || !$file instanceof File) {
throw new NotFoundException();
}
@ -57,6 +60,12 @@ class TemplateFieldService {
return $cachedResponse;
}
if ($file->getMimeType() === 'application/pdf') {
$fields = $this->pdfService->extractFields($file);
$localCache->set($cacheName, $fields, 3600);
return $fields;
}
$collaboraUrl = $this->appConfig->getCollaboraUrlInternal();
$httpClient = $this->clientService->newClient();
@ -117,7 +126,7 @@ class TemplateFieldService {
if (is_int($file)) {
$file = $this->rootFolder->getFirstNodeById($file);
if (!$file) {
if (!$file || !$file instanceof File) {
$e = new NotFoundException();
$this->logger->error($e->getMessage());
@ -125,6 +134,11 @@ class TemplateFieldService {
}
}
if ($file->getMimeType() === 'application/pdf') {
$this->pdfService->fillFields($file, $fields);
return '';
}
$collaboraUrl = $this->appConfig->getCollaboraUrlInternal();
$httpClient = $this->clientService->newClient();