diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..9decb4b Binary files /dev/null and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index c422267..8cf1a6e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,7 @@ composer.phar +composer.lock /vendor/ +/migration_todo.txt # Commit your application's lock file http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file # You may choose to ignore a library lock file http://getcomposer.org/doc/02-libraries.md#lock-file diff --git a/README.md b/README.md index 531863a..5249d53 100644 --- a/README.md +++ b/README.md @@ -1,138 +1,126 @@ -# PDF Form Filling with FPDM +# FPDM - PDF Form Data Merge -## Package +A PHP library for filling PDF forms by merging data into form fields. -The FPDM class allows to fill out PDF forms, i.e. populate fields of a PDF file. It is **developed by Olivier Plathey**, author of the [FDPF Library](http://www.fpdf.org/), and has been released as [Skript 93](http://www.fpdf.org/en/script/script93.php). +> **Fork Notice**: This is a **PHP 8.x compatible fork** of [codeshell/fpdm](https://github.com/codeshell/fpdm), originally based on [Olivier Plathey's FPDM script](http://www.fpdf.org/en/script/script93.php). 
-I created this repository for the following reasons: +## Features -- make the current FPDM source available via [composer](https://packagist.org/packages/tmw/fpdm), autoload via classmaps -- bugfixing - - FIX: compatibility issues with PHP 7.x [e376dc1](https://github.com/codeshell/fpdm/commit/e376dc157655ded24c61e098199586f3325d63c1) v2.9.1 - - FIX: filling forms in multiple files (wrong buffer usage, invalid offsets) [e376dc1](https://github.com/codeshell/fpdm/commit/e376dc157655ded24c61e098199586f3325d63c1) v2.9.1 - - FIX: convert ASCII object names to utf-8 [1eddba7](https://github.com/codeshell/fpdm/commit/1eddba76f610690821e8c0b3753df337a6cf65f7) v2.9.2 -- improvements (changes to the original codebase are prefixed with `//FIX: change description` and ended with `//ENDFIX`) - - ADD: support for checkboxes (disabled by default, activate with `$pdf->useCheckboxParser = true;`) [0375dd9](https://github.com/codeshell/fpdm/commit/0375dd95f05fd2d8d32d9ae1ab882fa0895b07b3) v2.9.2 +- Fill PDF text fields from PHP arrays or FDF files +- Checkbox support with automatic state detection +- Native form flattening (experimental) - make fields read-only without external tools +- PHP 8.0+ compatible +- No external dependencies (pure PHP) -## Version - -Based on version 2.9 (2017-05-11) available from [fpdf.org/en/script/script93.php](http://www.fpdf.org/en/script/script93.php). - -_Note: If you find that a new version has been hosted on fpdf.org, please do not hesitate to drop me [a short note](https://github.com/codeshell/fpdm/issues) to make sure I do not miss it out._ - -This repository only contains the separate php class written for form filling (FPD**M**). If you are looking for a repository containing the main FPD**F** Library, please head over to [github.com/Setasign/FPDF](https://github.com/Setasign/FPDF). - -Once again, all credits to Olivier Plathey for providing an easy to use script for form filling in addition to his FPDF library! 
- -## Installation +## Installation ### Composer -The preferred way of making FPDM available in your app is to install it via composer with - -`composer require tmw/fpdm` - -## Usage - -### Composer (autoload) - -[autoload](https://getcomposer.org/doc/01-basic-usage.md#autoloading) FPDM class files by adding this to your code: - -`require 'vendor/autoload.php';` - -### Standalone Script (legacy) - -Load the top level entry point by calling - -`require_once '/abolute/path/to/fpdm.php';` - -or - -`require_once './relative/path/to/fpdm.php';` +```bash +composer require tmw/fpdm +``` -## Customization to original code +### Manual -### classmaps vs. psr-4 (or: legacy code vs modern frameworks á la Laravel) +```php +require_once 'path/to/fpdm.php'; +``` -Autoloading classes with [namespaces](https://www.php.net/manual/en/language.namespaces.basics.php) and following [PSR-4: Autoloader](https://www.php-fig.org/psr/psr-4/) would be desireable. Especially reducing the risk of naming conflicts by using vendor namespaces. +## Usage -However, FPDM has been around for a long time and as such is used in many projects that use non-namespaced code (I refer to them as legacy projects). Legacy projects instantiate FPDM by calling `$mypdf = new FPDM()` which is unqualified but defaults to the global namespace with non-namespaced code. +### Basic Text Fields -Using psr-4 would autoload the class to a subnamespace (e.g. \codeshell\fpdm\FPDM) instead of the global namespace (e.g. \FPDM) thus breaking any legacy code no matter if it used `new FPDM()` or `new \FPDM()`. +```php +<?php +$fields = array( + 'name' => 'John Doe', + 'address' => '123 Main St', + 'city' => 'New York', +); -That's my reasoning for using classmaps over psr-4 for FPDM. Please let me know if there are use cases where classmaps won't work with modern frameworks. 
+$pdf = new FPDM('template.pdf'); +$pdf->Load($fields, true); // true = UTF-8, false = ISO-8859-1 +$pdf->Merge(); +$pdf->Output('F', 'filled.pdf'); // Save to file +``` ### Checkboxes -I added support for checkboxes. The feature is not heavily tested but works for me. Can be enabled with `useCheckboxParser = true` like so: +Enable checkbox support to toggle checkboxes in your PDF forms: ```php <?php $fields = array( - 'my_checkbox' => 'anything that evaluates to true.', // checkbox will be checked; Careful, that includes ANY non-empty string (even "no" or "unchecked") - 'another_checkbox' => false, // checkbox will be UNchecked; empty string or 0 work as well + 'agree_terms' => true, // Checked + 'newsletter' => false, // Unchecked ); -$pdf = new FPDM('template.pdf'); -$pdf->useCheckboxParser = true; // Checkbox parsing is ignored (default FPDM behaviour) unless enabled with this setting +$pdf = new FPDM('form.pdf'); +$pdf->useCheckboxParser = true; // Enable checkbox support $pdf->Load($fields, true); $pdf->Merge(); $pdf->Output(); ``` -You don't have to figure out the technical names of checkbox states. They are retrieved during the parsing process. +Checkbox state names (e.g., `Yes`/`Off`, `Oui`/`Off`) are automatically detected during parsing. -## Original Info Page -_Everything below is mirrored from http://www.fpdf.org/en/script/script93.php ._ +### Flatten (Experimental) -### Information +Make form fields read-only after filling to prevent users from editing: -Author: Olivier +```php +<?php +$pdf = new FPDM('form.pdf'); +$pdf->Load(['name' => 'John Doe']); +$pdf->Flatten(); // Enable flatten mode +$pdf->Merge(); +$pdf->Output('F', 'flattened.pdf'); -License: FPDF +// Or use shorthand: +$pdf->Merge(true); // true = flatten +``` -### Description +> **⚠️ Experimental**: This feature sets the ReadOnly flag on all form fields. The form structure is preserved but fields cannot be edited. This is a native PHP implementation that doesn't require external tools like pdftk. -This script allows to merge data into a PDF form. 
Given a template PDF with text fields, it's -possible to inject values in two different ways: +## Output Options -- from a PHP array -- from an FDF file +```php +$pdf->Output(); // Send to browser (inline) +$pdf->Output('D', 'f.pdf'); // Force download +$pdf->Output('F', 'f.pdf'); // Save to file +$pdf->Output('S'); // Return as string +``` -The resulting document is produced by the Output() method, which works the same as for FPDF. +## Debugging -Note: if your template PDF is not compatible with this script, you can process it with -[PDFtk](https://www.pdflabs.com/tools/pdftk-server/) this way: +Enable verbose mode to see parsing details: -`pdftk modele.pdf output modele2.pdf` +```php +$pdf->set_modes('verbose', true); +$pdf->set_modes('verbose_level', 3); // 1-4, higher = more detail +``` -Then try again with modele2.pdf. +> **Note**: Disable verbose mode before generating PDF output. -### Example +## Fork Changes -This example shows how to merge data from an array: +This fork includes: -```php - 'My name', - 'address' => 'My address', - 'city' => 'My city', - 'phone' => 'My phone number' -); +- **Original Author**: [Olivier Plathey](http://www.fpdf.org/) (FPDM v2.9) +- **Upstream Fork**: [codeshell/fpdm](https://github.com/codeshell/fpdm) -$pdf = new FPDM('template.pdf'); -$pdf->Load($fields, false); // second parameter: false if field values are in ISO-8859-1, true if UTF-8 -$pdf->Merge(); -$pdf->Output(); -?> -``` +## License -View the result [here](http://www.fpdf.org/en/script/ex93.pdf). 
+FPDF License diff --git a/composer.json b/composer.json index 6eb9723..b437b09 100644 --- a/composer.json +++ b/composer.json @@ -31,7 +31,7 @@ } ], "require": { - "php": ">=5.3.0" + "php": ">=8.0" }, "keywords": [ "FPDM", @@ -45,5 +45,9 @@ "support": { "issues": "https://github.com/codeshell/fpdm/issues", "source": "https://github.com/codeshell/fpdm/tree/master" + }, + "require-dev": { + "squizlabs/php_codesniffer": "^3.13", + "phpcompatibility/php-compatibility": "^9.3" } } diff --git a/src/.DS_Store b/src/.DS_Store new file mode 100644 index 0000000..157209d Binary files /dev/null and b/src/.DS_Store differ diff --git a/src/ex-array.php b/src/ex-array.php index 657c062..95524f9 100644 --- a/src/ex-array.php +++ b/src/ex-array.php @@ -15,6 +15,7 @@ $pdf = new FPDM('template.pdf'); $pdf->Load($fields, false); // second parameter: false if field values are in ISO-8859-1, true if UTF-8 +$pdf->Flatten(); $pdf->Merge(); -$pdf->Output(); +$pdf->Output('F', 'output.pdf'); ?> diff --git a/src/ex.pdf b/src/ex.pdf deleted file mode 100644 index 62eb154..0000000 Binary files a/src/ex.pdf and /dev/null differ diff --git a/src/export/fdf/forge_fdf.php b/src/export/fdf/forge_fdf.php index 99845d8..bcc4a6d 100644 --- a/src/export/fdf/forge_fdf.php +++ b/src/export/fdf/forge_fdf.php @@ -29,17 +29,17 @@ function escape_pdf_string( $ss ) $ss_esc= ''; $ss_len= strlen( $ss ); for( $ii= 0; $ii< $ss_len; ++$ii ) { - if( ord($ss{$ii})== 0x28 || // open paren - ord($ss{$ii})== 0x29 || // close paren - ord($ss{$ii})== 0x5c ) // backslash + if( ord($ss[$ii])== 0x28 || // open paren + ord($ss[$ii])== 0x29 || // close paren + ord($ss[$ii])== 0x5c ) // backslash { - $ss_esc.= chr(0x5c).$ss{$ii}; // escape the character w/ backslash + $ss_esc.= chr(0x5c).$ss[$ii]; // escape the character w/ backslash } - else if( ord($ss{$ii}) < 32 || 126 < ord($ss{$ii}) ) { - $ss_esc.= sprintf( "\\%03o", ord($ss{$ii}) ); // use an octal code + else if( ord($ss[$ii]) < 32 || 126 < ord($ss[$ii]) ) { + 
$ss_esc.= sprintf( "\\%03o", ord($ss[$ii]) ); // use an octal code } else { - $ss_esc.= $ss{$ii}; + $ss_esc.= $ss[$ii]; } } return $ss_esc; @@ -55,13 +55,13 @@ function escape_pdf_name( $ss ) $ss_esc= ''; $ss_len= strlen( $ss ); for( $ii= 0; $ii< $ss_len; ++$ii ) { - if( ord($ss{$ii}) < 33 || 126 < ord($ss{$ii}) || - ord($ss{$ii})== 0x23 ) // hash mark + if( ord($ss[$ii]) < 33 || 126 < ord($ss[$ii]) || + ord($ss[$ii])== 0x23 ) // hash mark { - $ss_esc.= sprintf( "#%02x", ord($ss{$ii}) ); // use a hex code + $ss_esc.= sprintf( "#%02x", ord($ss[$ii]) ); // use a hex code } else { - $ss_esc.= $ss{$ii}; + $ss_esc.= $ss[$ii]; } } return $ss_esc; diff --git a/src/export/pdf/pdftk.php b/src/export/pdf/pdftk.php index 5b9e92a..ff98426 100644 --- a/src/export/pdf/pdftk.php +++ b/src/export/pdf/pdftk.php @@ -15,8 +15,7 @@ ******************************************************************/ if (!defined('URL_TOOLBOX')) die("Requires the URL_TOOLBOX package!"); - - define("PHP5_ENGINE",version_compare(phpversion(), "5")); + //!NOTE try to detect your OS @@ -104,22 +103,12 @@ function pdftk($pdf_file,$fdf_file,$settings) { //echo htmlentities("$cmdline , $descriptorspec, $cwd, $env"); - if(PHP5_ENGINE) { // Php5 - $process = proc_open($cmdline, $descriptorspec, $pipes, $cwd, $env); - }else { //Php4 - $process = proc_open($cmdline, $descriptorspec, $pipes); - } + $process = proc_open($cmdline, $descriptorspec, $pipes, $cwd, $env); if (is_resource($process)) { - if(PHP5_ENGINE) { - $err=stream_get_contents($pipes[2]); - }else { //Php4 - $err= ""; - while (($str = fgets($pipes[2], 4096))) { - $err.= "$str\n"; - } - } + $err=stream_get_contents($pipes[2]); + fclose($pipes[2]); diff --git a/src/filters/FilterASCII85.php b/src/filters/FilterASCII85.php index d5a1c05..1bad986 100644 --- a/src/filters/FilterASCII85.php +++ b/src/filters/FilterASCII85.php @@ -26,8 +26,7 @@ if (!defined('ORD_tilde')) define('ORD_tilde', ord('~')); -$__tmp = version_compare(phpversion(), "5") == -1 ? 
array('FilterASCII85') : array('FilterASCII85', false); -if (!call_user_func_array('class_exists', $__tmp)) { +if (!class_exists('FilterASCII85', false)) { if(isset($FPDM_FILTERS)) array_push($FPDM_FILTERS,"ASCII85Decode"); @@ -103,6 +102,4 @@ function encode($in) { return $this->error("ASCII85 encoding not implemented."); } } -} - -unset($__tmp); \ No newline at end of file +} \ No newline at end of file diff --git a/src/filters/FilterASCIIHex.php b/src/filters/FilterASCIIHex.php index abad13a..aee26e2 100644 --- a/src/filters/FilterASCIIHex.php +++ b/src/filters/FilterASCIIHex.php @@ -16,7 +16,7 @@ class FilterASCIIHex { *@internal same as _hex2bin ($hexString) *@access public *@note Function was written because PHP has a bin2hex, but not a hex2bin! - *@internal note pack(“C”,hexdec(substr($data,$i,2))) DOES NOT WORK + *@internal note pack(“C”,hexdec(substr($data,$i,2))) DOES NOT WORK * **/ function decode($data) { @@ -44,7 +44,7 @@ function decode($data) { * *@internal same as bin2hex *@access public - *@internal dechex(ord($str{$i})); is buggy because for hex value of 0-15 heading 0 is missing! Using sprintf() to get it right. + *@internal dechex(ord($str[$i])); is buggy because for hex value of 0-15 heading 0 is missing! Using sprintf() to get it right. *@param string $str a binary string *@return string hex the hexified string **/ @@ -54,7 +54,7 @@ function encode($data) { $hex = ""; $i = 0; do { - $hex .= sprintf("%02x", ord($str{$i})); + $hex .= sprintf("%02x", ord($str[$i])); $i++; } while ($i < strlen($str)); return $hex; diff --git a/src/filters/FilterFlate.php b/src/filters/FilterFlate.php index 908a3b6..e30724f 100644 --- a/src/filters/FilterFlate.php +++ b/src/filters/FilterFlate.php @@ -4,8 +4,7 @@ // NOTE: requires ZLIB >= 1.0.9! // -$__tmp = version_compare(phpversion(), "5") == -1 ? 
array('FilterFlateDecode') : array('FilterFlateDecode', false); -if (!call_user_func_array('class_exists', $__tmp)) { +if (!class_exists('FilterFlate', false)) { if(isset($FPDM_FILTERS)) array_push($FPDM_FILTERS,"FlateDecode"); diff --git a/src/filters/FilterLZW.php b/src/filters/FilterLZW.php index 7bc40b6..15ec3fe 100644 --- a/src/filters/FilterLZW.php +++ b/src/filters/FilterLZW.php @@ -17,8 +17,7 @@ // limitations under the License. // -$__tmp = version_compare(phpversion(), "5") == -1 ? array('FilterLZW') : array('FilterLZW', false); -if (!call_user_func_array('class_exists', $__tmp)) { +if (!class_exists('FilterLZW', false)) { if(isset($FPDM_FILTERS)) array_push($FPDM_FILTERS,"LZWDecode"); @@ -157,6 +156,4 @@ function encode($in) { $this->error("LZW encoding not implemented."); } } -} - -unset($__tmp); \ No newline at end of file +} \ No newline at end of file diff --git a/src/fpdm.php b/src/fpdm.php index fb8627d..e52fa04 100644 --- a/src/fpdm.php +++ b/src/fpdm.php @@ -48,16 +48,15 @@ //Major stream filters come from FPDI's stuff but I've added some :) if (!defined('FPDM_DIRECT')) { $FPDM_FILTERS = array("LZWDecode", "ASCIIHexDecode", "ASCII85Decode", "FlateDecode", "Standard" ); + require_once(__DIR__ . "/filters/FilterASCIIHex.php"); + require_once(__DIR__ . "/filters/FilterASCII85.php"); + require_once(__DIR__ . "/filters/FilterFlate.php"); + require_once(__DIR__ . "/filters/FilterLZW.php"); + require_once(__DIR__ . "/filters/FilterStandard.php"); } -// require_once("filters/FilterASCIIHex.php"); -// require_once("filters/FilterASCII85.php"); -// require_once("filters/FilterFlate.php"); -// require_once("filters/FilterLZW.php"); -// require_once("filters/FilterStandard.php"); -$__tmp = version_compare(phpversion(), "5") == -1 ? 
array('FPDM') : array('FPDM', false); -if (!call_user_func_array('class_exists', $__tmp)) { +if (!class_exists('FPDM', false)) { define('FPDM_VERSION',2.9); @@ -92,6 +91,7 @@ class FPDM { var $streams = ''; //Holds streams configuration found during parsing var $streams_filter = ''; //Regexp to decode filter streams + var $ap_objects = array(); //Maps object IDs to their checkbox state names for indirect /AP references var $safe_mode = false; //boolean, if set, ignore previous offsets do no calculations for the new xref table, seek pos directly in file var $check_mode = false; //boolean, Use this to track offset calculations errors in corrupteds pdfs files for sample @@ -112,6 +112,7 @@ class FPDM { var $needAppearancesTrue = false; //boolean, indicates if /NeedAppearances is already set to true var $isUTF8 = false; //boolean (true for UTF-8, false for ISO-8859-1) + var $n = 0; //integer, Position counter for objects /** * Constructor @@ -357,11 +358,164 @@ function Uncompress() { } /** *Activates the flatten output to remove form from pdf file keeping field datas. 
+ *@note Now supports native flattening without pdftk **/ function Flatten() { //----------------- $this->set_modes('flatten',true); - $this->support="pdftk"; + // Native support is maintained - no longer forces pdftk + } + + /** + *Flattens all form fields by making them read-only + * + *@access private + *@note This is called after values have been merged to make fields non-editable + *@note This approach sets the ReadOnly flag (Ff bit 1) on all fields to prevent editing + * while preserving the field structure needed for value rendering + **/ + function flattenFields() { + //------------------------ + $entries = &$this->pdf_entries; + $countLines = count($entries); + $verbose_flatten = ($this->verbose && ($this->verbose_level > 1)); + + if($verbose_flatten) $this->dumpContent("Starting native flatten process", "Flatten"); + + $in_widget = false; + $current_obj = 0; + $widget_start_line = 0; + $ff_found = false; + $modified_count = 0; + $widgets_to_add_ff = array(); // Widgets that don't have /Ff line + + // First pass: Find all widgets and set read-only flag + for ($i = 0; $i < $countLines; $i++) { + $line = $entries[$i]; + + // Track object boundaries + if (preg_match("/^(\d+) (\d+) obj/", $line, $match)) { + // If we were in a widget that had no /Ff, remember to add it + if ($in_widget && !$ff_found && $widget_start_line > 0) { + $widgets_to_add_ff[] = $widget_start_line; + } + $current_obj = intval($match[1]); + $in_widget = false; + $ff_found = false; + $widget_start_line = 0; + } + + // Detect Widget subtype (form field annotation) + if (preg_match("/\/Subtype\s*\/Widget/", $line)) { + $in_widget = true; + $widget_start_line = $i; + if($verbose_flatten) $this->dumpContent("Found Widget annotation in object $current_obj at line $i", "Flatten"); + } + + // Inside a widget - modify field flags for flattening + if ($in_widget && $current_obj > 0) { + // Set read-only flag (Ff bit 1 = ReadOnly) + if (preg_match("/^\/Ff\s+(\d+)/", $line, $match)) { + $ff_found = 
true; + $old_len = strlen($line); + $flags = intval($match[1]) | 1; // Set bit 1 (ReadOnly) + $entries[$i] = '/Ff ' . $flags; + $this->shift += strlen($entries[$i]) - $old_len; + $modified_count++; + if($verbose_flatten) $this->dumpContent("Set read-only flag at line $i (flags=$flags)", "Flatten"); + } + } + + // End of object - check if we need to add /Ff + if (preg_match("/^endobj/", $line)) { + if ($in_widget && !$ff_found && $widget_start_line > 0) { + $widgets_to_add_ff[] = $widget_start_line; + } + $in_widget = false; + $current_obj = 0; + $ff_found = false; + } + } + + // Second pass: Add /Ff 1 to widgets that don't have it + // We insert after the /Subtype /Widget line + foreach ($widgets_to_add_ff as $widget_line) { + $old_line = $entries[$widget_line]; + $new_line = $old_line . "\n/Ff 1"; + $entries[$widget_line] = $new_line; + $this->shift += strlen($new_line) - strlen($old_line); + $modified_count++; + if($verbose_flatten) $this->dumpContent("Added /Ff 1 after line $widget_line", "Flatten"); + } + + if($verbose_flatten) $this->dumpContent("Flatten complete. 
Modified $modified_count field(s) to read-only", "Flatten"); + } + + /** + *Removes the AcroForm dictionary reference from the document catalog + * + *@access private + *@note This disables form functionality at the document level + **/ + function removeAcroForm() { + //------------------------ + $entries = &$this->pdf_entries; + $countLines = count($entries); + $verbose_flatten = ($this->verbose && ($this->verbose_level > 1)); + + $in_catalog = false; + $acroform_removed = false; + + for ($i = 0; $i < $countLines; $i++) { + $line = $entries[$i]; + + // Find the Catalog object + if (preg_match("/\/Type\s*\/Catalog/", $line)) { + $in_catalog = true; + if($verbose_flatten) $this->dumpContent("Found Catalog at line $i", "Flatten"); + } + + // If in catalog, look for AcroForm reference + if ($in_catalog) { + // Match /AcroForm with object reference (e.g., /AcroForm 5 0 R) + if (preg_match("/\/AcroForm\s+\d+\s+\d+\s+R/", $line)) { + $old_len = strlen($line); + // Remove the AcroForm reference from this line + $new_line = preg_replace("/\/AcroForm\s+\d+\s+\d+\s+R\s*/", "", $line); + $entries[$i] = $new_line; + $this->shift += strlen($new_line) - $old_len; + $acroform_removed = true; + if($verbose_flatten) $this->dumpContent("Removed /AcroForm reference at line $i", "Flatten"); + } + // Also handle inline AcroForm dictionary (less common) + if (preg_match("/^\/AcroForm\s*<shift -= $old_len; + $acroform_removed = true; + if($verbose_flatten) $this->dumpContent("Removed inline /AcroForm at line $i", "Flatten"); + } + + // Also remove /NeedAppearances if present (cleanup) + if (preg_match("/\/NeedAppearances\s+(true|false)/", $line)) { + $old_len = strlen($line); + $new_line = preg_replace("/\/NeedAppearances\s+(true|false)\s*/", "", $line); + $entries[$i] = $new_line; + $this->shift += strlen($new_line) - $old_len; + if($verbose_flatten) $this->dumpContent("Removed /NeedAppearances at line $i", "Flatten"); + } + } + + // Stop when we exit the catalog object + if 
($in_catalog && preg_match("/^endobj/", $line)) { + $in_catalog = false; + if ($acroform_removed) break; // We're done + } + } + + if($verbose_flatten && !$acroform_removed) { + $this->dumpContent("Warning: /AcroForm reference not found in Catalog", "Flatten"); + } } /*** @@ -463,13 +617,12 @@ function Allow($permissions=null) { //############################# - /** + /** *Merge FDF file with a PDF file * *@access public *@note files has been provided during the instantiation of this class - *@internal flatten mode is not yet supported - *@param Boolean flatten Optional, false by default, if true will use pdftk (requires a shell) to flatten the pdf form + *@param Boolean flatten Optional, false by default, if true will flatten the pdf form (now supported natively) **/ function Merge($flatten=false) { //------------------------------ @@ -489,7 +642,7 @@ function Merge($flatten=false) { if($this->verbose&&($count_fields==0)) $this->dumpContent("The FDF content has either no field data or parsing may failed","FDF parser: "); - + $fields_value_definition_lines=array(); $count_entries=$this->parsePDFEntries($fields_value_definition_lines); @@ -511,6 +664,12 @@ function Merge($flatten=false) { } } //=========================================================== + + //==== Native flatten: remove form field interactivity ===== + if($this->flatten_mode) { + $this->flattenFields(); + } + //=========================================================== //===== Cross refs/size fixes (offsets calculations for objects have been previously be done in set_field_value) ======= @@ -790,16 +949,16 @@ function _set_field_value($line,$value) { $OldLen=strlen($CurLine); - //My PHP4/5 static call hack, only to make the callback $this->replace_value($matches,"$value") possible! 
- $callback_code='$THIS=new FPDM("[_STATIC_]");return $THIS->replace_value($matches,"'.$value.'");'; - $field_regexp='/^\/(\w+)\s?(\<|\()([^\)\>]*)(\)|\>)/'; if(preg_match($field_regexp,$CurLine)) { //modify it according to the new value $value + $self = $this; $CurLine = preg_replace_callback( $field_regexp, - create_function('$matches',$callback_code), + function($matches) use ($self, $value) { + return $self->replace_value($matches, $value); + }, $CurLine ); }else { @@ -981,6 +1140,105 @@ public function set_field_checkbox($name, $value) } return $offset_shift; } + + /** + * Resolves checkbox state names from an indirect /AP reference + * Looks up the referenced object and extracts state names from /N dictionary + * + * @param int $ap_ref_id The object ID referenced by /AP + * @return array|false Array with 'yes' and 'no' state names, or false if not found + */ + private function resolveIndirectAP($ap_ref_id) { + // Check if already resolved + if (isset($this->ap_objects[$ap_ref_id])) { + return $this->ap_objects[$ap_ref_id]; + } + + $verbose_parsing = ($this->verbose && ($this->verbose_level > 3)); + $entries = &$this->pdf_entries; + $countLines = count($entries); + + // Find the AP object (e.g., "101 0 obj") + $ap_obj_pattern = "/^" . $ap_ref_id . " 0 obj/"; + $n_ref_id = 0; + + for ($i = 0; $i < $countLines; $i++) { + if (preg_match($ap_obj_pattern, $entries[$i])) { + if ($verbose_parsing) { + echo "
Found AP object $ap_ref_id at line $i"; + } + // Look for /N reference within this object (next few lines) + for ($j = $i + 1; $j < min($i + 10, $countLines); $j++) { + if (preg_match("/^\\/N\\s+(\\d+)\\s+0\\s+R/", $entries[$j], $match)) { + $n_ref_id = intval($match[1]); + if ($verbose_parsing) { + echo "
Found /N reference to object $n_ref_id"; + } + break; + } + if (preg_match("/^endobj/", $entries[$j])) { + break; // End of object, /N not found + } + } + break; + } + } + + if (!$n_ref_id) { + if ($verbose_parsing) { + echo "
Could not find /N reference in AP object $ap_ref_id"; + } + return false; + } + + // Find the N object (contains the state names) + $n_obj_pattern = "/^" . $n_ref_id . " 0 obj/"; + $state_yes = ''; + $state_no = ''; + + for ($i = 0; $i < $countLines; $i++) { + if (preg_match($n_obj_pattern, $entries[$i])) { + if ($verbose_parsing) { + echo "
Found N object $n_ref_id at line $i"; + } + // Look for state names within this object (next few lines) + for ($j = $i + 1; $j < min($i + 10, $countLines); $j++) { + // Match state names like "/Oui 137 0 R" or "/Off 138 0 R" or "/Yes 5 0 R" + if (preg_match("/^\\/(\\w+)\\s+\\d+\\s+0\\s+R/", $entries[$j], $match)) { + $state_name = $match[1]; + // "Off" is typically the unchecked state + if (strtolower($state_name) === 'off') { + $state_no = $state_name; + } else { + // Any other state is considered the "checked" state + $state_yes = $state_name; + } + if ($verbose_parsing) { + echo "
Found state name: $state_name"; + } + } + if (preg_match("/^endobj/", $entries[$j]) || preg_match("/^>>/", $entries[$j])) { + break; // End of object + } + } + break; + } + } + + if ($state_yes && $state_no) { + $result = array('yes' => $state_yes, 'no' => $state_no); + $this->ap_objects[$ap_ref_id] = $result; + if ($verbose_parsing) { + echo "
Resolved AP $ap_ref_id: yes='$state_yes', no='$state_no'"; + } + return $result; + } + + if ($verbose_parsing) { + echo "
Could not resolve checkbox states for AP object $ap_ref_id (yes='$state_yes', no='$state_no')"; + } + return false; + } //ENDFIX /** @@ -1114,7 +1372,7 @@ function _hex2bin ($hexString) *Encodes a binary string to its hexadecimal representation * *@access private - *@internal dechex(ord($str{$i})); is buggy because for hex value of 0-15 heading 0 is missing! Using sprintf() to get it right. + *@internal dechex(ord($str[$i])); is buggy because for hex value of 0-15 heading 0 is missing! Using sprintf() to get it right. *@param string $str a binary string *@return string $hex the hexified string **/ @@ -1577,7 +1835,7 @@ function _set_text_value($stream,$value) { //--------------------------------------- $chunks=preg_split("/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/",$stream,0,PREG_SPLIT_DELIM_CAPTURE); $chunks[2]=$value; - $stream=implode($chunks,''); + $stream=implode('', $chunks); return $stream; } @@ -1851,6 +2109,25 @@ function parsePDFEntries(&$lines){ echo("
Found AP Line '$Counter'"); } $ap_line = $Counter; + + // Check if this is an indirect reference (e.g., /AP 101 0 R) + if (preg_match("/^\\/AP\\s+(\\d+)\\s+0\\s+R/", $CurLine, $ap_match)) { + $ap_ref_id = intval($ap_match[1]); + if ($verbose_parsing) { + echo("
Found indirect AP reference to object $ap_ref_id"); + } + // Resolve the indirect reference to get checkbox state names + $resolved = $this->resolveIndirectAP($ap_ref_id); + if ($resolved) { + $ap_d_yes = $resolved['yes']; + $ap_d_no = $resolved['no']; + $object["infos"]["checkbox_yes"] = $ap_d_yes; + $object["infos"]["checkbox_no"] = $ap_d_no; + if ($verbose_parsing) { + echo("
Resolved checkbox states: yes='$ap_d_yes', no='$ap_d_no'"); + } + } + } } elseif (!$ap_d_line && '/D' == substr($CurLine, 0, 2)) { if ($verbose_parsing) { echo("
Found D Line '$Counter'"); @@ -2229,5 +2506,3 @@ function Error($msg) { } } - -unset($__tmp); diff --git a/src/lib/url.php b/src/lib/url.php index 52e946c..ef43761 100644 --- a/src/lib/url.php +++ b/src/lib/url.php @@ -130,7 +130,7 @@ function resolve_path($path) { if ($fold=='' || $fold=='.') continue; if ($fold=='..' && $i>0 && end($out)!='..') array_pop($out); else $out[]= $fold; - } return ($path{0}=='/'?'/':'').join('/', $out); + } return ($path[0]=='/'?'/':'').join('/', $out); }