From 795f2ecbba65055cf3c8bd39d4adfcd01e81710b Mon Sep 17 00:00:00 2001 From: Jens Schuppe Date: Mon, 29 Apr 2024 13:22:54 +0200 Subject: [PATCH 1/2] Replace macros by splitting existing text runs/paragraphs instead of replacing them --- src/PhpWord/TemplateProcessor.php | 83 +++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/src/PhpWord/TemplateProcessor.php b/src/PhpWord/TemplateProcessor.php index 8aee40c546..86c27502a7 100644 --- a/src/PhpWord/TemplateProcessor.php +++ b/src/PhpWord/TemplateProcessor.php @@ -315,6 +315,46 @@ public function setComplexBlock($search, Element\AbstractElement $complexType): $this->replaceXmlBlock($search, $xmlWriter->getData(), 'w:p'); } + /** + * Replaces a search string (macro) with a set of rendered elements, splitting + * surrounding texts, text runs or paragraphs before and after the macro, + * depending on the types of elements to insert. + * + * @param \PhpOffice\PhpWord\Element\AbstractElement[] $elements + */ + public function setElementsValue(string $search, array $elements): void + { + $elementsData = ''; + $hasParagraphs = false; + foreach ($elements as $element) { + $elementName = substr( + get_class($element), + (int) strrpos(get_class($element), '\\') + 1 + ); + $objectClass = 'PhpOffice\\PhpWord\\Writer\\Word2007\\Element\\' . $elementName; + + // For inline elements, do not create a new paragraph. + $withParagraph = Writer\Word2007\Element\Text::class !== $objectClass; + $hasParagraphs = $hasParagraphs || $withParagraph; + + $xmlWriter = new XMLWriter(); + /** @var \PhpOffice\PhpWord\Writer\Word2007\Element\AbstractElement $elementWriter */ + $elementWriter = new $objectClass($xmlWriter, $element, !$withParagraph); + $elementWriter->write(); + $elementsData .= $xmlWriter->getData(); + } + $blockType = $hasParagraphs ? 'w:p' : 'w:r'; + $where = $this->findContainingXmlBlockForMacro($search, $blockType); + if (is_array($where)) { + /** @phpstan-var array{start: int, end: int} $where */ + $block = $this->getSlice($where['start'], $where['end']); + $parts = $hasParagraphs ? $this->splitParagraphIntoParagraphs($block) : $this->splitTextIntoTexts($block); + $this->replaceXmlBlock($search, $parts, $blockType); + $search = static::ensureMacroCompleted($search); + $this->replaceXmlBlock($search, $elementsData, $blockType); + } + } + /** * @param mixed $search * @param mixed $replace @@ -1464,6 +1504,49 @@ protected function splitTextIntoTexts($text) return str_replace(['' . $extractedStyle . '', '', ''], ['', '', ''], $result); } + /** + * Splits a w:p into a list of w:p where each ${macro} is in a separate w:p. + */ + public function splitParagraphIntoParagraphs(string $paragraph): string + { + $matches = []; + if (1 === preg_match('/()/i', $paragraph, $matches)) { + $extractedStyle = $matches[0]; + } else { + $extractedStyle = ''; + } + if (null === $paragraph = preg_replace('/>\s+<', $paragraph)) { + throw new Exception('Error processing PhpWord document.'); + } + $result = str_replace( + [ + '${', + '}', + ], + [ + '' . $extractedStyle . '${', + '}' . $extractedStyle . '', + ], + $paragraph + ); + + // Remove empty paragraphs that might have been created before/after the + // macro. + $result = str_replace( + [ + '' . $extractedStyle . '', + '', + ], + [ + '', + '', + ], + $result + ); + + return $result; + } + /** * Returns true if string contains a macro that is not in it's own w:r. * From f8f2e79beb7e1a07f0d805f54efc10bd4c3571e5 Mon Sep 17 00:00:00 2001 From: Jens Schuppe Date: Fri, 19 Jul 2024 10:35:03 +0200 Subject: [PATCH 2/2] Fix handling of styles --- src/PhpWord/StyleMerger.php | 102 +++++++++++++++++++++++ src/PhpWord/TemplateProcessor.php | 134 ++++++++++++++++++++---------- 2 files changed, 193 insertions(+), 43 deletions(-) create mode 100644 src/PhpWord/StyleMerger.php diff --git a/src/PhpWord/StyleMerger.php b/src/PhpWord/StyleMerger.php new file mode 100644 index 0000000000..8cd8e78ac1 --- /dev/null +++ b/src/PhpWord/StyleMerger.php @@ -0,0 +1,102 @@ + + */ + private $elements = []; + + public function __construct(string $style) + { + $this->styleElement = $this->createStyleElement($style); + foreach ($this->styleElement->childNodes as $node) { + if ($node instanceof \DOMElement) { + $this->elements[$node->tagName] = $node; + } + } + } + + public static function mergeStyles(string $style, string ...$styles): string + { + $styleMerger = new self($style); + foreach ($styles as $styleToMerge) { + $styleMerger->merge($styleToMerge); + } + + return $styleMerger->getStyleString(); + } + + public function merge(string $style): self + { + $styleElement = $this->createStyleElement($style); + foreach ($styleElement->childNodes as $node) { + if ($node instanceof \DOMElement) { + // @todo Do we need recursive merging for some elements? + if (!isset($this->elements[$node->tagName])) { + $importedNode = $this->styleElement->ownerDocument->importNode($node, TRUE); + if (!$importedNode instanceof \DOMElement) { + throw new \RuntimeException('Importing node failed'); + } + + $this->styleElement->appendChild($importedNode); + $this->elements[$node->tagName] = $importedNode; + } + } + } + + return $this; + } + + private function createStyleElement(string $style): \DOMElement + { + if (NULL === $style = preg_replace('/>\s+<', $style)) { + throw new \RuntimeException('Error processing style'); + } + + $doc = new \DOMDocument(); + $doc->loadXML( + '' . $style . '' + ); + + foreach ($doc->documentElement->childNodes as $node) { + if ($node instanceof \DOMElement) { + return $node; + } + } + + throw new \RuntimeException('Could not create style element'); + } + + public function getStyleString(): string + { + return $this->styleElement->ownerDocument->saveXML($this->styleElement); + } + +} diff --git a/src/PhpWord/TemplateProcessor.php b/src/PhpWord/TemplateProcessor.php index 86c27502a7..faffd439ea 100644 --- a/src/PhpWord/TemplateProcessor.php +++ b/src/PhpWord/TemplateProcessor.php @@ -321,11 +321,16 @@ public function setComplexBlock($search, Element\AbstractElement $complexType): * depending on the types of elements to insert. * * @param \PhpOffice\PhpWord\Element\AbstractElement[] $elements + * @param bool $inheritStyle + * If TRUE the style will be inherited from the paragraph/text run the macro + * is inside. If the element already contains styles, they will be merged. + * + * @throws \PhpOffice\PhpWord\Exception\Exception */ - public function setElementsValue(string $search, array $elements): void - { - $elementsData = ''; - $hasParagraphs = false; + public function setElementsValue(string $search, array $elements, bool $inheritStyle = FALSE): void { + $search = static::ensureMacroCompleted($search); + $elementsDataList = []; + $hasParagraphs = FALSE; foreach ($elements as $element) { $elementName = substr( get_class($element), @@ -334,24 +339,37 @@ public function setElementsValue(string $search, array $elements): void $objectClass = 'PhpOffice\\PhpWord\\Writer\\Word2007\\Element\\' . $elementName; // For inline elements, do not create a new paragraph. - $withParagraph = Writer\Word2007\Element\Text::class !== $objectClass; + $withParagraph = \PhpOffice\PhpWord\Writer\Word2007\Element\Text::class !== $objectClass; $hasParagraphs = $hasParagraphs || $withParagraph; $xmlWriter = new XMLWriter(); /** @var \PhpOffice\PhpWord\Writer\Word2007\Element\AbstractElement $elementWriter */ $elementWriter = new $objectClass($xmlWriter, $element, !$withParagraph); $elementWriter->write(); - $elementsData .= $xmlWriter->getData(); + $elementsDataList[] = preg_replace('/>\s+<', $xmlWriter->getData()); } $blockType = $hasParagraphs ? 'w:p' : 'w:r'; $where = $this->findContainingXmlBlockForMacro($search, $blockType); if (is_array($where)) { /** @phpstan-var array{start: int, end: int} $where */ $block = $this->getSlice($where['start'], $where['end']); - $parts = $hasParagraphs ? $this->splitParagraphIntoParagraphs($block) : $this->splitTextIntoTexts($block); + $paragraphStyle = ''; + $textRunStyle = ''; + $parts = $hasParagraphs + ? $this->splitParagraphIntoParagraphs($block, $paragraphStyle, $textRunStyle) + : $this->splitTextIntoTexts($block, $textRunStyle); + if ($inheritStyle) { + $elementsDataList = preg_replace_callback_array([ + '##' => fn() => $paragraphStyle, + '##' => fn (array $matches) => StyleMerger::mergeStyles($matches[0], $paragraphStyle), + // may contain itself so we have to match for inside of + '#.*#' => fn(array $matches) => str_replace('', $textRunStyle, $matches[0]), + '#.*().*#' => fn (array $matches) => + preg_replace('##', StyleMerger::mergeStyles($matches[1], $textRunStyle), $matches[0]), + ], $elementsDataList); + } $this->replaceXmlBlock($search, $parts, $blockType); - $search = static::ensureMacroCompleted($search); - $this->replaceXmlBlock($search, $elementsData, $blockType); + $this->replaceXmlBlock($search, implode('', $elementsDataList), $blockType); } } @@ -1480,71 +1498,101 @@ protected function findXmlBlockEnd($offset, $blockType) } /** - * Splits a w:r/w:t into a list of w:r where each ${macro} is in a separate w:r. + * Adds output parameter for extracted style. * * @param string $text + * @param string $extractedStyle + * Is set to the extracted text run style (w:rPr). * * @return string + * @throws \PhpOffice\PhpWord\Exception\Exception */ - protected function splitTextIntoTexts($text) - { - if (!$this->textNeedsSplitting($text)) { - return $text; + protected function splitTextIntoTexts($text, string &$extractedStyle = '') { + if (NULL === $unformattedText = preg_replace('/>\s+<', $text)) { + throw new Exception('Error processing PhpWord document.'); } + $matches = []; - if (preg_match('/()/i', $text, $matches)) { - $extractedStyle = $matches[0]; - } else { - $extractedStyle = ''; + preg_match('//i', $unformattedText, $matches); + $extractedStyle = $matches[0] ?? ''; + + if (!$this->textNeedsSplitting($text)) { + return $text; } - $unformattedText = preg_replace('/>\s+<', $text); - $result = str_replace([self::$macroOpeningChars, self::$macroClosingChars], ['' . $extractedStyle . '' . self::$macroOpeningChars, self::$macroClosingChars . '' . $extractedStyle . ''], $unformattedText); + $result = str_replace( + ['', '${', '}'], + [ + '', + '' . $extractedStyle . '${', + '}' . $extractedStyle . '', + ], + $unformattedText + ); + + $emptyTextRun = '' . $extractedStyle . ''; - return str_replace(['' . $extractedStyle . '', '', ''], ['', '', ''], $result); + return str_replace($emptyTextRun, '', $result); } /** * Splits a w:p into a list of w:p where each ${macro} is in a separate w:p. + * + * @param string $extractedParagraphStyle + * Is set to the extracted paragraph style (w:pPr). + * @param string $extractedTextRunStyle + * Is set to the extracted text run style (w:rPr). + * + * @throws \PhpOffice\PhpWord\Exception\Exception */ - public function splitParagraphIntoParagraphs(string $paragraph): string - { - $matches = []; - if (1 === preg_match('/()/i', $paragraph, $matches)) { - $extractedStyle = $matches[0]; - } else { - $extractedStyle = ''; - } - if (null === $paragraph = preg_replace('/>\s+<', $paragraph)) { + public function splitParagraphIntoParagraphs( + string $paragraph, + string &$extractedParagraphStyle = '', + string &$extractedTextRunStyle = '' + ): string { + if (NULL === $paragraph = preg_replace('/>\s+<', $paragraph)) { throw new Exception('Error processing PhpWord document.'); } + + $matches = []; + preg_match('##i', $paragraph, $matches); + $extractedParagraphStyle = $matches[0] ?? ''; + + // may contain itself so we have to match for inside of + preg_match('#.*().*#i', $paragraph, $matches); + $extractedTextRunStyle = $matches[1] ?? ''; + $result = str_replace( [ + '', '${', '}', ], [ - '' . $extractedStyle . '${', - '}' . $extractedStyle . '', + '', + sprintf( + '%s%s${', + $extractedParagraphStyle, + $extractedTextRunStyle + ), + sprintf( + '}%s%s', + $extractedParagraphStyle, + $extractedTextRunStyle + ), ], $paragraph ); // Remove empty paragraphs that might have been created before/after the // macro. - $result = str_replace( - [ - '' . $extractedStyle . '', - '', - ], - [ - '', - '', - ], - $result + $emptyParagraph = sprintf( + '%s%s', + $extractedParagraphStyle, + $extractedTextRunStyle ); - return $result; + return str_replace($emptyParagraph, '', $result); } /**