diff --git a/src/S2/Rose/Helper/StringHelper.php b/src/S2/Rose/Helper/StringHelper.php index 83e0a8e..6dbfe94 100644 --- a/src/S2/Rose/Helper/StringHelper.php +++ b/src/S2/Rose/Helper/StringHelper.php @@ -45,15 +45,19 @@ public static function sentencesFromText(string $text, bool $hasFormatting): arr $substrings = str_replace("�", ' ', $substrings); if ($hasFormatting) { - // We keep the formatting scope within a single sentence. + // We keep the formatting scope through several sentences. // // For example, consider the input: 'Sentence 1. Sentence 2. Sentence 3.' - // After processing, it becomes ['Sentence 1.', 'Sentence 2.', 'Sentence 3.']. - // - // This approach is reasonable because individual sentences are typically joined into snippets, - // and preserving formatting across multiple sentences may not be meaningful. - array_walk($substrings, static function (string &$text) { - $text = self::fixUnbalancedInternalFormatting($text); + // After processing, it becomes ['Sentence 1.', 'Sentence 2.', 'Sentence 3.']. + $tagsFromPrevSentence = []; + array_walk($substrings, static function (string &$text) use (&$tagsFromPrevSentence) { + foreach (array_reverse($tagsFromPrevSentence) as $possibleTag => $num) { + if ($num > 0) { + $text = str_repeat('\\' . $possibleTag, $num) . $text; + $tagsFromPrevSentence[$possibleTag] = 0; + } + } + $text = self::fixUnbalancedInternalFormatting($text, $tagsFromPrevSentence); }); } @@ -101,11 +105,11 @@ public static function clearInternalFormatting(string $text): string ]); } - public static function fixUnbalancedInternalFormatting(string $text): string + public static function fixUnbalancedInternalFormatting(string $text, array &$tagsNum): string { preg_match_all('#\\\\([' . self::FORMATTING_SYMBOLS . '])#i', $text, $matches); - $tagsNum = []; +// $tagsNum = []; foreach ($matches[1] as $match) { $lowerMatch = strtolower($match); $tagsNum[$lowerMatch] = ($tagsNum[$lowerMatch] ?? 0) + ($match === $lowerMatch ? 1 : -1); @@ -118,8 +122,7 @@ public static function fixUnbalancedInternalFormatting(string $text): string $result = str_repeat('\\' . $possibleTag, -$num) . $result; } } - $tagsNum = array_reverse($tagsNum); - foreach ($tagsNum as $possibleTag => $num) { + foreach (array_reverse($tagsNum) as $possibleTag => $num) { if ($num > 0) { $result .= str_repeat('\\' . strtoupper($possibleTag), $num); } diff --git a/tests/unit/Rose/Helper/StringHelperTest.php b/tests/unit/Rose/Helper/StringHelperTest.php index 2e8c4bc..be617a3 100644 --- a/tests/unit/Rose/Helper/StringHelperTest.php +++ b/tests/unit/Rose/Helper/StringHelperTest.php @@ -1,6 +1,6 @@ $str) { + foreach (StringHelper::sentencesFromText($text, $hasFormatting) as $i => $str) { $this->assertEquals($sentences[$i], $str); } } @@ -36,6 +36,9 @@ public function sentenceDataProvider(): array '1, 2, 3 и т. д.', 'Цифры, буквы, и т. п., могут встретиться.', ]], + ['Sentence \i1. Sentence 2. Sentence\I 3.', ['Sentence \i1.\I', '\iSentence 2.\I', '\iSentence\I 3.'], true], + ['Sentence \i1. Sentence 2. Sentence\B 3.', ['Sentence \i1.\I', '\iSentence 2.\I', '\b\iSentence\B 3.\I'], true], + ['\i\uSentence \b1\B. Sentence 2. Sentence 3.\U\I', ['\i\uSentence \b1\B.\U\I', '\i\uSentence 2.\U\I', '\i\uSentence 3.\U\I'], true], [ 'Поезд отправился из пункта А в пункт Б. Затем вернулся назад.', [ @@ -97,13 +100,49 @@ public function sentenceDataProvider(): array ]; } - public function testFixUnbalancedInternalFormatting(): void + /** + * @dataProvider unbalancedInternalFormattingDataProvider + */ + public function testFixUnbalancedInternalFormatting(string $text, string $expected, array $expectedTags): void + { + $tags = []; + $this->assertEquals($expected, StringHelper::fixUnbalancedInternalFormatting($text, $tags)); + $this->assertEquals($expectedTags, $tags); + } + + public function unbalancedInternalFormattingDataProvider(): array { - $this->assertEquals('\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.\\I', StringHelper::fixUnbalancedInternalFormatting('\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.')); - $this->assertEquals('', StringHelper::fixUnbalancedInternalFormatting('')); - $this->assertEquals('456789i', StringHelper::fixUnbalancedInternalFormatting('456789i')); - $this->assertEquals('\\i456789\\I', StringHelper::fixUnbalancedInternalFormatting('456789\\I')); - $this->assertEquals('\\u456789\\U', StringHelper::fixUnbalancedInternalFormatting('\\u456789')); - $this->assertEquals('\\i\\d\\u\\D\\\\I\\b\\B\\U', StringHelper::fixUnbalancedInternalFormatting('\\u\\D\\\\I\\b')); + return [ + [ + '\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.', + '\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.\\I', + ['i' => 1, 'b' => 0], + ], + [ + '', + '', + [], + ], + [ + '456789i', + '456789i', + [], + ], + [ + '456789\\I', + '\\i456789\\I', + ['i' => -1], + ], + [ + '\\u456789', + '\\u456789\\U', + ['u' => 1], + ], + [ + '\\u\\D\\\\I\\b', + '\\i\\d\\u\\D\\\\I\\b\\B\\U', + ['i' => -1, 'd' => -1, 'u' => 1, 'b' => 1], + ], + ]; } }