Skip to content

Commit

Permalink
Formatting is now preserved across sentence boundaries when a paragraph is split into sentences.
Browse files Browse the repository at this point in the history
  • Loading branch information
parpalak committed Apr 5, 2024
1 parent d0f5a50 commit 1dcef3d
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 21 deletions.
25 changes: 14 additions & 11 deletions src/S2/Rose/Helper/StringHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,19 @@ public static function sentencesFromText(string $text, bool $hasFormatting): arr
$substrings = str_replace("", ' ', $substrings);

if ($hasFormatting) {
// We keep the formatting scope within a single sentence.
// We keep the formatting scope through several sentences.
//
// For example, consider the input: 'Sentence <i>1. Sentence 2. Sentence</i> 3.'
// After processing, it becomes ['Sentence <i>1.</i>', 'Sentence 2.', '<i>Sentence</i> 3.'].
//
// This approach is reasonable because individual sentences are typically joined into snippets,
// and preserving formatting across multiple sentences may not be meaningful.
array_walk($substrings, static function (string &$text) {
$text = self::fixUnbalancedInternalFormatting($text);
// After processing, it becomes ['Sentence <i>1.</i>', '<i>Sentence 2.</i>', '<i>Sentence</i> 3.'].
$tagsFromPrevSentence = [];
array_walk($substrings, static function (string &$text) use (&$tagsFromPrevSentence) {
foreach (array_reverse($tagsFromPrevSentence) as $possibleTag => $num) {
if ($num > 0) {
$text = str_repeat('\\' . $possibleTag, $num) . $text;
$tagsFromPrevSentence[$possibleTag] = 0;
}
}
$text = self::fixUnbalancedInternalFormatting($text, $tagsFromPrevSentence);
});
}

Expand Down Expand Up @@ -101,11 +105,11 @@ public static function clearInternalFormatting(string $text): string
]);
}

public static function fixUnbalancedInternalFormatting(string $text): string
public static function fixUnbalancedInternalFormatting(string $text, array &$tagsNum): string
{
preg_match_all('#\\\\([' . self::FORMATTING_SYMBOLS . '])#i', $text, $matches);

$tagsNum = [];
// $tagsNum = [];
foreach ($matches[1] as $match) {
$lowerMatch = strtolower($match);
$tagsNum[$lowerMatch] = ($tagsNum[$lowerMatch] ?? 0) + ($match === $lowerMatch ? 1 : -1);
Expand All @@ -118,8 +122,7 @@ public static function fixUnbalancedInternalFormatting(string $text): string
$result = str_repeat('\\' . $possibleTag, -$num) . $result;
}
}
$tagsNum = array_reverse($tagsNum);
foreach ($tagsNum as $possibleTag => $num) {
foreach (array_reverse($tagsNum) as $possibleTag => $num) {
if ($num > 0) {
$result .= str_repeat('\\' . strtoupper($possibleTag), $num);
}
Expand Down
59 changes: 49 additions & 10 deletions tests/unit/Rose/Helper/StringHelperTest.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php declare(strict_types=1);
/**
* @copyright 2023 Roman Parpalak
* @copyright 2023-2024 Roman Parpalak
* @license MIT
*/

Expand All @@ -17,9 +17,9 @@ class StringHelperTest extends Unit
/**
* @dataProvider sentenceDataProvider
*/
public function testSentences(string $text, array $sentences): void
public function testSentences(string $text, array $sentences, bool $hasFormatting = false): void
{
foreach (StringHelper::sentencesFromText($text, false) as $i => $str) {
foreach (StringHelper::sentencesFromText($text, $hasFormatting) as $i => $str) {
$this->assertEquals($sentences[$i], $str);
}
}
Expand All @@ -36,6 +36,9 @@ public function sentenceDataProvider(): array
'1, 2, 3 и т. д.',
'Цифры, буквы, и т. п., могут встретиться.',
]],
['Sentence \i1. Sentence 2. Sentence\I 3.', ['Sentence \i1.\I', '\iSentence 2.\I', '\iSentence\I 3.'], true],
['Sentence \i1. Sentence 2. Sentence\B 3.', ['Sentence \i1.\I', '\iSentence 2.\I', '\b\iSentence\B 3.\I'], true],
['\i\uSentence \b1\B. Sentence 2. Sentence 3.\U\I', ['\i\uSentence \b1\B.\U\I', '\i\uSentence 2.\U\I', '\i\uSentence 3.\U\I'], true],
[
'Поезд отправился из пункта А в пункт Б. Затем вернулся назад.',
[
Expand Down Expand Up @@ -97,13 +100,49 @@ public function sentenceDataProvider(): array
];
}

public function testFixUnbalancedInternalFormatting(): void
/**
* @dataProvider unbalancedInternalFormattingDataProvider
*/
public function testFixUnbalancedInternalFormatting(string $text, string $expected, array $expectedTags): void
{
$tags = [];
$this->assertEquals($expected, StringHelper::fixUnbalancedInternalFormatting($text, $tags));
$this->assertEquals($expectedTags, $tags);
}

public function unbalancedInternalFormattingDataProvider(): array
{
$this->assertEquals('\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.\\I', StringHelper::fixUnbalancedInternalFormatting('\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.'));
$this->assertEquals('', StringHelper::fixUnbalancedInternalFormatting(''));
$this->assertEquals('456789i', StringHelper::fixUnbalancedInternalFormatting('456789i'));
$this->assertEquals('\\i456789\\I', StringHelper::fixUnbalancedInternalFormatting('456789\\I'));
$this->assertEquals('\\u456789\\U', StringHelper::fixUnbalancedInternalFormatting('\\u456789'));
$this->assertEquals('\\i\\d\\u\\D\\\\I\\b\\B\\U', StringHelper::fixUnbalancedInternalFormatting('\\u\\D\\\\I\\b'));
return [
[
'\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.',
'\\iThis is \\bformatted text\\I with \\Bspecial characters\\i.\\I',
['i' => 1, 'b' => 0],
],
[
'',
'',
[],
],
[
'456789i',
'456789i',
[],
],
[
'456789\\I',
'\\i456789\\I',
['i' => -1],
],
[
'\\u456789',
'\\u456789\\U',
['u' => 1],
],
[
'\\u\\D\\\\I\\b',
'\\i\\d\\u\\D\\\\I\\b\\B\\U',
['i' => -1, 'd' => -1, 'u' => 1, 'b' => 1],
],
];
}
}

0 comments on commit 1dcef3d

Please sign in to comment.