Skip to content

Commit

Permalink
Merge branch 'MDL-74097' of https://github.com/stronk7/moodle
Browse files Browse the repository at this point in the history
  • Loading branch information
junpataleta committed Mar 9, 2022
2 parents aa53d83 + 1af724e commit 8ac5e00
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 15 deletions.
24 changes: 10 additions & 14 deletions lib/classes/text.php
Original file line number Diff line number Diff line change
Expand Up @@ -158,21 +158,13 @@ public static function convert($text, $fromCS, $toCS='utf-8') {
}

if ($toCS === 'ascii') {
// Try to normalize the conversion a bit.
$text = self::specialtoascii($text, $fromCS);
// Try to normalize the conversion a bit if the target is ascii.
return self::specialtoascii($text, $fromCS);
}

// Prevent any error notices, do not use //IGNORE so that we get
// consistent result if iconv fails.
$result = @iconv($fromCS, $toCS.'//TRANSLIT', $text);

if ($result === false or $result === '') {
// Note: iconv is prone to return empty string when invalid char encountered, or false if encoding unsupported.
$oldlevel = error_reporting(E_PARSE);
error_reporting($oldlevel);
}

return $result;
return @iconv($fromCS, $toCS.'//TRANSLIT', $text);
}

/**
Expand Down Expand Up @@ -341,10 +333,14 @@ public static function specialtoascii($text, $charset='utf-8') {
$charset = self::parse_charset($charset);
$oldlevel = error_reporting(E_PARSE);

if ($charset == 'utf-8') {
$text = transliterator_transliterate('Any-Latin; Latin-ASCII', (string) $text);
// Always convert to utf-8, so transliteration can do its work always.
if ($charset !== 'utf-8') {
$text = iconv($charset, 'utf-8'.'//TRANSLIT', $text);
}
$result = iconv($charset, 'ASCII//TRANSLIT//IGNORE', (string) $text);
$text = transliterator_transliterate('Any-Latin; Latin-ASCII', (string) $text);

// Still, apply iconv because some chars are not handled by transliterate.
$result = iconv('utf-8', 'ASCII//TRANSLIT//IGNORE', (string) $text);

error_reporting($oldlevel);
return $result;
Expand Down
60 changes: 59 additions & 1 deletion lib/tests/text_test.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,15 @@
* @category phpunit
* @copyright 2010 Petr Skoda (http://skodak.org)
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
* @coversDefaultClass \core_text
*
*/
class text_test extends advanced_testcase {

/**
* Tests the static parse charset method.
*
* @covers ::parse_charset()
*/
public function test_parse_charset() {
$this->assertSame('windows-1250', core_text::parse_charset('Cp1250'));
Expand All @@ -47,8 +51,11 @@ public function test_parse_charset() {

/**
* Tests the static convert method.
*
* @covers ::convert()
*/
public function test_convert() {
$this->assertSame('', core_text::convert('', 'utf-8', 'utf-8'));
$utf8 = "Žluťoučký koníček";
$iso2 = pack("H*", "ae6c75bb6f75e86bfd206b6f6eede8656b");
$win = pack("H*", "8e6c759d6f75e86bfd206b6f6eede8656b");
Expand Down Expand Up @@ -103,6 +110,8 @@ public function test_convert() {

/**
* Tests the static sub string method.
*
* @covers ::substr()
*/
public function test_substr() {
$str = "Žluťoučký koníček";
Expand Down Expand Up @@ -145,6 +154,8 @@ public function test_substr() {

/**
* Tests the static string length method.
*
* @covers ::strlen()
*/
public function test_strlen() {
$str = "Žluťoučký koníček";
Expand Down Expand Up @@ -184,6 +195,8 @@ public function test_strlen() {

/**
* Test unicode safe string truncation.
*
* @covers ::str_max_bytes()
*/
public function test_str_max_bytes() {
// These are all 3 byte characters, so this is a 12-byte string.
Expand Down Expand Up @@ -234,6 +247,8 @@ public function test_str_max_bytes() {

/**
* Tests the static strtolower method.
*
* @covers ::strtolower()
*/
public function test_strtolower() {
$str = "Žluťoučký koníček";
Expand Down Expand Up @@ -270,6 +285,8 @@ public function test_strtolower() {

/**
* Tests the static strtoupper.
*
* @covers ::strtoupper()
*/
public function test_strtoupper() {
$str = "Žluťoučký koníček";
Expand Down Expand Up @@ -303,6 +320,8 @@ public function test_strtoupper() {

/**
* Test the strrev method.
*
* @covers ::strrev()
*/
public function test_strrev() {
$strings = array(
Expand All @@ -323,6 +342,8 @@ public function test_strrev() {

/**
* Tests the static strpos method.
*
* @covers ::strpos()
*/
public function test_strpos() {
$str = "Žluťoučký koníček";
Expand All @@ -331,6 +352,8 @@ public function test_strpos() {

/**
* Tests the static strrpos.
*
* @covers ::strrpos()
*/
public function test_strrpos() {
$str = "Žluťoučký koníček";
Expand All @@ -339,17 +362,33 @@ public function test_strrpos() {

/**
* Tests the static specialtoascii method.
*
* @covers ::specialtoascii()
*/
public function test_specialtoascii() {
$str = "Žluťoučký koníček";
$this->assertSame('Zlutoucky konicek', core_text::specialtoascii($str));

$utf8 = "Der eine stößt den Speer zum Mann";
$iso1 = core_text::convert($utf8, 'utf-8', 'iso-8859-1');
$this->assertSame('Der eine stosst den Speer zum Mann', core_text::specialtoascii($utf8));
$this->assertSame('Der eine stosst den Speer zum Mann', core_text::specialtoascii($iso1, 'iso-8859-1'));

$str = 'àáâãäçèéêëìíîïñòóôõöùúûüýÿÀÁÂÃÄÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝ';
$this->assertSame('aaaaaceeeeiiiinooooouuuuyyAAAAACEEEEIIIINOOOOOUUUUY', core_text::specialtoascii($str));

$utf8 = 'A æ Übérmensch på høyeste nivå! И я люблю PHP! есть. fi';
$this->assertSame('A ae Ubermensch pa hoyeste niva! I a lublu PHP! est\'. fi', core_text::specialtoascii($utf8, 'utf8'));

$utf8 = 'キャンパス Αλφαβητικός Κατάλογος Лорем ипсум долор сит амет';
$this->assertSame('kyanpasu Alphabetikos Katalogos Lorem ipsum dolor sit amet', core_text::specialtoascii($utf8));
}

/**
* Tests the static encode_mimeheader method.
* This also tests method moodle_phpmailer::encodeHeader that calls core_text::encode_mimeheader
*
* @covers ::encode_mimeheader()
* @covers \moodle_phpmailer::encodeHeader()
*/
public function test_encode_mimeheader() {
global $CFG;
Expand Down Expand Up @@ -388,6 +427,8 @@ public function test_encode_mimeheader() {

/**
* Tests the static entities_to_utf8 method.
*
* @covers ::entities_to_utf8()
*/
public function test_entities_to_utf8() {
$str = "&#x17d;lu&#x165;ou&#x10d;k&#xfd; kon&iacute;&#269;ek&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
Expand All @@ -396,6 +437,8 @@ public function test_entities_to_utf8() {

/**
* Tests the static utf8_to_entities method.
*
* @covers ::utf8_to_entities()
*/
public function test_utf8_to_entities() {
$str = "&#x17d;luťoučký kon&iacute;ček&copy;&quot;&amp;&lt;&gt;&sect;&laquo;";
Expand All @@ -409,6 +452,8 @@ public function test_utf8_to_entities() {

/**
* Tests the static trim_utf8_bom method.
*
* @covers ::trim_utf8_bom()
*/
public function test_trim_utf8_bom() {
$bom = "\xef\xbb\xbf";
Expand All @@ -418,6 +463,8 @@ public function test_trim_utf8_bom() {

/**
* Tests the static remove_unicode_non_characters method.
*
* @covers ::remove_unicode_non_characters()
*/
public function test_remove_unicode_non_characters() {
// Confirm that texts which don't contain these characters are unchanged.
Expand All @@ -439,6 +486,8 @@ public function test_remove_unicode_non_characters() {

/**
* Tests the static get_encodings method.
*
* @covers ::get_encodings()
*/
public function test_get_encodings() {
$encodings = core_text::get_encodings();
Expand All @@ -449,13 +498,17 @@ public function test_get_encodings() {

/**
* Tests the static code2utf8 method.
*
* @covers ::code2utf8()
*/
public function test_code2utf8() {
$this->assertSame('Ž', core_text::code2utf8(381));
}

/**
* Tests the static utf8ord method.
*
* @covers ::utf8ord()
*/
public function test_utf8ord() {
$this->assertSame(ord(''), core_text::utf8ord(''));
Expand All @@ -468,6 +521,8 @@ public function test_utf8ord() {

/**
* Tests the static strtotitle method.
*
* @covers ::strtotitle()
*/
public function test_strtotitle() {
$str = "žluťoučký koníček";
Expand All @@ -476,6 +531,8 @@ public function test_strtotitle() {

/**
* Test strrchr.
*
* @covers ::strrchr()
*/
public function test_strrchr() {
$str = "Žluťoučký koníček";
Expand All @@ -491,6 +548,7 @@ public function test_strrchr() {
* @dataProvider is_charset_supported_provider()
* @param string $charset
* @param bool $expected
* @covers ::is_charset_supported()
*/
public function test_is_charset_supported(string $charset, bool $expected) {
$charset = core_text::parse_charset($charset);
Expand Down

0 comments on commit 8ac5e00

Please sign in to comment.