diff --git a/simple-search/v1.1/simple_search.php b/simple-search/v1.1/simple_search.php index 439fc86..c26f22f 100644 --- a/simple-search/v1.1/simple_search.php +++ b/simple-search/v1.1/simple_search.php @@ -557,6 +557,14 @@ public function convert_nonascii($wordin) { $wordin2 = $this->clean_slp1($wordin1); return $wordin2; } + // 03-01-2023. Detect Cyrillic (e.g. Russian) by converting to slp1 + $wordin1 = transcoder_processString($wordin,'cyrillic','slp1'); + if ($wordin1 != $wordin) { + // Assume $wordin is spelled in Cyrillic + $wordin2 = $this->clean_slp1($wordin1); + return $wordin2; + } + // $wordin might have letters with diacritics. // We will lower-case the string first. Try to handle diacritics. $wordin0 = mb_strtolower($wordin, 'UTF-8'); diff --git a/utilities/transcoder/cyrillic_slp1.xml b/utilities/transcoder/cyrillic_slp1.xml new file mode 100644 index 0000000..3c5f56d --- /dev/null +++ b/utilities/transcoder/cyrillic_slp1.xml @@ -0,0 +1,69 @@ + + + INIT а a INIT + INIT А A INIT + INIT и i INIT + INIT И I INIT + INIT у u INIT + INIT У U INIT + INIT ри f INIT + INIT Ри F INIT + INIT ли x INIT + INIT Ли X INIT + INIT э e INIT + INIT е e INIT + INIT Э E INIT + INIT ай E INIT + INIT о o INIT + INIT ау O INIT + INIT О O INIT + INIT н M INIT + INIT х H INIT + + INIT к k INIT + INIT кх K INIT + INIT г g INIT + INIT гх G INIT + INIT Н N INIT + + INIT ч c INIT + INIT чх C INIT + INIT дж j INIT + INIT джх J INIT + INIT нь Y INIT + + INIT т w INIT + INIT тх W INIT + INIT д q INIT + INIT дх Q INIT + INIT н R INIT + + INIT т t INIT + INIT тх T INIT + INIT д d INIT + INIT дх D INIT + INIT н n INIT + + INIT п p INIT + INIT пх P INIT + INIT б b INIT + INIT бх B INIT + INIT м m INIT + + INIT й y INIT + INIT р r INIT + INIT л l INIT + + INIT в v INIT + + INIT ш S INIT + INIT ш z INIT + INIT с s INIT + INIT х h INIT + +