From 20bc9b6a06a26040c311878fe3fad10c98c22947 Mon Sep 17 00:00:00 2001
From: Mohamed Amgd
Date: Sun, 26 Nov 2023 18:39:01 +0200
Subject: [PATCH] added word to letters method

---
 .../arabic_services/ArabicServices.java       | 25 ++++++++++++
 .../seen_arabic/arabic_services/Data.java     | 38 +++++++++++++++++++
 .../arabic_services/ArabicServicesTest.java   | 32 ++++++++++++++++
 3 files changed, 95 insertions(+)

diff --git a/src/main/java/io/github/seen_arabic/arabic_services/ArabicServices.java b/src/main/java/io/github/seen_arabic/arabic_services/ArabicServices.java
index ff37aef..0db7509 100644
--- a/src/main/java/io/github/seen_arabic/arabic_services/ArabicServices.java
+++ b/src/main/java/io/github/seen_arabic/arabic_services/ArabicServices.java
@@ -88,6 +88,31 @@ public static String tashfeer(String text) {
         return newSentence.toString().trim();
     }

+    /**
+     * Spells a word out letter by letter using the names in
+     * {@code Data.PRONOUNCED_LETTERS}; unmapped characters are copied as-is.
+     *
+     * @return the spelled-out text with {@code trim()} applied
+     */
+    public static String wordToLetters(String word) {
+        final int lastIndex = word.length() - 1;
+        StringBuilder spelled = new StringBuilder();
+        for (int pos = 0; pos <= lastIndex; pos++) {
+            String symbol = String.valueOf(word.charAt(pos));
+            if (!Data.PRONOUNCED_LETTERS.containsKey(symbol)) {
+                // No spoken name for this character: copy it through untouched.
+                spelled.append(symbol);
+                continue;
+            }
+            spelled.append(Data.PRONOUNCED_LETTERS.get(symbol));
+            // One space after each letter name, except after the final character.
+            if (pos < lastIndex) {
+                spelled.append(' ');
+            }
+        }
+        return spelled.toString().trim();
+    }
+
     private static String handleNoonIssue(String text) {
         String arabicLetters = String.join("", Data.LETTERS_DICT.keySet()) + "ـ";
         String regex = Data.NOON + "(" + "?=[^" + arabicLetters + "]" + ")|" + Data.NOON + "\\z";
diff --git a/src/main/java/io/github/seen_arabic/arabic_services/Data.java b/src/main/java/io/github/seen_arabic/arabic_services/Data.java
index 2fda571..fad177c 100644
--- 
a/src/main/java/io/github/seen_arabic/arabic_services/Data.java
+++ b/src/main/java/io/github/seen_arabic/arabic_services/Data.java
@@ -15,6 +15,7 @@ class Data {

     static final Map LETTERS_DICT;
     static final Map LETTERS_TASHFEER_REPLACEMENT_DICT;
+    static final Map PRONOUNCED_LETTERS; // one-character string -> spelled-out name of that letter

     static final String[] TASHKEEL = {
             "\u0600",
@@ -193,6 +194,43 @@ class Data {
         LETTERS_TASHFEER_REPLACEMENT_DICT.put('و', new String[] { "ۅ", "ۆ", "ۇ", "ۈ", "ۏ", "ۉ", "ۋ" });
         LETTERS_TASHFEER_REPLACEMENT_DICT.put('ي', new String[] { "ۍ", "ێ", "ې", "ے", "ی۪" });

+        PRONOUNCED_LETTERS = new HashMap<>(); // keys are Strings here, unlike the char keys used just above
+        PRONOUNCED_LETTERS.put("ا", "ألف");
+        PRONOUNCED_LETTERS.put("إ", "ألف_مكسورة"); // multi-word names are joined with '_'
+        PRONOUNCED_LETTERS.put("أ", "ألف"); // same name as the bare alef entry above
+        PRONOUNCED_LETTERS.put("آ", "ألف_مد");
+        PRONOUNCED_LETTERS.put("ء", "همزة"); // Not in STANDARD_LETTERS[]
+        PRONOUNCED_LETTERS.put("ب", "باء");
+        PRONOUNCED_LETTERS.put("ت", "تاء");
+        PRONOUNCED_LETTERS.put("ث", "ثاء");
+        PRONOUNCED_LETTERS.put("ج", "جيم");
+        PRONOUNCED_LETTERS.put("ح", "حاء");
+        PRONOUNCED_LETTERS.put("خ", "خاء");
+        PRONOUNCED_LETTERS.put("د", "دال");
+        PRONOUNCED_LETTERS.put("ذ", "ذال");
+        PRONOUNCED_LETTERS.put("ر", "راء");
+        PRONOUNCED_LETTERS.put("ز", "زاي");
+        PRONOUNCED_LETTERS.put("س", "سين");
+        PRONOUNCED_LETTERS.put("ش", "شين");
+        PRONOUNCED_LETTERS.put("ص", "صاد");
+        PRONOUNCED_LETTERS.put("ض", "ضاد");
+        PRONOUNCED_LETTERS.put("ط", "طاء");
+        PRONOUNCED_LETTERS.put("ظ", "ظاء");
+        PRONOUNCED_LETTERS.put("ع", "عين");
+        PRONOUNCED_LETTERS.put("غ", "غين");
+        PRONOUNCED_LETTERS.put("ف", "فاء");
+        PRONOUNCED_LETTERS.put("ق", "قاف");
+        PRONOUNCED_LETTERS.put("ك", "كاف");
+        PRONOUNCED_LETTERS.put("ل", "لام");
+        PRONOUNCED_LETTERS.put("م", "ميم");
+        PRONOUNCED_LETTERS.put("ن", "نون");
+        PRONOUNCED_LETTERS.put("ه", "هاء");
+        PRONOUNCED_LETTERS.put("و", "واو");
+        PRONOUNCED_LETTERS.put("ؤ", "همزة_متوسطة_مضمومة");
+        PRONOUNCED_LETTERS.put("ى", "ألف_لينة");
+        PRONOUNCED_LETTERS.put("ي", "ياء");
+        PRONOUNCED_LETTERS.put("ئ", "همزة_متوسطة_مكسورة");
+        PRONOUNCED_LETTERS.put("ة", "تاء_مربوطة"); // Not in STANDARD_LETTERS[]
     }

     private Data() {
diff --git a/src/test/java/io/github/seen_arabic/arabic_services/ArabicServicesTest.java b/src/test/java/io/github/seen_arabic/arabic_services/ArabicServicesTest.java
index 3aad8ce..567cf9e 100644
--- a/src/test/java/io/github/seen_arabic/arabic_services/ArabicServicesTest.java
+++ b/src/test/java/io/github/seen_arabic/arabic_services/ArabicServicesTest.java
@@ -92,4 +92,36 @@ public void testTashfeer() {
         assertNotEquals(result, inputWord);
     }

+    @Test
+    public void testWordToLetters() { // single @Test entry point; runs the four scenario helpers in order
+        itShouldReturnAStringWithPronouncedArabicLetters();
+        itShouldHandleEmptyInput();
+        itShouldHandleInputWithNoPronouncedArabicLetters();
+        itShouldHandleInputWithSpaces();
+    }
+
+    private void itShouldReturnAStringWithPronouncedArabicLetters() {
+        String input = "هذه جملة عربية";
+        String result = ArabicServices.wordToLetters(input);
+        assertNotNull(result);
+        assertNotEquals(input, result); // only proves the text changed, not the exact spelling
+    }
+
+    private void itShouldHandleEmptyInput() {
+        String input = "";
+        String result = ArabicServices.wordToLetters(input);
+        assertEquals("", result); // empty in, empty out
+    }
+
+    private void itShouldHandleInputWithNoPronouncedArabicLetters() {
+        String input = "12345 not in Arabic letters";
+        String result = ArabicServices.wordToLetters(input);
+        assertEquals(input, result); // no character here is a PRONOUNCED_LETTERS key, so it passes through
+    }
+
+    private void itShouldHandleInputWithSpaces() {
+        String input = "هذه جملة اخرى"; // NOTE(review): wordToLetters adds a space after each mapped letter
+        String result = ArabicServices.wordToLetters(input); // and also keeps the input's own space, so word
+        assertEquals("هاء ذال هاء جيم ميم لام تاء_مربوطة ألف خاء راء ألف_لينة", result); // gaps should be 2 spaces - confirm
+    }
 }