Skip to content

Commit

Permalink
Merge pull request #5 from Seen-Arabic/@Feature/WordToLetters
Browse files Browse the repository at this point in the history
[#4] Word to letters feature
  • Loading branch information
MohamedAmgd authored Nov 26, 2023
2 parents f43e5b9 + 20bc9b6 commit 7fde65c
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,31 @@ public static String tashfeer(String text) {
return newSentence.toString().trim();
}

/**
 * Spells out the given text by replacing each character that has an entry in
 * {@code Data.PRONOUNCED_LETTERS} with the value stored for it.
 *
 * <p>The text is processed one {@code char} at a time. Each replaced character is followed
 * by a single space unless it sits at the final position of the input. Characters without
 * an entry (for example spaces, digits or Latin letters) are copied through unchanged, so
 * a space between two input words ends up directly after the separator of the preceding
 * letter and the word boundary shows up as two consecutive spaces. The assembled text is
 * trimmed before it is returned.
 *
 * @param word the text to spell out
 * @return the spelled-out text, or an empty string when {@code word} is empty
 */
public static String wordToLetters(String word) {
    final int lastIndex = word.length() - 1;
    final StringBuilder spelled = new StringBuilder();

    for (int pos = 0; pos <= lastIndex; pos++) {
        final String symbol = String.valueOf(word.charAt(pos));

        // Symbols without a pronunciation entry pass through untouched
        if (!Data.PRONOUNCED_LETTERS.containsKey(symbol)) {
            spelled.append(symbol);
            continue;
        }

        spelled.append(Data.PRONOUNCED_LETTERS.get(symbol));

        // Separate from whatever comes next; nothing follows the final position
        if (pos < lastIndex) {
            spelled.append(' ');
        }
    }

    return spelled.toString().trim();
}

private static String handleNoonIssue(String text) {
String arabicLetters = String.join("", Data.LETTERS_DICT.keySet()) + "ـ";
String regex = Data.NOON + "(" + "?=[^" + arabicLetters + "]" + ")|" + Data.NOON + "\\z";
Expand Down
38 changes: 38 additions & 0 deletions src/main/java/io/github/seen_arabic/arabic_services/Data.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class Data {

static final Map<String, String> LETTERS_DICT;
static final Map<Character, String[]> LETTERS_TASHFEER_REPLACEMENT_DICT;
static final Map<String, String> PRONOUNCED_LETTERS;

static final String[] TASHKEEL = {
"\u0600",
Expand Down Expand Up @@ -193,6 +194,43 @@ class Data {
LETTERS_TASHFEER_REPLACEMENT_DICT.put('و', new String[] { "ۅ", "ۆ", "ۇ", "ۈ", "ۏ", "ۉ", "ۋ" });
LETTERS_TASHFEER_REPLACEMENT_DICT.put('ي', new String[] { "ۍ", "ێ", "ې", "ے", "ی۪" });

PRONOUNCED_LETTERS = new HashMap<>();
PRONOUNCED_LETTERS.put("ا", "ألف");
PRONOUNCED_LETTERS.put("إ", "ألف_مكسورة");
PRONOUNCED_LETTERS.put("أ", "ألف");
PRONOUNCED_LETTERS.put("آ", "ألف_مد");
PRONOUNCED_LETTERS.put("ء", "همزة"); // Not in STANDARD_LETTERS[]
PRONOUNCED_LETTERS.put("ب", "باء");
PRONOUNCED_LETTERS.put("ت", "تاء");
PRONOUNCED_LETTERS.put("ث", "ثاء");
PRONOUNCED_LETTERS.put("ج", "جيم");
PRONOUNCED_LETTERS.put("ح", "حاء");
PRONOUNCED_LETTERS.put("خ", "خاء");
PRONOUNCED_LETTERS.put("د", "دال");
PRONOUNCED_LETTERS.put("ذ", "ذال");
PRONOUNCED_LETTERS.put("ر", "راء");
PRONOUNCED_LETTERS.put("ز", "زاي");
PRONOUNCED_LETTERS.put("س", "سين");
PRONOUNCED_LETTERS.put("ش", "شين");
PRONOUNCED_LETTERS.put("ص", "صاد");
PRONOUNCED_LETTERS.put("ض", "ضاد");
PRONOUNCED_LETTERS.put("ط", "طاء");
PRONOUNCED_LETTERS.put("ظ", "ظاء");
PRONOUNCED_LETTERS.put("ع", "عين");
PRONOUNCED_LETTERS.put("غ", "غين");
PRONOUNCED_LETTERS.put("ف", "فاء");
PRONOUNCED_LETTERS.put("ق", "قاف");
PRONOUNCED_LETTERS.put("ك", "كاف");
PRONOUNCED_LETTERS.put("ل", "لام");
PRONOUNCED_LETTERS.put("م", "ميم");
PRONOUNCED_LETTERS.put("ن", "نون");
PRONOUNCED_LETTERS.put("ه", "هاء");
PRONOUNCED_LETTERS.put("و", "واو");
PRONOUNCED_LETTERS.put("ؤ", "همزة_متوسطة_مضمومة");
PRONOUNCED_LETTERS.put("ى", "ألف_لينة");
PRONOUNCED_LETTERS.put("ي", "ياء");
PRONOUNCED_LETTERS.put("ئ", "همزة_متوسطة_مكسورة");
PRONOUNCED_LETTERS.put("ة", "تاء_مربوطة"); // Not in STANDARD_LETTERS[]
}

private Data() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,36 @@ public void testTashfeer() {
assertNotEquals(result, inputWord);
}

/**
 * Runs every {@code wordToLetters} scenario in sequence. The scenarios share this single
 * test method, so the first failing scenario stops the remaining ones from running.
 */
@Test
public void testWordToLetters() {
    // Arabic input is transformed
    itShouldReturnAStringWithPronouncedArabicLetters();

    // Inputs that must come back untouched
    itShouldHandleEmptyInput();
    itShouldHandleInputWithNoPronouncedArabicLetters();

    // Exact output for a multi-word sentence
    itShouldHandleInputWithSpaces();
}

/**
 * Checks that an Arabic sentence yields a non-null result that differs from the input.
 * Only the fact that a transformation happened is verified here, not the exact output.
 */
private void itShouldReturnAStringWithPronouncedArabicLetters() {
    final String sentence = "هذه جملة عربية";

    final String spelled = ArabicServices.wordToLetters(sentence);

    assertNotNull(spelled);
    assertNotEquals(sentence, spelled);
}

/**
 * Checks that an empty string is returned as an empty string.
 */
private void itShouldHandleEmptyInput() {
    final String spelled = ArabicServices.wordToLetters("");

    assertEquals("", spelled);
}

/**
 * Checks that text made up only of digits, Latin letters and spaces is returned
 * unchanged.
 */
private void itShouldHandleInputWithNoPronouncedArabicLetters() {
    final String untouched = "12345 not in Arabic letters";

    final String spelled = ArabicServices.wordToLetters(untouched);

    assertEquals(untouched, spelled);
}

/**
 * Verifies the exact spelling of a multi-word sentence.
 *
 * <p>Inside a word, spelled-out letters are separated by one space. The space between two
 * input words has no pronunciation entry, so it is copied through right after the
 * separator that follows the preceding letter; word boundaries therefore appear as two
 * consecutive spaces in the result.
 */
private void itShouldHandleInputWithSpaces() {
    String input = "هذه جملة اخرى";

    // Build the expectation word by word so the two-space word boundary stays explicit
    // and cannot be lost to whitespace normalisation of a single long literal.
    String expected = String.join("  ",
            "هاء ذال هاء",
            "جيم ميم لام تاء_مربوطة",
            "ألف خاء راء ألف_لينة");

    String result = ArabicServices.wordToLetters(input);
    assertEquals(expected, result);
}
}

0 comments on commit 7fde65c

Please sign in to comment.