Skip to content

Commit

Permalink
[uk] uppercase rule adjustment
Browse files Browse the repository at this point in the history
  • Loading branch information
arysin committed Feb 16, 2023
1 parent e98cfd3 commit 9ee85b5
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import org.languagetool.Language;
import org.languagetool.tokenizers.WordTokenizer;
import org.languagetool.tools.StringTools;
import org.languagetool.tools.Tools;

/**
* Checks that a sentence starts with an uppercase letter.
Expand Down Expand Up @@ -91,6 +90,10 @@ public final String getDescription() {
return messages.getString("desc_uppercase_sentence");
}

/**
 * Extension point that lets a subclass exempt a token from the
 * uppercase-sentence-start check.
 *
 * <p>{@code match} calls this with the token array it is examining and the
 * index of the token it is about to test; when the result is {@code true},
 * {@code match} returns right away with the matches collected so far.
 * This default implementation never exempts anything.
 *
 * @param tokens the token array currently being examined by {@code match}
 * @param tokenIdx index into {@code tokens} of the token about to be checked
 * @return {@code true} to suppress the check, {@code false} otherwise
 */
protected boolean isException(AnalyzedTokenReadings[] tokens, int tokenIdx) {
return false;
}

@Override
public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
String lastParagraphString = "";
Expand Down Expand Up @@ -123,6 +126,9 @@ public RuleMatch[] match(List<AnalyzedSentence> sentences) throws IOException {
matchTokenPos = 3;
}

if( isException(tokens, matchTokenPos) )
return toRuleMatchArray(ruleMatches);

String checkToken = firstToken;
if (thirdToken != null) {
checkToken = thirdToken;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public List<Rule> getRelevantRules(ResourceBundle messages, UserConfig userConfi
Example.fixed("Ми обідали борщем<marker>,</marker> пловом і салатом,— все смачне")),

// TODO: does not handle dot in abbreviations in the middle of the sentence, and also !.., ?..
new UppercaseSentenceStartRule(messages, this,
new UkrainianUppercaseSentenceStartRule(messages, this,
Example.wrong("<marker>речення</marker> має починатися з великої."),
Example.fixed("<marker>Речення</marker> має починатися з великої")),

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package org.languagetool.rules.uk;

import java.util.ResourceBundle;
import java.util.regex.Pattern;

import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.rules.CorrectExample;
import org.languagetool.rules.IncorrectExample;
import org.languagetool.rules.UppercaseSentenceStartRule;

/**
 * Ukrainian variant of {@link UppercaseSentenceStartRule} that accepts a
 * lowercase list label such as {@code а)}, {@code б)}, {@code в)} where the
 * base rule would otherwise look for an uppercase letter.
 */
public class UkrainianUppercaseSentenceStartRule extends UppercaseSentenceStartRule {

  /**
   * Exactly one lowercase letter from the range а–я or one of і, ї, є, ґ.
   * Compiled once here because {@link #isException} is consulted repeatedly
   * and {@code String.matches} would recompile the expression on every call.
   */
  private static final Pattern LIST_LABEL_LETTER = Pattern.compile("[а-яіїєґ]");

  /**
   * Creates the rule; all arguments are forwarded unchanged to the superclass.
   *
   * @param messages resource bundle passed to the base rule
   * @param language language passed to the base rule
   * @param incorrectExample example of a sentence the rule flags
   * @param correctExample corrected counterpart of {@code incorrectExample}
   */
  public UkrainianUppercaseSentenceStartRule(ResourceBundle messages, Language language,
      IncorrectExample incorrectExample, CorrectExample correctExample) {
    super(messages, language, incorrectExample, correctExample);
  }

  /**
   * Reports a lettered list label, e.g. {@code а)}, {@code б)}, {@code в)},
   * as an exception to the uppercase check.
   *
   * @param tokens the token array being examined
   * @param tokenIdx index of the token about to be checked
   * @return {@code true} only when {@code tokenIdx} is 1, a further token
   *     follows it, the token at {@code tokenIdx} is a single lowercase
   *     letter matched by {@link #LIST_LABEL_LETTER}, and the following token
   *     is {@code ")"}
   */
  @Override
  protected boolean isException(AnalyzedTokenReadings[] tokens, int tokenIdx) {
    // Only the token at index 1 qualifies (presumably index 0 is the
    // sentence-start marker; confirm against the tokenizer), and the
    // closing parenthesis must exist right after it.
    boolean hasFollowingToken = tokenIdx < tokens.length - 1;
    return tokenIdx == 1
        && hasFollowingToken
        && LIST_LABEL_LETTER.matcher(tokens[tokenIdx].getCleanToken()).matches()
        && ")".equals(tokens[tokenIdx + 1].getToken());
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,24 @@
*/
package org.languagetool.rules.uk;

import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.util.ArrayList;

import org.junit.Test;
import org.languagetool.JLanguageTool;
import org.languagetool.TestTools;
import org.languagetool.language.Ukrainian;
import org.languagetool.rules.Example;
import org.languagetool.rules.RuleMatch;
import org.languagetool.rules.UppercaseSentenceStartRule;

import java.io.IOException;
import java.util.ArrayList;

import static org.junit.Assert.assertEquals;

public class UppercaseSentenceStartRuleTest {

@Test
public void testUkrainian() throws IOException {
Ukrainian ukrainian = new Ukrainian();
UppercaseSentenceStartRule rule = new UppercaseSentenceStartRule(TestTools.getEnglishMessages(), ukrainian,
UkrainianUppercaseSentenceStartRule rule = new UkrainianUppercaseSentenceStartRule(TestTools.getEnglishMessages(), ukrainian,
Example.wrong("<marker>речення</marker> має починатися з великої."),
Example.fixed("<marker>Речення</marker> має починатися з великої"));

Expand All @@ -56,6 +55,9 @@ public void testUkrainian() throws IOException {
assertEquals(new ArrayList<RuleMatch>(), lt.check("Цей список з декількох рядків:\n\nрядок 1,\n\nрядок 2,\n\nрядок 3."));
assertEquals(0, lt.check("Цей список з декількох рядків:\n\nрядок 1;\n\nрядок 2;\n\nрядок 3.").size());
assertEquals(0, lt.check("Цей список з декількох рядків:\n\n 1) рядок 1;\n\n2) рядок 2;\n\n3)рядок 3.").size());


assertEquals(new ArrayList<RuleMatch>(), lt.check("а) водопі́й"));
}

}

0 comments on commit 9ee85b5

Please sign in to comment.