Skip to content

Commit

Permalink
아디다스 운동화 => 공백을 기준으로 두 단어 모두 검색 가능하게 처리
Browse files (browse the repository at this point in the history)
  • Loading branch information
ikchoi committed Jun 5, 2018
1 parent d32d1fb commit 277b641
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 73 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
build/
.gradle/
.idea/
.git/
31 changes: 29 additions & 2 deletions src/main/java/org/elasticsearch/analysis/JasoDecomposer.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ public String runJasoDecompose(String originStr, TokenizerOptions options) {

if (!originStr.isEmpty()) {

//공백, lowercase 처리
originStr = originStr.replace(" ", "");
//lowercase 처리
originStr = originStr.toLowerCase();

char[] termBuffer = originStr.toCharArray();
Expand Down Expand Up @@ -201,16 +200,44 @@ public String runJasoDecompose(String originStr, TokenizerOptions options) {
}

//결과 조합

//공백을 붙인 전체 문자열 (한글)
if (korBuffer.indexOf(" ") != -1) {
if (korBuffer.length() > 0) {
returnBuffer.append(korBuffer.toString().replaceAll(" ", ""));
returnBuffer.append(" ");
}
}

//공백으로 분리된 문자열 (한글)
if (korBuffer.length() > 0) {
returnBuffer.append(korBuffer.toString());
returnBuffer.append(" ");
}

//공백을 붙인 전체 문자열 (영문)
if (engBuffer.indexOf(" ") != -1) {
if (engBuffer.length() > 0) {
returnBuffer.append(engBuffer.toString().replaceAll(" ", ""));
returnBuffer.append(" ");
}
}

//공백으로 분리된 문자열 (영문)
if (engBuffer.length() > 0) {
returnBuffer.append(engBuffer.toString());
returnBuffer.append(" ");
}

//공백을 붙인 전체 문자열 (오타)
if (mistypingBuffer.indexOf(" ") != -1) {
if (mistypingBuffer.length() > 0) {
returnBuffer.append(mistypingBuffer.toString().replaceAll(" ", ""));
returnBuffer.append(" ");
}
}

//공백으로 분리된 문자열 (오타)
if (mistypingBuffer.length() > 0) {
returnBuffer.append(mistypingBuffer.toString());
returnBuffer.append(" ");
Expand Down
85 changes: 14 additions & 71 deletions src/test/java/org/elasticsearch/analysis/JasoTokenizerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ public void testTokenizer() throws IOException {
TokenizerOptions options = TokenizerOptions.create("testTokenizer");

//한영오타에 대한 토큰 추출여부 (hello -> ㅗㄷㅣㅣㅐ, 최일규 -> chldlfrb)
options.setMistype(true);
options.setMistype(false);

//초성검색을 위한 토큰 추출여부 (최일규 -> ㅊㅇㄱ)
options.setChosung(true);
options.setChosung(false);

List<TestCaseVO> testCase = new ArrayList<TestCaseVO>();

Expand All @@ -36,96 +36,39 @@ public void testTokenizer() throws IOException {
testCase.add(new TestCaseVO("소녀시대", "ㅅㅗㄴㅕㅅㅣㄷㅐ/thsutleo/ㅅㄴㅅㄷ"));
testCase.add(new TestCaseVO("Hello", "hello/ㅗㄷㅣㅣㅐ"));
testCase.add(new TestCaseVO("Hello~", "hello~/ㅗㄷㅣㅣㅐ~"));
testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/anwhrjsgovldpseld/ㅁㅈㄱㅎㅍㅇㄷ"));
testCase.add(new TestCaseVO("소원을 말해봐 genie mixversion##!", "ㅅㅗㅇㅜㅓㄴㅇㅡㄹㅁㅏㄹㅎㅐㅂㅗㅏgeniemixversion##!/thdnjsdmfakfgoqhkgeniemixversion##!/ㅅㅇㅇㅁㅎㅂ"));
testCase.add(new TestCaseVO("hush hush; hush hush", "hushhush;hushhush/ㅗㅕㄴㅗㅗㅕㄴㅗ;ㅗㅕㄴㅗㅗㅕㄴㅗ"));
testCase.add(new TestCaseVO("라벨:모음곡(거울)제4곡(어릿광대의 아침노래)(서현)", "ㄹㅏㅂㅔㄹ:ㅁㅗㅇㅡㅁㄱㅗㄱ(ㄱㅓㅇㅜㄹ)ㅈㅔ4ㄱㅗㄱ(ㅇㅓㄹㅣㅅㄱㅗㅏㅇㄷㅐㅇㅡㅣㅇㅏㅊㅣㅁㄴㅗㄹㅐ)(ㅅㅓㅎㅕㄴ)/fkqpf:ahdmarhr(rjdnf)wp4rhr(djfltrhkdeodmldkclashfo)(tjgus)/ㄹㅂㅁㅇㄱㄱㅇㅈㄱㅇㄹㄱㄷㅇㅇㅊㄴㄹㅅㅎ"));
testCase.add(new TestCaseVO("ㅗ디ㅣㅐ", "ㅗㄷㅣㅣㅐ")); //hello
testCase.add(new TestCaseVO("째깅", "ㅈㅈㅐㄱㅣㅇ/world/ㅈㅈㄱ")); //World
testCase.add(new TestCaseVO("ㅣㅐ햣ㄷ초", "ㅣㅐㅎㅑㅅㄷㅊㅗ")); //logitech
testCase.add(new TestCaseVO("퍗므ㅑㅜ", "ㅍㅑㅅㅁㅡㅑㅜ")); //vitamin
testCase.add(new TestCaseVO("랱", "ㄹㅐㅌ/fox")); //fox
testCase.add(new TestCaseVO("ㅅ딛퍄냐ㅐㅜ", "ㅅㄷㅣㄷㅍㅑㄴㅑㅐㅜ")); //television
testCase.add(new TestCaseVO("최", "ㅊㅗㅣ/chl")); //television
testCase.add(new TestCaseVO("ㅅㄴㅅㄷ", "ㅅㄴㅅㄷ")); //소녀시대 초성검색
testCase.add(new TestCaseVO("##%@#$%()*&^%$#@!", "##%@#$%()*&^%$#@!"));
testCase.add(new TestCaseVO("2000 최신가요", "2000ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ/2000chltlsrkdy"));
testCase.add(new TestCaseVO("15&", "15&"));
testCase.add(new TestCaseVO("최신가요 1990", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ1990/chltlsrkdy1990/ㅊㅅㄱㅇ"));
testCase.add(new TestCaseVO("최신가요&", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ&/chltlsrkdy&/ㅊㅅㄱㅇ"));
testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/ㅁㅜㅈㅗㄱㅓㄴ/ㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/anwhrjsgovldpseld/anwhrjs/govldpseld/ㅁㅈㄱㅎㅍㅇㄷ"));
testCase.add(new TestCaseVO("아디다스 운동화", "ㅇㅏㄷㅣㄷㅏㅅㅡㅇㅜㄴㄷㅗㅇㅎㅗㅏ/ㅇㅏㄷㅣㄷㅏㅅㅡ/ㅇㅜㄴㄷㅗㅇㅎㅗㅏ/dkelektmdnsehdghk/dkelektm/dnsehdghk/ㅇㄷㄷㅅㅇㄷㅎ"));
testCase.add(new TestCaseVO("투데이특가", "ㅌㅜㄷㅔㅇㅣㅌㅡㄱㄱㅏ/xnepdlxmrrk/ㅌㄷㅇㅌㄱ"));

} else if (options.isMistype() == true && options.isChosung() == false) {

testCase.add(new TestCaseVO("최일규", "ㅊㅗㅣㅇㅣㄹㄱㅠ/chldlfrb"));
testCase.add(new TestCaseVO("소녀시대", "ㅅㅗㄴㅕㅅㅣㄷㅐ/thsutleo"));
testCase.add(new TestCaseVO("Hello", "hello/ㅗㄷㅣㅣㅐ"));
testCase.add(new TestCaseVO("Hello~", "hello~/ㅗㄷㅣㅣㅐ~"));
testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/anwhrjsgovldpseld"));
testCase.add(new TestCaseVO("소원을 말해봐 genie mixversion##!", "ㅅㅗㅇㅜㅓㄴㅇㅡㄹㅁㅏㄹㅎㅐㅂㅗㅏgeniemixversion##!/thdnjsdmfakfgoqhkgeniemixversion##!"));
testCase.add(new TestCaseVO("hush hush; hush hush", "hushhush;hushhush/ㅗㅕㄴㅗㅗㅕㄴㅗ;ㅗㅕㄴㅗㅗㅕㄴㅗ"));
testCase.add(new TestCaseVO("라벨:모음곡(거울)제4곡(어릿광대의 아침노래)(서현)", "ㄹㅏㅂㅔㄹ:ㅁㅗㅇㅡㅁㄱㅗㄱ(ㄱㅓㅇㅜㄹ)ㅈㅔ4ㄱㅗㄱ(ㅇㅓㄹㅣㅅㄱㅗㅏㅇㄷㅐㅇㅡㅣㅇㅏㅊㅣㅁㄴㅗㄹㅐ)(ㅅㅓㅎㅕㄴ)/fkqpf:ahdmarhr(rjdnf)wp4rhr(djfltrhkdeodmldkclashfo)(tjgus)"));
testCase.add(new TestCaseVO("ㅗ디ㅣㅐ", "ㅗㄷㅣㅣㅐ")); //hello
testCase.add(new TestCaseVO("째깅", "ㅈㅈㅐㄱㅣㅇ/world")); //World
testCase.add(new TestCaseVO("ㅣㅐ햣ㄷ초", "ㅣㅐㅎㅑㅅㄷㅊㅗ")); //logitech
testCase.add(new TestCaseVO("퍗므ㅑㅜ", "ㅍㅑㅅㅁㅡㅑㅜ")); //vitamin
testCase.add(new TestCaseVO("랱", "ㄹㅐㅌ/fox")); //fox
testCase.add(new TestCaseVO("ㅅ딛퍄냐ㅐㅜ", "ㅅㄷㅣㄷㅍㅑㄴㅑㅐㅜ")); //television
testCase.add(new TestCaseVO("최", "ㅊㅗㅣ/chl")); //television
testCase.add(new TestCaseVO("ㅅㄴㅅㄷ", "ㅅㄴㅅㄷ")); //소녀시대 초성검색
testCase.add(new TestCaseVO("##%@#$%()*&^%$#@!", "##%@#$%()*&^%$#@!"));
testCase.add(new TestCaseVO("2000 최신가요", "2000ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ/2000chltlsrkdy"));
testCase.add(new TestCaseVO("15&", "15&"));
testCase.add(new TestCaseVO("최신가요 1990", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ1990/chltlsrkdy1990"));
testCase.add(new TestCaseVO("최신가요&", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ&/chltlsrkdy&"));
testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/ㅁㅜㅈㅗㄱㅓㄴ/ㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/anwhrjsgovldpseld/anwhrjs/govldpseld"));
testCase.add(new TestCaseVO("아디다스 운동화", "ㅇㅏㄷㅣㄷㅏㅅㅡㅇㅜㄴㄷㅗㅇㅎㅗㅏ/ㅇㅏㄷㅣㄷㅏㅅㅡ/ㅇㅜㄴㄷㅗㅇㅎㅗㅏ/dkelektmdnsehdghk/dkelektm/dnsehdghk"));
testCase.add(new TestCaseVO("투데이특가", "ㅌㅜㄷㅔㅇㅣㅌㅡㄱㄱㅏ/xnepdlxmrrk"));

} else if (options.isMistype() == false && options.isChosung() == true) {

testCase.add(new TestCaseVO("최일규", "ㅊㅗㅣㅇㅣㄹㄱㅠ/ㅊㅇㄱ"));
testCase.add(new TestCaseVO("소녀시대", "ㅅㅗㄴㅕㅅㅣㄷㅐ/ㅅㄴㅅㄷ"));
testCase.add(new TestCaseVO("Hello", "hello"));
testCase.add(new TestCaseVO("Hello~", "hello~"));
testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/ㅁㅈㄱㅎㅍㅇㄷ"));
testCase.add(new TestCaseVO("소원을 말해봐 genie mixversion##!", "ㅅㅗㅇㅜㅓㄴㅇㅡㄹㅁㅏㄹㅎㅐㅂㅗㅏgeniemixversion##!/ㅅㅇㅇㅁㅎㅂ"));
testCase.add(new TestCaseVO("hush hush; hush hush", "hushhush;hushhush"));
testCase.add(new TestCaseVO("라벨:모음곡(거울)제4곡(어릿광대의 아침노래)(서현)", "ㄹㅏㅂㅔㄹ:ㅁㅗㅇㅡㅁㄱㅗㄱ(ㄱㅓㅇㅜㄹ)ㅈㅔ4ㄱㅗㄱ(ㅇㅓㄹㅣㅅㄱㅗㅏㅇㄷㅐㅇㅡㅣㅇㅏㅊㅣㅁㄴㅗㄹㅐ)(ㅅㅓㅎㅕㄴ)/ㄹㅂㅁㅇㄱㄱㅇㅈㄱㅇㄹㄱㄷㅇㅇㅊㄴㄹㅅㅎ"));
testCase.add(new TestCaseVO("ㅗ디ㅣㅐ", "ㅗㄷㅣㅣㅐ")); //hello
testCase.add(new TestCaseVO("째깅", "ㅈㅈㅐㄱㅣㅇ/ㅈㅈㄱ")); //World
testCase.add(new TestCaseVO("ㅣㅐ햣ㄷ초", "ㅣㅐㅎㅑㅅㄷㅊㅗ")); //logitech
testCase.add(new TestCaseVO("퍗므ㅑㅜ", "ㅍㅑㅅㅁㅡㅑㅜ")); //vitamin
testCase.add(new TestCaseVO("랱", "ㄹㅐㅌ")); //fox
testCase.add(new TestCaseVO("ㅅ딛퍄냐ㅐㅜ", "ㅅㄷㅣㄷㅍㅑㄴㅑㅐㅜ")); //television
testCase.add(new TestCaseVO("최", "ㅊㅗㅣ"));
testCase.add(new TestCaseVO("ㅅㄴㅅㄷ", "ㅅㄴㅅㄷ")); //소녀시대 초성검색
testCase.add(new TestCaseVO("##%@#$%()*&^%$#@!", "##%@#$%()*&^%$#@!"));
testCase.add(new TestCaseVO("2000 최신가요", "2000ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ"));
testCase.add(new TestCaseVO("15&", "15&"));
testCase.add(new TestCaseVO("최신가요 1990", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ1990/ㅊㅅㄱㅇ"));
testCase.add(new TestCaseVO("최신가요&", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ&/ㅊㅅㄱㅇ"));
testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/ㅁㅜㅈㅗㄱㅓㄴ/ㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/ㅁㅈㄱㅎㅍㅇㄷ"));
testCase.add(new TestCaseVO("아디다스 운동화", "ㅇㅏㄷㅣㄷㅏㅅㅡㅇㅜㄴㄷㅗㅇㅎㅗㅏ/ㅇㅏㄷㅣㄷㅏㅅㅡ/ㅇㅜㄴㄷㅗㅇㅎㅗㅏ/ㅇㄷㄷㅅㅇㄷㅎ"));
testCase.add(new TestCaseVO("투데이특가", "ㅌㅜㄷㅔㅇㅣㅌㅡㄱㄱㅏ/ㅌㄷㅇㅌㄱ"));

} else if (options.isMistype() == false && options.isChosung() == false) {

testCase.add(new TestCaseVO("최일규", "ㅊㅗㅣㅇㅣㄹㄱㅠ"));
testCase.add(new TestCaseVO("소녀시대", "ㅅㅗㄴㅕㅅㅣㄷㅐ"));
testCase.add(new TestCaseVO("Hello", "hello"));
testCase.add(new TestCaseVO("Hello~", "hello~"));
testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ"));
testCase.add(new TestCaseVO("소원을 말해봐 genie mixversion##!", "ㅅㅗㅇㅜㅓㄴㅇㅡㄹㅁㅏㄹㅎㅐㅂㅗㅏgeniemixversion##!"));
testCase.add(new TestCaseVO("hush hush; hush hush", "hushhush;hushhush"));
testCase.add(new TestCaseVO("라벨:모음곡(거울)제4곡(어릿광대의 아침노래)(서현)", "ㄹㅏㅂㅔㄹ:ㅁㅗㅇㅡㅁㄱㅗㄱ(ㄱㅓㅇㅜㄹ)ㅈㅔ4ㄱㅗㄱ(ㅇㅓㄹㅣㅅㄱㅗㅏㅇㄷㅐㅇㅡㅣㅇㅏㅊㅣㅁㄴㅗㄹㅐ)(ㅅㅓㅎㅕㄴ)"));
testCase.add(new TestCaseVO("ㅗ디ㅣㅐ", "ㅗㄷㅣㅣㅐ")); //hello
testCase.add(new TestCaseVO("째깅", "ㅈㅈㅐㄱㅣㅇ")); //World
testCase.add(new TestCaseVO("ㅣㅐ햣ㄷ초", "ㅣㅐㅎㅑㅅㄷㅊㅗ")); //logitech
testCase.add(new TestCaseVO("퍗므ㅑㅜ", "ㅍㅑㅅㅁㅡㅑㅜ")); //vitamin
testCase.add(new TestCaseVO("랱", "ㄹㅐㅌ")); //fox
testCase.add(new TestCaseVO("ㅅ딛퍄냐ㅐㅜ", "ㅅㄷㅣㄷㅍㅑㄴㅑㅐㅜ")); //television
testCase.add(new TestCaseVO("최", "ㅊㅗㅣ"));
testCase.add(new TestCaseVO("ㅅㄴㅅㄷ", "ㅅㄴㅅㄷ")); //소녀시대 초성검색
testCase.add(new TestCaseVO("##%@#$%()*&^%$#@!", "##%@#$%()*&^%$#@!"));
testCase.add(new TestCaseVO("2000 최신가요", "2000ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ"));
testCase.add(new TestCaseVO("15&", "15&"));
testCase.add(new TestCaseVO("최신가요 1990", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ1990"));
testCase.add(new TestCaseVO("최신가요&", "ㅊㅗㅣㅅㅣㄴㄱㅏㅇㅛ&"));

testCase.add(new TestCaseVO("무조건 해피엔딩", "ㅁㅜㅈㅗㄱㅓㄴㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ/ㅁㅜㅈㅗㄱㅓㄴ/ㅎㅐㅍㅣㅇㅔㄴㄷㅣㅇ"));
testCase.add(new TestCaseVO("아디다스 운동화", "ㅇㅏㄷㅣㄷㅏㅅㅡㅇㅜㄴㄷㅗㅇㅎㅗㅏ/ㅇㅏㄷㅣㄷㅏㅅㅡ/ㅇㅜㄴㄷㅗㅇㅎㅗㅏ"));
testCase.add(new TestCaseVO("투데이특가", "ㅌㅜㄷㅔㅇㅣㅌㅡㄱㄱㅏ"));
}

for (TestCaseVO vo : testCase) {
Expand Down

0 comments on commit 277b641

Please sign in to comment.