diff --git a/pom.xml b/pom.xml
index f094eb6..a95f8b0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -61,6 +61,12 @@
tika-parsers
1.17
+
+
+ org.apache.tika
+ tika-langdetect
+ 1.17
+
com.google.code.gson
@@ -75,5 +81,32 @@
1.7.25
compile
+
+
+ org.xerial
+ sqlite-jdbc
+ 3.21.0.1
+
+
+
+ com.levigo.jbig2
+ levigo-jbig2-imageio
+ 2.0
+ test
+
+
+ com.github.jai-imageio
+ jai-imageio-core
+ 1.3.1
+ test
+
+
+ com.github.jai-imageio
+ jai-imageio-jpeg2000
+ 1.3.0
+ test
+
diff --git a/src/main/java/com/ipfssearch/ipfstika/App.java b/src/main/java/com/ipfssearch/ipfstika/App.java
index 4954771..3fdc712 100644
--- a/src/main/java/com/ipfssearch/ipfstika/App.java
+++ b/src/main/java/com/ipfssearch/ipfstika/App.java
@@ -20,6 +20,8 @@
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.LinkContentHandler;
import org.apache.tika.language.detect.LanguageHandler;
+import org.apache.tika.language.detect.LanguageDetector;
+import org.apache.tika.langdetect.OptimaizeLangDetector;
import org.apache.tika.sax.TeeContentHandler;
import org.apache.tika.sax.Link;
import org.apache.tika.exception.TikaException;
@@ -114,9 +116,11 @@ private String getResponse(String path) throws IOException {
AutoDetectParser parser = new AutoDetectParser();
LinkContentHandler link_handler = new LinkContentHandler();
BodyContentHandler body_handler = new BodyContentHandler(10*1024*1024);
- // This causes weird crashes
- // LanguageHandler language_handler = new LanguageHandler();
- TeeContentHandler handler = new TeeContentHandler(link_handler, body_handler);
+
+ LanguageDetector language_detector = new OptimaizeLangDetector().loadModels();
+ LanguageHandler language_handler = new LanguageHandler(language_detector);
+
+ TeeContentHandler handler = new TeeContentHandler(link_handler, body_handler, language_handler);
Metadata metadata = new Metadata();
// Set filename from path string
@@ -141,16 +145,17 @@ private String getResponse(String path) throws IOException {
/* Now return JSON with:
{
+ "metadata": metadata,
"language": language_handler.getLanguage(),
"content": body_handler.toString(),
- "links": links,
- "metadata": metadata
+ "urls": links
}
*/
Gson gson = new Gson();
JsonObject output_json = gson.toJsonTree(metadata).getAsJsonObject();
output_json.add("content", gson.toJsonTree(body_handler.toString().trim()));
output_json.add("urls", gson.toJsonTree(links));
+ output_json.add("language", gson.toJsonTree(language_handler.getLanguage()));
return output_json.toString();
}