diff --git a/tools/pos-and-lemmatisation/abltagger-lem.json b/tools/pos-and-lemmatisation/abltagger-lem.json
new file mode 100644
index 0000000..d5ebecf
--- /dev/null
+++ b/tools/pos-and-lemmatisation/abltagger-lem.json
@@ -0,0 +1,18 @@
+{
+ "Name": "ABLTagger (Lemmatizer)",
+ "URL": "http://hdl.handle.net/20.500.12537/134",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "The lemmatiser achieves an accuracy of 98.3% on MIM-Gold (21.05, cross-validation).",
+ "Functionality": ["lemma"],
+ "Language": ["isl"],
+ "Licence": "The MIT License",
+ "Platform": [],
+ "Infrastructure": "CLARIN-IS",
+ "Group": "For a single language",
+ "Input format": ["tokenised plain text"],
+ "Output format": [],
+ "Access": {
+ "Download": "http://hdl.handle.net/20.500.12537/134"
+ },
+ "Publication": "Steingrímsson et al. (2019)"
+}
diff --git a/tools/pos-and-lemmatisation/abltagger-pos.json b/tools/pos-and-lemmatisation/abltagger-pos.json
new file mode 100644
index 0000000..39e3c8d
--- /dev/null
+++ b/tools/pos-and-lemmatisation/abltagger-pos.json
@@ -0,0 +1,18 @@
+{
+ "Name": "ABLTagger (PoS)",
+ "URL": "http://hdl.handle.net/20.500.12537/115",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a part of speech tagger for Icelandic. This entry contains pretrained models for ABLTagger v3.0.0. There are two versions, small and large, of PoS taggers that work with the revised tagset that achieve an accuracy of ~96.7% and ~97.8% on MIM-Gold (cross-validation, excluding \"x\" and \"e\" tags), respectively.",
+ "Functionality": ["PoS"],
+ "Language": ["isl"],
+ "Licence": "Apache License 2.0",
+ "Platform": [],
+ "Infrastructure": "CLARIN-IS",
+ "Group": "For a single language",
+ "Input format": ["tokenised plain or pre-tagged text"],
+ "Output format": [],
+ "Access": {
+ "Download": "http://hdl.handle.net/20.500.12537/115"
+ },
+ "Publication": "Steingrímsson et al. (2019)"
+}
diff --git a/tools/pos-and-lemmatisation/afrikaans-tnt.json b/tools/pos-and-lemmatisation/afrikaans-tnt.json
new file mode 100644
index 0000000..2987f45
--- /dev/null
+++ b/tools/pos-and-lemmatisation/afrikaans-tnt.json
@@ -0,0 +1,17 @@
+{
+ "Name": "Afrikaans TnT-Tagger",
+ "URL": "https://hdl.handle.net/20.500.12185/143",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the TnT tagger (Brants 2000). The tagset used by the tool was especially designed for Afrikaans and consists of 139 PoS-tags.",
+ "Functionality": ["PoS"],
+ "Language": ["afr"],
+ "Licence": "research only",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": ["plain text"],
+ "Access": {
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/assamese-pos.json b/tools/pos-and-lemmatisation/assamese-pos.json
new file mode 100644
index 0000000..b60d74e
--- /dev/null
+++ b/tools/pos-and-lemmatisation/assamese-pos.json
@@ -0,0 +1,17 @@
+{
+ "Name": "Assamese POS Tagger",
+ "URL": "http://hdl.handle.net/11321/620",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a CRF++ based PoS-tagger.",
+ "Functionality": ["PoS"],
+ "Language": ["asm"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/char-level-pos-slv.json b/tools/pos-and-lemmatisation/char-level-pos-slv.json
new file mode 100644
index 0000000..7204d4f
--- /dev/null
+++ b/tools/pos-and-lemmatisation/char-level-pos-slv.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Character-level part-of-speech tagger of Slovene language",
+ "URL": "http://hdl.handle.net/11356/1211",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool uses convolutional and LSTM neural networks. The tool has been trained on the ssj500k 2.1 corpus.",
+ "Functionality": ["PoS"],
+ "Language": ["slv"],
+ "Licence": "GNU General Public Licence, version 3",
+ "Platform": [],
+ "Infrastructure": "CLARIN.SI",
+ "Group": "For a single language",
+ "Input format": ["XML", "TEI", "plain text"],
+ "Output format": [],
+ "Access": {
+ "Download": "http://hdl.handle.net/11356/1211"
+ },
+ "Publication": "Belej (2018)"
+}
diff --git a/tools/pos-and-lemmatisation/clarin-dk-nlp-toolbox.json b/tools/pos-and-lemmatisation/clarin-dk-nlp-toolbox.json
new file mode 100644
index 0000000..a3b88a8
--- /dev/null
+++ b/tools/pos-and-lemmatisation/clarin-dk-nlp-toolbox.json
@@ -0,0 +1,18 @@
+{
+ "Name": "CLARIN DK NLP Toolbox",
+ "URL": "https://clarin.dk/clarindk/toolchains-wizard.jsp",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is an NLP toolchain that is part of the core CLARIN-DK structure.",
+ "Functionality": ["PoS", "lemma", "frequency lists"],
+ "Language": ["dan", "eng"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-DK",
+ "Group": "For multiple languages",
+ "Input format": ["plain text", "rtf", "pdf"],
+ "Output format": ["plain text", "rtf"],
+ "Access": {
+ "Web application": "https://clarin.dk/clarindk/toolchains-wizard.jsp"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/clark.json b/tools/pos-and-lemmatisation/clark.json
new file mode 100644
index 0000000..92a4a12
--- /dev/null
+++ b/tools/pos-and-lemmatisation/clark.json
@@ -0,0 +1,18 @@
+{
+ "Name": "CLaRK",
+ "URL": "http://bultreebank.org/en/clark/",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is an XML-based software system for corpora development implemented in Java. The main aim behind the design of the system is the minimization of human intervention during the creation of language resources. CLaRK includes BTB-Pipe, which is a language pipeline for Bulgarian that comprises the following modules: sentence splitting, MSD-tagging, lemmatization, dependency parsing.",
+ "Functionality": ["sentence splitting", "PoS", "lemma", "syntactic parsing"],
+ "Language": ["bul"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "ClaDA-BG",
+ "Group": "For a single language",
+ "Input format": ["XML"],
+ "Output format": ["XML"],
+ "Access": {
+ "Download": "http://bultreebank.org/en/clark/bulgarian-nlp-pipeline-in-clark-system/"
+ },
+ "Publication": "Simov et al. (2001)"
+}
diff --git a/tools/pos-and-lemmatisation/claws.json b/tools/pos-and-lemmatisation/claws.json
new file mode 100644
index 0000000..9505dbb
--- /dev/null
+++ b/tools/pos-and-lemmatisation/claws.json
@@ -0,0 +1,18 @@
+{
+ "Name": "CLAWS",
+ "URL": "https://www.clarin.ac.uk/claws",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "CLAWS (the Constituent Likelihood Automatic Word-tagging System) has been continuously developed since the early 1980s. The latest version of the tagger, CLAWS4, was used to PoS tag approx. 100 million words of the British National Corpus (BNC), and all the English corpora in Mark Davies' BYU corpus server. Users can choose to have output in either the smaller C5 tagset or the larger C7 tagset.",
+ "Functionality": ["PoS/MSD"],
+ "Language": ["eng"],
+ "Licence": "Terms of Service",
+ "Platform": [],
+ "Infrastructure": "CLARIN UK",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": ["horizontal", "vertical", "pseudo-XML"],
+ "Access": {
+ "Web application": "http://ucrel-api.lancaster.ac.uk/claws/free.html"
+ },
+ "Publication": "Garside and Smith (1997)"
+}
diff --git a/tools/pos-and-lemmatisation/corpus-by.json b/tools/pos-and-lemmatisation/corpus-by.json
new file mode 100644
index 0000000..8d6dcf8
--- /dev/null
+++ b/tools/pos-and-lemmatisation/corpus-by.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Corpus.by Lemmatizer",
+ "URL": "https://www.corpus.by/Lemmatizer/?lang=en",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is part of the corpus.by platform.",
+ "Functionality": ["lemma"],
+ "Language": ["bel"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN Knowledge Centre for Belarusian text and speech processing",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": ["plain text"],
+ "Access": {
+ "Web service": "https://www.corpus.by/Lemmatizer/?lang=en"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/cst-lemmatizer.json b/tools/pos-and-lemmatisation/cst-lemmatizer.json
new file mode 100644
index 0000000..3ce7de2
--- /dev/null
+++ b/tools/pos-and-lemmatisation/cst-lemmatizer.json
@@ -0,0 +1,18 @@
+{
+ "Name": "CST’s lemmatizer",
+ "URL": "http://hdl.handle.net/11372/LRT-1249",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool uses affix rules (affix: prefix, infix, suffix, circumfix).",
+ "Functionality": ["lemma"],
+ "Language": ["bul", "ces", "dan", "nld", "eng", "est", "fas", "fra", "deu", "ell", "hun", "isl", "ita", "lat", "mkd", "pol", "por", "ron", "rus", "srp", "slk", "slv", "spa", "ukr"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "LINDAT/CLARIN-DK",
+ "Group": "For multiple languages",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "http://cst.dk/download/uk/"
+ },
+ "Publication": "Jongejan and Dalianis (2009)"
+}
diff --git a/tools/pos-and-lemmatisation/estnltk.json b/tools/pos-and-lemmatisation/estnltk.json
new file mode 100644
index 0000000..1c15b80
--- /dev/null
+++ b/tools/pos-and-lemmatisation/estnltk.json
@@ -0,0 +1,18 @@
+{
+ "Name": "EstNLTK",
+ "URL": "https://estnltk.github.io/estnltk/1.4.1/",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool provides common natural language processing functionality such as morphological analysis and named entity recognition for the Estonian language.\nWeb API documentation is available here.",
+ "Functionality": ["MSD", "NER"],
+ "Language": ["est"],
+ "Licence": "Available - Unrestricted Use",
+ "Platform": [],
+ "Infrastructure": "CELR",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": ["plain text"],
+ "Access": {
+ "Download": "https://estnltk.github.io/estnltk/1.4.1/"
+ },
+ "Publication": "Orasmaa et al. (2016)"
+}
diff --git a/tools/pos-and-lemmatisation/fintag.json b/tools/pos-and-lemmatisation/fintag.json
new file mode 100644
index 0000000..fb9f8c2
--- /dev/null
+++ b/tools/pos-and-lemmatisation/fintag.json
@@ -0,0 +1,19 @@
+{
+ "Name": "FinTag",
+ "URL": "http://urn.fi/urn:nbn:fi:lb-201908161",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This toolchain provides finnish-postag, a part-of-speech and morphology tagger for Finnish, and finnish-nertag, a named entity recogniser for Finnish. Both tools take running text from standard input and produce tabular output (one token per line) to standard output.",
+ "Functionality": ["PoS", "lemma", "NER"],
+ "Language": ["fin"],
+ "Licence": "GPL",
+ "Platform": [],
+ "Infrastructure": "FIN-CLARIN",
+ "Group": "For a single language",
+ "Input format": ["plain text", "pdf", "doc", "csv", "epub", "html", "odt", "xls"],
+ "Output format": ["TSV"],
+ "Access": {
+ "Download": "http://urn.fi/urn:nbn:fi:lb-201908162",
+ "Web application": "https://www.kielipankki.fi/tools/demo/cgi-bin/fintag.py"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/freeling.json b/tools/pos-and-lemmatisation/freeling.json
new file mode 100644
index 0000000..d82755d
--- /dev/null
+++ b/tools/pos-and-lemmatisation/freeling.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Freeling",
+ "URL": "http://hdl.handle.net/20.500.11752/ILC-72",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This toolchain was developed in the PANACEA project and implements Freeling 2.1 libraries.",
+ "Functionality": ["PoS", "lemma"],
+ "Language": ["ita"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-IT",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Web application": "https://ilc4clarin.ilc.cnr.it/en/service/freeling-it"
+ },
+ "Publication": "Padró et al. (2010)"
+}
diff --git a/tools/pos-and-lemmatisation/frog.json b/tools/pos-and-lemmatisation/frog.json
new file mode 100644
index 0000000..422e986
--- /dev/null
+++ b/tools/pos-and-lemmatisation/frog.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Frog",
+ "URL": "http://hdl.handle.net/10032/198143d2010e74ae17d4223dfc00e2a8",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is an integration of memory-based NLP modules developed for Dutch. All NLP modules are based on TiMBL, the Tilburg memory-based learning software package. Where possible, Frog makes use of multi-processor support to run subtasks in parallel.",
+ "Functionality": ["PoS", "MSD", "lemma", "NE", "phrase chunks", "dependency relations with head words"],
+ "Language": ["nld"],
+ "Licence": "GNU General Public Licence",
+ "Platform": [],
+ "Infrastructure": "CLARIAH-NL",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": ["FoLiA XML"],
+ "Access": {
+ "Download": "https://github.com/LanguageMachines/frog/releases/"
+ },
+ "Publication": "van den Bosch et al. (2007)"
+}
diff --git a/tools/pos-and-lemmatisation/genia-tagger.json b/tools/pos-and-lemmatisation/genia-tagger.json
new file mode 100644
index 0000000..c3bef99
--- /dev/null
+++ b/tools/pos-and-lemmatisation/genia-tagger.json
@@ -0,0 +1,18 @@
+{
+ "Name": "GENIA Tagger",
+ "URL": "https://hdl.handle.net/21.11115/0000-000B-D330-0",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is used for annotating biomedical texts such as MEDLINE abstracts.",
+ "Functionality": ["PoS", "lemma", "chunks", "named entities"],
+ "Language": ["eng", "ces", "slk"],
+ "Licence": "proprietary - commercial",
+ "Platform": [],
+ "Infrastructure": "PORTULAN",
+ "Group": "For multiple languages",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "http://www.nactem.ac.uk/GENIA/tagger/"
+ },
+ "Publication": "Tsuruoka et al. (2015)"
+}
diff --git a/tools/pos-and-lemmatisation/hmm-tagger.json b/tools/pos-and-lemmatisation/hmm-tagger.json
new file mode 100644
index 0000000..70f49de
--- /dev/null
+++ b/tools/pos-and-lemmatisation/hmm-tagger.json
@@ -0,0 +1,18 @@
+{
+ "Name": "HMM tagger",
+ "URL": "http://hdl.handle.net/11858/00-097C-0000-0001-48F9-4",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool uses Hidden Markov Models and is an implementation of the UFAL tagger.",
+ "Functionality": ["MSD"],
+ "Language": ["ces"],
+ "Licence": "GNU General Public Licence, version 2",
+ "Platform": [],
+ "Infrastructure": "LINDAT",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "http://hdl.handle.net/11858/00-097C-0000-0001-48F9-4"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/hunpos.json b/tools/pos-and-lemmatisation/hunpos.json
new file mode 100644
index 0000000..4929550
--- /dev/null
+++ b/tools/pos-and-lemmatisation/hunpos.json
@@ -0,0 +1,18 @@
+{
+ "Name": "hunpos",
+ "URL": "https://hdl.handle.net/11372/LRT-1205",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is an open source reimplementation of the TnT tagger (Brants 2000).",
+ "Functionality": ["PoS"],
+ "Language": ["hun"],
+ "Licence": "New BSD License",
+ "Platform": [],
+ "Infrastructure": "LINDAT",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "https://code.google.com/archive/p/hunpos/downloads"
+ },
+ "Publication": "Halácsy et al. (2007)"
+}
diff --git a/tools/pos-and-lemmatisation/icenlp.json b/tools/pos-and-lemmatisation/icenlp.json
new file mode 100644
index 0000000..57d16d4
--- /dev/null
+++ b/tools/pos-and-lemmatisation/icenlp.json
@@ -0,0 +1,19 @@
+{
+ "Name": "IceNLP Natural Language Processing toolkit",
+ "URL": "http://hdl.handle.net/20.500.12537/8",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is an open source NLP toolkit for analyzing and processing Icelandic text. The toolkit is implemented in Java.",
+ "Functionality": ["PoS", "lemma", "shallow syntactic parsing"],
+ "Language": ["isl"],
+ "Licence": "GNU General Public Licence, version 2",
+ "Platform": [],
+ "Infrastructure": "CLARIN-IS",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": ["plain text"],
+ "Access": {
+ "Download": "https://github.com/hrafnl/icenlp",
+ "Web application": "http://nlp.cs.ru.is:8080/IceNLPWeb/icenlp.html"
+ },
+ "Publication": "Loftsson and Rögnvaldsson (2007)"
+}
diff --git a/tools/pos-and-lemmatisation/ilsp-feature.json b/tools/pos-and-lemmatisation/ilsp-feature.json
new file mode 100644
index 0000000..c7fa26f
--- /dev/null
+++ b/tools/pos-and-lemmatisation/ilsp-feature.json
@@ -0,0 +1,18 @@
+{
+ "Name": "ILSP Feature-based multi-tiered POS Tagger",
+ "URL": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-23E8-3",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is an FBT-based multi-tiered tagger. FBT is a variant of the well-known transformation based learning paradigm aiming at improving the quality of tagging highly inflective languages such as Greek.",
+ "Functionality": ["PoS"],
+ "Language": ["ell"],
+ "Licence": "terms of service (Restrictions: Academic - Non Commercial Use)",
+ "Platform": [],
+ "Infrastructure": "CLARIN:EL",
+ "Group": "For a single language",
+ "Input format": ["Application/vnd.xmi+xml"],
+ "Output format": ["Application/vnd.xmi+xml"],
+ "Access": {
+ "Web application": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-23E8-3"
+ },
+ "Publication": "Papageorgiou et al. (2000)"
+}
diff --git a/tools/pos-and-lemmatisation/inl-labs.json b/tools/pos-and-lemmatisation/inl-labs.json
new file mode 100644
index 0000000..bfa0808
--- /dev/null
+++ b/tools/pos-and-lemmatisation/inl-labs.json
@@ -0,0 +1,18 @@
+{
+ "Name": "INL Labs tagger/lemmatizer tools",
+ "URL": "http://hdl.handle.net/10032/79a7f85fc70d1cf276c4c6a0a56dd176",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool employs a PoS tagger that is trained on the \"Letters as loot\" historical corpus and a lemmatizer that is trained on the INL historical lexicon.",
+ "Functionality": ["PoS", "lemma"],
+ "Language": ["nld"],
+ "Licence": "CLARIN PUB",
+ "Platform": [],
+ "Infrastructure": "CLARIAH-NL",
+ "Group": "For a single language",
+ "Input format": ["plain text", "TEI", "epub", "html", "docx", "alto"],
+ "Output format": ["styled", "XML"],
+ "Access": {
+ "Web application": "http://inl-labs.taalbanknederlands.inl.nl/succeed/tagger/ui"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/janes-tagger.json b/tools/pos-and-lemmatisation/janes-tagger.json
new file mode 100644
index 0000000..4c3fbb8
--- /dev/null
+++ b/tools/pos-and-lemmatisation/janes-tagger.json
@@ -0,0 +1,18 @@
+{
+ "Name": "janes-tagger",
+ "URL": "https://github.com/clarinsi/janes-tagger",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool, which was developed in the context of the JANES project, tags non-standard Slovenian, with Croatian and Serbian to follow.",
+ "Functionality": ["PoS", "lemma"],
+ "Language": ["slv"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN.SI",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": [],
+ "Access": {
+ "Download": "https://github.com/clarinsi/janes-tagger"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/lem-por.json b/tools/pos-and-lemmatisation/lem-por.json
new file mode 100644
index 0000000..ff8a655
--- /dev/null
+++ b/tools/pos-and-lemmatisation/lem-por.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Lemmatizer for Portuguese",
+ "URL": "https://hdl.handle.net/21.11115/0000-000B-D31E-6",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the MXPOST part of speech tagger and is trained on UNITEX dictionaries for Portuguese.",
+ "Functionality": ["lemma"],
+ "Language": ["por"],
+ "Licence": "Apache Licence 2.0 (academic)",
+ "Platform": [],
+ "Infrastructure": "PORTULAN",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": ["plain text"],
+ "Access": {
+ "Download": "https://hdl.handle.net/21.11115/0000-000B-D31E-6"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/lx-tagger.json b/tools/pos-and-lemmatisation/lx-tagger.json
new file mode 100644
index 0000000..041b03b
--- /dev/null
+++ b/tools/pos-and-lemmatisation/lx-tagger.json
@@ -0,0 +1,18 @@
+{
+ "Name": "LX-Tagger",
+ "URL": "https://hdl.handle.net/21.11115/0000-000B-D325-D",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the TnT tagger (Brants 2000).",
+ "Functionality": ["MSD"],
+ "Language": ["por"],
+ "Licence": "Academic - Non-Commercial use",
+ "Platform": [],
+ "Infrastructure": "PORTULAN",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "http://lxcenter.di.fc.ul.pt/tools/en/LXTaggerEN.html"
+ },
+ "Publication": "Silva (2007)"
+}
diff --git a/tools/pos-and-lemmatisation/lx-verbal-lem.json b/tools/pos-and-lemmatisation/lx-verbal-lem.json
new file mode 100644
index 0000000..4d6962f
--- /dev/null
+++ b/tools/pos-and-lemmatisation/lx-verbal-lem.json
@@ -0,0 +1,17 @@
+{
+ "Name": "LX-Verbal Lemmatizer",
+ "URL": "https://hdl.handle.net/21.11129/0000-000C-D890-D",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool performs fully-fledged lemmatization of Portuguese verbs, including the full range of pronominal conjugation forms.",
+ "Functionality": ["lemma (verbs)"],
+ "Language": ["por"],
+ "Licence": "Terms of Service",
+ "Platform": [],
+ "Infrastructure": "PORTULAN",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/mlss.json b/tools/pos-and-lemmatisation/mlss.json
new file mode 100644
index 0000000..491924f
--- /dev/null
+++ b/tools/pos-and-lemmatisation/mlss.json
@@ -0,0 +1,18 @@
+{
+ "Name": "MLSS Tagger Web Service",
+ "URL": "https://hdl.handle.net/21.11115/0000-000B-D348-6",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is an implementation of the TnT tagger (Brants 2000). The model for Maltese was trained on manually tagged texts and has reached an accuracy of 96%. The tagset tailored to Maltese is available here.",
+ "Functionality": ["PoS"],
+ "Language": ["mlt"],
+ "Licence": "CLARIN ACA",
+ "Platform": [],
+ "Infrastructure": "PORTULAN",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Web application": "http://metanet4u.research.um.edu.mt/tools.jsp"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/morfeusz2.json b/tools/pos-and-lemmatisation/morfeusz2.json
new file mode 100644
index 0000000..b73bfe9
--- /dev/null
+++ b/tools/pos-and-lemmatisation/morfeusz2.json
@@ -0,0 +1,19 @@
+{
+ "Name": "Morfeusz 2",
+ "URL": "http://hdl.handle.net/11321/257",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a dictionary-based morphological analyser and generator for Polish. This version of the program is decoupled from the dictionary. Two dictionaries of Polish developed within other projects are distributed with Morfeusz 2, namely SGJP and Polimorf.",
+ "Functionality": ["MSD"],
+ "Language": ["pol"],
+ "Licence": "BSD 2 (public)",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": ["various"],
+ "Output format": ["various"],
+ "Access": {
+ "Download": "http://hdl.handle.net/11321/257",
+ "Web application": "https://ws.clarin-pl.eu/"
+ },
+ "Publication": "Woliński (2014)"
+}
diff --git a/tools/pos-and-lemmatisation/morphadorner.json b/tools/pos-and-lemmatisation/morphadorner.json
new file mode 100644
index 0000000..50c00cd
--- /dev/null
+++ b/tools/pos-and-lemmatisation/morphadorner.json
@@ -0,0 +1,18 @@
+{
+ "Name": "MorphAdorner Lemmatizer",
+ "URL": "http://hdl.handle.net/11022/0000-0000-83A0-6",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is implemented in WebLicht and is derived from the MorphAdorner morphological analyser.",
+ "Functionality": ["lemma"],
+ "Language": ["eng"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For a single language",
+ "Input format": ["TCF", "XML"],
+ "Output format": [],
+ "Access": {
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/morphodita-pol.json b/tools/pos-and-lemmatisation/morphodita-pol.json
new file mode 100644
index 0000000..3a7752d
--- /dev/null
+++ b/tools/pos-and-lemmatisation/morphodita-pol.json
@@ -0,0 +1,18 @@
+{
+ "Name": "MorphoDiTa-based tagger for Polish language",
+ "URL": "http://hdl.handle.net/11321/425",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the MorphoDiTa tagger, adapted to Polish. The tool employs the NKJP tagset.",
+ "Functionality": ["MSD"],
+ "Language": ["pol"],
+ "Licence": "GNU LGPL 3.0",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "http://hdl.handle.net/11321/425"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/morphodita.json b/tools/pos-and-lemmatisation/morphodita.json
new file mode 100644
index 0000000..3d0cb3d
--- /dev/null
+++ b/tools/pos-and-lemmatisation/morphodita.json
@@ -0,0 +1,20 @@
+{
+ "Name": "MorphoDiTa: Morphological Dictionary and Tagger",
+ "URL": "http://hdl.handle.net/11858/00-097C-0000-0023-43CD-0",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool performs morphological analysis, morphological generation, tagging and tokenization and is distributed as a standalone tool or a library, along with trained linguistic models. For Czech, the tool achieves state-of-the-art results with a throughput around 10,000-200,000 words per second. The tool is versioned using Semantic Versioning.\nThe following language models are available through LINDAT under the CC BY licence: Czech and English.",
+ "Functionality": ["MSD", "lemma"],
+ "Language": ["eng", "ces", "slk"],
+ "Licence": "Mozilla Public Licence 2.0 (software); CC BY-NC-SA (models)",
+ "Platform": [],
+ "Infrastructure": "LINDAT",
+ "Group": "For multiple languages",
+ "Input format": ["plain text", "vertical"],
+ "Output format": ["vertical", "XML"],
+ "Access": {
+ "Download": "https://github.com/ufal/morphodita/releases/tag/v1.9.2",
+ "Web application": "http://lindat.mff.cuni.cz/services/morphodita/",
+ "API": "http://lindat.mff.cuni.cz/services/morphodita/api-reference.php"
+ },
+ "Publication": "Straková et al. (2014)"
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-afr-lem.json b/tools/pos-and-lemmatisation/nchlt-afr-lem.json
new file mode 100644
index 0000000..40814d3
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-afr-lem.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Afrikaans Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/297",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for Afrikaans developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["afr"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/297"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-isindebele.json b/tools/pos-and-lemmatisation/nchlt-isindebele.json
new file mode 100644
index 0000000..cf5299b
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-isindebele.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT isiNdebele Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/303",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Ndebele Bantu language developed during the NCHLT Text project.",
+ "Functionality": ["lemma"],
+ "Language": ["nbl"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/303"
+ },
+ "Publication": "Barnard et al. (2014)"
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-isizulu.json b/tools/pos-and-lemmatisation/nchlt-isizulu.json
new file mode 100644
index 0000000..bcfba70
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-isizulu.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT isiZulu Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/317",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Zulu Bantu language developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["zul"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/317"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-sepedi.json b/tools/pos-and-lemmatisation/nchlt-sepedi.json
new file mode 100644
index 0000000..042bfb0
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-sepedi.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Sepedi Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/326",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Sepedi (Northern Sotho) Bantu language developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["nso"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/326"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-sesotho.json b/tools/pos-and-lemmatisation/nchlt-sesotho.json
new file mode 100644
index 0000000..50d6a59
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-sesotho.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Sesotho Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/333",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Sesotho Bantu language developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["sot"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/333"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-setswana.json b/tools/pos-and-lemmatisation/nchlt-setswana.json
new file mode 100644
index 0000000..bacaffb
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-setswana.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Setswana Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/339",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Tswana Bantu language developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["Tswana"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/339"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-siswati.json b/tools/pos-and-lemmatisation/nchlt-siswati.json
new file mode 100644
index 0000000..268fb83
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-siswati.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Siswati Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/345",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Swazi Bantu language developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["Swazi"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/345"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-tagger.json b/tools/pos-and-lemmatisation/nchlt-tagger.json
new file mode 100644
index 0000000..e23c416
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-tagger.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Tagger",
+ "URL": "https://hdl.handle.net/20.500.12185/351",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is used to annotate texts in Afrikaans and a variety of Bantu languages.",
+ "Functionality": ["PoS", "phrase chunks", "NE"],
+ "Language": ["afr", "eng", "Ndebele", "xho", "zul", "Sesotho sa Leboa", "tsn", "Sesotho", "ssw", "ven", "tso"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For multiple languages",
+ "Input format": ["Utf8 text file containing running text"],
+ "Output format": ["tab-delimited text file containing each token followed by its assigned class."],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/351"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-tshivenda.json b/tools/pos-and-lemmatisation/nchlt-tshivenda.json
new file mode 100644
index 0000000..82133fb
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-tshivenda.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Tshivenda Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/354",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Venda Bantu language developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["Venda"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/354"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nchlt-xitsonga.json b/tools/pos-and-lemmatisation/nchlt-xitsonga.json
new file mode 100644
index 0000000..88a2569
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nchlt-xitsonga.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NCHLT Xitsonga Lemmatiser",
+ "URL": "https://hdl.handle.net/20.500.12185/361",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a lemmatizer for the Tsonga Bantu language developed during the NCHLT Text project (Barnard et al. 2014).",
+ "Functionality": ["lemma"],
+ "Language": ["Tsonga"],
+ "Licence": "CC-BY 2.5 South Africa Licence",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": ["Text data (encoding: UTF8 without BOM), one lowercase token per line"],
+ "Output format": ["Token tab, lemma"],
+ "Access": {
+ "Download": "https://hdl.handle.net/20.500.12185/361"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/nlp-pipe.json b/tools/pos-and-lemmatisation/nlp-pipe.json
new file mode 100644
index 0000000..ca1a6cf
--- /dev/null
+++ b/tools/pos-and-lemmatisation/nlp-pipe.json
@@ -0,0 +1,18 @@
+{
+ "Name": "NLP-PIPE",
+ "URL": "http://nlp.ailab.lv/",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a modular toolchain that allows researchers to combine multiple natural language processing tools in a unified framework. It provides the gluing code that is used to combine tools even if they are written in different programming languages and rely on conflicting library versions. It was created to make NLP technology more accessible to linguists, and to make new tool creation and integration easier to researchers and software developers.",
+ "Functionality": ["MSD", "syntactic parsing", "NER"],
+ "Language": ["lav"],
+ "Licence": "GNU General Public Licence 3",
+ "Platform": [],
+ "Infrastructure": "CLARIN-LV",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "https://github.com/LUMII-AILab/nlp-pipe"
+ },
+ "Publication": "Znotins and Cirule (2018)"
+}
diff --git a/tools/pos-and-lemmatisation/opennlp-pos-deu.json b/tools/pos-and-lemmatisation/opennlp-pos-deu.json
new file mode 100644
index 0000000..e24d7e5
--- /dev/null
+++ b/tools/pos-and-lemmatisation/opennlp-pos-deu.json
@@ -0,0 +1,18 @@
+{
+ "Name": "OpenNLP Part-of-Speech Tagger (German)",
+ "URL": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-2792-F",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the Apache OpenNLP library, which is a perceptron and maximum entropy-based machine learning toolkit for the processing of natural language text.",
+ "Functionality": ["PoS"],
+ "Language": ["deu"],
+ "Licence": "Apache Licence 2.0 (restricted)",
+ "Platform": [],
+ "Infrastructure": "CLARIN:EL",
+ "Group": "For a single language",
+ "Input format": ["application/xml"],
+ "Output format": ["application/xml"],
+ "Access": {
+ "Web application": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-2792-F"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/opennlp-pos-eng.json b/tools/pos-and-lemmatisation/opennlp-pos-eng.json
new file mode 100644
index 0000000..2e43646
--- /dev/null
+++ b/tools/pos-and-lemmatisation/opennlp-pos-eng.json
@@ -0,0 +1,18 @@
+{
+ "Name": "OpenNLP Part-of-Speech Tagger (English)",
+ "URL": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-2790-1",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the Apache OpenNLP library, which is a perceptron and maximum entropy-based machine learning toolkit for the processing of natural language text.",
+ "Functionality": ["PoS"],
+ "Language": ["eng"],
+ "Licence": "Apache Licence 2.0 (restricted)",
+ "Platform": [],
+ "Infrastructure": "CLARIN:EL",
+ "Group": "For a single language",
+ "Input format": ["application/xml"],
+ "Output format": ["application/xml"],
+ "Access": {
+ "Web application": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-2790-1"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/opennlp-pos-por.json b/tools/pos-and-lemmatisation/opennlp-pos-por.json
new file mode 100644
index 0000000..991bb6e
--- /dev/null
+++ b/tools/pos-and-lemmatisation/opennlp-pos-por.json
@@ -0,0 +1,18 @@
+{
+ "Name": "OpenNLP Part-of-Speech Tagger (Portuguese)",
+ "URL": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-2794-D",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the Apache OpenNLP library, which is a perceptron and maximum entropy-based machine learning toolkit for the processing of natural language text.",
+ "Functionality": ["PoS"],
+ "Language": ["por"],
+ "Licence": "Apache Licence 2.0 (restricted)",
+ "Platform": [],
+ "Infrastructure": "CLARIN:EL",
+ "Group": "For a single language",
+ "Input format": ["application/xml"],
+ "Output format": ["application/xml"],
+ "Access": {
+ "Web application": "http://hdl.grnet.gr/11500/ATHENA-0000-0000-2794-D"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/oslo-bergen.json b/tools/pos-and-lemmatisation/oslo-bergen.json
new file mode 100644
index 0000000..b383455
--- /dev/null
+++ b/tools/pos-and-lemmatisation/oslo-bergen.json
@@ -0,0 +1,18 @@
+{
+ "Name": "The Oslo-Bergen tagger",
+ "URL": "http://www.tekstlab.uio.no/obt-ny/",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool consists of three main modules: a pre-processor with a composition analyzer and multitagger, a grammar module for morphological and syntactic disambiguation (based on the constraint grammar paradigm) and a statistical module that removes the last residual morphological ambiguity (only for Bokmål). The tool is trained on the Norwegian wordbank.",
+ "Functionality": ["MSD", "syntactic parsing"],
+ "Language": ["Norwegian (Bokmål and Nynorsk)"],
+ "Licence": "GNU General public licence",
+ "Platform": [],
+ "Infrastructure": "CLARINO",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "https://github.com/noklesta/The-Oslo-Bergen-Tagger"
+ },
+ "Publication": "Johannessen et al. (2012)"
+}
diff --git a/tools/pos-and-lemmatisation/pos-opennlp.json b/tools/pos-and-lemmatisation/pos-opennlp.json
new file mode 100644
index 0000000..c56f26a
--- /dev/null
+++ b/tools/pos-and-lemmatisation/pos-opennlp.json
@@ -0,0 +1,18 @@
+{
+ "Name": "PoS Tagger OpenNLP Project",
+ "URL": "http://hdl.handle.net/11858/00-1778-0000-0004-BA92-E",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a PoS tagger implemented in WebLicht.\nThe model for Italian is trained on the MIDT corpus.",
+ "Functionality": ["PoS"],
+ "Language": ["deu", "eng", "ita"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For multiple languages",
+ "Input format": ["TCF", "XML"],
+ "Output format": [],
+ "Access": {
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/reldianno.json b/tools/pos-and-lemmatisation/reldianno.json
new file mode 100644
index 0000000..8057de5
--- /dev/null
+++ b/tools/pos-and-lemmatisation/reldianno.json
@@ -0,0 +1,19 @@
+{
+ "Name": "ReLDIanno",
+ "URL": "http://clarin.si/services/web/query",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool, which was developed in the context of the ReLDI project, employs the MULTEXT tagset for part of speech tagging and Universal Dependencies for syntactic parsing.",
+ "Functionality": ["PoS", "lemma", "NER", "syntactic parsing"],
+ "Language": ["hrv", "srp", "slv"],
+ "Licence": "CC-BY (for webservice); Apache 2 for library",
+ "Platform": [],
+ "Infrastructure": "CLARIN.SI",
+ "Group": "For multiple languages",
+ "Input format": ["plain text", "TCF"],
+ "Output format": ["vertical", "plain text"],
+ "Access": {
+ "Download": "https://github.com/clarinsi/reldi-lib",
+ "Web application": "http://clarin.si/services/web/query"
+ },
+ "Publication": "Ljubešić et al. (2016)"
+}
diff --git a/tools/pos-and-lemmatisation/rftagger.json b/tools/pos-and-lemmatisation/rftagger.json
new file mode 100644
index 0000000..c9649cd
--- /dev/null
+++ b/tools/pos-and-lemmatisation/rftagger.json
@@ -0,0 +1,19 @@
+{
+ "Name": "RFTagger",
+ "URL": "http://hdl.handle.net/11022/1007-0000-0000-8E4F-9",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a PoS tagger implemented in WebLicht.",
+ "Functionality": ["PoS"],
+ "Language": ["deu", "ces", "slv", "hun"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For multiple languages",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "https://www.cis.uni-muenchen.de/~schmid/tools/RFTagger/",
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": "Schmid and Laws (1995)"
+}
diff --git a/tools/pos-and-lemmatisation/sentione2.json b/tools/pos-and-lemmatisation/sentione2.json
new file mode 100644
index 0000000..d95bb8b
--- /dev/null
+++ b/tools/pos-and-lemmatisation/sentione2.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Tagger SentiOne - version 2",
+ "URL": "http://hdl.handle.net/11321/634",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is the second version of the tagger developed in the SentiOne project, adapted to UGC-processing. The tool has been enriched with some heuristics to improve its accuracy and a tokenizer.",
+ "Functionality": ["MSD"],
+ "Language": ["pol"],
+ "Licence": "GNU GPL3",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "http://hdl.handle.net/11321/634"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/sepedi-pos-tagger.json b/tools/pos-and-lemmatisation/sepedi-pos-tagger.json
new file mode 100644
index 0000000..721d4ee
--- /dev/null
+++ b/tools/pos-and-lemmatisation/sepedi-pos-tagger.json
@@ -0,0 +1,17 @@
+{
+ "Name": "Sepedi Part of Speech Tagger",
+ "URL": "https://hdl.handle.net/20.500.12185/264",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on Helmut Schmid's stochastic tagger (see Schmid 1994) supported by additional noun and verb guessing modules and a tokenizer.",
+ "Functionality": ["PoS"],
+ "Language": ["Sepedi"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "SADiLaR",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/sepverb.json b/tools/pos-and-lemmatisation/sepverb.json
new file mode 100644
index 0000000..ed9b4a8
--- /dev/null
+++ b/tools/pos-and-lemmatisation/sepverb.json
@@ -0,0 +1,18 @@
+{
+ "Name": "SepVerb Lemmatizer",
+ "URL": "http://hdl.handle.net/11022/0000-0000-1CAB-1",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is based on the Mate toolkit.",
+ "Functionality": ["lemma"],
+ "Language": ["deu"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For a single language",
+ "Input format": ["TCF", "XML"],
+ "Output format": [],
+ "Access": {
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/smor.json b/tools/pos-and-lemmatisation/smor.json
new file mode 100644
index 0000000..056fff2
--- /dev/null
+++ b/tools/pos-and-lemmatisation/smor.json
@@ -0,0 +1,17 @@
+{
+ "Name": "SMOR lemmatizer",
+ "URL": "http://hdl.handle.net/11022/1007-0000-0000-8E23-9",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is implemented in WebLicht.",
+ "Functionality": ["PoS", "lemma"],
+ "Language": ["deu"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For a single language",
+ "Input format": ["TCF", "XML"],
+ "Output format": [],
+ "Access": {
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/sparv.json b/tools/pos-and-lemmatisation/sparv.json
new file mode 100644
index 0000000..6120fb0
--- /dev/null
+++ b/tools/pos-and-lemmatisation/sparv.json
@@ -0,0 +1,19 @@
+{
+ "Name": "Sparv",
+ "URL": "https://spraakbanken.gu.se/verktyg/sparv",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is Språkbanken's corpus annotation pipeline infrastructure. The pipeline uses in-house and external tools on the text to segment it into sentences and paragraphs, tokenise, tag parts-of-speech, look up in dictionaries and analyse compounds. The pipeline can also be run using a web API with XML results, and it is run locally to prepare the documents in Korp, which is SWE-LANG’s corpus search tool. While the most sophisticated support is for modern Swedish, the pipeline supports 19 additional languages.",
+ "Functionality": ["PoS", "MSD", "lemma", "compound analysis", "dictionary lookup"],
+ "Language": ["Bulgarian", "English", "Estonian", "Finnish", "French", "Galician", "Italian", "Catalan", "Latin", "Dutch", "Norwegian", "Polish", "Portuguese", "Romanian", "Russian", "Slovak", "Slovenian", "Spanish", "Swedish", "German"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "SWE-CLARIN",
+ "Group": "For multiple languages",
+ "Input format": ["plain text", "XML"],
+ "Output format": ["plain text", "XML"],
+ "Access": {
+ "Web application": "https://spraakbanken.gu.se/sparv/#input=plain&lang=sv&language=sv",
+ "Web API": "https://ws.spraakbanken.gu.se/ws/sparv"
+ },
+ "Publication": "Borin et al. (2016)"
+}
diff --git a/tools/pos-and-lemmatisation/stanford-dep.json b/tools/pos-and-lemmatisation/stanford-dep.json
new file mode 100644
index 0000000..e4a132f
--- /dev/null
+++ b/tools/pos-and-lemmatisation/stanford-dep.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Stanford Dependency Parser",
+ "URL": "http://hdl.handle.net/11022/0000-0001-3309-C",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a WebLicht implementation of the Stanford Parser.",
+ "Functionality": ["PoS", "syntactic parsing"],
+ "Language": ["eng"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For a single language",
+ "Input format": ["plain text", "pdf", "rtf", "XML"],
+ "Output format": ["plain text", "pdf", "rtf", "XML"],
+ "Access": {
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": "Hinrichs et al. (2010)"
+}
diff --git a/tools/pos-and-lemmatisation/stanford-phrase.json b/tools/pos-and-lemmatisation/stanford-phrase.json
new file mode 100644
index 0000000..c1ec46d
--- /dev/null
+++ b/tools/pos-and-lemmatisation/stanford-phrase.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Stanford Phrase Structure Parser",
+ "URL": "http://hdl.handle.net/11858/00-1778-0000-0004-BA31-A",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a WebLicht implementation of the Stanford Parser.",
+ "Functionality": ["PoS", "syntactic parsing"],
+ "Language": ["eng", "deu"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For multiple languages",
+ "Input format": ["plain text", "pdf", "rtf", "XML"],
+ "Output format": ["plain text", "pdf", "rtf", "XML"],
+ "Access": {
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": "Hinrichs et al. (2010)"
+}
diff --git a/tools/pos-and-lemmatisation/stepp-tagger.json b/tools/pos-and-lemmatisation/stepp-tagger.json
new file mode 100644
index 0000000..85916c6
--- /dev/null
+++ b/tools/pos-and-lemmatisation/stepp-tagger.json
@@ -0,0 +1,18 @@
+{
+ "Name": "STEPP Tagger",
+ "URL": "https://hdl.handle.net/21.11115/0000-000B-D32C-6",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is used for annotating biomedical texts such as MEDLINE abstracts.",
+ "Functionality": ["PoS"],
+ "Language": ["eng", "ces", "slk"],
+ "Licence": "proprietary - commercial",
+ "Platform": [],
+ "Infrastructure": "PORTULAN",
+ "Group": "For multiple languages",
+ "Input format": ["plain text"],
+ "Output format": ["plain text"],
+ "Access": {
+ "Download": ""
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/sticker-ud.json b/tools/pos-and-lemmatisation/sticker-ud.json
new file mode 100644
index 0000000..1c11f48
--- /dev/null
+++ b/tools/pos-and-lemmatisation/sticker-ud.json
@@ -0,0 +1,19 @@
+{
+ "Name": "Sticker part-of-speech tagger UD",
+ "URL": "http://hdl.handle.net/11022/0000-0007-DA27-8",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a PoS tagger, syntactic parser and named entity recognizer implemented in WebLicht. The PoS tagger uses the Universal Dependencies tagset.",
+ "Functionality": ["PoS", "syntactic parsing", "NER"],
+ "Language": ["deu", "nld"],
+ "Licence": "Blue Oak Model Licence version 1.0.0",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For multiple languages",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "https://github.com/stickeritis/sticker",
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": "Ling et al. (2015)"
+}
diff --git a/tools/pos-and-lemmatisation/stuttgart-dep.json b/tools/pos-and-lemmatisation/stuttgart-dep.json
new file mode 100644
index 0000000..bf2c106
--- /dev/null
+++ b/tools/pos-and-lemmatisation/stuttgart-dep.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Stuttgart Dependency Parser",
+ "URL": "http://hdl.handle.net/11022/1007-0000-0000-8DEE-6",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a WebLicht implementation of the Stuttgart parser.",
+ "Functionality": ["PoS", "syntactic parsing"],
+ "Language": ["deu"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For a single language",
+ "Input format": ["plain text", "pdf", "rtf", "XML"],
+ "Output format": ["plain text", "pdf", "rtf", "XML"],
+ "Access": {
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": "Hinrichs et al. (2010)"
+}
diff --git a/tools/pos-and-lemmatisation/tadpole.json b/tools/pos-and-lemmatisation/tadpole.json
new file mode 100644
index 0000000..1214b86
--- /dev/null
+++ b/tools/pos-and-lemmatisation/tadpole.json
@@ -0,0 +1,17 @@
+{
+ "Name": "Tadpole",
+ "URL": "http://hdl.handle.net/11372/LRT-1293",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "An integrated tokenizer, tagger-lemmatizer, morphological analyzer, and dependency parser for Dutch.",
+ "Functionality": ["PoS/MSD", "lemma", "syntactic parsing"],
+ "Language": ["nld"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "LINDAT/CLARIAH-CZ",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/tagger-ws.json b/tools/pos-and-lemmatisation/tagger-ws.json
new file mode 100644
index 0000000..2280a48
--- /dev/null
+++ b/tools/pos-and-lemmatisation/tagger-ws.json
@@ -0,0 +1,18 @@
+{
+ "Name": "Tagger WS",
+ "URL": "http://hdl.handle.net/11321/30",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool uses the NKJP tagset and implements the Morfeusz SGJP dictionary. The service is based on WCRFT.",
+ "Functionality": ["MSD", "lemma"],
+ "Language": ["pol"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": ["plain text", "XML"],
+ "Output format": ["plain text", "XML"],
+ "Access": {
+ "Web application": "http://clarin-pl.eu/synat/ws/tagger/"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/takipi.json b/tools/pos-and-lemmatisation/takipi.json
new file mode 100644
index 0000000..f31636a
--- /dev/null
+++ b/tools/pos-and-lemmatisation/takipi.json
@@ -0,0 +1,18 @@
+{
+ "Name": "TaKIPI",
+ "URL": "http://hdl.handle.net/11321/31",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool assumes the morpho-syntactic description of the IPI PAN corpus tagset (Przepiórkowski 2005).",
+ "Functionality": ["MSD"],
+ "Language": ["pol"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": ""
+ },
+ "Publication": ["Przepiórkowski (2005)", "Piasecki (2007)"]
+}
diff --git a/tools/pos-and-lemmatisation/template.json b/tools/pos-and-lemmatisation/template.json
new file mode 100644
index 0000000..bf6c5d7
--- /dev/null
+++ b/tools/pos-and-lemmatisation/template.json
@@ -0,0 +1,18 @@
+{
+ "Name": "",
+ "URL": "",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "",
+ "Functionality": [],
+ "Language": [],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": ""
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/treetagger.json b/tools/pos-and-lemmatisation/treetagger.json
new file mode 100644
index 0000000..ff518c8
--- /dev/null
+++ b/tools/pos-and-lemmatisation/treetagger.json
@@ -0,0 +1,19 @@
+{
+ "Name": "TreeTagger",
+ "URL": "http://hdl.handle.net/11022/1007-0000-0000-8E4D-B",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a PoS tagger and lemmatizer implemented in WebLicht.",
+ "Functionality": ["PoS", "lemma"],
+ "Language": ["deu", "eng", "fra", "ita", "nld", "spa", "bul", "rus", "ell", "por", "zho", "swh", "lat", "est", "fro"],
+ "Licence": "free but unspecified",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For multiple languages",
+ "Input format": [],
+ "Output format": ["plain text"],
+ "Access": {
+ "Download": "http://hdl.handle.net/11022/1007-0000-0000-8E4D-B",
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": "Schmid (1999)"
+}
diff --git a/tools/pos-and-lemmatisation/turku-neural.json b/tools/pos-and-lemmatisation/turku-neural.json
new file mode 100644
index 0000000..75aaa2c
--- /dev/null
+++ b/tools/pos-and-lemmatisation/turku-neural.json
@@ -0,0 +1,19 @@
+{
+ "Name": "Turku-neural-parser-pipeline",
+ "URL": "https://turkunlp.org/Turku-neural-parser-pipeline/",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "A neural parsing pipeline for segmentation, morphological tagging, dependency parsing and lemmatization with pre-trained models for more than 50 languages. Top ranker in the CoNLL-18 Shared Task.",
+ "Functionality": ["segmentation", "MSD", "syntactic parsing", "lemma"],
+ "Language": ["More than 50 languages"],
+ "Licence": "Apache License 2.0",
+ "Platform": [],
+ "Infrastructure": "FIN-CLARIN",
+ "Group": "For multiple languages",
+ "Input format": ["utf-8 encoded plain text"],
+ "Output format": ["CoNLL-U"],
+ "Access": {
+ "Download": "https://github.com/TurkuNLP/Turku-neural-parser-pipeline",
+ "Web application": "http://bionlp-www.utu.fi/parser_demo"
+ },
+ "Publication": "Kanerva et al. (2018)"
+}
diff --git a/tools/pos-and-lemmatisation/udpipe.json b/tools/pos-and-lemmatisation/udpipe.json
new file mode 100644
index 0000000..0dcc107
--- /dev/null
+++ b/tools/pos-and-lemmatisation/udpipe.json
@@ -0,0 +1,19 @@
+{
+ "Name": "UDPipe",
+ "URL": "http://hdl.handle.net/11234/1-1702",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a trainable pipeline for annotating CoNLL-U files. UDPipe is language-agnostic and can be trained given annotated data in the CoNLL-U format. Trained models are provided for nearly all Universal Dependency treebanks.",
+ "Functionality": ["PoS", "lemma", "syntactic parsing"],
+ "Language": ["Language independent"],
+ "Licence": "Mozilla Public Licence 2.0 (software); CC BY-NC-SA UD (models)",
+ "Platform": [],
+ "Infrastructure": "LINDAT",
+ "Group": "For multiple languages",
+ "Input format": ["plain text"],
+ "Output format": ["CoNLL-U"],
+ "Access": {
+ "Download": "http://github.com/ufal/udpipe/releases/latest",
+ "Web application": "http://lindat.mff.cuni.cz/services/udpipe/api-reference.php"
+ },
+ "Publication": "Straka and Straková (2017)"
+}
diff --git a/tools/pos-and-lemmatisation/vabamorf.json b/tools/pos-and-lemmatisation/vabamorf.json
new file mode 100644
index 0000000..15f2ca4
--- /dev/null
+++ b/tools/pos-and-lemmatisation/vabamorf.json
@@ -0,0 +1,19 @@
+{
+ "Name": "Vabamorf open source morphology tagger for Estonian",
+ "URL": "https://github.com/Filosoft/vabamorf",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool performs various tasks of morphological analysis, including morphological disambiguation and synthesis.",
+ "Functionality": ["PoS", "MSD", "lemma"],
+ "Language": ["est"],
+ "Licence": "Available - Unrestricted Use",
+ "Platform": [],
+ "Infrastructure": "CELR",
+ "Group": "For a single language",
+ "Input format": ["plain text"],
+ "Output format": ["plain text"],
+ "Access": {
+ "Download": "https://github.com/Filosoft/vabamorf",
+ "Web application": "http://www.filosoft.ee/html_morf_et/"
+ },
+ "Publication": "Kaalep (2015)"
+}
diff --git a/tools/pos-and-lemmatisation/wcrft.json b/tools/pos-and-lemmatisation/wcrft.json
new file mode 100644
index 0000000..028c2ce
--- /dev/null
+++ b/tools/pos-and-lemmatisation/wcrft.json
@@ -0,0 +1,18 @@
+{
+ "Name": "WCRFT (Wrocław CRF Tagger)",
+ "URL": "http://hdl.handle.net/11321/35",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool combines tiered tagging, conditional random fields (CRF) and features tailored for inflective languages written in WCCL. The algorithm and code are inspired by Wrocław Memory-Based Tagger (WMBT).",
+ "Functionality": ["MSD"],
+ "Language": ["pol"],
+ "Licence": "GNU LGPL 3.0",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": [],
+ "Output format": [],
+ "Access": {
+ "Download": "http://nlp.pwr.wroc.pl/redmine/projects/wcrft/wiki/"
+ },
+ "Publication": "Radziszewski (2013)"
+}
diff --git a/tools/pos-and-lemmatisation/weblicht-pos.json b/tools/pos-and-lemmatisation/weblicht-pos.json
new file mode 100644
index 0000000..8725b21
--- /dev/null
+++ b/tools/pos-and-lemmatisation/weblicht-pos.json
@@ -0,0 +1,19 @@
+{
+ "Name": "Weblicht Part-of-Speech Tagger",
+ "URL": "http://hdl.handle.net/21.11120/0000-0003-7D95-9",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool is a PoS tagger and lemmatizer implemented in WebLicht.",
+ "Functionality": ["PoS", "lemma"],
+ "Language": ["deu"],
+ "Licence": "",
+ "Platform": [],
+ "Infrastructure": "CLARIN-D",
+ "Group": "For a single language",
+ "Input format": ["TCF", "XML"],
+ "Output format": [],
+ "Access": {
+ "Web application": "http://zwei.dwds.de/d/suche#stts",
+ "WebLicht": "https://weblicht.sfs.uni-tuebingen.de/weblichtwiki/index.php/Main_Page"
+ },
+ "Publication": ""
+}
diff --git a/tools/pos-and-lemmatisation/wmbt.json b/tools/pos-and-lemmatisation/wmbt.json
new file mode 100644
index 0000000..3c59b6d
--- /dev/null
+++ b/tools/pos-and-lemmatisation/wmbt.json
@@ -0,0 +1,18 @@
+{
+ "Name": "WMBT (Wrocław Memory-Based Tagger)",
+ "URL": "http://hdl.handle.net/11321/26",
+ "Family": "Part-of-Speech Tagging and Lemmatisation",
+ "Description": "This tool uses the TiMBL API as the underlying memory-based learning implementation. The features for classification are generated by using the WCCL formalism. The tool uses a tiered tagging approach. Grammatical class is disambiguated first, then subsequent attributes (as defined in a config file) are taken care of. Each attribute may be supplied a different set of features. The software package comes with default configurations for KIPI/IPIC and NKJP tagsets.",
+ "Functionality": ["MSD"],
+ "Language": ["pol"],
+ "Licence": "GNU LGPL 3.0",
+ "Platform": [],
+ "Infrastructure": "CLARIN-PL",
+ "Group": "For a single language",
+ "Input format": ["various, default is XCES XML"],
+ "Output format": ["various, default is XCES XML"],
+ "Access": {
+ "Download": "http://nlp.pwr.wroc.pl/redmine/projects/wmbt/wiki"
+ },
+ "Publication": ""
+}