merge

lt3 · Jul 1, 2021 · 2c9b782 · 2c9b782
1 parent 3a15b74
commit 2c9b782
Show file tree

Hide file tree

Showing 16 changed files with 2,168 additions and 14 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,142 @@
+Pipfile*
+TODO.rst
+dummy.*
+examples/*.txt
+data/
+sent2vec/
+polyglot/
+
+# .idea (JetBrains)
+.idea/
+.idea/**/*
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+.DS_Store
diff --git a/CITATION b/CITATION
@@ -11,3 +11,17 @@
     ABSTRACT = {We identify a number of aspects that can boost the performance of Neural Fuzzy Repair (NFR), an easy-to-implement method to integrate translation memory matches and neural machine translation (NMT). We explore various ways of maximising the added value of retrieved matches within the NFR paradigm for eight language combinations, using Transformer NMT systems. In particular, we test the impact of different fuzzy matching techniques, sub-word-level segmentation methods and alignment-based features on overall translation quality. Furthermore, we propose a fuzzy match combination technique that aims to maximise the coverage of source words. This is supplemented with an analysis of how translation quality is affected by input sentence length and fuzzy match score. The results show that applying a combination of the tested modifications leads to a significant increase in estimated translation quality over all baselines for all language combinations.},
     DOI = {10.3390/informatics8010007}
 }
+
+@inproceedings{bulte2019neural,
+    AUTHOR = {Bulte, Bram  and Tezcan, Arda},
+    TITLE = {Neural Fuzzy Repair: Integrating Fuzzy Matches into Neural Machine Translation},
+    BOOKTITLE = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
+    MONTH = jul,
+    YEAR = {2019},
+    ADDRESS = {Florence, Italy},
+    PUBLISHER = {Association for Computational Linguistics},
+    URL = {https://www.aclweb.org/anthology/P19-1175},
+    PAGES = {1800--1809},
+    ABSTRACT = {We present a simple yet powerful data augmentation method for boosting Neural Machine Translation (NMT) performance by leveraging information retrieved from a Translation Memory (TM). We propose and test two methods for augmenting NMT training data with fuzzy TM matches. Tests on the DGT-TM data set for two language pairs show consistent and substantial improvements over a range of baseline systems. The results suggest that this method is promising for any translation environment in which a sizeable TM is available and a certain amount of repetition across translations is to be expected, especially considering its ease of implementation.},
+    DOI = {10.18653/v1/P19-1175},
+}
diff --git a/LICENSE b/LICENSE
@@ -175,18 +175,7 @@
 
    END OF TERMS AND CONDITIONS
 
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2020 TEZCAN Arda, BULTE Bram, VANROY Bram
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1 @@
+include CITATION
diff --git a/Makefile b/Makefile
@@ -0,0 +1,10 @@
+# Format source code automatically
+style:
+	black --line-length 119 --target-version py36 nfr
+	isort nfr
+
+# Control quality
+quality:
+	black --check --line-length 119 --target-version py36 nfr
+	isort --check-only nfr
+	flake8 nfr --exclude __pycache__,__init__.py