Initial commit

XueWei · May 15, 2014 · f81da75 · f81da75
commit f81da75
Show file tree

Hide file tree

Showing 10 changed files with 334 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,53 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Mr Developer
+.mr.developer.cfg
+.project
+.pydevproject
+
+# Rope
+.ropeproject
+
+# Django stuff:
+*.log
+*.pot
+
+# Sphinx documentation
+docs/_build/
+
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2014 Pierre Nicolas Durette
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,80 @@
+gTTS
+====
+
+*gTTS* (Google Text to Speech): a Python interface to Google's Text to Speech API. Create an mp3 file with the `gTTS` module or the `gtts-cli.py` command line utility.
+
+Module
+------
+
+Initialize a new `gTTS` object and save the spoken text to a file:
+
+    >>> from gtts import gTTS
+    >>> tts = gTTS(text='Hello', lang='en')
+    >>> tts.save("hello.mp3")
+
+**Parameters**:
+
+  *  `text` is the text to speak to file;
+  *  `lang` is the language to speak in: an ISO 639-1 language code supported by the Google Text to Speech API.
+
+Command line utility
+--------------------
+Invoke `gtts-cli.py`:
+
+    gtts-cli.py --help
+    usage: gtts-cli.py [-h] (-t TEXT | -f FILE) [-l LANG] [--debug] destination
+
+(Ex.) Read the string 'Hello' in English:
+
+    $ gtts-cli.py -t "Hello" -l 'en' hello.mp3
+
+(Ex.) Read the contents of file 'hello.txt' in Czech:
+
+    $ gtts-cli.py -f hello.txt -l 'cs' hello.mp3
+
+Supported Languages
+-------------------
+
+  * 'af' : 'Afrikaans'
+  * 'sq' : 'Albanian'
+  * 'ar' : 'Arabic'
+  * 'hy' : 'Armenian'
+  * 'ca' : 'Catalan'
+  * 'zh-CN' : 'Mandarin (simplified)'
+  * 'zh-TW' : 'Mandarin (traditional)'
+  * 'hr' : 'Croatian'
+  * 'cs' : 'Czech'
+  * 'da' : 'Danish'
+  * 'nl' : 'Dutch'
+  * 'en' : 'English'
+  * 'eo' : 'Esperanto'
+  * 'fi' : 'Finnish'
+  * 'fr' : 'French'
+  * 'de' : 'German'
+  * 'el' : 'Greek'
+  * 'ht' : 'Haitian Creole'
+  * 'hi' : 'Hindi'
+  * 'hu' : 'Hungarian'
+  * 'is' : 'Icelandic'
+  * 'id' : 'Indonesian'
+  * 'it' : 'Italian'
+  * 'ja' : 'Japanese'
+  * 'ko' : 'Korean'
+  * 'la' : 'Latin'
+  * 'lv' : 'Latvian'
+  * 'mk' : 'Macedonian'
+  * 'no' : 'Norwegian'
+  * 'pl' : 'Polish'
+  * 'pt' : 'Portuguese'
+  * 'ro' : 'Romanian'
+  * 'ru' : 'Russian'
+  * 'sr' : 'Serbian'
+  * 'sk' : 'Slovak'
+  * 'es' : 'Spanish'
+  * 'sw' : 'Swahili'
+  * 'sv' : 'Swedish'
+  * 'ta' : 'Tamil'
+  * 'th' : 'Thai'
+  * 'tr' : 'Turkish'
+  * 'vi' : 'Vietnamese'
+  * 'cy' : 'Welsh'
diff --git a/README.txt b/README.txt
@@ -0,0 +1 @@
+README.md
diff --git a/bin/gtts-cli b/bin/gtts-cli
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Wrapper that runs the gtts-cli.py located next to this script with the
+# first `python` found on PATH, forwarding every argument unchanged.
+DIR="$( cd "$( dirname "$0" )" && pwd )"
+PYTHON="$(which python)"
+# Quote both expansions so paths containing spaces are not word-split.
+exec "$PYTHON" "$DIR/gtts-cli.py" "$@"
diff --git a/bin/gtts-cli.py b/bin/gtts-cli.py
@@ -0,0 +1,28 @@
+#! /usr/bin/python
+
+from gtts import gTTS
+import argparse
+
+# Args
+desc = "Creates an mp3 file from spoken text via the Google Text-to-Speech API"
+parser = argparse.ArgumentParser(description=desc)
+text_group = parser.add_mutually_exclusive_group(required=True)
+text_group.add_argument('-t', '--text', help="text to speak")
+text_group.add_argument('-f', '--file', help="file to speak")
+args = parser.add_argument("destination", help="destination mp3 file", action='store')
+args = parser.add_argument('-l', '--lang', default='en', help="ISO 639-1 language code to speak in: " + str(gTTS.LANGUAGES))
+args = parser.add_argument('--debug', default=False, action="store_true")
+args = parser.parse_args()
+
+try:
+    if args.text:
+        text = args.text
+    else:
+        with open(args.file, "r") as f:
+            text = f.read()
+
+    # TTSTF (Text to Speech to File)
+    tts = gTTS(text=text, lang=args.lang, debug=args.debug)
+    tts.save(args.destination)
+except Exception, e:
+    print(str(e))
diff --git a/gtts/__init__.py b/gtts/__init__.py
@@ -0,0 +1,2 @@
+from .version import __version__
+from .tts import gTTS
diff --git a/gtts/tts.py b/gtts/tts.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+import re, requests
+
+class gTTS:
+    """ gTTS (Google Text to Speech): an interface to Google's Text to Speech API """
+
+    GOOGLE_TTS_URL = 'http://translate.google.com/translate_tts'
+    MAX_CHARS = 100 # Max characters the Google TTS API takes at a time
+    LANGUAGES = {
+        'af' : 'Afrikaans',
+        'sq' : 'Albanian',
+        'ar' : 'Arabic',
+        'hy' : 'Armenian',
+        'ca' : 'Catalan',
+        'zh-CN' : 'Mandarin (simplified)',
+        'zh-TW' : 'Mandarin (traditional)',
+        'hr' : 'Croatian',
+        'cs' : 'Czech',
+        'da' : 'Danish',
+        'nl' : 'Dutch',
+        'en' : 'English',
+        'eo' : 'Esperanto',
+        'fi' : 'Finnish',
+        'fr' : 'French',
+        'de' : 'German',
+        'el' : 'Greek',
+        'ht' : 'Haitian Creole',
+        'hi' : 'Hindi',
+        'hu' : 'Hungarian',
+        'is' : 'Icelandic',
+        'id' : 'Indonesian',
+        'it' : 'Italian',
+        'ja' : 'Japanese',
+        'ko' : 'Korean',
+        'la' : 'Latin',
+        'lv' : 'Latvian',
+        'mk' : 'Macedonian',
+        'no' : 'Norwegian',
+        'pl' : 'Polish',
+        'pt' : 'Portuguese',
+        'ro' : 'Romanian',
+        'ru' : 'Russian',
+        'sr' : 'Serbian',
+        'sk' : 'Slovak',
+        'es' : 'Spanish',
+        'sw' : 'Swahili',
+        'sv' : 'Swedish',
+        'ta' : 'Tamil',
+        'th' : 'Thai',
+        'tr' : 'Turkish',
+        'vi' : 'Vietnamese',
+        'cy' : 'Welsh'
+    }
+
+    def __init__(self, text, lang = 'en', debug = False):
+        self.debug = debug
+        if lang not in self.LANGUAGES:
+            raise Exception('Language not supported: %s' % lang)
+        else:
+            self.lang = lang
+
+        if not text:
+            raise Exception('No text to speak')
+        else:
+            self.text = text
+
+        # Split text in parts
+        if len(text) <= self.MAX_CHARS: 
+            text_parts = [text]
+        else:
+            text_parts = self._tokenize(text, self.MAX_CHARS)           
+
+        # Clean 
+        def strip(x): return x.replace('\n', '').strip()
+        def empty(x): return len(x) > 0
+        text_parts = map(strip, text_parts)
+        text_parts = filter(empty, text_parts)
+        self.text_parts = text_parts
+
+    def save(self, savefile):
+        """ Do the Web request and save to `savefile` """
+        with open(savefile, 'wb') as f:
+            for idx, part in enumerate(self.text_parts):
+                payload = { 'ie' : 'UTF-8',
+                            'tl' : self.lang,
+                            'q' : part,
+                            'total' : len(self.text_parts),
+                            'idx' : idx,
+                            'textlen' : len(part) }
+                if self.debug: print payload
+                try:
+                    r = requests.get(self.GOOGLE_TTS_URL, params=payload)
+                    for chunk in r.iter_content(chunk_size=1024):
+                        f.write(chunk)
+                except Exception, e:
+                    raise
+
+    def _tokenize(self, text, max_size):
+        """ Tokenizer on basic roman punctuation """ 
+
+        punc = "¡!()[]¿?.,;:—«»\n"
+        punc_list = [re.escape(c) for c in punc]
+        pattern = '|'.join(punc_list)
+        parts = re.split(pattern, text)
+
+        min_parts = []
+        for p in parts:
+            min_parts += self._minimize(p, " ", max_size)
+        return min_parts
+
+    def _minimize(self, thestring, delim, max_size):
+        """ Recursive function that splits `thestring` in chunks
+        of maximum `max_size` chars delimited by `delim`. Returns list. """ 
+
+        if len(thestring) > max_size:
+            idx = thestring.rfind(delim, 0, max_size)
+            return [thestring[:idx]] + self._minimize(thestring[idx:], delim, max_size)
+        else:
+            return [thestring]
+
+# This module only provides the gTTS class; running it directly does nothing.
+if __name__ == "__main__":
+        pass
diff --git a/gtts/version.py b/gtts/version.py
@@ -0,0 +1 @@
+# Package version string; imported by gtts/__init__.py and read by setup.py.
+__version__ = '1.0'
diff --git a/setup.py b/setup.py
@@ -0,0 +1,22 @@
+try:
+    from setuptools import setup
+except ImportError:
+    from distutils.core import setup
+
+exec(open('gtts/version.py').read())
+
+setup(
+    name='gTTS',
+    version=__version__,
+    author='Pierre Nicolas Durette',
+    author_email='[email protected]',
+    url='https://github.com/pndurette/gTTS',
+    packages=['gtts'],
+    scripts=['bin/gtts-cli', 'bin/gtts-cli.py'],
+    license='MIT',
+    description='Create an mp3 file from spoken text via the Google TTS (Text-to-Speech) API',
+    long_description=open('README.txt').read(),
+    install_requires=[
+        "requests"
+    ]
+)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .version import __version__
		from .tts import gTTS