first commit

audiolabs · Jan 23, 2019 · fde0836 · fde0836
commit fde0836
Show file tree

Hide file tree

Showing 22 changed files with 649 additions and 0 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,36 @@
+Software License for the WoS Software
+
+Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V., 2019
+
+1. INTRODUCTION
+
+The WoS software tools are tools to conduct Wizard of Oz tests with speech assistants. Patent licenses may be required for using the WoS software and it is in your responsibility to obtain those if necessary.
+
+2. COPYRIGHT LICENSE
+
+Redistribution and use in source and binary forms, with or without modification, are permitted without payment of copyright license fees provided that you satisfy the following conditions:
+
+You must retain the complete text of this software license in redistributions of the WoS software or your modifications thereto in source code form.
+
+You must retain the complete text of this software license in the documentation and/or other materials provided with redistributions of the WoS software or your modifications thereto in binary form.
+
+Except as contained in this notice, the name of the Fraunhofer shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from Fraunhofer.
+
+3. NO PATENT LICENSE
+
+NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with respect to this software.
+You may use this WoS software or modifications thereto only for purposes that are authorized by appropriate patent licenses.
+
+4. DISCLAIMER
+
+This WoS software is provided by Fraunhofer on behalf of the copyright holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages, including but not limited to procurement of substitute goods or services; loss of use, data, or profits, or business interruption, however caused and on any theory of liability, whether in contract, strict liability, or tort (including negligence), arising in any way out of the use of this software, even if advised of the possibility of such damage.
+
+5. CONTACT INFORMATION
+
+Fraunhofer Institute for Integrated Circuits IIS
+Attention: Audio and Multimedia Departments – WoS software
+Am Wolfsmantel 33
+91058 Erlangen, Germany
+
+http://www.audiolabs-erlangen.de/fraunhofer
+[email protected]
diff --git a/README.md b/README.md
@@ -0,0 +1,68 @@
+# WoS
+
+## Overview
+
+An implementation of the Wizard of Oz experiment for speech systems (WoS).
+
+![Image](/wos-diagram.png)
+
+## Frontstage Installation (tested on Raspbian Stretch Lite 2018-06-27)
+
+Configure the Raspberry Pi:
+
+```
+$ sudo raspi-config
+```
+
+- go to Network Options -> Wi-fi and enter the wifi settings
+- go to Interfacing Options -> SSH -> Yes
+- select finish
+
+Install the server software:
+
+```
+$ sudo apt-get install ffmpeg mpg321 git python-pip
+$ cd /home/pi
+$ git clone https://github.com/audiolabs/wos
+$ cd wos/srv
+$ pip install -r requirements.txt
+```
+
+To start the server automatically when the Raspberry Pi boots, add this line above "exit 0" in /etc/rc.local:
+
+```
+su - pi -c "bash /home/pi/wos/bin/start.sh" &
+```
+
+## Backstage Installation 
+
+### Linux (tested on Ubuntu 16.04)
+```
+$ sudo apt-get install ffmpeg nodejs npm
+$ git clone https://github.com/audiolabs/wos
+$ cd wos/web
+$ npm install
+```
+
+### Mac (tested on macOS Sierra 10.12.6, requires homebrew)
+```
+$ brew install nodejs npm
+$ brew install --with-ffplay ffmpeg
+$ git clone https://github.com/audiolabs/wos
+$ cd wos/web
+$ npm install
+```
+
+## Usage
+
+If the above instructions were followed, the server will start automatically the next time the Raspberry Pi boots. Alternatively, you can start the server manually:
+```
+$ cd wos/srv
+$ python server.py
+```
+
+Start the client UI on another machine by providing the server IP, for example:
+```
+$ cd wos/web
+$ HOST=192.168.0.120 npm start
+```
diff --git a/bin/start.sh b/bin/start.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# Keeps the WoS server running: starts server.py and restarts it
+# every time it exits.
+
+# Stop here if the checkout is missing; otherwise the loop below would
+# run forever in the wrong directory and never find server.py.
+cd /home/pi/wos/srv || exit 1
+
+while true; do
+
+	# Append stdout and stderr of every run to the same log file.
+	python server.py &>>/tmp/wos.log
+	# Wait a moment before starting the server again.
+	sleep 3
+
+done
diff --git a/srv/etc/config.json b/srv/etc/config.json
@@ -0,0 +1 @@
+{"device_in":"hw:0","device_out":"hw:0","tts":"gtts"}
diff --git a/srv/requirements.txt b/srv/requirements.txt
@@ -0,0 +1,2 @@
+tornado
+gtts
diff --git a/srv/res/silence_500.mp3 b/srv/res/silence_500.mp3
diff --git a/srv/server.py b/srv/server.py
@@ -0,0 +1,116 @@
+from __future__ import print_function
+
+import json
+import tornado.ioloop
+import tornado.web
+import uuid
+import shlex
+import subprocess
+import sys
+import time
+from speak import speak
+
+# Tag used as the prefix of every log line written to stderr.
+APP = 'WoS'
+# TCP port the HTTP API listens on.
+PORT = 9500
+
+# Settings; replaced by the contents of etc/config.json when this module
+# is run as a script.
+config = {}
+# Handle of the ffmpeg streaming subprocess, or None if none was started.
+stream_process = None
+# Handle of the most recently started mpg321 playback subprocess, or None.
+media_process = None
+
+class ApiV1SpeakHandler(tornado.web.RequestHandler):
+
+    def post(self):
+        global config
+        print("[%s] synthesizing ..." % (APP), file=sys.stderr)
+        msg = tornado.escape.json_decode(self.request.body)
+        device_out = "hw:0"
+        if 'device_out' in config:
+            device_out = config['device_out']
+        speak(msg['text'], lang=msg['lang'], device=device_out,
+            tts=config['tts'])
+        self.set_status(200)
+        self.finish({ "ok": True })
+
+class ApiV1MediaPlayHandler(tornado.web.RequestHandler):
+
+    def post(self):
+        global config
+        global media_process
+
+        msg = tornado.escape.json_decode(self.request.body)
+        f = msg['file']
+
+        print("[%s] playing mp3: %s ..." % (APP, f), file=sys.stderr)
+
+        if media_process:
+            media_process.terminate()
+
+        device_out = "hw:0"
+        if 'device_out' in config:
+            device_out = config['device_out']
+        args = shlex.split("mpg321 -a " + device_out + " --stereo " + f)
+        media_process = subprocess.Popen(args)
+
+        self.set_status(200)
+        self.finish({ "ok": True })
+
+class ApiV1MediaStopHandler(tornado.web.RequestHandler):
+
+    def post(self):
+        global media_process
+
+        print("[%s] stopping mp3 ..." % (APP), file=sys.stderr)
+
+        if media_process:
+            media_process.terminate()
+
+        self.set_status(200)
+        self.finish({ "ok": True })
+
+class ApiV1StreamHandler(tornado.web.RequestHandler):
+
+    def post(self):
+        global stream_process
+        global config
+
+        print("[%s] starting stream ..." % (APP), file=sys.stderr)
+
+        ip = self.request.remote_ip
+
+        if stream_process:
+            stream_process.terminate()
+            time.sleep(3)
+
+        device_in = "hw:0"
+
+        if 'device_in' in config:
+            device_in = config['device_in']
+
+        args = shlex.split("ffmpeg -re -f alsa -ac 1 -i " + device_in +
+            " -acodec libmp3lame " +
+            "-f rtp rtp://" + ip + ":1234")
+        stream_process = subprocess.Popen(args)
+
+        self.set_status(200)
+        self.finish({ "ok": True, "ip": ip })
+
+def make_app():
+    return tornado.web.Application([
+        (r"/api/v1/speak", ApiV1SpeakHandler),
+        (r"/api/v1/stream", ApiV1StreamHandler),
+        (r"/api/v1/media/play", ApiV1MediaPlayHandler),
+        (r"/api/v1/media/stop", ApiV1MediaStopHandler),
+    ])
+
+def main():
+    print("[%s] starting server on port %s" % (APP, PORT),
+        file=sys.stderr)
+    app = make_app()
+    app.listen(PORT)
+    tornado.ioloop.IOLoop.current().start()
+
+if __name__ == "__main__":
+    with open('etc/config.json') as f:
+        config = json.load(f)
+    print("[%s] config: %s" % (APP, config), file=sys.stderr)
+    main()
diff --git a/srv/speak.py b/srv/speak.py
@@ -0,0 +1,18 @@
+import tempfile
+import io
+import os
+
+import speak_festival
+import speak_espeak
+import speak_gtts
+
+def speak(text, lang='en', device='hw:0', tts='festival'):
+
+    if tts == 'festival':
+        speak_festival.speak(text, lang=lang, device=device)
+
+    if tts == 'espeak':
+        speak_espeak.speak(text, lang=lang, device=device)
+
+    if tts == 'gtts':
+        speak_gtts.speak(text, lang=lang, device=device)
diff --git a/srv/speak_espeak.py b/srv/speak_espeak.py
@@ -0,0 +1,12 @@
+import tempfile
+import os
+
+def speak(text, lang='en', device='hw:0'):
+
+    fd, file_txt = tempfile.mkstemp()
+    with os.fdopen(fd, 'w') as f:
+        f.write(text)
+        f.close()
+
+    os.system("espeak -f %s -v %s" % (file_txt, lang))
+    os.remove(file_txt)
diff --git a/srv/speak_festival.py b/srv/speak_festival.py
@@ -0,0 +1,12 @@
+import tempfile
+import os
+
+def speak(text, lang='en', device='hw:0'):
+
+    fd, file_txt = tempfile.mkstemp()
+    with os.fdopen(fd, 'w') as f:
+        f.write(text)
+        f.close()
+
+    os.system("festival --tts " + file_txt)
+    os.remove(file_txt)
diff --git a/srv/speak_gtts.py b/srv/speak_gtts.py
@@ -0,0 +1,29 @@
+import io
+import os
+from gtts import gTTS
+
+SILENCE_FILE = "res/silence_500.mp3"
+
+def speak(text, lang='en', device='hw:0'):
+
+    if os.path.isdir('/run/user/1000'):
+        audio_mp3 = '/run/user/1000/audio.mp3'
+        audio_tmp_mp3 = '/run/user/1000/audio_tmp.mp3'
+        audio_out_mp3 = '/run/user/1000/audio_out.mp3'
+    else:
+        audio_mp3 = '/tmp/audio.mp3'
+        audio_tmp_mp3 = '/tmp/audio_tmp.mp3'
+        audio_out_mp3 = '/tmp/audio_out.mp3'
+
+    gtts = gTTS(text=text, lang=lang, lang_check=False)
+    gtts.save(audio_mp3)
+
+    os.system("ffmpeg -y -i " + audio_mp3 + " -ar 48000 " + audio_tmp_mp3)
+    os.system("ffmpeg -y -i " +
+        "concat:\"" + SILENCE_FILE + "|" + audio_tmp_mp3 + "\" -codec copy " +
+        audio_out_mp3)
+    os.system("mpg321 -a " + device + " --stereo " + audio_out_mp3)
+
+    os.remove(audio_mp3)
+    os.remove(audio_out_mp3)
+    os.remove(audio_tmp_mp3)
diff --git a/srv/speak_test.py b/srv/speak_test.py
@@ -0,0 +1,6 @@
+from speak import speak
+
+#speak("wie spat ist es", device="hw:1", lang="de")
+#speak("this is a test", device="hw:0", lang="en")
+speak("this is a test", device="hw:0", lang="en", tts='espeak')
+
diff --git a/web/css/wos.css b/web/css/wos.css
@@ -0,0 +1,19 @@
+/* Base font for the whole page. */
+body {
+	font-family: sans-serif;
+}
+
+/* Inner spacing for form controls. */
+input,
+select {
+	padding: 5px;
+}
+
+/* Table cells: left/top aligned with tight padding. */
+td,
+th {
+	text-align: left;
+	vertical-align: top;
+	padding: 3px;
+}
+
+tr {
+	padding: 5px;
+}
+
+th {
+	background-color: #f0f0f0;
+}
+
+/* Vertical spacing around every div. */
+div {
+	margin-top: 10px;
+	margin-bottom: 10px;
+}
+
+/* Child divs of the 64px-high #visualization box are positioned
+   absolutely against its bottom edge. */
+#visbox #visualization {
+	height: 64px;
+	position: relative;
+}
+
+#visbox #visualization div {
+	background-color: #009374;
+	display: inline-block;
+	position: absolute;
+	bottom: 0px;
+	width: 2.5%;
+}
diff --git a/web/index.html b/web/index.html
@@ -0,0 +1,34 @@
+<!DOCTYPE html>
+<!-- Single-page WoS UI; presumably all behavior is attached by js/wos.js (not shown here). -->
+<html>
+  <head>
+    <meta charset="UTF-8">
+    <title>WoS UI</title>
+    <link rel="stylesheet" href="css/wos.css">
+  </head>
+  <body>
+
+    <!-- Free-text input (max. 256 characters), language choice and the two action buttons. -->
+    <input type="text" id="txt_text" size="48" maxlength="256">
+    <select id="sel_lang">
+      <option value="en">English</option>
+      <option value="de">German</option>
+    </select>
+    <input type="button" id="btn_speak" value="Speak/Play">
+    <input type="button" id="btn_stop" value="Stop">
+
+    <!-- Empty placeholder; presumably filled with status text by the script. -->
+    <div id="status"></div>
+
+    <!-- List boxes start empty; presumably populated at runtime. -->
+    Responses:<br/>
+    <select id="sel_response" size="10"></select>
+    <br/><br/>
+
+    Media:<br/>
+    <select id="sel_media" size="10"></select>
+    <br/><br/>
+
+    <!-- require() must be available in the page: jQuery is loaded through it and exposed globally before the UI script runs. -->
+    <script>
+      window.$ = window.jQuery = require('./js/jquery-3.3.1.min.js')
+      require('./js/wos.js')
+    </script>
+
+  </body>
+</html>
diff --git a/web/js/jquery-3.3.1.min.js b/web/js/jquery-3.3.1.min.js
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"device_in":"hw:0","device_out":"hw:0","tts":"gtts"}