diff --git a/.gitignore b/.gitignore index 5c577a6e..7291c2a3 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,10 @@ .vscode/* !.vscode/c_cpp_properties.json +src/tts_package/resource/* +src/tts_package/resource/config.json +src/tts_package/resource/model.pth +src/tts_package/resource/output.wav build devel diff --git a/README.md b/README.md index b0212d35..2004a7f9 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Joint -> Servo mappings are defined in two files. Configuration file connects se * src/inmoov_description - robot files, which define the robot geometry and configuration for simulation (URDF, SRDF & rviz configuration) * src/robot - robot launch files & servo controller configurations * src/robot_hardware - hardware interface for ros2_controller, communicates with U2D2 via dynamixel workbench + * src/tts_package - Text-to-speech package for Finnish speech synthesis ## Servo Table diff --git a/docs/BRINGUP.md b/docs/BRINGUP.md index e05a0aca..5932200d 100644 --- a/docs/BRINGUP.md +++ b/docs/BRINGUP.md @@ -55,6 +55,22 @@ Anyway, you are able to test the face tracking and eye movements like this. **Note: currently, only jaw, eyes, right hand & head pan movement can be simulated** +### Launching text-to-speech service + +Text-to-speech runs as a ROS 2 service that can be called from a terminal using the client included in the package. + +Run the service in a (new) terminal + +```console +ros2 run tts_package service +``` + +Call the service from a terminal using the client to synthesize speech + +```console +ros2 run tts_package client "Tämä lause syntetisoidaan puheeksi." +``` + ## Bring-up real HW robot ### (0. Test servo communication) @@ -142,6 +158,22 @@ Finally, start the eye movement node in a new terminal window ros2 run eye_movement eye_movement_node ``` +### 5. Launching text-to-speech service + +Text-to-speech runs as a ROS 2 service that can be called from a terminal using the client included in the package. 
+ +Run the service in a (new) terminal + +```console +ros2 run tts_package service +``` + +Call the service from a terminal using the client to synthesize speech + +```console +ros2 run tts_package client "Tämä lause syntetisoidaan puheeksi." +``` + **Todo: simplify bring up process (add the starting of the controllers to the launch file)** ## Sending action goals manually diff --git a/src/tts_msgs/CMakeLists.txt b/src/tts_msgs/CMakeLists.txt new file mode 100644 index 00000000..51b8b3a5 --- /dev/null +++ b/src/tts_msgs/CMakeLists.txt @@ -0,0 +1,40 @@ +cmake_minimum_required(VERSION 3.5) +project(tts_msgs) + +# Default to C99 +if(NOT CMAKE_C_STANDARD) + set(CMAKE_C_STANDARD 99) +endif() + +# Default to C++14 +if(NOT CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 14) +endif() + +if(CMAKE_COMPILER_IS_GNUCXX OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + add_compile_options(-Wall -Wextra -Wpedantic) +endif() + +# find dependencies +find_package(ament_cmake REQUIRED) +# uncomment the following section in order to fill in +# further dependencies manually. 
+# find_package( REQUIRED) +find_package(rosidl_default_generators REQUIRED) + +rosidl_generate_interfaces(${PROJECT_NAME} + "srv/StringToWav.srv" +) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + # the following line skips the linter which checks for copyrights + # uncomment the line when a copyright and license is not present in all source files + #set(ament_cmake_copyright_FOUND TRUE) + # the following line skips cpplint (only works in a git repo) + # uncomment the line when this package is not in a git repo + #set(ament_cmake_cpplint_FOUND TRUE) + ament_lint_auto_find_test_dependencies() +endif() + +ament_package() diff --git a/src/tts_msgs/package.xml b/src/tts_msgs/package.xml new file mode 100644 index 00000000..85867a7f --- /dev/null +++ b/src/tts_msgs/package.xml @@ -0,0 +1,23 @@ + + + + tts_msgs + 1.0.0 + Interface for tts_package + Konsta Laurila + TODO: License declaration + + ament_cmake + + ament_lint_auto + ament_lint_common + + geometry_msgs + rosidl_default_generators + rosidl_default_runtime + rosidl_interface_packages + + + ament_cmake + + diff --git a/src/tts_msgs/srv/StringToWav.srv b/src/tts_msgs/srv/StringToWav.srv new file mode 100644 index 00000000..cfec3c56 --- /dev/null +++ b/src/tts_msgs/srv/StringToWav.srv @@ -0,0 +1,3 @@ +string data +--- +bool success \ No newline at end of file diff --git a/src/tts_package/README.md b/src/tts_package/README.md new file mode 100644 index 00000000..318c5065 --- /dev/null +++ b/src/tts_package/README.md @@ -0,0 +1,34 @@ +This package contains a service and a client for the Finnish text-to-speech feature. The service automatically plays the synthesized speech when it is called with the desired sentence as an argument. + +# Usage + +## Before +Check that model.pth and config.json are located in the src/tts_package/resource/ folder. These should be downloaded and installed automatically when the environment is installed with Vagrant. The script that does this is /vagrant-scripts/bootstrap.sh. 
+ +## Dependencies + +* `TTS` +* `espeak-ng` +* `simpleaudio` + +These are included in the newest version of the Vagrantfile. If they are not installed during bootstrap, they need to be installed on the VM before starting the service. + +Install TTS +> pip install TTS
+ +And install espeak +> apt -y install espeak + +## Run TTS service +> ros2 run tts_package service + + +## Using the service +The service can be used by calling the client from a terminal, giving the sentences as an argument. Note that the sentences should be inside quotes and in Finnish. +> ros2 run tts_package client "Hei. Tässä on lause joka syntetisoidaan puheeksi." + +The service will now try to synthesize the sentence into a .wav file located at 'src/tts_package/resource/output.wav', which is then played automatically. + +## Potential future improvements + +* Implement this feature to work with potential speech-to-text and chatbot features. diff --git a/src/tts_package/package.xml b/src/tts_package/package.xml new file mode 100644 index 00000000..fef5137e --- /dev/null +++ b/src/tts_package/package.xml @@ -0,0 +1,23 @@ + + + + tts_package + 1.0.0 + Text-to-speech package for synthesizing speech. + Konsta Laurila + TODO: License declaration + + python3-TTS + python3-simpleaudio + + tts_msgs + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/src/tts_package/resource/tts_package b/src/tts_package/resource/tts_package new file mode 100644 index 00000000..e69de29b diff --git a/src/tts_package/setup.cfg b/src/tts_package/setup.cfg new file mode 100644 index 00000000..5fd11624 --- /dev/null +++ b/src/tts_package/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script-dir=$base/lib/tts_package +[install] +install-scripts=$base/lib/tts_package diff --git a/src/tts_package/setup.py b/src/tts_package/setup.py new file mode 100644 index 00000000..953c11ab --- /dev/null +++ b/src/tts_package/setup.py @@ -0,0 +1,27 @@ +from setuptools import setup + +package_name = 'tts_package' + +setup( + name=package_name, + version='0.0.0', + packages=[package_name], + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + 
# Copyright 2015 Open Source Robotics Foundation, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ament_copyright.main import main
import pytest


@pytest.mark.copyright
@pytest.mark.linter
def test_copyright():
    """Run the ament copyright linter on '.' and 'test'; fail on a non-zero status."""
    linted_paths = ['.', 'test']
    exit_status = main(argv=linted_paths)
    assert exit_status == 0, 'Found errors'
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from ament_flake8.main import main_with_errors
import pytest


@pytest.mark.flake8
@pytest.mark.linter
def test_flake8():
    """Run ament's flake8 wrapper with no arguments; fail listing every reported error."""
    return_code, style_errors = main_with_errors(argv=[])
    # The message is only built when the assertion fails.
    assert return_code == 0, (
        'Found %d code style errors / warnings:\n' % len(style_errors)
        + '\n'.join(style_errors)
    )
import sys

from tts_msgs.srv import StringToWav

import rclpy
from rclpy.node import Node


class ttsClientAsync(Node):
    """Client node for the ``StringToWav`` text-to-speech service.

    The constructor creates a client for the service named ``'StringToWav'``
    and blocks until that service is available.

    Typical use from a terminal (quote the text so it is one argument):
        ros2 run tts_package client "Tämä lause syntetisoidaan puheeksi."
    """

    def __init__(self):
        """Create the service client and wait until the service is reachable."""
        super().__init__('tts_client_async')
        self.cli = self.create_client(StringToWav, 'StringToWav')
        # Re-check once per second and log each miss, so a service that was
        # never started is visible to the user instead of a silent hang.
        while not self.cli.wait_for_service(timeout_sec=1.0):
            self.get_logger().info('service not available, waiting again...')
        self.req = StringToWav.Request()

    def send_request(self, data):
        """Send ``data`` to the service and spin until the call completes.

        :param data: text stored in the request's ``data`` field.
        :return: whatever the completed call future yields as its result.
        """
        self.req.data = data
        self.future = self.cli.call_async(self.req)
        rclpy.spin_until_future_complete(self, self.future)
        return self.future.result()


def main():
    """Send the first command-line argument to the service and log the outcome.

    Exits with a usage message (non-zero status) when no sentence is given.
    """
    # Validate before touching ROS: without this, a missing argument
    # surfaced as an IndexError traceback from sys.argv[1].
    if len(sys.argv) < 2:
        sys.exit('usage: ros2 run tts_package client "<sentence to synthesize>"')

    rclpy.init()
    tts_client = ttsClientAsync()
    try:
        response = tts_client.send_request(sys.argv[1])
        # NOTE(review): result() is expected to be None only if spinning
        # stopped before the future completed; treated as a failure here.
        if response is not None and response.success:
            tts_client.get_logger().info('Succesfully synthentized!')
        else:
            # A failed synthesis is an error, not routine information.
            tts_client.get_logger().error('Failed to synthentize!')
    finally:
        # Release the node and the ROS context even if the request raised.
        tts_client.destroy_node()
        rclpy.shutdown()


if __name__ == '__main__':
    main()
import simpleaudio as sa

from TTS.utils.synthesizer import Synthesizer

from rclpy.node import Node

from tts_msgs.srv import StringToWav

import rclpy


class TTSService(Node):
    """Service node for ``StringToWav``: synthesize text, save it, play it.

    For every request the ``data`` string is synthesized, written to a .wav
    file and handed to simpleaudio for playback; the response's ``success``
    flag reports whether all of those steps ran without raising.

    ROS parameters (the defaults are the previously hard-coded values and are
    relative to the process's working directory):
        model_path:  first argument passed to ``Synthesizer``.
        config_path: second argument passed to ``Synthesizer``.
        output_path: .wav file that is written and then played.
    """

    def __init__(self):
        """Read the path parameters, load the synthesizer, advertise the service."""
        super().__init__('TTS_service')
        model_path = self.declare_parameter(
            'model_path', 'src/tts_package/resource/model.pth').value
        config_path = self.declare_parameter(
            'config_path', 'src/tts_package/resource/config.json').value
        self.output = self.declare_parameter(
            'output_path', 'src/tts_package/resource/output.wav').value

        self.synthetizer = Synthesizer(model_path, config_path)
        # Advertise the service only once the synthesizer exists, so clients
        # waiting on it do not see the service before it can do any work.
        self.srv = self.create_service(
            StringToWav, 'StringToWav', self.stringToWav_callback)
        self.get_logger().info("Service running...")

    def stringToWav_callback(self, request, response):
        """Synthesize ``request.data``, save and play it, and report success.

        :param request: service request; its ``data`` field is the text.
        :param response: service response; its ``success`` field is set here.
        :return: the same ``response`` object.
        """
        self.get_logger().info("Incoming request to synthentize string: %s" % (request.data))
        if not request.data.strip():
            # Nothing to say: answer immediately instead of running the model.
            self.get_logger().warning("Empty text received, nothing to synthesize.")
            response.success = False
            return response
        try:
            wav = self.synthetizer.tts(request.data)
            self.synthetizer.save_wav(wav, self.output)
            self.play_audio()
        except Exception as e:
            # Deliberately broad: this is the service boundary, and the caller
            # must always get a response. Log at error level so it stands out.
            self.get_logger().error(f"Error happened: {str(e)}")
            response.success = False
        else:
            response.success = True
        self.get_logger().info("Callback over. Service running...")
        return response

    def play_audio(self):
        """Load the output .wav file, start playback and wait on the play object."""
        wave_obj = sa.WaveObject.from_wave_file(self.output)
        play_obj = wave_obj.play()
        play_obj.wait_done()


def main():
    """Run the TTS service node until it is interrupted."""
    rclpy.init()
    tts_service = TTSService()
    try:
        rclpy.spin(tts_service)
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the service; not an error.
        pass
    finally:
        tts_service.destroy_node()
        # Guard against a context that was already shut down during interrupt.
        if rclpy.ok():
            rclpy.shutdown()


if __name__ == '__main__':
    main()