Skip to content
This repository has been archived by the owner on Feb 9, 2023. It is now read-only.

Voicekit - a problem about bluetooth #379

Open
wants to merge 6 commits into
base: aiyprojects
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions HACKING.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Setting up the image

**Deprecated:** The newest instructions are in the
[aiyprojects
branch](https://github.com/google/aiyprojects-raspbian/blob/aiyprojects/HACKING.md).

We recommend using [the images](https://aiyprojects.withgoogle.com/voice) we
provide. Those images are based on [Raspbian](https://www.raspberrypi.org/downloads/raspbian/),
with a few customizations and are tested on the Raspberry Pi 3. If you prefer
Expand Down
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<img src="https://aiyprojects.withgoogle.com/static/images/icons/aiy-square-logo.svg" width="40%">
<img src="https://aiyprojects.withgoogle.com/static/images/icons/aiy-circular-logo.svg" width="40%">

This repository contains an easy-to-use API for the AIY Voice Kit.
You can use it to create voice commands with simple while loops - have a look at the [demos](https://github.com/google/aiyprojects-raspbian/tree/voicekit/src).
Expand All @@ -9,8 +9,15 @@ If you're using Raspbian instead of Google's provided image, read
[HACKING.md](HACKING.md) for information on getting started.

For returning users:
The old voice-recognizer demo remains in the [master branch](https://github.com/google/aiyprojects-raspbian/tree/master) of this project.
The new code is in the `voicekit` branch, and is included in images starting with aiyprojects-2017-09-11.img.
The newest code is in the
[aiyprojects
branch](https://github.com/google/aiyprojects-raspbian/tree/aiyprojects), which
supports all AIY kits.
The `voicekit` branch is deprecated, and users should switch to the
`aiyprojects` branch.
The original, deprecated voice-recognizer demo remains in the [master
branch](https://github.com/google/aiyprojects-raspbian/tree/master) of this
project.

# Support

Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
google-assistant-grpc==0.0.2
grpc-google-cloud-speech-v1beta1==0.14.0
google-auth-oauthlib==0.1.0
google-assistant-grpc==0.1.0
google-cloud-speech==0.30.0
google-auth-oauthlib==0.2.0
6 changes: 3 additions & 3 deletions scripts/install-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ cd "${scripts_dir}/.."
virtualenv --system-site-packages -p python3 env
env/bin/pip install -r requirements.txt

# The google-assistant-library is only available on ARMv7.
if [[ "$(uname -m)" == "armv7l" ]] ; then
env/bin/pip install google-assistant-library==0.0.3
# The google-assistant-library is only available on some platforms.
if [[ "$(uname -m)" == "armv7l" || "$(uname -m)" == "x86_64" ]] ; then
env/bin/pip install google-assistant-library==0.1.0
fi
64 changes: 37 additions & 27 deletions src/aiy/_apis/_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,28 @@
import collections
import logging
import os
import sys
import tempfile
import wave

import google.auth
import google.auth.exceptions
import google.auth.transport.grpc
import google.auth.transport.requests
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
try:
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types
except ImportError:
print("Failed to import google.cloud.speech. Try:")
print(" env/bin/pip install -r requirements.txt")
sys.exit(1)

from google.rpc import code_pb2 as error_code
from google.assistant.embedded.v1alpha1 import embedded_assistant_pb2
from google.assistant.embedded.v1alpha1 import (
embedded_assistant_pb2,
embedded_assistant_pb2_grpc,
)
import grpc
from six.moves import queue

Expand Down Expand Up @@ -135,7 +147,7 @@ def _get_speech_context(self):
"""Return a SpeechContext instance to bias recognition towards certain
phrases.
"""
return cloud_speech.SpeechContext(
return types.SpeechContext(
phrases=self._phrases,
)

Expand Down Expand Up @@ -289,53 +301,51 @@ def __init__(self, credentials_file):

self.language_code = aiy.i18n.get_language_code()

if not hasattr(cloud_speech, 'StreamingRecognizeRequest'):
raise ValueError("cloud_speech_pb2.py doesn't have StreamingRecognizeRequest.")

self._transcript = None

def reset(self):
super().reset()
self._transcript = None

def _make_service(self, channel):
return cloud_speech.SpeechStub(channel)
return speech.SpeechClient()

def _create_config_request(self):
recognition_config = cloud_speech.RecognitionConfig(
# There are a bunch of config options you can specify. See
# https://goo.gl/KPZn97 for the full list.
encoding='LINEAR16', # raw 16-bit signed LE samples
sample_rate=AUDIO_SAMPLE_RATE_HZ,
recognition_config = types.RecognitionConfig(
encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
sample_rate_hertz=AUDIO_SAMPLE_RATE_HZ,
# For a list of supported languages see:
# https://cloud.google.com/speech/docs/languages.
language_code=self.language_code, # a BCP-47 language tag
speech_context=self._get_speech_context(),
speech_contexts=[self._get_speech_context()],
)
streaming_config = cloud_speech.StreamingRecognitionConfig(
streaming_config = types.StreamingRecognitionConfig(
config=recognition_config,
single_utterance=True, # TODO(rodrigoq): find a way to handle pauses
single_utterance=True,
)

return cloud_speech.StreamingRecognizeRequest(
streaming_config=streaming_config)
# TODO(rodrigoq): we're actually returning a Config, not a Request, as
# the v1 API takes the Config and wraps it up in a Request, but we still
# want to share code with the Assistant API. Can we clean this up?
return streaming_config

def _create_audio_request(self, data):
return cloud_speech.StreamingRecognizeRequest(audio_content=data)
return types.StreamingRecognizeRequest(audio_content=data)

def _create_response_stream(self, service, request_stream, deadline):
return service.StreamingRecognize(request_stream, deadline)
def _create_response_stream(self, client, request_stream, deadline):
config = next(request_stream)
return client.streaming_recognize(config, request_stream)

def _stop_sending_audio(self, resp):
"""Check the endpointer type to see if an utterance has ended."""

if resp.endpointer_type:
endpointer_type = cloud_speech.StreamingRecognizeResponse.EndpointerType.Name(
resp.endpointer_type)
logger.info('endpointer_type: %s', endpointer_type)
if resp.speech_event_type:
speech_event_type = types.StreamingRecognizeResponse.SpeechEventType.Name(
resp.speech_event_type)
logger.info('endpointer_type: %s', speech_event_type)

END_OF_AUDIO = cloud_speech.StreamingRecognizeResponse.EndpointerType.Value('END_OF_AUDIO')
return resp.endpointer_type == END_OF_AUDIO
END_OF_SINGLE_UTTERANCE = types.StreamingRecognizeResponse.SpeechEventType.Value('END_OF_SINGLE_UTTERANCE')
return resp.speech_event_type == END_OF_SINGLE_UTTERANCE

def _handle_response(self, resp):
"""Store the last transcript we received."""
Expand Down Expand Up @@ -367,7 +377,7 @@ def reset(self):
self._transcript = None

def _make_service(self, channel):
return embedded_assistant_pb2.EmbeddedAssistantStub(channel)
return embedded_assistant_pb2_grpc.EmbeddedAssistantStub(channel)

def _create_config_request(self):
audio_in_config = embedded_assistant_pb2.AudioInConfig(
Expand Down
118 changes: 118 additions & 0 deletions src/aiy/assistant/device_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/usr/bin/env python3
# Copyright 2017 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Device registration helpers for the Google Assistant API."""

import json
import os
import uuid

import google.auth.transport.requests

import aiy.assistant.auth_helpers


# Metadata used to register this device model with the Assistant API.
_DEVICE_MODEL = "voice-kit"
_DEVICE_MANUFACTURER = "AIY Projects"
_DEVICE_NAME = "Voice Kit"
# NOTE(review): registered as a LIGHT device type — confirm this is the
# intended type for the Voice Kit.
_DEVICE_TYPE = "action.devices.types.LIGHT"

# Cache file holding the generated IDs (stored in the auth helpers' cache dir).
_DEVICE_ID_FILE = os.path.join(
    aiy.assistant.auth_helpers._VR_CACHE_DIR, 'device_id.json')


def _get_project_id():
    """Read the Google Cloud project ID out of the client secrets file."""
    with open(aiy.assistant.auth_helpers._ASSISTANT_CREDENTIALS_FILE) as secrets_file:
        secrets = json.load(secrets_file)
    return secrets["installed"]["project_id"]


def _get_api_url(*args):
return "/".join(
("https://embeddedassistant.googleapis.com/v1alpha2/projects",) + args)

def _load_ids(id_path):
with open(id_path, 'r') as f:
id_data = json.load(f)
return id_data["model_id"], id_data["device_id"]


def _save_ids(id_path, model_id, device_id):
if not os.path.exists(os.path.dirname(id_path)):
os.makedirs(os.path.dirname(id_path))

id_data = {
"model_id": model_id,
"device_id": device_id,
}
with open(id_path, 'w') as f:
json.dump(id_data, f)


def _get_model_id(credentials, session, project_id):
    """Register a device model for this project and return its model ID.

    A 409 response means the model was already registered on a previous run
    and is treated as success; any other error status raises.
    """
    model_id = "%s-%s" % (project_id, _DEVICE_MODEL)
    manifest = {
        "manufacturer": _DEVICE_MANUFACTURER,
        "product_name": _DEVICE_NAME,
    }
    body = json.dumps({
        "device_model_id": model_id,
        "project_id": project_id,
        "device_type": _DEVICE_TYPE,
        "manifest": manifest,
    })
    response = session.post(_get_api_url(project_id, "deviceModels"), data=body)
    # Ignore 409 Conflict: the model ID already exists.
    if response.status_code != 409:
        response.raise_for_status()
    return model_id


def get_ids(credentials, model_id=None):
    """get_ids gets a Device ID for use with the Google Assistant SDK.

    It optionally also gets a Device Model ID if one is not given. The IDs are
    cached on disk so that a device keeps a consistent ID.

    Returns:
        a tuple: (model_id, device_id)
    """

    # Fast path: reuse the IDs written by a previous run.
    if os.path.exists(_DEVICE_ID_FILE):
        return _load_ids(_DEVICE_ID_FILE)

    session = google.auth.transport.requests.AuthorizedSession(credentials)
    project_id = _get_project_id()
    if not model_id:
        model_id = _get_model_id(credentials, session, project_id)

    device_id = "%s-%s" % (model_id, uuid.uuid4())
    # We can hardcode client_type as SDK_SERVICE, because the Assistant Library
    # creates its own device_id.
    registration = {
        "id": device_id,
        "model_id": model_id,
        "client_type": "SDK_SERVICE",
    }
    response = session.post(_get_api_url(project_id, "devices"),
                            data=json.dumps(registration))
    response.raise_for_status()

    _save_ids(_DEVICE_ID_FILE, model_id, device_id)
    return model_id, device_id


if __name__ == "__main__":
    # Manual smoke test: authenticate, register/load IDs, and print them.
    creds = aiy.assistant.auth_helpers.get_assistant_credentials()
    print("ids:", get_ids(creds))
16 changes: 14 additions & 2 deletions src/aiy/cloudspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,30 @@ def __init__(self, credentials_file):
self._recorder = aiy.audio.get_recorder()
self._hotwords = []

def recognize(self):
def recognize(self, immediate=False):
"""Recognizes the user's speech and transcribes it into text.

This function listens to the user's speech via the VoiceHat speaker. Then it
contacts Google CloudSpeech APIs and returns a textual transcript if possible.
If hotword list is populated this method will only respond if hotword is said.

Args:
immediate: ignore the hotword list, even if it has been populated.
May be used to create a conversational experience, for example:

text = recognizer.recognize()
if 'call a friend' in text:
aiy.audio.say('OK, which one?')
friend = recognizer.recognize(immediate=True)
make_a_call(friend)
"""
self._request.reset()
self._request.set_endpointer_cb(self._endpointer_callback)
self._recorder.add_processor(self._request)
text = self._request.do_request().transcript
if self._hotwords and text:
if immediate:
return text
elif self._hotwords and text:
text = text.lower()
loc_min = len(text)
hotword_found = ''
Expand Down
4 changes: 3 additions & 1 deletion src/assistant_library_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import sys

import aiy.assistant.auth_helpers
import aiy.assistant.device_helpers
import aiy.voicehat
from google.assistant.library import Assistant
from google.assistant.library.event import EventType
Expand Down Expand Up @@ -60,7 +61,8 @@ def process_event(event):

def main():
credentials = aiy.assistant.auth_helpers.get_assistant_credentials()
with Assistant(credentials) as assistant:
model_id, device_id = aiy.assistant.device_helpers.get_ids(credentials)
with Assistant(credentials, model_id) as assistant:
for event in assistant.start():
process_event(event)

Expand Down
4 changes: 3 additions & 1 deletion src/assistant_library_with_button_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import threading

import aiy.assistant.auth_helpers
import aiy.assistant.device_helpers
import aiy.voicehat
from google.assistant.library import Assistant
from google.assistant.library.event import EventType
Expand Down Expand Up @@ -61,7 +62,8 @@ def start(self):

def _run_task(self):
credentials = aiy.assistant.auth_helpers.get_assistant_credentials()
with Assistant(credentials) as assistant:
model_id, device_id = aiy.assistant.device_helpers.get_ids(credentials)
with Assistant(credentials, model_id) as assistant:
self._assistant = assistant
for event in assistant.start():
self._process_event(event)
Expand Down
4 changes: 3 additions & 1 deletion src/assistant_library_with_local_commands_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import sys

import aiy.assistant.auth_helpers
import aiy.assistant.device_helpers
import aiy.audio
import aiy.voicehat
from google.assistant.library import Assistant
Expand Down Expand Up @@ -90,7 +91,8 @@ def process_event(assistant, event):

def main():
credentials = aiy.assistant.auth_helpers.get_assistant_credentials()
with Assistant(credentials) as assistant:
model_id, device_id = aiy.assistant.device_helpers.get_ids(credentials)
with Assistant(credentials, model_id) as assistant:
for event in assistant.start():
process_event(assistant, event)

Expand Down