Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speech to Text Module #31

Open
wants to merge 41 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
3d9eda1
added files
phuang00 Apr 23, 2021
f2dda25
init frontend
jlin00 Apr 23, 2021
196f09c
added speech to text
phuang00 Apr 23, 2021
24c55b2
fixed imports
phuang00 Apr 23, 2021
65d4040
Create speech_to_text.py
sharabhuiyan Apr 23, 2021
66e59a7
Merge branch 'speech-to-text' of https://github.com/dhmit/lang_learn …
sharabhuiyan Apr 23, 2021
b2ddf0f
Update speech_to_text.py
sharabhuiyan Apr 23, 2021
a865616
added proptypes
jlin00 Apr 23, 2021
5b2569b
updated views
phuang00 Apr 23, 2021
bd45d09
Update speech_to_text.py
sharabhuiyan Apr 23, 2021
6d9885c
Merge branch 'speech-to-text' of https://github.com/dhmit/lang_learn …
sharabhuiyan Apr 23, 2021
e50467d
added api to pass sentences to frontend
phuang00 Apr 23, 2021
f1ebac7
added react-mic
phuang00 Apr 23, 2021
80a3d2c
added react-mic
jlin00 Apr 23, 2021
cf54a8a
removed print statement
phuang00 Apr 30, 2021
cdd7d74
Update requirements.txt
phuang00 Apr 30, 2021
b846024
added playback functionality
jlin00 Apr 30, 2021
871e136
Merge branch 'speech-to-text' of https://github.com/dhmit/lang_learn …
jlin00 Apr 30, 2021
570a058
removed some changes
phuang00 Apr 30, 2021
1355ceb
Merge branch 'speech-to-text' of https://github.com/dhmit/lang_learn …
phuang00 Apr 30, 2021
bfc36fb
add IBM api, will put in apikey later
phuang00 May 12, 2021
e894af5
added api to views
phuang00 May 12, 2021
dd03b7e
updated config for API key without putting it on github
phuang00 May 12, 2021
7499f41
added sentences and made page look more like figma
phuang00 May 12, 2021
af35ce2
add keywords to IBM api
phuang00 May 12, 2021
61dc25b
tested sphinx audio transcription
sharabhuiyan May 13, 2021
f4c684c
updated add_text
sharabhuiyan May 14, 2021
d046298
deleted pocket sphinx
jlin00 May 14, 2021
4e229cd
added scoring based on edit distance
phuang00 May 14, 2021
c9934e2
Merge branch 'master' into speech-to-text
phuang00 May 14, 2021
16be4b1
loading btn
jlin00 May 14, 2021
e3542f4
Merge branch 'speech-to-text' of https://github.com/dhmit/lang_learn …
phuang00 May 14, 2021
6c7812f
fixed merge error
jlin00 May 14, 2021
d96fe4c
added instructions button
phuang00 May 14, 2021
d2609bc
Merge branch 'speech-to-text' of https://github.com/dhmit/lang_learn …
phuang00 May 14, 2021
37a9d74
added instructions to the module and cleaned some frontend code
phuang00 May 17, 2021
17df133
added stt module to index and instructor page
phuang00 May 24, 2021
9246f38
Merge branch 'master' into speech-to-text
phuang00 May 24, 2021
6bd2f06
fixed linter errors (except api key issues)
sharabhuiyan May 25, 2021
22ac7a5
updated SpeechToTextView
sharabhuiyan May 25, 2021
5ae1a5d
Merge branch 'speech-to-text' of https://github.com/dhmit/lang_learn …
sharabhuiyan May 25, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions backend/app/analysis/speech_to_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import nltk
from nltk.metrics import edit_distance

def sentence_feeder(text):
    """
    Split the given text into sentences with NLTK's sentence tokenizer and
    return them as a list.
    """
    sentences = nltk.tokenize.sent_tokenize(text)
    return sentences

def tokenize_sentence(sentence):
    """
    Break a sentence into tokens with NLTK's word tokenizer and return only the
    tokens that consist entirely of alphabetic characters; any token containing
    punctuation or digits is dropped.
    """
    tokens = nltk.tokenize.word_tokenize(sentence)
    return list(filter(str.isalpha, tokens))

def get_transcript_score(expected, sentence):
    """
    Score how closely a transcribed sentence matches the expected sentence.

    The expected words are joined with single spaces and compared with the
    transcript by edit distance, normalised by the length of the expected text.

    :param expected: list of the expected words
    :param sentence: the transcribed sentence, as a string
    :return: an integer between 0 and 100 inclusive; 100 means an exact match
    """
    expected = ' '.join(expected)
    # Leading/trailing whitespace is not something the speaker said, so it must
    # not be counted as an edit against them.
    sentence = sentence.strip()
    if not expected:
        # There is no expected length to normalise by (dividing would raise
        # ZeroDivisionError): only an empty transcript is a match.
        return 0 if sentence else 100
    edit_dist = edit_distance(sentence, expected)
    # A transcript much longer than the expected text can have an edit distance
    # greater than len(expected); clamp so the score never drops below 0.
    return max(0, int((len(expected) - edit_dist) / len(expected) * 100))
3 changes: 3 additions & 0 deletions backend/app/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def default_module():
'adjective': False,
'adverb': False,
},
'speech to text': {
'sentence by sentence': False,
},
}
return mods

Expand Down
53 changes: 49 additions & 4 deletions backend/app/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import json
import random

from django.conf import settings
from django.http import Http404

from ibm_watson import SpeechToTextV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from rest_framework.decorators import api_view
from rest_framework.response import Response

Expand All @@ -32,7 +34,16 @@
from .analysis.crosswords import (
get_crosswords,
)
from .quiz_creation.conjugation_quiz import get_quiz_sentences
from .quiz_creation.conjugation_quiz import (
get_quiz_sentences,
)
from .analysis.speech_to_text import (
sentence_feeder,
tokenize_sentence,
get_transcript_score,
)




@api_view(['GET'])
Expand Down Expand Up @@ -158,8 +169,7 @@ def add_text(request):
"""
API endpoint for adding a piece of text
"""
body = json.loads(request.body.decode('utf-8'))
new_text_obj = Text(title=body['title'], content=body['content'])
new_text_obj = Text(title=request.data['title'], content=request.data['content'])
new_text_obj.save()
get_text_data(new_text_obj)
serializer = TextSerializer(new_text_obj)
Expand Down Expand Up @@ -230,6 +240,41 @@ def get_quiz_data(request, text_id):
res = get_quiz_sentences(text_obj.content)
return Response(res)

@api_view(['GET'])
def get_text_sentences(request, text_id):
    """
    API endpoint to get the sentences of the text with the given ID.

    Responds with a list of objects of the form {'sentence': <sentence>}, one for
    each sentence that sentence_feeder finds in the text's content.
    Raises Http404 if no text with the given ID exists.
    """
    try:
        text_obj = Text.objects.get(id=text_id)
    except Text.DoesNotExist as exc:
        # An unknown ID is a client error, not a server failure.
        raise Http404 from exc
    res = [{'sentence': sentence} for sentence in sentence_feeder(text_obj.content)]
    return Response(res)

@api_view(['POST'])
def get_transcript(request):
    """
    API endpoint to get a text transcript from an audio file.

    Expects a POST with an uploaded 'audio' file (sent to the service as WebM/Opus)
    and a 'sentence' field containing the sentence the audio is scored against.
    Responds with a one-element list holding the transcript and its score, or with
    a 400 if a field is missing or the sentence contains no alphabetic words.
    """
    audio_file = request.FILES.get('audio')
    expected_sentence = request.POST.get('sentence')
    if audio_file is None or expected_sentence is None:
        return Response({'error': "Both 'audio' and 'sentence' are required."}, status=400)

    expected_words = tokenize_sentence(expected_sentence.lower())
    if not expected_words:
        # Nothing to score against, so do not spend a transcription request on it.
        return Response({'error': "'sentence' does not contain any words."}, status=400)

    authenticator = IAMAuthenticator(settings.IBM_KEY)
    speech_to_text = SpeechToTextV1(authenticator=authenticator)
    # No trailing whitespace here: the SDK appends the request path to this URL.
    service_url = 'https://api.us-east.speech-to-text.watson.cloud.ibm.com/instances/0a741a70' \
                  '-e987-4969-85b8-3e6e290d31f6'
    speech_to_text.set_service_url(service_url)
    # NOTE(review): the SDK parameter is `keywords` (plural). Per the IBM docs, keyword
    # spotting is only performed when `keywords_threshold` is also given, and its output
    # is reported under `keywords_result`, which this view does not read yet.
    speech_recognition_results = speech_to_text.recognize(
        audio=audio_file,
        content_type='audio/webm;codecs=opus',
        keywords=expected_words,
        word_alternatives_threshold=0.9,
    ).get_result()

    # The response may hold several results (one per utterance) or none at all when no
    # speech was recognised, so join the best alternative of each instead of indexing [0].
    results = speech_recognition_results.get('results') or []
    transcript = ' '.join(
        result['alternatives'][0]['transcript'].strip() for result in results
    )
    res = [
        {
            'transcript': transcript,
            'score': get_transcript_score(expected_words, transcript),
        }
    ]
    return Response(res)

@api_view(['GET'])
def get_response_quiz_data(request, text_id):
Expand Down
7 changes: 7 additions & 0 deletions backend/config/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import os
import dotenv

CONFIG_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
BACKEND_DIR = os.path.dirname(CONFIG_DIR)
Expand Down Expand Up @@ -159,3 +160,9 @@
'STATS_FILE': os.path.join(PROJECT_ROOT, 'webpack-stats.json'),
}
}

# IBM API key: read from the process environment. If a .env file exists in
# PROJECT_ROOT it is loaded into the environment first.
DOTENV_FILE = os.path.join(PROJECT_ROOT, ".env")
if os.path.isfile(DOTENV_FILE):
    dotenv.load_dotenv(dotenv_path=DOTENV_FILE)
# Raises KeyError when settings are loaded if IBM_KEY is not defined.
IBM_KEY = os.environ['IBM_KEY']
9 changes: 7 additions & 2 deletions backend/config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
get_picturebook_data,
get_crossword,
get_quiz_data,
text, get_response_quiz_data,
text,
get_text_sentences,
get_transcript,
get_response_quiz_data,
)


Expand Down Expand Up @@ -61,7 +64,8 @@ def react_view_path(route, component_name):
path('api/text/<int:text_id>', text),
path('api/get_picturebook_prompt/<int:text_id>/<str:part_of_speech>', get_picturebook_prompt),
path('api/get_picturebook_data', get_picturebook_data),

path('api/get_text_sentences/<int:text_id>', get_text_sentences),
path('api/get_transcript', get_transcript),
# View paths
react_view_path('', 'IndexView'),
react_view_path('anagrams/<int:textID>/<str:partOfSpeech>', 'AnagramView'),
Expand All @@ -72,6 +76,7 @@ def react_view_path(route, component_name):
react_view_path('quiz/', 'AllQuizView'),
react_view_path('quiz/<int:textId>/', 'QuizView'),
react_view_path('picturebook/<int:textID>/<str:partOfSpeech>', 'PictureBookView'),
react_view_path('stt/<int:textID>/', 'SpeechToTextView'),
react_view_path('response_quiz/', 'ResponseAllQuizView'),
react_view_path('response_quiz/<int:textID>/', 'ResponseQuizView'),
]
Loading