dhmit · sharabhuiyan · Apr 23, 2021 · Apr 23, 2021 · Apr 23, 2021 · Apr 23, 2021
diff --git a/backend/app/analysis/speech_to_text.py b/backend/app/analysis/speech_to_text.py
@@ -0,0 +1,23 @@
+import nltk
+from nltk.metrics import edit_distance
+
+def sentence_feeder(text):
+    """
+    Split the given text into sentences and return them as a list.
+    """
+    sentences = nltk.tokenize.sent_tokenize(text)
+    return sentences
+
+def tokenize_sentence(sentence):
+    """
+    Given a sentence, return a list of the words in it.
+
+    Only tokens for which str.isalpha() is true are kept, so tokens such as
+    punctuation and numbers are dropped.
+    """
+    words = []
+    for token in nltk.tokenize.word_tokenize(sentence):
+        if token.isalpha():
+            words.append(token)
+    return words
+
+def get_transcript_score(expected, sentence):
+    """
+    Given the list of expected words and the transcribed sentence, return an integer
+    score from 0 to 100 based on the edit distance.
+
+    The expected words are joined with single spaces and compared character by
+    character with the transcript. An exact match scores 100, and the score drops as
+    the edit distance grows. The score is 0 when there are no expected words or when
+    the edit distance is larger than the expected text itself.
+    """
+    expected = ' '.join(expected)
+    # Nothing to compare against: bail out before dividing by len(expected) == 0.
+    if not expected:
+        return 0
+    edit_dist = edit_distance(sentence, expected)
+    # A transcript much longer than the expected text gives edit_dist > len(expected);
+    # clamp so the score never goes negative.
+    return max(0, int((len(expected) - edit_dist) / len(expected) * 100))
diff --git a/backend/app/models.py b/backend/app/models.py
@@ -33,6 +33,9 @@ def default_module():
             'adjective': False,
             'adverb': False,
         },
+        'speech to text': {
+            'sentence by sentence': False,
+        },
     }
     return mods
 

diff --git a/backend/app/views.py b/backend/app/views.py
@@ -4,8 +4,10 @@
 import json
 import random
 
+from django.conf import settings
 from django.http import Http404
-
+from ibm_watson import SpeechToTextV1
+from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
 from rest_framework.decorators import api_view
 from rest_framework.response import Response
 
@@ -32,7 +34,16 @@
 from .analysis.crosswords import (
     get_crosswords,
 )
-from .quiz_creation.conjugation_quiz import get_quiz_sentences
+from .quiz_creation.conjugation_quiz import (
+    get_quiz_sentences,
+)
+from .analysis.speech_to_text import (
+    sentence_feeder,
+    tokenize_sentence,
+    get_transcript_score,
+)
+
+
 
 
 @api_view(['GET'])
@@ -158,8 +169,7 @@ def add_text(request):
     """
     API endpoint for adding a piece of text
     """
-    body = json.loads(request.body.decode('utf-8'))
-    new_text_obj = Text(title=body['title'], content=body['content'])
+    new_text_obj = Text(title=request.data['title'], content=request.data['content'])
     new_text_obj.save()
     get_text_data(new_text_obj)
     serializer = TextSerializer(new_text_obj)
@@ -230,6 +240,41 @@ def get_quiz_data(request, text_id):
     res = get_quiz_sentences(text_obj.content)
     return Response(res)
 
+@api_view(['GET'])
+def get_text_sentences(request, text_id):
+    """
+    API endpoint to get the sentences of a single piece of text based on its ID.
+
+    Responds with a list of {'sentence': <sentence>} objects, one per sentence that
+    sentence_feeder finds in the text's content, or with a 404 if no text has that ID.
+    """
+    try:
+        text_obj = Text.objects.get(id=text_id)
+    except Text.DoesNotExist as exc:
+        # Report an unknown ID as "not found" rather than as a server error.
+        raise Http404 from exc
+    res = [{'sentence': sentence} for sentence in sentence_feeder(text_obj.content)]
+    return Response(res)
+
+@api_view(['POST'])
+def get_transcript(request):
+    """
+    API endpoint to get a text transcript from an audio file.
+
+    Expects a POST with an 'audio' file upload (sent to the service as WebM/Opus) and a
+    'sentence' field holding the sentence the speaker was asked to read. The audio is
+    sent to IBM Watson Speech to Text, and the response is a one-element list holding
+    the transcript and its score against the expected sentence. A request missing
+    either field gets a 400 response.
+    """
+    audio_file = request.FILES.get('audio')
+    sentence = request.POST.get('sentence')
+    if audio_file is None or sentence is None:
+        return Response(
+            {'error': "Both an 'audio' file and a 'sentence' field are required."},
+            status=400,
+        )
+    expected_words = tokenize_sentence(sentence.lower())
+
+    authenticator = IAMAuthenticator(settings.IBM_KEY)
+    speech_to_text = SpeechToTextV1(authenticator=authenticator)
+    # Must not carry trailing whitespace, which would become part of the URL.
+    service_url = 'https://api.us-east.speech-to-text.watson.cloud.ibm.com/instances/0a741a70' \
+                  '-e987-4969-85b8-3e6e290d31f6'
+    speech_to_text.set_service_url(service_url)
+    # NOTE(review): the Watson SDK's keyword-spotting parameters appear to be
+    # `keywords` / `keywords_threshold`; `keyword` may be silently ignored -- confirm.
+    speech_recognition_results = speech_to_text.recognize(
+        audio=audio_file,
+        content_type='audio/webm;codecs=opus',
+        keyword=expected_words,
+        word_alternatives_threshold=0.9,
+    ).get_result()
+
+    # 'results' may be empty (e.g. no speech was recognized); treat that as an empty
+    # transcript instead of failing on the [0] lookups.
+    results = speech_recognition_results.get('results') or []
+    if results and results[0].get('alternatives'):
+        transcript = results[0]['alternatives'][0]['transcript']
+    else:
+        transcript = ''
+
+    # get_transcript_score divides by the length of the expected text, so skip it when
+    # the sentence contains no alphabetic words.
+    score = get_transcript_score(expected_words, transcript) if expected_words else 0
+    res = [
+        {
+            'transcript': transcript,
+            'score': score,
+        }
+    ]
+    return Response(res)
 
 @api_view(['GET'])
 def get_response_quiz_data(request, text_id):

diff --git a/backend/config/settings/base.py b/backend/config/settings/base.py
@@ -9,6 +9,7 @@
 """
 
 import os
+import dotenv
 
 CONFIG_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 BACKEND_DIR = os.path.dirname(CONFIG_DIR)
@@ -159,3 +160,9 @@
         'STATS_FILE': os.path.join(PROJECT_ROOT, 'webpack-stats.json'),
     }
 }
+
+# Load the IBM API key: read PROJECT_ROOT/.env into the environment when that file
+# exists, then take the key from the IBM_KEY environment variable.
+DOTENV_FILE = os.path.join(PROJECT_ROOT, ".env")
+if os.path.isfile(DOTENV_FILE):
+    dotenv.load_dotenv(DOTENV_FILE)
+# Raises KeyError while the settings module is imported if IBM_KEY is not set.
+IBM_KEY = os.environ['IBM_KEY']
diff --git a/backend/config/urls.py b/backend/config/urls.py
@@ -28,7 +28,10 @@
     get_picturebook_data,
     get_crossword,
     get_quiz_data,
-    text, get_response_quiz_data,
+    text,
+    get_text_sentences,
+    get_transcript,
+    get_response_quiz_data,
 )
 
 
@@ -61,7 +64,8 @@ def react_view_path(route, component_name):
     path('api/text/<int:text_id>', text),
     path('api/get_picturebook_prompt/<int:text_id>/<str:part_of_speech>', get_picturebook_prompt),
     path('api/get_picturebook_data', get_picturebook_data),
-
+    path('api/get_text_sentences/<int:text_id>', get_text_sentences),
+    path('api/get_transcript', get_transcript),
     # View paths
     react_view_path('', 'IndexView'),
     react_view_path('anagrams/<int:textID>/<str:partOfSpeech>', 'AnagramView'),
@@ -72,6 +76,7 @@ def react_view_path(route, component_name):
     react_view_path('quiz/', 'AllQuizView'),
     react_view_path('quiz/<int:textId>/', 'QuizView'),
     react_view_path('picturebook/<int:textID>/<str:partOfSpeech>', 'PictureBookView'),
+    react_view_path('stt/<int:textID>/', 'SpeechToTextView'),
     react_view_path('response_quiz/', 'ResponseAllQuizView'),
     react_view_path('response_quiz/<int:textID>/', 'ResponseQuizView'),
 ]