Skip to content

Commit

Permalink
fix: changed api contract, tokenization logic and faq entries
Browse files Browse the repository at this point in the history
  • Loading branch information
willianantunes committed Apr 12, 2021
1 parent d5f2121 commit e2e6aca
Show file tree
Hide file tree
Showing 11 changed files with 533 additions and 228 deletions.
4 changes: 2 additions & 2 deletions backend/rave_of_phonetics/apps/core/api/v2/api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def transcribe(request: Request) -> Response:
transcriptions = check_and_retrieve_transcriptions(words, language)
logger.debug(f"Transcriptions: {transcriptions}")

result = []
result = {}
for transcription in transcriptions:
transcription_as_dict = asdict(transcription)
result.append(transcription_as_dict)
result[transcription.word] = transcription_as_dict["entries"]

return Response(result)
4 changes: 4 additions & 0 deletions backend/rave_of_phonetics/apps/core/api/v2/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,8 @@ def validate(self, data):
if language == self.supported_languages[1]:
data["language"] = "en-gb-x-rp"

# Remove duplicate words while preserving the order of first occurrence
words = data["words"]
data["words"] = list(dict.fromkeys(words))

return data
79 changes: 33 additions & 46 deletions backend/tests/int/apps/core/api/v2/test_api_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,7 @@ def test_should_receive_empty_entries_as_the_words_does_not_exist_in_database(cl

assert ResearchedWord.objects.count() == 3
assert response.status_code == 200
assert result == [
{"word": "rave", "entries": None},
{"word": "of", "entries": None},
{"word": "phonetics", "entries": None},
]
assert result == {"of": None, "phonetics": None, "rave": None}


@pytest.mark.django_db
Expand All @@ -79,44 +75,35 @@ def test_should_receive_transcriptions(client, mock_recaptcha_verify):

assert ResearchedWord.objects.count() == 3
assert response.status_code == 200
assert result == [
{
"entries": [
{
"classification": "Undefined",
"phonemic": "ɹ eɪ v",
"phonemic_syllables": "ɹ eɪ v",
"phonetic": None,
"phonetic_syllables": None,
"version": "Version 1",
}
],
"word": "rave",
},
{
"entries": [
{
"classification": "Undefined",
"phonemic": "ə v",
"phonemic_syllables": "ə v",
"phonetic": None,
"phonetic_syllables": None,
"version": "Version 1",
},
],
"word": "of",
},
{
"entries": [
{
"classification": "Undefined",
"phonemic": "f ə ˈn ɛ t ɪ k s",
"phonemic_syllables": "f ə • ˈn ɛ • t ɪ k s",
"phonetic": None,
"phonetic_syllables": None,
"version": "Version 1",
}
],
"word": "phonetics",
},
]
assert result == {
"of": [
{
"classification": "Undefined",
"phonemic": "ə v",
"phonemic_syllables": "ə v",
"phonetic": None,
"phonetic_syllables": None,
"version": "Version 1",
}
],
"phonetics": [
{
"classification": "Undefined",
"phonemic": "f ə ˈn ɛ t ɪ k s",
"phonemic_syllables": "f ə • ˈn ɛ • t ɪ k s",
"phonetic": None,
"phonetic_syllables": None,
"version": "Version 1",
}
],
"rave": [
{
"classification": "Undefined",
"phonemic": "ɹ eɪ v",
"phonemic_syllables": "ɹ eɪ v",
"phonetic": None,
"phonetic_syllables": None,
"version": "Version 1",
}
],
}
11 changes: 11 additions & 0 deletions backend/tests/int/apps/core/api/v2/test_serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,14 @@ def test_should_inform_that_is_valid_and_change_language_to_correct_one(self):
words, language = serializer.validated_data["words"], serializer.validated_data["language"]
assert words == fake_data["words"]
assert language == "en-gb-x-rp"

def test_should_inform_that_is_valid_and_words_must_not_be_repeatable(self):
fake_data = {"words": ["you", "if", "you", "won't", "won't"], "language": "en-gb"}
serializer = TranscriberSerializer(data=fake_data)

assert serializer.is_valid()

words, language = serializer.validated_data["words"], serializer.validated_data["language"]
assert len(words) == 3
assert words == ["you", "if", "won't"]
assert language == "en-gb-x-rp"
47 changes: 24 additions & 23 deletions frontend/src/components/FrequentlyAskedQuestions/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ import { slugify } from "../../utils/general"
const entries = [
{
question: "How do I use Rave of Phonetics?",
text: `The main function of Rave of Phonetics is to provide you a phonemic transcription of a word or text in
order to help you pronounce it. You can also see its syllables, stress marks and the phonetic version as well, if
they are available. Simply type a word in the space provided and read the transcription as well as listen to
the audio to improve your listening skills.`,
text: `The main function of Rave of Phonetics is to provide you a phonemic transcription of a word or text in order
to help you pronounce it. You can also see its syllables, stress marks and the phonetic version as well, if they are
available. Simply type a word in the space provided and read the transcription as well as listen to the audio
to improve your listening skills.`,
},
{
question: "Can I improve my accent with this page?",
Expand All @@ -27,15 +27,15 @@ const entries = [
},
{
question: "How do I share my transcriptions?",
text: `Sharing is caring. At the bottom of the <strong>IPA Transcription Tool</strong> panel you have a bottom named
<strong>copy link</strong>. Just set the tool as you'd like, let's say, you choose the word THING, using AMERICAN ENGLISH,
with SHOW STRESS and SHOW SYLLABLES activated, after that, you can simply click on <strong>copy link</strong> and then
it will be available in your transfer area! Just press CTRL+V on your social media and you'll see it!`,
text: `Sharing is caring. At the bottom of the <strong>IPA Transcription Tool</strong> panel, there is an option named
<strong>copy link</strong>. Just type in the desired word you would like to transcribe, apply your options of stress,
syllables, etc. and after that you can simply click on <strong>copy link</strong>. Then
it will be available on your clipboard! Just press CTRL+V on your social media and you'll see it!`,
},
{
question: "Is there a blog for this page?",
text: `Of course, there is. If you click <a href="/blog">here</a>, you will find a blog section that has interesting
topics related to phonetic and languages. Please share with all your friends
topics related to Phonetics and Languages. Please share with all your friends
<span role="img" aria-label="slightly smiling face">😊</span>`,
},
{
Expand All @@ -53,8 +53,8 @@ const entries = [
},
{
question: "What does ‘show stress’ mean?",
text: `Glad you asked, no need to stress. This option is used to see where the syllables of the words are and which
one is pronounce, or stressed, with standard pronunciation.`,
text: `Glad you asked, no need to stress. This option is used to see which syllable of the word has primary and
secondary stress. This option shows standard pronunciation.`,
},
{
question: "Why do I need to loop the speech?",
Expand All @@ -64,9 +64,9 @@ const entries = [
},
{
question: "How do I leave a comment?",
text: `Ah, yes. Please let us know what you think. If you want to leave a comment you can go to the bottom of the page.
They are available in our home, changelog, FAQ and blog pages. Also you can get in touch with us through our social
medias (see the bottom bar).`,
text: `Ah, yes. Please let us know what you think. If you want to leave a comment you can go to the bottom of the
page and find our comment section. They are available in our home, changelog, FAQ and blog sections. Also, you can
get in touch with us through our social medias (see the bottom bar).`,
},
{
question: "How can I ask questions?",
Expand All @@ -82,18 +82,19 @@ const entries = [
options but for now you will see mainly phonemic transcriptions.`,
},
{
question: "Is there an option for allophone variations?",
text: `I knew we would have some experts ask this question. For the moment, we mainly provide phonemic transcriptions,
as phonetic, syllables and allophones are being filled by the community through suggestions. If you'd like to check
all sort of variations, you should check if the transcription is underlined, if so, just click on it to see its
variations. The details can be seen if you click on the word, which will be underlined as well.`,
question: "Is there an option for phonetic variations of the word?",
text: `I knew we would have some experts ask this question. For the moment, we mainly provide phonemic transcriptions.
Phonetic transcription, syllables and allophone variations are still being developed as well as receiving
contributions and suggestions by our great community of learners and experts in the area. If you'd like to check
alternate variations of the word, you should check if the transcription is underlined, if so, just click on it to
see its variations. The details can be seen if you click on the word, which will be underlined as well.`,
},
{
question: "I would like to add or fix a transcription. Is it possible? How do I do that?",
text: `Sure thing! First you try to transcribe the desired word or phrase, after you receive the transcription, you
click on the underlined word. You should see the option <strong>apply suggestion</strong>. If you click on it, a
window will be opened describing what you can do. If you'd like to provide only the phonemic, just fill the field
related to it, give us some reasons and click on <strong>send suggestion</strong>. The same applies to phonetic.
text: `Sure thing! First you try to transcribe the desired word or phrase and then click on the underlined word.
You should see the option <strong>apply suggestion</strong>. If you click on it, a window will open describing what
you can do. If you'd like to provide only the phonemic, just fill the field related to that and give us some reasons
why you made the suggestion before you click on <strong>send suggestion</strong>. The same applies to phonetic.
Syllables will be handled by us, so you don't have to worry.`,
},
]
Expand Down
16 changes: 10 additions & 6 deletions frontend/src/domains/TranscriptionDetails.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { extractRawWordsFromText } from "../utils/tokenization"
import { extractRawWordsAndTheirTokensFromText } from "../utils/tokenization"

export class TranscriptionDetails {
constructor(
Expand Down Expand Up @@ -149,16 +149,20 @@ export class TranscriptionDetails {
// REGEX to deal with stress marks and punctuations
const regexToExtractStressMarks = /[ˈˌ]+/g
// Words that may contain punctuation
const wordsFromText = extractRawWordsFromText(this._text)
const rawWordsAndTheirTokens = extractRawWordsAndTheirTokensFromText(this._text)
// What will be returned
const changedTranscription = []
// Filling changedTranscription array with data
for (const [index, word] of wordsFromText.entries()) {
const wordDetails = this._transcriptionSetup[index]
for (const tokenDetails of rawWordsAndTheirTokens) {
// Extracting the raw word and its token
const word = tokenDetails.raw
const token = tokenDetails.token
// Creating a new entry to insert into changedTranscription array
const entries = this._transcriptionSetup[token]
const changedWord = { word }
const changedEntries = []
if (wordDetails.entries) {
wordDetails.entries.forEach(transcription => {
if (entries) {
entries.forEach(transcription => {
const changedTranscription = {}
Object.assign(changedTranscription, transcription)
if (!this._showStress) {
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/redux/slices/transcription-slice.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { createSlice } from "@reduxjs/toolkit"
import { transcribe } from "../../services/rop-api"
import { findById } from "../../domains/transcription-details-dao"
import { extractWordsFromText } from "../../utils/tokenization"
import { extractTokensFromText } from "../../utils/tokenization"

const initialState = {
text: "",
Expand Down Expand Up @@ -102,7 +102,7 @@ export const transcriptionFromText = (text, chosenLanguage, token, hookWhenError
dispatch(analysingText())

try {
const words = extractWordsFromText(text)
const words = extractTokensFromText(text)
const result = await transcribe(words, chosenLanguage, token)
dispatch(textWasTranscribed(result))
dispatch(transcriptionToBeSaved())
Expand Down
21 changes: 15 additions & 6 deletions frontend/src/utils/tokenization.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
/**
 * Extracts the words (and emojis) found in a text, lower-cased.
 *
 * A match is a run of one or more of: word characters (`\w`), apostrophes,
 * hyphens, ©, ®, code units in U+2000–U+3300 and code units in U+D000–U+DFFF
 * (the latter range covers the surrogate halves emojis are encoded with).
 * Everything else (spaces, commas, question marks, ...) acts as a separator.
 *
 * @param {string} text - Text to scan.
 * @returns {string[]} Matches in order of appearance, lower-cased. An empty
 *   array is returned when the text contains nothing that matches.
 */
export function extractWordsFromText(text) {
  const regexToExtractWordsAndEmojis = /([\w'\-\u00a9\u00ae\u2000-\u3300\ud83c\ud000-\udfff\ud83d\ud000-\udfff\ud83e\ud000-\udfff])+/g
  // With a global regex, match() returns null (not []) when nothing is found,
  // so fall back to an empty array instead of crashing on `.map`.
  const matches = text.match(regexToExtractWordsAndEmojis) || []
  return matches.map(value => value.toLowerCase())
}
const regexNegationToExtractWordsAndEmojis = /([^\w'\-\u00a9\u00ae\u2000-\u3300\ud83c\ud000-\udfff\ud83d\ud000-\udfff\ud83e\ud000-\udfff])+/g

export function extractRawWordsFromText(text) {
export function extractRawWordsAndTheirTokensFromText(text) {
const splitText = text.split(" ")
return splitText.filter(entry => entry).map(dirtyWord => dirtyWord.trim())

return splitText
.filter(entry => entry)
.map(dirtyWord => dirtyWord.trim())
.map(cleanedWord => {
const token = cleanedWord.toLowerCase().replace(regexNegationToExtractWordsAndEmojis, "")
return { raw: cleanedWord, token: token ? token : null }
})
}

/**
 * Produces the list of tokens for a text, one per entry returned by
 * `extractRawWordsAndTheirTokensFromText`.
 *
 * When an entry has a truthy `token` it is used as is; otherwise the entry's
 * raw word, lower-cased, takes its place.
 *
 * @param text - Text to tokenize.
 * @returns Array with one string per extracted entry.
 */
export function extractTokensFromText(text) {
  const wordEntries = extractRawWordsAndTheirTokensFromText(text)
  return wordEntries.map(entry => {
    if (entry.token) {
      return entry.token
    }
    // No usable token: fall back to the raw word in lower case
    return entry.raw.toLowerCase()
  })
}
Loading

0 comments on commit e2e6aca

Please sign in to comment.