From 7848e24fbc68da2f14e1424c428d2c1a1c99d990 Mon Sep 17 00:00:00 2001 From: candaceng Date: Thu, 20 Aug 2020 17:28:00 -0400 Subject: [PATCH] Improved prediction accuracy --- .../Text_Prediction_Using_N-grams/app.R | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/2. Web Application/Text_Prediction_Using_N-grams/app.R b/2. Web Application/Text_Prediction_Using_N-grams/app.R index 1a4e443..d12eeb7 100644 --- a/2. Web Application/Text_Prediction_Using_N-grams/app.R +++ b/2. Web Application/Text_Prediction_Using_N-grams/app.R @@ -45,22 +45,17 @@ server <- function(input, output) { bigram <- readRDS("bigram.rds") trigram <- readRDS("trigram.rds") - clean_input <- function(x) { - xclean <- removeNumbers(removePunctuation(tolower(x))) - return(strsplit(xclean, " ")[[1]]) - } - query <- reactive({clean_input(input$text)}) - output$prediction <- renderText({ - if(length(query()) == 0) { " " } - else if(length(query()) >= 2 & !(identical(integer(0), which(startsWith(trigram$word, query()))))) { - tail(strsplit(trigram$word[which(startsWith(trigram$word, query()))], " ")[[1]], 1) + query <- strsplit(removeNumbers(removePunctuation(tolower(input$text))), " ")[[1]] + if(length(query) == 0) { " " } + else if(!(identical(integer(0), which(startsWith(trigram$word, paste(tail(query, 2)[1], tail(query, 1))))))) { + tail(strsplit(trigram$word[which(startsWith(trigram$word, paste(tail(query, 2)[1], tail(query, 1))))], " ")[[1]], 1) } - else if(length(query()) == 1 & !(identical(integer(0), which(startsWith(bigram$word, query()))))) { - tail(strsplit(bigram$word[which(startsWith(bigram$word, query()))], " ")[[1]], 1) + else if(!(identical(integer(0), which(startsWith(bigram$word, tail(query, 1)))))) { + tail(strsplit(bigram$word[which(startsWith(bigram$word, tail(query, 1)))], " ")[[1]], 1) } - else{ sample(c("and", "is", "the"), 1) } + else{ sample(c("and", "is", "the", unigram$word[1:50]), 1) } }) output$unigram <- renderPlot({