Skip to content

Commit

Permalink
Added deploy for streamlit
Browse files Browse the repository at this point in the history
Signed-off-by: Stefano Savare <[email protected]>
  • Loading branch information
deatinor committed Jun 14, 2021
1 parent 5cdba5c commit fc8a471
Show file tree
Hide file tree
Showing 7 changed files with 559 additions and 15 deletions.
3 changes: 2 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@ FROM python:3.9.5
COPY scripts/testing/ app/
COPY Makefile app/
COPY streamlit-requirements.txt app/
COPY data/streamlit app/data/streamlit
WORKDIR app/
RUN apt-get update && apt-get install make && apt-get install gcc && apt-get install g++
RUN pip install --upgrade pip setuptools wheel
RUN make streamlit
EXPOSE 8501
ENTRYPOINT ["streamlit", "run"]
CMD ["sector_pred_with_st.py"]
CMD ["subpillar_pred_with_st.py"]

# Streamlit parameters
ENV LC_ALL=C.UTF-8
Expand Down
4 changes: 1 addition & 3 deletions Dockerrun.aws.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
"AWSEBDockerrunVersion": "1",
"Image": {
"Name": "961104659532.dkr.ecr.us-east-1.amazonaws.com/streamlit:latest",
"Update": "true",
"Memory": 16,
"Cpu": 4
"Update": "true"
},
"Ports": [
{
Expand Down
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,14 @@ cloud-install:
streamlit:
pip install -r streamlit-requirements.txt
pip install git+https://github.com/casics/nostril.git

streamlit-build:
docker build . -t deatinor/streamlit --no-cache --platform=linux/amd64

streamlit-build-arm:
docker build . -t deatinor/streamlit-m1 --no-cache

streamlit-deploy:
aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 961104659532.dkr.ecr.us-east-1.amazonaws.com
docker tag deatinor/streamlit 961104659532.dkr.ecr.us-east-1.amazonaws.com/streamlit
docker push 961104659532.dkr.ecr.us-east-1.amazonaws.com/streamlit
448 changes: 442 additions & 6 deletions notebooks/models/stefano/ssa-0.5-merge-pillars-subpillars.ipynb

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions notebooks/models/stefano/ssa-7.1-lm-bff-remote.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@
{
"data": {
"text/plain": [
"S3Path('s3://sagemaker-deep-experiments-dev/training/input_data/pytorch-2021-06-09-14-03-18-647-entailment-masked-en')"
"S3Path('s3://sagemaker-deep-experiments-dev/training/input_data/pytorch-2021-06-09-17-46-37-168-entailment-masked-en')"
]
},
"execution_count": 16,
Expand All @@ -417,7 +417,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 18,
"id": "6cf7b118",
"metadata": {
"ExecuteTime": {
Expand Down Expand Up @@ -446,6 +446,7 @@
" code_location=str(input_path),\n",
" instance_type='ml.p3.2xlarge',\n",
" instance_count=1,\n",
" volume_size=40,\n",
" role=role,\n",
" framework_version='1.8',\n",
" py_version='py36',\n",
Expand All @@ -458,7 +459,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 19,
"id": "d78b1234",
"metadata": {
"ExecuteTime": {
Expand All @@ -476,7 +477,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 20,
"id": "5d38873b",
"metadata": {
"ExecuteTime": {
Expand Down
95 changes: 95 additions & 0 deletions scripts/testing/subpillar_pred_with_st.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from io import StringIO
from pdfminer.layout import LAParams
from pdfminer.converter import TextConverter
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
import re
from nostril import nonsense
from urllib.parse import urlparse
import streamlit as st
import pandas as pd

import nltk
from nltk.tokenize import sent_tokenize

nltk.download("punkt")

# Sentences with fewer tokens than this are discarded by preprocess_sentence.
MIN_NUM_TOKENS = 5
# Tokens longer than this are counted as "sensible" words.
MIN_WORD_LEN = 4


# TODO: @Stefano please use prediction API
def predict_sector(sentence):
    """Placeholder classifier: always answers the "Cross" sector.

    Kept as a stub until the real prediction API is wired in (see TODO).
    """
    placeholder_label = "Cross"
    return placeholder_label


def preprocess_sentence(sentence):
    """Normalise whitespace and filter out noisy sentences.

    Returns "" when the sentence has fewer than MIN_NUM_TOKENS tokens or
    too few sensible-looking tokens after URL removal; otherwise returns
    the cleaned sentence with disallowed characters stripped.
    """
    # Collapse every run of whitespace into a single space.
    sentence = re.sub(r"\s+", " ", sentence)
    tokens = sentence.split(" ")
    if len(tokens) < MIN_NUM_TOKENS:
        return ""
    # Remove URL tokens (anything urlparse detects a scheme for).
    tokens = [token for token in tokens if not urlparse(token).scheme]
    sensible_token_count = 0
    for token in tokens:
        # NOTE(review): the right-hand operand is unreachable — any token
        # with len > 7 already satisfies len > MIN_WORD_LEN (4), so the
        # `or` short-circuits and nonsense() is never called. Was
        # `len(token) <= 7 or ... not nonsense(token)` intended? Confirm.
        if len(token) > MIN_WORD_LEN or (len(token) > 7 and not nonsense(token)):
            sensible_token_count += 1
    if sensible_token_count < MIN_NUM_TOKENS:
        return ""
    sentence = " ".join(tokens)
    # Keep word characters plus this whitelist of punctuation; drop the rest.
    keep = re.escape("/\\$.:,;-_()[]{}!'\"% ")
    sentence = re.sub(r"[^\w" + keep + "]", "", sentence)
    return sentence


def page_to_sentences(page):
    """Tokenise *page* into sentences and preprocess each one."""
    return [preprocess_sentence(raw) for raw in sent_tokenize(page)]


def pdf_parser(fp):
    """Extract deduplicated, preprocessed sentences from a PDF.

    Parameters:
        fp: a binary file object opened on the PDF.

    Returns:
        List of sentence strings in first-seen order.
    """
    with st.spinner("Converting PDF to text.."):
        rsrcmgr = PDFResourceManager()
        retstr = StringIO()
        laparams = LAParams()
        device = TextConverter(rsrcmgr, retstr, laparams=laparams)
        # Create a PDF interpreter object.
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        # Process each page contained in the document.
        sentences = []
        for page in PDFPage.get_pages(fp):
            interpreter.process_page(page)
            # NOTE(review): retstr accumulates across iterations, so each
            # page re-tokenises all preceding pages too. The dedup below
            # hides most duplicates, but sentences straddling a page
            # boundary can appear in two different forms — confirm whether
            # the buffer should be truncated per page.
            parsed_page = retstr.getvalue()
            sentences.extend(page_to_sentences(parsed_page))

        # Deduplicate while preserving first-seen order.
        seen = set()
        seen_add = seen.add
        sentences = [s for s in sentences if not (s in seen or seen_add(s))]
        # NOTE(review): device and retstr are never closed — consider
        # releasing them after parsing.
        return sentences


DATA_PATH = "data/streamlit/fastai-5ep-english.pickle"

# Display a random sample of 20 precomputed prediction rows.
data = pd.read_pickle(DATA_PATH).sample(20)
index, sentences, preds, targets = data.index, data.excerpt, data.Predictions, data.Targets

st.set_page_config(layout="wide")

# Column width ratios: index | gap | excerpt | gap | prediction | gap | target
_LAYOUT = [2, 1, 10, 1, 7, 1, 7]

header = st.beta_columns(_LAYOUT)
header[0].write("Index")
header[1].text(" ")
header[2].write("Excerpt")
header[3].text(" ")
header[4].text("Prediction")
header[5].text(" ")
header[6].text("Target")
st.markdown("""---""")

for ind, sentence, pred, target in zip(index, sentences, preds, targets):
    row = st.beta_columns(_LAYOUT)
    row[0].write(ind)
    row[1].text(" ")
    row[2].write(sentence)
    row[3].text(" ")
    row[4].text("\n".join(pred))
    row[5].text(" ")
    row[6].text("\n".join(target))
    st.markdown("""---""")
4 changes: 3 additions & 1 deletion streamlit-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
streamlit==0.82.0
pdfminer==20191125
nltk==3.6.2
nltk==3.6.2
pandas==1.2.4
watchdog==2.1.2

0 comments on commit fc8a471

Please sign in to comment.