Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Import csv #63

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .idea/temp.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions backend/app/services/parse_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from app.models import Document, Corpus

def parse_csv(csv_reader, title="new_data"):
    """
    Build a Corpus out of the rows produced by a CSV reader.

    Every row is turned into one Document; the documents are then attached
    to a newly created Corpus titled "<title> Corpus".

    csv_reader: an iterable of dict-like rows (for example a csv.DictReader).
        Each row must provide the columns 'title', 'transcript', 'speaker_1'
        and 'recorded_date'.
    title: prefix used for the title of the new corpus.

    Returns the saved Corpus instance.

    Raises KeyError if any row lacks one of the required columns. The check
    runs over all rows before anything is written, so a malformed file does
    not leave behind documents that belong to no corpus.
    """
    # TODO: let the user map CSV columns to Document fields. For now the
    # column names are assumed to match the dataset being imported.
    required_columns = ('title', 'transcript', 'speaker_1', 'recorded_date')

    # Materialise and validate the rows up front: csv.DictReader fills the
    # missing trailing fields of a short row with None, which would otherwise
    # only fail halfway through the import (e.g. on None[:4]).
    rows = list(csv_reader)
    for row_number, row in enumerate(rows, start=1):
        missing = [
            column for column in required_columns
            if column not in row or row[column] is None
        ]
        if missing:
            raise KeyError(
                f"CSV row {row_number} is missing required column(s): "
                f"{', '.join(missing)}"
            )

    doc_list = []
    for row in rows:
        new_doc = Document.objects.create_document(
            title=row['title'],
            text=row['transcript'],
            author=row['speaker_1'],
            # Only the first four characters of the recorded date are kept
            # as the year.
            year=row['recorded_date'][:4],
        )
        # NOTE(review): create_document may already persist the instance;
        # the explicit save is kept because its implementation is not
        # visible here.
        new_doc.save()
        doc_list.append(new_doc)

    new_corpus = Corpus(title=f"{title} Corpus")
    # The corpus is saved before its documents are assigned.
    new_corpus.save()

    new_corpus.documents.set(doc_list)
    new_corpus.save()

    return new_corpus
16 changes: 15 additions & 1 deletion backend/app/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
}
"""
import json
import csv


from rest_framework.decorators import api_view
from rest_framework.response import Response
Expand All @@ -36,7 +38,7 @@
GenderSerializer,
CorpusSerializer
)

from app.services.parse_csv import parse_csv

@api_view(['GET'])
def get_example(request, example_id):
Expand Down Expand Up @@ -102,6 +104,7 @@ def add_document(request):
"""
API endpoint for adding a piece of document
"""
print("we are in add_document")
Nisha-Nathan marked this conversation as resolved.
Show resolved Hide resolved
attributes = request.data
new_attributes = {}
for attribute in attributes['newAttributes']:
Expand Down Expand Up @@ -129,6 +132,17 @@ def all_documents(request):
serializer = SimpleDocumentSerializer(doc_objs, many=True)
return Response(serializer.data)

@api_view(['POST'])
def upload_document(request):
    """
    API endpoint for uploading a CSV file and importing its rows as a new corpus.

    Expects the uploaded file under the "file" key of the request data. The
    file content is decoded as UTF-8 (a leading byte-order mark is ignored),
    read with csv.DictReader and handed to parse_csv.

    Responds with the serialized corpus on success, or with status 400 and an
    "error" message when no file was sent or the file is not valid UTF-8.
    """
    # Imported locally so this endpoint does not depend on the module's
    # import block already providing io.
    import io

    uploaded_file = request.data.get("file")
    if uploaded_file is None:
        return Response(
            {"error": "No file was uploaded under the 'file' field."},
            status=400,
        )

    try:
        # 'utf-8-sig' strips a leading BOM, which would otherwise become part
        # of the first column name and break the header lookup.
        text = uploaded_file.read().decode('utf-8-sig')
    except UnicodeDecodeError:
        return Response(
            {"error": "The uploaded file is not valid UTF-8 text."},
            status=400,
        )

    # Feed the csv module the raw text with newline='' instead of pre-split
    # lines, so line breaks inside quoted fields are preserved and the csv
    # module alone decides where a record ends.
    csv_reader = csv.DictReader(io.StringIO(text, newline=''))
    new_corpus = parse_csv(csv_reader)
    serializer = CorpusSerializer(new_corpus)
    return Response(serializer.data)

@api_view(['GET'])
def get_document(request, doc_id):
Expand Down
3 changes: 2 additions & 1 deletion backend/config/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
path('api/update_corpus_docs', views.update_corpus_docs),
path('api/delete_corpus', views.delete_corpus),
path('api/corpus/<int:corpus_id>', views.get_corpus),
path('api/upload_document', views.upload_document),
Nisha-Nathan marked this conversation as resolved.
Show resolved Hide resolved

# View paths
path('', views.index, name='index'),
Expand All @@ -43,5 +44,5 @@
path('documents', views.documents, name='documents'),
path('document/<int:doc_id>', views.single_document, name='document'),
path('corpora', views.corpora, name='corpora'),
path('corpus/<int:corpus_id>', views.corpus, name='corpus')
path('corpus/<int:corpus_id>', views.corpus, name='corpus'),
]