From f3114ec0bd2580de554345eb6ea722fdf39545ca Mon Sep 17 00:00:00 2001 From: Watermelanie <46464201+Watermelanie@users.noreply.github.com> Date: Wed, 21 Jul 2021 17:09:52 -0400 Subject: [PATCH 01/11] created an API endpoint for posting proximity analysis Co-Authored-By: yaroluchko <85574367+yaroluchko@users.noreply.github.com> --- backend/app/models.py | 2 +- backend/app/views.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/app/models.py b/backend/app/models.py index 3b7bd911..494e57bb 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -554,4 +554,4 @@ def __eq__(self, other): return list(self.documents.values_list('pk', flat=True)) == list(other.documents.values_list('pk', flat=True)) def __hash__(self): - return super().__hash__() + return super().__hash_ diff --git a/backend/app/views.py b/backend/app/views.py index faf82e45..da4eef87 100644 --- a/backend/app/views.py +++ b/backend/app/views.py @@ -36,6 +36,7 @@ GenderSerializer, CorpusSerializer ) +from .analysis.proximity import run_analysis @api_view(['GET']) @@ -243,6 +244,15 @@ def get_corpus(request, corpus_id): return Response(serializer.data) +@api_view(['POST']) +def add_proximity_analysis(request, corpus_id, word_window): + """ + API endpoint for posting the proximity analysis + """ + results = json.dumps(run_analysis(corpus_id, word_window)) + return results + + def corpora(request): """ Corpora page From 4d8860555f9dd0abe48a9fb1c6ebfa8958c1bd32 Mon Sep 17 00:00:00 2001 From: Joshua Feliciano Date: Wed, 21 Jul 2021 17:45:08 -0400 Subject: [PATCH 02/11] Resolved `__hash__` error --- backend/app/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/models.py b/backend/app/models.py index 494e57bb..3b7bd911 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -554,4 +554,4 @@ def __eq__(self, other): return list(self.documents.values_list('pk', flat=True)) == list(other.documents.values_list('pk', 
flat=True)) def __hash__(self): - return super().__hash_ + return super().__hash__() From 0f8b7aaf19c9de940ff6a8a725024563988799a6 Mon Sep 17 00:00:00 2001 From: yaroluchko Date: Thu, 22 Jul 2021 16:45:54 -0400 Subject: [PATCH 03/11] Proximity Analysis API endpoint This creates the Proximity Analysis API endpoint and a way to see all the analyses --- backend/app/views.py | 34 +++++++++++++++++++++++++++------- backend/config/urls.py | 2 ++ 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/backend/app/views.py b/backend/app/views.py index 72e3c2cb..e171f01b 100644 --- a/backend/app/views.py +++ b/backend/app/views.py @@ -20,7 +20,6 @@ 'component_name': 'ExampleId' } """ -import json from rest_framework.decorators import api_view from rest_framework.response import Response @@ -28,13 +27,15 @@ from .models import ( Document, Gender, - Corpus + Corpus, + ProximityAnalysis ) from .serializers import ( DocumentSerializer, SimpleDocumentSerializer, GenderSerializer, - CorpusSerializer + CorpusSerializer, + ProximityAnalysisSerializer, ) from .analysis.proximity import run_analysis @@ -243,14 +244,34 @@ def get_corpus(request, corpus_id): serializer = CorpusSerializer(corpus_obj) return Response(serializer.data) +@api_view(['GET']) +def all_proximity(request): + prox_objs = ProximityAnalysis.objects.all() + serializer = ProximityAnalysisSerializer(prox_objs, many=True) + return Response(serializer.data) @api_view(['POST']) -def add_proximity_analysis(request, corpus_id, word_window): +def add_proximity_analysis(request): """ API endpoint for posting the proximity analysis """ - results = json.dumps(run_analysis(corpus_id, word_window)) - return results + attributes = request.data + corpus_id = attributes['corpus_id'] + word_window = int(attributes['word_window']) + results = run_analysis(corpus_id, word_window) + proximity_query = ProximityAnalysis.objects.filter(corpus__id=corpus_id, word_window=word_window) + if proximity_query.exists(): + proximity_obj = 
proximity_query.get() + else: + fields = { + 'corpus': Corpus.objects.get(pk=corpus_id), + 'word_window': word_window, + 'results': results, + } + proximity_obj = ProximityAnalysis.objects.create(**fields) + proximity_obj.genders.add(Gender.objects.all()) + serializer = ProximityAnalysisSerializer(proximity_obj) + return Response(serializer.data) def corpora(request): @@ -284,4 +305,3 @@ def corpus(request, corpus_id): } return render(request, 'index.html', context) - diff --git a/backend/config/urls.py b/backend/config/urls.py index 014a46de..7f2ad401 100644 --- a/backend/config/urls.py +++ b/backend/config/urls.py @@ -35,6 +35,8 @@ path('api/update_corpus_docs', views.update_corpus_docs), path('api/delete_corpus', views.delete_corpus), path('api/corpus/', views.get_corpus), + path('api/proximity', views.add_proximity_analysis), + path('api/all_proximity', views.all_proximity), # View paths path('', views.index, name='index'), From 4da0311728e75a2b87e60fbb23c0d779c40ff26a Mon Sep 17 00:00:00 2001 From: yaroluchko Date: Thu, 22 Jul 2021 18:21:06 -0400 Subject: [PATCH 04/11] A fix to make the key a string instead of Gender --- backend/app/analysis/proximity.py | 5 ++--- backend/app/views.py | 8 +++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/backend/app/analysis/proximity.py b/backend/app/analysis/proximity.py index 5a7c2699..8a09a235 100644 --- a/backend/app/analysis/proximity.py +++ b/backend/app/analysis/proximity.py @@ -32,7 +32,6 @@ def run_analysis(corpus_id, word_window): genders, word_window ) - breakpoint() return results @@ -55,12 +54,12 @@ def generate_gender_token_counters(pos_tags, genders, word_window): results = {} for gender in genders: - results[gender] = dict() + results[str(gender)] = dict() for PRONOUN_TYPE in PronounSeries.PRONOUN_TYPES: pronoun_set = gender.pronoun_series.values_list(PRONOUN_TYPE, flat=True) doc_result = generate_token_counter(pos_tags, pronoun_set, word_window) - results[gender][PRONOUN_TYPE] = 
doc_result + results[str(gender)][PRONOUN_TYPE] = doc_result return results diff --git a/backend/app/views.py b/backend/app/views.py index e171f01b..d9f67bae 100644 --- a/backend/app/views.py +++ b/backend/app/views.py @@ -39,7 +39,6 @@ ) from .analysis.proximity import run_analysis - @api_view(['GET']) def get_example(request, example_id): """ @@ -244,19 +243,21 @@ def get_corpus(request, corpus_id): serializer = CorpusSerializer(corpus_obj) return Response(serializer.data) + @api_view(['GET']) def all_proximity(request): prox_objs = ProximityAnalysis.objects.all() serializer = ProximityAnalysisSerializer(prox_objs, many=True) return Response(serializer.data) + @api_view(['POST']) def add_proximity_analysis(request): """ API endpoint for posting the proximity analysis """ attributes = request.data - corpus_id = attributes['corpus_id'] + corpus_id = int(attributes['corpus_id']) word_window = int(attributes['word_window']) results = run_analysis(corpus_id, word_window) proximity_query = ProximityAnalysis.objects.filter(corpus__id=corpus_id, word_window=word_window) @@ -269,7 +270,8 @@ def add_proximity_analysis(request): 'results': results, } proximity_obj = ProximityAnalysis.objects.create(**fields) - proximity_obj.genders.add(Gender.objects.all()) + gender_ids = list(Gender.objects.values_list('pk', flat=True)) + proximity_obj.genders.add(*gender_ids) serializer = ProximityAnalysisSerializer(proximity_obj) return Response(serializer.data) From e4ee3587a91cc34e5d3ce0e905c3df51d3f826c5 Mon Sep 17 00:00:00 2001 From: yaroluchko Date: Thu, 22 Jul 2021 19:01:17 -0400 Subject: [PATCH 05/11] Create proximity_test.py A manual test for proximity posting --- backend/app/analysis/proximity_test.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 backend/app/analysis/proximity_test.py diff --git a/backend/app/analysis/proximity_test.py b/backend/app/analysis/proximity_test.py new file mode 100644 index 00000000..dd0585ba --- /dev/null +++ 
b/backend/app/analysis/proximity_test.py @@ -0,0 +1,18 @@ +from ..models import Document, Corpus +from rest_framework.test import APIRequestFactory +from app.views import add_proximity_analysis + + +def proximity_view_test(text): + """A function for testing the proximity analysis posting, + check api/all_proximity to see the updates list + """ + c1 = Corpus(title="Corpus Test", description="This is the testing corpus") + c1.save() + Document.objects.create_document(title='document_1', year=2021, text=text) + d1 = Document.objects.get(title='document_1') + c1.documents.add(d1) + factory = APIRequestFactory() + request = factory.post('api/all_proximity', {'word_window': '2', 'corpus_id': c1.id}) + add_proximity_analysis(request) + From 191daa6a166a13cd77c07e62752de4a3d9d7542b Mon Sep 17 00:00:00 2001 From: yaroluchko Date: Thu, 22 Jul 2021 19:12:55 -0400 Subject: [PATCH 06/11] Update views.py Fix unnecessary result calculation if already done before --- backend/app/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/views.py b/backend/app/views.py index d9f67bae..56a9a76c 100644 --- a/backend/app/views.py +++ b/backend/app/views.py @@ -259,11 +259,11 @@ def add_proximity_analysis(request): attributes = request.data corpus_id = int(attributes['corpus_id']) word_window = int(attributes['word_window']) - results = run_analysis(corpus_id, word_window) proximity_query = ProximityAnalysis.objects.filter(corpus__id=corpus_id, word_window=word_window) if proximity_query.exists(): proximity_obj = proximity_query.get() else: + results = run_analysis(corpus_id, word_window) fields = { 'corpus': Corpus.objects.get(pk=corpus_id), 'word_window': word_window, 'results': results, } proximity_obj = ProximityAnalysis.objects.create(**fields) gender_ids = list(Gender.objects.values_list('pk', flat=True)) proximity_obj.genders.add(*gender_ids) serializer = ProximityAnalysisSerializer(proximity_obj) return Response(serializer.data) From 30ef3957c6e056fa1b69666f9e8d303eb60527e6 Mon Sep 17 00:00:00 2001 From: yaroluchko Date: Thu, 22 Jul 2021 19:32:21 -0400 Subject: [PATCH 07/11] Update views.py --- backend/app/views.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/backend/app/views.py b/backend/app/views.py index
56a9a76c..0c76b1ff 100644 --- a/backend/app/views.py +++ b/backend/app/views.py @@ -259,9 +259,12 @@ def add_proximity_analysis(request): attributes = request.data corpus_id = int(attributes['corpus_id']) word_window = int(attributes['word_window']) + proximity_query = ProximityAnalysis.objects.filter(corpus__id=corpus_id, word_window=word_window) + if proximity_query.exists(): proximity_obj = proximity_query.get() + else: results = run_analysis(corpus_id, word_window) fields = { @@ -269,9 +272,11 @@ def add_proximity_analysis(request): 'word_window': word_window, 'results': results, } + proximity_obj = ProximityAnalysis.objects.create(**fields) gender_ids = list(Gender.objects.values_list('pk', flat=True)) proximity_obj.genders.add(*gender_ids) + serializer = ProximityAnalysisSerializer(proximity_obj) return Response(serializer.data) From e2c54487fb7441e7742257725a2401fa51f70591 Mon Sep 17 00:00:00 2001 From: yaroluchko Date: Fri, 23 Jul 2021 15:51:47 -0400 Subject: [PATCH 08/11] Test Case for proximity analysis object creation --- backend/app/tests.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/backend/app/tests.py b/backend/app/tests.py index 631ecec1..3e745f08 100644 --- a/backend/app/tests.py +++ b/backend/app/tests.py @@ -5,12 +5,15 @@ from django.test import TestCase from django.core.exceptions import ObjectDoesNotExist +from rest_framework.test import APITestCase +from rest_framework import status from .models import ( PronounSeries, Document, Corpus, Gender, + ProximityAnalysis ) from .analysis import proximity @@ -445,3 +448,16 @@ def test_proximity(self): } self.assertEqual(results, expected) + + +class ProximityObjectCreation(APITestCase, ProximityTestCase): + def test_create_proximity(self): + """ + Ensure we can create a proximity analysis object + """ + corpus = Corpus.objects.get(title='Test Corpus') + url = '/api/proximity' + data = {"word_window": "2", "corpus_id": "1"} + response = self.client.post(url, data) + 
self.assertEqual(response.status_code, 200) + self.assertEqual(ProximityAnalysis.objects.count(), 1) From 2130b241828f8bbbf6ff57c1a4d366200b6c4ab9 Mon Sep 17 00:00:00 2001 From: yaroluchko Date: Fri, 23 Jul 2021 15:55:15 -0400 Subject: [PATCH 09/11] Delete proximity_test.py --- backend/app/analysis/proximity_test.py | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 backend/app/analysis/proximity_test.py diff --git a/backend/app/analysis/proximity_test.py b/backend/app/analysis/proximity_test.py deleted file mode 100644 index dd0585ba..00000000 --- a/backend/app/analysis/proximity_test.py +++ /dev/null @@ -1,18 +0,0 @@ -from ..models import Document, Corpus -from rest_framework.test import APIRequestFactory -from app.views import add_proximity_analysis - - -def proximity_view_test(text): - """A function for testing the proximity analysis posting, - check api/all_proximity to see the updates list - """ - c1 = Corpus(title="Corpus Test", description="This is the testing corpus") - c1.save() - Document.objects.create_document(title='document_1', year=2021, text=text) - d1 = Document.objects.get(title='document_1') - c1.documents.add(d1) - factory = APIRequestFactory() - request = factory.post('api/all_proximity', {'word_window': '2', 'corpus_id': c1.id}) - add_proximity_analysis(request) - From 41382a6f40383f381aea700fad866497051367b0 Mon Sep 17 00:00:00 2001 From: Joshua Feliciano Date: Fri, 23 Jul 2021 15:59:57 -0400 Subject: [PATCH 10/11] Altered import statement `frequency.py` also has a `run_analysis` function; this change will prevent aliasing errors should the other team import the `frequency` counterpart into `views.py`. 
--- backend/app/views.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/app/views.py b/backend/app/views.py index 0c76b1ff..f9cdb653 100644 --- a/backend/app/views.py +++ b/backend/app/views.py @@ -37,7 +37,9 @@ CorpusSerializer, ProximityAnalysisSerializer, ) -from .analysis.proximity import run_analysis +from .analysis import ( + proximity +) @api_view(['GET']) def get_example(request, example_id): @@ -266,7 +268,7 @@ def add_proximity_analysis(request): proximity_obj = proximity_query.get() else: - results = run_analysis(corpus_id, word_window) + results = proximity.run_analysis(corpus_id, word_window) fields = { 'corpus': Corpus.objects.get(pk=corpus_id), 'word_window': word_window, From bfa00608594774696b11c1b8d0eff7a8a3a1cb8d Mon Sep 17 00:00:00 2001 From: Joshua Feliciano Date: Fri, 23 Jul 2021 16:01:15 -0400 Subject: [PATCH 11/11] Squashed commit of the following: commit 0fdfcc6ada81e5f5b1e142827bc50d30e2148b0a Merge: 1276da0 b10cfa9 Author: Peihua Huang <32581282+phuang00@users.noreply.github.com> Date: Fri Jul 23 15:48:02 2021 -0400 Merge pull request #25 from dhmit/transfer_frequency_module_PR1 Transfer frequency module pr1 commit 1276da064e4fd5dcc9da858aa78d852524c54b6d Merge: 657b20f b5a9482 Author: YifanWang0 <47550739+YifanWang0@users.noreply.github.com> Date: Fri Jul 23 15:38:12 2021 -0400 Merge pull request #54 from dhmit/transfer_frequency_model Added FrequencyAnalysis model commit b5a948238525814c9086af246e3197f1b2f0016d Author: Yifan Wang Date: Fri Jul 23 14:08:54 2021 -0400 added FrequencyAnalysis model commit b10cfa95e08ad4132ec3a3391649d85e8fa5c234 Author: Peihua Huang Date: Fri Jul 23 13:41:17 2021 -0400 update run_analysis to use primary keys instead of labels and update result dictionary to key by gender object commit 1196fc14726aeb228902877e2ea55311654e1321 Author: Peihua Huang Date: Fri Jul 23 13:36:54 2021 -0400 Revert "update frequency result to key by gender primary key instead of label" This reverts 
commit 151a4bc066c66225e6f6c6b8cd8fcf9d673df7b6. commit 151a4bc066c66225e6f6c6b8cd8fcf9d673df7b6 Author: Peihua Huang Date: Fri Jul 23 13:34:11 2021 -0400 update frequency result to key by gender primary key instead of label commit 657b20f92d417acfbc0cc633c21ce596da7208ea Merge: 6619e68 3c6e5bf Author: Joshua Feliciano <41080599+joshfeli@users.noreply.github.com> Date: Fri Jul 23 11:42:15 2021 -0400 Merge pull request #52 from dhmit/remove_proximity_breakpoint Removed debugging breakpoint commit 3c6e5bf9170190342a18605022fff1b8e8a38b91 Author: Joshua Feliciano Date: Thu Jul 22 17:50:15 2021 -0400 Removed debugging breakpoint commit 0684a02abd7ebee39e149b70671abefee63fa9c1 Author: Peihua Huang Date: Thu Jul 22 16:43:50 2021 -0400 got frequency analysis working with a corpus commit 2003ce64ffec878238c4f5e3d1c04c57a1135f5e Author: Peihua Huang Date: Thu Jul 22 16:28:27 2021 -0400 convert run_analysis to run_single_analysis, such that the function only takes in one document and update docstrings and added test commit 0f817ba5ae500c05b4ecc0ce5ac7460c8e75271c Merge: dc21391 6619e68 Author: Peihua Huang Date: Thu Jul 22 14:54:15 2021 -0400 Merge branch 'main' into transfer_frequency_module_PR1 commit dc213913a32b5920863fce5fb5286ee1f2a071ca Author: Yifan Wang Date: Wed Jul 7 16:44:55 2021 -0400 fixed small mistakes according to pr comments commit a852cd3db1a370eac384b04969cf3a27eaf42580 Author: Yifan Wang Date: Mon Jun 28 16:42:28 2021 -0400 added docstrings commit 32731f4ab3759730ded193bb115ea92e7f3baab5 Author: Yifan Wang Date: Mon Jun 28 16:01:57 2021 -0400 fixed minor bug with _run_analysis function commit 233e2e6602e99bc212fffab6442a72d60a83e8db Author: Yifan Wang Date: Fri Jun 25 16:52:31 2021 -0400 moved over run analysis but still need to fix bug with helper function --- backend/app/admin.py | 1 + backend/app/analysis/frequency.py | 87 +++++++++++++++++++ .../app/migrations/0011_frequencyanalysis.py | 26 ++++++ backend/app/models.py | 12 +++ 
backend/app/serializers.py | 16 +++- backend/app/tests.py | 74 +++++++++++++++- 6 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 backend/app/migrations/0011_frequencyanalysis.py diff --git a/backend/app/admin.py b/backend/app/admin.py index a6e5c4d9..845d2188 100644 --- a/backend/app/admin.py +++ b/backend/app/admin.py @@ -11,6 +11,7 @@ models.Gender, models.Corpus, models.ProximityAnalysis, + models.FrequencyAnalysis, ] for model in models_to_register: diff --git a/backend/app/analysis/frequency.py b/backend/app/analysis/frequency.py index e69de29b..a935c6f5 100644 --- a/backend/app/analysis/frequency.py +++ b/backend/app/analysis/frequency.py @@ -0,0 +1,87 @@ +from collections import Counter +from ..models import ( + Corpus, + Document, + Gender +) + + +def _get_gender_word_frequencies_relative(gender_word_counts): + """ + A private helper function that examines identifier counts keyed to Gender instances, + determines the total count value of all identifiers across Gender instances, + and returns the percentage of each identifier count over the total count. + + :param gender_word_counts: a dictionary keying gender instances to string identifiers keyed to + integer counts. + :return: a dictionary with the integer counts transformed into float values representing + the identifier count as a percentage of the total identifier counts across all + identifiers. 
+ """ + + output = {} + total_word_count = 0 + for gender in gender_word_counts: + for word in gender_word_counts[gender]: + total_word_count += gender_word_counts[gender][word] + + for gender in gender_word_counts: + output[gender] = {} + for word, original_count in gender_word_counts[gender].items(): + try: + frequency = original_count / total_word_count + except ZeroDivisionError: + frequency = 0 + output[gender][word] = frequency + + return output + + +def run_single_analysis(doc_obj, genders): + """ + This method generates a dictionary that includes a Counter (count) that keys + Document instances to Gender instances to Counter instances representing the total + number of instances of each Gender's pronouns in a given Document, a dictionary (frequency) + keying Document instances to Gender instances to dictionaries of the shape {str:float} + representing the total number of instances of each Gender's pronouns over the total word count + of that Document; and a dictionary (relative) keying Document instances to Gender instances + to dictionaries of the shape {str:float} representing the relative percentage of Gender + pronouns across all Gender instances in a given Document instance. + + :param doc_obj: an instance of the Document model + :param genders: a list of Gender objects + :return: a dictionary containing the frequency analyses of the Document instance + """ + count = Counter() + frequency = {} + + for gender in genders: + count[gender] = doc_obj.get_count_of_words(gender.pronouns) + frequency[gender] = doc_obj.get_word_freqs(gender.pronouns) + relative = _get_gender_word_frequencies_relative(count) + + output = { + 'count': count, + 'frequency': frequency, + 'relative': relative + } + + return output + + +def run_analysis(corpus_id, gender_ids): + """ + This method generates a dictionary of dictionaries for each Document instance in the Corpus. + Each dictionary maps the type of frequency analysis (count, frequency, relative) to the + analysis itself. 
+ + :param corpus_id: the ID of a Corpus instance + :param gender_ids: a list of integers representing Gender primary keys + :return: a dictionary mapping the Document IDs to the frequency analyses of the Document instance + """ + results = {} + genders = Gender.objects.filter(id__in=gender_ids) + doc_ids = Corpus.objects.filter(pk=corpus_id).values_list('documents__pk', flat=True) + for pk in doc_ids: + results[pk] = run_single_analysis(Document.objects.get(id=pk), genders) + return results diff --git a/backend/app/migrations/0011_frequencyanalysis.py b/backend/app/migrations/0011_frequencyanalysis.py new file mode 100644 index 00000000..cf05bdc1 --- /dev/null +++ b/backend/app/migrations/0011_frequencyanalysis.py @@ -0,0 +1,26 @@ +# Generated by Django 3.1.5 on 2021-07-23 18:08 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('app', '0010_merge_proximity_db_seed'), + ] + + operations = [ + migrations.CreateModel( + name='FrequencyAnalysis', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('results', models.JSONField()), + ('corpus', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='frequency_analyses', to='app.corpus')), + ('genders', models.ManyToManyField(related_name='frequency_analyses', to='app.Gender')), + ], + options={ + 'verbose_name_plural': 'Frequency Analyses', + }, + ), + ] diff --git a/backend/app/models.py b/backend/app/models.py index cd31836c..1cbe7f33 100644 --- a/backend/app/models.py +++ b/backend/app/models.py @@ -569,3 +569,15 @@ class ProximityAnalysis(models.Model): class Meta: verbose_name_plural = 'proximity analyses' + +class FrequencyAnalysis(models.Model): + """ + This model will persist the results from the frequency analysis functions. 
+ """ + + corpus = models.ForeignKey(Corpus, related_name='frequency_analyses', on_delete=models.CASCADE) + genders = models.ManyToManyField(Gender, related_name='frequency_analyses') + results = models.JSONField() + + class Meta: + verbose_name_plural = 'Frequency Analyses' diff --git a/backend/app/serializers.py b/backend/app/serializers.py index 57951be6..0038d0db 100644 --- a/backend/app/serializers.py +++ b/backend/app/serializers.py @@ -10,7 +10,8 @@ Gender, Document, Corpus, - ProximityAnalysis + ProximityAnalysis, + FrequencyAnalysis ) @@ -76,3 +77,16 @@ class ProximityAnalysisSerializer(serializers.ModelSerializer): class Meta: model = ProximityAnalysis fields = ['id', 'corpus', 'genders', 'word_window', 'results'] + + +class FrequencyAnalysisSerializer(serializers.ModelSerializer): + """ + Serializes a FrequencyAnalysis object + """ + + corpus = serializers.StringRelatedField() + genders = serializers.PrimaryKeyRelatedField(read_only=True, many=True) + + class Meta: + model = FrequencyAnalysis + fields = ['id', 'corpus', 'genders', 'results'] diff --git a/backend/app/tests.py b/backend/app/tests.py index 3e745f08..6f6a3bb8 100644 --- a/backend/app/tests.py +++ b/backend/app/tests.py @@ -15,7 +15,10 @@ Gender, ProximityAnalysis ) -from .analysis import proximity +from .analysis import ( + proximity, + frequency +) class PronounSeriesTestCase(TestCase): @@ -200,6 +203,75 @@ def test_update_metadata(self): self.assertEqual(doc.word_count, 9) +class FrequencyTestCase(TestCase): + """ + Test cases for the frequency analysis + """ + def setUp(self): + text1 = """She took a lighter out of her purse and handed it over to him. + He lit his cigarette and took a deep drag from it, and then began + his speech which ended in a proposal. 
Her tears drowned the ring.""" + Document.objects.create_document(title='doc1', year=2021, text=text1) + Corpus.objects.create(title='corpus1') + Corpus.objects.get(title='corpus1').documents.add(Document.objects.get(title='doc1')) + + def test_single_frequency(self): + doc1 = Document.objects.get(title='doc1') + male = Gender.objects.get(pk=1, label='Male') + female = Gender.objects.get(pk=2, label='Female') + nonbinary = Gender.objects.get(pk=3, label='Nonbinary') + result = frequency.run_single_analysis(doc1, [male, female, nonbinary]) + expected = { + 'count': Counter({ + male: Counter({'his': 2, 'him': 1, 'he': 1, 'himself': 0}), + female: Counter({'her': 2, 'she': 1, 'herself': 0, 'hers': 0}), + nonbinary: Counter({'theirs': 0, 'themself': 0, 'them': 0, 'their': 0, 'they': 0})}), + 'frequency': { + male: {'his': 0.05, 'him': 0.025, 'he': 0.025, 'himself': 0.0}, + female: {'herself': 0.0, 'she': 0.025, 'her': 0.05, 'hers': 0.0}, + nonbinary: {'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0}}, + 'relative': { + male: { + 'his': 0.2857142857142857, + 'him': 0.14285714285714285, + 'he': 0.14285714285714285, + 'himself': 0.0}, + female: { + 'herself': 0.0, + 'she': 0.14285714285714285, + 'her': 0.2857142857142857, 'hers': 0.0}, + nonbinary: {'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0}}} + self.assertEqual(result, expected) + + + def test_run_analysis(self): + result = frequency.run_analysis(1, [1, 2, 3]) + male = Gender.objects.get(pk=1, label='Male') + female = Gender.objects.get(pk=2, label='Female') + nonbinary = Gender.objects.get(pk=3, label='Nonbinary') + expected = { + 1: {'count': Counter({ + male: Counter({'his': 2, 'him': 1, 'he': 1, 'himself': 0}), + female: Counter({'her': 2, 'she': 1, 'herself': 0, 'hers': 0}), + nonbinary: Counter({'theirs': 0, 'themself': 0, 'them': 0, 'their': 0, 'they': 0})}), + 'frequency': { + male: {'his': 0.05, 'him': 0.025, 'he': 0.025, 'himself': 0.0}, + female: {'herself': 
0.0, 'she': 0.025, 'her': 0.05, 'hers': 0.0}, + nonbinary: {'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0}}, + 'relative': { + male: { + 'his': 0.2857142857142857, + 'him': 0.14285714285714285, + 'he': 0.14285714285714285, + 'himself': 0.0}, + female: { + 'herself': 0.0, + 'she': 0.14285714285714285, + 'her': 0.2857142857142857, 'hers': 0.0}, + nonbinary: {'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0}}}} + self.assertEqual(result, expected) + + class CorpusTestCase(TestCase): """ Test Cases for the Corpus Model