Skip to content

Commit

Permalink
Merge pull request #25 from dhmit/transfer_frequency_module_PR1
Browse files Browse the repository at this point in the history
Transfer frequency module pr1
  • Loading branch information
phuang00 authored Jul 23, 2021
2 parents 1276da0 + b10cfa9 commit 0fdfcc6
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 1 deletion.
87 changes: 87 additions & 0 deletions backend/app/analysis/frequency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
from collections import Counter
from ..models import (
Corpus,
Document,
Gender
)


def _get_gender_word_frequencies_relative(gender_word_counts):
"""
A private helper function that examines identifier counts keyed to Gender instances,
determines the total count value of all identifiers across Gender instances,
and returns the percentage of each identifier count over the total count.
:param gender_word_counts: a dictionary keying gender instances to string identifiers keyed to
integer counts.
:return: a dictionary with the integer counts transformed into float values representing
the identifier count as a percentage of the total identifier counts across all
identifiers.
"""

output = {}
total_word_count = 0
for gender in gender_word_counts:
for word in gender_word_counts[gender]:
total_word_count += gender_word_counts[gender][word]

for gender in gender_word_counts:
output[gender] = {}
for word, original_count in gender_word_counts[gender].items():
try:
frequency = original_count / total_word_count
except ZeroDivisionError:
frequency = 0
output[gender][word] = frequency

return output


def run_single_analysis(doc_obj, genders):
    """
    Run the three pronoun frequency analyses for one Document.

    The result has three entries, each keyed by the Gender instances in `genders`:
    'count' is a Counter holding, per Gender, the value of
    `doc_obj.get_count_of_words(gender.pronouns)`; 'frequency' is a dictionary holding, per
    Gender, the value of `doc_obj.get_word_freqs(gender.pronouns)`; 'relative' is the 'count'
    data with every pronoun count rescaled to its share of all pronoun counts across all of
    the given Genders.

    :param doc_obj: an instance of the Document model
    :param genders: an iterable of Gender objects
    :return: a dictionary with the keys 'count', 'frequency' and 'relative'
    """
    pronoun_counts = Counter()
    pronoun_freqs = {}

    for gender in genders:
        pronoun_counts[gender] = doc_obj.get_count_of_words(gender.pronouns)
        pronoun_freqs[gender] = doc_obj.get_word_freqs(gender.pronouns)

    return {
        'count': pronoun_counts,
        'frequency': pronoun_freqs,
        'relative': _get_gender_word_frequencies_relative(pronoun_counts),
    }


def run_analysis(corpus_id, gender_ids):
    """
    Run the frequency analyses for every Document in a Corpus.

    Each Document's entry maps the type of frequency analysis (count, frequency, relative) to
    the analysis itself, as produced by `run_single_analysis`. A Corpus that does not exist or
    that has no Documents yields an empty dictionary.

    :param corpus_id: the ID of a Corpus instance
    :param gender_ids: a list of integers representing Gender primary keys
    :return: a dictionary mapping Document IDs to the frequency analyses of that Document
    """
    results = {}
    genders = Gender.objects.filter(id__in=gender_ids)
    doc_ids = Corpus.objects.filter(pk=corpus_id).values_list('documents__pk', flat=True)
    for pk in doc_ids:
        # Traversing the documents relation from a Corpus that has no Documents still
        # produces one row whose related pk is None; looking that up would raise
        # Document.DoesNotExist, so it is skipped.
        if pk is None:
            continue
        results[pk] = run_single_analysis(Document.objects.get(id=pk), genders)
    return results
74 changes: 73 additions & 1 deletion backend/app/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
Corpus,
Gender,
)
from .analysis import proximity
from .analysis import (
proximity,
frequency
)


class PronounSeriesTestCase(TestCase):
Expand Down Expand Up @@ -197,6 +200,75 @@ def test_update_metadata(self):
self.assertEqual(doc.word_count, 9)


class FrequencyTestCase(TestCase):
    """
    Test cases for the frequency analysis
    """
    def setUp(self):
        text1 = """She took a lighter out of her purse and handed it over to him.
He lit his cigarette and took a deep drag from it, and then began
his speech which ended in a proposal. Her tears drowned the ring."""
        Document.objects.create_document(title='doc1', year=2021, text=text1)
        # Keep the created rows so the tests can use their real primary keys instead of
        # assuming the database handed out 1.
        self.doc = Document.objects.get(title='doc1')
        self.corpus = Corpus.objects.create(title='corpus1')
        self.corpus.documents.add(self.doc)

    def _expected_analysis(self, male, female, nonbinary):
        """
        Build the analysis expected for 'doc1': a 40-word text containing 7 pronouns.

        :param male: the Gender instance labelled 'Male'
        :param female: the Gender instance labelled 'Female'
        :param nonbinary: the Gender instance labelled 'Nonbinary'
        :return: the dictionary `frequency.run_single_analysis` should produce for 'doc1'
        """
        return {
            'count': Counter({
                male: Counter({'his': 2, 'him': 1, 'he': 1, 'himself': 0}),
                female: Counter({'her': 2, 'she': 1, 'herself': 0, 'hers': 0}),
                nonbinary: Counter({
                    'theirs': 0, 'themself': 0, 'them': 0, 'their': 0, 'they': 0}),
            }),
            'frequency': {
                male: {'his': 0.05, 'him': 0.025, 'he': 0.025, 'himself': 0.0},
                female: {'herself': 0.0, 'she': 0.025, 'her': 0.05, 'hers': 0.0},
                nonbinary: {
                    'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0},
            },
            'relative': {
                male: {
                    'his': 0.2857142857142857,
                    'him': 0.14285714285714285,
                    'he': 0.14285714285714285,
                    'himself': 0.0},
                female: {
                    'herself': 0.0,
                    'she': 0.14285714285714285,
                    'her': 0.2857142857142857,
                    'hers': 0.0},
                nonbinary: {
                    'theirs': 0.0, 'themself': 0.0, 'them': 0.0, 'their': 0.0, 'they': 0.0},
            },
        }

    def test_single_frequency(self):
        male = Gender.objects.get(pk=1, label='Male')
        female = Gender.objects.get(pk=2, label='Female')
        nonbinary = Gender.objects.get(pk=3, label='Nonbinary')
        result = frequency.run_single_analysis(self.doc, [male, female, nonbinary])
        self.assertEqual(result, self._expected_analysis(male, female, nonbinary))

    def test_run_analysis(self):
        male = Gender.objects.get(pk=1, label='Male')
        female = Gender.objects.get(pk=2, label='Female')
        nonbinary = Gender.objects.get(pk=3, label='Nonbinary')
        result = frequency.run_analysis(self.corpus.pk, [male.pk, female.pk, nonbinary.pk])
        # Results are keyed by Document primary key.
        expected = {self.doc.pk: self._expected_analysis(male, female, nonbinary)}
        self.assertEqual(result, expected)


class CorpusTestCase(TestCase):
"""
Test Cases for the Corpus Model
Expand Down

0 comments on commit 0fdfcc6

Please sign in to comment.