Skip to content

Commit

Permalink
Write test cases for distinctiveness.py
Browse files Browse the repository at this point in the history
  • Loading branch information
kmorhun committed Nov 19, 2021
1 parent 5d65f6e commit 7074199
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 7 deletions.
4 changes: 2 additions & 2 deletions backend/app/analysis/distinctiveness.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def dunning_total(corpus_1, corpus_2):

for word in counter_1:
counter1_wordcount = counter_1[word]
if word in counter_2:
try:
counter2_wordcount = counter_2[word]

dunning_word = dunn_individual_word(total_word_count_1,
Expand All @@ -106,7 +106,7 @@ def dunning_total(corpus_1, corpus_2):
'count_corp1': counter1_wordcount,
'count_corp2': counter2_wordcount
}
else: # if word is not in counter_2
except ValueError: # if word is not in counter_2
result["unique_to_corp_1"].add(word)

# consider words unique to corpus_2
Expand Down
17 changes: 16 additions & 1 deletion backend/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
Document,
Corpus,
ProximityAnalysis,
FrequencyAnalysis
FrequencyAnalysis,
DistinctivenessAnalysis

)


Expand Down Expand Up @@ -90,3 +92,16 @@ class FrequencyAnalysisSerializer(serializers.ModelSerializer):
class Meta:
model = FrequencyAnalysis
fields = ['id', 'corpus', 'genders', 'results']


class DistinctivenessAnalysisSerializer(serializers.ModelSerializer):
"""
Serializes a DistinctivenessAnalysis object
"""

corpora = serializers.PrimaryKeyRelatedField(read_only=True, many=True)

class Meta:
model = DistinctivenessAnalysis
fields = ['corpora', 'corpora']

112 changes: 108 additions & 4 deletions backend/app/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
)
from .analysis import (
proximity,
frequency
frequency,
distinctiveness
)


Expand Down Expand Up @@ -57,8 +58,8 @@ def test_PronounSeries_save(self):
By testing the model's .save() mechanism, this also tests that LowercaseCharField works
as expected.
"""
masc_1 = PronounSeries.objects.get(pk=1)
masc_2 = PronounSeries.objects.get(pk=2)
masc_1 = PronounSeries.objects.get(pk=11)
masc_2 = PronounSeries.objects.get(pk=12)
self.assertEqual(str(masc_1), 'Masc_1-series')
self.assertEqual(repr(masc_2), "<Masc_2: ['he', 'him', 'himself', 'his']>")

Expand All @@ -84,7 +85,7 @@ def test_PronounSeries_methods(self):
Tests all other PronounSeries methods explicitly specified in the class not already tested
in test_PronounSeries_save.
"""
fem = PronounSeries.objects.get(pk=3)
fem = PronounSeries.objects.get(pk=13)

self.assertEqual(fem.all_pronouns, {'she', 'her', 'hers', 'herself'})
self.assertTrue('SHE' in fem)
Expand Down Expand Up @@ -517,3 +518,106 @@ def test_proximity(self):
}

self.assertEqual(results, expected)


class DistinctivenessTestCase(TestCase):
def setUp(self):
Document.objects.create_document(title='doc1', year=2021, text='The quick brown fox jumped over the lazy dog.')
Document.objects.create_document(title='doc2', year=2021, text='She really likes to eat chocolate, like I do!')
Document.objects.create_document(title='doc3', year=2021, text='Do you like ice cream as much as I do?')
Document.objects.create_document(title='doc4', year=2021, text='Do you see what I see?')
Document.objects.create_document(title='doc5', year=2021, text='The Eiffel tower is really tall.')

corpus1 = Corpus.objects.create(
title='Test Corpus 1',
description='Testing the analysis functions in `distinctiveness.py`'
)
corpus1.documents.add(1, 2)

corpus2 = Corpus.objects.create(
title='Test Corpus 2',
description='Testing the analysis functions in `distinctiveness.py`'
)
corpus2.documents.add(3, 4)

def test_get_wordcount_counter(self):
result1 = distinctiveness._get_wordcount_counter(Corpus.objects.get(id=1))
expected1 = Counter({'the': 2, 'quick': 1, 'brown': 1, 'fox': 1, 'jumped': 1, 'over': 1, 'lazy': 1, 'dog': 1,
'she': 1, 'really': 1, 'likes': 1, 'to': 1, 'eat': 1, 'chocolate': 1, 'like': 1, 'i': 1,
'do': 1})
self.assertEquals(result1, expected1)

result2 = distinctiveness._get_wordcount_counter(Corpus.objects.get(id=2))
expected2 = Counter({'do': 3, 'you': 2, 'like': 1, 'ice': 1, 'cream': 1, 'as': 2, 'much': 1, 'i': 2,
'see': 2, 'what': 1})
self.assertEquals(result2, expected2)

def test_dunn_individual_word(self):
epsilon = 1e-8
counter_1 = distinctiveness._get_wordcount_counter(Corpus.objects.get(id=1))
counter_2 = distinctiveness._get_wordcount_counter(Corpus.objects.get(id=2))

words_in_corpus_1 = sum(counter_1.values())
words_in_corpus_2 = sum(counter_2.values())
self.assertEquals(words_in_corpus_1, 18)
self.assertEquals(words_in_corpus_2, 16)

# testing same word count
count_in_1 = counter_1['like']
count_in_2 = counter_2['like']
self.assertEquals(count_in_1, 1)
self.assertEquals(count_in_2, 1)

result = distinctiveness.dunn_individual_word(words_in_corpus_1, words_in_corpus_2, count_in_1, count_in_2)
expected = -0.00693241595681
self.assertAlmostEquals(result, expected, delta=epsilon)

# more common in Corpus 2
count_in_1 = counter_1['do']
count_in_2 = counter_2['do']
self.assertEquals(count_in_1, 1)
self.assertEquals(count_in_2, 3)

result = distinctiveness.dunn_individual_word(words_in_corpus_1, words_in_corpus_2, count_in_1, count_in_2)
expected = -1.29592719074
self.assertAlmostEquals(result, expected, delta=epsilon)

# unique to Corpus 1
count_in_1 = counter_1['quick']
count_in_2 = counter_2['quick']

with self.assertRaises(ValueError):
distinctiveness.dunn_individual_word(words_in_corpus_1,
words_in_corpus_2,
count_in_1,
count_in_2)

def test_dunning_total(self):
self.maxDiff = None
c1 = Corpus.objects.get(id=1)
c2 = Corpus.objects.get(id=2)

result = distinctiveness.dunning_total(c1, c2)
expected = {"unique_to_corp_1": {'the', 'quick', 'brown', 'fox', 'jumped', 'over', 'lazy', 'dog',
'she', 'really', 'likes', 'to', 'eat', 'chocolate'},
"unique_to_corp_2": {'you', 'ice', 'cream', 'as', 'much', 'see', 'what'},
"common_words": {
'like': {
'dunning': -0.006932415952972407,
'count_corp1': 1,
'count_corp2': 1},
'i': {
'dunning': -0.46797973317663666,
'count_corp1': 1,
'count_corp2': 2},
'do': {
'dunning': -1.2959271907478076,
'count_corp1': 1,
'count_corp2': 3}}}

self.assertEquals(result['unique_to_corp_1'], expected['unique_to_corp_1'])
self.assertEquals(result['unique_to_corp_2'], expected['unique_to_corp_2'])
self.assertEquals(result['common_words'], expected['common_words'])



1 comment on commit 7074199

@kmorhun
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • And also writing the serializer for distinctiveness analysis

Please sign in to comment.