Skip to content

Commit

Permalink
Evaluation on both English and Danish data sets
Browse files: browse the repository at this point in the history
  • Loading branch information
Rasmus authored and Rasmus committed Dec 14, 2023
1 parent 898aa71 commit a26b614
Show file tree
Hide file tree
Showing 6 changed files with 377 additions and 78 deletions.
4 changes: 1 addition & 3 deletions relation_extraction/evaluation/DanskEvaluering.xml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
<originaltripleset>
<otriple>DanskeFartøjer | powerType | Elektrisk</otriple>
<otriple>
DanskeFartøjer | length | "24000"^^<http://dbpedia.org/datatype/millimetre>
DanskeFartøjer | length | "24000"^^&lt;http://dbpedia.org/datatype/millimetre&gt;
</otriple>
</originaltripleset>
<modifiedtripleset>
Expand All @@ -61,14 +61,12 @@
<originaltripleset>
<otriple>København_Tårn | architect | Lars Mikkelsen</otriple>
<otriple>København_Tårn | address | "Købmagergade 52"@da</otriple>
<otriple>København_Tårn | currentTenants | Danmarks Radio</otriple>
<otriple>København_Tårn | location | København</otriple>
<otriple>Danmarks Radio | country | Denmark</otriple>
</originaltripleset>
<modifiedtripleset>
<mtriple>København_Tårn | architect | Lars Mikkelsen</mtriple>
<mtriple>København_Tårn | address | "Købmagergade 52"</mtriple>
<mtriple>København_Tårn | currentTenants | Danmarks Radio</mtriple>
<mtriple>København_Tårn | location | København</mtriple>
<mtriple>Danmarks Radio | country | Denmark</mtriple>
</modifiedtripleset>
Expand Down
24 changes: 15 additions & 9 deletions relation_extraction/evaluation/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3,

def convert_testdata_to_input_format():
objs = []
tree = ET.parse('relation_extraction/Evaluation/testdataMini.xml')
tree = ET.parse('relation_extraction/evaluation/DanskEvaluering.xml')
root = tree.getroot()
for entry in root.findall('.//entry'):
sentence = entry.findall('lex')[0].text
Expand All @@ -42,6 +42,12 @@ def calculate_metrics(data):
FP = 0
FN = 0

data_without_duplicates = data.deepcopy()

for triples in data_without_duplicates["multilingual"]["triples"]:
triples["triples_from_solution"] = set(tuple(triple) for triple in triples["triples_from_solution"])
triples["triples_from_solution"] = list(list(triple) for triple in triples["triples_from_solution"])

for element in data["triples"]:
TP += element["contains_hits"]
FP += len(element["triples_from_solution"]) - element["contains_hits"]
Expand Down Expand Up @@ -116,15 +122,15 @@ def main():
progress_suffix = f"Complete. Timeusage: {round((datetime.datetime.now()-dt).total_seconds()/60,5)} minutes. Eta {eta} minutes."
printProgressBar(i + 1, len(input_objs), prefix = 'Progress:', suffix = progress_suffix, length = 50)

print(f"Solution {name} finished. Hit {hits}/{total_triples}. Hit percentage: {(hits/total_triples)*100}%")
evaluation_results[name] = {
"triples": evaluation_result_triples,
"result": {"total_expected_triples": total_triples, "hits": hits, "hit_percentage": hits/total_triples},
"score": calculate_metrics({"triples": evaluation_result_triples})
}
print(f"Solution {name} finished. Hit {hits}/{total_triples}. Hit percentage: {(hits/total_triples)*100}%")
evaluation_results[name] = {
"triples": evaluation_result_triples,
"result": {"total_expected_triples": total_triples, "hits": hits, "hit_percentage": hits/total_triples},
"score": calculate_metrics({"triples": evaluation_result_triples})
}

with open("relation_extraction/Evaluation/evaluation_results.json", "w") as f:
json.dump(evaluation_results, f, indent=4)
with open("relation_extraction/Evaluation/evaluation_results.json", "w") as f:
json.dump(evaluation_results, f, indent=4)



Expand Down
Loading

0 comments on commit a26b614

Please sign in to comment.