From a26b614d74ed75904e716d5b7f0547b23f673d93 Mon Sep 17 00:00:00 2001 From: Rasmus Date: Thu, 14 Dec 2023 12:36:58 +0100 Subject: [PATCH] Evaluation on both English and Danish data sets --- .../evaluation/DanskEvaluering.xml | 4 +- relation_extraction/evaluation/evaluation.py | 24 +- .../evaluation/evaluation_results.json | 258 ++++++++++++++---- .../manual_evaluation_calculation.py | 31 +++ .../evaluation/testdataMini.xml | 114 ++++++++ .../multilingual/llm_messenger.py | 24 +- 6 files changed, 377 insertions(+), 78 deletions(-) create mode 100644 relation_extraction/evaluation/manual_evaluation_calculation.py diff --git a/relation_extraction/evaluation/DanskEvaluering.xml b/relation_extraction/evaluation/DanskEvaluering.xml index f87b77c..fc51d20 100644 --- a/relation_extraction/evaluation/DanskEvaluering.xml +++ b/relation_extraction/evaluation/DanskEvaluering.xml @@ -48,7 +48,7 @@ DanskeFartøjer | powerType | Elektrisk - DanskeFartøjer | length | "24000"^^ + DanskeFartøjer | length | "24000"^^<http://dbpedia.org/datatype/millimetre> @@ -61,14 +61,12 @@ København_Tårn | architect | Lars Mikkelsen København_Tårn | address | "Købmagergade 52"@da - København_Tårn | currentTenants | Danmarks Radio København_Tårn | location | København Danmarks Radio | country | Denmark København_Tårn | architect | Lars Mikkelsen København_Tårn | address | "Købmagergade 52" - København_Tårn | currentTenants | Danmarks Radio København_Tårn | location | København Danmarks Radio | country | Denmark diff --git a/relation_extraction/evaluation/evaluation.py b/relation_extraction/evaluation/evaluation.py index 940be1a..ea0d9b9 100644 --- a/relation_extraction/evaluation/evaluation.py +++ b/relation_extraction/evaluation/evaluation.py @@ -21,7 +21,7 @@ def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 3, def convert_testdata_to_input_format(): objs = [] - tree = ET.parse('relation_extraction/Evaluation/testdataMini.xml') + tree = ET.parse('relation_extraction/evaluation/DanskEvaluering.xml') root = tree.getroot() for entry in root.findall('.//entry'): sentence = entry.findall('lex')[0].text @@ -42,6 +42,12 @@ def calculate_metrics(data): FP = 0 FN = 0 + data_without_duplicates = data.deepcopy() + + for triples in data_without_duplicates["multilingual"]["triples"]: + triples["triples_from_solution"] = set(tuple(triple) for triple in triples["triples_from_solution"]) + triples["triples_from_solution"] = list(list(triple) for triple in triples["triples_from_solution"]) + for element in data["triples"]: TP += element["contains_hits"] FP += len(element["triples_from_solution"]) - element["contains_hits"] @@ -116,15 +122,15 @@ def main(): progress_suffix = f"Complete. Timeusage: {round((datetime.datetime.now()-dt).total_seconds()/60,5)} minutes. Eta {eta} minutes." printProgressBar(i + 1, len(input_objs), prefix = 'Progress:', suffix = progress_suffix, length = 50) - print(f"Solution {name} finished. Hit {hits}/{total_triples}. Hit percentage: {(hits/total_triples)*100}%") - evaluation_results[name] = { - "triples": evaluation_result_triples, - "result": {"total_expected_triples": total_triples, "hits": hits, "hit_percentage": hits/total_triples}, - "score": calculate_metrics({"triples": evaluation_result_triples}) - } + print(f"Solution {name} finished. Hit {hits}/{total_triples}. Hit percentage: {(hits/total_triples)*100}%") + evaluation_results[name] = { + "triples": evaluation_result_triples, + "result": {"total_expected_triples": total_triples, "hits": hits, "hit_percentage": hits/total_triples}, + "score": calculate_metrics({"triples": evaluation_result_triples}) + } - with open("relation_extraction/Evaluation/evaluation_results.json", "w") as f: - json.dump(evaluation_results, f, indent=4) + with open("relation_extraction/Evaluation/evaluation_results.json", "w") as f: + json.dump(evaluation_results, f, indent=4) diff --git a/relation_extraction/evaluation/evaluation_results.json b/relation_extraction/evaluation/evaluation_results.json index 575a08b..6529d65 100644 --- a/relation_extraction/evaluation/evaluation_results.json +++ b/relation_extraction/evaluation/evaluation_results.json @@ -2,146 +2,296 @@ "multilingual": { "triples": [ { - "sentence": "Turn Me On is a 35.1 minute long album produced by Wharton Tiers that was followed by the album entitled Take it Off.", + "sentence": "Hjertevarme er et album p\u00e5 42,5 minutter produceret af Mikael Rasmussen, efterfulgt af albummet Solopgang.", "triples_from_solution": [ [ - "Wharton_Tiers", + "Mikael_Rasmussen", "producer", - "Turn_Me_On" + "Hjertevarme" ], [ - "Turn_Me_On", + "Mikael_Rasmussen", + "producer", + "Hjertevarme" + ], + [ + "Hjertevarme", + "producedBy", + "Mikael_Rasmussen" + ], + [ + "Solopgang", + "follows", + "Hjertevarme" + ], + [ + "Hjertevarme", "producedBy", - "Wharton_Tiers" + "Mikael_Rasmussen" + ], + [ + "Solopgang", + "follows", + "Hjertevarme" ] ], "expected_triples": [ [ - "Turn_Me_On", + "Hjertevarme", "runtime", - "35.1" + "42.5" ], [ - "Turn_Me_On", + "Hjertevarme", "producer", - "Wharton_Tiers" + "Mikael_Rasmussen" ], [ - "Turn_Me_On", + "Hjertevarme", "followedBy", - "Take_It_Off!" + "Solopgang" ] ], "contains_hits": 0 }, { - "sentence": "The location of Trane is Swords, Dublin.", + "sentence": "HyggeHjem har sin placering i Aarhus.", "triples_from_solution": [ [ - "Trane", + "HyggeHjem", "location", - "Swords,_Dublin" + "Aarhus" ], [ - "Trane", + "HyggeHjem", "location", - "Swords,_Dublin" + "Aarhus" ], [ - "Trane", + "HyggeHjem", "location", - "Swords,_Dublin" - ], - [ - "Trane", - "location", - "Swords,_Dublin" + "Aarhus" ] ], "expected_triples": [ [ - "Trane", + "HyggeHjem", "location", - "Swords,_Dublin" + "Aarhus" ] ], "contains_hits": 1 }, { - "sentence": "The Ciudad Ayala city, a part of Morelos with population density and population of 1604.0 and 1,777,539 respectively, has a UTC offset of -6. The government type of Ciudad Ayala is council-manager government and City Manager is one of the leaders.", + "sentence": "Byen Odense, en del af Fyn med en befolkningst\u00e6thed p\u00e5 1500,0 og en metro-befolkning p\u00e5 178.329, har en UTC-offset p\u00e5 +1. Regeringstypen i Odense er kommunalregering, og borgmesteren er en af lederne.", "triples_from_solution": [ [ - "Ciudad_Ayala", + "Odense", "location", - "Morelos" + "Fyn" + ], + [ + "178329", + "population", + "Odense" ] ], "expected_triples": [ [ - "Ciudad_Ayala", + "Odense", "populationMetro", - "1777539" + "178329" ], [ - "Ciudad_Ayala", + "Odense", "leaderTitle", - "\"City_Manager\"" + "\"Borgmester\"" ], [ - "Ciudad_Ayala", + "Odense", "type", - "City" + "By" ], [ - "Ciudad_Ayala", + "Odense", "populationDensity", - "1604.0" + "1500.0" ], [ - "Ciudad_Ayala", + "Odense", "governmentType", - "Council-manager_government" + "Kommunalregering" ], [ - "Ciudad_Ayala", + "Odense", "utcOffset", - "\u22126" + "+1" ], [ - "Ciudad_Ayala", + "Odense", "isPartOf", - "Morelos" + "Fyn" ] ], "contains_hits": 0 }, { - "sentence": "The 17068.8 millimeter long ALCO RS-3 has a diesel-electric transmission.", - "triples_from_solution": [], + "sentence": "DanskeFart\u00f8jer er 24.000 millimeter langt og har en elektrisk motortype.", + "triples_from_solution": [ + [ + "DanskeFart\u00f8jer", + "length", + "24000" + ], + [ + "DanskeFart\u00f8jer", + "location", + "24000" + ], + [ + "DanskeFart\u00f8jer", + "location", + "24000" + ], + [ + "DanskeFart\u00f8jer", + "length", + "24000" + ], + [ + "DanskeFart\u00f8jer", + "length", + "24000" + ], + [ + "DanskeFart\u00f8jer", + "length", + "24000" + ] + ], "expected_triples": [ [ - "ALCO_RS-3", + "DanskeFart\u00f8jer", "powerType", - "Diesel-electric_transmission" + "Elektrisk" ], [ - "ALCO_RS-3", + "DanskeFart\u00f8jer", "length", - "17068.8" + "24000" + ] + ], + "contains_hits": 1 + }, + { + "sentence": "K\u00f8benhavn T\u00e5rn, beliggende i K\u00f8benhavn, Danmark, er designet af Lars Mikkelsen. Adressen p\u00e5 t\u00e5rnet er \"K\u00f8bmagergade 52\" og nuv\u00e6rende lejere er Danmarks Radio.", + "triples_from_solution": [ + [ + "K\u00f8benhavn_T\u00e5rn", + "location", + "K\u00f8benhavn" + ], + [ + "Lars_Mikkelsen", + "designer", + "K\u00f8benhavn_T\u00e5rn" + ] + ], + "expected_triples": [ + [ + "K\u00f8benhavn_T\u00e5rn", + "architect", + "Lars_Mikkelsen" + ], + [ + "K\u00f8benhavn_T\u00e5rn", + "address", + "\"K\u00f8bmagergade_52\"" + ], + [ + "K\u00f8benhavn_T\u00e5rn", + "location", + "K\u00f8benhavn" + ], + [ + "Danmarks_Radio", + "country", + "Denmark" + ] + ], + "contains_hits": 1 + }, + { + "sentence": "Jens Larsen blev f\u00f8dt i Aalborg og d\u00f8de i K\u00f8benhavn. Etniske grupper i K\u00f8benhavn inkluderer danskere.", + "triples_from_solution": [ + [ + "Jens_Larsen", + "location", + "Aalborg" + ], + [ + "Jens_Larsen", + "location", + "Aalborg" + ] + ], + "expected_triples": [ + [ + "Jens_Larsen", + "birthPlace", + "Aalborg" + ], + [ + "Jens_Larsen", + "deathPlace", + "K\u00f8benhavn" + ], + [ + "K\u00f8benhavn", + "ethnicGroup", + "Dansk" + ] + ], + "contains_hits": 0 + }, + { + "sentence": "Det Tabte Rige er en film redigeret af Anna J\u00f8rgensen.", + "triples_from_solution": [ + [ + "Det_Tabte_Rige", + "editor", + "Anna_J\u00f8rgensen" + ], + [ + "Det_Tabte_Rige", + "editor", + "Anna_J\u00f8rgensen" + ], + [ + "Det_Tabte_Rige", + "film", + "Anna_J\u00f8rgensen" + ] + ], + "expected_triples": [ + [ + "Det_Tabte_Rige", + "editing", + "Anna_J\u00f8rgensen" ] ], "contains_hits": 0 } ], "result": { - "total_expected_triples": 13, - "hits": 1, - "hit_percentage": 0.07692307692307693 + "total_expected_triples": 21, + "hits": 3, + "hit_percentage": 0.14285714285714285 }, "score": { - "precision": 0.14285714285714285, - "recall": 0.07692307692307693, - "F1_score": 0.1 + "precision": 0.125, + "recall": 0.14285714285714285, + "F1_score": 0.13333333333333333 } } } \ No newline at end of file diff --git a/relation_extraction/evaluation/manual_evaluation_calculation.py b/relation_extraction/evaluation/manual_evaluation_calculation.py new file mode 100644 index 0000000..234b833 --- /dev/null +++ b/relation_extraction/evaluation/manual_evaluation_calculation.py @@ -0,0 +1,31 @@ +import json + + +def calculate_metrics(data): + TP = 0 + FP = 0 + FN = 0 + data_without_duplicates = data + + for triples in data_without_duplicates["multilingual"]["triples"]: + triples["triples_from_solution"] = set(tuple(triple) for triple in triples["triples_from_solution"]) + triples["triples_from_solution"] = list(list(triple) for triple in triples["triples_from_solution"]) + + for element in data_without_duplicates["multilingual"]["triples"]: + TP += element["contains_hits"] + FP += len(element["triples_from_solution"]) - element["contains_hits"] + FN += len(element["expected_triples"]) - element["contains_hits"] + + precision = TP / (TP + FP) if (TP + FP) else 0 + recall = TP / (TP + FN) if (TP + FN) else 0 + F1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) else 0 + + return {"precision": precision, "recall": recall, "F1_score": F1} + +def main(): + with open("relation_extraction/Evaluation/evaluation_results.json") as f: + res_obj = json.load(f) + + print(calculate_metrics(res_obj)) + +main() \ No newline at end of file diff --git a/relation_extraction/evaluation/testdataMini.xml b/relation_extraction/evaluation/testdataMini.xml index 45aee75..d40d704 100644 --- a/relation_extraction/evaluation/testdataMini.xml +++ b/relation_extraction/evaluation/testdataMini.xml @@ -55,5 +55,119 @@ The 17068.8 millimeter long ALCO RS-3 has a diesel-electric transmission. + + + Alan_B._Miller_Hall | architect | Robert_A._M._Stern + Alan_B._Miller_Hall | address | "101 Ukrop Way"@en + Alan_B._Miller_Hall | currentTenants | Mason_School_of_Business + Alan_B._Miller_Hall | location | Virginia + Mason_School_of_Business | country | United_States + + + Alan_B._Miller_Hall | architect | Robert_A._M._Stern + Alan_B._Miller_Hall | address | "101 Ukrop Way" + Alan_B._Miller_Hall | currentTenants | Mason_School_of_Business + Alan_B._Miller_Hall | location | Virginia + Mason_School_of_Business | country | United_States + + Alan B. Miller Hall, in Virginia, USA, was designed by Robert A.M. Stern. The address of the hall is "101 Ukrop Way" and the current tenants are the Mason School of Business. + + + + Liselotte_Grschebina | birthPlace | Karlsruhe + Liselotte_Grschebina | deathPlace | Israel + Israel | ethnicGroup | Arab_citizens_of_Israel + + + Liselotte_Grschebina | birthPlace | Karlsruhe + Liselotte_Grschebina | deathPlace | Israel + Israel | ethnicGroup | Arab_citizens_of_Israel + + Liselotte Grschebina was born in Karlsruhe and died in Israel. Ethnic groups in Israel include Arabs. + + + + It's_Great_to_Be_Young_(1956_film) | editing | Max_Benedict + + + It's_Great_to_Be_Young_(1956_film) | editing | Max_Benedict + + It’s Great to Be Young is a film edited by Max Benedict. + + + + Turkey | leaderTitle | President + Nurhan_Atasoy | birthPlace | Turkey + + + Turkey | leaderTitle | President + Nurhan_Atasoy | birthPlace | Turkey + + Nurhan Atasoy was born in Turkey led by the President. + + + + Agremiação_Sportiva_Arapiraquense | league | Campeonato_Brasileiro_Série_C + Campeonato_Brasileiro_Série_C | country | Brazil + Agremiação_Sportiva_Arapiraquense | capacity | "17000"^^xsd:nonNegativeInteger + Agremiação_Sportiva_Arapiraquense | manager | Vica + + + Agremiação_Sportiva_Arapiraquense | league | Campeonato_Brasileiro_Série_C + Campeonato_Brasileiro_Série_C | country | Brazil + Agremiação_Sportiva_Arapiraquense | numberOfMembers | 17000 + Agremiação_Sportiva_Arapiraquense | manager | Vica + + Agremiação Sportiva Arapiraquense managed by Vica has 17000 members and play in the Campeonato Brasileiro Série C league which is from Brazil. + + + + Bananaman | creator | Steve_Bright + Bananaman | network | BBC + Bananaman | firstAired | "1983-10-03"^^xsd:date + + + Bananaman | creator | Steve_Bright + Bananaman | broadcastedBy | BBC + Bananaman | firstAired | "1983-10-03" + + Bananaman first aired on the 10th of March, 1983 and was created by Steve Bright. It was broadcast by the BBC. + + + + English_Without_Tears | cinematography | Bernard_Knowles + English_Without_Tears | writer | Terence_Rattigan + English_Without_Tears | musicComposer | Nicholas_Brodszky + English_Without_Tears | producer | Anatole_de_Grunwald + English_Without_Tears | director | Harold_French + + + English_Without_Tears | cinematography | Bernard_Knowles + English_Without_Tears | writer | Terence_Rattigan + English_Without_Tears | musicComposer | Nicholas_Brodszky + English_Without_Tears | producer | Anatole_de_Grunwald + English_Without_Tears | director | Harold_French + + The movie English Without Tears is written by Terence Rattigan and directed by Harold French. Anatole de Grunwald is the producer and Bernard Knowles is the cinematographer. Nicholas Brodszky was a composer of the songs. + + + + 11th_Mississippi_Infantry_Monument | established | 2000 + 11th_Mississippi_Infantry_Monument | region | Adams_County,_Pennsylvania + 11th_Mississippi_Infantry_Monument | municipality | Gettysburg,_Pennsylvania + 11th_Mississippi_Infantry_Monument | category | Contributing_property + Adams_County,_Pennsylvania | north | Cumberland_County,_Pennsylvania + 11th_Mississippi_Infantry_Monument | country | "United States"@en + + + 11th_Mississippi_Infantry_Monument | established | 2000 + 11th_Mississippi_Infantry_Monument | location | Adams_County,_Pennsylvania + 11th_Mississippi_Infantry_Monument | municipality | Gettysburg,_Pennsylvania + 11th_Mississippi_Infantry_Monument | category | Contributing_property + Adams_County,_Pennsylvania | hasToItsNorth | Cumberland_County,_Pennsylvania + 11th_Mississippi_Infantry_Monument | country | "United States" + + The 11th Mississippi Infantry Monument, built in 2000, is placed in the municipality of Gettysburg in Pennsylvania which is in Adams County, USA. The 11th Mississippi Infantry Monument is classified as a Contributing Property. Cumberland county, Pennsylvania is to the north of Adams County. + diff --git a/relation_extraction/multilingual/llm_messenger.py b/relation_extraction/multilingual/llm_messenger.py index d5981ce..db45772 100644 --- a/relation_extraction/multilingual/llm_messenger.py +++ b/relation_extraction/multilingual/llm_messenger.py @@ -10,22 +10,22 @@ def API_endpoint(): return "http://knox-proxy01.srv.aau.dk/llama-api/llama" def send_request(request): - HEADERS = {"Access-Authorization": os.getenv("ACCESS_SECRET")} - response = requests.post(url=LLMMessenger.API_endpoint(), json=request, headers=HEADERS) + # HEADERS = {"Access-Authorization": os.getenv("ACCESS_SECRET")} + # response = requests.post(url=LLMMessenger.API_endpoint(), json=request, headers=HEADERS) - # # Put the location of to the GGUF model that you've download from HuggingFace here - # model_path = "./relation_extraction/multilingual/llama-2-13b-chat.Q2_K.gguf" + # Put the location of to the GGUF model that you've download from HuggingFace here (https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q2_K.gguf?download=true) + model_path = "./relation_extraction/multilingual/llama-2-13b-chat.Q2_K.gguf" - # # Create a llama model - # model = Llama(model_path=model_path, n_ctx=4096) + # Create a llama model + model = Llama(model_path=model_path, n_ctx=4096) - # prompt = f"""[INST] <> - # {request["system_message"]} - # <> - # {request["user_message"]} [/INST]""" + prompt = f"""[INST] <> + {request["system_message"]} + <> + {request["user_message"]} [/INST]""" - # # Run the model - # output = model(prompt, max_tokens=request["max_tokens"], echo=True) + # Run the model + output = model(prompt, max_tokens=request["max_tokens"], echo=True) return output