Skip to content

Commit

Permalink
Merge pull request #17 from WSE-research/add-language-specific-queries
Browse files Browse the repository at this point in the history
Add language specific queries and functions
  • Loading branch information
heinpa authored Sep 2, 2024
2 parents f080512 + aa8416a commit c210ce7
Show file tree
Hide file tree
Showing 3 changed files with 222 additions and 14 deletions.
191 changes: 191 additions & 0 deletions qanary_helpers/language_queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
from qanary_helpers.qanary_queries import select_from_triplestore, get_text_question_from_uri
import logging


class QuestionTextWithLanguage:
    """Holds data of question texts in the triplestore that have an associated language.

    The language is associated either through a previous translation or
    through language recognition.
    """

    def __init__(self, uri: str, text: str, lang: str):
        """Inits QuestionTextWithLanguage with question URI, question text and question language.

        Keyword arguments:
        uri (str) -- URI of the question inside of the triplestore
        text (str) -- Textual representation of the question
        lang (str) -- Language of the question text
        """
        self.uri = uri
        self.text = text
        self.lang = lang

    def __repr__(self) -> str:
        """Returns a debug representation showing all three stored fields."""
        return (
            f"{type(self).__name__}"
            f"(uri={self.uri!r}, text={self.text!r}, lang={self.lang!r})"
        )

    # The accessor methods are kept for callers that already use them;
    # the attributes themselves remain directly readable as well.
    def get_uri(self) -> str:
        """Returns the URI of the question."""
        return self.uri

    def get_text(self) -> str:
        """Returns the textual representation of the question."""
        return self.text

    def get_language(self) -> str:
        """Returns the language of the question text."""
        return self.lang


def get_texts_with_detected_language_in_triplestore(
    triplestore_endpoint: str, graph_uri: str, lang: str
) -> list[QuestionTextWithLanguage]:
    """Retrieves question texts from the triplestore for which a specific language has been detected.

    Selects all annotations of type qa:AnnotationOfQuestionLanguage in the
    given graph whose body equals ``lang`` and fetches the text of each
    annotated question via ``get_text_question_from_uri``.

    Keyword arguments:
    triplestore_endpoint (str) -- URL of the triplestore endpoint
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    lang (str) -- Expected detected language

    Returns:
    list -- A list of appropriate QuestionTextWithLanguage objects with information from the triplestore.
    """
    # Escape characters that would otherwise terminate or corrupt the quoted
    # SPARQL string literal the language is embedded in.
    escaped_lang = lang.translate(
        str.maketrans({"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r"})
    )
    sparql_find_ld = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionLanguage .
            ?annotationId oa:hasTarget ?hasTarget ;
                oa:hasBody ?hasBody ;
                oa:annotatedBy ?annotatedBy ;
                oa:annotatedAt ?annotatedAt .
            FILTER(STR(?hasBody) = "{lang}")
        }}
    """.format(
        graph=graph_uri,
        lang=escaped_lang,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)

    # The annotation only references the question; its text has to be fetched
    # separately for every matching question URI.
    question_uris = [
        binding["hasTarget"]["value"]
        for binding in results["results"]["bindings"]
    ]
    return [
        QuestionTextWithLanguage(
            uri=question_uri,
            text=get_text_question_from_uri(
                triplestore_endpoint=triplestore_endpoint,
                question_uri=question_uri,
            ),
            lang=lang,
        )
        for question_uri in question_uris
    ]


def get_translated_texts_in_triplestore(
    triplestore_endpoint: str, graph_uri: str, lang: str
) -> list[QuestionTextWithLanguage]:
    """Retrieves question texts from the triplestore that were translated into a specific language.

    Selects all annotations of type qa:AnnotationOfQuestionTranslation in the
    given graph whose body carries the language tag ``lang``. The annotation
    body itself is used as the question text.

    Keyword arguments:
    triplestore_endpoint (str) -- URL of the triplestore endpoint
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    lang (str) -- Target language of the translation

    Returns:
    list -- A list of appropriate QuestionTextWithLanguage objects with information from the triplestore.
    """
    # Escape characters that would otherwise terminate or corrupt the quoted
    # SPARQL string literal the language is embedded in.
    escaped_lang = lang.translate(
        str.maketrans({"\\": "\\\\", '"': '\\"', "\n": "\\n", "\r": "\\r"})
    )
    sparql_find_ld = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        SELECT *
        FROM <{graph}>
        WHERE {{
            ?annotationId a qa:AnnotationOfQuestionTranslation .
            ?annotationId oa:hasTarget ?hasTarget ;
                oa:hasBody ?hasBody ;
                oa:annotatedBy ?annotatedBy ;
                oa:annotatedAt ?annotatedAt .
            FILTER(lang(?hasBody) = "{lang}").
        }}
    """.format(
        graph=graph_uri,
        lang=escaped_lang,
    )
    results = select_from_triplestore(triplestore_endpoint, sparql_find_ld)

    return [
        QuestionTextWithLanguage(
            binding["hasTarget"]["value"],  # URI of the translated question
            binding["hasBody"]["value"],  # the translated text itself
            lang,
        )
        for binding in results["results"]["bindings"]
    ]


def create_annotation_of_question_translation(
    graph_uri: str,
    question_uri: str,
    translation: str,
    translation_language: str,
    app_name: str,
) -> str:
    """Creates an INSERT SPARQL query to annotate the question translation in the triplestore.

    The translation is embedded as a language-tagged string literal. Quotes,
    backslashes and line breaks inside the translation are escaped so that
    the generated query stays syntactically valid for arbitrary text.

    Keyword Arguments:
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    question_uri (str) -- URI of the question inside of the triplestore
    translation (str) -- Translation of the question text
    translation_language (str) -- Target language of the translation
    app_name (str) -- Name of the component making the annotation

    Returns:
    str -- The generated INSERT query
    """
    # Translated text is free-form: without escaping, a single double quote
    # or newline would terminate the literal and break the whole query.
    escaped_translation = translation.translate(
        str.maketrans({
            "\\": "\\\\",
            '"': '\\"',
            "\n": "\\n",
            "\r": "\\r",
            "\t": "\\t",
        })
    )

    sparql_query = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        INSERT {{
        GRAPH <{uuid}> {{
            ?a a qa:AnnotationOfQuestionTranslation ;
                oa:hasTarget <{qanary_question_uri}> ;
                oa:hasBody "{translation_result}"@{target_lang} ;
                oa:annotatedBy <urn:qanary:{app_name}> ;
                oa:annotatedAt ?time .
            }}
        }}
        WHERE {{
            BIND (IRI(str(RAND())) AS ?a) .
            BIND (now() as ?time)
        }}
    """.format(
        uuid=graph_uri,
        qanary_question_uri=question_uri,
        translation_result=escaped_translation,
        target_lang=translation_language,
        app_name=app_name,
    )
    logging.info("SPARQL: %s", sparql_query)
    return sparql_query


def create_annotation_of_question_language(
    graph_uri: str, question_uri: str, language: str, app_name: str
) -> str:
    """Creates an INSERT SPARQL query to annotate the language of a question in the triplestore.

    The language is embedded as an xsd:string literal. Quotes, backslashes
    and line breaks inside it are escaped so that the generated query stays
    syntactically valid.

    Keyword Arguments:
    graph_uri (str) -- URI of the graph to query inside of the triplestore
    question_uri (str) -- URI of the question inside of the triplestore
    language (str) -- Determined language of the question
    app_name (str) -- Name of the component making the annotation

    Returns:
    str -- The generated INSERT query
    """
    # Guard the quoted literal against characters that would terminate it.
    escaped_language = language.translate(
        str.maketrans({
            "\\": "\\\\",
            '"': '\\"',
            "\n": "\\n",
            "\r": "\\r",
            "\t": "\\t",
        })
    )

    sparql_query = """
        PREFIX qa: <http://www.wdaqua.eu/qa#>
        PREFIX oa: <http://www.w3.org/ns/openannotation/core/>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        INSERT {{
        GRAPH <{uuid}> {{
            ?b a qa:AnnotationOfQuestionLanguage ;
                oa:hasTarget <{qanary_question_uri}> ;
                oa:hasBody "{src_lang}"^^xsd:string ;
                oa:annotatedBy <urn:qanary:{app_name}> ;
                oa:annotatedAt ?time .
            }}
        }}
        WHERE {{
            BIND (IRI(str(RAND())) AS ?b) .
            BIND (now() as ?time)
        }}
    """.format(
        uuid=graph_uri,
        qanary_question_uri=question_uri,
        src_lang=escaped_language,
        app_name=app_name,
    )

    logging.info("SPARQL: %s", sparql_query)
    return sparql_query
39 changes: 28 additions & 11 deletions qanary_helpers/qanary_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,30 @@
import re


def get_text_question_from_uri(triplestore_endpoint: str, question_uri: str) -> str:
    """Fetches the text of the question identified by a URI.

    The text is read with an HTTP GET from the question URI suffixed with
    "/raw". Before the request, every occurrence of "localhost" in that URL
    is replaced with the host name of the triplestore endpoint.

    Keyword arguments:
    triplestore_endpoint (str) -- URL of the triplestore endpoint
    question_uri (str) -- URI of the question

    Returns:
    str -- The question text

    Raises:
    ValueError -- if no host name can be parsed from triplestore_endpoint
    """
    raw_url = question_uri + "/raw"
    logging.info(
        "found: questionURI={0} questionURIraw={1}".format(question_uri, raw_url)
    )

    endpoint_host = urlparse(triplestore_endpoint).hostname
    if endpoint_host is None:
        message = "No valid host name could be extracted from the supplied triplestore_endpoint: {0}"
        raise ValueError(message.format(triplestore_endpoint))

    reachable_url = raw_url.replace("localhost", endpoint_host)
    response = requests.get(reachable_url)
    return response.text


def get_text_question_in_graph(triplestore_endpoint, graph):
"""
Retrieves the questions from the triplestore returns an array
Expand All @@ -17,7 +41,7 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT DISTINCT ?questionURI
FROM <{uri}>
FROM <{uri}>
WHERE {{
?questionURI rdf:type <http://www.wdaqua.eu/qa#Question> .
}}
Expand All @@ -26,16 +50,9 @@ def get_text_question_in_graph(triplestore_endpoint, graph):
results = select_from_triplestore(triplestore_endpoint, query)
for result in results["results"]["bindings"]:
question_uri = result['questionURI']['value']
question_raw = question_uri + "/raw"
logging.info("found: questionURI={0} questionURIraw={1}".format(
question_uri,
question_raw
))
question_text = requests.get(question_raw.replace(
"localhost", urlparse(triplestore_endpoint).hostname)
)
logging.info("found question: \"{0}\"".format(question_text.text))
questions.append({"uri": question_uri, "text": question_text.text})
question_text = get_text_question_from_uri(triplestore_endpoint, question_uri)
logging.info("found question: \"{0}\"".format(question_text))
questions.append({"uri": question_uri, "text": question_text})

return questions

Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ def read_requirements():

setuptools.setup(
name="qanary-helpers",
version="0.2.2",
version="0.3.2",
author="Andreas Both, Aleksandr Perevalov",
author_email="[email protected], aleksandr.perevalov@hs-anhalt.de",
author_email="[email protected], aleksandr.perevalov@htwk-leipzig.de",
description="A package that helps to build Python components for the Qanary Question Answering framework",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/Perevalov/qanary_helpers",
url="https://github.com/WSE-research/qanary_helpers",
packages=setuptools.find_packages(),
classifiers=[
"Programming Language :: Python",
Expand Down

0 comments on commit c210ce7

Please sign in to comment.