Skip to content

Commit

Permalink
add flavor (#82)
Browse files: browse the repository at this point in the history
add flavor parameter
  • Loading branch information
lfoppiano authored Jan 24, 2025
1 parent dcf1b50 commit 481ceff
Showing 1 changed file with 21 additions and 2 deletions.
23 changes: 21 additions & 2 deletions grobid_client/grobid_client.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -114,6 +114,7 @@ def process(
segment_sentences=False,
force=True,
verbose=False,
flavor=None
):
batch_size_pdf = self.config["batch_size"]
input_files = []
Expand Down Expand Up @@ -147,6 +148,7 @@ def process(
segment_sentences,
force,
verbose,
flavor
)
input_files = []

Expand Down Expand Up @@ -185,6 +187,7 @@ def process_batch(
segment_sentences,
force,
verbose=False,
flavor=None
):
if verbose:
print(len(input_files), "files to process in current batch")
Expand All @@ -203,6 +206,9 @@ def process_batch(
selected_process = self.process_pdf
if service == 'processCitationList':
selected_process = self.process_txt

if verbose:
print(f"Adding {input_file} to the queue.")

r = executor.submit(
selected_process,
Expand All @@ -214,7 +220,8 @@ def process_batch(
include_raw_citations,
include_raw_affiliations,
tei_coordinates,
segment_sentences)
segment_sentences,
flavor)

results.append(r)

Expand Down Expand Up @@ -255,7 +262,8 @@ def process_pdf(
tei_coordinates,
segment_sentences,
start=-1,
end=-1
end=-1,
flavor=None
):
pdf_handle = open(pdf_file, "rb")
files = {
Expand Down Expand Up @@ -285,6 +293,8 @@ def process_pdf(
the_data["teiCoordinates"] = self.config["coordinates"]
if segment_sentences:
the_data["segmentSentences"] = "1"
if flavor:
the_data["flavor"] = flavor
if start > 0:
the_data["start"] = str(start)
if end > 0:
Expand Down Expand Up @@ -368,6 +378,7 @@ def process_txt(

def main():
valid_services = [
"processFulltextDocumentBlank",
"processFulltextDocument",
"processHeaderDocument",
"processReferences",
Expand Down Expand Up @@ -441,11 +452,18 @@ def main():
help="print information about processed files in the console",
)

parser.add_argument(
"--flavor",
default=None,
help="Define the flavor to be used for the fulltext extraction",
)

args = parser.parse_args()

input_path = args.input
config_path = args.config
output_path = args.output
flavor = args.flavor

if args.n is not None:
try:
Expand Down Expand Up @@ -500,6 +518,7 @@ def main():
segment_sentences=segment_sentences,
force=force,
verbose=verbose,
flavor=flavor
)

runtime = round(time.time() - start_time, 3)
Expand Down

0 comments on commit 481ceff

Please sign in to comment.