From 1ea3e18adf84927d0d3bac68b2ba8d28bb24f7c8 Mon Sep 17 00:00:00 2001 From: DreamySleepyNightySnoozySnooze Date: Sun, 11 Jun 2023 09:52:35 +1000 Subject: [PATCH 1/4] Add whispar wdtv metadata scraper --- scrapers/WhisparrWDTV.py | 87 +++++++++++++++++++++++++++++++++++++++ scrapers/WhisparrWDTV.yml | 7 ++++ 2 files changed, 94 insertions(+) create mode 100644 scrapers/WhisparrWDTV.py create mode 100644 scrapers/WhisparrWDTV.yml diff --git a/scrapers/WhisparrWDTV.py b/scrapers/WhisparrWDTV.py new file mode 100644 index 000000000..b2452a148 --- /dev/null +++ b/scrapers/WhisparrWDTV.py @@ -0,0 +1,87 @@ +import sys +import pathlib + +import mimetypes +import base64 + +import json +import xml.etree.ElementTree as ET + +import py_common.graphql as graphql +import py_common.log as log +""" +This script parses kodi nfo files for metadata. The .nfo file must be in the same directory as the video file and must be named exactly alike. +""" + +# If you want to ingest image files from the .nfo the path to these files may need to be rewritten. Especially when using a docker container. +rewriteBasePath = False +# Example: Z:\Videos\Studio_XXX\example_cover.jpg -> /data/Studio_XXX/example_cover.jpg +basePathBefore = 'Z:\Videos' +basePathAfter = "/data" + +def query_xml(path, title): + res = {"title": title} + try: + tree = ET.parse(path) + except Exception as e: + log.error(f'xml parsing failed:{e}') + print(json.dumps(res)) + exit(1) + + if title == tree.find("episode_name").text: + log.info("Exact match found for " + title) + else: + log.info("No exact match found for " + title + ". 
Matching with " + tree.find("title").text + "!") + + # Extract metadata from xml + if tree.find("episode_name") != None: + res["title"] = tree.find("episode_name").text + + if tree.find("overview") != None: + res["details"] = tree.find("overview").text + + if tree.find("firstaired") != None: + res["date"] = tree.find("firstaired").text + + if tree.find("actor") != None and tree.find("actor").text: + res["performers"] = [] + for actor in tree.find("actor").text.split(" / "): + res["performers"].append({"name": actor.split(" - ")[0]}) + + if tree.find("series_name") != None: + res["studio"] = {"name":tree.find("series_name").text} + + return res + +def make_image_data_url(image_path): + # type: (str,) -> str + mime, _ = mimetypes.guess_type(image_path) + with open(image_path, 'rb') as img: + encoded = base64.b64encode(img.read()).decode() + return 'data:{0};base64,{1}'.format(mime, encoded) + +if sys.argv[1] == "query": + fragment = json.loads(sys.stdin.read()) + s_id = fragment.get("id") + if not s_id: + log.error(f"No ID found") + sys.exit(1) + + # Assume that .nfo/.xml is named exactly alike the video file and is at the same location + # Query graphQL for the file path + scene = graphql.getScene(s_id) + if scene: + scene_path = scene.get("path") + if scene_path: + p = pathlib.Path(scene_path) + + res = {"title": fragment["title"]} + + f = p.with_suffix(".xml") + if f.is_file(): + res = query_xml(f, fragment["title"]) + else: + log.info(f"No nfo/xml files found for the scene: {p}") + + print(json.dumps(res)) + exit(0) diff --git a/scrapers/WhisparrWDTV.yml b/scrapers/WhisparrWDTV.yml new file mode 100644 index 000000000..765c227a8 --- /dev/null +++ b/scrapers/WhisparrWDTV.yml @@ -0,0 +1,7 @@ +name: "Whisparr WDTV XML" +sceneByFragment: + action: script + script: + - python + - WhisparrWDTV.py + - query From 67921f67d2659198fc10c09ad64c42073c35ff73 Mon Sep 17 00:00:00 2001 From: DreamySleepyNightySnoozySnooze Date: Sun, 11 Jun 2023 10:05:27 +1000 Subject: [PATCH 
2/4] Remove redundant code --- scrapers/WhisparrWDTV.py | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/scrapers/WhisparrWDTV.py b/scrapers/WhisparrWDTV.py index b2452a148..1c9af30c1 100644 --- a/scrapers/WhisparrWDTV.py +++ b/scrapers/WhisparrWDTV.py @@ -13,12 +13,6 @@ This script parses kodi nfo files for metadata. The .nfo file must be in the same directory as the video file and must be named exactly alike. """ -# If you want to ingest image files from the .nfo the path to these files may need to be rewritten. Especially when using a docker container. -rewriteBasePath = False -# Example: Z:\Videos\Studio_XXX\example_cover.jpg -> /data/Studio_XXX/example_cover.jpg -basePathBefore = 'Z:\Videos' -basePathAfter = "/data" - def query_xml(path, title): res = {"title": title} try: @@ -53,13 +47,6 @@ def query_xml(path, title): return res -def make_image_data_url(image_path): - # type: (str,) -> str - mime, _ = mimetypes.guess_type(image_path) - with open(image_path, 'rb') as img: - encoded = base64.b64encode(img.read()).decode() - return 'data:{0};base64,{1}'.format(mime, encoded) - if sys.argv[1] == "query": fragment = json.loads(sys.stdin.read()) s_id = fragment.get("id") @@ -67,7 +54,7 @@ def make_image_data_url(image_path): log.error(f"No ID found") sys.exit(1) - # Assume that .nfo/.xml is named exactly alike the video file and is at the same location + # Assume that .xml is named exactly alike the video file and is at the same location # Query graphQL for the file path scene = graphql.getScene(s_id) if scene: @@ -81,7 +68,7 @@ def make_image_data_url(image_path): if f.is_file(): res = query_xml(f, fragment["title"]) else: - log.info(f"No nfo/xml files found for the scene: {p}") + log.info(f"No xml files found for the scene: {p}") print(json.dumps(res)) exit(0) From 578c38156b630c9416cd8590d1e16131f2d60768 Mon Sep 17 00:00:00 2001 From: DreamySleepyNightySnoozySnooze Date: Sun, 11 Jun 2023 10:17:32 +1000 Subject: [PATCH 3/4] 
Update comments --- scrapers/WhisparrWDTV.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/scrapers/WhisparrWDTV.py b/scrapers/WhisparrWDTV.py index 1c9af30c1..a9bec056a 100644 --- a/scrapers/WhisparrWDTV.py +++ b/scrapers/WhisparrWDTV.py @@ -9,10 +9,17 @@ import py_common.graphql as graphql import py_common.log as log + """ -This script parses kodi nfo files for metadata. The .nfo file must be in the same directory as the video file and must be named exactly alike. -""" +This script parses WDTV xml metadata files. +The .xml file must be in the same directory as the video file and must have the same name as the video file, apart from the extension. + +The code is borrowed from the kodi nfo scraper (see https://github.com/stashapp/CommunityScrapers/pull/689). +It was found that the .nfo files exported from Whisparr did not contain all the required details. +The WDTV format exported by Whisparr, by contrast, contained all the required information. +The intention is not to be a generic WDTV metadata parser, but one that specifically parses WDTV metadata from Whisparr. It is based on Whisparr version v2.0.0.168. This simplifies the integration of Whisparr and Stash. 
+""" def query_xml(path, title): res = {"title": title} try: @@ -36,7 +43,10 @@ def query_xml(path, title): if tree.find("firstaired") != None: res["date"] = tree.find("firstaired").text - + + # This is based on how my version of Whisparr (v2.0.0.168) output the WDTV .xml + # It separated actors by " / " + # then for some reason had duplicated the name separated by " - " if tree.find("actor") != None and tree.find("actor").text: res["performers"] = [] for actor in tree.find("actor").text.split(" / "): From 1dd8af02f16e8d1ea0f593077589b9f385b99d25 Mon Sep 17 00:00:00 2001 From: DreamySleepyNightySnoozySnooze <136205037+DreamySleepyNightySnoozySnooze@users.noreply.github.com> Date: Sat, 17 Jun 2023 16:54:20 +1000 Subject: [PATCH 4/4] Update WhisparrWDTV.yml Add last modified date --- scrapers/WhisparrWDTV.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/scrapers/WhisparrWDTV.yml b/scrapers/WhisparrWDTV.yml index 765c227a8..6fb296295 100644 --- a/scrapers/WhisparrWDTV.yml +++ b/scrapers/WhisparrWDTV.yml @@ -5,3 +5,4 @@ sceneByFragment: - python - WhisparrWDTV.py - query +# Last Updated June 17, 2023