From 1ea3e18adf84927d0d3bac68b2ba8d28bb24f7c8 Mon Sep 17 00:00:00 2001
From: DreamySleepyNightySnoozySnooze <the.g.banger.is.coming@gmail.com>
Date: Sun, 11 Jun 2023 09:52:35 +1000
Subject: [PATCH 1/4] Add Whisparr WDTV metadata scraper

---
 scrapers/WhisparrWDTV.py  | 87 +++++++++++++++++++++++++++++++++++++++
 scrapers/WhisparrWDTV.yml |  7 ++++
 2 files changed, 94 insertions(+)
 create mode 100644 scrapers/WhisparrWDTV.py
 create mode 100644 scrapers/WhisparrWDTV.yml

diff --git a/scrapers/WhisparrWDTV.py b/scrapers/WhisparrWDTV.py
new file mode 100644
index 000000000..b2452a148
--- /dev/null
+++ b/scrapers/WhisparrWDTV.py
@@ -0,0 +1,87 @@
+import sys
+import pathlib
+
+import mimetypes
+import base64
+
+import json
+import xml.etree.ElementTree as ET
+
+import py_common.graphql as graphql
+import py_common.log as log
+"""  
+This script parses kodi nfo files for metadata. The .nfo file must be in the same directory as the video file and must be named exactly alike.
+"""
+
+# If you want to ingest image files from the .nfo the path to these files may need to be rewritten. Especially when using a docker container.
+rewriteBasePath = False
+# Example: Z:\Videos\Studio_XXX\example_cover.jpg -> /data/Studio_XXX/example_cover.jpg
+basePathBefore = 'Z:\Videos'
+basePathAfter = "/data"
+
+def query_xml(path, title):
+    res = {"title": title}
+    try:        
+        tree = ET.parse(path)
+    except Exception as e:
+        log.error(f'xml parsing failed:{e}')
+        print(json.dumps(res))
+        exit(1)
+    
+    if title == tree.find("episode_name").text:
+        log.info("Exact match found for " + title)
+    else:
+        log.info("No exact match found for " + title + ". Matching with " + tree.find("title").text + "!")
+    
+    # Extract metadata from xml
+    if tree.find("episode_name") != None:
+        res["title"] = tree.find("episode_name").text
+    
+    if tree.find("overview") != None:
+        res["details"] = tree.find("overview").text
+    
+    if tree.find("firstaired") != None:
+        res["date"] = tree.find("firstaired").text
+    
+    if tree.find("actor") != None and tree.find("actor").text:
+        res["performers"] = []
+        for actor in tree.find("actor").text.split(" / "):
+            res["performers"].append({"name": actor.split(" - ")[0]})
+    
+    if tree.find("series_name") != None:
+        res["studio"] = {"name":tree.find("series_name").text}
+    
+    return res
+
+def make_image_data_url(image_path):
+    # type: (str,) -> str
+    mime, _ = mimetypes.guess_type(image_path)
+    with open(image_path, 'rb') as img:
+        encoded = base64.b64encode(img.read()).decode()
+    return 'data:{0};base64,{1}'.format(mime, encoded)
+
+if sys.argv[1] == "query":
+    fragment = json.loads(sys.stdin.read())
+    s_id = fragment.get("id")
+    if not s_id:
+        log.error(f"No ID found")
+        sys.exit(1)
+    
+    # Assume that .nfo/.xml is named exactly alike the video file and is at the same location
+    # Query graphQL for the file path
+    scene = graphql.getScene(s_id)
+    if scene:
+        scene_path = scene.get("path")
+        if scene_path:
+            p = pathlib.Path(scene_path)
+            
+            res = {"title": fragment["title"]}
+            
+            f = p.with_suffix(".xml")
+            if f.is_file():
+                res = query_xml(f, fragment["title"])
+            else:
+                log.info(f"No nfo/xml files found for the scene: {p}")
+            
+            print(json.dumps(res))
+            exit(0)
diff --git a/scrapers/WhisparrWDTV.yml b/scrapers/WhisparrWDTV.yml
new file mode 100644
index 000000000..765c227a8
--- /dev/null
+++ b/scrapers/WhisparrWDTV.yml
@@ -0,0 +1,7 @@
+name: "Whisparr WDTV XML"
+sceneByFragment:
+    action: script
+    script:
+      - python
+      - WhisparrWDTV.py
+      - query

From 67921f67d2659198fc10c09ad64c42073c35ff73 Mon Sep 17 00:00:00 2001
From: DreamySleepyNightySnoozySnooze <the.g.banger.is.coming@gmail.com>
Date: Sun, 11 Jun 2023 10:05:27 +1000
Subject: [PATCH 2/4] Remove redundant code

---
 scrapers/WhisparrWDTV.py | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/scrapers/WhisparrWDTV.py b/scrapers/WhisparrWDTV.py
index b2452a148..1c9af30c1 100644
--- a/scrapers/WhisparrWDTV.py
+++ b/scrapers/WhisparrWDTV.py
@@ -13,12 +13,6 @@
 This script parses kodi nfo files for metadata. The .nfo file must be in the same directory as the video file and must be named exactly alike.
 """
 
-# If you want to ingest image files from the .nfo the path to these files may need to be rewritten. Especially when using a docker container.
-rewriteBasePath = False
-# Example: Z:\Videos\Studio_XXX\example_cover.jpg -> /data/Studio_XXX/example_cover.jpg
-basePathBefore = 'Z:\Videos'
-basePathAfter = "/data"
-
 def query_xml(path, title):
     res = {"title": title}
     try:        
@@ -53,13 +47,6 @@ def query_xml(path, title):
     
     return res
 
-def make_image_data_url(image_path):
-    # type: (str,) -> str
-    mime, _ = mimetypes.guess_type(image_path)
-    with open(image_path, 'rb') as img:
-        encoded = base64.b64encode(img.read()).decode()
-    return 'data:{0};base64,{1}'.format(mime, encoded)
-
 if sys.argv[1] == "query":
     fragment = json.loads(sys.stdin.read())
     s_id = fragment.get("id")
@@ -67,7 +54,7 @@ def make_image_data_url(image_path):
         log.error(f"No ID found")
         sys.exit(1)
     
-    # Assume that .nfo/.xml is named exactly alike the video file and is at the same location
+    # Assume that the .xml file has the same base name as the video file and is in the same directory
     # Query graphQL for the file path
     scene = graphql.getScene(s_id)
     if scene:
@@ -81,7 +68,7 @@ def make_image_data_url(image_path):
             if f.is_file():
                 res = query_xml(f, fragment["title"])
             else:
-                log.info(f"No nfo/xml files found for the scene: {p}")
+                log.info(f"No xml files found for the scene: {p}")
             
             print(json.dumps(res))
             exit(0)

From 578c38156b630c9416cd8590d1e16131f2d60768 Mon Sep 17 00:00:00 2001
From: DreamySleepyNightySnoozySnooze <the.g.banger.is.coming@gmail.com>
Date: Sun, 11 Jun 2023 10:17:32 +1000
Subject: [PATCH 3/4] Update comments

---
 scrapers/WhisparrWDTV.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/scrapers/WhisparrWDTV.py b/scrapers/WhisparrWDTV.py
index 1c9af30c1..a9bec056a 100644
--- a/scrapers/WhisparrWDTV.py
+++ b/scrapers/WhisparrWDTV.py
@@ -9,10 +9,17 @@
 
 import py_common.graphql as graphql
 import py_common.log as log
+
 """  
-This script parses kodi nfo files for metadata. The .nfo file must be in the same directory as the video file and must be named exactly alike.
-"""
+This script parses WDTV xml metadata files. 
+The .xml file must be in the same directory as the video file and must have the same base name.
+
+Code borrowed from the Kodi nfo scraper (see https://github.com/stashapp/CommunityScrapers/pull/689).
+The .nfo files exported from Whisparr did not contain all of the required details,
+whereas the WDTV format contains all of the needed information.
 
+This is not intended to be a generic WDTV metadata parser, but one that specifically parses the WDTV metadata written by Whisparr (based on version v2.0.0.168), to simplify the integration of Whisparr and Stash.
+"""
 def query_xml(path, title):
     res = {"title": title}
     try:        
@@ -36,7 +43,10 @@ def query_xml(path, title):
     
     if tree.find("firstaired") != None:
         res["date"] = tree.find("firstaired").text
-    
+
+    # This is based on how my version of Whisparr (v2.0.0.168) writes the WDTV .xml:
+    # actors are separated by " / ", and for some reason each entry contains
+    # the name duplicated and separated by " - ", so only the first part is kept.
     if tree.find("actor") != None and tree.find("actor").text:
         res["performers"] = []
         for actor in tree.find("actor").text.split(" / "):

From 1dd8af02f16e8d1ea0f593077589b9f385b99d25 Mon Sep 17 00:00:00 2001
From: DreamySleepyNightySnoozySnooze
 <136205037+DreamySleepyNightySnoozySnooze@users.noreply.github.com>
Date: Sat, 17 Jun 2023 16:54:20 +1000
Subject: [PATCH 4/4] Update WhisparrWDTV.yml

Add last modified date
---
 scrapers/WhisparrWDTV.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scrapers/WhisparrWDTV.yml b/scrapers/WhisparrWDTV.yml
index 765c227a8..6fb296295 100644
--- a/scrapers/WhisparrWDTV.yml
+++ b/scrapers/WhisparrWDTV.yml
@@ -5,3 +5,4 @@ sceneByFragment:
       - python
       - WhisparrWDTV.py
       - query
+# Last Updated June 17, 2023