Commit

Merge pull request Webperf-se#369 from cockroacher/main
pylint for engines\util.py and engines\sitespeed_result.py
7h3Rabbit authored Apr 7, 2024
2 parents b5ea5a0 + 3a1320f commit e2ef602
Showing 5 changed files with 62 additions and 98 deletions.
18 changes: 1 addition & 17 deletions carbon-rating.py
@@ -75,23 +75,7 @@ def main(argv):
sys.exit(2)
elif opt in ("-i", "--input"): # input file path
input_filename = arg

file_ending = ""
file_long_ending = ""
if (len(input_filename) > 4):
file_ending = input_filename[-4:].lower()
if (len(input_filename) > 7):
file_long_ending = input_filename[-7:].lower()

if file_long_ending == ".sqlite":
from engines.sqlite import read_sites, add_site, delete_site
elif (file_ending == ".csv"):
from engines.csv_engine import read_sites, add_site, delete_site
elif (file_ending == ".xml"): # https://example.com/sitemap.xml
from engines.sitemap import read_sites, add_site, delete_site
else:
from engines.json_engine import read_tests, read_sites, add_site, delete_site
pass
from engines.json_engine import read_tests
elif opt in ("-o", "--output"): # output file path
output_filename = arg
pass
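Note on the removed block: carbon-rating.py previously duplicated the suffix-based engine selection that default.py keeps in set_input_handlers; after this change it only needs read_tests from the JSON engine. A minimal sketch of that selection pattern, using hypothetical reader stubs rather than the project's real engine modules:

```python
from typing import Callable

# Hypothetical reader stubs standing in for the real engine modules
# (engines.sqlite, engines.csv_engine, engines.sitemap, engines.json_engine).
def read_sqlite(path: str) -> list:
    return []

def read_csv(path: str) -> list:
    return []

def read_sitemap(path: str) -> list:
    return []

def read_json(path: str) -> list:
    return []

def pick_reader(input_filename: str) -> Callable[[str], list]:
    """Select a reader by file name suffix, falling back to the JSON engine."""
    file_ending = input_filename[-4:].lower() if len(input_filename) > 4 else ""
    file_long_ending = input_filename[-7:].lower() if len(input_filename) > 7 else ""

    if file_long_ending == ".sqlite":
        return read_sqlite
    if file_ending == ".csv":
        return read_csv
    if file_ending == ".xml":  # e.g. https://example.com/sitemap.xml
        return read_sitemap
    return read_json

print(pick_reader("sites.csv").__name__)  # prints: read_csv
```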
20 changes: 6 additions & 14 deletions default.py
@@ -11,12 +11,8 @@
add_site as csv_add_site,\
delete_site as csv_delete_site,\
write_tests as csv_write_tests
from engines.sitemap import read_sites as sitemap_read_sites,\
add_site as sitemap_add_site,\
delete_site as sitemap_delete_site
from engines.sitespeed_result import read_sites as sitespeed_read_sites,\
add_site as sitespeed_add_site,\
delete_site as sitespeed_delete_site
from engines.sitemap import read_sites as sitemap_read_sites
from engines.sitespeed_result import read_sites as sitespeed_read_sites
from engines.webperf import read_sites as webperf_read_sites,\
add_site as webperf_add_site,\
delete_site as webperf_delete_site
@@ -336,15 +332,11 @@ def set_input_handlers(self, input_filename):
add_site = csv_add_site
delete_site = csv_delete_site
elif file_ending == ".xml" or file_long_ending == ".xml.gz":
# https://example.com/sitemap.xml
# https://example.com/sitemap.xml.gz
# https://example.com/sitemap.xml
# https://example.com/sitemap.xml.gz
read_sites = sitemap_read_sites
add_site = sitemap_add_site
delete_site = sitemap_delete_site
elif file_long_ending == ".result":
read_sites = sitespeed_read_sites
add_site = sitespeed_add_site
delete_site = sitespeed_delete_site
elif file_long_ending == ".webprf":
read_sites = webperf_read_sites
add_site = webperf_add_site
@@ -482,14 +474,14 @@ def main(argv):
options.input_skip,
options.input_take)

if options.add_url != '':
if options.add_url != '' and options.add_site is not None:
# check if website url should be added
options.sites = options.add_site(
options.input_filename,
options.add_url,
options.input_skip,
options.input_take)
elif options.delete_url != '':
elif options.delete_url != '' and options.delete_site is not None:
# check if website url should be deleted
options.sites = options.delete_site(
options.input_filename,
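The new `is not None` guards in main() reflect that the sitemap and sitespeed_result engines are now read-only and no longer export add_site/delete_site, so those handler slots may legitimately be None. A minimal sketch of the guard pattern, with hypothetical handler names:

```python
from typing import Callable, Optional

def csv_add_site(filename: str, url: str) -> list:
    """Hypothetical handler for a writable engine."""
    print(f"adding {url} to {filename}")
    return [url]

def pick_add_handler(file_ending: str) -> Optional[Callable[[str, str], list]]:
    # Writable engines return a handler; read-only engines leave it as None.
    if file_ending == ".csv":
        return csv_add_site
    return None  # e.g. .xml sitemaps and .result folders are read-only

add_site = pick_add_handler(".xml")
add_url = "https://example.com/"
if add_url != '' and add_site is not None:
    sites = add_site("sites.xml", add_url)
else:
    print("input engine is read-only, skipping add")
```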
43 changes: 0 additions & 43 deletions engines/sitemap.py
@@ -164,46 +164,3 @@ def get_root_element(sitemap_content):
root_element = element
break
return root_element


def add_site(input_url, _, input_skip, input_take):
"""
This function reads site data from a specific sitemap,
prints a warning message (because it is read only),
Parameters:
input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
input_skip (int): The number of lines to skip in the input file.
input_take (int): The number of lines to take from the input file after skipping.
Returns:
list: The list of sites read from the specified sitemap.
"""

print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
,"NO changes will be made")

sites = read_sites(input_url, input_skip, input_take)

return sites


def delete_site(input_url, _, input_skip, input_take):
"""
This function reads site data from a specific sitemap,
prints a warning message (because it is read only),
Parameters:
input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
input_skip (int): The number of lines to skip in the input file.
input_take (int): The number of lines to take from the input file after skipping.
Returns:
list: The list of sites read from the specified sitemap.
"""
print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
,"NO changes will be made")

sites = read_sites(input_url, input_skip, input_take)

return sites
62 changes: 38 additions & 24 deletions engines/sitespeed_result.py
@@ -3,54 +3,67 @@
from pathlib import Path
from urllib.parse import urlparse
import re
from engines.utils import use_item

def add_site(input_filename, url, input_skip, input_take):
    sites = []
    return sites

def delete_site(input_filename, url, input_skip, input_take):
    tmpSites = []
    return tmpSites

def get_url_from_file_content(input_filename):
    """
    Extracts the URL from the content of a HAR file.
    The function opens the file and reads the first 1024 bytes.
    It then uses a regular expression to find the URL in the read data.
    If the file does not exist, it prints an error message and returns None.
    Parameters:
        input_filename (str): The path of the HAR file from which to extract the URL.
    Returns:
        str: The extracted URL. Returns None if the file does not exist or no URL is found.
    """
try:
# No need to read all content, just read the first 1024 bytes as our url will be there
# we are doing this for performance
with open(input_filename, 'r', encoding='utf-8') as file:
data = file.read(1024)
regex = r"\"[_]{0,1}url\":[ ]{0,1}\"(?P<url>[^\"]+)\""
matches = re.finditer(regex, data, re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
for _, match in enumerate(matches, start=1):
return match.group('url')
except:
print('error in get_local_file_content. No such file or directory: {0}'.format(
input_filename))
except OSError:
print(f'Error. No such file or directory: {input_filename}')
return None

return None


def read_sites(hostname_or_argument, input_skip, input_take):
"""
Reads the sites from the cache directory based on the hostname or
the argument that ends with '.result'.
Parameters:
hostname_or_argument (str): The hostname or the argument that ends with '.result'.
input_skip (int): The number of items to skip from the start.
input_take (int): The number of items to take after skipping. If -1, takes all items.
Returns:
list: A list of sites where each site is represented as a
list containing the path to the HAR file and the URL.
"""
sites = []
hostname = hostname_or_argument
if hostname_or_argument.endswith('.result'):
tmp = hostname_or_argument[:hostname_or_argument.rfind('.result')]
o = urlparse(tmp)
hostname = o.hostname

if len(sites) > 0:
return sites
tmp_url = hostname_or_argument[:hostname_or_argument.rfind('.result')]
hostname = urlparse(tmp_url).hostname

dir = Path(os.path.dirname(
base_directory = Path(os.path.dirname(
os.path.realpath(__file__)) + os.path.sep).parent

data_dir = os.path.join(dir, 'cache', hostname) + os.path.sep
if not os.path.exists(data_dir):
cache_dir = os.path.join(base_directory, 'cache', hostname) + os.path.sep
if not os.path.exists(cache_dir):
return sites

dirs = os.listdir(data_dir)
dirs = os.listdir(cache_dir)

urls = {}

@@ -62,14 +75,15 @@ def read_sites(hostname_or_argument, input_skip, input_take):
continue

full_path = os.path.join(
data_dir, file_name)
cache_dir, file_name)

url = get_url_from_file_content(full_path)
urls[url] = full_path

current_index = 0
for tmp_url in urls.keys():
sites.append([urls[tmp_url], tmp_url])
for url, har_path in urls.items():
if use_item(current_index, input_skip, input_take):
sites.append([har_path, url])
current_index += 1

return sites
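To make the new get_url_from_file_content concrete: it reads only the first 1024 bytes of a HAR file and pulls the page URL out with a regular expression. A small self-contained demonstration of that pattern (the sample JSON snippet below is made up):

```python
import re

# Same pattern as in get_url_from_file_content: matches "url" or "_url"
# keys, with or without a space before the quoted value.
regex = r"\"[_]{0,1}url\":[ ]{0,1}\"(?P<url>[^\"]+)\""

# Hypothetical first bytes of a HAR file.
sample = '{"log": {"pages": [{"_url": "https://example.com/start", "id": "page_1"}]}}'

match = next(re.finditer(regex, sample, re.MULTILINE), None)
print(match.group('url') if match else None)  # https://example.com/start
```

read_sites then pairs each extracted URL with its HAR path and, via use_item, honours input_skip and input_take the same way the other engines do.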
17 changes: 17 additions & 0 deletions engines/utils.py
@@ -2,6 +2,23 @@


def use_item(current_index, skip, take):
"""
Determines whether an item at a given index should be used based on the skip and
take parameters.
Parameters:
current_index (int): The index of the current item.
skip (int): The number of items to skip.
take (int): The number of items to take after skipping. If -1, takes all items.
Returns:
bool: True if the item should be used, False otherwise.
The function returns False if the current index is less than the number of items to skip or
if the current index is greater than or
equal to the sum of the skip and take parameters (unless take is -1).
Otherwise, it returns True.
"""
if skip > 0 and current_index < skip:
return False

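The diff cuts the function off after the skip check, but the new docstring describes the full behaviour. A sketch of use_item as the docstring implies it, where the take branch is an assumption based on that description rather than the verbatim file contents:

```python
def use_item(current_index, skip, take):
    """Return True if the item at current_index survives the skip/take window."""
    if skip > 0 and current_index < skip:
        return False
    # Assumed continuation: take == -1 means "take everything after the skip".
    if take != -1 and current_index >= skip + take:
        return False
    return True

# skip=1, take=2 keeps indices 1 and 2 out of 0..4
print([i for i in range(5) if use_item(i, 1, 2)])  # [1, 2]
```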
