From ff0406eaae5d68732f39ced8e7e4abc4c86637f5 Mon Sep 17 00:00:00 2001
From: cockroacher <163405488+cockroacher@users.noreply.github.com>
Date: Sun, 7 Apr 2024 14:31:09 +0200
Subject: [PATCH 1/2] pylint

---
 carbon-rating.py | 18 +----------
 default.py | 20 ++++--------
 engines/sitemap.py | 43 -------------------------
 engines/sitespeed_result.py | 62 +++++++++++++++++++++++--------------
 4 files changed, 45 insertions(+), 98 deletions(-)

diff --git a/carbon-rating.py b/carbon-rating.py
index 44f47a23..42105dbb 100644
--- a/carbon-rating.py
+++ b/carbon-rating.py
@@ -75,23 +75,7 @@ def main(argv):
             sys.exit(2)
         elif opt in ("-i", "--input"):  # input file path
             input_filename = arg
-
-            file_ending = ""
-            file_long_ending = ""
-            if (len(input_filename) > 4):
-                file_ending = input_filename[-4:].lower()
-            if (len(input_filename) > 7):
-                file_long_ending = input_filename[-7:].lower()
-
-            if file_long_ending == ".sqlite":
-                from engines.sqlite import read_sites, add_site, delete_site
-            elif (file_ending == ".csv"):
-                from engines.csv_engine import read_sites, add_site, delete_site
-            elif (file_ending == ".xml"):  # https://example.com/sitemap.xml
-                from engines.sitemap import read_sites, add_site, delete_site
-            else:
-                from engines.json_engine import read_tests, read_sites, add_site, delete_site
-            pass
+            from engines.json_engine import read_tests
         elif opt in ("-o", "--output"):  # output file path
             output_filename = arg
             pass
diff --git a/default.py b/default.py
index 8c8b313d..08328509 100644
--- a/default.py
+++ b/default.py
@@ -11,12 +11,8 @@
     add_site as csv_add_site,\
     delete_site as csv_delete_site,\
     write_tests as csv_write_tests
-from engines.sitemap import read_sites as sitemap_read_sites,\
-    add_site as sitemap_add_site,\
-    delete_site as sitemap_delete_site
-from engines.sitespeed_result import read_sites as sitespeed_read_sites,\
-    add_site as sitespeed_add_site,\
-    delete_site as sitespeed_delete_site
+from engines.sitemap import read_sites as sitemap_read_sites
+from engines.sitespeed_result import read_sites as sitespeed_read_sites
 from engines.webperf import read_sites as webperf_read_sites,\
     add_site as webperf_add_site,\
     delete_site as webperf_delete_site
@@ -336,15 +332,11 @@ def set_input_handlers(self, input_filename):
             add_site = csv_add_site
             delete_site = csv_delete_site
         elif file_ending == ".xml" or file_long_ending == ".xml.gz":
-            # https://example.com/sitemap.xml
-            # https://example.com/sitemap.xml.gz
+            # https://example.com/sitemap.xml
+            # https://example.com/sitemap.xml.gz
             read_sites = sitemap_read_sites
-            add_site = sitemap_add_site
-            delete_site = sitemap_delete_site
         elif file_long_ending == ".result":
             read_sites = sitespeed_read_sites
-            add_site = sitespeed_add_site
-            delete_site = sitespeed_delete_site
         elif file_long_ending == ".webprf":
             read_sites = webperf_read_sites
             add_site = webperf_add_site
@@ -482,14 +474,14 @@ def main(argv):
             options.input_skip,
             options.input_take)
 
-    if options.add_url != '':
+    if options.add_url != '' and options.add_site is not None:
         # check if website url should be added
         options.sites = options.add_site(
             options.input_filename,
             options.add_url,
             options.input_skip,
             options.input_take)
-    elif options.delete_url != '':
+    elif options.delete_url != '' and options.delete_site is not None:
         # check if website url should be deleted
         options.sites = options.delete_site(
             options.input_filename,
diff --git a/engines/sitemap.py b/engines/sitemap.py
index d86c995c..5369441e 100644
--- a/engines/sitemap.py
+++ b/engines/sitemap.py
@@ -164,46 +164,3 @@ def get_root_element(sitemap_content):
             root_element = element
             break
     return root_element
-
-
-def add_site(input_url, _, input_skip, input_take):
-    """
-    This function reads site data from a specific sitemap,
-    prints a warning message (because it is read only),
-
-    Parameters:
-    input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
-    input_skip (int): The number of lines to skip in the input file.
-    input_take (int): The number of lines to take from the input file after skipping.
-
-    Returns:
-    list: The list of sites read from the specified sitemap.
-    """
-
-    print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
-          ,"NO changes will be made")
-
-    sites = read_sites(input_url, input_skip, input_take)
-
-    return sites
-
-
-def delete_site(input_url, _, input_skip, input_take):
-    """
-    This function reads site data from a specific sitemap,
-    prints a warning message (because it is read only),
-
-    Parameters:
-    input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
-    input_skip (int): The number of lines to skip in the input file.
-    input_take (int): The number of lines to take from the input file after skipping.
-
-    Returns:
-    list: The list of sites read from the specified sitemap.
-    """
-    print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
-          ,"NO changes will be made")
-
-    sites = read_sites(input_url, input_skip, input_take)
-
-    return sites
diff --git a/engines/sitespeed_result.py b/engines/sitespeed_result.py
index 8ebda004..09c6d504 100644
--- a/engines/sitespeed_result.py
+++ b/engines/sitespeed_result.py
@@ -3,18 +3,23 @@
 from pathlib import Path
 from urllib.parse import urlparse
 import re
+from engines.utils import use_item
 
-def add_site(input_filename, url, input_skip, input_take):
-    sites = []
-    return sites
+def get_url_from_file_content(input_filename):
+    """
+    Extracts the URL from the content of a HAR file.
 
+    The function opens the file and reads the first 1024 bytes.
+    It then uses a regular expression to find the URL in the read data.
+    If the file does not exist, it prints an error message and returns None.
 
-def delete_site(input_filename, url, input_skip, input_take):
-    tmpSites = []
-    return tmpSites
+    Parameters:
+    input_filename (str): The path of the HAR file from which to extract the URL.
 
+    Returns:
+    str: The extracted URL. Returns None if the file does not exist or no URL is found.
 
-def get_url_from_file_content(input_filename):
+    """
     try:
         # No need to read all content, just read the first 1024 bytes as our url will be there
         # we are doing this for performance
@@ -22,35 +27,43 @@ def get_url_from_file_content(input_filename):
             data = file.read(1024)
             regex = r"\"[_]{0,1}url\":[ ]{0,1}\"(?P<url>[^\"]+)\""
             matches = re.finditer(regex, data, re.MULTILINE)
-            for matchNum, match in enumerate(matches, start=1):
+            for _, match in enumerate(matches, start=1):
                 return match.group('url')
-    except:
-        print('error in get_local_file_content. No such file or directory: {0}'.format(
-            input_filename))
+    except OSError:
+        print(f'Error. No such file or directory: {input_filename}')
         return None
     return None
 
 
 def read_sites(hostname_or_argument, input_skip, input_take):
+    """
+    Reads the sites from the cache directory based on the hostname or
+    the argument that ends with '.result'.
+
+    Parameters:
+    hostname_or_argument (str): The hostname or the argument that ends with '.result'.
+    input_skip (int): The number of items to skip from the start.
+    input_take (int): The number of items to take after skipping. If -1, takes all items.
+
+    Returns:
+    list: A list of sites where each site is represented as a
+    list containing the path to the HAR file and the URL.
+    """
     sites = []
     hostname = hostname_or_argument
     if hostname_or_argument.endswith('.result'):
-        tmp = hostname_or_argument[:hostname_or_argument.rfind('.result')]
-        o = urlparse(tmp)
-        hostname = o.hostname
-
-    if len(sites) > 0:
-        return sites
+        tmp_url = hostname_or_argument[:hostname_or_argument.rfind('.result')]
+        hostname = urlparse(tmp_url).hostname
 
-    dir = Path(os.path.dirname(
+    base_directory = Path(os.path.dirname(
         os.path.realpath(__file__)) + os.path.sep).parent
-    data_dir = os.path.join(dir, 'cache', hostname) + os.path.sep
-    if not os.path.exists(data_dir):
+    cache_dir = os.path.join(base_directory, 'cache', hostname) + os.path.sep
+    if not os.path.exists(cache_dir):
         return sites
 
-    dirs = os.listdir(data_dir)
+    dirs = os.listdir(cache_dir)
 
     urls = {}
 
@@ -62,14 +75,15 @@ def read_sites(hostname_or_argument, input_skip, input_take):
             continue
 
         full_path = os.path.join(
-            data_dir, file_name)
+            cache_dir, file_name)
         url = get_url_from_file_content(full_path)
         urls[url] = full_path
 
     current_index = 0
-    for tmp_url in urls.keys():
-        sites.append([urls[tmp_url], tmp_url])
+    for url, har_path in urls.items():
+        if use_item(current_index, input_skip, input_take):
+            sites.append([har_path, url])
         current_index += 1
 
     return sites

From 3a1320f3aa1dccf533ec5e22a17631983dfca62b Mon Sep 17 00:00:00 2001
From: cockroacher <163405488+cockroacher@users.noreply.github.com>
Date: Sun, 7 Apr 2024 14:38:10 +0200
Subject: [PATCH 2/2] pylint engines\utils.py

---
 engines/utils.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/engines/utils.py b/engines/utils.py
index 3fd0e13d..e7b50b05 100644
--- a/engines/utils.py
+++ b/engines/utils.py
@@ -2,6 +2,23 @@
 
 
 def use_item(current_index, skip, take):
+    """
+    Determines whether an item at a given index should be used based on the skip and
+    take parameters.
+
+    Parameters:
+    current_index (int): The index of the current item.
+    skip (int): The number of items to skip.
+    take (int): The number of items to take after skipping. If -1, takes all items.
+
+    Returns:
+    bool: True if the item should be used, False otherwise.
+
+    The function returns False if the current index is less than the number of items to skip or
+    if the current index is greater than or
+    equal to the sum of the skip and take parameters (unless take is -1).
+    Otherwise, it returns True.
+    """
     if skip > 0 and current_index < skip:
         return False
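Note: the excerpt of PATCH 2/2 is cut off right after the first guard in use_item(), so the full body is not visible above. Below is a minimal sketch of the behaviour the new docstring describes (skip the first "skip" items, then take "take" items, with -1 meaning take everything), wired up the way read_sites() in PATCH 1/2 now calls it. It is inferred from the docstring, not the repository code verbatim, and the all_sites list is a made-up stand-in for the [har_path, url] pairs:

def use_item(current_index, skip, take):
    # Items before the skip offset are never used.
    if skip > 0 and current_index < skip:
        return False
    # With take == -1 everything after the skip offset is used;
    # otherwise stop once skip + take items have been passed.
    if take != -1 and current_index >= (skip + take):
        return False
    return True

# Hypothetical usage: skip the first 2 sites, take the next 3.
all_sites = [[f"cache/example/{i}.har", f"https://example.com/page{i}"] for i in range(10)]
selected = [site for index, site in enumerate(all_sites) if use_item(index, 2, 3)]
print(selected)  # keeps pages 2, 3 and 4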
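Note: get_url_from_file_content() in PATCH 1/2 relies on the named group in the regular expression kept as diff context; the group name url is what match.group('url') reads back. The snippet below is only an illustration with the same pattern; the sample string is invented and merely mimics the first bytes of a HAR file:

import re

# Same pattern as in engines/sitespeed_result.py: matches "url": "..." or "_url": "..."
HAR_URL_REGEX = r"\"[_]{0,1}url\":[ ]{0,1}\"(?P<url>[^\"]+)\""

sample = '{"log": {"entries": [{"request": {"url": "https://example.com/"}}]}}'
match = next(re.finditer(HAR_URL_REGEX, sample, re.MULTILINE), None)
print(match.group('url') if match else None)  # -> https://example.com/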