Commit

Merge pull request Webperf-se#369 from cockroacher/main
pylint for engines\util.py and engines\sitespeed_result.py
7h3Rabbit authored Apr 7, 2024
2 parents b5ea5a0 + 3a1320f commit e2ef602
Showing 5 changed files with 62 additions and 98 deletions.
18 changes: 1 addition & 17 deletions carbon-rating.py
@@ -75,23 +75,7 @@ def main(argv):
sys.exit(2)
elif opt in ("-i", "--input"): # input file path
input_filename = arg

file_ending = ""
file_long_ending = ""
if (len(input_filename) > 4):
file_ending = input_filename[-4:].lower()
if (len(input_filename) > 7):
file_long_ending = input_filename[-7:].lower()

if file_long_ending == ".sqlite":
from engines.sqlite import read_sites, add_site, delete_site
elif (file_ending == ".csv"):
from engines.csv_engine import read_sites, add_site, delete_site
elif (file_ending == ".xml"): # https://example.com/sitemap.xml
from engines.sitemap import read_sites, add_site, delete_site
else:
from engines.json_engine import read_tests, read_sites, add_site, delete_site
pass
from engines.json_engine import read_tests
elif opt in ("-o", "--output"): # output file path
output_filename = arg
pass
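Note on the removed block: carbon-rating.py previously duplicated the suffix-based engine selection that default.py keeps in set_input_handlers; after this change it only needs read_tests from the JSON engine. A minimal sketch of that selection pattern, using hypothetical reader stubs rather than the project's real engine modules:

```python
from typing import Callable

# Hypothetical reader stubs standing in for the real engine modules
# (engines.sqlite, engines.csv_engine, engines.sitemap, engines.json_engine).
def read_sqlite(path: str) -> list:
    return []

def read_csv(path: str) -> list:
    return []

def read_sitemap(path: str) -> list:
    return []

def read_json(path: str) -> list:
    return []

def pick_reader(input_filename: str) -> Callable[[str], list]:
    """Select a reader by file name suffix, falling back to the JSON engine."""
    file_ending = input_filename[-4:].lower() if len(input_filename) > 4 else ""
    file_long_ending = input_filename[-7:].lower() if len(input_filename) > 7 else ""

    if file_long_ending == ".sqlite":
        return read_sqlite
    if file_ending == ".csv":
        return read_csv
    if file_ending == ".xml":  # e.g. https://example.com/sitemap.xml
        return read_sitemap
    return read_json

print(pick_reader("sites.csv").__name__)  # prints: read_csv
```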
20 changes: 6 additions & 14 deletions default.py
@@ -11,12 +11,8 @@
add_site as csv_add_site,\
delete_site as csv_delete_site,\
write_tests as csv_write_tests
from engines.sitemap import read_sites as sitemap_read_sites,\
add_site as sitemap_add_site,\
delete_site as sitemap_delete_site
from engines.sitespeed_result import read_sites as sitespeed_read_sites,\
add_site as sitespeed_add_site,\
delete_site as sitespeed_delete_site
from engines.sitemap import read_sites as sitemap_read_sites
from engines.sitespeed_result import read_sites as sitespeed_read_sites
from engines.webperf import read_sites as webperf_read_sites,\
add_site as webperf_add_site,\
delete_site as webperf_delete_site
@@ -336,15 +332,11 @@ def set_input_handlers(self, input_filename):
add_site = csv_add_site
delete_site = csv_delete_site
elif file_ending == ".xml" or file_long_ending == ".xml.gz":
# https://example.com/sitemap.xml
# https://example.com/sitemap.xml.gz
# https://example.com/sitemap.xml
# https://example.com/sitemap.xml.gz
read_sites = sitemap_read_sites
add_site = sitemap_add_site
delete_site = sitemap_delete_site
elif file_long_ending == ".result":
read_sites = sitespeed_read_sites
add_site = sitespeed_add_site
delete_site = sitespeed_delete_site
elif file_long_ending == ".webprf":
read_sites = webperf_read_sites
add_site = webperf_add_site
@@ -482,14 +474,14 @@ def main(argv):
options.input_skip,
options.input_take)

if options.add_url != '':
if options.add_url != '' and options.add_site is not None:
# check if website url should be added
options.sites = options.add_site(
options.input_filename,
options.add_url,
options.input_skip,
options.input_take)
elif options.delete_url != '':
elif options.delete_url != '' and options.delete_site is not None:
# check if website url should be deleted
options.sites = options.delete_site(
options.input_filename,
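The new `is not None` guards in main() reflect that the sitemap and sitespeed_result engines are now read-only and no longer export add_site/delete_site, so those handler slots may legitimately be None. A minimal sketch of the guard pattern, with hypothetical handler names:

```python
from typing import Callable, Optional

def csv_add_site(filename: str, url: str) -> list:
    """Hypothetical handler for a writable engine."""
    print(f"adding {url} to {filename}")
    return [url]

def pick_add_handler(file_ending: str) -> Optional[Callable[[str, str], list]]:
    # Writable engines return a handler; read-only engines leave it as None.
    if file_ending == ".csv":
        return csv_add_site
    return None  # e.g. .xml sitemaps and .result folders are read-only

add_site = pick_add_handler(".xml")
add_url = "https://example.com/"
if add_url != '' and add_site is not None:
    sites = add_site("sites.xml", add_url)
else:
    print("input engine is read-only, skipping add")
```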
43 changes: 0 additions & 43 deletions engines/sitemap.py
@@ -164,46 +164,3 @@ def get_root_element(sitemap_content):
root_element = element
break
return root_element


def add_site(input_url, _, input_skip, input_take):
"""
This function reads site data from a specific sitemap,
prints a warning message (because it is read only),
Parameters:
input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
input_skip (int): The number of lines to skip in the input file.
input_take (int): The number of lines to take from the input file after skipping.
Returns:
list: The list of sites read from the specified sitemap.
"""

print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
,"NO changes will be made")

sites = read_sites(input_url, input_skip, input_take)

return sites


def delete_site(input_url, _, input_skip, input_take):
"""
This function reads site data from a specific sitemap,
prints a warning message (because it is read only),
Parameters:
input_url (str): Absolute url to sitemap, .xml and .xml.bz fileendings are supported.
input_skip (int): The number of lines to skip in the input file.
input_take (int): The number of lines to take from the input file after skipping.
Returns:
list: The list of sites read from the specified sitemap.
"""
print("WARNING: sitemap engine is a read only method for testing all pages in a sitemap.xml,"
,"NO changes will be made")

sites = read_sites(input_url, input_skip, input_take)

return sites
62 changes: 38 additions & 24 deletions engines/sitespeed_result.py
@@ -3,54 +3,67 @@
from pathlib import Path
from urllib.parse import urlparse
import re
from engines.utils import use_item

def add_site(input_filename, url, input_skip, input_take):
    sites = []
    return sites

def delete_site(input_filename, url, input_skip, input_take):
    tmpSites = []
    return tmpSites

def get_url_from_file_content(input_filename):
    """
    Extracts the URL from the content of a HAR file.
    The function opens the file and reads the first 1024 bytes.
    It then uses a regular expression to find the URL in the read data.
    If the file does not exist, it prints an error message and returns None.
    Parameters:
        input_filename (str): The path of the HAR file from which to extract the URL.
    Returns:
        str: The extracted URL. Returns None if the file does not exist or no URL is found.
    """
try:
# No need to read all content, just read the first 1024 bytes as our url will be there
# we are doing this for performance
with open(input_filename, 'r', encoding='utf-8') as file:
data = file.read(1024)
regex = r"\"[_]{0,1}url\":[ ]{0,1}\"(?P<url>[^\"]+)\""
matches = re.finditer(regex, data, re.MULTILINE)
for matchNum, match in enumerate(matches, start=1):
for _, match in enumerate(matches, start=1):
return match.group('url')
except:
print('error in get_local_file_content. No such file or directory: {0}'.format(
input_filename))
except OSError:
print(f'Error. No such file or directory: {input_filename}')
return None

return None


def read_sites(hostname_or_argument, input_skip, input_take):
"""
Reads the sites from the cache directory based on the hostname or
the argument that ends with '.result'.
Parameters:
hostname_or_argument (str): The hostname or the argument that ends with '.result'.
input_skip (int): The number of items to skip from the start.
input_take (int): The number of items to take after skipping. If -1, takes all items.
Returns:
list: A list of sites where each site is represented as a
list containing the path to the HAR file and the URL.
"""
sites = []
hostname = hostname_or_argument
if hostname_or_argument.endswith('.result'):
tmp = hostname_or_argument[:hostname_or_argument.rfind('.result')]
o = urlparse(tmp)
hostname = o.hostname

if len(sites) > 0:
return sites
tmp_url = hostname_or_argument[:hostname_or_argument.rfind('.result')]
hostname = urlparse(tmp_url).hostname

dir = Path(os.path.dirname(
base_directory = Path(os.path.dirname(
os.path.realpath(__file__)) + os.path.sep).parent

data_dir = os.path.join(dir, 'cache', hostname) + os.path.sep
if not os.path.exists(data_dir):
cache_dir = os.path.join(base_directory, 'cache', hostname) + os.path.sep
if not os.path.exists(cache_dir):
return sites

dirs = os.listdir(data_dir)
dirs = os.listdir(cache_dir)

urls = {}

@@ -62,14 +75,15 @@ def read_sites(hostname_or_argument, input_skip, input_take):
continue

full_path = os.path.join(
data_dir, file_name)
cache_dir, file_name)

url = get_url_from_file_content(full_path)
urls[url] = full_path

current_index = 0
for tmp_url in urls.keys():
sites.append([urls[tmp_url], tmp_url])
for url, har_path in urls.items():
if use_item(current_index, input_skip, input_take):
sites.append([har_path, url])
current_index += 1

return sites
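To make the new get_url_from_file_content concrete: it reads only the first 1024 bytes of a HAR file and pulls the page URL out with a regular expression. A small self-contained demonstration of that pattern (the sample JSON snippet below is made up):

```python
import re

# Same pattern as in get_url_from_file_content: matches "url" or "_url"
# keys, with or without a space before the quoted value.
regex = r"\"[_]{0,1}url\":[ ]{0,1}\"(?P<url>[^\"]+)\""

# Hypothetical first bytes of a HAR file.
sample = '{"log": {"pages": [{"_url": "https://example.com/start", "id": "page_1"}]}}'

match = next(re.finditer(regex, sample, re.MULTILINE), None)
print(match.group('url') if match else None)  # https://example.com/start
```

read_sites then pairs each extracted URL with its HAR path and, via use_item, honours input_skip and input_take the same way the other engines do.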
17 changes: 17 additions & 0 deletions engines/utils.py
@@ -2,6 +2,23 @@


def use_item(current_index, skip, take):
"""
Determines whether an item at a given index should be used based on the skip and
take parameters.
Parameters:
current_index (int): The index of the current item.
skip (int): The number of items to skip.
take (int): The number of items to take after skipping. If -1, takes all items.
Returns:
bool: True if the item should be used, False otherwise.
The function returns False if the current index is less than the number of items to skip or
if the current index is greater than or
equal to the sum of the skip and take parameters (unless take is -1).
Otherwise, it returns True.
"""
if skip > 0 and current_index < skip:
return False

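The diff cuts the function off after the skip check, but the new docstring describes the full behaviour. A sketch of use_item as the docstring implies it, where the take branch is an assumption based on that description rather than the verbatim file contents:

```python
def use_item(current_index, skip, take):
    """Return True if the item at current_index survives the skip/take window."""
    if skip > 0 and current_index < skip:
        return False
    # Assumed continuation: take == -1 means "take everything after the skip".
    if take != -1 and current_index >= skip + take:
        return False
    return True

# skip=1, take=2 keeps indices 1 and 2 out of 0..4
print([i for i in range(5) if use_item(i, 1, 2)])  # [1, 2]
```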
