Skip to content

Commit

Permalink
Merge pull request Webperf-se#812 from Webperf-se/har-spec-1-2-valida…
Browse files Browse the repository at this point in the history
…tion

Added more HAR 1.2 validation
  • Loading branch information
7h3Rabbit authored Dec 22, 2024
2 parents e78c11d + d82ea04 commit 9135c66
Showing 1 changed file with 106 additions and 25 deletions.
131 changes: 106 additions & 25 deletions .github/workflows/verify_browsertime_har.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ def main(argv):
"""

try:
opts, _ = getopt.getopt(argv, "hb:", [
opts, _ = getopt.getopt(argv, "hfb:", [
"help",
"full",
"browsertime="])
except getopt.GetoptError:
print(main.__doc__)
Expand All @@ -37,23 +38,26 @@ def main(argv):

full_validation = False
validate = False
browsertime_har = None
for opt, arg in opts:
if opt in ('-h', '--help'): # help
print(main.__doc__)
sys.exit(0)
elif opt in ('-f', '--full'): # validate also stuff that is not used by webperf_core
full_validation = True
elif opt in ("-b", "--browsertime"): # validate browsertime.har for supported browsers
browsertime_har = arg
validate = True

if validate:
validate_browsertime(arg, full_validation)
validate_browsertime(browsertime_har, full_validation)


# No match for command so return error code to fail verification
sys.exit(2)

def validate_browsertime(browsertime_har_path, full_validation):
# http://www.softwareishard.com/blog/har-12-spec/
is_ok = True
with open(browsertime_har_path, encoding='utf-8') as json_input_file:
browsertime_har = json.load(json_input_file)
Expand All @@ -68,31 +72,32 @@ def validate_browsertime(browsertime_har_path, full_validation):
is_ok = validate_creator(browsertime_har) and is_ok
is_ok = validate_browser(browsertime_har) and is_ok

is_ok = validate_pages(browsertime_har, full_validation) and is_ok
is_ok = validate_entries(browsertime_har, full_validation) and is_ok
pages_is_ok, page_refs = validate_pages(browsertime_har, full_validation)
is_ok = pages_is_ok and is_ok
is_ok = validate_entries(browsertime_har, page_refs, full_validation) and is_ok

if is_ok:
print('browsertime.har file is OK')
sys.exit(0)
else:
sys.exit(2)

def validate_entries(browsertime_har, full_validation):
def validate_entries(browsertime_har, page_refs, full_validation):
is_ok = True
if 'entries' not in browsertime_har['log']:
print('Error: log.entries array is missing in browsertime.har file')
is_ok = False
else:
entry_index = 0
for entry in browsertime_har['log']['entries']:
is_ok = validate_entry(entry_index, entry, full_validation) and is_ok
is_ok = validate_entry(entry_index, entry, page_refs, full_validation) and is_ok
entry_index += 1
if entry_index < 1:
print('Error: log.entries array has less than 1 entry in browsertime.har file')
is_ok = False
return is_ok

def validate_entry(entry_index, entry, full_validation):
def validate_entry(entry_index, entry, page_refs, full_validation):
is_ok = True

is_ok = validate_entry_request(entry_index, entry, full_validation) and is_ok
Expand All @@ -118,18 +123,39 @@ def validate_entry(entry_index, entry, full_validation):
if 'timings' not in entry:
print(f'Error: log.entries[{entry_index}].timings is missing in browsertime.har file')
is_ok = False
else:
if 'send' not in entry['timings']:
print(f'Error: log.entries[{entry_index}].timings.send is missing in browsertime.har file')
is_ok = False
elif not isinstance(entry['timings']['send'], float) and not isinstance(entry['timings']['send'], int) and entry['timings']['send'] < 0:
print(f'Error: log.entries[{entry_index}].timings.send property is wrong value in browsertime.har file')
is_ok = False

if 'wait' not in entry['timings']:
print(f'Error: log.entries[{entry_index}].timings.wait is missing in browsertime.har file')
is_ok = False
elif not isinstance(entry['timings']['wait'], float) and not isinstance(entry['timings']['wait'], int) and entry['timings']['wait'] < 0:
print(f'Error: log.entries[{entry_index}].timings.wait property is wrong value in browsertime.har file')
is_ok = False

if 'receive' not in entry['timings']:
print(f'Error: log.entries[{entry_index}].timings.receive is missing in browsertime.har file')
is_ok = False
elif not isinstance(entry['timings']['receive'], float) and not isinstance(entry['timings']['receive'], int) and entry['timings']['receive'] < 0:
print(f'Error: log.entries[{entry_index}].timings.receive property is wrong value in browsertime.har file')
is_ok = False

if 'pageref' not in entry:
print(f'Error: log.entries[{entry_index}].pageref is missing in browsertime.har file')
is_ok = False
elif entry['pageref'] != 'page_1':
elif entry['pageref'] not in page_refs:
print(f'Error: log.entries[{entry_index}].pageref has wrong value, actual value: {entry['pageref']}')
is_ok = False

if 'time' not in entry:
print(f'Error: log.entries[{entry_index}].time is missing in browsertime.har file')
is_ok = False
elif not isinstance(entry['time'], float):
elif not isinstance(entry['time'], float) and not isinstance(entry['time'], int):
print(f'Error: log.entries[{entry_index}].time has wrong value, actual value: {entry['time']}')
is_ok = False

Expand All @@ -144,7 +170,19 @@ def validate_entry_response(entry_index, entry, full_validation):
if 'status' not in entry['response']:
print(f'Error: log.entries[{entry_index}].response.status is missing in browsertime.har file')
is_ok = False
elif entry['response']['status'] not in(200, 204):
elif not isinstance(entry['response']['status'], int):
print(f'Error: log.entries[{entry_index}].response.status has wrong value, actual value: {entry['response']['status']}')
is_ok = False
elif entry['response']['status'] not in (
100, 101, 102, 103,
200, 201, 202, 203, 204, 205, 206, 207, 208, 226,
300, 301, 302, 303, 304, 307, 308,
400, 401, 402, 403, 404, 405, 406, 407, 408, 409,
410, 411, 412, 413, 414, 415, 416, 417, 418,
421, 422, 423, 424, 425, 426, 428, 429,
431, 451,
500, 501, 502, 503, 504, 505, 506, 507, 508, 510, 511
):
print(f'Error: log.entries[{entry_index}].response.status has wrong value, actual value: {entry['response']['status']}')
is_ok = False

Expand All @@ -162,10 +200,17 @@ def validate_entry_response(entry_index, entry, full_validation):
if 'size' not in entry['response']['content']:
print(f'Error: log.entries[{entry_index}].response.content.size is missing in browsertime.har file')
is_ok = False
elif not isinstance(entry['response']['content']['size'], int):
print(f'Error: log.entries[{entry_index}].response.content.size has wrong value, actual value: {entry['response']['content']['size']}')
is_ok = False
elif entry['response']['content']['size'] < 1 and ('status' not in entry['response'] and entry['response']['status'] != 204):
print(f'Error: log.entries[{entry_index}].response.content.size has wrong value, actual value: {entry['response']['content']['size']}')
is_ok = False

if full_validation and 'compression' in entry['response']['content'] and not isinstance(entry['response']['content']['compression'], int):
print(f'Error: log.entries[{entry_index}].response.content.compression has wrong value type, actual value type: {type(entry['response']['content']['compression'])}')
is_ok = False

if 'text' not in entry['response']['content']:
if 'mimeType' in entry['response']['content'] and entry['response']['content']['mimeType'] in ('text/html;charset=utf-8', 'text/css', 'text/javascript'):
print(f'Error: log.entries[{entry_index}].response.content.text is missing in browsertime.har file {entry['response']['content']['mimeType']}')
Expand All @@ -175,11 +220,11 @@ def validate_entry_response(entry_index, entry, full_validation):
a = 1
elif len(entry['response']['content']['text']) < 1 and\
('mimeType' in entry['response']['content'] and entry['response']['content']['mimeType'] in ('text/html', 'text/html;charset=utf-8', 'text/html;charset=UTF-8', 'text/css', 'text/javascript')):
print(f'Warning: log.entries[{entry_index}].response.content.text has wrong value, actual value: {entry['response']['content']['text']}, content-type: {entry['response']['content']['mimeType']}')
# NOTE / WORKAROUND:
# We would prefer to be able to use Firefox for this but currently we have a workaround
# in place to only use chrome for when we need to read content of text based files.
if full_validation:
print(f'Error: log.entries[{entry_index}].response.content.text has wrong value, actual value: {entry['response']['content']['text']}, content-type: {entry['response']['content']['mimeType']}')
is_ok = False

if 'httpVersion' not in entry['response']:
Expand All @@ -197,7 +242,7 @@ def validate_entry_response(entry_index, entry, full_validation):
if 'statusText' not in entry['response']:
print(f'Error: log.entries[{entry_index}].response.statusText is missing in browsertime.har file')
is_ok = False
elif entry['response']['statusText'] not in(""):
elif not isinstance(entry['response']['statusText'], str):
print(f'Error: log.entries[{entry_index}].response.statusText has wrong value, actual value: {entry['response']['statusText']}')
is_ok = False

Expand Down Expand Up @@ -250,17 +295,21 @@ def validate_entry_request(entry_index, entry, full_validation):
if 'url' not in entry['request']:
print(f'Error: log.entries[{entry_index}].request.url is missing in browsertime.har file')
is_ok = False
elif entry_index == 0 and entry['request']['url'] != 'https://webperf.se/':
print(f'Error: log.entries[{entry_index}].request.url has wrong value, actual value: {entry['request']['url']}')
is_ok = False
else:
if entry_index == 0 and entry['request']['url'] != 'https://webperf.se/':
print(f'Error: log.entries[{entry_index}].request.url has wrong value, actual value: {entry['request']['url']}')
is_ok = False
if not entry['request']['url'].startswith('http://') and not entry['request']['url'].startswith('https://'):
print(f'Error: log.entries[{entry_index}].request.url has wrong value, actual value: {entry['request']['url']}')
is_ok = False

if not full_validation:
return is_ok

if 'method' not in entry['request']:
print(f'Error: log.entries[{entry_index}].request.method is missing in browsertime.har file')
is_ok = False
elif entry['request']['method'] not in ('GET','POST'):
elif entry['request']['method'] not in ('GET','POST', 'HEAD', 'PUT', 'DELETE', 'CONNECT', 'OPTIONS', 'TRACE', 'PATCH'):
print(f'Error: log.entries[{entry_index}].request.method has wrong value, actual value: {entry['request']['method']}')
is_ok = False

Expand All @@ -274,16 +323,24 @@ def validate_entry_request(entry_index, entry, full_validation):
if 'headersSize' not in entry['request']:
print(f'Error: log.entries[{entry_index}].request.headersSize is missing in browsertime.har file')
is_ok = False
elif not isinstance(entry['request']['headersSize'], int) or entry['request']['headersSize'] == -1:
elif not isinstance(entry['request']['headersSize'], int):
print(f'Error: log.entries[{entry_index}].request.headersSize has wrong value, actual value: {entry['request']['headersSize']}')
is_ok = False
# NOTE: Not required by HAR specification 1.2
# elif entry['request']['headersSize'] == -1:
# print(f'Warning: log.entries[{entry_index}].request.headersSize has wrong value, actual value: {entry['request']['headersSize']}')
# is_ok = False

if 'bodySize' not in entry['request']:
print(f'Error: log.entries[{entry_index}].request.bodySize is missing in browsertime.har file')
is_ok = False
elif not isinstance(entry['request']['bodySize'], int) or entry['request']['bodySize'] != 0:
elif not isinstance(entry['request']['bodySize'], int):
print(f'Error: log.entries[{entry_index}].request.bodySize has wrong value, actual value: {entry['request']['bodySize']}')
is_ok = False
# NOTE: Not required by HAR specification 1.2
# elif entry['request']['bodySize'] != 0:
# print(f'Error: log.entries[{entry_index}].request.bodySize has wrong value, actual value: {entry['request']['bodySize']}')
# is_ok = False

if 'cookies' not in entry['request']:
print(f'Error: log.entries[{entry_index}].request.cookies array is missing in browsertime.har file')
Expand Down Expand Up @@ -330,48 +387,72 @@ def validate_entry_request_headers(entry_index, entry):

def validate_pages(browsertime_har, full_validation):
is_ok = True
page_refs = []
if 'pages' not in browsertime_har['log']:
print('Error: log.pages array is missing in browsertime.har file')
is_ok = False
else:
page_index = 0
for page in browsertime_har['log']['pages']:
is_ok = validate_page(page_index, page, full_validation) and is_ok
page_is_ok, page_ref = validate_page(page_index, page, full_validation)
if page_ref is not None:
page_refs.append(page_ref)
is_ok = page_is_ok and is_ok
page_index += 1
if page_index < 1:
print('Error: log.pages array has less than 1 page in browsertime.har file')
is_ok = False
return is_ok

return is_ok, page_refs

def validate_page(page_index, page, full_validation):
is_ok = True
page_ref = None
if page['_url'] != 'https://webperf.se':
print(f'Error: log.pages[{page_index}]._url has wrong value, actual value: {page['_url']}')
is_ok = False

if not full_validation:
return is_ok
return is_ok, None

if 'id' not in page:
print(f'Error: log.pages[{page_index}].id is missing in browsertime.har file')
is_ok = False
if f'page_{page_index +1 }' not in page['id']:
elif f'page_{page_index +1 }' not in page['id']:
print(f'Error: log.pages[{page_index}].id has wrong value, actual value: {page['id']}')
is_ok = False
page_ref = page['id']
else:
page_ref = page['id']

if 'startedDateTime' not in page:
print(f'Error: log.pages[{page_index}].startedDateTime is missing in browsertime.har file')
is_ok = False
if re.match(r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}Z', page['startedDateTime']) is None:
print(f'Error: log.pages[{page_index}].startedDateTime property is wrong value in browsertime.har file')
elif re.match(r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}.[0-9]{3}Z', page['startedDateTime']) is None:
print(f'Error: log.pages[{page_index}].startedDateTime property has wrong value in browsertime.har file')
is_ok = False

if 'title' not in page:
print(f'Error: log.pages[{page_index}].title is missing in browsertime.har file')
is_ok = False

if 'pageTimings' not in page:
print(f'Error: log.pages[{page_index}].pageTimings is missing in browsertime.har file')
is_ok = False
else:
if 'onContentLoad' not in page['pageTimings']:
print(f'Error: log.pages[{page_index}].pageTimings.onContentLoad is missing in browsertime.har file')
is_ok = False
elif not isinstance(page['pageTimings']['onContentLoad'], float) and not isinstance(page['pageTimings']['onContentLoad'], int):
print(f'Error: log.pages[{page_index}].pageTimings.onContentLoad has wrong value, actual value: {page['pageTimings']['onContentLoad']}')
is_ok = False

if 'onLoad' not in page['pageTimings']:
print(f'Error: log.pages[{page_index}].pageTimings.onLoad is missing in browsertime.har file')
is_ok = False
elif not isinstance(page['pageTimings']['onLoad'], float) and not isinstance(page['pageTimings']['onLoad'], int):
print(f'Error: log.pages[{page_index}].pageTimings.onLoad has wrong value, actual value: {page['pageTimings']['onLoad']}')
is_ok = False

if '_url' not in page:
print(f'Error: log.pages[{page_index}]._url is missing in browsertime.har file')
Expand All @@ -383,7 +464,7 @@ def validate_page(page_index, page, full_validation):
if '_meta' not in page:
print(f'Error: log.pages[{page_index}]._meta is missing in browsertime.har file')
is_ok = False
return is_ok
return is_ok, page_ref

def validate_browser(browsertime_har):
is_ok = True
Expand Down

0 comments on commit 9135c66

Please sign in to comment.