diff --git a/drishti/handlers/handle_tau.py b/drishti/handlers/handle_tau.py index 4ec69fd..744d813 100644 --- a/drishti/handlers/handle_tau.py +++ b/drishti/handlers/handle_tau.py @@ -5,7 +5,7 @@ from rich import print from drishti.includes.module import * -from otf2_parser.otf2csv import otf2_to_csv +from drishti.includes.supports.otf2csv import otf2_to_csv def handler(): diff --git a/drishti/includes/supports/__init__.py b/drishti/includes/supports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/drishti/includes/supports/lookup3.py b/drishti/includes/supports/lookup3.py new file mode 100644 index 0000000..37478bb --- /dev/null +++ b/drishti/includes/supports/lookup3.py @@ -0,0 +1,71 @@ +# Need to constrain U32 to only 32 bits using the & 0xffffffff +# since Python has no native notion of integers limited to 32 bit +# http://docs.python.org/library/stdtypes.html#numeric-types-int-float-long-complex + +'''Original copyright notice: + By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this + code any way you wish, private, educational, or commercial. Its free. +''' + +def rot(x,k): + return (((x)<<(k)) | ((x)>>(32-(k)))) + +def mix(a, b, c): + a &= 0xffffffff; b &= 0xffffffff; c &= 0xffffffff + a -= c; a &= 0xffffffff; a ^= rot(c,4); a &= 0xffffffff; c += b; c &= 0xffffffff + b -= a; b &= 0xffffffff; b ^= rot(a,6); b &= 0xffffffff; a += c; a &= 0xffffffff + c -= b; c &= 0xffffffff; c ^= rot(b,8); c &= 0xffffffff; b += a; b &= 0xffffffff + a -= c; a &= 0xffffffff; a ^= rot(c,16); a &= 0xffffffff; c += b; c &= 0xffffffff + b -= a; b &= 0xffffffff; b ^= rot(a,19); b &= 0xffffffff; a += c; a &= 0xffffffff + c -= b; c &= 0xffffffff; c ^= rot(b,4); c &= 0xffffffff; b += a; b &= 0xffffffff + return a, b, c + +def final(a, b, c): + a &= 0xffffffff; b &= 0xffffffff; c &= 0xffffffff + c ^= b; c &= 0xffffffff; c -= rot(b,14); c &= 0xffffffff + a ^= c; a &= 0xffffffff; a -= rot(c,11); a &= 0xffffffff + b ^= a; b &= 0xffffffff; b -= rot(a,25); b &= 0xffffffff + c ^= b; c &= 0xffffffff; c -= rot(b,16); c &= 0xffffffff + a ^= c; a &= 0xffffffff; a -= rot(c,4); a &= 0xffffffff + b ^= a; b &= 0xffffffff; b -= rot(a,14); b &= 0xffffffff + c ^= b; c &= 0xffffffff; c -= rot(b,24); c &= 0xffffffff + return a, b, c + +def hashlittle2(data, initval = 0, initval2 = 0): + length = lenpos = len(data) + + a = b = c = (0xdeadbeef + (length) + initval) + + c += initval2; c &= 0xffffffff + + p = 0 # string offset + while lenpos > 12: + a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); a &= 0xffffffff + b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); b &= 0xffffffff + c += (ord(data[p+8]) + (ord(data[p+9])<<8) + (ord(data[p+10])<<16) + (ord(data[p+11])<<24)); c &= 0xffffffff + a, b, c = mix(a, b, c) + p += 12 + lenpos -= 12 + + if lenpos == 12: c += (ord(data[p+8]) + (ord(data[p+9])<<8) + (ord(data[p+10])<<16) + (ord(data[p+11])<<24)); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); + if lenpos == 11: c += (ord(data[p+8]) + (ord(data[p+9])<<8) + (ord(data[p+10])<<16)); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); + if lenpos == 10: c += (ord(data[p+8]) + (ord(data[p+9])<<8)); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); + if lenpos == 9: c += (ord(data[p+8])); b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); + if lenpos == 8: b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16) + (ord(data[p+7])<<24)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); + if lenpos == 7: b += (ord(data[p+4]) + (ord(data[p+5])<<8) + (ord(data[p+6])<<16)); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); + if lenpos == 6: b += ((ord(data[p+5])<<8) + ord(data[p+4])); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)) + if lenpos == 5: b += (ord(data[p+4])); a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)); + if lenpos == 4: a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16) + (ord(data[p+3])<<24)) + if lenpos == 3: a += (ord(data[p+0]) + (ord(data[p+1])<<8) + (ord(data[p+2])<<16)) + if lenpos == 2: a += (ord(data[p+0]) + (ord(data[p+1])<<8)) + if lenpos == 1: a += ord(data[p+0]) + a &= 0xffffffff; b &= 0xffffffff; c &= 0xffffffff + if lenpos == 0: return c, b + + a, b, c = final(a, b, c) + + return c, b + +def hashlittle(data, initval=0): + c, b = hashlittle2(data, initval, 0) + return c \ No newline at end of file diff --git a/drishti/includes/supports/otf2csv.py b/drishti/includes/supports/otf2csv.py new file mode 100644 index 0000000..4299b28 --- /dev/null +++ b/drishti/includes/supports/otf2csv.py @@ -0,0 +1,115 @@ +#! /usr/bin/env python3 + +import csv +import otf2 +import argparse + +from typing import Dict, Any +from collections import defaultdict +from drishti.includes.supports.lookup3 import hashlittle + + +def file_name(handle: otf2.definitions.IoHandle) -> str: + ''' Extract the correct name from `IoHandle`, which is either the file name + or the handle name. ''' + try: + return handle.file.name + except AttributeError: + return handle.name + + +def ignored_file(filename): + ignored_prefixes = {'/sys/', '/proc', '/etc/', 'STDIN_FILENO', 'STDOUT_FILENO', 'STDERR_FILENO', '/cvmfs/'} + for prefix in ignored_prefixes: + if filename.startswith(prefix): + return True + return False + + +def isIoOperation(function): + posixOperations = {'read', 'write', 'fgets', 'fread', 'fwrite', 'pread', 'pwrite', 'pread64', 'pwrite64'} + mpiOperations = {'MPI_File_iread', 'MPI_File_iread_shared', 'MPI_File_iread_at', + 'MPI_File_iwrite', 'MPI_File_iwrite_shared', 'MPI_File_iwrite_at', + 'MPI_File_read_all_begin', 'MPI_File_read_all', 'MPI_File_read_at', 'MPI_File_read_at_all', 'MPI_File_read_at_all_begin', + 'MPI_File_write_all_begin', 'MPI_File_write_all', 'MPI_File_write_at', 'MPI_File_write_at_all', 'MPI_File_write_at_all_begin', + 'MPI_File_read_ordered_begin', 'MPI_File_read_ordered', + 'MPI_File_write_ordered_begin', 'MPI_File_write_ordered', + 'MPI_File_read_shared', 'MPI_File_write_shared'} + + if function in posixOperations or function in mpiOperations: + return True + return False + +def otf2_to_csv(tracefile: str, csvfile: str) -> None: + ''' Open `tracefile` and write it as CSV in to `csvfile`. ''' + rank_stat = defaultdict(dict) + file_map = {} + + TIMER_GRANULARITY = 1000000 + start_time = 0 + + with otf2.reader.open(tracefile) as trace: + + with open(csvfile, "w") as outfile: + writer = csv.writer(outfile) + writer.writerow(['file_name', 'file_id', 'function', 'rank', 'start', 'end', 'size', 'offset']) + + for location, event in trace.events: + + if isinstance(event, otf2.events.ProgramBegin): + + start_time = event.time + + elif isinstance(event, otf2.events.Metric): + metric_class = event.metric + metric_value = event.values + + print(metric_class) + + + elif isinstance(event, otf2.events.Enter): + region_name = event.region.name + if region_name.endswith('()'): + region_name = region_name[:-2] + + if isIoOperation(region_name): + rank_stat[location.group.name]['function'] = region_name + + #print(region_name) + + elif isinstance(event, otf2.events.IoOperationBegin): + filename = file_name(event.handle) + + if filename and not ignored_file(filename): + + if filename not in file_map: + file_map[filename] = hashlittle(filename) + + attributes: Dict[str, Any] = {} + if event.attributes: + attributes = {attr.name.lower(): value for attr, value in event.attributes.items()} + + rank_stat[location.group.name]['start'] = (event.time - start_time) / TIMER_GRANULARITY + rank_stat[location.group.name]['filename'] = filename + rank_stat[location.group.name]['fileid'] = file_map[filename] + rank_stat[location.group.name]['size'] = event.bytes_request + if 'offset' in attributes: + rank_stat[location.group.name]['offset'] = attributes['offset'] + else: + rank_stat[location.group.name]['offset'] = 'Unknown' + + elif isinstance(event, otf2.events.IoOperationComplete): + filename = file_name(event.handle) + if filename and not ignored_file(filename): + rank = location.group.name + + function = rank_stat[rank]['function'] + filename = rank_stat[rank]['filename'] + fileid = rank_stat[rank]['fileid'] + start = rank_stat[rank]['start'] + end = (event.time - start_time) / TIMER_GRANULARITY + size = rank_stat[rank]['size'] + offset = rank_stat[rank]['offset'] + writer.writerow([filename, fileid, function, rank.split()[2], start, end, size, offset]) + + else: continue diff --git a/requirements.txt b/requirements.txt index d4c9d2c..4c6eda8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,4 @@ argparse darshan==3.4.4.0 pandas rich==12.5.1 -recorder-utils -otf2-parser \ No newline at end of file +recorder-utils \ No newline at end of file diff --git a/setup.py b/setup.py index f590898..3e75113 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,6 @@ 'darshan==3.4.4.0', 'rich==12.5.1', 'recorder-utils', - 'otf2-parser', ], packages=find_packages(), package_data={