Skip to content

Commit

Permalink
Add otf2 parser
Browse files Browse the repository at this point in the history
  • Loading branch information
onewbiek committed Jul 26, 2024
1 parent ee66d48 commit 778241a
Show file tree
Hide file tree
Showing 6 changed files with 188 additions and 4 deletions.
2 changes: 1 addition & 1 deletion drishti/handlers/handle_tau.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from rich import print
from drishti.includes.module import *
from otf2_parser.otf2csv import otf2_to_csv
from drishti.includes.supports.otf2csv import otf2_to_csv


def handler():
Expand Down
Empty file.
71 changes: 71 additions & 0 deletions drishti/includes/supports/lookup3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Python integers are arbitrary-precision, so every value that stands in for
# a C uint32_t has to be truncated explicitly with `& 0xffffffff`.
# https://docs.python.org/3/library/stdtypes.html#numeric-types-int-float-complex

'''Original copyright notice:
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
code any way you wish, private, educational, or commercial. Its free.
'''

def rot(x, k):
    """Rotate ``x`` left by ``k`` bit positions within a 32-bit word.

    The result is NOT truncated: bits shifted above position 31 stay in the
    returned integer, so the caller has to apply ``& 0xffffffff`` itself.
    """
    shifted_up = x << k
    wrapped_around = x >> (32 - k)
    return shifted_up | wrapped_around

def mix(a, b, c):
    """Run the six-round mixing step on three 32-bit state words.

    Every input is truncated to 32 bits first, and every intermediate result
    is truncated again, so the three returned integers all fit in 32 bits.

    Returns:
        The updated ``(a, b, c)`` tuple.
    """
    word = 0xffffffff

    def scramble(x, y, z, k):
        # One round: x = (x - z) ^ rot(z, k), then z = z + y (all mod 2**32).
        # rot() must see z *before* y is added to it.
        x = (((x - z) & word) ^ rot(z, k)) & word
        z = (z + y) & word
        return x, z

    a, b, c = a & word, b & word, c & word
    a, c = scramble(a, b, c, 4)
    b, a = scramble(b, c, a, 6)
    c, b = scramble(c, a, b, 8)
    a, c = scramble(a, b, c, 16)
    b, a = scramble(b, c, a, 19)
    c, b = scramble(c, a, b, 4)
    return a, b, c

def final(a, b, c):
    """Run the seven-round finalisation step on three 32-bit state words.

    Inputs are truncated to 32 bits on entry and every round truncates its
    result, so the three returned integers all fit in 32 bits.

    Returns:
        The updated ``(a, b, c)`` tuple.
    """
    word = 0xffffffff

    def blend(x, y, k):
        # One round: (x ^ y) - rot(y, k), reduced modulo 2**32.
        return ((x ^ y) - rot(y, k)) & word

    a, b, c = a & word, b & word, c & word
    c = blend(c, b, 14)
    a = blend(a, c, 11)
    b = blend(b, a, 25)
    c = blend(c, b, 16)
    a = blend(a, c, 4)
    b = blend(b, a, 14)
    c = blend(c, b, 24)
    return a, b, c

def hashlittle2(data, initval=0, initval2=0):
    """Compute a pair of 32-bit hash values for ``data``.

    The input is consumed in 12-unit blocks, each read as three
    little-endian 32-bit words that are added to the state and passed
    through ``mix``; the last 1..12 units are added the same way (missing
    positions count as zero) and passed through ``final``.

    Args:
        data: A ``str`` (each character contributes its code point, exactly
            as before) or a bytes-like / integer sequence such as ``bytes``
            or ``bytearray`` (each item is used directly as a byte value).
            Characters above U+00FF spill into the neighbouring byte lanes,
            so encode such text to bytes first if a byte-exact hash matters.
        initval: Seed added to all three state words.
        initval2: Extra seed added to the ``c`` word only.

    Returns:
        A ``(c, b)`` tuple of integers, each truncated to 32 bits.  For
        empty input the seeded state is returned without finalisation.
    """
    mask = 0xffffffff
    length = len(data)

    # Normalise once: iterating bytes yields ints (ord() would raise on
    # them), iterating str yields 1-character strings that need ord().
    octets = [item if isinstance(item, int) else ord(item) for item in data]

    def word(start):
        # Little-endian word built from up to four units at `start`.  The
        # slice is simply shorter near the end of the input, which is the
        # same as zero-padding.  Addition (not OR) keeps results identical
        # to the previous implementation for code points above 255.
        return sum(value << (8 * lane)
                   for lane, value in enumerate(octets[start:start + 4]))

    a = b = c = (0xdeadbeef + length + initval) & mask
    c = (c + initval2) & mask

    pos = 0
    remaining = length
    # Strictly greater than 12: a final full block of exactly 12 units is
    # handled by the tail so that it goes through final() instead of mix().
    while remaining > 12:
        a = (a + word(pos)) & mask
        b = (b + word(pos + 4)) & mask
        c = (c + word(pos + 8)) & mask
        a, b, c = mix(a, b, c)
        pos += 12
        remaining -= 12

    if remaining == 0:
        # Only reachable for empty input; nothing left to fold in.
        return c, b

    a = (a + word(pos)) & mask
    b = (b + word(pos + 4)) & mask
    c = (c + word(pos + 8)) & mask
    a, b, c = final(a, b, c)

    return c, b

def hashlittle(data, initval=0):
    """Return a single 32-bit hash of ``data``.

    Thin wrapper around ``hashlittle2`` with the second seed fixed to 0;
    only the first element of the returned pair is kept.
    """
    return hashlittle2(data, initval, 0)[0]
115 changes: 115 additions & 0 deletions drishti/includes/supports/otf2csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#! /usr/bin/env python3

import csv
import otf2
import argparse

from typing import Dict, Any
from collections import defaultdict
from drishti.includes.supports.lookup3 import hashlittle


def file_name(handle: otf2.definitions.IoHandle) -> str:
    """Return the name to report for an I/O handle.

    ``handle.file.name`` is preferred; if that lookup raises
    ``AttributeError`` the handle's own ``name`` is returned instead.
    """
    try:
        resolved = handle.file.name
    except AttributeError:
        # No usable `file` attribute on this handle.
        resolved = handle.name
    return resolved


def ignored_file(filename):
    """Return True if ``filename`` should be left out of the CSV output.

    A file is ignored when it is one of the standard-stream placeholders or
    lives under ``/sys/``, ``/proc/``, ``/etc/`` or ``/cvmfs/``.

    The ``/proc`` entry is matched as a directory (``/proc`` itself or
    anything below ``/proc/``).  A bare ``'/proc'`` prefix would also
    swallow unrelated paths such as ``/projects/...``.
    """
    ignored_prefixes = (
        '/sys/',
        '/proc/',
        '/etc/',
        'STDIN_FILENO',
        'STDOUT_FILENO',
        'STDERR_FILENO',
        '/cvmfs/',
    )
    # str.startswith accepts a tuple and tests every candidate in one call.
    return filename == '/proc' or filename.startswith(ignored_prefixes)


# Region names that count as data-transfer calls.  Built once at import time
# rather than on every call: the trace converter runs this check for each
# region that is entered.
_POSIX_IO_OPERATIONS = frozenset({
    'read', 'write', 'fgets', 'fread', 'fwrite',
    'pread', 'pwrite', 'pread64', 'pwrite64',
})
_MPI_IO_OPERATIONS = frozenset({
    'MPI_File_iread', 'MPI_File_iread_shared', 'MPI_File_iread_at',
    'MPI_File_iwrite', 'MPI_File_iwrite_shared', 'MPI_File_iwrite_at',
    'MPI_File_read_all_begin', 'MPI_File_read_all', 'MPI_File_read_at',
    'MPI_File_read_at_all', 'MPI_File_read_at_all_begin',
    'MPI_File_write_all_begin', 'MPI_File_write_all', 'MPI_File_write_at',
    'MPI_File_write_at_all', 'MPI_File_write_at_all_begin',
    'MPI_File_read_ordered_begin', 'MPI_File_read_ordered',
    'MPI_File_write_ordered_begin', 'MPI_File_write_ordered',
    'MPI_File_read_shared', 'MPI_File_write_shared',
})


def isIoOperation(function):
    """Return True if ``function`` names a known POSIX or MPI-IO transfer call.

    The comparison is an exact, case-sensitive match against the two
    whitelists above.
    """
    # NOTE(review): plain 'MPI_File_read' / 'MPI_File_write' are not in the
    # MPI list although their variants are -- confirm whether that is intended.
    return function in _POSIX_IO_OPERATIONS or function in _MPI_IO_OPERATIONS

def otf2_to_csv(tracefile: str, csvfile: str) -> None:
    """Extract the I/O operations of an OTF2 trace into a CSV file.

    Walks every event of ``tracefile`` and writes one row per completed I/O
    operation to ``csvfile`` with the columns ``file_name``, ``file_id``,
    ``function``, ``rank``, ``start``, ``end``, ``size`` and ``offset``.
    Operations on files rejected by ``ignored_file`` are skipped.

    Args:
        tracefile: Path of the OTF2 trace, passed to ``otf2.reader.open``.
        csvfile: Path of the CSV file to write (opened in ``"w"`` mode).
    """
    # Per location group: the last whitelisted I/O region entered and the
    # last I/O operation begun, kept until its completion event is seen.
    rank_stat = defaultdict(dict)
    # File name -> numeric id (hashlittle of the name), computed once per file.
    file_map = {}

    # NOTE(review): timestamps are divided by this fixed factor; presumably
    # it converts trace ticks to a coarser unit -- confirm against the
    # trace's timer resolution.
    TIMER_GRANULARITY = 1000000
    # Stays 0 if the trace has no ProgramBegin event.
    start_time = 0

    with otf2.reader.open(tracefile) as trace:
        # newline="" is what the csv module asks for: the writer emits its
        # own line terminators, and without it rows come out as \r\r\n on
        # platforms that translate "\n".
        with open(csvfile, "w", newline="") as outfile:
            writer = csv.writer(outfile)
            writer.writerow(['file_name', 'file_id', 'function', 'rank', 'start', 'end', 'size', 'offset'])

            for location, event in trace.events:

                if isinstance(event, otf2.events.ProgramBegin):
                    # All reported times are relative to program start.
                    start_time = event.time

                elif isinstance(event, otf2.events.Enter):
                    region_name = event.region.name
                    if region_name.endswith('()'):
                        region_name = region_name[:-2]

                    # Remember the enclosing call so the I/O operation that
                    # follows on this location group can be attributed to it.
                    if isIoOperation(region_name):
                        rank_stat[location.group.name]['function'] = region_name

                elif isinstance(event, otf2.events.IoOperationBegin):
                    filename = file_name(event.handle)
                    if not filename or ignored_file(filename):
                        continue

                    if filename not in file_map:
                        file_map[filename] = hashlittle(filename)

                    attributes: Dict[str, Any] = {}
                    if event.attributes:
                        attributes = {attr.name.lower(): value for attr, value in event.attributes.items()}

                    pending = rank_stat[location.group.name]
                    pending['start'] = (event.time - start_time) / TIMER_GRANULARITY
                    pending['filename'] = filename
                    pending['fileid'] = file_map[filename]
                    pending['size'] = event.bytes_request
                    pending['offset'] = attributes.get('offset', 'Unknown')

                elif isinstance(event, otf2.events.IoOperationComplete):
                    filename = file_name(event.handle)
                    if not filename or ignored_file(filename):
                        continue

                    rank = location.group.name
                    pending = rank_stat[rank]
                    if 'start' not in pending:
                        # Completion with no begin recorded on this location
                        # group: there is nothing to report, so skip it
                        # instead of failing with KeyError.
                        continue

                    end = (event.time - start_time) / TIMER_GRANULARITY
                    # The enclosing region may not be on the whitelist, in
                    # which case no function name was recorded.
                    function = pending.get('function', 'Unknown')

                    # Presumably group names look like "MPI Rank 3"; the
                    # third token is reported.  Names with fewer tokens are
                    # reported whole rather than raising IndexError.
                    tokens = rank.split()
                    rank_id = tokens[2] if len(tokens) > 2 else rank

                    writer.writerow([pending['filename'], pending['fileid'], function, rank_id,
                                     pending['start'], end, pending['size'], pending['offset']])
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ argparse
darshan==3.4.4.0
pandas
rich==12.5.1
recorder-utils
otf2-parser
recorder-utils
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
'darshan==3.4.4.0',
'rich==12.5.1',
'recorder-utils',
'otf2-parser',
],
packages=find_packages(),
package_data={
Expand Down

0 comments on commit 778241a

Please sign in to comment.