Add otf2 parser

hpc-io · Jul 26, 2024 · 778241a · 778241a
1 parent ee66d48
commit 778241a
Show file tree

Hide file tree

Showing 6 changed files with 188 additions and 4 deletions.
diff --git a/drishti/handlers/handle_tau.py b/drishti/handlers/handle_tau.py
@@ -5,7 +5,7 @@
 
 from rich import print
 from drishti.includes.module import *
-from otf2_parser.otf2csv import otf2_to_csv
+from drishti.includes.supports.otf2csv import otf2_to_csv
 
 
 def handler():

diff --git a/drishti/includes/supports/__init__.py b/drishti/includes/supports/__init__.py
diff --git a/drishti/includes/supports/lookup3.py b/drishti/includes/supports/lookup3.py
@@ -0,0 +1,71 @@
+# Need to constrain U32 to only 32 bits using the & 0xffffffff
+# since Python has no native notion of integers limited to 32 bit
+# http://docs.python.org/library/stdtypes.html#numeric-types-int-float-long-complex
+
+'''Original copyright notice:
+    By Bob Jenkins, 1996.  bob_jenkins@burtleburtle.net.  You may use this
+    code any way you wish, private, educational, or commercial.  Its free.
+'''
+
+def rot(x,k):
+    '''Rotate the 32-bit value `x` left by `k` bits (0 <= k <= 32).
+
+    Python integers are unbounded, so `x` is truncated to 32 bits before
+    shifting and the result is truncated again; without that the bits
+    shifted out on the left would stay in the result instead of wrapping.
+    '''
+    x &= 0xffffffff
+    return ((x << k) | (x >> (32 - k))) & 0xffffffff
+
+def mix(a, b, c):
+    '''Run the six subtract / xor-rotate / add rounds over (a, b, c).
+
+    The inputs are truncated to 32 bits first and every intermediate result
+    is truncated again. Returns the updated (a, b, c) tuple.
+    '''
+    mask = 0xffffffff
+    # Roles within one round: `target` is rewritten from `source`, then
+    # `source` absorbs `addend`. They start out as (a, b, c) -> (target,
+    # addend, source).
+    target, addend, source = a & mask, b & mask, c & mask
+    for shift in (4, 6, 8, 16, 19, 4):
+        target = ((target - source) ^ rot(source, shift)) & mask
+        source = (source + addend) & mask
+        # Shift the roles by one word for the next round.
+        target, addend, source = addend, source, target
+    # Six rounds are two full role cycles, so the order is (a, b, c) again.
+    return target, addend, source
+
+def final(a, b, c):
+    '''Run the seven closing xor / subtract-rotate rounds over (a, b, c).
+
+    The inputs are truncated to 32 bits first and every round's result is
+    truncated again. Returns the updated (a, b, c) tuple.
+    '''
+    mask = 0xffffffff
+    # Roles within one round: `target` is rewritten using `source`, `idle`
+    # is left alone. The first round rewrites c from b, so start at (c, b, a).
+    target, source, idle = c & mask, b & mask, a & mask
+    for shift in (14, 11, 25, 16, 4, 14, 24):
+        target = ((target ^ source) - rot(source, shift)) & mask
+        # The next round rewrites the idle word from the one just produced.
+        target, source, idle = idle, target, source
+    # After seven rounds the roles hold (a, c, b).
+    return target, idle, source
+
+def hashlittle2(data, initval = 0, initval2 = 0):
+    '''Hash `data` and return two 32-bit values as the tuple (c, b).
+
+    `data` is read item by item and each item is converted with ord().
+    `initval` seeds all three state words; `initval2` is added to the third
+    word only. Input is consumed in 12-item groups that are folded in with
+    mix(); the last 1 to 12 items are folded in and finished with final().
+    Empty input returns the seeded state without calling final().
+    '''
+    mask = 0xffffffff
+    length = len(data)
+
+    def word(start):
+        # Combine up to four items starting at `start`, first item in the
+        # lowest byte; positions past the end of `data` contribute nothing.
+        stop = min(start + 4, length)
+        return sum(ord(data[i]) << (8 * (i - start)) for i in range(start, stop))
+
+    a = b = c = 0xdeadbeef + length + initval
+    c = (c + initval2) & mask
+
+    offset = 0
+    remaining = length
+    while remaining > 12:
+        a = (a + word(offset)) & mask
+        b = (b + word(offset + 4)) & mask
+        c = (c + word(offset + 8)) & mask
+        a, b, c = mix(a, b, c)
+        offset += 12
+        remaining -= 12
+
+    # Tail of 0..12 items: words that lie beyond the data evaluate to 0.
+    a = (a + word(offset)) & mask
+    b = (b + word(offset + 4)) & mask
+    c = (c + word(offset + 8)) & mask
+
+    if remaining == 0:
+        return c, b
+
+    a, b, c = final(a, b, c)
+    return c, b
+
+def hashlittle(data, initval=0):
+    '''Return the first of the two hashes hashlittle2() computes for `data`.
+
+    `initval` is passed through as the seed; the second seed is fixed at 0.
+    '''
+    return hashlittle2(data, initval, 0)[0]
diff --git a/drishti/includes/supports/otf2csv.py b/drishti/includes/supports/otf2csv.py
@@ -0,0 +1,115 @@
+#! /usr/bin/env python3
+
+import csv
+import otf2
+import argparse
+
+from typing import Dict, Any
+from collections import defaultdict
+from drishti.includes.supports.lookup3 import hashlittle
+
+
+def file_name(handle: otf2.definitions.IoHandle) -> str:
+    ''' Return the name to report for `handle`: the name of its file when
+        that can be read, otherwise the name of the handle itself. '''
+    try:
+        name = handle.file.name
+    except AttributeError:
+        # No usable `file.name` on this handle; use the handle's own name.
+        name = handle.name
+    return name
+
+
+def ignored_file(filename):
+    ''' Tell whether `filename` should be left out of the CSV output.
+
+        A name is ignored when it starts with one of the prefixes below:
+        the /sys/, /proc/, /etc/ and /cvmfs/ directory trees and the
+        STDIN_FILENO / STDOUT_FILENO / STDERR_FILENO handle names.
+        Returns True for ignored names, False otherwise. '''
+    # Directory prefixes end with '/' so only entries inside the directory
+    # match; a bare '/proc' would also drop unrelated paths such as
+    # '/processed/out.dat'.
+    ignored_prefixes = ('/sys/', '/proc/', '/etc/', 'STDIN_FILENO', 'STDOUT_FILENO', 'STDERR_FILENO', '/cvmfs/')
+    # str.startswith accepts a tuple and tests every prefix in one call.
+    return filename.startswith(ignored_prefixes)
+
+
+def isIoOperation(function):
+    ''' Return True when `function` is one of the POSIX or MPI-IO read/write
+        routine names listed below, False for any other name. '''
+    posix_names = {
+        'read', 'write', 'fgets', 'fread', 'fwrite',
+        'pread', 'pwrite', 'pread64', 'pwrite64',
+    }
+    mpi_names = {
+        'MPI_File_iread', 'MPI_File_iread_shared', 'MPI_File_iread_at',
+        'MPI_File_iwrite', 'MPI_File_iwrite_shared', 'MPI_File_iwrite_at',
+        'MPI_File_read_all_begin', 'MPI_File_read_all', 'MPI_File_read_at',
+        'MPI_File_read_at_all', 'MPI_File_read_at_all_begin',
+        'MPI_File_write_all_begin', 'MPI_File_write_all', 'MPI_File_write_at',
+        'MPI_File_write_at_all', 'MPI_File_write_at_all_begin',
+        'MPI_File_read_ordered_begin', 'MPI_File_read_ordered',
+        'MPI_File_write_ordered_begin', 'MPI_File_write_ordered',
+        'MPI_File_read_shared', 'MPI_File_write_shared',
+    }
+    return function in posix_names or function in mpi_names
+
+def otf2_to_csv(tracefile: str, csvfile: str) -> None:
+    ''' Open `tracefile` and write its I/O operations as CSV into `csvfile`.
+
+        After a header row, one row is written for every
+        `IoOperationComplete` event on a file that is not filtered out by
+        `ignored_file`. Columns: file_name, file_id, function, rank, start,
+        end, size, offset. `file_id` is `hashlittle(file_name)`; `start` and
+        `end` are event times relative to the last `ProgramBegin` seen,
+        divided by TIMER_GRANULARITY; `offset` is the event's `offset`
+        attribute or 'Unknown' when there is none.
+
+        A completion is skipped when no I/O function (see `isIoOperation`)
+        or no `IoOperationBegin` has been recorded for its rank yet.
+
+        Args:
+            tracefile: path handed to `otf2.reader.open`.
+            csvfile: path of the CSV file to create or overwrite. '''
+    # Latest function / begin data per location group; a completion is paired
+    # with whatever was recorded last for the same group.
+    rank_stat = defaultdict(dict)
+    # file name -> hash id, so each name is hashed only once
+    file_map = {}
+
+    TIMER_GRANULARITY = 1000000
+    start_time = 0
+
+    with otf2.reader.open(tracefile) as trace:
+
+        # newline='' leaves line endings to the csv module; without it some
+        # platforms write an extra '\r' per row.
+        with open(csvfile, "w", newline='') as outfile:
+            writer = csv.writer(outfile)
+            writer.writerow(['file_name', 'file_id', 'function', 'rank', 'start', 'end', 'size', 'offset'])
+
+            for location, event in trace.events:
+
+                if isinstance(event, otf2.events.ProgramBegin):
+                    start_time = event.time
+
+                elif isinstance(event, otf2.events.Enter):
+                    region_name = event.region.name
+                    # Region names may carry a trailing '()'; compare without it.
+                    if region_name.endswith('()'):
+                        region_name = region_name[:-2]
+
+                    if isIoOperation(region_name):
+                        rank_stat[location.group.name]['function'] = region_name
+
+                elif isinstance(event, otf2.events.IoOperationBegin):
+                    filename = file_name(event.handle)
+                    if not filename or ignored_file(filename):
+                        continue
+
+                    if filename not in file_map:
+                        file_map[filename] = hashlittle(filename)
+
+                    attributes: Dict[str, Any] = {}
+                    if event.attributes:
+                        attributes = {attr.name.lower(): value for attr, value in event.attributes.items()}
+
+                    pending = rank_stat[location.group.name]
+                    pending['start'] = (event.time - start_time) / TIMER_GRANULARITY
+                    pending['filename'] = filename
+                    pending['fileid'] = file_map[filename]
+                    pending['size'] = event.bytes_request
+                    pending['offset'] = attributes.get('offset', 'Unknown')
+
+                elif isinstance(event, otf2.events.IoOperationComplete):
+                    filename = file_name(event.handle)
+                    if not filename or ignored_file(filename):
+                        continue
+
+                    rank = location.group.name
+                    pending = rank_stat[rank]
+                    # Without a recorded function or begin there is nothing
+                    # complete to report; skip instead of raising KeyError.
+                    if 'function' not in pending or 'start' not in pending:
+                        continue
+
+                    # The rank id is the third word of the group name
+                    # (presumably names look like "MPI Rank <n>" - confirm);
+                    # shorter names are written out unchanged.
+                    name_parts = rank.split()
+                    rank_id = name_parts[2] if len(name_parts) > 2 else rank
+
+                    end = (event.time - start_time) / TIMER_GRANULARITY
+                    writer.writerow([pending['filename'], pending['fileid'], pending['function'],
+                                     rank_id, pending['start'], end, pending['size'], pending['offset']])
diff --git a/requirements.txt b/requirements.txt
@@ -2,5 +2,4 @@ argparse
 darshan==3.4.4.0
 pandas
 rich==12.5.1
-recorder-utils
-otf2-parser
+recorder-utils
diff --git a/setup.py b/setup.py
@@ -22,7 +22,6 @@
         'darshan==3.4.4.0',
         'rich==12.5.1',
         'recorder-utils',
-        'otf2-parser',
     ],
     packages=find_packages(),
     package_data={