adds python script to convert SU files to ASCII-format

cenjinzhiyu · Feb 22, 2018 · 56951d3 · 56951d3
1 parent de26c03
commit 56951d3
Showing 1 changed file with 201 additions and 0 deletions.
diff --git a/utils/convert_SU_to_ASCII.py b/utils/convert_SU_to_ASCII.py
@@ -0,0 +1,201 @@
+#!/usr/bin/env python
+#
+# reads in an SU-format file generated by SPECFEM and outputs ASCII files for each trace
+#
+"""
+usage: ./convert_SU_to_ASCII.py file_in
+
+  with file_in - input SU-format file, e.g., OUTPUT_FILES/0_dz_SU
+
+"""
+from __future__ import print_function
+
+import sys,os
+import numpy as np
+
+# based on obspy reader
+try:
+    from obspy import read
+except:
+    print("Failed importing obspy. Please make sure to have version >= 1.1.0 working properly.")
+    sys.exit(1)
+
+# arguments
+try:
+    file_in = sys.argv[1]
+except:
+    print(__doc__)
+    sys.exit(1)
+
+# for example: ./convert_SU_to_ASCII.py OUTPUT_FILES/0_dz_SU
+#              then filename = './OUTPUT_FILES/0_dz_SU' and single traces
+#              will be outputted as 0_dz_SU.ascii_0, 0_dz_SU.ascii_1, etc.
+out_file = file_in + '.ascii'
+
+# by default: uses Seismic Unix format to read in file.
+#             one could also use SEG-Y files and change this setting.
+read_segy = False
+read_su = True
+
+# endianness:
+# On the Macintosh platform, PowerPC-based Macintosh computers use big endian addressing,
+# while Intel-based Macs use little-endian addressing. Most non-Intel-based computers are big endian.
+# see: https://developer.apple.com/library/content/documentation/DeviceDrivers/Conceptual/WritingPCIDrivers/endianness/endianness.html
+
+
+# we will need to specify endianness, otherwise:
+# ..
+# obspy.io.segy.segy.SEGYError: Unable to determine the endianness of the file. Please specify it.
+# ..
+endian = '<' # little endian '<' or big endian '>'
+
+# Seismic Unix
+if read_su:
+    # reads as stream
+    st = read(file_in,format='SU',byteorder=endian)
+
+# SEG-Y
+if read_segy:
+    # reads as stream
+    st = read(file_in,format='SEGY',byteorder=endian)
+
+    ## more detailed call:
+    #from obspy.io.segy.segy import _read_segy
+    #import obspy.io as io
+
+    # By default trace data are read into memory, but this may be impractical for very large datasets
+    # see: https://docs.obspy.org/packages/obspy.io.segy.html
+    #
+    #segy = io.segy.segy._read_segy(file_in, headonly=True,endian=endian)
+    #print(len(segy.traces[0].data))
+
+    ## reading w/ data
+    #segy = io.segy.segy._read_segy(file_in,endian=endian)
+    #print(segy)
+
+    ## reads as stream
+    #st = io.segy.core._read_segy(file_in,byteorder=endian)
+
+# output info
+print("traces:")
+print(st)
+print("")
+
+# timing
+# how long it takes to determine minimum and maximum values in an array
+from timeit import default_timer
+
+def minmax_np(xy):
+    # uses numpy internal functions
+    minval = np.min(xy[:,1])
+    maxval = np.max(xy[:,1])
+    return (minval,maxval)
+
+def minmax(xy):
+    # combines loop over all elements to determine min/max at the same time
+    minval = xy[0,1]
+    maxval = xy[0,1]
+    for f1 in xy[1:,1]:
+        if f1 > maxval:
+            maxval = f1
+        elif f1 < minval:
+            minval = f1
+    return (minval, maxval)
+
+def minmax2(xy):
+    # combines loop over all elements and takes steps of 2 to minimize number of comparisons
+    minval = xy[0,1]
+    maxval = xy[0,1]
+    if len(xy[:,1]) % 2 == 0:
+      istart = 0
+    else:
+      istart = 1
+    for ii in xrange(0, len(xy[:,1]), 2):
+        f1 = xy[ii  ,1]
+        f2 = xy[ii+1,1]
+        if (f1 < f2):
+            if f1 < minval: minval = f1
+            if f2 > maxval: maxval = f2
+        else:
+            if f2 < minval: minval = f2
+            if f1 > maxval: maxval = f1
+    return (minval, maxval)
+
+
+# direct looping without prior reading in of stream...
+#for tr in obspy.io.segy.segy.iread_su('OUTPUT_FILES/0_dz_SU',endian='<'):
+
+# prints traces
+# see: https://docs.obspy.org/tutorial/code_snippets/export_seismograms_to_ascii.html
+
+for i,tr in enumerate(st):
+    # file output
+    filename = str.format("%s_%d" % (out_file, i))
+    print(filename)
+
+    # length
+    length = len(tr.data)
+    t0 = 0.0
+    dt = tr.stats.delta
+    print("  length = ",length," dt = ",dt)
+
+    # SPECFEM won't fill station and channel header infos
+    # station name
+    if len(tr.stats.station) > 0:
+        sta = tr.stats.station
+    else:
+        sta = 'A' + str(i)                    # A0, A1, ..
+    # channel name
+    if len(tr.stats.channel) > 0:
+        cha = tr.stats.channel
+    else:
+        # using file name for a guess
+        basename = os.path.basename(file_in)  # 0_dz_SU
+        comps = basename.upper().split("_")   # ['0','DZ','SU']
+        if len(comps) > 1:
+            if len(comps[1]) > 1:
+                cha = comps[1][-1:]           # 'Z'
+            else:
+                cha = '*'
+        else:
+            cha = '*'
+
+    # file output
+    f = open(filename, "w")
+
+    # file header
+    f.write("# STATION %s\n" % (sta))
+    f.write("# CHANNEL %s\n" % (cha))
+    f.write("# START_TIME %s\n" % (str(tr.stats.starttime)))
+    f.write("# SAMP_FREQ %f\n" % (tr.stats.sampling_rate))
+    f.write("# NDAT %d\n" % (tr.stats.npts))
+
+    # data section
+    # fills numpy array
+    xy = np.empty(shape=(0,2))
+    for ii in range(length):
+        time = ii*dt + t0
+        xy = np.append(xy,[[time,tr.data[ii]]],axis=0)
+
+    # data column
+    #print(xy[:,1])
+    #print(xy[0,1],xy[1,1],xy[2,1],xy[3,1])
+
+    # saves as ascii
+    np.savetxt(f, xy, fmt="%f\t%f")
+    f.close()
+
+    # statistics
+    #start = default_timer()
+    minval,maxval = minmax_np(xy)   # 7.414818e-05 s
+    #minval,maxval = minmax(xy)     # 6.389618e-04 s
+    #minval,maxval = minmax2(xy)    # 1.811981e-03 s
+    #end = default_timer()
+    #print("  min/max = %f / %f  (timing: %e s)" %(minval,maxval,end-start))
+    print("  min/max = %f / %f" %(minval,maxval))
+    print("  done")
+
+# figure output
+#st.plot()
+
+