From f8ebc93960242f367e733171bde6eff7268f2e97 Mon Sep 17 00:00:00 2001
From: Alec Miller <alecmiller@yahoo.com>
Date: Fri, 6 Sep 2024 19:44:28 -0700
Subject: [PATCH] kram - add ZipStream and Perf

These are a gzip stream that can compress data passed to it.  Perfetto can take in gz and zip compressed files.  Gzip has a 10B header + 8B footer, and this uses miniz deflate calls.  Had to disable the default zlib header/footer, but would be interesting to support that too.  Use this to unpack and keep the original file, since this is compress only right now.   gzunzip -k file.gz

Needed to expose some of the thread calls for Perf to obtain the id and name.
---
 kramv/KramViewerBase.cpp        |  62 +++++--
 libkram/json11/json11.cpp       |   4 +-
 libkram/kram/Kram.cpp           |   2 +-
 libkram/kram/KramFileHelper.cpp |  10 +-
 libkram/kram/KramFileHelper.h   |   2 +-
 libkram/kram/KramLog.cpp        |   5 +-
 libkram/kram/KramTimer.cpp      | 316 +++++++++++++++++++++++++++-----
 libkram/kram/KramTimer.h        |  82 ++++++++-
 libkram/kram/KramZipStream.cpp  |  81 +++++---
 libkram/kram/KramZipStream.h    |  15 +-
 libkram/kram/TaskSystem.cpp     |  71 +++----
 libkram/kram/TaskSystem.h       |  15 +-
 12 files changed, 515 insertions(+), 150 deletions(-)

diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp
index 572d6650..b7a66b2e 100644
--- a/kramv/KramViewerBase.cpp
+++ b/kramv/KramViewerBase.cpp
@@ -1179,7 +1179,21 @@ bool Data::isArchive() const
 bool Data::loadFile()
 {
     if (isArchive()) {
-        return loadFileFromArchive();
+        // This test perf layer and the ZipStream
+        Perf* perf = nullptr; // Perf::instance();
+        
+        // TODO: have to have permision to write file
+        if (perf) {
+            if (!perf->start("/Users/Alec/Library/Containers/com.hialec.kramv/Data/Traces/"
+                             "load.perftrace.gz"))
+                perf = nullptr;
+        }
+        bool success = loadFileFromArchive();
+        
+        if (perf)
+            perf->stop();
+        
+        return success;
     }
     
     // now lookup the filename and data at that entry
@@ -1379,6 +1393,8 @@ bool Data::loadFileFromArchive()
         return false;
     }
     
+    KPERFT("loadFileFromArchive");
+    
     const uint8_t* imageData = nullptr;
     uint64_t imageDataLength = 0;
 
@@ -1391,6 +1407,8 @@ bool Data::loadFileFromArchive()
     vector<uint8_t> bufferForImage;
     
     if (isFileUncompressed) {
+        KPERFT("ZipExtractRaw");
+        
         // search for main file - can be albedo or normal
         if (!zip.extractRaw(filename, &imageData, imageDataLength)) {
             return false;
@@ -1398,6 +1416,8 @@ bool Data::loadFileFromArchive()
 
     }
     else {
+        KPERFT("ZipExtract");
+        
         // need to decompress first
         if (!zip.extract(filename, bufferForImage)) {
             return false;
@@ -1430,10 +1450,14 @@ bool Data::loadFileFromArchive()
                 bool isNormalUncompressed = normalEntry->compressedSize == entry->uncompressedSize;
                 
                 if (isNormalUncompressed) {
+                    KPERFT("ZipExtractRawNormal");
+                    
                     zip.extractRaw(name.c_str(), &imageNormalData,
                                    imageNormalDataLength);
                 }
                 else {
+                    KPERFT("ZipExtractNormal");
+                    
                     // need to decompress first
                     if (!zip.extract(filename, bufferForNormal)) {
                         return false;
@@ -1459,29 +1483,43 @@ bool Data::loadFileFromArchive()
 
     // TODO: do imageDiff here?
     
+    KPERFT_START(1, "KTXOpen");
+    
     if (!imageDataKTX.open(imageData, imageDataLength, image)) {
         return false;
     }
 
-    if (hasNormal && imageNormalDataKTX.open(
-                         imageNormalData, imageNormalDataLength, imageNormal)) {
-        // shaders only pull from albedo + normal on these texture types
-        if (imageNormal.textureType == image.textureType &&
-            (imageNormal.textureType == MyMTLTextureType2D ||
-             imageNormal.textureType == MyMTLTextureType2DArray)) {
-            // hasNormal = true;
-        }
-        else {
-            hasNormal = false;
-        }
+    KPERFT_STOP(1);
+   
+    
+    if (hasNormal) {
+        KPERFT("KTXOpenNormal");
+       
+        if (imageNormalDataKTX.open(
+            imageNormalData, imageNormalDataLength, imageNormal)) {
+                // shaders only pull from albedo + normal on these texture types
+                if (imageNormal.textureType == image.textureType &&
+                    (imageNormal.textureType == MyMTLTextureType2D ||
+                     imageNormal.textureType == MyMTLTextureType2DArray)) {
+                    // hasNormal = true;
+                }
+                else {
+                    hasNormal = false;
+                }
+            }
     }
 
+    
     //---------------------------------
     
+    KPERFT_START(3, "KTXLoad");
+   
     if (!_delegate.loadTextureFromImage(fullFilename.c_str(), (double)timestamp, image, hasNormal ? &imageNormal : nullptr, nullptr, true)) {
         return false;
     }
 
+    KPERFT_STOP(3);
+   
     //---------------------------------
     
    // NSArray<NSURL*>* urls_ = (NSArray<NSURL*>*)_delegate._urls;
diff --git a/libkram/json11/json11.cpp b/libkram/json11/json11.cpp
index 8e6dea8f..9f5dbba9 100644
--- a/libkram/json11/json11.cpp
+++ b/libkram/json11/json11.cpp
@@ -284,7 +284,7 @@ void JsonWriter::writeFormat(const char* fmt, ...) {
     if (_stream && _out->size() >= _stream->compressLimit())
     {
         // flush the output to a compression stream
-        _stream->compress(Slice((uint8_t*)_out->data(), _out->size())); // losing const
+        _stream->compress(Slice((uint8_t*)_out->data(), _out->size()), false); // losing const
         
         // reset the buffer
         _out->clear();
@@ -295,7 +295,7 @@ JsonWriter::~JsonWriter()
 {
     if (_stream)  {
         if (!_out->empty()) {
-            _stream->compress(Slice((uint8_t*)_out->data(), _out->size())); // losing const
+            _stream->compress(Slice((uint8_t*)_out->data(), _out->size()), true); // losing const
         }
     }
 }
diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp
index 5727f0d1..0a42ed05 100644
--- a/libkram/kram/Kram.cpp
+++ b/libkram/kram/Kram.cpp
@@ -919,7 +919,7 @@ bool SavePNG(Image& image, const char* filename)
 
 bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix)
 {
-    return tmpFileHelper.openTemporaryFile(suffix, "w+b");
+    return tmpFileHelper.openTemporaryFile("kramimage-", suffix, "w+b");
 }
 
 bool SetupSourceImage(const string& srcFilename, Image& sourceImage,
diff --git a/libkram/kram/KramFileHelper.cpp b/libkram/kram/KramFileHelper.cpp
index a1b87126..ca3608b2 100644
--- a/libkram/kram/KramFileHelper.cpp
+++ b/libkram/kram/KramFileHelper.cpp
@@ -65,7 +65,7 @@ static FILE* fopen_mkdir(const char* path, const char* mode)
 FileHelper::~FileHelper() { close(); }
 
 // no current extension
-bool FileHelper::openTemporaryFile(const char* suffix, const char* access)
+bool FileHelper::openTemporaryFile(const char* prefix, const char* suffix, const char* access)
 {
     close();
 
@@ -82,7 +82,7 @@ bool FileHelper::openTemporaryFile(const char* suffix, const char* access)
     int keep = 0;
 
     // Note: can't pass . either, always opened as rw
-    _fp = tmpfileplus("/tmp/", "kramimage-", suffix, &pathname, keep);
+    _fp = tmpfileplus("/tmp/", prefix, suffix, &pathname, keep);
     if (!_fp) {
         return false;
     }
@@ -139,8 +139,10 @@ size_t FileHelper::pagesize()
 
 bool FileHelper::copyTemporaryFileTo(const char* dstFilename)
 {
-    if (!_fp) return false;
-    if (_filename.empty()) return false;
+    if (!_fp) 
+        return false;
+    if (_filename.empty()) 
+        return false;
 
     // since we're not closing, need to flush output
     fflush(_fp);
diff --git a/libkram/kram/KramFileHelper.h b/libkram/kram/KramFileHelper.h
index f468acd0..23d98e1c 100644
--- a/libkram/kram/KramFileHelper.h
+++ b/libkram/kram/KramFileHelper.h
@@ -29,7 +29,7 @@ class FileHelper {
     bool open(const char* filename, const char* access);
 
     // this file is auto-deleted by close(), is that okay with renameFile use?
-    bool openTemporaryFile(const char* suffix, const char* access);
+    bool openTemporaryFile(const char* prefix, const char* suffix, const char* access);
 
     // mainly for tmp files, file can be closed, but this does rename of tmp file.
     // may fail if tmp file and dst are different volumes.
diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp
index c3227d91..7b09c342 100644
--- a/libkram/kram/KramLog.cpp
+++ b/libkram/kram/KramLog.cpp
@@ -43,13 +43,10 @@
 #include "KramFmt.h"
 #include "KramTimer.h"
 #include "format.h" // really fmt/format.h
+#include "TaskSystem.h"
 
 namespace kram {
 
-// Pulled in from TaskSystem.cpp
-constexpr const uint32_t kMaxThreadName = 32;
-extern void getCurrentThreadName(char name[kMaxThreadName]);
-
 using mymutex = std::recursive_mutex;
 using mylock = std::unique_lock<mymutex>;
 
diff --git a/libkram/kram/KramTimer.cpp b/libkram/kram/KramTimer.cpp
index b00b3a7a..c8f70a72 100644
--- a/libkram/kram/KramTimer.cpp
+++ b/libkram/kram/KramTimer.cpp
@@ -3,8 +3,7 @@
 // in all copies or substantial portions of the Software.
 
 #include "KramTimer.h"
-
-#if 1
+#include "TaskSystem.h"
 
 #if KRAM_WIN
 #include <windows.h>
@@ -12,6 +11,8 @@
 #include <mach/mach_time.h>
 #endif
 
+#define nl '\n'
+
 namespace kram {
 
 using namespace NAMESPACE_STL;
@@ -70,54 +71,285 @@ double currentTimestamp()
     return (double)delta * gQueryPeriod;
 }
 
-} // namespace kram
+//-------------------
 
-#else
-
-/*
-// see sources here
-// https://codebrowser.dev/llvm/libcxx/src/chrono.cpp.html
-// but steady on macOS uses clock_gettime(CLOCK_MONOTONIC_RAW, &tp)
-//   which should be mach_continuous_time()
-//
-// also see sources here for timers
-// https://opensource.apple.com/source/Libc/Libc-1158.1.2/gen/clock_gettime.c.auto.html
-// mach_continuous_time() vs. mach_absolute_time()
-// https://developer.apple.com/library/archive/qa/qa1398/_index.html
- 
-#if USE_EASTL
-#include "EASTL/chrono.h"
-#else
-#include <chrono>
-#endif
- 
-namespace kram {
+// TODO: also look into the Perfetto binary format and library/api.
+// https://perfetto.dev/docs/instrumentation/tracing-sdk
 
-using namespace NAMESPACE_STL;
+// TODO: escape strings, but it's just more work
+Perf* Perf::_instance = new Perf();
 
-#if USE_EASTL
-using namespace eastl::chrono;
-#else
-using namespace std::chrono;
-#endif
+thread_local uint32_t gPerfStackDepth = 0;
 
-// high-res  (defaults to steady or system in libcxx)
-//using myclock = high_resolution_clock;
-//using myclock = system_clock;
-using myclock = steady_clock;
+PerfScope::PerfScope(const char* name_)
+: name(name_), time(currentTimestamp())
+{
+    gPerfStackDepth++;
+}
 
-static const myclock::time_point gStartTime = myclock::now();
 
-double currentTimestamp()
+void PerfScope::close()
 {
-    auto t = myclock::now();
-    duration<double, std::milli> timeSpan = t - gStartTime;
-    double count = (double)timeSpan.count() * 1e-3;
-    return count;
+    if (time != 0.0) {
+        --gPerfStackDepth;
+        
+        Perf::instance()->addTimer(name, time, currentTimestamp() - time);
+        time = 0.0;
+    }
 }
 
-}  // namespace kram
-*/
+void addPerfCounter(const char* name, int64_t value)
+{
+    Perf::instance()->addCounter(name, currentTimestamp(), value);
+}
 
-#endif
+//---------------
+
+bool Perf::start(const char* filename, uint32_t maxStackDepth)
+{
+    mylock lock(_mutex);
+    
+    if (isRunning()) {
+        KLOGW("Perf", "start already called");
+        return true;
+    }
+    
+    _filename = filename;
+    _maxStackDepth = maxStackDepth;
+    
+    // test the compressor
+    bool testZipStream = false;
+    if (testZipStream) {
+        string filename2 = filename;
+        filename2 += ".txt";
+        FileHelper fileHelper;
+        ZipStream stream;
+        if (fileHelper.open(filename2.c_str(), "w+b")) {
+            const char* testStr = 
+R"(id,name
+1,TEST_1
+2,TEST_2
+3,TEST_3
+4,TEST_4
+)";
+            
+            if (stream.open(&fileHelper, false)) {
+                stream.compress(Slice((uint8_t*)testStr, strlen(testStr)), true);
+                stream.close();
+            }
+            fileHelper.close();
+        }
+    }
+    
+    // write json as binary, so win doesn't replace \n with \r\n
+    if (!_fileHelper.openTemporaryFile("perf-", ".perftrace.gz", "w+b")) {
+        KLOGW("Perf", "Could not open oerf temp file");
+        return false;
+    }
+    
+    bool isUncompressed = false; // can test, extension still .gz though
+    if (!_stream.open(&_fileHelper, isUncompressed)) {
+        _fileHelper.close();
+        return false;
+    }
+    
+    // TODO: store _startTime in json starting params, also the
+    _startTime = currentTimestamp();
+    
+    _threadIdToTidMap.clear();
+    _threadNames.clear();
+   
+    string buf;
+    
+    // displya timeUnit must be ns (nanos) or ms (micros), default is ms
+    // "displayTimeUnit": "ns"
+    sprintf(buf, R"({"traceEvents":[%c)", nl);
+    write(buf);
+    
+    // can store file info here, only using one pid
+    uint32_t processId = 0;
+    const char* processName = "kram"; // TODO: add platform, config, use filename instead?
+    sprintf(buf, R"({"name":"process_name","ph":"M","pid":%u,"args":{"name":"%s"}},%c)",
+           processId, processName, nl);
+    write(buf);
+    
+    return true;
+}
+
+void Perf::stop() 
+{
+    mylock lock(_mutex);
+    
+    if (!isRunning()) {
+        KLOGW("Perf", "stop called, but never started");
+        return;
+    }
+    
+    // write end of array and object, and force flush
+    bool forceFlush = true;
+    string buf;
+    sprintf(buf, R"(]}%c)", nl);
+    write(buf, forceFlush);
+    
+    _stream.close();
+    
+    bool success = _fileHelper.copyTemporaryFileTo(_filename.c_str());
+    if (!success) {
+        KLOGW("Perf", "Couldn't move temp file");
+    }
+    
+    _fileHelper.close();
+    
+    // TODO: now open the file in kram-profile by opening it
+    // okay to use system, but it uses a global mutex on macOS
+    // This will be a .gz file, so not sure that kram-profile can respond
+    //
+    // sprintf(buf, "open %s", _filename.c_str());
+    // system(buf.c_str());
+    
+    _startTime = 0.0;
+}
+
+void Perf::write(const string& str, bool forceFlush) 
+{
+    mylock lock(_mutex);
+    
+    _buffer += str;
+    
+    if (forceFlush || _buffer.size() >= _stream.compressLimit()) {
+        _stream.compress(Slice((uint8_t*)_buffer.data(), _buffer.size()), forceFlush);
+        _buffer.clear();
+    }
+}
+
+uint32_t Perf::addThreadIfNeeded() 
+{
+    auto threadId = getCurrentThread();
+    
+    // don't need this, it's already locked by caller
+    //mylock lock(_mutex);
+    
+    auto it = _threadIdToTidMap.find(threadId);
+    if (it != _threadIdToTidMap.end()) {
+        return it->second;
+    }
+    
+    // add the new name and tid
+    char threadName[kMaxThreadName];
+    getThreadName(threadId, threadName);
+    
+    // don't really need to store name if not sorting, just need tid counter
+    uint32_t tid = _threadNames.size();
+    _threadNames.push_back(threadName);
+    
+    _threadIdToTidMap.insert(make_pair(threadId, tid));
+    
+    // this assumes the map is wiped each time
+    string buf;
+    sprintf(buf, R"({"name":"thread_name","ph":"M","tid":%u,"args":{"name":"%s"}},%c)",
+           tid, threadName, nl);
+    write(buf);
+    
+    return tid;
+}
+
+void Perf::addTimer(const char* name, double time, double elapsed)
+{
+    if (!isRunning()) {
+        return;
+    }
+    
+    // About Perfetto ts sorting.  This is now fixed to sort duration.
+    // https://github.com/google/perfetto/issues/878
+    
+    if (_maxStackDepth && gPerfStackDepth >= _maxStackDepth)
+        return;
+    
+    // zero out the time, so times are smaller to store
+    time -= _startTime;
+    
+    // problem with duration is that existing events can overlap the start time
+    bool isClamped = time < 0.0;
+    if (isClamped) {
+        elapsed += time;
+        time = 0.0;
+    }
+    if (elapsed <= 0.0)
+        return;
+    
+    // Catapult timings are suppoed to be in micros.
+    // Convert seconds to micros (as integer), lose nanos.  Note that
+    // Perfetto will convert all values to nanos anyways and lacks a ms format.
+    // Raw means nanos, and Seconds is too small of a fraction.
+    // Also printf does IEEE round to nearest even.
+    uint32_t timeDigits = 0; // or 3 for nanos
+    time *= 1e6;
+    elapsed *= 1e6;
+
+    // TODO: worth aliasing the strings, just replacing one string with another
+    // but less chars for id.
+    
+    // now lock across isRunning, addThread, and write call
+    mylock lock(_mutex);
+    if (!isRunning()) {
+        return;
+    }
+    // This requires a lock, so buffering the events would help
+    // what about sorting the names instead of first-come, first-serve?
+    uint32_t tid = addThreadIfNeeded();
+
+    // write out the event in micros, default is displayed in ms
+    string buf;
+    sprintf(buf, R"({"name":"%s","ph":"X","tid":%d,"ts":%.*f,"dur":%.*f},%c)",
+            name, tid, timeDigits, time, timeDigits, elapsed, nl);
+    write(buf);
+}
+
+// Can also use begin/end but these aren't a atomic
+//  R"({"name":"%s","ph":"B","tid":%d,"ts":%.0f},%c)",
+//  R"({"ph":"E","tid":%d,"ts":%.0f},%c)",
+
+void Perf::addCounter(const char* name, double time, int64_t amount) 
+{
+    if (!isRunning()) {
+        return;
+    }
+    
+    // also reject nested counters off perf stack depth
+    if (_maxStackDepth && gPerfStackDepth >= _maxStackDepth)
+        return;
+    
+    // zero out the time, so times are smaller to store
+    time -= _startTime;
+    
+    // problem with duration is that events can occur outside the start time
+    if (time < 0.0) {
+        return;
+    }
+   
+    // Catapult timings are suppoed to be in micros.
+    // Convert seconds to micros (as integer), lose nanos.  Note that
+    // Perfetto will convert all values to nanos anyways.
+    // Raw means nanos, and Seconds is too small of a fraction.
+    // Also printf does IEEE round to nearest even.
+    // https://github.com/google/perfetto/issues/879
+    
+    time *= 1e6;
+    uint32_t timeDigits = 0; // or 3 for nanos
+    
+    // TODO: worth aliasing the strings?, just replacing one string with another
+    // but less chars for id.
+    
+    // Note: can also have multiple named values passed in args
+    // Note: unclear if Perfetto can handle negative values
+    
+    // write out the event in micros, default is displayed in ms
+    // lld not portable to Win
+    string buf;
+    sprintf(buf, R"({"name":"%s","ph":"C","ts":%.*f,"args":{"v":%lld}},%c)",
+            name, timeDigits, time, amount, nl);
+    write(buf);
+}
+
+} // namespace kram
 
diff --git a/libkram/kram/KramTimer.h b/libkram/kram/KramTimer.h
index 326aac76..3ad3d54d 100644
--- a/libkram/kram/KramTimer.h
+++ b/libkram/kram/KramTimer.h
@@ -6,6 +6,9 @@
 
 #include <cassert>
 
+#include "KramFileHelper.h"
+#include "KramZipStream.h"
+
 //#include "KramConfig.h"
 
 namespace kram {
@@ -26,13 +29,13 @@ class Timer {
         assert(_timeElapsed >= 0.0);
         _timeElapsed -= currentTimestamp();
     }
-
+    
     void stop()
     {
         assert(_timeElapsed < 0.0);
         _timeElapsed += currentTimestamp();
     }
-
+    
     double timeElapsed() const
     {
         double time = _timeElapsed;
@@ -48,7 +51,7 @@ class Timer {
     }
     
     bool isStopped() const { return _timeElapsed < 0.0; }
-
+    
 private:
     double _timeElapsed = 0.0;
 };
@@ -63,12 +66,12 @@ class TimerScope {
             _timer->start();
         }
     }
-
+    
     ~TimerScope()
     {
         close();
     }
-
+    
     void close()
     {
         if (_timer) {
@@ -76,9 +79,76 @@ class TimerScope {
             _timer = nullptr;
         }
     }
-
+    
 private:
     Timer* _timer = nullptr;
 };
+    
+// This implements PERF macros, sending timing data to kram-profile, perfetto, and/or Tracy.
+class Perf {
+public:
+    bool isRunning() const { return _startTime != 0.0; }
+    
+    bool start(const char* filename, uint32_t maxStackDepth = 0);
+    void stop();
+    
+    void addTimer(const char* name, double time, double elapsed);
+    void addCounter(const char* name, double time, int64_t value);
+    
+    // singleton getter, but really want to split Perf from macros.
+    static Perf* instance() { return _instance; }
+    
+    // on it's own track/tid, add a frame vsync marker
+    // TODO: void addFrameMarker(double time);
+    
+private:
+    void write(const string& str, bool forceFlush = false);
+    uint32_t addThreadIfNeeded();
+    
+    ZipStream  _stream;
+    FileHelper _fileHelper;
+    double _startTime = 0.0;
+    string _filename;
+    
+    using mymutex = recursive_mutex;
+    using mylock = unique_lock<mymutex>;
+
+    mymutex _mutex;
+    unordered_map<thread::native_handle_type, uint32_t> _threadIdToTidMap;
+    vector<string> _threadNames;
+    string _buffer;
+    uint32_t _maxStackDepth = 0; // 0 means no limit
+    
+    static Perf* _instance;
+};
+
+class PerfScope {
+public:
+    // This means that the timers are running even when not profiling
+    PerfScope(const char* name_);
+    ~PerfScope() { close(); }
+    
+    void close();
+    
+private:
+    const char* name;
+    double time;
+};
+
+// This is here to split off Perf
+void addPerfCounter(const char* name, int64_t value);
+
+#define KPERF_SCOPENAME2(a,b) scope ## b
+#define KPERF_SCOPENAME(b) KPERF_SCOPENAME2(scope,b)
+
+#define KPERFT(x) PerfScope KPERF_SCOPENAME(__COUNTER__)(x)
+
+#define KPERFT_START(num,x) PerfScope KPERF_SCOPENAME(num)(x)
+#define KPERFT_STOP(num) KPERF_SCOPENAME(num).close()
+
+#define KPERFC(x, value) addPerfCounter(x, value)
 
 }  // namespace kram
+
+
+   
diff --git a/libkram/kram/KramZipStream.cpp b/libkram/kram/KramZipStream.cpp
index f914393b..fe6601e7 100644
--- a/libkram/kram/KramZipStream.cpp
+++ b/libkram/kram/KramZipStream.cpp
@@ -1,5 +1,6 @@
 #include "KramZipStream.h"
 
+#include "KramFileHelper.h"
 #include "miniz.h"
 
 namespace kram {
@@ -14,24 +15,37 @@ ZipStream::~ZipStream() {
     close();
 }
 
-bool ZipStream::open() {
-    KVERIFY(_fileHelper.isOpen());
+bool ZipStream::open(FileHelper* fileHelper, bool isUncompressed) {
+    _fileHelper = fileHelper;
+    if (!_fileHelper->isOpen()) {
+        return false;
+    }
+    
+    _isUncompressed = isUncompressed;
+    if (_isUncompressed) {
+        return true;
+    }
+    
+    memset(_stream.get(), 0, sizeof(mz_stream));
     
     // https://www.zlib.net/zlib_how.html
     // https://www.ietf.org/rfc/rfc1952.txt
     
     // can also install custom allocators (allocates 256KB buffer otherwise)
-    _stream->zalloc = NULL;
-    _stream->zfree = NULL;
-    _stream->opaque = NULL;
-    
+//    _stream->zalloc = NULL;
+//    _stream->zfree = NULL;
+//    _stream->opaque = NULL;
+//    
     // Just making this double the default mz_stream buffer.
-    // Should be able to get about 2x compression (is there an estimator?).
+    // Should be able to get about 2x compression (there an estimator in miniz).
     // TODO: what if input is bigger than output buffer?
     // The larger this number, the bigger the stall to compress.
     _compressLimit = 2*256*1024;
     
-    KVERIFY(mz_deflateInit(_stream.get(), MZ_DEFAULT_LEVEL) == MZ_OK);
+    // TODO: control level
+    // https://stackoverflow.com/questions/32225133/how-to-use-miniz-to-create-a-compressed-file-that-can-be-decompressd-by-gzip
+    // turning off zlib footer here with WINDOW_BITS
+    KVERIFY(mz_deflateInit2(_stream.get(), MZ_DEFAULT_LEVEL, MZ_DEFLATED, -MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY) == MZ_OK);
     
     // These are all optional fields
     enum GzipFlag : uint8_t {
@@ -63,20 +77,22 @@ bool ZipStream::open() {
         0x08, // (compression method - deflate)
         0x00, // flags
               // The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan.  1, 1970.
-        0x00, 0x00, 0x00, 0x00,  // TODO: timestamp mtime - start of compression or of src file
+        //0x00, 0x00, 0x00, 0x00,  // TODO: timestamp mtime - start of compression or of src file
+        0xAD, 0x38, 0x4D, 0x5E, // stolen from another file
+        
         kGzipCompressionUnknown, // compression id
-        kGzipPlatformMac         // os platform id
+        kGzipPlatformUnix        // os platform id
     };
     
     // Not writing any of the flagged fields.
     
     // clear the data
-    _sourceCRC32 = 0;
+    _sourceCRC32 = MZ_CRC32_INIT; // is 0
     _sourceSize = 0;
     
-    bool success = _fileHelper.write((const uint8_t*)&header, sizeof(header));
+    bool success = _fileHelper->write((const uint8_t*)&header, sizeof(header));
     if (!success) {
-        KLOGE("ZipStream", "Could not write gzip header to %s", _fileHelper.filename().c_str());
+        KLOGE("ZipStream", "Could not write gzip header to %s", _fileHelper->filename().c_str());
     }
     
     return success;
@@ -87,9 +103,21 @@ bool ZipStream::open() {
 }
 
 void ZipStream::close() {
+    // this means it was already closed
+    if (!_fileHelper) {
+        return;
+    }
+    
+    if (_isUncompressed) {
+        return;
+    }
+    
     // do this to end the stream and cleanup
     KVERIFY(mz_deflateEnd(_stream.get()) == MZ_OK);
     
+    // can also reset and then reuse the stream, instead of end?
+    //mz_deflateReset(_stream.get());
+    
     // data is already all written, so just need the footer
     
     const uint32_t footer[2] = {
@@ -100,13 +128,15 @@ void ZipStream::close() {
     // gzip 8B trailer
     // 4b crc checksum of original data (can use mz_crc32())
     // 4b length of data (mod 0xFFFFFFFF), if bigger than 4gb then can only validate bottom 4B of length.
-    bool success = _fileHelper.write((const uint8_t*)&footer, sizeof(footer));
+    bool success = _fileHelper->write((const uint8_t*)&footer, sizeof(footer));
     if (!success) {
-        KLOGE("ZipStream", "Could not write gzip footer to %s", _fileHelper.filename().c_str());
+        KLOGE("ZipStream", "Could not write gzip footer to %s", _fileHelper->filename().c_str());
     }
+    
+    _fileHelper = nullptr;
 }
 
-Slice ZipStream::write(const Slice& in) {
+Slice ZipStream::compressSlice(const Slice& in, bool finish) {
     // If in.size is huge, then don't resize like this.
     // But stream is assumed to take in smaller buffers
     // and know compressed stream is smaller than input size
@@ -120,22 +150,31 @@ Slice ZipStream::write(const Slice& in) {
     _stream->next_out = _compressed.data();
     
     // Hope don't need to do this in a loop
-    KVERIFY(mz_deflate(_stream.get(), MZ_FULL_FLUSH) == MZ_OK);
+    int status = mz_deflate(_stream.get(), finish ? MZ_FINISH : MZ_SYNC_FLUSH);
+    if (finish)
+        KASSERT(status == MZ_STREAM_END);
+    else
+        KASSERT(status == MZ_OK);
     
     // TODO: would be nice to skip crc32 work
     _sourceSize += in.size();
-    mz_crc32(_sourceCRC32, in.data(), in.size());
+    _sourceCRC32 = mz_crc32(_sourceCRC32, in.data(), in.size());
     
     // return the compressed output
     int numBytesCompressed = _compressed.size() - _stream->avail_out;
     return Slice(_compressed.data(), numBytesCompressed);
 }
 
-void ZipStream::compress(const Slice& uncompressedData) {
-    Slice compressedSlice = write(uncompressedData);
+void ZipStream::compress(const Slice& uncompressedData, bool finish) {
+    if (_isUncompressed) {
+        _fileHelper->write(uncompressedData.data(), uncompressedData.size());
+        return;
+    }
+    
+    Slice compressedSlice = compressSlice(uncompressedData, finish);
     
     // This writes out to a fileHelper
-    _fileHelper.write(compressedSlice.data(), compressedSlice.size());
+    _fileHelper->write(compressedSlice.data(), compressedSlice.size());
 }
 
 
diff --git a/libkram/kram/KramZipStream.h b/libkram/kram/KramZipStream.h
index 9ce57459..d5f2ee73 100644
--- a/libkram/kram/KramZipStream.h
+++ b/libkram/kram/KramZipStream.h
@@ -2,7 +2,6 @@
 
 #include "KramConfig.h"
 
-#include "KramFileHelper.h"
 #include <span>
 
 struct mz_stream;
@@ -10,6 +9,8 @@ struct mz_stream;
 namespace kram {
 using namespace NAMESPACE_STL;
 
+class FileHelper;
+
 // This can be passed a count
 template<typename T>
 using Span = span<T, dynamic_extent>;
@@ -22,7 +23,7 @@ class ICompressedStream {
     virtual ~ICompressedStream() {}
     
     // compress and store the data
-    virtual void compress(const Slice& uncompressedData) = 0;
+    virtual void compress(const Slice& uncompressedData, bool finish) = 0;
     
     // when reached then call compress
     virtual uint32_t compressLimit() const = 0;
@@ -36,11 +37,12 @@ class ZipStream : public ICompressedStream {
     virtual ~ZipStream();
     
     // writes opening header and closing footer
-    bool open();
+    // Can disable compression for testing the src content.
+    bool open(FileHelper* fileHelper, bool isUncompressed = false);
     void close();
     
     // compress and write data to helper
-    virtual void compress(const Slice& uncompressedData) override;
+    virtual void compress(const Slice& uncompressedData, bool finish) override;
     
     // test this for when to call compress
     virtual uint32_t compressLimit() const override {
@@ -48,15 +50,16 @@ class ZipStream : public ICompressedStream {
     }
     
 private:
-    Slice write(const Slice& in);
+    Slice compressSlice(const Slice& in, bool finish);
     
     vector<uint8_t> _compressed;
     unique_ptr<mz_stream> _stream;
-    FileHelper _fileHelper;
+    FileHelper* _fileHelper = nullptr;
     
     uint32_t _sourceCRC32 = 0;
     size_t _sourceSize = 0;
     uint32_t _compressLimit = 0;
+    bool _isUncompressed = false;
 };
 
 
diff --git a/libkram/kram/TaskSystem.cpp b/libkram/kram/TaskSystem.cpp
index ef2628f2..13d2d181 100644
--- a/libkram/kram/TaskSystem.cpp
+++ b/libkram/kram/TaskSystem.cpp
@@ -266,7 +266,7 @@ std::thread::native_handle_type getCurrentThread()
 // Of course, Windows has to make portability difficult.
 // And Mac non-standardly, doesn't even pass thread to call.
 //   This requires it to be set from thread itself.
-constexpr const uint32_t kMaxThreadName = 32;
+// Also linux (and Android?) limited to 15chars.
 
 #if KRAM_WIN
 
@@ -274,8 +274,10 @@ constexpr const uint32_t kMaxThreadName = 32;
 // Can just set in manifest file.
 // SetConsoleOutputCP(CP_UTF8);
 
-void setThreadName(std::thread::native_handle_type handle, const char* threadName)
+void setCurrentThreadName(const char* threadName)
 {
+    std::thread::native_handle_type handle = getCurrentThread();
+    
     // TODO: use std::wstring_convert();
     // std::codecvt_utf8_utf16
     
@@ -291,22 +293,12 @@ void setThreadName(std::thread::native_handle_type handle, const char* threadNam
     ::SetThreadDescription(handle, str.c_str());
 }
 
-void setCurrentThreadName(const char* threadName)
-{
-    setThreadName(getCurrentThread(), threadName);
-}
-
-void setThreadName(std::thread& thread, const char* threadName)
-{
-    setThreadName(thread.native_handle(), threadName);
-}
-
-void getCurrentThreadName(char name[kMaxThreadName])
+void getThreadName(std::thread::native_handle_type threadHandle, char name[kMaxThreadName])
 {
     name[0] = 0;
     
     wchar_t* threadNameW = nullptr;
-    HRESULT hr = ::GetThreadDescription(getCurrentThread(), &threadNameW);
+    HRESULT hr = ::GetThreadDescription(threadHandle, &threadNameW);
     if (SUCCEEDED(hr)) {
         // convert name back
         uint32_t len = wcslen(threadNameW);
@@ -321,60 +313,39 @@ void getCurrentThreadName(char name[kMaxThreadName])
     }
 }
 
-#elif KRAM_MAC || KRAM_IOS
+#else
 
-void setThreadName(std::thread::native_handle_type macroUnusedArg(handle), const char* threadName)
+void setCurrentThreadName(const char* threadName)
 {
-    // This can only set on self
+    #if KRAM_MAC || KRAM_IOS
+    // can only set thread from thread on macOS, sucks
     int val = pthread_setname_np(threadName);
+    #else
+    // 15 char name limit on Linux/Android, how modern!
+    int val = pthread_setname_np(getCurrentThread(), threadName);
+    #endif
+    
     if (val != 0)
         KLOGW("Thread", "Could not set thread name");
 }
 
-void setCurrentThreadName(const char* threadName)
-{
-    setThreadName(getCurrentThread(), threadName);
-}
-
-// This doesn't exist on macOS. What a pain.  Doesn't line up with getter calls.
-// Means can't set threadName externally without telling thread to wake and set itself.
-//void setThreadName(std::thread& thread, const char* threadName)
-//{
-//    auto handle = thread.native_handle();
-//    setThreadName(handle, threadName);
-//}
-
-void getCurrentThreadName(char name[kMaxThreadName])
+void getThreadName(std::thread::native_handle_type threadHandle, char name[kMaxThreadName])
 {
-    pthread_getname_np(getCurrentThread(), name, kMaxThreadName);
+    pthread_getname_np(threadHandle, name, kMaxThreadName);
 }
-#else
 
-// 15 char name limit on Linux/Android, how modern!
-void setThreadName(std::thread::native_handle_type handle, const char* threadName)
-{
-    int val = pthread_setname_np(handle, threadName);
-    if (val != 0)
-        KLOGW("Thread", "Could not set thread name");
-}
-
-void setCurrentThreadName(const char* threadName)
-{
-    setThreadName(getCurrentThread(), threadName);
-}
+#endif
 
-void setThreadName(std::thread& thread, const char* threadName)
+void getThreadName(std::thread& thread, char name[kMaxThreadName])
 {
-    setThreadName(thread.native_handle(), threadName);
+    getThreadName(thread.native_handle(), name);
 }
 
 void getCurrentThreadName(char name[kMaxThreadName])
 {
-    pthread_getname_np(getCurrentThread(), name, kMaxThreadName);
+    getThreadName(getCurrentThread(), name);
 }
 
-#endif
-
 //------------------
 
 #if KRAM_MAC || KRAM_IOS
diff --git a/libkram/kram/TaskSystem.h b/libkram/kram/TaskSystem.h
index a4118afd..a74586b3 100644
--- a/libkram/kram/TaskSystem.h
+++ b/libkram/kram/TaskSystem.h
@@ -151,10 +151,23 @@ struct ThreadInfo {
     int affinity = 0; // single core for now
 };
 
+std::thread::native_handle_type getCurrentThread();
+
 // This only works for current thread, but simplifies setting several thread params.
 void setThreadInfo(ThreadInfo& info);
 
-    
+// This is limited to 16 on linux
+// #define TASK_COMM_LEN 16
+constexpr const uint32_t kMaxThreadName = 32;
+
+void setCurrentThreadName(const char* threadName);
+
+void getThreadName(std::thread::native_handle_type threadHandle, char name[kMaxThreadName]);
+
+void getThreadName(std::thread& thread, char name[kMaxThreadName]);
+
+void getCurrentThreadName(char name[kMaxThreadName]);
+
 class task_system {
     NOT_COPYABLE(task_system);