Skip to content

Commit

Permalink
gzipio works
Browse files Browse the repository at this point in the history
  • Loading branch information
zzjjzzgggg committed Jan 23, 2017
1 parent 54b49fc commit 82d56b9
Show file tree
Hide file tree
Showing 14 changed files with 993 additions and 64 deletions.
2 changes: 1 addition & 1 deletion argsparser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const string ArgsParser::SPACES(24, ' ');

string ArgsParser::formatHelp(const string& help) const {
if (help.find('\n') == std::string::npos) return help;
std::vector<string> elems = strutils::split(help, '\n');
std::vector<string> elems = stringutils::split(help, '\n');
fmt::MemoryWriter w;
w.write("{}", elems[0]);
for (size_t i = 1; i < elems.size(); i++) {
Expand Down
2 changes: 1 addition & 1 deletion fmt
138 changes: 138 additions & 0 deletions gzipio.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#include "gzipio.h"

// File-name extensions that GZipOut accepts as compressed targets
// (looked up by isZip() and getCmd()).
const std::unordered_set<std::string> GZipOut::gzip_ext_set = {
    ".gz", ".zip", ".7z", ".bzip2", ".bz2"};

// Capacity, in bytes, of the staging buffer that is filled before data is
// handed to the compressor pipe (4 KiB).
const size_t GZipOut::MxBfL = 4096;

void GZipOut::FlushBf() {
size_t BytesOut = fwrite(Bf, 1, BfL, ZipStdinWr);
assert(BytesOut == BfL);
BfL = 0;
}

// Starts the compressor as a child process and stores the write end of its
// stdin pipe in ZipStdinWr.
//
// cmd     - command and switches, as produced by getCmd().
// zip_fnm - path of the archive; appended to the command as its last argument.
//
// The command line is executed through popen(), i.e. by the shell, so the
// archive path is wrapped in single quotes (embedded single quotes are
// emitted as '\'' ) to keep spaces and shell metacharacters in the path from
// being interpreted. The child's stdout is redirected to /dev/null.
void GZipOut::CreateZipProcess(const std::string& cmd,
                               const std::string& zip_fnm) {
  std::string quoted_fnm("'");
  for (std::string::size_type i = 0; i < zip_fnm.size(); ++i) {
    if (zip_fnm[i] == '\'') {
      quoted_fnm += "'\\''";  // close quote, escaped quote, reopen quote
    } else {
      quoted_fnm += zip_fnm[i];
    }
  }
  quoted_fnm += '\'';

  const std::string cmd_line = cmd + " " + quoted_fnm + " >/dev/null";
  ZipStdinWr = popen(cmd_line.c_str(), "w");
  assert_msg(ZipStdinWr != NULL, "Can not execute '%s'",
             cmd_line.c_str());
}

// Opens `filename` for compressed writing: builds the compressor command
// with getCmd(), launches it via CreateZipProcess(), then allocates the
// MxBfL-byte staging buffer.
GZipOut::GZipOut(const std::string& filename)
    : ZipStdinRd(NULL), ZipStdinWr(NULL), Bf(NULL), BfL(0) {
  const std::string command = getCmd(filename);
  CreateZipProcess(command, filename);
  Bf = new char[MxBfL];
  // BfL was zeroed by the initializer list and nothing above touches it.
}

// Flushes and closes the compressor pipe via close(), then frees the
// staging buffer.
GZipOut::~GZipOut() {
  close();
  delete[] Bf;  // delete[] on a null pointer is a no-op
}

// Flushes any buffered bytes and closes the pipe to the compressor process.
//
// Safe to call more than once: after the first call ZipStdinWr is NULL and
// later calls (including the one made by the destructor) return immediately.
//
// pclose() is deliberately called outside assert_msg: an assertion macro's
// argument may not be evaluated at all when assertions are compiled out,
// which would leave the pipe open and the child process unreaped.
void GZipOut::close() {
  if (ZipStdinWr == NULL) return;  // already closed, nowhere to flush to

  if (BfL != 0) FlushBf();

  const int status = pclose(ZipStdinWr);
  ZipStdinWr = NULL;
  assert_msg(status != -1, "Closing of the process failed");
  (void)status;
}

// Appends one character to the staging buffer, flushing first when the
// buffer is already full. Returns the stored character converted to int.
int GZipOut::putChar(const char& Ch) {
  if (BfL == MxBfL) {
    FlushBf();
  }
  Bf[BfL] = Ch;
  ++BfL;
  return Ch;
}

void GZipOut::write(const void* data, const size_t length) {
if (BfL + length > MxBfL) {
for (size_t LBfC = 0; LBfC < length; LBfC++)
putChar(((char*)data)[LBfC]);
} else {
for (size_t LBfC = 0; LBfC < length; LBfC++)
Bf[BfL++] = ((char*)data)[LBfC];
}
}

// Public flush: forwards to FlushBf(), which writes the buffered bytes to
// the compressor pipe.
void GZipOut::Flush() {
  FlushBf();
}

// Returns true when the extension that stringutils::splitFilename() reports
// for `filename` is one of the entries of gzip_ext_set.
bool GZipOut::isZip(const std::string& filename) {
  std::string dir_part;
  std::string stem;
  std::string extension;
  stringutils::splitFilename(filename, dir_part, stem, extension);
  return gzip_ext_set.count(extension) != 0;
}

// Builds the 7za command (without the archive path) used to compress into
// `zip_fnm`. Asserts that the file's extension is in gzip_ext_set.
//
// The `name` component produced by stringutils::splitFilename() is glued
// directly onto the "-si" switch with no separating space.
// NOTE(review): `name` is not shell-quoted here, yet the result is later run
// through popen(); confirm names with spaces are not expected.
std::string GZipOut::getCmd(const std::string& zip_fnm) {
  std::string base;
  std::string name;
  std::string ext;
  stringutils::splitFilename(zip_fnm, base, name, ext);

  assert_msg(gzip_ext_set.find(ext) != gzip_ext_set.end(),
             "Unknown file extension '%s'.", ext.c_str());

  std::string command("7za a -y -bd -si");
  command += name;
  return command;
}

/////////////////////////////////////////////////////////
// File-name extensions that GZipIn accepts as compressed sources
// (looked up by isZip()).
const std::unordered_set<std::string> GZipIn::gzip_ext_set = {
    ".gz", ".zip", ".7z", ".bzip2", ".bz2"};

// Capacity, in bytes, of the read buffer refilled by FillBf() (32 KiB).
const int GZipIn::MxBfL = 32768;

// Starts the decompressor as a child process and stores the read end of its
// stdout pipe in ZipStdoutRd.
//
// cmd     - command and switches, as produced by getCmd().
// zip_fnm - path of the archive; appended to the command as its last argument.
//
// The command line is executed through popen(), i.e. by the shell, so the
// archive path is wrapped in single quotes (embedded single quotes are
// emitted as '\'' ) to keep spaces and shell metacharacters in the path from
// being interpreted. The child's stderr is redirected to /dev/null.
void GZipIn::CreateZipProcess(const std::string& cmd,
                              const std::string& zip_fnm) {
  std::string quoted_fnm("'");
  for (std::string::size_type i = 0; i < zip_fnm.size(); ++i) {
    if (zip_fnm[i] == '\'') {
      quoted_fnm += "'\\''";  // close quote, escaped quote, reopen quote
    } else {
      quoted_fnm += zip_fnm[i];
    }
  }
  quoted_fnm += '\'';

  const std::string cmd_line = cmd + " " + quoted_fnm + " 2>/dev/null";
  ZipStdoutRd = popen(cmd_line.c_str(), "r");
  assert_msg(ZipStdoutRd != NULL, "Can not execute '%s'",
             cmd_line.c_str());
}

// Refills the read buffer from the decompressor pipe: requests up to MxBfL
// bytes, records how many arrived in BfL, advances CurFPos by that amount
// and rewinds the read cursor BfC to the start of the buffer.
void GZipIn::FillBf() {
  const size_t got = fread(Bf, 1, MxBfL, ZipStdoutRd);
  CurFPos += got;
  BfL = static_cast<int>(got);
  BfC = 0;
}

// Opens `filename` for decompressed reading: builds the decompressor command
// with getCmd(), launches it via CreateZipProcess(), allocates the
// MxBfL-byte read buffer and loads the first chunk with FillBf().
GZipIn::GZipIn(const std::string& filename)
    : ZipStdoutRd(NULL),
      ZipStdoutWr(NULL),
      CurFPos(0),
      Bf(NULL),
      BfC(0),
      BfL(0) {
  const std::string command = getCmd(filename);
  CreateZipProcess(command, filename);
  Bf = new char[MxBfL];
  // Both cursors are parked at -1 until the first fill overwrites them.
  BfL = -1;
  BfC = -1;
  FillBf();
}

// Closes the pipe to the decompressor process (if one was opened) and frees
// the read buffer.
//
// pclose() is deliberately called outside assert_msg: an assertion macro's
// argument may not be evaluated at all when assertions are compiled out,
// which would leave the pipe open and the child process unreaped.
GZipIn::~GZipIn() {
  if (ZipStdoutRd != NULL) {
    const int status = pclose(ZipStdoutRd);
    ZipStdoutRd = NULL;
    assert_msg(status != -1, "Closing of the process failed");
    (void)status;
  }
  delete[] Bf;  // delete[] on a null pointer is a no-op
}

// Copies up to `LBfL` decompressed bytes into the memory at `LBf`, refilling
// the internal buffer from the pipe as needed.
//
// LBf  - destination. Declared `const void*` in the interface, but it is
//        written to, so the constness is cast away here.
// LBfL - number of bytes requested.
//
// Copying stops early when the stream is exhausted (a refill delivers no
// bytes); the remaining destination bytes are then left untouched instead of
// being filled from stale buffer contents past the valid region.
//
// Returns the running sum of the copied bytes, each added as a `char` value.
// NOTE(review): this is a checksum-like value, not a byte count; confirm
// what callers expect before relying on it.
size_t GZipIn::read(const void* LBf, const size_t LBfL) {
  char* const out = static_cast<char*>(const_cast<void*>(LBf));
  size_t LBfS = 0;

  for (size_t i = 0; i < LBfL; ++i) {
    if (BfC >= BfL) {
      FillBf();
      if (BfL <= 0) break;  // end of stream: nothing more to copy
    }
    const char ch = Bf[BfC++];
    out[i] = ch;
    LBfS += ch;
  }
  return LBfS;
}

// Returns true when the extension that stringutils::splitFilename() reports
// for `filename` is one of the entries of gzip_ext_set.
bool GZipIn::isZip(const std::string& filename) {
  std::string dir_part;
  std::string stem;
  std::string extension;
  stringutils::splitFilename(filename, dir_part, stem, extension);
  return gzip_ext_set.count(extension) != 0;
}

// Returns the 7za command (without the archive path) used to extract an
// archive to stdout. Asserts that `zip_fnm` passes isZip(); note that the
// assertion message is given the whole file name, not just its extension.
std::string GZipIn::getCmd(const std::string& zip_fnm) {
  assert_msg(isZip(zip_fnm),
             "Unsupported file extension '%s'",
             zip_fnm.c_str());
  const std::string command("7za e -y -bd -so");
  return command;
}
87 changes: 87 additions & 0 deletions gzipio.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#ifndef __GZIPIO_H__
#define __GZIPIO_H__
// Compressed input and output streams.
// 7za.exe or 7z.exe must be in the path
// (http://www.7-zip.org)
// 7za.exe is a stand-alone program, which supports
// -- extraction: .gz, .7z, .rar, .zip, .cab, .arj, .bzip2
// -- compression: .7z
// 7z.exe uses DLLs in folders Codecs and Formats
// -- extraction: .gz, .7z, .rar, .zip, .cab, .arj, .bzip2
// -- compression: .gz, .7z, .rar, .zip, .cab, .arj, .bzip2

#include <cstdio>
#include <cassert>
#include <cstring>

#include <unordered_set>
#include <stringutils.h>

#include <ppk_assert.h>
#define assert_msg PPK_ASSERT

// Buffered output stream that pipes everything written to it into an
// external 7za process, which stores it in a compressed archive.
//
// The object owns a heap buffer and a FILE* pipe, both released by the
// destructor. Copying would make two objects release the same buffer and
// pipe, so the copy operations are deleted.
class GZipOut {
 private:
  static const size_t MxBfL;  // capacity of Bf in bytes
  static const std::unordered_set<std::string> gzip_ext_set;  // accepted extensions
  FILE *ZipStdinRd, *ZipStdinWr;  // ZipStdinWr: write end of the compressor's stdin
  char* Bf;    // staging buffer of MxBfL bytes
  size_t BfL;  // number of bytes currently held in Bf

 private:
  // Writes the buffered bytes to the pipe and empties the buffer.
  void FlushBf();
  // Launches `cmd` followed by `filename` through popen().
  void CreateZipProcess(const std::string& cmd,
                        const std::string& filename);

 public:
  // Starts the compressor for `filename` and allocates the buffer.
  GZipOut(const std::string& filename);
  // Closes the stream (see close()) and frees the buffer.
  ~GZipOut();

  // Non-copyable: the buffer and the pipe have a single owner.
  GZipOut(const GZipOut&) = delete;
  GZipOut& operator=(const GZipOut&) = delete;

  // Buffers a single character; returns it converted to int.
  int putChar(const char& ch);
  // Buffers `length` raw bytes starting at `data`.
  void write(const void* data, const size_t length);
  // Writes the characters of a C string, without its terminating NUL.
  void save(const char* str) { write(str, strlen(str)); }
  // The numeric overloads write the in-memory bytes of the value
  // (sizeof(T) bytes, no text formatting).
  void save(const int val) { write(&val, sizeof(int)); }
  void save(const long val) { write(&val, sizeof(long)); }
  void save(const double val) { write(&val, sizeof(double)); }

  // Pushes buffered bytes to the compressor.
  void Flush();
  // Flushes and closes the pipe to the compressor.
  void close();

  // True when the extension of `filename` is in gzip_ext_set.
  static bool isZip(const std::string& filename);
  // Builds the compressor command line (without the archive path).
  std::string getCmd(const std::string& filename);
};

// Buffered input stream that reads the decompressed contents of an archive
// from the stdout of an external 7za process.
//
// The object owns a heap buffer and a FILE* pipe, both released by the
// destructor. Copying would make two objects release the same buffer and
// pipe, so the copy operations are deleted.
class GZipIn {
 private:
  static const int MxBfL;  // capacity of Bf in bytes
  static const std::unordered_set<std::string> gzip_ext_set;  // accepted extensions
  FILE *ZipStdoutRd, *ZipStdoutWr;  // ZipStdoutRd: read end of the decompressor's stdout
  size_t CurFPos;  // total number of bytes fetched from the pipe so far
  char* Bf;        // read buffer of MxBfL bytes
  int BfC, BfL;    // BfC: next unread index in Bf; BfL: number of valid bytes in Bf

 private:
  // Refills Bf from the pipe and resets BfC to 0.
  void FillBf();
  // Launches `cmd` followed by `zip_fnm` through popen().
  void CreateZipProcess(const std::string& cmd,
                        const std::string& zip_fnm);

 public:
  // Starts the decompressor for `filename` and loads the first chunk.
  GZipIn(const std::string& filename);
  // Closes the pipe and frees the buffer.
  ~GZipIn();

  // Non-copyable: the buffer and the pipe have a single owner.
  GZipIn(const GZipIn&) = delete;
  GZipIn& operator=(const GZipIn&) = delete;

  // True once the last fill came back short (fewer than MxBfL bytes) and
  // every byte of it has been consumed. Uses >= so the answer stays true
  // even if the cursor was ever advanced past the valid region.
  bool eof() const { return BfL < MxBfL && BfC >= BfL; }

  // Returns the next decompressed character, refilling the buffer when it
  // is exhausted. At end of stream (the refill delivers nothing) returns
  // '\0' without advancing, so eof() keeps reporting true.
  char getChar() {
    if (BfC >= BfL) {
      FillBf();
      if (BfL <= 0) return '\0';
    }
    return Bf[BfC++];
  }

  // Copies LBfL bytes into the memory at LBf (written to despite the const).
  size_t read(const void* LBf, const size_t LBfL);
  // The load overloads fill the in-memory bytes of the value
  // (sizeof(T) bytes, no text parsing).
  void load(int& val) { read(&val, sizeof(int)); }
  void load(long& val) { read(&val, sizeof(long)); }
  void load(double& val) { read(&val, sizeof(double)); }

  // Builds the decompressor command line (without the archive path).
  std::string getCmd(const std::string& zip_fnm);
  // True when the extension of `filename` is in gzip_ext_set.
  static bool isZip(const std::string& filename);
};

#endif /* __GZIPIO_H__ */
16 changes: 16 additions & 0 deletions ioutils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#ifndef __IOUTILS_H__
#define __IOUTILS_H__

#include <gzipio.h>

// Writes every entry of `map_to_save` to the compressed file `zip_filename`,
// one entry per line formatted as "<key>\t<value>\n".
//
// TKey and TVal must be formattable by fmt::format. Entries are written in
// the map's iteration order. The map is taken by const reference so it is
// not copied; the file is finalized when the local GZipOut goes out of scope.
template <typename TKey, typename TVal>
void saveMap(const std::unordered_map<TKey, TVal>& map_to_save,
             const std::string& zip_filename) {
  GZipOut gzo(zip_filename);
  for (const auto& pr : map_to_save) {
    gzo.save(fmt::format("{}\t{}\n", pr.first, pr.second).c_str());
  }
}

#endif /* __IOUTILS_H__ */
1 change: 1 addition & 0 deletions lz4
Submodule lz4 added at 7bb64f
Loading

0 comments on commit 82d56b9

Please sign in to comment.