Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
giannitedesco committed May 23, 2021
0 parents commit c5a4fd9
Show file tree
Hide file tree
Showing 46 changed files with 3,143 additions and 0 deletions.
19 changes: 19 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# perf
/perf.data
/perf.data.old

# python
__pycache__
*.pyc
/.mypy_cache
/build
/dist
/MANIFEST

# Text editors
.*.swp

/*.egg-info

/bench/bin
/bench/obj
674 changes: 674 additions & 0 deletions LICENSE.txt

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
include README.md COPYING
recursive-include examples *
recursive-include xpdt/templates *
14 changes: 14 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
MAKEFLAGS += --no-builtin-rules
.SUFFIXES:

TARGET: all

.PHONY: all
all:
./setup.py build

.PHONY: clean
clean:
./setup.py clean
rm -rf build dist *.egg-info
find . -regex '^.*\(__pycache__\|\.py[co]\)$$' -delete
102 changes: 102 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# xpdt: eXPeditious Data Transfer

<div align="center">
<img src="https://img.shields.io/pypi/v/xpdt?label=pypi" alt="PyPI version">
</div>

## About
xpdt is (yet another) language for defining data-types and generating code for
serializing and deserializing them. It aims to produce code with little or no
overhead and is based on fixed-length representations which allows for
zero-copy deserialization and (at-most-)one-copy writes (source to buffer).

The generated C code, in particular, is highly optimized and often permits the
elimination of data-copying for writes and enables optimizations such as
loop-unrolling for fixed-length objects. This can lead to read speeds in
excess of 500 million objects per second (~1.8 nsec per object).

## Examples
The xpdt source language looks similar to C struct definitions:

```
type timestamp {
u32 tv_sec;
u32 tv_nsec;
};
type point {
s32 x;
s32 y;
s32 z;
};
type line {
timestamp time;
point line_start;
point line_end;
blob comment;
};
```

Fixed width integer types from 8 to 128 bit are supported, along with the
`blob` type, which is a variable-length sequence of bytes.

## Target Languages
The following target languages are currently supported:
- C
- Python

The C code is very highly optimized, and the Python code is fairly well
optimized; it uses typed `NamedTuple` for objects and uses `struct.Struct` for
packing/unpacking. Performance of the pure Python code is comparable to a JSON
library implemented in C or Rust.

For better performance in Python, it may be desirable to develop a Cython
target.

Target languages are implemented purely as `jinja2` templates.

## Serialization format
The serialization format for fixed-length objects is simply a packed C struct.

For any object which contains `blob` type fields:
- a 32bit unsigned record length is prepended to the struct
- all `blob` type fields are converted to `u32` and contain the length of the blob
- all blob contents are appended after the struct in the order in which they appear

For example, following the example above, the serialization would be:

```
u32 tot_len # = 41
u32 time.tv_sec
u32 time.tv_nsec
s32 line_start.x
s32 line_start.y
s32 line_start.z
s32 line_end.x
s32 line_end.y
s32 line_end.z
u32 comment # = 5
u8 'H'
u8 'e'
u8 'l'
u8 'l'
u8 'o'
```

## Features
The feature-set is, as of now, pretty slim.

There are no array / sequence / map types, and no keyed unions.

Support for such things may be added in future provided that suitable
implementations exist. An implementation is suitable if:
- It admits a zero (or close to zero) overhead implementation
- It causes no overhead when the feature isn't being used

# License
The compiler is released under the GPLv3.

The C support code/headers are released under the MIT license.

The generated code is yours.
Empty file added benchmark/__init__.py
Empty file.
50 changes: 50 additions & 0 deletions benchmark/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from timeit import default_timer as timer
from pathlib import Path

from xpdt import NameSpace, StructDef, MemberDef, BaseType


def main():
    """Time pure-Python decoding of a 16-field, all-``u64`` struct.

    Builds ``SomeStruct`` via ``gen_dynamic_python()``, then repeatedly reads
    ``_bin_size`` bytes from ``/dev/zero`` and decodes them with
    ``_frombytes``. Prints the total elapsed time and the mean time per
    record.
    """
    # Sixteen u64 members named 'a' through 'p'.
    fields = tuple(
        MemberDef(letter, BaseType.u64) for letter in 'abcdefghijklmnop'
    )
    struct_def = StructDef(name='SomeStruct', members=fields)
    namespace = NameSpace(structs=(struct_def,))
    cls = namespace.gen_dynamic_python().SomeStruct

    record_size = cls._bin_size
    iters = 1000000

    with Path('/dev/zero').open('rb') as src:
        # The timed region covers both the read and the decode of each record.
        start = timer()
        for _ in range(iters):
            cls._frombytes(src.read(record_size))
        end = timer()

        elapsed = end - start
        per_iter = elapsed / iters
        print(f'{elapsed:.2f} sec elapsed')
        print(f'{per_iter*1e9:.0f} nsec per record')


# Run the benchmark only when this module is executed, not when it is imported.
if __name__ == '__main__':
    main()
24 changes: 24 additions & 0 deletions examples/example1.xpdt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
## This is a comment

type timestamp {
u32 tv_sec;
u32 tv_nsec;
};

type point {
s32 x;
s32 y;
s32 z;
};

type timespan {
timestamp begin;
timestamp end;
};

type item {
u32 id; # User id
blob first_name; # given name
blob surname; # surname
timestamp last_login;
};
43 changes: 43 additions & 0 deletions include/xpdt/x1b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#pragma once

/* Copyright (c) 2020 Gianni Tedesco
* https://www.scaramanga.co.uk
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

/* Branch hint: yields the truth value of x (0 or 1) while telling the
 * compiler that the condition is expected to be false. */
#define x1b_unlikely(x) __builtin_expect(!!(x), 0)

#include "xu128.h"
#include "xbuf.h"
#include "xbuf_iter.h"
#include "xfilemap.h"

/* 32-bit unsigned integer aliases.
 * NOTE(review): going by the names, x1b_size_t presumably carries a record
 * length and x1b_strlen_t a blob length — confirm against the generated code. */
typedef uint32_t x1b_size_t;
typedef uint32_t x1b_strlen_t;

/* Handle to an output stream. struct xostream is not defined in this header,
 * so code that sees only this header must treat the handle as opaque. */
typedef struct xostream *xostream_t;
/* NOTE(review): presumably makes room for a write of the given size and
 * returns where to put the bytes — confirm against the implementation. */
uint8_t *xostream_prepare(xostream_t, size_t);
/* NOTE(review): presumably finalizes the given number of prepared bytes and
 * reports success as true — confirm against the implementation. */
bool xostream_commit(xostream_t, size_t);
60 changes: 60 additions & 0 deletions include/xpdt/xbuf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#pragma once

/* Copyright (c) 2020 Gianni Tedesco
* https://www.scaramanga.co.uk
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* A read-only byte range described by a pointer and a length. */
struct xbuf {
	const uint8_t *ptr;	/* first byte of the range; NULL in XBUF_NIL */
	size_t len;		/* number of bytes at ptr; 0 in XBUF_NIL */
};

/* Expands to a struct xbuf compound literal.
 * Note the argument order: length first, then pointer. */
#define XBUF_INIT(len_, ptr_) \
	(struct xbuf){.len = (len_), .ptr = (ptr_)}

/* The empty xbuf: zero length and a NULL pointer. */
#define XBUF_NIL XBUF_INIT(0, NULL)

static inline struct xbuf xbuf(size_t len,
const uint8_t buf[static len])
{
return XBUF_INIT(len, buf);
}

static inline struct xbuf xbuf_nil(void)
{
return XBUF_NIL;
}

static inline struct xbuf xbuf_cstring(const char *str)
{
return XBUF_INIT(strlen(str), (uint8_t *)str);
}

/* Report whether the xbuf has a non-zero length.
 * Only `len` is examined: a zero-length xbuf counts as unset even if its
 * pointer is non-NULL. `str` itself must not be NULL. */
static inline bool xbuf_isset(const struct xbuf *str)
{
	return str->len != 0;
}
Loading

0 comments on commit c5a4fd9

Please sign in to comment.