Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use bit twiddling to speed up JSON generation. #738

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 129 additions & 47 deletions ext/json/ext/generator/generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -192,61 +192,143 @@ static inline void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const un
unsigned long beg = 0, pos = 0;

#define FLUSH_POS(bytes) if (pos > beg) { fbuffer_append(out_buffer, &ptr[beg], pos - beg); } pos += bytes; beg = pos;
#define PROCESS_BYTE if (RB_UNLIKELY(ch_len)) { \
switch (ch_len) { \
case 9: { \
FLUSH_POS(1); \
switch (ch) { \
case '"': fbuffer_append(out_buffer, "\\\"", 2); break; \
case '\\': fbuffer_append(out_buffer, "\\\\", 2); break; \
case '/': fbuffer_append(out_buffer, "\\/", 2); break; \
case '\b': fbuffer_append(out_buffer, "\\b", 2); break; \
case '\f': fbuffer_append(out_buffer, "\\f", 2); break; \
case '\n': fbuffer_append(out_buffer, "\\n", 2); break; \
case '\r': fbuffer_append(out_buffer, "\\r", 2); break; \
case '\t': fbuffer_append(out_buffer, "\\t", 2); break; \
default: { \
scratch[2] = '0'; \
scratch[3] = '0'; \
scratch[4] = hexdig[(ch >> 4) & 0xf]; \
scratch[5] = hexdig[ch & 0xf]; \
fbuffer_append(out_buffer, scratch, 6); \
break; \
} \
} \
break; \
} \
case 11: { \
unsigned char b2 = ptr[pos + 1]; \
if (RB_UNLIKELY(b2 == 0x80)) { \
unsigned char b3 = ptr[pos + 2]; \
if (b3 == 0xA8) { \
FLUSH_POS(3); \
fbuffer_append(out_buffer, "\\u2028", 6); \
break; \
} else if (b3 == 0xA9) { \
FLUSH_POS(3); \
fbuffer_append(out_buffer, "\\u2029", 6); \
break; \
} \
} \
ch_len = 3; \
} \
default: \
pos += ch_len; \
break; \
} \
} else { \
pos++; \
}

while (pos < len) {
unsigned char ch = ptr[pos];
unsigned char ch_len = escape_table[ch];
/* JSON encoding */
if (escape_table != script_safe_escape_table) {
// Taken from: https://github.com/ruby/ruby/blob/96a5da67864a15eea7b79e552c7684ddd182f76c/string.c#L671-L748
// TODO revisit for the compiler version check

// These macros are from https://graphics.stanford.edu/~seander/bithacks.html
# if SIZEOF_UINTPTR_T == 8
#define hasless(x,n) (((x)-~0ULL/255*(n))&~(x)&~0ULL/255*128)
#define haszero(v) (((v) - 0x0101010101010101ULL) & ~(v) & 0x8080808080808080ULL)
#define MASK_DOUBLEQUOTE 0x2222222222222222ULL
#define MASK_BACKSLASH 0x5c5c5c5c5c5c5c5cULL
# elif SIZEOF_UINTPTR_T == 4
#define hasless(x,n) (((x)-~0UL/255*(n))&~(x)&~0UL/255*128)
#define haszero(v) (((v) - 0x01010101UL) & ~(v) & 0x80808080UL)
#define MASK_DOUBLEQUOTE 0x22222222UL
#define MASK_BACKSLASH 0x5c5c5c5cUL
# else
# error "don't know what to do."
#endif

if (RB_UNLIKELY(ch_len)) {
switch (ch_len) {
case 9: {
FLUSH_POS(1);
switch (ch) {
case '"': fbuffer_append(out_buffer, "\\\"", 2); break;
case '\\': fbuffer_append(out_buffer, "\\\\", 2); break;
case '/': fbuffer_append(out_buffer, "\\/", 2); break;
case '\b': fbuffer_append(out_buffer, "\\b", 2); break;
case '\f': fbuffer_append(out_buffer, "\\f", 2); break;
case '\n': fbuffer_append(out_buffer, "\\n", 2); break;
case '\r': fbuffer_append(out_buffer, "\\r", 2); break;
case '\t': fbuffer_append(out_buffer, "\\t", 2); break;
default: {
scratch[2] = '0';
scratch[3] = '0';
scratch[4] = hexdig[(ch >> 4) & 0xf];
scratch[5] = hexdig[ch & 0xf];
fbuffer_append(out_buffer, scratch, 6);
break;
}
if ((pos + SIZEOF_UINTPTR_T*2) < len) {
/*
* Align the pointer to a sizeof(uintptr_t)-byte boundary.
* TODO: Use the same alignment technique as https://github.com/ruby/ruby/blob/96a5da67864a15eea7b79e552c7684ddd182f76c/string.c#L671-L748
*/
char *char_ptr;
for (char_ptr = (char *) ptr; pos < len && (uintptr_t) char_ptr % SIZEOF_UINTPTR_T != 0;) {
unsigned long start = pos;
char ch = *char_ptr;
unsigned char ch_len = escape_table[(uint8_t) ch];
PROCESS_BYTE;
// PROCESS_BYTE might process more than one byte. Ensure we increment char_ptr appropriately.
char_ptr += (pos - start);
}

uintptr_t *lp = (uintptr_t *) char_ptr;

while (pos + SIZEOF_UINTPTR_T < len) {
uintptr_t chunk = *lp;

uintptr_t has_less_than_0x20 = hasless(chunk, ' ');

/*
* This is effectively a memchr(3). Look for both a double quote
* and a backslash.
*/
uintptr_t tmp1 = chunk ^ MASK_DOUBLEQUOTE;
uintptr_t haszero1 = haszero(tmp1);

uintptr_t tmp2 = chunk ^ MASK_BACKSLASH;
uintptr_t haszero2 = haszero(tmp2);

if ((has_less_than_0x20 | haszero1 | haszero2) != 0) {
for(size_t i=0; i<sizeof(uintptr_t); i++) {
unsigned char ch = ptr[pos];
unsigned char ch_len = escape_table[ch];
PROCESS_BYTE;
}
break;
}
case 11: {
unsigned char b2 = ptr[pos + 1];
if (RB_UNLIKELY(b2 == 0x80)) {
unsigned char b3 = ptr[pos + 2];
if (b3 == 0xA8) {
FLUSH_POS(3);
fbuffer_append(out_buffer, "\\u2028", 6);
break;
} else if (b3 == 0xA9) {
FLUSH_POS(3);
fbuffer_append(out_buffer, "\\u2029", 6);
break;
}
/*
* Realign lp as the previous loop may have left us in a position that's
* no longer aligned on a sizeof(uintptr_t) boundary.
*/
for (char_ptr = (char *) ptr+pos; pos < len && (uintptr_t) char_ptr % SIZEOF_UINTPTR_T != 0;) {
unsigned long start = pos;
char ch = *char_ptr;
unsigned char ch_len = escape_table[(uint8_t) ch];
PROCESS_BYTE;
// This might process more than one byte. Ensure we increment char_ptr appropriately.
char_ptr += (pos - start);
}
ch_len = 3;
// fallthrough
lp = (uintptr_t *) char_ptr;
} else {
lp++;
pos += SIZEOF_UINTPTR_T;
continue;
}
default:
pos += ch_len;
break;
}
} else {
pos++;
}
}

#undef hasless
#undef haszero

while (pos < len) {
unsigned char ch = ptr[pos];
unsigned char ch_len = escape_table[ch];
/* JSON encoding */

PROCESS_BYTE;
}
#undef FLUSH_POS

if (beg < len) {
Expand Down
36 changes: 36 additions & 0 deletions test/json/json_generator_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ def test_backslash
json = '["/"]'
assert_equal json, generate(data)
#
data = [ '////////////////////////////////////////////////////////////////////////////////////' ]
json = '["////////////////////////////////////////////////////////////////////////////////////"]'
assert_equal json, generate(data)
#
data = [ '/' ]
json = '["\/"]'
assert_equal json, generate(data, :script_safe => true)
Expand All @@ -455,13 +459,45 @@ def test_backslash
json = '["\""]'
assert_equal json, generate(data)
#
data = [ '///////////' ]
json = '["\/\/\/\/\/\/\/\/\/\/\/"]'
assert_equal json, generate(data, :script_safe => true)
#
data = [ '///////////////////////////////////////////////////////' ]
json = '["\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/"]'
assert_equal json, generate(data, :script_safe => true)
#
data = ["'"]
json = '["\\\'"]'
assert_equal '["\'"]', generate(data)
#
data = ["倩", "瀨"]
json = '["倩","瀨"]'
assert_equal json, generate(data, script_safe: true)
#
data = '["This is a "test" of the emergency broadcast system."]'
json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\""
assert_equal json, generate(data)
#
data = '\tThis is a test of the emergency broadcast system.'
json = "\"\\\\tThis is a test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This\tis a test of the emergency broadcast system.'
json = "\"This\\\\tis a test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This is\ta test of the emergency broadcast system.'
json = "\"This is\\\\ta test of the emergency broadcast system.\""
assert_equal json, generate(data)
#
data = 'This is a test of the emergency broadcast\tsystem.'
json = "\"This is a test of the emergency broadcast\\\\tsystem.\""
assert_equal json, generate(data)
#
data = 'This is a test of the emergency broadcast\tsystem.\n'
json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\""
assert_equal json, generate(data)
end

def test_string_subclass
Expand Down