-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathucd-xml2json.js
executable file
·74 lines (65 loc) · 2.2 KB
/
ucd-xml2json.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#!/usr/bin/env node
/*
* Read in a huge mess of XML and output some nice-ish JSON, which we will
* later compact.
*/
var expat = require('node-expat'),
JSONStream = require('JSONStream'),
fs = require('fs');
// Command-line interface. `-o` is aliased to `--output` and `-i` to
// `--input`; both are optional and are read further down as `args.output`
// and `args.input`.
// NOTE(review): the usage text advertises `-v`, but nothing in the visible
// code reads it -- confirm whether it is still meant to be supported.
var argparse = require('optimist')
.usage('Usage: $0 [-v] [-o <outfile>] [-i <infile>]')
.alias('o', 'output')
.alias('i', 'input');
// Parsed option values.
var args = argparse.argv;
// {{{ fixedFromCharCode(codePoint)
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/String/fromCharCode
/**
 * Convert a Unicode code point into a JavaScript string.
 *
 * String.fromCharCode works on 16-bit code units, so anything above 0xFFFF
 * is split by hand into a UTF-16 high/low surrogate pair.
 *
 * @param {number} codePt - Code point to convert.
 * @returns {string} A single code unit, or a surrogate pair for code points
 *     above 0xFFFF.
 */
function fixedFromCharCode (codePt) {
  var needsSurrogates = codePt > 0xFFFF;
  if (!needsSurrogates) {
    return String.fromCharCode(codePt);
  }

  // Offset into the supplementary planes: the top bits select the high
  // surrogate, the low 10 bits select the low surrogate.
  var offset = codePt - 0x10000;
  var highSurrogate = 0xD800 + (offset >> 10);
  var lowSurrogate = 0xDC00 + (offset & 0x3FF);
  return String.fromCharCode(highSurrogate, lowSurrogate);
}
// }}}
// Parse XML
// Streaming XML parser, constructed with the "UTF-8" encoding argument.
var parser = new expat.Parser("UTF-8");

// Destination for the JSON: the file named by -o/--output, else stdout.
var output;
if (args.output) {
  output = fs.createWriteStream(args.output);
} else {
  output = process.stdout;
}

// Source of the XML: the file named by -i/--input, else stdin.
var input;
if (args.input) {
  input = fs.createReadStream(args.input);
} else {
  input = process.stdin;
}

// Entries written to this stream are emitted as members of a single JSON
// object, separated by ",\n", and forwarded to `output`.
var jsonStream = JSONStream.stringifyObject("{", ",\n", "}\n");
jsonStream.pipe(output);
// Parse!
// Record for the <char> element currently being read, or null when the
// parser is not inside a <char> that names a single code point.
var currentChar = null;

// Opening tags: start a new record on <char cp="...">, and collect any
// nested <name-alias alias="..."> values onto that record.
parser.on('startElement', function (name, attrs) {
  if (name === 'char') {
    // A <char> without `cp` (the UCD XML format describes ranges with
    // first-cp/last-cp instead) is skipped. Reset the state so the
    // matching end tag does not re-emit the previous character and nested
    // aliases do not leak onto it.
    if (!attrs.cp) {
      currentChar = null;
      return;
    }
    currentChar = {
      code: parseInt(attrs.cp, 16), // Codepoint (hexadecimal in the XML)
      // A missing attribute becomes an empty string instead of throwing.
      name: (attrs.na || '').toLowerCase(), // Name
      block: (attrs.blk || '').toLowerCase() // Block
    };
  } else if (name === 'name-alias' && currentChar && attrs.alias) {
    currentChar.alias = currentChar.alias || [];
    currentChar.alias.push(attrs.alias.toLowerCase());
  }
});

// Closing tags: emit the finished record keyed by the character itself, and
// close the JSON object once the root <ucd> element ends.
parser.on('endElement', function (name) {
  if (name === 'char') {
    if (!currentChar) {
      // Skipped entry (no `cp`); nothing to emit.
      return;
    }
    // Each record is written under its own character; no merging or
    // de-duplication of entries is attempted here.
    // TODO: decide whether records mapping to the same key should be merged.
    jsonStream.write([fixedFromCharCode(currentChar.code), currentChar]);
    currentChar = null;
  } else if (name === 'ucd') {
    jsonStream.end();
  }
});
// Connect input to parser && start
// Feed every chunk of input to the XML parser. parser.parse() returns false
// when expat rejects the data; previously that was ignored, which left a
// truncated JSON object on the output and a zero exit status. Report the
// parser's error message and fail loudly instead.
input.on('data', function (data) {
  if (!parser.parse(data)) {
    console.error('XML parse error: ' + parser.getError());
    process.exit(1);
  }
});
// Start the flow of data explicitly (needed when the stream starts paused).
input.resume();