Skip to content

Commit

Permalink
Merge pull request #91 from may-ben-arie/xml-support
Browse files Browse the repository at this point in the history
XML support
  • Loading branch information
mechanik-daniel authored Jul 13, 2024
2 parents 9540606 + c0489c7 commit 0c3ae3b
Show file tree
Hide file tree
Showing 5 changed files with 306 additions and 13 deletions.
31 changes: 29 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
"csvtojson": "^2.0.10",
"dotenv": "^16.4.1",
"express": "^4.18.2",
"fast-xml-parser": "^4.4.0",
"fhir-package-loader": "^0.2.0",
"fs-extra": "^11.1.1",
"hl7-dictionary": "^1.0.1",
Expand Down
34 changes: 23 additions & 11 deletions src/controllers/root.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { pretty, transform } from '../helpers/jsonataFunctions';
import { getLogger } from '../helpers/logger';
import { toJsonataString } from '../helpers/parser/toJsonataString';
import { parseCsv } from '../helpers/stringFunctions';
import { parseXml } from '../helpers/xml';

const get = async (req: Request, res: Response) => {
return res.status(200).json(
Expand All @@ -30,23 +31,34 @@ const get = async (req: Request, res: Response) => {

const evaluate = async (req: Request, res: Response) => {
try {
let inputJson;
let contentType = req.body.contentType;
if (!req.body.contentType || req.body.contentType === '') {
getLogger().info('Content-Type is empty - defaulting to \'application/json\'');
contentType = 'application/json';
}

if (req.body.contentType === 'x-application/hl7-v2+er7') {
let inputJson;
if (contentType === 'x-application/hl7-v2+er7') {
getLogger().info('Content-Type suggests HL7 V2.x message');
getLogger().info('Trying to parse V2 message as JSON...');
inputJson = await v2json(req.body.input);
getLogger().info('Parsed V2 message');
} else if (contentType === 'text/csv') {
getLogger().info('Content-Type suggests CSV input');
getLogger().info('Trying to parse CSV to JSON...');
inputJson = await parseCsv(req.body.input);
getLogger().info('Parsed CSV to JSON');
} else if (contentType === 'application/xml') {
getLogger().info('Content-Type suggests XML input');
getLogger().info('Trying to parse XML to JSON...');
inputJson = parseXml(req.body.input);
getLogger().info('Parsed XML to JSON');
} else if (contentType === 'application/json') {
getLogger().info('Content-Type suggests JSON input');
inputJson = req.body.input;
} else {
if (req.body.contentType === 'text/csv') {
getLogger().info('Content-Type suggests CSV input');
getLogger().info('Trying to parse CSV to JSON...');
inputJson = await parseCsv(req.body.input);
getLogger().info('Parsed CSV to JSON');
} else {
inputJson = req.body.input;
}
};
throw new Error(`Unsupported Content-Type: '${contentType}'`);
}

const extraBindings = config.getBindings();
const response = await transform(inputJson, req.body.fume, extraBindings);
Expand Down
10 changes: 10 additions & 0 deletions src/helpers/xml/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/**
* © Copyright Outburn Ltd. 2022-2024 All Rights Reserved
* Project name: FUME-COMMUNITY
*/

import { parseXml } from '../xml/xmlToJson';

export {
parseXml
};
243 changes: 243 additions & 0 deletions src/helpers/xml/xmlToJson.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
/**
* © Copyright Outburn Ltd. 2022-2024 All Rights Reserved
* Project name: FUME-COMMUNITY
*/

import { XMLParser } from 'fast-xml-parser';

// Prefix given to the XML parser (as `attributeNamePrefix`) for attribute names;
// a parsed key that starts with it is treated as an attribute rather than a child element.
const ATTRIBUTE_PREFIX = '@_';

/**
 * Parses an XML string and converts every top-level entry of the parser
 * output into its standardized JSON form.
 *
 * Each top-level key is handled the same way: when its value is an array
 * (the same root tag appearing more than once) every occurrence is
 * standardized separately, otherwise the single value is standardized.
 * Previously arrays were only unpacked when there was exactly one top-level
 * key; with several keys an array was standardized as if it were one element.
 *
 * NOTE(review): if the parser reports the XML declaration as a top-level key,
 * it is standardized like any other entry - confirm this is intended.
 *
 * @param xml - The XML document as a string.
 * @returns The single standardized value when exactly one was produced,
 * otherwise an array of all standardized values (empty when there are none).
 */
export const parseXml = (xml: string) => {
  const json = parseXmlWithXhtmlHandling(xml);

  const values: any[] = [];
  for (const rootKey of Object.keys(json)) {
    const rootValue = json[rootKey];
    // A repeated root tag arrives as an array; treat a lone value as a one-item list.
    const occurrences = Array.isArray(rootValue) ? rootValue : [rootValue];
    for (const occurrence of occurrences) {
      values.push(standardizeJson(occurrence, rootKey));
    }
  }

  return values.length === 1 ? values[0] : values;
};

/**
 * Parses the XML once, collects the paths of all elements declared in the
 * XHTML namespace, and - if any were found - parses the XML a second time
 * with those paths configured as `stopNodes`.
 *
 * @param xml - The XML document as a string.
 * @returns The first parsing result when no XHTML elements were found,
 * otherwise the result of the second parsing.
 */
const parseXmlWithXhtmlHandling = (xml: string): any => {
  const baseOptions: Record<string, any> = {
    ignoreAttributes: false,
    attributeNamePrefix: ATTRIBUTE_PREFIX,
    allowBooleanAttributes: true,
    alwaysCreateTextNode: true,
    numberParseOptions: {
      leadingZeros: false,
      hex: false,
      skipLike: /\*/
    }
  };

  const initialResult = new XMLParser(baseOptions).parse(xml);

  // The same path is reported once per matching element, so de-duplicate.
  const uniqueXhtmlPaths = [...new Set(getXhtmlPaths('', initialResult, []))];
  if (uniqueXhtmlPaths.length === 0) {
    return initialResult;
  }

  const xhtmlAwareParser = new XMLParser({ ...baseOptions, stopNodes: uniqueXhtmlPaths });
  return xhtmlAwareParser.parse(xml);
};

/**
 * Walks a parsed XML structure and collects the dot-separated path of every
 * object whose `xmlns` attribute is the XHTML namespace.
 *
 * Array items share the path of the array itself. Once an object is
 * recognised as XHTML its descendants are not visited. Values that are not
 * objects - including `null`, for which `typeof` also reports 'object' - are
 * ignored.
 *
 * @param currentPath - Path of `currentValue` ('' for the top level).
 * @param currentValue - The value to inspect.
 * @param xhtmlPaths - Accumulator array; matching paths are pushed onto it.
 * @returns The same `xhtmlPaths` array that was passed in.
 */
const getXhtmlPaths = (currentPath, currentValue, xhtmlPaths) => {
  if (Array.isArray(currentValue)) {
    for (const item of currentValue) {
      getXhtmlPaths(currentPath, item, xhtmlPaths);
    }
    return xhtmlPaths;
  }

  // Guard against null explicitly: property access on it would throw.
  if (typeof currentValue !== 'object' || currentValue === null) {
    return xhtmlPaths;
  }

  if (currentValue[`${ATTRIBUTE_PREFIX}xmlns`] === 'http://www.w3.org/1999/xhtml') {
    xhtmlPaths.push(currentPath);
    return xhtmlPaths;
  }

  for (const key of Object.keys(currentValue)) {
    const childPath = currentPath ? `${currentPath}.${key}` : key;
    getXhtmlPaths(childPath, currentValue[key], xhtmlPaths);
  }
  return xhtmlPaths;
};

/**
 * Standardizes one root element and returns its value, annotated with
 * root-level metadata when that value is an object.
 *
 * `recursiveStandardize` stores its result under the element's local name
 * (the part after a namespace prefix), so the value is looked up by that
 * local name. Looking it up by the full, prefixed `rootKey` returned
 * `undefined` for every namespaced root element.
 *
 * For object values:
 * - `_namespace` is set to the prefix when `rootKey` has one;
 * - `_xmlTagName` is set to the local name unless the value already has a
 *   truthy `resourceType`.
 *
 * @param json - The parsed node of the root element.
 * @param rootKey - The root element's tag name, possibly `prefix:name`.
 * @returns The standardized value of the root element (may be a non-object,
 * or `undefined` when standardization produced nothing for it).
 */
const standardizeJson = (json, rootKey: string): Record<string, any> => {
  const parsedXml = recursiveStandardize(json, rootKey);

  const namespaceIndex = rootKey.indexOf(':');
  const hasNamespace = namespaceIndex !== -1;
  const localName = hasNamespace ? rootKey.slice(namespaceIndex + 1) : rootKey;

  const value = parsedXml[localName];
  if (typeof value === 'object' && value !== null) {
    if (hasNamespace) {
      value._namespace = rootKey.slice(0, namespaceIndex);
    }
    if (!value.resourceType) {
      value._xmlTagName = localName;
    }
  }
  return value;
};

/**
 * Converts one parsed XML node into an object that holds the node's value
 * under its local name (tag name without namespace prefix) and, in some
 * cases, accompanying metadata under the local name prefixed with `_`.
 *
 * The first matching rule decides the shape of the result:
 * 1. text + XHTML `xmlns` attribute: the value is the element re-serialized
 *    as a string (`<tag attrs>text</tag>`);
 * 2. text: the value is the text; namespace and attributes go to `_name`;
 * 3. a `value` attribute: the value is that attribute; namespace, remaining
 *    attributes and child elements go to `_name`;
 * 4. child elements: the value is an object of children, namespace and
 *    attributes;
 * 5. attributes only: the value is an object of namespace and attributes;
 * 6. otherwise an empty object is returned.
 *
 * An `xmlns` attribute equal to the FHIR namespace is replaced by a
 * `resourceType` attribute holding the local name.
 *
 * @param node - The parsed node (text under '#text', attributes under
 * prefixed keys, everything else treated as child elements).
 * @param key - The node's tag name, possibly `prefix:name`.
 * @returns An object with zero, one or two keys (`name` and/or `_name`).
 */
const recursiveStandardize = (node: any, key: string) => {
  const standardized: Record<string, any | any[]> = {};

  // Sort the node's properties into text, attributes and child elements.
  let text: any | undefined;
  let valueAttr: string | undefined;
  const attrs: Record<string, any> = {};
  const childElements: Record<string, any> = {};
  for (const prop of Object.keys(node)) {
    const raw = node[prop];
    const isEmptyString = typeof raw === 'string' && raw.length === 0;
    if (prop === '#text' && !isEmptyString) {
      text = String(raw);
      continue;
    }
    if (!prop.startsWith(ATTRIBUTE_PREFIX)) {
      // An empty '#text' also lands here.
      childElements[prop] = raw;
      continue;
    }
    attrs[prop.slice(ATTRIBUTE_PREFIX.length)] = raw;
    if (prop === `${ATTRIBUTE_PREFIX}value`) {
      valueAttr = raw;
    }
  }

  // Split an optional namespace prefix off the tag name.
  const colonAt = key.indexOf(':');
  const isPrefixed = colonAt !== -1;
  const localName = isPrefixed ? key.slice(colonAt + 1) : key;
  const namespacePrefix = isPrefixed ? key.slice(0, colonAt) : undefined;
  const metaKey = `_${localName}`;
  const namespaceInfo = { _namespace: namespacePrefix };

  // Standardize the child elements.
  const children = createComplexChildsObject(childElements);

  // The FHIR namespace declaration is expressed as resourceType instead.
  if (attrs.xmlns === 'http://hl7.org/fhir') {
    attrs.resourceType = localName;
    delete attrs.xmlns;
  }

  // Assemble the result; the first rule that applies wins.
  if (text && attrs.xmlns === 'http://www.w3.org/1999/xhtml') {
    standardized[localName] = `<${key}${buildAttributesString(attrs)}>${text}</${key}>`;
    return standardized;
  }

  if (text) {
    standardized[localName] = text;
    addInnerKeys(standardized, metaKey, namespaceInfo);
    addInnerKeys(standardized, metaKey, attrs);
    return standardized;
  }

  if (valueAttr) {
    standardized[localName] = valueAttr;
    addInnerKeys(standardized, metaKey, namespaceInfo);
    // The value attribute became the value itself, so drop it from the metadata.
    delete attrs.value;
    addInnerKeys(standardized, metaKey, attrs);
    addInnerKeys(standardized, metaKey, children);
    return standardized;
  }

  if (Object.keys(children).length > 0) {
    addInnerKeys(standardized, localName, children);
    addInnerKeys(standardized, localName, namespaceInfo);
    addInnerKeys(standardized, localName, attrs);
    return standardized;
  }

  if (Object.keys(attrs).length > 0) {
    addInnerKeys(standardized, localName, namespaceInfo);
    addInnerKeys(standardized, localName, attrs);
  }
  return standardized;
};

/**
 * Standardizes every child element of a node and merges the results into a
 * single object.
 *
 * A child tag that occurs several times (an array value) contributes one
 * standardized entry per occurrence, in order.
 *
 * @param complexChilds - Map of child tag name to parsed node, or to an
 * array of parsed nodes.
 * @returns A new object containing all standardized children.
 */
const createComplexChildsObject = (complexChilds) => {
  const merged = {};
  Object.keys(complexChilds).forEach((tagName) => {
    const parsed = complexChilds[tagName];
    const occurrences = Array.isArray(parsed) ? parsed : [parsed];
    occurrences.forEach((occurrence) => {
      addChildToParent(merged, recursiveStandardize(occurrence, tagName));
    });
  });
  return merged;
};
// /_xmlTagName

/**
 * Merges one standardized child (an object with a value key `name` and/or a
 * metadata key `_name`) into its parent.
 *
 * The first occurrence of a name is stored as-is. From the second occurrence
 * on, the value and metadata entries become arrays that are kept the same
 * length: an occurrence lacking one of the two parts contributes `null` in
 * that array, and an array created late is padded with `null` for the
 * earlier occurrences.
 *
 * @param parentNode - The object to merge into; mutated in place.
 * @param child - The standardized child; an empty object is ignored.
 */
const addChildToParent = (parentNode, child) => {
  const childProps = Object.keys(child);
  const childKey: string | undefined = childProps.find(prop => !prop.startsWith('_'));
  const childAttKey: string | undefined = childProps.find(prop => prop.startsWith('_'));
  if (childKey === undefined && childAttKey === undefined) {
    return;
  }

  // Derive whichever of the two names the child does not carry itself.
  const key = childKey ?? childAttKey!.slice(1);
  const attKey = childAttKey ?? `_${childKey}`;

  const seenBefore = Boolean(parentNode[key]) || Boolean(parentNode[attKey]);
  if (!seenBefore) {
    if (childKey) {
      parentNode[key] = child[key];
    }
    if (childAttKey) {
      parentNode[attKey] = child[attKey];
    }
    return;
  }

  // Number of entries currently stored under a slot (0 when absent).
  const countOf = (slot: string): number => {
    const current = parentNode[slot];
    if (!current) {
      return 0;
    }
    return Array.isArray(current) ? current.length : 1;
  };

  // Both sizes are measured before either slot is modified.
  const keySize = countOf(key);
  const attKeySize = countOf(attKey);

  // Appends the child's part for `slot`, converting to / creating an array as needed.
  const append = (slot: string, ownSize: number, siblingSize: number, childHasSlot: string | undefined): void => {
    const incoming = child[slot] ?? null;
    if (ownSize > 1) {
      parentNode[slot].push(incoming);
    } else if (ownSize === 1) {
      parentNode[slot] = [parentNode[slot], incoming];
    } else if (childHasSlot) {
      // Slot is new: pad with null for the occurrences already in the sibling slot.
      const padded: any[] = new Array(siblingSize).fill(null);
      padded.push(incoming);
      parentNode[slot] = padded;
    }
  };

  append(key, keySize, attKeySize, childKey);
  append(attKey, attKeySize, keySize, childAttKey);
};

/**
 * Copies every own enumerable property of `innerObject` into `object[key]`
 * by delegating each one to `addInnerKey`.
 *
 * @param object - The object that owns (or will own) `object[key]`.
 * @param key - Name of the property of `object` to copy into.
 * @param innerObject - Source of the properties to copy.
 */
const addInnerKeys = (object, key, innerObject) => {
  for (const [innerKey, innerValue] of Object.entries(innerObject)) {
    addInnerKey(object, key, innerKey, innerValue);
  }
};

/**
 * Sets `object[key][innerKey] = innerValue`, creating `object[key]` as an
 * empty object first when it is undefined.
 *
 * Nothing happens when `innerValue` is undefined. `resourceType` is special:
 * `object[key]` is replaced by a new object that lists `resourceType` first,
 * followed by the existing properties - so a `resourceType` that was already
 * present keeps its existing value.
 *
 * @param object - The object that owns (or will own) `object[key]`.
 * @param key - Name of the property of `object` to write into.
 * @param innerKey - Name of the property to set inside `object[key]`.
 * @param innerValue - Value to set; undefined is skipped.
 */
const addInnerKey = (object, key, innerKey, innerValue) => {
  if (innerValue === undefined) {
    return;
  }

  const target = object[key] === undefined ? (object[key] = {}) : object[key];

  if (innerKey !== 'resourceType') {
    target[innerKey] = innerValue;
    return;
  }

  // Rebuild so that resourceType is the first property of the object.
  object[key] = { resourceType: innerValue, ...target };
};

/**
 * Serializes an attribute map into the text that follows a tag name in an
 * opening tag: ` name="value"` for each attribute, concatenated in key order.
 *
 * Values are escaped so the result stays well-formed inside double quotes:
 * `"` becomes `&quot;`, `<` becomes `&lt;`, and an `&` that does not start an
 * entity or character reference becomes `&amp;`. An `&` that already starts a
 * reference (e.g. `&nbsp;`, `&#160;`) is left untouched so it is not escaped
 * twice. Values without these characters are emitted unchanged.
 *
 * @param attributs - Map of attribute name to value; values are converted
 * to strings.
 * @returns The serialized attributes, or '' when the map is empty.
 */
const buildAttributesString = (attributs) => {
  const escapeValue = (value: unknown): string =>
    String(value)
      .replace(/&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#x[0-9A-Fa-f]+);)/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/"/g, '&quot;');

  return Object.keys(attributs)
    .map((key) => ` ${key}="${escapeValue(attributs[key])}"`)
    .join('');
};

0 comments on commit 0c3ae3b

Please sign in to comment.