diff --git a/bin/asciidoc-to-html b/bin/asciidoc-to-html new file mode 100755 index 0000000..2ee59b7 --- /dev/null +++ b/bin/asciidoc-to-html @@ -0,0 +1,69 @@ +#!/usr/bin/env node + +'use strict' + +const { Console } = require('node:console') +const fs = require('node:fs') +const parse = require('asciidoc-parsing-lab') +const { parseArgs } = require('node:util') + +const options = { + attribute: { + type: 'string', + short: 'a', + multiple: true, + desc: 'set one or more AsciiDoc attributes', + hint: 'name=value', + }, + embedded: { type: 'boolean', short: 'e', desc: 'output document without root element for embedding' }, + format: { + type: 'string', + short: 'f', + default: 'html', + desc: 'generate the specified format', + hint: 'ext', + choices: ['html', 'asg'], + }, + output: { type: 'string', short: 'o', desc: 'specify a different output file or - for stdout', hint: 'path' }, + timings: { type: 'boolean', short: 't', desc: 'print a timings report to stderr' }, + help: { type: 'boolean', short: 'h', desc: 'output this help and exit' }, +} + +const { positionals: [sourceFile], values: opts } = parseArgs({ args: process.argv.slice(2), options, strict: false }) +if (opts.help) printUsage(options) +if (!sourceFile?.endsWith('.adoc')) printUsage(options, true) +const outputFile = opts.output ?? sourceFile.replace(/\.adoc$/, '.' + opts.format) +if (opts.embedded) (opts.attribute ??= []).push('embedded') +let timer +opts.timings && (timer = new Console(process.stderr)).time('elapsed') +const source = fs.readFileSync(sourceFile, 'utf8').trimEnd() +// NOTE parseInlines significantly increases the parsing time +const asg = parse(source, { attributes: opts.attribute, parseInlines: true, showWarnings: true }) +const output = opts.format === 'asg' + ? require('#test-harness').stringifyASG(asg) + : require('asciidoc-parsing-lab/html-converter')(asg).trimEnd() +outputFile === '-' ? console.log(output) : fs.writeFileSync(outputFile, output + '\n', 'utf8') +timer && timer.timeEnd('elapsed') + +function printUsage (options, error) { + let usage = [ + 'Usage: asciidoc-to-html [OPTION]... FILE', + 'Convert the specified AsciiDoc FILE to the specified output file and format.', + 'Example: asciidoc-to-html README.adoc', + ] + if (error) { + usage = usage.slice(0, 1).concat("Run 'asciidoc-to-html --help' for more information.") + } else { + usage.push('') + Object.entries(options).forEach(([long, { short, choices, default: default_, hint, multiple, desc }]) => { + const option = short ? `-${short}, --${long}${hint ? ' ' + hint : ''}` : `--${long}` + if (multiple) desc += '; can be specified more than once' + if (choices) desc += ` [${choices.join(', ')}]` + if (default_) desc += ` (default: ${default_})` + usage.push(` ${option.padEnd(27, ' ')}${desc}`) + }) + usage.push('', 'If --output is not specified, the output file path is derived from FILE (e.g., README.html).') + } + usage.reduce((stream, line) => typeof stream.write(line + '\n') && stream, error ? process.stderr : process.stdout) + process.exit(error ? 1 : 0) +} diff --git a/lib/html-converter.js b/lib/html-converter.js new file mode 100644 index 0000000..8315857 --- /dev/null +++ b/lib/html-converter.js @@ -0,0 +1,241 @@ +'use strict' + +const HTML_TAG_NAME_BY_SPAN_VARIANT = { code: 'code', emphasis: 'em', strong: 'strong' } + +function convert (node, documentAttributes) { + let output = '' + let tagName + switch (node.name) { + case 'document': { + documentAttributes = Object.assign({}, node.attributes) + let convertedTitle + const header = node.header + if (header) { + if (header.attributes) { + for (const [name, { value }] of Object.entries(header.attributes)) { + if (!documentAttributes[name]?.locked) documentAttributes[name] = { value, origin: 'header' } + } + } + if (header.title) convertedTitle = convertInlines(header.title) + } + const standalone = documentAttributes.embedded == null + if (standalone) { + output += '\n\n\n' + // FIXME downconvert contents of title tag to plain text + if (convertedTitle) output += `${convertedTitle}\n` + output += `\n` + output += '\n\n' + } + output += '
\n' + if (convertedTitle) output += `
\n

${convertedTitle}

\n
\n` + if (node.blocks.length) { + for (const child of node.blocks) output += convert(child, documentAttributes) + } + output += '
' + if (standalone) output += '\n\n' + break + } + case 'paragraph': + if (node.metadata?.options.includes('hardbreaks')) { + output += `${convertInlines(node.inlines).replace(/\n/g, '
')}

\n` + } else { + output += `${convertInlines(node.inlines)}

\n` + } + break + case 'section': + output += `\n` + output += `<${(tagName = `h${node.level + 1}`)}>${convertInlines(node.title)}\n` + if (node.blocks.length) { + for (const child of node.blocks) output += convert(child, documentAttributes) + } + output += '\n' + break + case 'preamble': + // Q: should preamble have an enclosure? + for (const child of node.blocks) output += convert(child, documentAttributes) + break + case 'heading': + output += `<${(tagName = `h${node.level + 1}`)}${commonAttributes(node.metadata, 'discrete')}>${convertInlines(node.title)}\n` + break + case 'literal': + case 'listing': + if (node.metadata?.attributes.style === 'source') { + const language = node.metadata.attributes.language + output += `${convertInlines(node.inlines)}\n` + } else { + output += `${convertInlines(node.inlines)}\n` + } + break + case 'list': { + let listAttrs = '' + if (node.variant === 'ordered') { + tagName = 'ol' + const start = node.metadata?.attributes.start + if (start) listAttrs = ` start="${start}"` + } else { + tagName = 'ul' + } + output += `<${tagName}${commonAttributes(node.metadata)}${listAttrs}>\n` + for (const item of node.items) { + output += '
  • \n' + output += `${convertInlines(item.principal)}\n` + if (item.blocks.length) { + for (const child of item.blocks) output += convert(child, documentAttributes) + } + output += '
  • \n' + } + output += `\n` + break + } + case 'dlist': + output += `\n` + for (const item of node.items) { + for (const term of item.terms) output += `
    ${convertInlines(term)}
    \n` + if (item.principal || item.blocks.length) { + output += '
    \n' + if (item.principal) output += `${convertInlines(item.principal)}\n` + if (item.blocks.length) { + for (const child of item.blocks) output += convert(child, documentAttributes) + } + output += '
    \n' + } + } + output += '\n' + break + case 'admonition': + output += `\n` + for (const child of node.blocks) output += convert(child, documentAttributes) + output += '\n' + break + case 'sidebar': + output += `\n` + for (const child of node.blocks) output += convert(child, documentAttributes) + output += '\n' + break + case 'example': + output += `\n` + for (const child of node.blocks) output += convert(child, documentAttributes) + output += '\n' + break + case 'image': + output += `\n` + output += `${node.metadata?.attributes.alt}\n` + output += '\n' + break + case 'attributes': + for (const [name, { value }] of Object.entries(node.attributes)) { + if (!documentAttributes[name]?.locked) documentAttributes[name] = { value, origin: 'body' } + } + break + default: + console.warn(`${node.name} not converted`) + } + return output +} + +function css () { + return ` +body { + color: #222222; + font-family: sans-serif; + margin: 0; +} +article { + display: flow-root; + margin: 2em auto; + width: 80vw; +} +article > header h1 { + margin-top: 0; + font-size: 2em; +} +article > :first-child:not(header) { + margin-top: 0; +} +a { + color: #0000cc; +} +p, +li > .principal:first-child, +dd > .principal:first-child { + line-height: 1.6; +} +dt { + font-weight: bold; +} +dd { + margin-left: 1.5em; +} +code, +pre { + color: #aa0000; + font-size: 1.25em; +} +pre { + line-height: 1.25; +} +pre code { + font-size: inherit; +} +.admonition, +.example { + border: 1px solid currentColor; + margin-block: 1em 0; + padding: 0 1em; +} +.admonition::before { + content: attr(data-severity); + display: block; + font-weight: bold; + text-transform: uppercase; + margin-top: 1em; +} +figure { + margin-left: 0; +} +img { + display: inline-block; + max-width: 100%; + vertical-align: middle; +} +`.trim() +} + +function convertInlines (nodes) { + return nodes.reduce((buffer, node) => { + let tagName + switch (node.name) { + case 'text': + //buffer.push(node.value) + // FIXME grammar should be giving us a hard break inline + buffer.push(node.value.replace(/ \+(?=\n)/g, '
    ')) + break + case 'ref': + buffer.push(`${convertInlines(node.inlines)}`) + break + case 'span': + buffer.push(`<${(tagName = HTML_TAG_NAME_BY_SPAN_VARIANT[node.variant])}>${convertInlines(node.inlines)}`) + break + default: + console.warn(`${node.name} not converted`) + } + return buffer + }, []).join('') +} + +function commonAttributes (metadata, primaryRole) { + if (!metadata) return primaryRole ? ` class="${primaryRole}"` : '' + const { attributes, id, roles: secondaryRoles = [] } = metadata + const roles = primaryRole ? [primaryRole] : [] + for (const role of secondaryRoles) roles.push(role) + const dataAttributes = Object.keys(attributes).filter((n) => n.startsWith('data-')) + const data = dataAttributes.length ? dataAttributes.map((n) => ` ${n}="${attributes[n]}"`).join('') : '' + if (id) { + return roles.length ? ` id="${id}" class="${roles.join(' ')}"` : ` id="${id}"${data}` + } else if (roles.length) { + return ` class="${roles.join(' ')}"${data}` + } + return data +} + +module.exports = convert diff --git a/package.json b/package.json index ed10de3..d3e7b84 100644 --- a/package.json +++ b/package.json @@ -18,12 +18,14 @@ "gen:preprocessor-parser": "peggy -c grammar/asciidoc-preprocessor-config.js -t '' > /dev/null" }, "bin": { - "asciidoc-tck-adapter": "bin/asciidoc-tck-adapter" + "asciidoc-tck-adapter": "bin/asciidoc-tck-adapter", + "asciidoc-to-html": "bin/asciidoc-to-html" }, "main": "lib/index.js", "exports": { ".": "./lib/index.js", - "./package.json": "./package.json" + "./package.json": "./package.json", + "./html-converter": "./lib/html-converter.js" }, "imports": { "#attrlist-parser": "./lib/asciidoc-attrlist-parser.js",