Skip to content

Commit

Permalink
Default to safe operation (commonmark#123)
Browse files Browse the repository at this point in the history
* default to safe

* fix setter test
  • Loading branch information
Ashe Connor authored Oct 17, 2018
1 parent a9ed0e2 commit f64691b
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 44 deletions.
17 changes: 9 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,15 @@ be found in the man pages in the `man` subdirectory.
Security
--------

By default, the library will pass through raw HTML and potentially
dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).

It is recommended that users either disable this potentially unsafe
feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the
command-line program), or run the output through an HTML sanitizer
to protect against
[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
By default, the library will scrub raw HTML and potentially dangerous links
(`javascript:`, `vbscript:`, `data:`, `file:`). Please note this is the
_opposite_ of the upstream [`cmark`](https://github.com/CommonMark/cmark)
library, a change introduced in `cmark-gfm` in version `0.28.3.gfm.18`.

To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe`) with the
command line program. If doing so, we recommend you use a HTML sanitizer
specific to your needs to protect against [XSS
attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).

Contributing
------------
Expand Down
4 changes: 2 additions & 2 deletions api_test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ static void accessors(test_batch_runner *runner) {
OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")),
"set_literal suffix");

char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL);
char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE, NULL);
static const char expected_html[] =
"<h3>Header</h3>\n"
"<ol start=\"3\">\n"
Expand Down Expand Up @@ -910,7 +910,7 @@ static void test_safe(test_batch_runner *runner) {
"a>\n[link](JAVAscript:alert('hi'))\n![image]("
"file:my.js)\n";
char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1,
CMARK_OPT_DEFAULT | CMARK_OPT_SAFE);
CMARK_OPT_DEFAULT);
STR_EQ(runner, html, "<!-- raw HTML omitted -->\n<p><!-- raw HTML omitted "
"-->hi<!-- raw HTML omitted -->\n<a "
"href=\"\">link</a>\n<img src=\"\" alt=\"image\" "
Expand Down
35 changes: 18 additions & 17 deletions man/man3/cmark-gfm.3
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH cmark-gfm 3 "September 17, 2018" "LOCAL" "Library Functions Manual"
.TH cmark-gfm 3 "October 17, 2018" "LOCAL" "Library Functions Manual"
.SH
NAME
.PP
Expand Down Expand Up @@ -852,22 +852,6 @@ Include a \f[C]data\-sourcepos\f[] attribute on all block elements.
.PP
Render \f[C]softbreak\f[] elements as hard line breaks.

.PP
.nf
\fC
.RS 0n
#define CMARK_OPT_SAFE (1 << 3)
.RE
\f[]
.fi

.PP
Suppress raw HTML and unsafe links (\f[C]javascript:\f[],
\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for
\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or
\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder
HTML comment. Unsafe links are replaced by empty strings.

.PP
.nf
\fC
Expand Down Expand Up @@ -995,6 +979,23 @@ Use style attributes to align table cells instead of align attributes.
Include the remainder of the info string in code blocks in a separate
attribute.

.PP
.nf
\fC
.RS 0n
#define CMARK_OPT_UNSAFE (1 << 17)
.RE
\f[]
.fi

.PP
Allow raw HTML and unsafe links, \f[C]javascript:\f[],
\f[C]vbscript:\f[], \f[C]file:\f[], and all \f[C]data:\f[] URLs \-\- by
default, only \f[C]image/png\f[], \f[C]image/gif\f[],
\f[C]image/jpeg\f[], or \f[C]image/webp\f[] mime types are allowed.
Without this option, raw HTML is replaced by a placeholder HTML comment,
and unsafe links are replaced by empty strings.

.SS
Version information

Expand Down
16 changes: 8 additions & 8 deletions src/cmark-gfm.h
Original file line number Diff line number Diff line change
Expand Up @@ -690,14 +690,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
*/
#define CMARK_OPT_HARDBREAKS (1 << 2)

/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
* `file:`, and `data:`, except for `image/png`, `image/gif`,
* `image/jpeg`, or `image/webp` mime types). Raw HTML is replaced
* by a placeholder HTML comment. Unsafe links are replaced by
* empty strings.
*/
#define CMARK_OPT_SAFE (1 << 3)

/** Render `softbreak` elements as spaces.
*/
#define CMARK_OPT_NOBREAKS (1 << 4)
Expand Down Expand Up @@ -746,6 +738,14 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
*/
#define CMARK_OPT_FULL_INFO_STRING (1 << 16)

/** Allow raw HTML and unsafe links, `javascript:`, `vbscript:`, `file:`, and
* all `data:` URLs -- by default, only `image/png`, `image/gif`, `image/jpeg`,
* or `image/webp` mime types are allowed. Without this option, raw HTML is
* replaced by a placeholder HTML comment, and unsafe links are replaced by
* empty strings.
*/
#define CMARK_OPT_UNSAFE (1 << 17)

/**
* ## Version information
*/
Expand Down
8 changes: 4 additions & 4 deletions src/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,

case CMARK_NODE_HTML_BLOCK:
cmark_html_render_cr(html);
if (options & CMARK_OPT_SAFE) {
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else if (renderer->filter_extensions) {
filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
Expand Down Expand Up @@ -305,7 +305,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
break;

case CMARK_NODE_HTML_INLINE:
if (options & CMARK_OPT_SAFE) {
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
filtered = false;
Expand Down Expand Up @@ -354,7 +354,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
if (!((options & CMARK_OPT_SAFE) &&
if (!(!(options & CMARK_OPT_UNSAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
Expand All @@ -372,7 +372,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
if (!((options & CMARK_OPT_SAFE) &&
if (!(!(options & CMARK_OPT_UNSAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
Expand Down
6 changes: 3 additions & 3 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void print_usage() {
printf(" --sourcepos Include source position attribute\n");
printf(" --hardbreaks Treat newlines as hard line breaks\n");
printf(" --nobreaks Render soft line breaks as spaces\n");
printf(" --safe Suppress raw HTML and dangerous URLs\n");
printf(" --unsafe Allow raw HTML and dangerous URLs\n");
printf(" --smart Use smart punctuation\n");
printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n");
printf(" --github-pre-lang Use GitHub-style <pre lang> for code blocks\n");
Expand Down Expand Up @@ -150,8 +150,8 @@ int main(int argc, char *argv[]) {
options |= CMARK_OPT_SMART;
} else if (strcmp(argv[i], "--github-pre-lang") == 0) {
options |= CMARK_OPT_GITHUB_PRE_LANG;
} else if (strcmp(argv[i], "--safe") == 0) {
options |= CMARK_OPT_SAFE;
} else if (strcmp(argv[i], "--unsafe") == 0) {
options |= CMARK_OPT_UNSAFE;
} else if (strcmp(argv[i], "--validate-utf8") == 0) {
options |= CMARK_OPT_VALIDATE_UTF8;
} else if (strcmp(argv[i], "--liberal-html-tag") == 0) {
Expand Down
2 changes: 1 addition & 1 deletion test/cmark-fuzz.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
int options = *(const int *)data;

/* Mask off valid option bits */
options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);

/* Remainder of input is the markdown */
const char *markdown = (const char *)(data + sizeof(options));
Expand Down
3 changes: 2 additions & 1 deletion test/cmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def to_html(lib, extlib, text, extensions):
render_html = lib.cmark_render_html
render_html.restype = c_char_p
render_html.argtypes = [c_void_p, c_int, c_void_p]
result = render_html(document, 0, syntax_extensions).decode('utf-8')
result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8')
return [0, result, '']

def to_commonmark(lib, extlib, text, extensions):
Expand All @@ -77,6 +77,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None):
self.extensions = extensions.split()

if prog:
prog += ' --unsafe'
extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)])
self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x)
self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' + extsfun(exts + self.extensions), x)
Expand Down

0 comments on commit f64691b

Please sign in to comment.