Skip to content

Commit

Permalink
Make rendering safe by default.
Browse files Browse the repository at this point in the history
Removes CMARK_OPT_SAFE from options.

Adds CMARK_OPT_UNSAFE, with the opposite meaning.
The new default behavior is to suppress raw HTML and
potentially dangerous links.  The CMARK_OPT_UNSAFE
option has to be set explicitly to prevent this.

--------------------------------------------------------
NOTE: This change will require modifications in
bindings for cmark and in most libraries and programs
that use cmark.
--------------------------------------------------------

Closes commonmark#239, commonmark#273.

Borrows heavily from @kivikakk's patch in github#123.
  • Loading branch information
jgm committed Mar 18, 2019
1 parent ca8ef74 commit 325a147
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 29 deletions.
12 changes: 6 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,14 +156,14 @@ be found in the man pages in the `man` subdirectory.
Security
--------

By default, the library will pass through raw HTML and potentially
By default, the library will scrub raw HTML and potentially
dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).

It is recommended that users either disable this potentially unsafe
feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the
command-line program), or run the output through an HTML sanitizer
to protect against
[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
To allow these, use the option `CMARK_OPT_UNSAFE` (or
`--unsafe`) with the command line program. If doing so, we
recommend you use a HTML sanitizer specific to your needs to
protect against [XSS
attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).

Contributing
------------
Expand Down
5 changes: 3 additions & 2 deletions api_test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,8 @@ static void accessors(test_batch_runner *runner) {
OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")),
"set_literal suffix");

char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
char *rendered_html = cmark_render_html(doc,
CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE);
static const char expected_html[] =
"<h3>Header</h3>\n"
"<ol start=\"3\">\n"
Expand Down Expand Up @@ -859,7 +860,7 @@ static void test_safe(test_batch_runner *runner) {
"a>\n[link](JAVAscript:alert('hi'))\n![image]("
"file:my.js)\n";
char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1,
CMARK_OPT_DEFAULT | CMARK_OPT_SAFE);
CMARK_OPT_DEFAULT);
STR_EQ(runner, html, "<!-- raw HTML omitted -->\n<p><!-- raw HTML omitted "
"-->hi<!-- raw HTML omitted -->\n<a "
"href=\"\">link</a>\n<img src=\"\" alt=\"image\" "
Expand Down
10 changes: 5 additions & 5 deletions man/man3/cmark.3
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH cmark 3 "June 02, 2017" "LOCAL" "Library Functions Manual"
.TH cmark 3 "March 17, 2019" "LOCAL" "Library Functions Manual"
.SH
NAME
.PP
Expand Down Expand Up @@ -721,17 +721,17 @@ Render \f[C]softbreak\f[] elements as hard line breaks.
.nf
\fC
.RS 0n
#define CMARK_OPT_SAFE (1 << 3)
#define CMARK_OPT_UNSAFE (1 << 17)
.RE
\f[]
.fi

.PP
Suppress raw HTML and unsafe links (\f[C]javascript:\f[],
Render raw HTML and unsafe links (\f[C]javascript:\f[],
\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for
\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or
\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder
HTML comment. Unsafe links are replaced by empty strings.
\f[C]image/webp\f[] mime types). By default, raw HTML is replaced by a
placeholder HTML comment. Unsafe links are replaced by empty strings.

.PP
.nf
Expand Down
10 changes: 5 additions & 5 deletions src/cmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,13 +552,13 @@ char *cmark_render_latex(cmark_node *root, int options, int width);
*/
#define CMARK_OPT_HARDBREAKS (1 << 2)

/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
/** Render raw HTML and unsafe links (`javascript:`, `vbscript:`,
* `file:`, and `data:`, except for `image/png`, `image/gif`,
* `image/jpeg`, or `image/webp` mime types). Raw HTML is replaced
* by a placeholder HTML comment. Unsafe links are replaced by
* empty strings.
* `image/jpeg`, or `image/webp` mime types). By default,
* raw HTML is replaced by a placeholder HTML comment. Unsafe
* links are replaced by empty strings.
*/
#define CMARK_OPT_SAFE (1 << 3)
#define CMARK_OPT_UNSAFE (1 << 17)

/** Render `softbreak` elements as spaces.
*/
Expand Down
12 changes: 6 additions & 6 deletions src/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,

case CMARK_NODE_HTML_BLOCK:
cr(html);
if (options & CMARK_OPT_SAFE) {
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
Expand Down Expand Up @@ -242,7 +242,7 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
break;

case CMARK_NODE_HTML_INLINE:
if (options & CMARK_OPT_SAFE) {
if (!(options & CMARK_OPT_UNSAFE)) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
Expand Down Expand Up @@ -278,8 +278,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
if (!((options & CMARK_OPT_SAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
Expand All @@ -296,8 +296,8 @@ static int S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
if (!((options & CMARK_OPT_SAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
if ((options & CMARK_OPT_UNSAFE) ||
!(scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
}
Expand Down
6 changes: 3 additions & 3 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ void print_usage() {
printf(" --sourcepos Include source position attribute\n");
printf(" --hardbreaks Treat newlines as hard line breaks\n");
printf(" --nobreaks Render soft line breaks as spaces\n");
printf(" --safe Suppress raw HTML and dangerous URLs\n");
printf(" --unsafe Render raw HTML and dangerous URLs\n");
printf(" --smart Use smart punctuation\n");
printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n");
printf(" --help, -h Print usage information\n");
Expand Down Expand Up @@ -112,8 +112,8 @@ int main(int argc, char *argv[]) {
options |= CMARK_OPT_NOBREAKS;
} else if (strcmp(argv[i], "--smart") == 0) {
options |= CMARK_OPT_SMART;
} else if (strcmp(argv[i], "--safe") == 0) {
options |= CMARK_OPT_SAFE;
} else if (strcmp(argv[i], "--unsafe") == 0) {
options |= CMARK_OPT_UNSAFE;
} else if (strcmp(argv[i], "--validate-utf8") == 0) {
options |= CMARK_OPT_VALIDATE_UTF8;
} else if ((strcmp(argv[i], "--help") == 0) ||
Expand Down
2 changes: 1 addition & 1 deletion test/cmark-fuzz.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
memcpy(&fuzz_config, data, sizeof(fuzz_config));

/* Mask off valid option bits */
fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
fuzz_config.options &= (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);

/* Remainder of input is the markdown */
const char *markdown = (const char *)(data + sizeof(fuzz_config));
Expand Down
4 changes: 3 additions & 1 deletion test/cmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def to_html(lib, text):
markdown.argtypes = [c_char_p, c_size_t, c_int]
textbytes = text.encode('utf-8')
textlen = len(textbytes)
result = markdown(textbytes, textlen, 0).decode('utf-8')
# 1 << 17 == CMARK_OPT_UNSAFE
result = markdown(textbytes, textlen, 1 << 17).decode('utf-8')
return [0, result, '']

def to_commonmark(lib, text):
Expand All @@ -37,6 +38,7 @@ class CMark:
def __init__(self, prog=None, library_dir=None):
self.prog = prog
if prog:
prog += ' --unsafe'
self.to_html = lambda x: pipe_through_prog(prog, x)
self.to_commonmark = lambda x: pipe_through_prog(prog + ' -t commonmark', x)
else:
Expand Down

0 comments on commit 325a147

Please sign in to comment.