Skip to content

Commit

Permalink
Added CMARK_OPT_SAFE option and --safe command-line flag.
Browse files Browse the repository at this point in the history
* Added `CMARK_OPT_SAFE`.  This option disables rendering of raw HTML
  and potentially dangerous links.
* Added `--safe` option in command-line program.
* Updated `cmark.3` man page.
* Added `scan_dangerous_url` to scanners.
* In HTML, suppress rendering of raw HTML and potentially dangerous
  links if `CMARK_OPT_SAFE`.  Dangerous URLs are those that begin
  with `javascript:`, `vbscript:`, `file:`, or `data:` (except for
  `image/png`, `image/gif`, `image/jpeg`, or `image/webp` mime types).
* Added `api_test` for `CMARK_OPT_SAFE`.
* Rewrote `README.md` on security.
  • Loading branch information
jgm committed Jul 13, 2015
1 parent 6dcd2be commit ac39623
Show file tree
Hide file tree
Showing 10 changed files with 422 additions and 15 deletions.
15 changes: 10 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,16 @@ Usage
Instructions for the use of the command line program and library can
be found in the man pages in the `man` subdirectory.

**A note on security:**
This library does not attempt to sanitize link attributes or
raw HTML. If you use it in applications that accept
untrusted user input, you must run the output through an HTML
sanitizer to protect against
Security
--------

By default, the library will pass through raw HTML and potentially
dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).

It is recommended that users either disable this potentially unsafe
feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the
command-line program), or run the output through an HTML sanitizer
to protect against
[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).

Contributing
Expand Down
16 changes: 16 additions & 0 deletions api_test/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,21 @@ numeric_entities(test_batch_runner *runner)
"Max hexadecimal entity length");
}

/*
 * Safe-mode regression test.
 *
 * Converts a small document containing block-level raw HTML, inline raw
 * HTML, a link with a mixed-case `JAVAscript:` URL and an image with a
 * `file:` URL, using CMARK_OPT_SAFE, and compares the rendered HTML with
 * the expected output: each raw HTML span replaced by a placeholder
 * comment and both URLs replaced by empty strings.
 */
static void
test_safe(test_batch_runner *runner)
{
	static const char markdown[] =
		"<div>\nhi\n</div>\n\n"
		"<a>hi</a>\n"
		"[link](JAVAscript:alert('hi'))\n"
		"![image](file:my.js)\n";
	static const char expected[] =
		"<!-- raw HTML omitted -->\n"
		"<p><!-- raw HTML omitted -->hi<!-- raw HTML omitted -->\n"
		"<a href=\"\">link</a>\n"
		"<img src=\"\" alt=\"image\" /></p>\n";
	const int options = CMARK_OPT_DEFAULT | CMARK_OPT_SAFE;
	char *rendered;

	/* Length excludes the array's terminating NUL. */
	rendered = cmark_markdown_to_html(markdown, sizeof markdown - 1,
	                                  options);
	STR_EQ(runner, rendered, expected,
	       "input with raw HTML and dangerous links");
	free(rendered);
}

static void
test_md_to_html(test_batch_runner *runner, const char *markdown,
const char *expected_html, const char *msg)
Expand Down Expand Up @@ -741,6 +756,7 @@ int main() {
line_endings(runner);
numeric_entities(runner);
test_cplusplus(runner);
test_safe(runner);

test_print_summary(runner);
retval = test_ok(runner) ? 0 : 1;
Expand Down
8 changes: 8 additions & 0 deletions man/man1/cmark.1
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ be rendered as curly quotes, depending on their position.
\f[C]\-\-\-\f[] will be rendered as an em-dash.
\f[C]...\f[] will be rendered as ellipses.
.TP 12n
.B \-\-safe
Do not render raw HTML or potentially dangerous URLs.
(Raw HTML is replaced by a placeholder comment; potentially
dangerous URLs are replaced by empty strings.)  Dangerous
URLs are those that begin with \f[C]javascript:\f[], \f[C]vbscript:\f[],
\f[C]file:\f[], or \f[C]data:\f[] (except for \f[C]image/png\f[],
\f[C]image/gif\f[], \f[C]image/jpeg\f[], or \f[C]image/webp\f[] mime types).
.TP 12n
.B \-\-help
Print usage information.
.TP 12n
Expand Down
18 changes: 17 additions & 1 deletion man/man3/cmark.3
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH cmark 3 "July 12, 2015" "LOCAL" "Library Functions Manual"
.TH cmark 3 "July 13, 2015" "LOCAL" "Library Functions Manual"
.SH
NAME
.PP
Expand Down Expand Up @@ -569,6 +569,22 @@ dashes.
Validate UTF\-8 in the input before parsing, replacing illegal sequences
with the replacement character U+FFFD.

.PP
.nf
\fC
.RS 0n
#define CMARK_OPT_SAFE 32
.RE
\f[]
.fi

.PP
Suppress raw HTML and unsafe links (\f[C]javascript:\f[],
\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for
\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or
\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder
HTML comment. Unsafe links are replaced by empty strings.

.SS
Version information

Expand Down
8 changes: 8 additions & 0 deletions src/cmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -516,6 +516,14 @@ char *cmark_render_latex(cmark_node *root, int options, int width);
*/
#define CMARK_OPT_VALIDATE_UTF8 16

/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
 * `file:`, and `data:`, except for `image/png`, `image/gif`,
 * `image/jpeg`, or `image/webp` mime types).  In HTML output, raw
 * HTML (block or inline) is replaced by the placeholder comment
 * `<!-- raw HTML omitted -->`, and the URL of an unsafe link or
 * image is replaced by an empty string (`href=""` / `src=""`).
 */
#define CMARK_OPT_SAFE 32

/**
* ## Version information
*/
Expand Down
38 changes: 29 additions & 9 deletions src/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "node.h"
#include "buffer.h"
#include "houdini.h"
#include "scanners.h"

// Functions to convert cmark_nodes to HTML strings.

Expand Down Expand Up @@ -174,7 +175,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,

case CMARK_NODE_HTML:
cr(html);
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
if (options & CMARK_OPT_SAFE) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data,
node->as.literal.len);
}
cr(html);
break;

case CMARK_NODE_HRULE:
Expand Down Expand Up @@ -228,7 +235,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
break;

case CMARK_NODE_INLINE_HTML:
cmark_strbuf_put(html, node->as.literal.data, node->as.literal.len);
if (options & CMARK_OPT_SAFE) {
cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
} else {
cmark_strbuf_put(html, node->as.literal.data,
node->as.literal.len);
}
break;

case CMARK_NODE_STRONG:
Expand All @@ -250,15 +262,19 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
if (!((options & CMARK_OPT_SAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html,
node->as.link.url.data,
node->as.link.url.len);

}
if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
escape_html(html, node->as.link.title.data,
node->as.link.title.len);
escape_html(html,
node->as.link.title.data,
node->as.link.title.len);
}

cmark_strbuf_puts(html, "\">");
} else {
cmark_strbuf_puts(html, "</a>");
Expand All @@ -268,9 +284,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
houdini_escape_href(html, node->as.link.url.data,
node->as.link.url.len);
if (!((options & CMARK_OPT_SAFE) &&
scan_dangerous_url(&node->as.link.url, 0))) {
houdini_escape_href(html,
node->as.link.url.data,
node->as.link.url.len);

}
cmark_strbuf_puts(html, "\" alt=\"");
state->plain = node;
} else {
Expand Down
3 changes: 3 additions & 0 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ void print_usage()
printf(" --width WIDTH Specify wrap width (default 0 = nowrap)\n");
printf(" --sourcepos Include source position attribute\n");
printf(" --hardbreaks Treat newlines as hard line breaks\n");
printf(" --safe Suppress raw HTML and dangerous URLs\n");
printf(" --smart Use smart punctuation\n");
printf(" --normalize Consolidate adjacent text nodes\n");
printf(" --help, -h Print usage information\n");
Expand Down Expand Up @@ -93,6 +94,8 @@ int main(int argc, char *argv[])
options |= CMARK_OPT_HARDBREAKS;
} else if (strcmp(argv[i], "--smart") == 0) {
options |= CMARK_OPT_SMART;
} else if (strcmp(argv[i], "--safe") == 0) {
options |= CMARK_OPT_SAFE;
} else if (strcmp(argv[i], "--normalize") == 0) {
options |= CMARK_OPT_NORMALIZE;
} else if (strcmp(argv[i], "--validate-utf8") == 0) {
Expand Down
Loading

0 comments on commit ac39623

Please sign in to comment.