From f64691b85dff279a4bec5a0694ac3793e64946ab Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Wed, 17 Oct 2018 14:42:51 +1100 Subject: [PATCH] Default to safe operation (#123) * default to safe * fix setter test --- README.md | 17 +++++++++-------- api_test/main.c | 4 ++-- man/man3/cmark-gfm.3 | 35 ++++++++++++++++++----------------- src/cmark-gfm.h | 16 ++++++++-------- src/html.c | 8 ++++---- src/main.c | 6 +++--- test/cmark-fuzz.c | 2 +- test/cmark.py | 3 ++- 8 files changed, 47 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 354c09a94..0b2975742 100644 --- a/README.md +++ b/README.md @@ -163,14 +163,15 @@ be found in the man pages in the `man` subdirectory. Security -------- -By default, the library will pass through raw HTML and potentially -dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`). - -It is recommended that users either disable this potentially unsafe -feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the -command-line program), or run the output through an HTML sanitizer -to protect against -[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). +By default, the library will scrub raw HTML and potentially dangerous links +(`javascript:`, `vbscript:`, `data:`, `file:`). Please note this is the +_opposite_ of the upstream [`cmark`](https://github.com/CommonMark/cmark) +library, a change introduced in `cmark-gfm` in version `0.28.3.gfm.18`. + +To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe` with the +command line program). If doing so, we recommend you use an HTML sanitizer +specific to your needs to protect against [XSS +attacks](http://en.wikipedia.org/wiki/Cross-site_scripting). 
Contributing ------------ diff --git a/api_test/main.c b/api_test/main.c index 92f78c877..246192954 100644 --- a/api_test/main.c +++ b/api_test/main.c @@ -178,7 +178,7 @@ static void accessors(test_batch_runner *runner) { OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")), "set_literal suffix"); - char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL); + char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE, NULL); static const char expected_html[] = "

Header

\n" "
    \n" @@ -910,7 +910,7 @@ static void test_safe(test_batch_runner *runner) { "a>\n[link](JAVAscript:alert('hi'))\n![image](" "file:my.js)\n"; char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1, - CMARK_OPT_DEFAULT | CMARK_OPT_SAFE); + CMARK_OPT_DEFAULT); STR_EQ(runner, html, "\n

    hi\nlink\n\"image\""); } else if (renderer->filter_extensions) { filter_html_block(renderer, node->as.literal.data, node->as.literal.len); @@ -305,7 +305,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, break; case CMARK_NODE_HTML_INLINE: - if (options & CMARK_OPT_SAFE) { + if (!(options & CMARK_OPT_UNSAFE)) { cmark_strbuf_puts(html, ""); } else { filtered = false; @@ -354,7 +354,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_LINK: if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); @@ -372,7 +372,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node, case CMARK_NODE_IMAGE: if (entering) { cmark_strbuf_puts(html, "as.link.url, 0))) { houdini_escape_href(html, node->as.link.url.data, node->as.link.url.len); diff --git a/src/main.c b/src/main.c index 877735f6b..cb9610e1d 100644 --- a/src/main.c +++ b/src/main.c @@ -37,7 +37,7 @@ void print_usage() { printf(" --sourcepos Include source position attribute\n"); printf(" --hardbreaks Treat newlines as hard line breaks\n"); printf(" --nobreaks Render soft line breaks as spaces\n"); - printf(" --safe Suppress raw HTML and dangerous URLs\n"); + printf(" --unsafe Allow raw HTML and dangerous URLs\n"); printf(" --smart Use smart punctuation\n"); printf(" --validate-utf8 Replace UTF-8 invalid sequences with U+FFFD\n"); printf(" --github-pre-lang Use GitHub-style

     for code blocks\n");
    @@ -150,8 +150,8 @@ int main(int argc, char *argv[]) {
           options |= CMARK_OPT_SMART;
         } else if (strcmp(argv[i], "--github-pre-lang") == 0) {
           options |= CMARK_OPT_GITHUB_PRE_LANG;
    -    } else if (strcmp(argv[i], "--safe") == 0) {
    -      options |= CMARK_OPT_SAFE;
    +    } else if (strcmp(argv[i], "--unsafe") == 0) {
    +      options |= CMARK_OPT_UNSAFE;
         } else if (strcmp(argv[i], "--validate-utf8") == 0) {
           options |= CMARK_OPT_VALIDATE_UTF8;
         } else if (strcmp(argv[i], "--liberal-html-tag") == 0) {
    diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c
    index 3a263efc9..be571d842 100644
    --- a/test/cmark-fuzz.c
    +++ b/test/cmark-fuzz.c
    @@ -9,7 +9,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
         int options = *(const int *)data;
     
         /* Mask off valid option bits */
    -    options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
    +    options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
     
         /* Remainder of input is the markdown */
         const char *markdown = (const char *)(data + sizeof(options));
    diff --git a/test/cmark.py b/test/cmark.py
    index c9f484b06..f9a2909a3 100644
    --- a/test/cmark.py
    +++ b/test/cmark.py
    @@ -57,7 +57,7 @@ def to_html(lib, extlib, text, extensions):
         render_html = lib.cmark_render_html
         render_html.restype = c_char_p
         render_html.argtypes = [c_void_p, c_int, c_void_p]
    -    result = render_html(document, 0, syntax_extensions).decode('utf-8')
    +    result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8')
         return [0, result, '']
     
     def to_commonmark(lib, extlib, text, extensions):
    @@ -77,6 +77,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None):
                 self.extensions = extensions.split()
     
             if prog:
    +            prog += ' --unsafe'
                 extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)])
                 self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x)
                 self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' + extsfun(exts + self.extensions), x)