From f64691b85dff279a4bec5a0694ac3793e64946ab Mon Sep 17 00:00:00 2001
From: Ashe Connor <kivikakk@github.com>
Date: Wed, 17 Oct 2018 14:42:51 +1100
Subject: [PATCH] Default to safe operation (#123)

* default to safe

* fix setter test
---
 README.md            | 17 +++++++++--------
 api_test/main.c      |  4 ++--
 man/man3/cmark-gfm.3 | 35 ++++++++++++++++++-----------------
 src/cmark-gfm.h      | 16 ++++++++--------
 src/html.c           |  8 ++++----
 src/main.c           |  6 +++---
 test/cmark-fuzz.c    |  2 +-
 test/cmark.py        |  3 ++-
 8 files changed, 47 insertions(+), 44 deletions(-)
diff --git a/README.md b/README.md
index 354c09a94..0b2975742 100644
--- a/README.md
+++ b/README.md
@@ -163,14 +163,15 @@ be found in the man pages in the `man` subdirectory.
 Security
 --------
 
-By default, the library will pass through raw HTML and potentially
-dangerous links (`javascript:`, `vbscript:`, `data:`, `file:`).
-
-It is recommended that users either disable this potentially unsafe
-feature by using the option `CMARK_OPT_SAFE` (or `--safe` with the
-command-line program), or run the output through an HTML sanitizer
-to protect against
-[XSS attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
+By default, the library will scrub raw HTML and potentially dangerous links
+(`javascript:`, `vbscript:`, `data:`, `file:`). Please note this is the
+_opposite_ of the upstream [`cmark`](https://github.com/CommonMark/cmark)
+library, a change introduced in `cmark-gfm` in version `0.28.3.gfm.18`.
+
+To allow these, use the option `CMARK_OPT_UNSAFE` (or `--unsafe`) with the
+command line program. If doing so, we recommend you use a HTML sanitizer
+specific to your needs to protect against [XSS
+attacks](http://en.wikipedia.org/wiki/Cross-site_scripting).
 
 Contributing
 ------------
diff --git a/api_test/main.c b/api_test/main.c
index 92f78c877..246192954 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -178,7 +178,7 @@ static void accessors(test_batch_runner *runner) {
   OK(runner, cmark_node_set_literal(string, literal + sizeof("prefix")),
      "set_literal suffix");
 
-  char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT, NULL);
+  char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT | CMARK_OPT_UNSAFE, NULL);
   static const char expected_html[] =
       "<h3>Header</h3>\n"
       "<ol start=\"3\">\n"
@@ -910,7 +910,7 @@ static void test_safe(test_batch_runner *runner) {
                                  "a>\n[link](JAVAscript:alert('hi'))\n![image]("
                                  "file:my.js)\n";
   char *html = cmark_markdown_to_html(raw_html, sizeof(raw_html) - 1,
-                                      CMARK_OPT_DEFAULT | CMARK_OPT_SAFE);
+                                      CMARK_OPT_DEFAULT);
   STR_EQ(runner, html, "<!-- raw HTML omitted -->\n<p><!-- raw HTML omitted "
                        "-->hi<!-- raw HTML omitted -->\n<a "
                        "href=\"\">link</a>\n<img src=\"\" alt=\"image\" "
diff --git a/man/man3/cmark-gfm.3 b/man/man3/cmark-gfm.3
index 75749121f..f218abb1c 100644
--- a/man/man3/cmark-gfm.3
+++ b/man/man3/cmark-gfm.3
@@ -1,4 +1,4 @@
-.TH cmark-gfm 3 "September 17, 2018" "LOCAL" "Library Functions Manual"
+.TH cmark-gfm 3 "October 17, 2018" "LOCAL" "Library Functions Manual"
 .SH
 NAME
 .PP
@@ -852,22 +852,6 @@ Include a \f[C]data\-sourcepos\f[] attribute on all block elements.
 .PP
 Render \f[C]softbreak\f[] elements as hard line breaks.
 
-.PP
-.nf
-\fC
-.RS 0n
-#define CMARK_OPT_SAFE (1 << 3)
-.RE
-\f[]
-.fi
-
-.PP
-Suppress raw HTML and unsafe links (\f[C]javascript:\f[],
-\f[C]vbscript:\f[], \f[C]file:\f[], and \f[C]data:\f[], except for
-\f[C]image/png\f[], \f[C]image/gif\f[], \f[C]image/jpeg\f[], or
-\f[C]image/webp\f[] mime types). Raw HTML is replaced by a placeholder
-HTML comment. Unsafe links are replaced by empty strings.
-
 .PP
 .nf
 \fC
@@ -995,6 +979,23 @@ Use style attributes to align table cells instead of align attributes.
 Include the remainder of the info string in code blocks in a separate
 attribute.
 
+.PP
+.nf
+\fC
+.RS 0n
+#define CMARK_OPT_UNSAFE (1 << 17)
+.RE
+\f[]
+.fi
+
+.PP
+Allow raw HTML and unsafe links, \f[C]javascript:\f[],
+\f[C]vbscript:\f[], \f[C]file:\f[], and all \f[C]data:\f[] URLs \-\- by
+default, only \f[C]image/png\f[], \f[C]image/gif\f[],
+\f[C]image/jpeg\f[], or \f[C]image/webp\f[] mime types are allowed.
+Without this option, raw HTML is replaced by a placeholder HTML comment,
+and unsafe links are replaced by empty strings.
+
 .SS
 Version information
 
diff --git a/src/cmark-gfm.h b/src/cmark-gfm.h
index 02a1e71c0..6baac9a8c 100644
--- a/src/cmark-gfm.h
+++ b/src/cmark-gfm.h
@@ -690,14 +690,6 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
  */
 #define CMARK_OPT_HARDBREAKS (1 << 2)
 
-/** Suppress raw HTML and unsafe links (`javascript:`, `vbscript:`,
- * `file:`, and `data:`, except for `image/png`, `image/gif`,
- * `image/jpeg`, or `image/webp` mime types).  Raw HTML is replaced
- * by a placeholder HTML comment. Unsafe links are replaced by
- * empty strings.
- */
-#define CMARK_OPT_SAFE (1 << 3)
-
 /** Render `softbreak` elements as spaces.
  */
 #define CMARK_OPT_NOBREAKS (1 << 4)
@@ -746,6 +738,14 @@ char *cmark_render_latex_with_mem(cmark_node *root, int options, int width, cmar
  */
 #define CMARK_OPT_FULL_INFO_STRING (1 << 16)
 
+/** Allow raw HTML and unsafe links, `javascript:`, `vbscript:`, `file:`, and
+ * all `data:` URLs -- by default, only `image/png`, `image/gif`, `image/jpeg`,
+ * or `image/webp` mime types are allowed. Without this option, raw HTML is
+ * replaced by a placeholder HTML comment, and unsafe links are replaced by
+ * empty strings.
+ */
+#define CMARK_OPT_UNSAFE (1 << 17)
+
 /**
  * ## Version information
  */
diff --git a/src/html.c b/src/html.c
index 7a34f7e91..d4d6cf13e 100644
--- a/src/html.c
+++ b/src/html.c
@@ -227,7 +227,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
 
   case CMARK_NODE_HTML_BLOCK:
     cmark_html_render_cr(html);
-    if (options & CMARK_OPT_SAFE) {
+    if (!(options & CMARK_OPT_UNSAFE)) {
       cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
     } else if (renderer->filter_extensions) {
       filter_html_block(renderer, node->as.literal.data, node->as.literal.len);
@@ -305,7 +305,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
     break;
 
   case CMARK_NODE_HTML_INLINE:
-    if (options & CMARK_OPT_SAFE) {
+    if (!(options & CMARK_OPT_UNSAFE)) {
       cmark_strbuf_puts(html, "<!-- raw HTML omitted -->");
     } else {
       filtered = false;
@@ -354,7 +354,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
   case CMARK_NODE_LINK:
     if (entering) {
       cmark_strbuf_puts(html, "<a href=\"");
-      if (!((options & CMARK_OPT_SAFE) &&
+      if (!(!(options & CMARK_OPT_UNSAFE) &&
             scan_dangerous_url(&node->as.link.url, 0))) {
         houdini_escape_href(html, node->as.link.url.data,
                             node->as.link.url.len);
@@ -372,7 +372,7 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
   case CMARK_NODE_IMAGE:
     if (entering) {
       cmark_strbuf_puts(html, "<img src=\"");
-      if (!((options & CMARK_OPT_SAFE) &&
+      if (!(!(options & CMARK_OPT_UNSAFE) &&
             scan_dangerous_url(&node->as.link.url, 0))) {
         houdini_escape_href(html, node->as.link.url.data,
                             node->as.link.url.len);
diff --git a/src/main.c b/src/main.c
index 877735f6b..cb9610e1d 100644
--- a/src/main.c
+++ b/src/main.c
@@ -37,7 +37,7 @@ void print_usage() {
   printf("  --sourcepos       Include source position attribute\n");
   printf("  --hardbreaks      Treat newlines as hard line breaks\n");
   printf("  --nobreaks        Render soft line breaks as spaces\n");
-  printf("  --safe            Suppress raw HTML and dangerous URLs\n");
+  printf("  --unsafe          Allow raw HTML and dangerous URLs\n");
   printf("  --smart           Use smart punctuation\n");
   printf("  --validate-utf8   Replace UTF-8 invalid sequences with U+FFFD\n");
   printf("  --github-pre-lang Use GitHub-style <pre lang> for code blocks\n");
@@ -150,8 +150,8 @@ int main(int argc, char *argv[]) {
       options |= CMARK_OPT_SMART;
     } else if (strcmp(argv[i], "--github-pre-lang") == 0) {
       options |= CMARK_OPT_GITHUB_PRE_LANG;
-    } else if (strcmp(argv[i], "--safe") == 0) {
-      options |= CMARK_OPT_SAFE;
+    } else if (strcmp(argv[i], "--unsafe") == 0) {
+      options |= CMARK_OPT_UNSAFE;
     } else if (strcmp(argv[i], "--validate-utf8") == 0) {
       options |= CMARK_OPT_VALIDATE_UTF8;
     } else if (strcmp(argv[i], "--liberal-html-tag") == 0) {
diff --git a/test/cmark-fuzz.c b/test/cmark-fuzz.c
index 3a263efc9..be571d842 100644
--- a/test/cmark-fuzz.c
+++ b/test/cmark-fuzz.c
@@ -9,7 +9,7 @@ int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
     int options = *(const int *)data;
 
     /* Mask off valid option bits */
-    options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_SAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
+    options = options & (CMARK_OPT_SOURCEPOS | CMARK_OPT_HARDBREAKS | CMARK_OPT_UNSAFE | CMARK_OPT_NOBREAKS | CMARK_OPT_NORMALIZE | CMARK_OPT_VALIDATE_UTF8 | CMARK_OPT_SMART);
 
     /* Remainder of input is the markdown */
     const char *markdown = (const char *)(data + sizeof(options));
diff --git a/test/cmark.py b/test/cmark.py
index c9f484b06..f9a2909a3 100644
--- a/test/cmark.py
+++ b/test/cmark.py
@@ -57,7 +57,7 @@ def to_html(lib, extlib, text, extensions):
     render_html = lib.cmark_render_html
     render_html.restype = c_char_p
     render_html.argtypes = [c_void_p, c_int, c_void_p]
-    result = render_html(document, 0, syntax_extensions).decode('utf-8')
+    result = render_html(document, 1 << 17, syntax_extensions).decode('utf-8')
     return [0, result, '']
 
 def to_commonmark(lib, extlib, text, extensions):
@@ -77,6 +77,7 @@ def __init__(self, prog=None, library_dir=None, extensions=None):
             self.extensions = extensions.split()
 
         if prog:
+            prog += ' --unsafe'
             extsfun = lambda exts: ''.join([' -e ' + e for e in set(exts)])
             self.to_html = lambda x, exts=[]: pipe_through_prog(prog + extsfun(exts + self.extensions), x)
             self.to_commonmark = lambda x, exts=[]: pipe_through_prog(prog + ' -t commonmark' + extsfun(exts + self.extensions), x)