From 376a55eb4449b67ce1797222ca2e8a516abfe6a1 Mon Sep 17 00:00:00 2001 From: Tatsunori Uchino Date: Mon, 19 Aug 2024 06:21:30 +0900 Subject: [PATCH 1/3] Ignore VS15 (suppressing rendering character as emoji) --- src/inlines.c | 4 +++- test/cjkemphasis.txt | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/inlines.c b/src/inlines.c index 695bbd0c4..bbec18e43 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -444,8 +444,10 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open, if (len == -1) { before_char = 10; } - if (before_char >= 0xfe00 && before_char <= 0xfe02) { + if ((before_char >> 4) == 0xfe0 && ((before_char >= 0xfe00 && before_char <= 0xfe02) || before_char == 0xfe0e)) { // standard variation selector, go back one more code point: + // U+FE00..U+FE02: can follow an ideograph + // U+FE0E: forces the previous character to be rendered as text rather than emoji (e.g. U+303D, U+3297) before_char_pos -= 1; while (peek_at(subj, before_char_pos) >> 6 == 2 && before_char_pos > 0) { diff --git a/test/cjkemphasis.txt b/test/cjkemphasis.txt index a0a04110d..c5bcc2b0b 100644 --- a/test/cjkemphasis.txt +++ b/test/cjkemphasis.txt @@ -624,3 +624,10 @@ Git**(注:不是GitHub)** ```````````````````````````````` +```````````````````````````````` example +〽︎**(庵点)**は、 +. +

<p>〽︎<strong>(庵点)</strong>は、</p>

+```````````````````````````````` + + From 60e884fc91b65a8d43bbcf5a9dfb1b6770101ecf Mon Sep 17 00:00:00 2001 From: Tatsunori Uchino Date: Mon, 19 Aug 2024 08:19:49 +0900 Subject: [PATCH 2/3] Add Yijing Hexagram Symbols to CJK reduces the number of product terms --- src/utf8.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/utf8.c b/src/utf8.c index 55758bf04..2c03b480f 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -455,8 +455,9 @@ int cmark_utf8proc_is_CJK(int32_t uc) { || (uc >= 0x31f0 && uc <= 0x31ff) // Katakana Phonetic Extensions || (uc >= 0x3200 && uc <= 0x32ff) // Enclosed CJK Letters & Months || (uc >= 0x3300 && uc <= 0x33ff) // CJK Compatibility - || (uc >= 0x3400 && */ uc <= 0x4dbf) // CJK Unified Ideographs Extension A - || (uc >= 0x4e00 && /* uc <= 0x9fff) // CJK Unified Ideographs + || (uc >= 0x3400 && uc <= 0x4dbf) // CJK Unified Ideographs Extension A + || (uc >= 0x4dc0 && uc <= 0x4dff) // Yijing Hexagram Symbols + || (uc >= 0x4e00 && uc <= 0x9fff) // CJK Unified Ideographs || (uc >= 0xa000 && uc <= 0xa48f) // Yi Syllables || (uc >= 0xa490 && */ uc <= 0xa4cf) // Yi Radicals || (uc >= 0xf900 && uc <= 0xfaff) // CJK Compatibility Ideographs From f01749f355d42e08751f10588af948f1d74a0bb3 Mon Sep 17 00:00:00 2001 From: Tatsunori Uchino Date: Mon, 19 Aug 2024 18:51:52 +0900 Subject: [PATCH 3/3] Fix Unicode block description --- src/utf8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utf8.c b/src/utf8.c index 2c03b480f..740f81e6b 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -448,7 +448,7 @@ int cmark_utf8proc_is_CJK(int32_t uc) { || (uc >= 0x3040 && uc <= 0x309f) // Hiragana || (uc >= 0x30a0 && uc <= 0x30ff) // Katakana || (uc >= 0x3100 && uc <= 0x312f) // Bopomofo - || (uc >= 0x3130 && uc <= 0x318f) // Kanbun + || (uc >= 0x3130 && uc <= 0x318f) // Hangul Compatibility Jamo || (uc >= 0x3190 && uc <= 0x319f) // Kanbun || (uc >= 0x31a0 && uc <= 0x31bf) // Bopomofo Extended || (uc >= 0x31c0 && uc <= 0x31ef) // 
CJK Strokes