From aa6708c05e1403e397c46a4266b74bb5ded1ef74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Fri, 28 Sep 2018 22:20:42 +0200 Subject: [PATCH 1/7] initial version of multiple unicode version support --- demo/client.ts | 7 +- src/Buffer.test.ts | 2 +- src/InputHandler.ts | 4 +- src/Linkifier.ts | 6 +- src/Terminal.ts | 7 +- src/Types.ts | 18 ++ src/UnicodeProvider.ts | 147 ++++++++++ src/unicode/v11.ts | 555 ++++++++++++++++++++++++++++++++++++ src/unicode/v6.ts | 175 ++++++++++++ src/utils/TestUtils.test.ts | 5 +- 10 files changed, 917 insertions(+), 9 deletions(-) create mode 100644 src/UnicodeProvider.ts create mode 100644 src/unicode/v11.ts create mode 100644 src/unicode/v6.ts diff --git a/demo/client.ts b/demo/client.ts index 91cebf07cd..c20e210a3c 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -208,7 +208,8 @@ function initOptions(term: TerminalType): void { fontFamily: null, fontWeight: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], fontWeightBold: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], - rendererType: ['dom', 'canvas'] + rendererType: ['dom', 'canvas'], + unicodeVersion: (term as any)._core.unicodeProvider.registeredVersions() }; const options = Object.keys((term)._core.options); const booleanOptions = []; @@ -219,7 +220,9 @@ function initOptions(term: TerminalType): void { booleanOptions.push(o); break; case 'number': - numberOptions.push(o); + if (o !== 'unicodeVersion') { + numberOptions.push(o); + } break; default: if (Object.keys(stringOptions).indexOf(o) === -1) { diff --git a/src/Buffer.test.ts b/src/Buffer.test.ts index 0dcdd02dfa..d31c980c51 100644 --- a/src/Buffer.test.ts +++ b/src/Buffer.test.ts @@ -487,7 +487,7 @@ describe('Buffer', () => { }); it('fullwidth combining with emoji - match emoji cell', () => { - const input = 'Lots of ¥\u0301 make me 😃.'; + const input = 'Lots of ¥\u0301 make me very 😃.'; terminal.writeSync(input); const s = 
terminal.buffer.iterator(true).next().content; assert.equal(input, s); diff --git a/src/InputHandler.ts b/src/InputHandler.ts index ad5aad3906..fcccef8ec6 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -9,7 +9,6 @@ import { C0, C1 } from './common/data/EscapeSequences'; import { CHARSETS, DEFAULT_CHARSET } from './core/data/Charsets'; import { CHAR_DATA_CHAR_INDEX, CHAR_DATA_WIDTH_INDEX, CHAR_DATA_CODE_INDEX, DEFAULT_ATTR, NULL_CELL_CHAR, NULL_CELL_WIDTH, NULL_CELL_CODE } from './Buffer'; import { FLAGS } from './renderer/Types'; -import { wcwidth } from './CharWidth'; import { EscapeSequenceParser } from './EscapeSequenceParser'; import { ICharset } from './core/Types'; import { Disposable } from './common/Lifecycle'; @@ -364,7 +363,8 @@ export class InputHandler extends Disposable implements IInputHandler { // calculate print space // expensive call, therefore we save width in line buffer - chWidth = wcwidth(code); + // chWidth = wcwidth(code); + chWidth = this._terminal.unicodeProvider.wcwidth(code); // get charset replacement character if (charset) { diff --git a/src/Linkifier.ts b/src/Linkifier.ts index 2ec3ed400d..13c737b8fa 100644 --- a/src/Linkifier.ts +++ b/src/Linkifier.ts @@ -8,7 +8,6 @@ import { ILinkHoverEvent, ILinkMatcher, LinkMatcherHandler, LinkHoverEventTypes, import { MouseZone } from './ui/MouseZoneManager'; import { EventEmitter } from './common/EventEmitter'; import { CHAR_DATA_ATTR_INDEX } from './Buffer'; -import { getStringCellWidth } from './CharWidth'; /** * The Linkifier applies links to rows shortly after they have been refreshed. @@ -256,7 +255,10 @@ export class Linkifier extends EventEmitter implements ILinkifier { * @param fg The link color for hover event. 
*/ private _addLink(x: number, y: number, uri: string, matcher: ILinkMatcher, fg: number): void { - const width = getStringCellWidth(uri); + // FIXME: to make runtime changes of the unicode version possible + // this may not rely on getStringCellWidth anymore + // instead sum widths saved in the buffer + const width = (this._terminal as any).unicodeProvider.getStringCellWidth(uri); const x1 = x % this._terminal.cols; const y1 = y + Math.floor(x / this._terminal.cols); let x2 = (x1 + width) % this._terminal.cols; diff --git a/src/Terminal.ts b/src/Terminal.ts index ef9cb9ab68..c0915313b2 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -53,6 +53,7 @@ import { IKeyboardEvent } from './common/Types'; import { evaluateKeyboardEvent } from './core/input/Keyboard'; import { KeyboardResultType, ICharset } from './core/Types'; import { BufferLine } from './BufferLine'; +import { UnicodeProvider } from './UnicodeProvider'; // Let it work inside Node.js for automated testing purposes. const document = (typeof window !== 'undefined') ? 
window.document : null; @@ -106,7 +107,8 @@ const DEFAULT_OPTIONS: ITerminalOptions = { tabStopWidth: 8, theme: null, rightClickSelectsWord: Browser.isMac, - rendererType: 'canvas' + rendererType: 'canvas', + unicodeVersion: 11 }; export class Terminal extends EventEmitter implements ITerminal, IDisposable, IInputHandlingTerminal { @@ -194,6 +196,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II private _userScrolling: boolean; private _inputHandler: InputHandler; + public unicodeProvider: UnicodeProvider; public soundManager: SoundManager; public renderer: IRenderer; public selectionManager: SelectionManager; @@ -300,6 +303,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II // this._writeStopped = false; this._userScrolling = false; + this.unicodeProvider = new UnicodeProvider(); this._inputHandler = new InputHandler(this); this.register(this._inputHandler); // Reuse renderer if the Terminal is being recreated via a reset call. @@ -493,6 +497,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II } break; case 'tabStopWidth': this.buffers.setupTabStops(); break; + case 'unicodeVersion': this.unicodeProvider.setActiveVersion(parseFloat(value)); break; } // Inform renderer of changes if (this.renderer) { diff --git a/src/Types.ts b/src/Types.ts index 40b3bd0259..b76ab41b08 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -87,6 +87,7 @@ export interface IInputHandlingTerminal extends IEventEmitter { handleTitle(title: string): void; index(): void; reverseIndex(): void; + unicodeProvider: IUnicodeProvider; } export interface IViewport extends IDisposable { @@ -523,3 +524,20 @@ export interface IBufferLine { deleteCells(pos: number, n: number, fill: CharData): void; replaceCells(start: number, end: number, fill: CharData): void; } + +/** + * Interface for unicode version implementations. 
+ */
+export interface IUnicodeImplementation {
+  version: number;
+  wcwidth(ucs: number): number;
+}
+
+export interface IUnicodeProvider {
+  onRegister(callback: (version: number, provider: IUnicodeProvider) => void): void;
+  registeredVersions(): number[];
+  getActiveVersion(): number;
+  setActiveVersion(version: number, mode?: 'exact' | 'closest' | 'next' | 'previous'): number;
+  wcwidth(ucs: number): number;
+  getStringCellWidth(s: string): number;
+}
diff --git a/src/UnicodeProvider.ts b/src/UnicodeProvider.ts
new file mode 100644
index 0000000000..e0a2ce13d5
--- /dev/null
+++ b/src/UnicodeProvider.ts
@@ -0,0 +1,164 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+import { IUnicodeImplementation, IUnicodeProvider } from './Types';
+import { v6 } from './unicode/v6';
+import { v11 } from './unicode/v11';
+
+/**
+ * Class to provide access to different unicode version implementations.
+ *
+ * The version related implementations are stored statically
+ * to avoid recreating them for every single instance.
+ *
+ * An instance of this class is meant to serve unicode specific implementations
+ * for a single terminal instance. This way multiple terminals can have
+ * different unicode settings active while still referring to the
+ * same underlying implementations.
+ */
+export class UnicodeProvider implements IUnicodeProvider {
+  static versions: {[key: string]: IUnicodeImplementation} = {};
+  private static _registerCallbacks: ((version: number) => void)[] = [];
+
+  /**
+   * Add a callback that is invoked with the version number
+   * every time `registerVersion` succeeds.
+   * Callbacks are stored in a static list and are never removed.
+   */
+  static onRegister(callback: (version: number) => void): void {
+    UnicodeProvider._registerCallbacks.push(callback);
+  }
+
+  /**
+   * Register a unicode implementation.
+   * Possible entry point for unicode addons.
+   * In conjunction with `onRegister` it can be used
+   * to load implementations lazily.
+   * Throws if an implementation with the same version is already registered.
+   */
+  static registerVersion(impl: IUnicodeImplementation): void {
+    if (UnicodeProvider.versions[impl.version]) {
+      throw new Error(`unicode version "${impl.version}" already registered`);
+    }
+    UnicodeProvider.versions[impl.version] = impl;
+    UnicodeProvider._registerCallbacks.forEach(cb => cb(impl.version));
+  }
+
+  /**
+   * Get a list of currently registered unicode versions in ascending order.
+   */
+  static registeredVersions(): number[] {
+    return Object.getOwnPropertyNames(UnicodeProvider.versions).map(parseFloat).sort((a, b) => a - b);
+  }
+
+  private _version: number;
+  public wcwidth: (ucs: number) => number;
+
+  // defaults to the highest available version
+  constructor(version: number = 20) {
+    this.setActiveVersion(version);
+  }
+
+  /**
+   * Callback to run when a version got registered.
+   * Gets the newly registered version and
+   * the `UnicodeProvider` instance as arguments.
+   * NOTE: the wrapper added here references this instance from the static
+   * callback list, which is never cleared.
+   */
+  public onRegister(callback: (version: number, provider: UnicodeProvider) => void): void {
+    UnicodeProvider.onRegister((version) => callback(version, this));
+  }
+
+  /**
+   * Get a list of currently registered unicode versions in ascending order.
+   */
+  public registeredVersions(): number[] {
+    return UnicodeProvider.registeredVersions();
+  }
+
+  /**
+   * Get the currently active unicode version.
+   */
+  public getActiveVersion(): number {
+    return this._version;
+  }
+
+  /**
+   * Activate a registered unicode version. By default the closest version will be activated
+   * (can be higher or lower). Setting `mode` to 'next' tries to get at least that version,
+   * 'previous' tries to get the closest lower version.
+   * Throws if no version is registered, or if `mode` is 'exact' and `version`
+   * is not registered; otherwise this method always succeeds.
+   * A NaN `version` (outside 'exact' mode) resolves to the lowest registered version.
+   * Returns the activated version number.
+   */
+  public setActiveVersion(version: number, mode?: 'exact' | 'closest' | 'next' | 'previous'): number {
+    // sorted ascending, looked up once and reused below
+    const versions = this.registeredVersions();
+    if (!versions.length) {
+      throw new Error('no unicode versions registered');
+    }
+
+    // find closest matching version
+    // Although not quite correct for typical versioning schemes 5.9 is treated closer to 6.0 than to 5.7.
+    // Typically we will not ship subversions so this approximation should be close enough.
+    // On a tie the lower version wins (strict `<` keeps the first minimum).
+    const distances = versions.map(el => Math.abs(version - el));
+    const closestIndex = distances.reduce((iMin, x, i, arr) => x < arr[iMin] ? i : iMin, 0);
+    let newVersion = versions[closestIndex];
+
+    if (mode === 'exact') {
+      // exact version match requested
+      if (version !== newVersion) {
+        throw new Error(`unicode version "${version}" not registered`);
+      }
+    } else if (mode === 'next') {
+      // take the higher one if available
+      if (newVersion < version && closestIndex < versions.length - 1) {
+        newVersion = versions[closestIndex + 1];
+      }
+    } else if (mode === 'previous') {
+      // take the lower one if available
+      if (newVersion > version && closestIndex) {
+        newVersion = versions[closestIndex - 1];
+      }
+    }
+
+    // swap wcwidth impl
+    this.wcwidth = UnicodeProvider.versions[newVersion].wcwidth;
+    this._version = newVersion;
+    return this._version;
+  }
+
+  /**
+   * Get the terminal cell width for a string.
+   * The string is read as UTF-16: a valid surrogate pair is combined into one
+   * codepoint, unpaired surrogates are skipped and add no width.
+   */
+  public getStringCellWidth(s: string): number {
+    let result = 0;
+    for (let i = 0; i < s.length; ++i) {
+      let code = s.charCodeAt(i);
+      if (0xD800 <= code && code <= 0xDBFF) {
+        const low = s.charCodeAt(i + 1);
+        // `low` is NaN at the end of the string, which fails the range check too
+        if (!(0xDC00 <= low && low <= 0xDFFF)) {
+          continue;
+        }
+        code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
+        // the low surrogate is consumed as part of this codepoint
+        ++i;
+      } else if (0xDC00 <= code && code <= 0xDFFF) {
+        continue;
+      }
+      result += this.wcwidth(code);
+    }
+    return result;
+  }
+}
+
+// register statically shipped versions
+UnicodeProvider.registerVersion(v6);
+UnicodeProvider.registerVersion(v11);
diff --git a/src/unicode/v11.ts b/src/unicode/v11.ts
new file mode 100644
index 0000000000..dd710a93c3
--- /dev/null
+++ b/src/unicode/v11.ts
@@ -0,0 +1,555 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+import { IUnicodeImplementation } from '../Types';
+
+export const v11: IUnicodeImplementation = {
+  version: 11,
+  wcwidth: (function(opts: {nul: number, control: number}): (ucs: number) => number {
+    // Generated: 2018-09-24T16:45:44.483077
+    // Source: DerivedGeneralCategory-11.0.0.txt
+    // Date: 2018-02-21, 05:34:04 GMT
+    const ZERO_WIDTH = [
+      [0x0300, 0x036f], // Combining Grave Accent ..Combining Latin Small Le
+      [0x0483, 0x0489], // Combining Cyrillic Titlo..Combining Cyrillic Milli
+      [0x0591, 0x05bd], // Hebrew Accent Etnahta ..Hebrew Point Meteg
+      [0x05bf, 0x05bf], // Hebrew Point Rafe ..Hebrew Point Rafe
+      [0x05c1, 0x05c2], // Hebrew Point Shin Dot ..Hebrew Point Sin Dot
+      [0x05c4, 0x05c5], // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot
+      [0x05c7, 0x05c7], // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata
+      [0x0610, 0x061a], // Arabic Sign Sallallahou ..Arabic Small Kasra
+      [0x064b, 0x065f], // Arabic Fathatan ..
+ [0x0670, 0x0670], // Arabic Letter Superscrip..Arabic Letter Superscrip + [0x06d6, 0x06dc], // Arabic Small High Ligatu..Arabic Small High Seen + [0x06df, 0x06e4], // Arabic Small High Rounde..Arabic Small High Madda + [0x06e7, 0x06e8], // Arabic Small High Yeh ..Arabic Small High Noon + [0x06ea, 0x06ed], // Arabic Empty Centre Low ..Arabic Small Low Meem + [0x0711, 0x0711], // Syriac Letter Superscrip..Syriac Letter Superscrip + [0x0730, 0x074a], // Syriac Pthaha Above ..Syriac Barrekh + [0x07a6, 0x07b0], // Thaana Abafili ..Thaana Sukun + [0x07eb, 0x07f3], // Nko Combining Short High..Nko Combining Double Dot + [0x07fd, 0x07fd], // (nil) .. + [0x0816, 0x0819], // Samaritan Mark In ..Samaritan Mark Dagesh + [0x081b, 0x0823], // Samaritan Mark Epentheti..Samaritan Vowel Sign A + [0x0825, 0x0827], // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + [0x0829, 0x082d], // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + [0x0859, 0x085b], // (nil) .. + [0x08d3, 0x08e1], // (nil) .. + [0x08e3, 0x0902], // (nil) ..Devanagari Sign Anusvara + [0x093a, 0x093a], // (nil) .. + [0x093c, 0x093c], // Devanagari Sign Nukta ..Devanagari Sign Nukta + [0x0941, 0x0948], // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + [0x094d, 0x094d], // Devanagari Sign Virama ..Devanagari Sign Virama + [0x0951, 0x0957], // Devanagari Stress Sign U.. + [0x0962, 0x0963], // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + [0x0981, 0x0981], // Bengali Sign Candrabindu..Bengali Sign Candrabindu + [0x09bc, 0x09bc], // Bengali Sign Nukta ..Bengali Sign Nukta + [0x09c1, 0x09c4], // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + [0x09cd, 0x09cd], // Bengali Sign Virama ..Bengali Sign Virama + [0x09e2, 0x09e3], // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + [0x09fe, 0x09fe], // (nil) .. 
+ [0x0a01, 0x0a02], // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + [0x0a3c, 0x0a3c], // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + [0x0a41, 0x0a42], // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + [0x0a47, 0x0a48], // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + [0x0a4b, 0x0a4d], // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + [0x0a51, 0x0a51], // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + [0x0a70, 0x0a71], // Gurmukhi Tippi ..Gurmukhi Addak + [0x0a75, 0x0a75], // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + [0x0a81, 0x0a82], // Gujarati Sign Candrabind..Gujarati Sign Anusvara + [0x0abc, 0x0abc], // Gujarati Sign Nukta ..Gujarati Sign Nukta + [0x0ac1, 0x0ac5], // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + [0x0ac7, 0x0ac8], // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + [0x0acd, 0x0acd], // Gujarati Sign Virama ..Gujarati Sign Virama + [0x0ae2, 0x0ae3], // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + [0x0afa, 0x0aff], // (nil) .. + [0x0b01, 0x0b01], // Oriya Sign Candrabindu ..Oriya Sign Candrabindu + [0x0b3c, 0x0b3c], // Oriya Sign Nukta ..Oriya Sign Nukta + [0x0b3f, 0x0b3f], // Oriya Vowel Sign I ..Oriya Vowel Sign I + [0x0b41, 0x0b44], // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + [0x0b4d, 0x0b4d], // Oriya Sign Virama ..Oriya Sign Virama + [0x0b56, 0x0b56], // Oriya Ai Length Mark ..Oriya Ai Length Mark + [0x0b62, 0x0b63], // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + [0x0b82, 0x0b82], // Tamil Sign Anusvara ..Tamil Sign Anusvara + [0x0bc0, 0x0bc0], // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii + [0x0bcd, 0x0bcd], // Tamil Sign Virama ..Tamil Sign Virama + [0x0c00, 0x0c00], // (nil) .. + [0x0c04, 0x0c04], // (nil) .. 
+ [0x0c3e, 0x0c40], // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + [0x0c46, 0x0c48], // Telugu Vowel Sign E ..Telugu Vowel Sign Ai + [0x0c4a, 0x0c4d], // Telugu Vowel Sign O ..Telugu Sign Virama + [0x0c55, 0x0c56], // Telugu Length Mark ..Telugu Ai Length Mark + [0x0c62, 0x0c63], // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + [0x0c81, 0x0c81], // (nil) .. + [0x0cbc, 0x0cbc], // Kannada Sign Nukta ..Kannada Sign Nukta + [0x0cbf, 0x0cbf], // Kannada Vowel Sign I ..Kannada Vowel Sign I + [0x0cc6, 0x0cc6], // Kannada Vowel Sign E ..Kannada Vowel Sign E + [0x0ccc, 0x0ccd], // Kannada Vowel Sign Au ..Kannada Sign Virama + [0x0ce2, 0x0ce3], // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + [0x0d00, 0x0d01], // (nil) .. + [0x0d3b, 0x0d3c], // (nil) .. + [0x0d41, 0x0d44], // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + [0x0d4d, 0x0d4d], // Malayalam Sign Virama ..Malayalam Sign Virama + [0x0d62, 0x0d63], // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + [0x0dca, 0x0dca], // Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + [0x0dd2, 0x0dd4], // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + [0x0dd6, 0x0dd6], // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + [0x0e31, 0x0e31], // Thai Character Mai Han-a..Thai Character Mai Han-a + [0x0e34, 0x0e3a], // Thai Character Sara I ..Thai Character Phinthu + [0x0e47, 0x0e4e], // Thai Character Maitaikhu..Thai Character Yamakkan + [0x0eb1, 0x0eb1], // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + [0x0eb4, 0x0eb9], // Lao Vowel Sign I ..Lao Vowel Sign Uu + [0x0ebb, 0x0ebc], // Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + [0x0ec8, 0x0ecd], // Lao Tone Mai Ek ..Lao Niggahita + [0x0f18, 0x0f19], // Tibetan Astrological Sig..Tibetan Astrological Sig + [0x0f35, 0x0f35], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f37, 0x0f37], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f39, 0x0f39], // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + [0x0f71, 0x0f7e], // Tibetan 
Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + [0x0f80, 0x0f84], // Tibetan Vowel Sign Rever..Tibetan Mark Halanta + [0x0f86, 0x0f87], // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + [0x0f8d, 0x0f97], // (nil) ..Tibetan Subjoined Letter + [0x0f99, 0x0fbc], // Tibetan Subjoined Letter..Tibetan Subjoined Letter + [0x0fc6, 0x0fc6], // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + [0x102d, 0x1030], // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + [0x1032, 0x1037], // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + [0x1039, 0x103a], // Myanmar Sign Virama ..Myanmar Sign Asat + [0x103d, 0x103e], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1058, 0x1059], // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + [0x105e, 0x1060], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1071, 0x1074], // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + [0x1082, 0x1082], // Myanmar Consonant Sign S..Myanmar Consonant Sign S + [0x1085, 0x1086], // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + [0x108d, 0x108d], // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + [0x109d, 0x109d], // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton + [0x135d, 0x135f], // (nil) ..Ethiopic Combining Gemin + [0x1712, 0x1714], // Tagalog Vowel Sign I ..Tagalog Sign Virama + [0x1732, 0x1734], // Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + [0x1752, 0x1753], // Buhid Vowel Sign I ..Buhid Vowel Sign U + [0x1772, 0x1773], // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + [0x17b4, 0x17b5], // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + [0x17b7, 0x17bd], // Khmer Vowel Sign I ..Khmer Vowel Sign Ua + [0x17c6, 0x17c6], // Khmer Sign Nikahit ..Khmer Sign Nikahit + [0x17c9, 0x17d3], // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + [0x17dd, 0x17dd], // Khmer Sign Atthacan ..Khmer Sign Atthacan + [0x180b, 0x180d], // Mongolian Free Variation..Mongolian Free Variation + [0x1885, 0x1886], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x18a9, 
0x18a9], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x1920, 0x1922], // Limbu Vowel Sign A ..Limbu Vowel Sign U + [0x1927, 0x1928], // Limbu Vowel Sign E ..Limbu Vowel Sign O + [0x1932, 0x1932], // Limbu Small Letter Anusv..Limbu Small Letter Anusv + [0x1939, 0x193b], // Limbu Sign Mukphreng ..Limbu Sign Sa-i + [0x1a17, 0x1a18], // Buginese Vowel Sign I ..Buginese Vowel Sign U + [0x1a1b, 0x1a1b], // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae + [0x1a56, 0x1a56], // Tai Tham Consonant Sign ..Tai Tham Consonant Sign + [0x1a58, 0x1a5e], // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + [0x1a60, 0x1a60], // Tai Tham Sign Sakot ..Tai Tham Sign Sakot + [0x1a62, 0x1a62], // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai + [0x1a65, 0x1a6c], // Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + [0x1a73, 0x1a7c], // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + [0x1a7f, 0x1a7f], // Tai Tham Combining Crypt..Tai Tham Combining Crypt + [0x1ab0, 0x1abe], // (nil) .. + [0x1b00, 0x1b03], // Balinese Sign Ulu Ricem ..Balinese Sign Surang + [0x1b34, 0x1b34], // Balinese Sign Rerekan ..Balinese Sign Rerekan + [0x1b36, 0x1b3a], // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R + [0x1b3c, 0x1b3c], // Balinese Vowel Sign La L..Balinese Vowel Sign La L + [0x1b42, 0x1b42], // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + [0x1b6b, 0x1b73], // Balinese Musical Symbol ..Balinese Musical Symbol + [0x1b80, 0x1b81], // Sundanese Sign Panyecek ..Sundanese Sign Panglayar + [0x1ba2, 0x1ba5], // Sundanese Consonant Sign..Sundanese Vowel Sign Pan + [0x1ba8, 0x1ba9], // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan + [0x1bab, 0x1bad], // (nil) .. + [0x1be6, 0x1be6], // (nil) .. + [0x1be8, 0x1be9], // (nil) .. + [0x1bed, 0x1bed], // (nil) .. + [0x1bef, 0x1bf1], // (nil) .. 
+ [0x1c2c, 0x1c33], // Lepcha Vowel Sign E ..Lepcha Consonant Sign T + [0x1c36, 0x1c37], // Lepcha Sign Ran ..Lepcha Sign Nukta + [0x1cd0, 0x1cd2], // Vedic Tone Karshana ..Vedic Tone Prenkha + [0x1cd4, 0x1ce0], // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + [0x1ce2, 0x1ce8], // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + [0x1ced, 0x1ced], // Vedic Sign Tiryak ..Vedic Sign Tiryak + [0x1cf4, 0x1cf4], // (nil) .. + [0x1cf8, 0x1cf9], // (nil) .. + [0x1dc0, 0x1df9], // Combining Dotted Grave A.. + [0x1dfb, 0x1dff], // (nil) ..Combining Right Arrowhea + [0x20d0, 0x20f0], // Combining Left Harpoon A..Combining Asterisk Above + [0x2cef, 0x2cf1], // Coptic Combining Ni Abov..Coptic Combining Spiritu + [0x2d7f, 0x2d7f], // (nil) .. + [0x2de0, 0x2dff], // Combining Cyrillic Lette..Combining Cyrillic Lette + [0x302a, 0x302d], // Ideographic Level Tone M..Ideographic Entering Ton + [0x3099, 0x309a], // Combining Katakana-hirag..Combining Katakana-hirag + [0xa66f, 0xa672], // Combining Cyrillic Vzmet..Combining Cyrillic Thous + [0xa674, 0xa67d], // (nil) ..Combining Cyrillic Payer + [0xa69e, 0xa69f], // (nil) .. + [0xa6f0, 0xa6f1], // Bamum Combining Mark Koq..Bamum Combining Mark Tuk + [0xa802, 0xa802], // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva + [0xa806, 0xa806], // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + [0xa80b, 0xa80b], // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + [0xa825, 0xa826], // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + [0xa8c4, 0xa8c5], // Saurashtra Sign Virama .. + [0xa8e0, 0xa8f1], // Combining Devanagari Dig..Combining Devanagari Sig + [0xa8ff, 0xa8ff], // (nil) .. 
+ [0xa926, 0xa92d], // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + [0xa947, 0xa951], // Rejang Vowel Sign I ..Rejang Consonant Sign R + [0xa980, 0xa982], // Javanese Sign Panyangga ..Javanese Sign Layar + [0xa9b3, 0xa9b3], // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + [0xa9b6, 0xa9b9], // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + [0xa9bc, 0xa9bc], // Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe + [0xa9e5, 0xa9e5], // (nil) .. + [0xaa29, 0xaa2e], // Cham Vowel Sign Aa ..Cham Vowel Sign Oe + [0xaa31, 0xaa32], // Cham Vowel Sign Au ..Cham Vowel Sign Ue + [0xaa35, 0xaa36], // Cham Consonant Sign La ..Cham Consonant Sign Wa + [0xaa43, 0xaa43], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa4c, 0xaa4c], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa7c, 0xaa7c], // (nil) .. + [0xaab0, 0xaab0], // Tai Viet Mai Kang ..Tai Viet Mai Kang + [0xaab2, 0xaab4], // Tai Viet Vowel I ..Tai Viet Vowel U + [0xaab7, 0xaab8], // Tai Viet Mai Khit ..Tai Viet Vowel Ia + [0xaabe, 0xaabf], // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + [0xaac1, 0xaac1], // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + [0xaaec, 0xaaed], // (nil) .. + [0xaaf6, 0xaaf6], // (nil) .. + [0xabe5, 0xabe5], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabe8, 0xabe8], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabed, 0xabed], // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + [0xfb1e, 0xfb1e], // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + [0xfe00, 0xfe0f], // Variation Selector-1 ..Variation Selector-16 + [0xfe20, 0xfe2f], // Combining Ligature Left .. + [0x101fd, 0x101fd], // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + [0x102e0, 0x102e0], // (nil) .. + [0x10376, 0x1037a], // (nil) .. 
+ [0x10a01, 0x10a03], // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + [0x10a05, 0x10a06], // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + [0x10a0c, 0x10a0f], // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + [0x10a38, 0x10a3a], // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + [0x10a3f, 0x10a3f], // Kharoshthi Virama ..Kharoshthi Virama + [0x10ae5, 0x10ae6], // (nil) .. + [0x10d24, 0x10d27], // (nil) .. + [0x10f46, 0x10f50], // (nil) .. + [0x11001, 0x11001], // (nil) .. + [0x11038, 0x11046], // (nil) .. + [0x1107f, 0x11081], // (nil) ..Kaithi Sign Anusvara + [0x110b3, 0x110b6], // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + [0x110b9, 0x110ba], // Kaithi Sign Virama ..Kaithi Sign Nukta + [0x11100, 0x11102], // (nil) .. + [0x11127, 0x1112b], // (nil) .. + [0x1112d, 0x11134], // (nil) .. + [0x11173, 0x11173], // (nil) .. + [0x11180, 0x11181], // (nil) .. + [0x111b6, 0x111be], // (nil) .. + [0x111c9, 0x111cc], // (nil) .. + [0x1122f, 0x11231], // (nil) .. + [0x11234, 0x11234], // (nil) .. + [0x11236, 0x11237], // (nil) .. + [0x1123e, 0x1123e], // (nil) .. + [0x112df, 0x112df], // (nil) .. + [0x112e3, 0x112ea], // (nil) .. + [0x11300, 0x11301], // (nil) .. + [0x1133b, 0x1133c], // (nil) .. + [0x11340, 0x11340], // (nil) .. + [0x11366, 0x1136c], // (nil) .. + [0x11370, 0x11374], // (nil) .. + [0x11438, 0x1143f], // (nil) .. + [0x11442, 0x11444], // (nil) .. + [0x11446, 0x11446], // (nil) .. + [0x1145e, 0x1145e], // (nil) .. + [0x114b3, 0x114b8], // (nil) .. + [0x114ba, 0x114ba], // (nil) .. + [0x114bf, 0x114c0], // (nil) .. + [0x114c2, 0x114c3], // (nil) .. + [0x115b2, 0x115b5], // (nil) .. + [0x115bc, 0x115bd], // (nil) .. + [0x115bf, 0x115c0], // (nil) .. + [0x115dc, 0x115dd], // (nil) .. + [0x11633, 0x1163a], // (nil) .. + [0x1163d, 0x1163d], // (nil) .. + [0x1163f, 0x11640], // (nil) .. + [0x116ab, 0x116ab], // (nil) .. + [0x116ad, 0x116ad], // (nil) .. + [0x116b0, 0x116b5], // (nil) .. + [0x116b7, 0x116b7], // (nil) .. 
+ [0x1171d, 0x1171f], // (nil) .. + [0x11722, 0x11725], // (nil) .. + [0x11727, 0x1172b], // (nil) .. + [0x1182f, 0x11837], // (nil) .. + [0x11839, 0x1183a], // (nil) .. + [0x11a01, 0x11a0a], // (nil) .. + [0x11a33, 0x11a38], // (nil) .. + [0x11a3b, 0x11a3e], // (nil) .. + [0x11a47, 0x11a47], // (nil) .. + [0x11a51, 0x11a56], // (nil) .. + [0x11a59, 0x11a5b], // (nil) .. + [0x11a8a, 0x11a96], // (nil) .. + [0x11a98, 0x11a99], // (nil) .. + [0x11c30, 0x11c36], // (nil) .. + [0x11c38, 0x11c3d], // (nil) .. + [0x11c3f, 0x11c3f], // (nil) .. + [0x11c92, 0x11ca7], // (nil) .. + [0x11caa, 0x11cb0], // (nil) .. + [0x11cb2, 0x11cb3], // (nil) .. + [0x11cb5, 0x11cb6], // (nil) .. + [0x11d31, 0x11d36], // (nil) .. + [0x11d3a, 0x11d3a], // (nil) .. + [0x11d3c, 0x11d3d], // (nil) .. + [0x11d3f, 0x11d45], // (nil) .. + [0x11d47, 0x11d47], // (nil) .. + [0x11d90, 0x11d91], // (nil) .. + [0x11d95, 0x11d95], // (nil) .. + [0x11d97, 0x11d97], // (nil) .. + [0x11ef3, 0x11ef4], // (nil) .. + [0x16af0, 0x16af4], // (nil) .. + [0x16b30, 0x16b36], // (nil) .. + [0x16f8f, 0x16f92], // (nil) .. + [0x1bc9d, 0x1bc9e], // (nil) .. + [0x1d167, 0x1d169], // Musical Symbol Combining..Musical Symbol Combining + [0x1d17b, 0x1d182], // Musical Symbol Combining..Musical Symbol Combining + [0x1d185, 0x1d18b], // Musical Symbol Combining..Musical Symbol Combining + [0x1d1aa, 0x1d1ad], // Musical Symbol Combining..Musical Symbol Combining + [0x1d242, 0x1d244], // Combining Greek Musical ..Combining Greek Musical + [0x1da00, 0x1da36], // (nil) .. + [0x1da3b, 0x1da6c], // (nil) .. + [0x1da75, 0x1da75], // (nil) .. + [0x1da84, 0x1da84], // (nil) .. + [0x1da9b, 0x1da9f], // (nil) .. + [0x1daa1, 0x1daaf], // (nil) .. + [0x1e000, 0x1e006], // (nil) .. + [0x1e008, 0x1e018], // (nil) .. + [0x1e01b, 0x1e021], // (nil) .. + [0x1e023, 0x1e024], // (nil) .. + [0x1e026, 0x1e02a], // (nil) .. + [0x1e8d0, 0x1e8d6], // (nil) .. + [0x1e944, 0x1e94a], // (nil) .. 
+ [0xe0100, 0xe01ef] // Variation Selector-17 ..Variation Selector-256 + ]; + + // Generated: 2018-09-24T16:45:44.464578 + // Source: EastAsianWidth-11.0.0.txt + // Date: 2018-05-14, 09:41:59 GMT [KW, LI] + const WIDE_EASTASIAN = [ + [0x1100, 0x115f], // Hangul Choseong Kiyeok ..Hangul Choseong Filler + [0x231a, 0x231b], // Watch ..Hourglass + [0x2329, 0x232a], // Left-pointing Angle Brac..Right-pointing Angle Bra + [0x23e9, 0x23ec], // (nil) .. + [0x23f0, 0x23f0], // (nil) .. + [0x23f3, 0x23f3], // (nil) .. + [0x25fd, 0x25fe], // White Medium Small Squar..Black Medium Small Squar + [0x2614, 0x2615], // Umbrella With Rain Drops..Hot Beverage + [0x2648, 0x2653], // Aries ..Pisces + [0x267f, 0x267f], // Wheelchair Symbol ..Wheelchair Symbol + [0x2693, 0x2693], // Anchor ..Anchor + [0x26a1, 0x26a1], // High Voltage Sign ..High Voltage Sign + [0x26aa, 0x26ab], // Medium White Circle ..Medium Black Circle + [0x26bd, 0x26be], // Soccer Ball ..Baseball + [0x26c4, 0x26c5], // Snowman Without Snow ..Sun Behind Cloud + [0x26ce, 0x26ce], // (nil) .. + [0x26d4, 0x26d4], // No Entry ..No Entry + [0x26ea, 0x26ea], // Church ..Church + [0x26f2, 0x26f3], // Fountain ..Flag In Hole + [0x26f5, 0x26f5], // Sailboat ..Sailboat + [0x26fa, 0x26fa], // Tent ..Tent + [0x26fd, 0x26fd], // Fuel Pump ..Fuel Pump + [0x2705, 0x2705], // (nil) .. + [0x270a, 0x270b], // (nil) .. + [0x2728, 0x2728], // (nil) .. + [0x274c, 0x274c], // (nil) .. + [0x274e, 0x274e], // (nil) .. + [0x2753, 0x2755], // (nil) .. + [0x2757, 0x2757], // Heavy Exclamation Mark S..Heavy Exclamation Mark S + [0x2795, 0x2797], // (nil) .. + [0x27b0, 0x27b0], // (nil) .. + [0x27bf, 0x27bf], // (nil) .. 
+ [0x2b1b, 0x2b1c], // Black Large Square ..White Large Square + [0x2b50, 0x2b50], // White Medium Star ..White Medium Star + [0x2b55, 0x2b55], // Heavy Large Circle ..Heavy Large Circle + [0x2e80, 0x2e99], // Cjk Radical Repeat ..Cjk Radical Rap + [0x2e9b, 0x2ef3], // Cjk Radical Choke ..Cjk Radical C-simplified + [0x2f00, 0x2fd5], // Kangxi Radical One ..Kangxi Radical Flute + [0x2ff0, 0x2ffb], // Ideographic Description ..Ideographic Description + [0x3000, 0x303e], // Ideographic Space ..Ideographic Variation In + [0x3041, 0x3096], // Hiragana Letter Small A ..Hiragana Letter Small Ke + [0x3099, 0x30ff], // Combining Katakana-hirag..Katakana Digraph Koto + [0x3105, 0x312f], // Bopomofo Letter B .. + [0x3131, 0x318e], // Hangul Letter Kiyeok ..Hangul Letter Araeae + [0x3190, 0x31ba], // Ideographic Annotation L.. + [0x31c0, 0x31e3], // Cjk Stroke T ..Cjk Stroke Q + [0x31f0, 0x321e], // Katakana Letter Small Ku..Parenthesized Korean Cha + [0x3220, 0x3247], // Parenthesized Ideograph ..Circled Ideograph Koto + [0x3250, 0x32fe], // Partnership Sign ..Circled Katakana Wo + [0x3300, 0x4dbf], // Square Apaato .. + [0x4e00, 0xa48c], // Cjk Unified Ideograph-4e..Yi Syllable Yyr + [0xa490, 0xa4c6], // Yi Radical Qot ..Yi Radical Ke + [0xa960, 0xa97c], // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo + [0xac00, 0xd7a3], // Hangul Syllable Ga ..Hangul Syllable Hih + [0xf900, 0xfaff], // Cjk Compatibility Ideogr.. + [0xfe10, 0xfe19], // Presentation Form For Ve..Presentation Form For Ve + [0xfe30, 0xfe52], // Presentation Form For Ve..Small Full Stop + [0xfe54, 0xfe66], // Small Semicolon ..Small Equals Sign + [0xfe68, 0xfe6b], // Small Reverse Solidus ..Small Commercial At + [0xff01, 0xff60], // Fullwidth Exclamation Ma..Fullwidth Right White Pa + [0xffe0, 0xffe6], // Fullwidth Cent Sign ..Fullwidth Won Sign + [0x16fe0, 0x16fe1], // (nil) .. + [0x17000, 0x187f1], // (nil) .. + [0x18800, 0x18af2], // (nil) .. + [0x1b000, 0x1b11e], // (nil) .. 
+ [0x1b170, 0x1b2fb], // (nil) .. + [0x1f004, 0x1f004], // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon + [0x1f0cf, 0x1f0cf], // (nil) .. + [0x1f18e, 0x1f18e], // (nil) .. + [0x1f191, 0x1f19a], // (nil) .. + [0x1f200, 0x1f202], // Square Hiragana Hoka .. + [0x1f210, 0x1f23b], // Squared Cjk Unified Ideo.. + [0x1f240, 0x1f248], // Tortoise Shell Bracketed..Tortoise Shell Bracketed + [0x1f250, 0x1f251], // (nil) .. + [0x1f260, 0x1f265], // (nil) .. + [0x1f300, 0x1f320], // (nil) .. + [0x1f32d, 0x1f335], // (nil) .. + [0x1f337, 0x1f37c], // (nil) .. + [0x1f37e, 0x1f393], // (nil) .. + [0x1f3a0, 0x1f3ca], // (nil) .. + [0x1f3cf, 0x1f3d3], // (nil) .. + [0x1f3e0, 0x1f3f0], // (nil) .. + [0x1f3f4, 0x1f3f4], // (nil) .. + [0x1f3f8, 0x1f43e], // (nil) .. + [0x1f440, 0x1f440], // (nil) .. + [0x1f442, 0x1f4fc], // (nil) .. + [0x1f4ff, 0x1f53d], // (nil) .. + [0x1f54b, 0x1f54e], // (nil) .. + [0x1f550, 0x1f567], // (nil) .. + [0x1f57a, 0x1f57a], // (nil) .. + [0x1f595, 0x1f596], // (nil) .. + [0x1f5a4, 0x1f5a4], // (nil) .. + [0x1f5fb, 0x1f64f], // (nil) .. + [0x1f680, 0x1f6c5], // (nil) .. + [0x1f6cc, 0x1f6cc], // (nil) .. + [0x1f6d0, 0x1f6d2], // (nil) .. + [0x1f6eb, 0x1f6ec], // (nil) .. + [0x1f6f4, 0x1f6f9], // (nil) .. + [0x1f910, 0x1f93e], // (nil) .. + [0x1f940, 0x1f970], // (nil) .. + [0x1f973, 0x1f976], // (nil) .. + [0x1f97a, 0x1f97a], // (nil) .. + [0x1f97c, 0x1f9a2], // (nil) .. + [0x1f9b0, 0x1f9b9], // (nil) .. + [0x1f9c0, 0x1f9c2], // (nil) .. + [0x1f9d0, 0x1f9ff], // (nil) .. + [0x20000, 0x2fffd], // Cjk Unified Ideograph-20.. + [0x30000, 0x3fffd] // (nil) .. 
+    ];
+
+    // binary search over a sorted table of inclusive [first, last] codepoint ranges
+    function bisearch(ucs: number, data: number[][]): boolean {
+      let min = 0;
+      let max = data.length - 1;
+      let mid;
+      if (ucs < data[0][0] || ucs > data[max][1]) {
+        return false;
+      }
+      while (max >= min) {
+        mid = (min + max) >> 1;
+        if (ucs > data[mid][1]) {
+          min = mid + 1;
+        } else if (ucs < data[mid][0]) {
+          max = mid - 1;
+        } else {
+          return true;
+        }
+      }
+      return false;
+    }
+    /**
+     * Cell width of a single BMP codepoint. Only called by initTable
+     * while the packed lookup table is being filled.
+     */
+    function wcwidthBMP(ucs: number): number {
+      // test for 8-bit control characters
+      if (ucs === 0) {
+        return opts.nul;
+      }
+      if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
+        return opts.control;
+      }
+      // binary search in table of non-spacing characters
+      if (bisearch(ucs, ZERO_WIDTH)) {
+        return 0;
+      }
+      // if we arrive here, ucs is not a combining or C0/C1 control character.
+      // Use the generated EastAsianWidth table for the BMP as well, otherwise
+      // its BMP entries (e.g. U+231A WATCH) are never consulted and such
+      // codepoints are measured with the hard-coded ranges of the v6 rules.
+      if (bisearch(ucs, WIDE_EASTASIAN)) {
+        return 2;
+      }
+      return 1;
+    }
+    /**
+     * Cell width of a codepoint above the BMP. These are not part of the
+     * lookup table, every call does a binary search on the range tables.
+     */
+    function wcwidthHigh(ucs: number): 0 | 1 | 2 {
+      if (bisearch(ucs, ZERO_WIDTH)) {
+        return 0;
+      }
+
+      if (bisearch(ucs, WIDE_EASTASIAN)) {
+        return 2;
+      }
+
+      return 1;
+    }
+    const control = opts.control | 0;
+    // packed BMP widths, built by initTable on the first lookup that needs it
+    let table: number[] | Uint32Array = null;
+    // fills `table` with the wcwidthBMP result of every BMP codepoint, 2 bits each
+    function initTable(): number[] | Uint32Array {
+      // lookup table for BMP
+      const CODEPOINTS = 65536;  // BMP holds 65536 codepoints
+      const BITWIDTH = 2;        // a codepoint can have a width of 0, 1 or 2
+      const ITEMSIZE = 32;       // using uint32_t
+      const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE;
+      const CODEPOINTS_PER_ITEM = ITEMSIZE / BITWIDTH;
+      table = (typeof Uint32Array === 'undefined')
+        ? new Array(CONTAINERSIZE)
+        : new Uint32Array(CONTAINERSIZE);
+      for (let i = 0; i < CONTAINERSIZE; ++i) {
+        let num = 0;
+        let pos = CODEPOINTS_PER_ITEM;
+        while (pos--) {
+          num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos);
+        }
+        table[i] = num;
+      }
+      return table;
+    }
+    // get width from lookup table
+    //   position in container   : num / CODEPOINTS_PER_ITEM
+    //     ==> n = table[Math.floor(num / 16)]
+    //     ==> n = table[num >> 4]
+    //   16 codepoints per number:       FFEEDDCCBBAA99887766554433221100
+    //   position in number      : (num % CODEPOINTS_PER_ITEM) * BITWIDTH
+    //     ==> m = (num % 16) * 2
+    //     ==> m = (num & 15) << 1
+    //   right shift to position m
+    //     ==> n = n >> m     e.g. m=12  000000000000FFEEDDCCBBAA99887766
+    //   we are only interested in 2 LSBs, cut off higher bits
+    //     ==> n = n & 3      e.g.       000000000000000000000000000000XX
+    // the exported wcwidth: codepoints below 32 and printable ASCII are answered
+    // directly, other BMP codepoints via the table, the rest via wcwidthHigh
+    return function (num: number): number {
+      num = num | 0;  // get asm.js like optimization under V8
+      if (num < 32) {
+        return control | 0;
+      }
+      if (num < 127) {
+        return 1;
+      }
+      const t = table || initTable();
+      if (num < 65536) {
+        return t[num >> 4] >> ((num & 15) << 1) & 3;
+      }
+      // do a full search for high codepoints
+      return wcwidthHigh(num);
+    };
+  })({nul: 0, control: 0})  // configurable options
+};
diff --git a/src/unicode/v6.ts b/src/unicode/v6.ts
new file mode 100644
index 0000000000..c440467482
--- /dev/null
+++ b/src/unicode/v6.ts
@@ -0,0 +1,175 @@
+/**
+ * Copyright (c) 2018 The xterm.js authors. All rights reserved.
+ * @license MIT
+ */
+import { IUnicodeImplementation } from '../Types';
+
+export const v6: IUnicodeImplementation = {
+  version: 6,
+  wcwidth: (function(opts: {nul: number, control: number}): (ucs: number) => number {
+    // extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c
+    // combining characters (width 0): sorted inclusive [first, last] ranges, BMP part
+    const COMBINING_BMP = [
+      [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489],
+      [0x0591, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2],
+      [0x05C4, 0x05C5], [0x05C7, 0x05C7], [0x0600, 0x0603],
+      [0x0610, 0x0615], [0x064B, 0x065E], [0x0670, 0x0670],
+      [0x06D6, 0x06E4], [0x06E7, 0x06E8], [0x06EA, 0x06ED],
+      [0x070F, 0x070F], [0x0711, 0x0711], [0x0730, 0x074A],
+      [0x07A6, 0x07B0], [0x07EB, 0x07F3], [0x0901, 0x0902],
+      [0x093C, 0x093C], [0x0941, 0x0948], [0x094D, 0x094D],
+      [0x0951, 0x0954], [0x0962, 0x0963], [0x0981, 0x0981],
+      [0x09BC, 0x09BC], [0x09C1, 0x09C4], [0x09CD, 0x09CD],
+      [0x09E2, 0x09E3], [0x0A01, 0x0A02], [0x0A3C, 0x0A3C],
+      [0x0A41, 0x0A42], [0x0A47, 0x0A48], [0x0A4B, 0x0A4D],
+      [0x0A70, 0x0A71], [0x0A81, 0x0A82], [0x0ABC, 0x0ABC],
+      [0x0AC1, 0x0AC5], [0x0AC7, 0x0AC8], [0x0ACD, 0x0ACD],
+      [0x0AE2, 0x0AE3], [0x0B01, 0x0B01], [0x0B3C, 0x0B3C],
+      [0x0B3F, 0x0B3F], [0x0B41, 0x0B43], [0x0B4D, 0x0B4D],
+      [0x0B56, 0x0B56], [0x0B82, 0x0B82], [0x0BC0, 0x0BC0],
+      [0x0BCD, 0x0BCD], [0x0C3E, 0x0C40], [0x0C46, 0x0C48],
+      [0x0C4A, 0x0C4D], [0x0C55, 0x0C56], [0x0CBC, 0x0CBC],
+      [0x0CBF, 0x0CBF], [0x0CC6, 0x0CC6], [0x0CCC, 0x0CCD],
+      [0x0CE2, 0x0CE3], [0x0D41, 0x0D43], [0x0D4D, 0x0D4D],
+      [0x0DCA, 0x0DCA], [0x0DD2, 0x0DD4], [0x0DD6, 0x0DD6],
+      [0x0E31, 0x0E31], [0x0E34, 0x0E3A], [0x0E47, 0x0E4E],
+      [0x0EB1, 0x0EB1], [0x0EB4, 0x0EB9], [0x0EBB, 0x0EBC],
+      [0x0EC8, 0x0ECD], [0x0F18, 0x0F19], [0x0F35, 0x0F35],
+      [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F71, 0x0F7E],
+      [0x0F80, 0x0F84], [0x0F86, 0x0F87], [0x0F90, 0x0F97],
+      [0x0F99, 0x0FBC], [0x0FC6, 0x0FC6], [0x102D, 0x1030],
+      [0x1032, 0x1032], [0x1036, 0x1037], [0x1039, 0x1039],
+      [0x1058, 0x1059], [0x1160, 0x11FF], [0x135F, 0x135F],
+      [0x1712, 0x1714], [0x1732, 0x1734], [0x1752, 0x1753],
+      [0x1772, 0x1773], [0x17B4, 0x17B5], [0x17B7, 0x17BD],
+      [0x17C6, 0x17C6], [0x17C9, 0x17D3], [0x17DD, 0x17DD],
+      [0x180B, 0x180D], [0x18A9, 0x18A9], [0x1920, 0x1922],
+      [0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193B],
+      [0x1A17, 0x1A18], [0x1B00, 0x1B03], [0x1B34, 0x1B34],
+      [0x1B36, 0x1B3A], [0x1B3C, 0x1B3C], [0x1B42, 0x1B42],
+      [0x1B6B, 0x1B73], [0x1DC0, 0x1DCA], [0x1DFE, 0x1DFF],
+      [0x200B, 0x200F], [0x202A, 0x202E], [0x2060, 0x2063],
+      [0x206A, 0x206F], [0x20D0, 0x20EF], [0x302A, 0x302F],
+      [0x3099, 0x309A], [0xA806, 0xA806], [0xA80B, 0xA80B],
+      [0xA825, 0xA826], [0xFB1E, 0xFB1E], [0xFE00, 0xFE0F],
+      [0xFE20, 0xFE23], [0xFEFF, 0xFEFF], [0xFFF9, 0xFFFB]
+    ];
+    const COMBINING_HIGH = [  // combining characters (width 0) above the BMP
+      [0x10A01, 0x10A03], [0x10A05, 0x10A06], [0x10A0C, 0x10A0F],
+      [0x10A38, 0x10A3A], [0x10A3F, 0x10A3F], [0x1D167, 0x1D169],
+      [0x1D173, 0x1D182], [0x1D185, 0x1D18B], [0x1D1AA, 0x1D1AD],
+      [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F],
+      [0xE0100, 0xE01EF]
+    ];
+    // binary search: true if ucs lies inside one of the inclusive [first, last] ranges of data
+    function bisearch(ucs: number, data: number[][]): boolean {
+      let min = 0;
+      let max = data.length - 1;
+      let mid;
+      if (ucs < data[0][0] || ucs > data[max][1]) {
+        return false;
+      }
+      while (max >= min) {
+        mid = (min + max) >> 1;
+        if (ucs > data[mid][1]) {
+          min = mid + 1;
+        } else if (ucs < data[mid][0]) {
+          max = mid - 1;
+        } else {
+          return true;
+        }
+      }
+      return false;
+    }
+    function wcwidthBMP(ucs: number): number {  // cell width of one BMP codepoint, only used by initTable
+      // test for 8-bit control characters
+      if (ucs === 0) {
+        return opts.nul;
+      }
+      if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
+        return opts.control;
+      }
+      // binary search in table of non-spacing characters
+      if (bisearch(ucs, COMBINING_BMP)) {
+        return 0;
+      }
+      // if we arrive here, ucs is not a combining or C0/C1 control character
+      if (isWideBMP(ucs)) {
+        return 2;
+      }
+      return 1;
+    }
+    function isWideBMP(ucs: number): boolean {  // true for the hard-coded double width ranges of the BMP
+      return (
+        ucs >= 0x1100 && (
+        ucs <= 0x115f ||                // Hangul Jamo init. consonants
+        ucs === 0x2329 ||
+        ucs === 0x232a ||
+        (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) ||  // CJK..Yi
+        (ucs >= 0xac00 && ucs <= 0xd7a3) ||    // Hangul Syllables
+        (ucs >= 0xf900 && ucs <= 0xfaff) ||    // CJK Compat Ideographs
+        (ucs >= 0xfe10 && ucs <= 0xfe19) ||    // Vertical forms
+        (ucs >= 0xfe30 && ucs <= 0xfe6f) ||    // CJK Compat Forms
+        (ucs >= 0xff00 && ucs <= 0xff60) ||    // Fullwidth Forms
+        (ucs >= 0xffe0 && ucs <= 0xffe6)));
+    }
+    function wcwidthHigh(ucs: number): 0 | 1 | 2 {  // cell width of a codepoint above the BMP, searched on every call
+      if (bisearch(ucs, COMBINING_HIGH)) {
+        return 0;
+      }
+      if ((ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) {
+        return 2;
+      }
+      return 1;
+    }
+    const control = opts.control | 0;
+    let table: number[] | Uint32Array = null;  // packed BMP widths, built by initTable on first use
+    function initTable(): number[] | Uint32Array {
+      // lookup table for BMP: wcwidthBMP of every codepoint, packed with 2 bits each
+      const CODEPOINTS = 65536;  // BMP holds 65536 codepoints
+      const BITWIDTH = 2;        // a codepoint can have a width of 0, 1 or 2
+      const ITEMSIZE = 32;       // using uint32_t
+      const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE;
+      const CODEPOINTS_PER_ITEM = ITEMSIZE / BITWIDTH;
+      table = (typeof Uint32Array === 'undefined')
+        ? new Array(CONTAINERSIZE)
+        : new Uint32Array(CONTAINERSIZE);
+      for (let i = 0; i < CONTAINERSIZE; ++i) {
+        let num = 0;
+        let pos = CODEPOINTS_PER_ITEM;
+        while (pos--) {
+          num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos);
+        }
+        table[i] = num;
+      }
+      return table;
+    }
+    // get width from lookup table
+    //   position in container   : num / CODEPOINTS_PER_ITEM
+    //     ==> n = table[Math.floor(num / 16)]
+    //     ==> n = table[num >> 4]
+    //   16 codepoints per number:       FFEEDDCCBBAA99887766554433221100
+    //   position in number      : (num % CODEPOINTS_PER_ITEM) * BITWIDTH
+    //     ==> m = (num % 16) * 2
+    //     ==> m = (num & 15) << 1
+    //   right shift to position m
+    //     ==> n = n >> m     e.g. m=12  000000000000FFEEDDCCBBAA99887766
+    //   we are only interested in 2 LSBs, cut off higher bits
+    //     ==> n = n & 3      e.g.       000000000000000000000000000000XX
+    return function (num: number): number {
+      num = num | 0;  // get asm.js like optimization under V8
+      if (num < 32) {
+        return control | 0;
+      }
+      if (num < 127) {
+        return 1;
+      }
+      const t = table || initTable();
+      if (num < 65536) {
+        return t[num >> 4] >> ((num & 15) << 1) & 3;
+      }
+      // do a full search for high codepoints (not covered by the lookup table)
+      return wcwidthHigh(num);
+    };
+  })({nul: 0, control: 0})  // configurable options
+};
diff --git a/src/utils/TestUtils.test.ts b/src/utils/TestUtils.test.ts index b38f4b858f..0892295102 100644 --- a/src/utils/TestUtils.test.ts +++ b/src/utils/TestUtils.test.ts @@ -4,12 +4,13 @@ */ import { IColorSet, IRenderer, IRenderDimensions, IColorManager } from '../renderer/Types'; -import { IInputHandlingTerminal, IViewport, ICompositionHelper, ITerminal, IBuffer, IBufferSet, IBrowser, ICharMeasure, ISelectionManager, ITerminalOptions, ILinkifier, IMouseHelper, ILinkMatcherOptions, CharacterJoinerHandler, IBufferLine, IBufferStringIterator } from '../Types'; +import { IInputHandlingTerminal, IViewport, ICompositionHelper, ITerminal, IBuffer, IBufferSet, IBrowser, ICharMeasure, ISelectionManager, ITerminalOptions, ILinkifier, IMouseHelper, ILinkMatcherOptions, CharacterJoinerHandler, IBufferLine, IBufferStringIterator, IUnicodeProvider } from '../Types'; import { ICircularList, XtermListener } from '../common/Types'; import { Buffer } from '../Buffer'; import * as Browser from '../shared/utils/Browser'; import { ITheme, IDisposable, IMarker } from 'xterm'; import { Terminal } from '../Terminal'; +import { UnicodeProvider } from '../UnicodeProvider'; export class TestTerminal extends Terminal { writeSync(data: string): void { @@ -156,6 +157,7 @@ export class MockTerminal implements ITerminal { } registerCharacterJoiner(handler: CharacterJoinerHandler): number { return 0; } deregisterCharacterJoiner(joinerId: number): void { } + unicodeProvider: IUnicodeProvider = new UnicodeProvider(); } export class MockCharMeasure
implements ICharMeasure { @@ -197,6 +199,7 @@ export class MockInputHandlingTerminal implements IInputHandlingTerminal { buffer: IBuffer = new MockBuffer(); viewport: IViewport; selectionManager: ISelectionManager; + unicodeProvider: IUnicodeProvider; focus(): void { throw new Error('Method not implemented.'); } From c8ff889795f1a83624e84e5417e4a97b2f6a938e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Fri, 26 Oct 2018 18:33:04 +0200 Subject: [PATCH 2/7] disposable provider, testcases, cleanup --- src/InputHandler.ts | 3 +- src/Linkifier.ts | 3 - src/Terminal.ts | 1 + src/Types.ts | 4 +- src/UnicodeProvider.test.ts | 144 ++++++++++++++++++++++++++++++++++++ src/UnicodeProvider.ts | 67 +++++++++++++---- 6 files changed, 203 insertions(+), 19 deletions(-) create mode 100644 src/UnicodeProvider.test.ts diff --git a/src/InputHandler.ts b/src/InputHandler.ts index 5f89507c46..d5c880e066 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -314,6 +314,7 @@ export class InputHandler extends Disposable implements IInputHandler { const wraparoundMode: boolean = this._terminal.wraparoundMode; const insertMode: boolean = this._terminal.insertMode; const curAttr: number = this._terminal.curAttr; + const wcwidth = this._terminal.unicodeProvider.wcwidth; let bufferRow = buffer.lines.get(buffer.y + buffer.ybase); this._terminal.updateRange(buffer.y); @@ -342,7 +343,7 @@ export class InputHandler extends Disposable implements IInputHandler { // calculate print space // expensive call, therefore we save width in line buffer // chWidth = wcwidth(code); - chWidth = this._terminal.unicodeProvider.wcwidth(code); + chWidth = wcwidth(code); // get charset replacement character if (charset) { diff --git a/src/Linkifier.ts b/src/Linkifier.ts index 13c737b8fa..38093738ce 100644 --- a/src/Linkifier.ts +++ b/src/Linkifier.ts @@ -255,9 +255,6 @@ export class Linkifier extends EventEmitter implements ILinkifier { * @param fg The link color for hover event. 
*/ private _addLink(x: number, y: number, uri: string, matcher: ILinkMatcher, fg: number): void { - // FIXME: to make runtime changes of the unicode version possible - // this may not rely on getStringCellWidth anymore - // instead sum widths saved in the buffer const width = (this._terminal as any).unicodeProvider.getStringCellWidth(uri); const x1 = x % this._terminal.cols; const y1 = y + Math.floor(x / this._terminal.cols); diff --git a/src/Terminal.ts b/src/Terminal.ts index 82ae3779b4..9b671a810d 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -304,6 +304,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II this._userScrolling = false; this.unicodeProvider = new UnicodeProvider(); + this.register(this.unicodeProvider); this._inputHandler = new InputHandler(this); this.register(this._inputHandler); // Reuse renderer if the Terminal is being recreated via a reset call. diff --git a/src/Types.ts b/src/Types.ts index 7c26853963..9436e0989f 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -540,8 +540,8 @@ export interface IUnicodeImplementation { } export interface IUnicodeProvider { - onRegister(callback: (version: number, provider: IUnicodeProvider) => void): void; - registeredVersions(): number[]; + addRegisterListener(callback: (version: number, provider: IUnicodeProvider) => void): void; + getRegisteredVersions(): number[]; getActiveVersion(): number; setActiveVersion(version: number, mode?: 'exact' | 'closest' | 'next' | 'previous'): number; wcwidth(ucs: number): number; diff --git a/src/UnicodeProvider.test.ts b/src/UnicodeProvider.test.ts new file mode 100644 index 0000000000..09a41eb4eb --- /dev/null +++ b/src/UnicodeProvider.test.ts @@ -0,0 +1,144 @@ +/** + * Copyright (c) 2018 The xterm.js authors. All rights reserved. 
+ * @license MIT + */ +import { assert } from 'chai'; +import { UnicodeProvider } from './UnicodeProvider'; +import { IUnicodeImplementation } from './Types'; + +const VERSION_DUMMY1: IUnicodeImplementation = { + version: 15, + wcwidth: (n: number) => n +}; +const VERSION_DUMMY2: IUnicodeImplementation = { + version: 17, + wcwidth: (n: number) => n +}; + +describe('UnicodeProvider', function(): void { + describe('static part', function(): void { + + it('provided default versions', function(): void { + assert.deepEqual(UnicodeProvider.getRegisteredVersions(), [6, 11]); + }); + + it('add version', function(): void { + UnicodeProvider.registerVersion(VERSION_DUMMY1); + assert.deepEqual(UnicodeProvider.getRegisteredVersions(), [6, 11, 15]); + delete UnicodeProvider.versions[15]; + }); + + it('register callback', function(): void { + UnicodeProvider.addRegisterListener((version) => { + assert.equal(version, 15); + }); + UnicodeProvider.registerVersion(VERSION_DUMMY1); + delete UnicodeProvider.versions[15]; + UnicodeProvider.removeAllRegisterListener(); + }); + + it('remove callback', function(): void { + let gotCalled = false; + const listener = (version: number) => { + assert.equal(version, 15); + gotCalled = true; + }; + UnicodeProvider.addRegisterListener(listener); + UnicodeProvider.registerVersion(VERSION_DUMMY1); + assert.equal(gotCalled, true); + gotCalled = false; + UnicodeProvider.removeRegisterListener(listener); + UnicodeProvider.registerVersion(VERSION_DUMMY2); + assert.equal(gotCalled, false); + delete UnicodeProvider.versions[15]; + delete UnicodeProvider.versions[17]; + UnicodeProvider.removeAllRegisterListener(); + }); + }); + describe('instance', function(): void { + let provider: UnicodeProvider; + + beforeEach(function(): void { + provider = new UnicodeProvider(); + }); + + it('highest version activated by default', function(): void { + assert.equal(provider.getActiveVersion(), 11); + }); + + it('activate nearest version', function(): void { + 
provider.setActiveVersion(0); + assert.equal(provider.getActiveVersion(), 6); + provider.setActiveVersion(5); + assert.equal(provider.getActiveVersion(), 6); + provider.setActiveVersion(7); + assert.equal(provider.getActiveVersion(), 6); + provider.setActiveVersion(8); + assert.equal(provider.getActiveVersion(), 6); + provider.setActiveVersion(10); + assert.equal(provider.getActiveVersion(), 11); + provider.setActiveVersion(13); + assert.equal(provider.getActiveVersion(), 11); + }); + + it('activate next lower', function(): void { + provider.setActiveVersion(15, 'previous'); + assert.equal(provider.getActiveVersion(), 11); + provider.setActiveVersion(11, 'previous'); + assert.equal(provider.getActiveVersion(), 11); + provider.setActiveVersion(10.5, 'previous'); + assert.equal(provider.getActiveVersion(), 6); + provider.setActiveVersion(5, 'previous'); + assert.equal(provider.getActiveVersion(), 6); + }); + + it('activate next higher', function(): void { + provider.setActiveVersion(5, 'next'); + assert.equal(provider.getActiveVersion(), 6); + provider.setActiveVersion(6, 'next'); + assert.equal(provider.getActiveVersion(), 6); + provider.setActiveVersion(10.5, 'next'); + assert.equal(provider.getActiveVersion(), 11); + provider.setActiveVersion(15, 'next'); + assert.equal(provider.getActiveVersion(), 11); + }); + + it('activate exact', function(): void { + assert.throws(() => provider.setActiveVersion(5, 'exact')); + assert.throws(() => provider.setActiveVersion(7, 'exact')); + assert.throws(() => provider.setActiveVersion(10, 'exact')); + assert.throws(() => provider.setActiveVersion(12, 'exact')); + assert.throws(() => provider.setActiveVersion(200, 'exact')); + assert.doesNotThrow(() => provider.setActiveVersion(6, 'exact')); + assert.doesNotThrow(() => provider.setActiveVersion(11, 'exact')); + }); + + it('register/remove callback', function(): void { + let gotCalled = false; + const listener = (version: number, prov: UnicodeProvider) => { + 
assert.equal(version, 15); + assert.equal(prov, provider); + gotCalled = true; + }; + provider.addRegisterListener(listener); + UnicodeProvider.registerVersion(VERSION_DUMMY1); + assert.equal(gotCalled, true); + gotCalled = false; + provider.removeRegisterListener(listener); + UnicodeProvider.registerVersion(VERSION_DUMMY2); + assert.equal(gotCalled, false); + delete UnicodeProvider.versions[15]; + delete UnicodeProvider.versions[17]; + UnicodeProvider.removeAllRegisterListener(); + provider.dispose(); + }); + + it('unicode test', function(): void { + const data = '🔷🔷🔷🔷🔷'; + provider.setActiveVersion(6); + assert.equal(provider.getStringCellWidth(data), 5); + provider.setActiveVersion(11); + assert.equal(provider.getStringCellWidth(data), 10); + }); + }); +}); diff --git a/src/UnicodeProvider.ts b/src/UnicodeProvider.ts index e0a2ce13d5..9add7cd31e 100644 --- a/src/UnicodeProvider.ts +++ b/src/UnicodeProvider.ts @@ -5,6 +5,9 @@ import { IUnicodeImplementation, IUnicodeProvider } from './Types'; import { v6 } from './unicode/v6'; import { v11 } from './unicode/v11'; +import { Disposable } from './common/Lifecycle'; + +type RegisterCallback = [(version: number, provider: UnicodeProvider) => void, (version: number) => void]; /** * Class to provide access to different unicode version implementations. @@ -17,21 +20,32 @@ import { v11 } from './unicode/v11'; * different unicode settings active while still referring to the * same underlying implementations. 
*/ -export class UnicodeProvider implements IUnicodeProvider { - static versions: {[key: string]: IUnicodeImplementation} = {}; +export class UnicodeProvider extends Disposable implements IUnicodeProvider { + public static versions: {[key: string]: IUnicodeImplementation} = {}; private static _registerCallbacks: ((version: number) => void)[] = []; - static onRegister(callback: (version: number) => void): void { + public static addRegisterListener(callback: (version: number) => void): void { UnicodeProvider._registerCallbacks.push(callback); } + public static removeRegisterListener(callback: (version: number) => void): void { + const pos = UnicodeProvider._registerCallbacks.indexOf(callback); + if (pos !== -1) { + UnicodeProvider._registerCallbacks.splice(pos, 1); + } + } + + public static removeAllRegisterListener(): void { + UnicodeProvider._registerCallbacks = []; + } + /** * Register an unicode implementation. * Possible entry point for unicode addons. - * In conjuction with `onRegister` it can be used - * to load implementations lazy. + * In conjuction with `addRegisterListener` it can be used + * to load and use implementations lazy. 
*/ - static registerVersion(impl: IUnicodeImplementation): void { + public static registerVersion(impl: IUnicodeImplementation): void { if (UnicodeProvider.versions[impl.version]) { throw new Error(`unicode version "${impl.version}" already registered`); } @@ -39,31 +53,58 @@ export class UnicodeProvider implements IUnicodeProvider { UnicodeProvider._registerCallbacks.forEach(cb => cb(impl.version)); } - static registeredVersions(): number[] { + public static getRegisteredVersions(): number[] { return Object.getOwnPropertyNames(UnicodeProvider.versions).map(parseFloat).sort((a, b) => a - b); } private _version: number; + private _registerCallbacks: RegisterCallback[] = []; public wcwidth: (ucs: number) => number; // defaults to the highest available version - constructor(version: number = 20) { + constructor(version: number = 200) { + super(); this.setActiveVersion(version); } + public dispose(): void { + this._registerCallbacks.forEach(el => UnicodeProvider.removeRegisterListener(el[1])); + this._registerCallbacks = null; + this.wcwidth = null; + } + /** * Callback to run when a version got registered. * Gets the newly registered version and * the `UnicodeProvider` instance as arguments. */ - public onRegister(callback: (version: number, provider: UnicodeProvider) => void): void { - UnicodeProvider.onRegister((version) => callback(version, this)); + public addRegisterListener(callback: (version: number, provider: UnicodeProvider) => void): void { + const func: (version: number) => void = (version) => callback(version, this); + this._registerCallbacks.push([callback, func]); + UnicodeProvider.addRegisterListener(func); + } + + /** + * Remove register listener. 
+ */ + public removeRegisterListener(callback: (version: number, provider: UnicodeProvider) => void): void { + let pos = -1; + for (let i = 0; i < this._registerCallbacks.length; ++i) { + if (this._registerCallbacks[i][0] === callback) { + pos = i; + break; + } + } + if (pos !== -1) { + UnicodeProvider.removeRegisterListener(this._registerCallbacks[pos][1]); + this._registerCallbacks.splice(pos, 1); + } } /** * Get a list of currently registered unicode versions. */ - public registeredVersions(): number[] { + public getRegisteredVersions(): number[] { return Object.getOwnPropertyNames(UnicodeProvider.versions).map(parseFloat).sort((a, b) => a - b); } @@ -82,14 +123,14 @@ export class UnicodeProvider implements IUnicodeProvider { * Returns the activated version number. */ public setActiveVersion(version: number, mode?: 'exact' | 'closest' | 'next' | 'previous'): number { - if (!this.registeredVersions().length) { + if (!this.getRegisteredVersions().length) { throw new Error('no unicode versions registered'); } // find closest matching version // Although not quite correct for typical versioning schemes 5.9 is treated closer to 6.0 than to 5.7. // Typically we will not ship subversions so this approximation should be close enough. - const versions = this.registeredVersions(); + const versions = this.getRegisteredVersions(); const distances = versions.map(el => Math.abs(version - el)); const closestIndex = distances.reduce((iMin, x, i, arr) => x < arr[iMin] ? 
i : iMin, 0); let newVersion = versions[closestIndex]; From b57eaa8d24088badb8d3f71c00ae1de6d822038f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Fri, 26 Oct 2018 18:41:01 +0200 Subject: [PATCH 3/7] fix changed method name --- demo/client.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/client.ts b/demo/client.ts index f74a092b4c..5c2dd2f64d 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -208,7 +208,7 @@ function initOptions(term: TerminalType): void { fontWeight: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], fontWeightBold: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], rendererType: ['dom', 'canvas'], - unicodeVersion: (term as any)._core.unicodeProvider.registeredVersions(), + unicodeVersion: (term as any)._core.unicodeProvider.getRegisteredVersions(), experimentalBufferLineImpl: ['JsArray', 'TypedArray'] }; const options = Object.keys((term)._core.options); From c57c258c89a095b213bb6c1e8613c232067c9bda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Thu, 8 Nov 2018 08:22:32 +0100 Subject: [PATCH 4/7] review cleanup --- demo/client.ts | 4 +- src/InputHandler.ts | 3 +- src/Linkifier.ts | 3 +- src/Terminal.ts | 12 +-- src/Types.ts | 14 +-- src/UnicodeManager.test.ts | 110 +++++++++++++++++++++ src/UnicodeManager.ts | 157 ++++++++++++++++++++++++++++++ src/UnicodeProvider.test.ts | 144 --------------------------- src/UnicodeProvider.ts | 188 ------------------------------------ src/unicode/v11.ts | 4 +- src/unicode/v6.ts | 4 +- src/utils/TestUtils.test.ts | 8 +- typings/xterm.d.ts | 6 ++ 13 files changed, 298 insertions(+), 359 deletions(-) create mode 100644 src/UnicodeManager.test.ts create mode 100644 src/UnicodeManager.ts delete mode 100644 src/UnicodeProvider.test.ts delete mode 100644 src/UnicodeProvider.ts diff --git a/demo/client.ts b/demo/client.ts index 5c2dd2f64d..dc72fde3b3 100644 --- a/demo/client.ts 
+++ b/demo/client.ts @@ -208,7 +208,7 @@ function initOptions(term: TerminalType): void { fontWeight: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], fontWeightBold: ['normal', 'bold', '100', '200', '300', '400', '500', '600', '700', '800', '900'], rendererType: ['dom', 'canvas'], - unicodeVersion: (term as any)._core.unicodeProvider.getRegisteredVersions(), + unicodeVersion: (term as any)._core.unicodeManager.registeredVersions.map(String), experimentalBufferLineImpl: ['JsArray', 'TypedArray'] }; const options = Object.keys((term)._core.options); @@ -220,9 +220,7 @@ function initOptions(term: TerminalType): void { booleanOptions.push(o); break; case 'number': - if (o !== 'unicodeVersion') { numberOptions.push(o); - } break; default: if (Object.keys(stringOptions).indexOf(o) === -1) { diff --git a/src/InputHandler.ts b/src/InputHandler.ts index 517f9c3b0b..6cf967c788 100644 --- a/src/InputHandler.ts +++ b/src/InputHandler.ts @@ -335,7 +335,7 @@ export class InputHandler extends Disposable implements IInputHandler { const wraparoundMode: boolean = this._terminal.wraparoundMode; const insertMode: boolean = this._terminal.insertMode; const curAttr: number = this._terminal.curAttr; - const wcwidth = this._terminal.unicodeProvider.wcwidth; + const wcwidth = this._terminal.unicodeManager.wcwidth; let bufferRow = buffer.lines.get(buffer.y + buffer.ybase); this._terminal.updateRange(buffer.y); @@ -363,7 +363,6 @@ export class InputHandler extends Disposable implements IInputHandler { // calculate print space // expensive call, therefore we save width in line buffer - // chWidth = wcwidth(code); chWidth = wcwidth(code); // get charset replacement character diff --git a/src/Linkifier.ts b/src/Linkifier.ts index 38093738ce..2a8d296365 100644 --- a/src/Linkifier.ts +++ b/src/Linkifier.ts @@ -255,7 +255,8 @@ export class Linkifier extends EventEmitter implements ILinkifier { * @param fg The link color for hover event. 
*/ private _addLink(x: number, y: number, uri: string, matcher: ILinkMatcher, fg: number): void { - const width = (this._terminal as any).unicodeProvider.getStringCellWidth(uri); + // FIXME: to support unicode version runtime switch replace this by endIndex calculation + const width = this._terminal.unicodeManager.getStringCellWidth(uri); const x1 = x % this._terminal.cols; const y1 = y + Math.floor(x / this._terminal.cols); let x2 = (x1 + width) % this._terminal.cols; diff --git a/src/Terminal.ts b/src/Terminal.ts index b95769bdf5..f192df7c39 100644 --- a/src/Terminal.ts +++ b/src/Terminal.ts @@ -52,7 +52,7 @@ import { DomRenderer } from './renderer/dom/DomRenderer'; import { IKeyboardEvent } from './common/Types'; import { evaluateKeyboardEvent } from './core/input/Keyboard'; import { KeyboardResultType, ICharset } from './core/Types'; -import { UnicodeProvider } from './UnicodeProvider'; +import { UnicodeVersionManager } from './UnicodeManager'; // Let it work inside Node.js for automated testing purposes. const document = (typeof window !== 'undefined') ? 
window.document : null; @@ -107,7 +107,7 @@ const DEFAULT_OPTIONS: ITerminalOptions = { theme: null, rightClickSelectsWord: Browser.isMac, rendererType: 'canvas', - unicodeVersion: 11, + unicodeVersion: '11', experimentalBufferLineImpl: 'JsArray' }; @@ -196,7 +196,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II private _userScrolling: boolean; private _inputHandler: InputHandler; - public unicodeProvider: UnicodeProvider; + public unicodeManager: UnicodeVersionManager; public soundManager: SoundManager; public renderer: IRenderer; public selectionManager: SelectionManager; @@ -303,8 +303,8 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II // this._writeStopped = false; this._userScrolling = false; - this.unicodeProvider = new UnicodeProvider(); - this.register(this.unicodeProvider); + this.unicodeManager = new UnicodeVersionManager(); + this.register(this.unicodeManager); this._inputHandler = new InputHandler(this); this.register(this._inputHandler); // Reuse renderer if the Terminal is being recreated via a reset call. 
@@ -499,7 +499,7 @@ export class Terminal extends EventEmitter implements ITerminal, IDisposable, II } break; case 'tabStopWidth': this.buffers.setupTabStops(); break; - case 'unicodeVersion': this.unicodeProvider.setActiveVersion(parseFloat(value)); break; + case 'unicodeVersion': this.unicodeManager.activeVersion = parseFloat(value); break; case 'experimentalBufferLineImpl': this.buffers.normal.setBufferLineFactory(value); this.buffers.alt.setBufferLineFactory(value); diff --git a/src/Types.ts b/src/Types.ts index 9436e0989f..a13dd17fad 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -87,7 +87,7 @@ export interface IInputHandlingTerminal extends IEventEmitter { handleTitle(title: string): void; index(): void; reverseIndex(): void; - unicodeProvider: IUnicodeProvider; + unicodeManager: IUnicodeVersionManager; } export interface IViewport extends IDisposable { @@ -222,6 +222,7 @@ export interface ITerminal extends PublicTerminal, IElementAccessor, IBufferAcce viewport: IViewport; bracketedPasteMode: boolean; applicationCursor: boolean; + unicodeManager: IUnicodeVersionManager; /** * Emit the 'data' event and populate the given data. @@ -534,16 +535,15 @@ export interface IBufferLineConstructor { /** * Interface for unicode version implementations. 
*/ -export interface IUnicodeImplementation { +export interface IUnicodeVersionProvider { version: number; wcwidth(ucs: number): number; } -export interface IUnicodeProvider { - addRegisterListener(callback: (version: number, provider: IUnicodeProvider) => void): void; - getRegisteredVersions(): number[]; - getActiveVersion(): number; - setActiveVersion(version: number, mode?: 'exact' | 'closest' | 'next' | 'previous'): number; +export interface IUnicodeVersionManager { + addRegisterListener(callback: (version: number, provider: IUnicodeVersionManager) => void): void; + registeredVersions: number[]; + activeVersion: number; wcwidth(ucs: number): number; getStringCellWidth(s: string): number; } diff --git a/src/UnicodeManager.test.ts b/src/UnicodeManager.test.ts new file mode 100644 index 0000000000..6d10cfaa2a --- /dev/null +++ b/src/UnicodeManager.test.ts @@ -0,0 +1,110 @@ +/** + * Copyright (c) 2018 The xterm.js authors. All rights reserved. + * @license MIT + */ +import { assert } from 'chai'; +import { UnicodeVersionManager } from './UnicodeManager'; +import { IUnicodeVersionProvider } from './Types'; + +const VERSION_DUMMY1: IUnicodeVersionProvider = { + version: 15, + wcwidth: (n: number) => n +}; +const VERSION_DUMMY2: IUnicodeVersionProvider = { + version: 17, + wcwidth: (n: number) => n +}; + +describe('UnicodeProvider', function(): void { + describe('static part', function(): void { + + it('provided default versions', function(): void { + assert.deepEqual(UnicodeVersionManager.registeredVersions, [6, 11]); + }); + + it('add version', function(): void { + UnicodeVersionManager.registerVersion(VERSION_DUMMY1); + assert.deepEqual(UnicodeVersionManager.registeredVersions, [6, 11, 15]); + delete UnicodeVersionManager.versions[15]; + }); + + it('register callback', function(): void { + let gotCalled = false; + UnicodeVersionManager.addRegisterListener((version) => { + assert.equal(version, 15); + gotCalled = true; + }); + 
UnicodeVersionManager.registerVersion(VERSION_DUMMY1); + assert.equal(gotCalled, true); + delete UnicodeVersionManager.versions[15]; + UnicodeVersionManager.removeAllRegisterListener(); + }); + + it('remove callback', function(): void { + let gotCalled = false; + const listener = (version: number) => { + assert.equal(version, 15); + gotCalled = true; + }; + UnicodeVersionManager.addRegisterListener(listener); + UnicodeVersionManager.registerVersion(VERSION_DUMMY1); + assert.equal(gotCalled, true); + gotCalled = false; + UnicodeVersionManager.removeRegisterListener(listener); + UnicodeVersionManager.registerVersion(VERSION_DUMMY2); + assert.equal(gotCalled, false); + delete UnicodeVersionManager.versions[15]; + delete UnicodeVersionManager.versions[17]; + UnicodeVersionManager.removeAllRegisterListener(); + }); + }); + describe('instance', function(): void { + let provider: UnicodeVersionManager; + + beforeEach(function(): void { + provider = new UnicodeVersionManager(); + }); + + it('highest version activated by default', function(): void { + assert.equal(provider.activeVersion, 11); + }); + + it('activate exact', function(): void { + assert.throws(() => provider.activeVersion = 5); + assert.throws(() => provider.activeVersion = 7); + assert.throws(() => provider.activeVersion = 10); + assert.throws(() => provider.activeVersion = 12); + assert.throws(() => provider.activeVersion = 200); + assert.doesNotThrow(() => provider.activeVersion = 6); + assert.doesNotThrow(() => provider.activeVersion = 11); + }); + + it('register/remove callback', function(): void { + let gotCalled = false; + const listener = (version: number, prov: UnicodeVersionManager) => { + assert.equal(version, 15); + assert.equal(prov, provider); + gotCalled = true; + }; + provider.addRegisterListener(listener); + UnicodeVersionManager.registerVersion(VERSION_DUMMY1); + assert.equal(gotCalled, true); + gotCalled = false; + provider.removeRegisterListener(listener); + 
UnicodeVersionManager.registerVersion(VERSION_DUMMY2); + assert.equal(gotCalled, false); + delete UnicodeVersionManager.versions[15]; + delete UnicodeVersionManager.versions[17]; + UnicodeVersionManager.removeAllRegisterListener(); + provider.dispose(); + }); + + it('unicode test', function(): void { + const data = '🔷🔷🔷🔷🔷'; + provider.activeVersion = 6; + assert.equal(provider.getStringCellWidth(data), 5); + provider.activeVersion = 11; + assert.equal(provider.getStringCellWidth(data), 10); + }); + }); +}); diff --git a/src/UnicodeManager.ts b/src/UnicodeManager.ts new file mode 100644 index 0000000000..f254d0307c --- /dev/null +++ b/src/UnicodeManager.ts @@ -0,0 +1,157 @@ +/** + * Copyright (c) 2018 The xterm.js authors. All rights reserved. + * @license MIT + */ +import { IUnicodeVersionProvider, IUnicodeVersionManager } from './Types'; +import { v6 } from './unicode/v6'; +import { v11 } from './unicode/v11'; +import { Disposable } from './common/Lifecycle'; + +type RegisterCallback = [(version: number, provider: UnicodeVersionManager) => void, (version: number) => void]; + +/** + * Class to provide access to different unicode version implementations. + * + * The version related implementations are stored statically + * to avoid recreating them for every single instance. + * + * An instance of this class is meant to serve unicode specific implementations + * for a single terminal instance. This way multiple terminals can have + * different unicode settings active while still referring to the + * same underlying implementations. 
+ */ +export class UnicodeVersionManager extends Disposable implements IUnicodeVersionManager { + public static versions: {[version: number]: IUnicodeVersionProvider} = {}; + private static _registerCallbacks: ((version: number) => void)[] = []; + + public static addRegisterListener(callback: (version: number) => void): void { + UnicodeVersionManager._registerCallbacks.push(callback); + } + + public static removeRegisterListener(callback: (version: number) => void): void { + const pos = UnicodeVersionManager._registerCallbacks.indexOf(callback); + if (pos !== -1) { + UnicodeVersionManager._registerCallbacks.splice(pos, 1); + } + } + + public static removeAllRegisterListener(): void { + UnicodeVersionManager._registerCallbacks = []; + } + + /** + * Register an unicode implementation. + * Possible entry point for unicode addons. + * In conjuction with `addRegisterListener` it can be used + * to load and use implementations lazy. + */ + public static registerVersion(impl: IUnicodeVersionProvider): void { + if (UnicodeVersionManager.versions[impl.version]) { + throw new Error(`unicode version "${impl.version}" already registered`); + } + UnicodeVersionManager.versions[impl.version] = impl; + UnicodeVersionManager._registerCallbacks.forEach(cb => cb(impl.version)); + } + + public static get registeredVersions(): number[] { + return Object.getOwnPropertyNames(UnicodeVersionManager.versions).map(parseFloat).sort((a, b) => a - b); + } + + private _version: number; + private _registerCallbacks: RegisterCallback[] = []; + public wcwidth: (ucs: number) => number; + + // defaults to the highest available version + constructor(version?: number) { + super(); + const versions = this.registeredVersions; + this.activeVersion = versions[version || versions.length - 1]; + } + + public dispose(): void { + this._registerCallbacks.forEach(el => UnicodeVersionManager.removeRegisterListener(el[1])); + this._registerCallbacks = null; + this.wcwidth = null; + } + + /** + * Callback to run 
when a version got registered. + * Gets the newly registered version and + * the `UnicodeProvider` instance as arguments. + */ + public addRegisterListener(callback: (version: number, provider: UnicodeVersionManager) => void): void { + const func: (version: number) => void = (version) => callback(version, this); + this._registerCallbacks.push([callback, func]); + UnicodeVersionManager.addRegisterListener(func); + } + + /** + * Remove register listener. + */ + public removeRegisterListener(callback: (version: number, provider: UnicodeVersionManager) => void): void { + let pos = -1; + for (let i = 0; i < this._registerCallbacks.length; ++i) { + if (this._registerCallbacks[i][0] === callback) { + pos = i; + break; + } + } + if (pos !== -1) { + UnicodeVersionManager.removeRegisterListener(this._registerCallbacks[pos][1]); + this._registerCallbacks.splice(pos, 1); + } + } + + /** + * Get a list of currently registered unicode versions. + */ + public get registeredVersions(): number[] { + return Object.getOwnPropertyNames(UnicodeVersionManager.versions).map(parseFloat).sort((a, b) => a - b); + } + + /** + * Get active unicode version. + */ + public get activeVersion(): number { + return this._version; + } + + /** + * Set active unicode version. + */ + public set activeVersion(version: number) { + if (!this.registeredVersions.length || !UnicodeVersionManager.versions[version]) { + throw new Error(`unicode version "${version}" not registered`); + } + + // swap wcwidth impl + this.wcwidth = UnicodeVersionManager.versions[version].wcwidth; + this._version = version; + } + + /** + * Get the terminal cell width for a given string. 
+ */ + public getStringCellWidth(s: string): number { + let result = 0; + for (let i = 0; i < s.length; ++i) { + let code = s.charCodeAt(i); + if (0xD800 <= code && code <= 0xDBFF) { + const low = s.charCodeAt(i + 1); + if (isNaN(low)) { + return result; + } + code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000; + } + if (0xDC00 <= code && code <= 0xDFFF) { + continue; + } + result += this.wcwidth(code); + } + return result; + } +} + +// register statically shipped versions +UnicodeVersionManager.registerVersion(v6); +UnicodeVersionManager.registerVersion(v11); diff --git a/src/UnicodeProvider.test.ts b/src/UnicodeProvider.test.ts deleted file mode 100644 index 09a41eb4eb..0000000000 --- a/src/UnicodeProvider.test.ts +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Copyright (c) 2018 The xterm.js authors. All rights reserved. - * @license MIT - */ -import { assert } from 'chai'; -import { UnicodeProvider } from './UnicodeProvider'; -import { IUnicodeImplementation } from './Types'; - -const VERSION_DUMMY1: IUnicodeImplementation = { - version: 15, - wcwidth: (n: number) => n -}; -const VERSION_DUMMY2: IUnicodeImplementation = { - version: 17, - wcwidth: (n: number) => n -}; - -describe('UnicodeProvider', function(): void { - describe('static part', function(): void { - - it('provided default versions', function(): void { - assert.deepEqual(UnicodeProvider.getRegisteredVersions(), [6, 11]); - }); - - it('add version', function(): void { - UnicodeProvider.registerVersion(VERSION_DUMMY1); - assert.deepEqual(UnicodeProvider.getRegisteredVersions(), [6, 11, 15]); - delete UnicodeProvider.versions[15]; - }); - - it('register callback', function(): void { - UnicodeProvider.addRegisterListener((version) => { - assert.equal(version, 15); - }); - UnicodeProvider.registerVersion(VERSION_DUMMY1); - delete UnicodeProvider.versions[15]; - UnicodeProvider.removeAllRegisterListener(); - }); - - it('remove callback', function(): void { - let gotCalled = false; - const listener = 
(version: number) => { - assert.equal(version, 15); - gotCalled = true; - }; - UnicodeProvider.addRegisterListener(listener); - UnicodeProvider.registerVersion(VERSION_DUMMY1); - assert.equal(gotCalled, true); - gotCalled = false; - UnicodeProvider.removeRegisterListener(listener); - UnicodeProvider.registerVersion(VERSION_DUMMY2); - assert.equal(gotCalled, false); - delete UnicodeProvider.versions[15]; - delete UnicodeProvider.versions[17]; - UnicodeProvider.removeAllRegisterListener(); - }); - }); - describe('instance', function(): void { - let provider: UnicodeProvider; - - beforeEach(function(): void { - provider = new UnicodeProvider(); - }); - - it('highest version activated by default', function(): void { - assert.equal(provider.getActiveVersion(), 11); - }); - - it('activate nearest version', function(): void { - provider.setActiveVersion(0); - assert.equal(provider.getActiveVersion(), 6); - provider.setActiveVersion(5); - assert.equal(provider.getActiveVersion(), 6); - provider.setActiveVersion(7); - assert.equal(provider.getActiveVersion(), 6); - provider.setActiveVersion(8); - assert.equal(provider.getActiveVersion(), 6); - provider.setActiveVersion(10); - assert.equal(provider.getActiveVersion(), 11); - provider.setActiveVersion(13); - assert.equal(provider.getActiveVersion(), 11); - }); - - it('activate next lower', function(): void { - provider.setActiveVersion(15, 'previous'); - assert.equal(provider.getActiveVersion(), 11); - provider.setActiveVersion(11, 'previous'); - assert.equal(provider.getActiveVersion(), 11); - provider.setActiveVersion(10.5, 'previous'); - assert.equal(provider.getActiveVersion(), 6); - provider.setActiveVersion(5, 'previous'); - assert.equal(provider.getActiveVersion(), 6); - }); - - it('activate next higher', function(): void { - provider.setActiveVersion(5, 'next'); - assert.equal(provider.getActiveVersion(), 6); - provider.setActiveVersion(6, 'next'); - assert.equal(provider.getActiveVersion(), 6); - 
provider.setActiveVersion(10.5, 'next'); - assert.equal(provider.getActiveVersion(), 11); - provider.setActiveVersion(15, 'next'); - assert.equal(provider.getActiveVersion(), 11); - }); - - it('activate exact', function(): void { - assert.throws(() => provider.setActiveVersion(5, 'exact')); - assert.throws(() => provider.setActiveVersion(7, 'exact')); - assert.throws(() => provider.setActiveVersion(10, 'exact')); - assert.throws(() => provider.setActiveVersion(12, 'exact')); - assert.throws(() => provider.setActiveVersion(200, 'exact')); - assert.doesNotThrow(() => provider.setActiveVersion(6, 'exact')); - assert.doesNotThrow(() => provider.setActiveVersion(11, 'exact')); - }); - - it('register/remove callback', function(): void { - let gotCalled = false; - const listener = (version: number, prov: UnicodeProvider) => { - assert.equal(version, 15); - assert.equal(prov, provider); - gotCalled = true; - }; - provider.addRegisterListener(listener); - UnicodeProvider.registerVersion(VERSION_DUMMY1); - assert.equal(gotCalled, true); - gotCalled = false; - provider.removeRegisterListener(listener); - UnicodeProvider.registerVersion(VERSION_DUMMY2); - assert.equal(gotCalled, false); - delete UnicodeProvider.versions[15]; - delete UnicodeProvider.versions[17]; - UnicodeProvider.removeAllRegisterListener(); - provider.dispose(); - }); - - it('unicode test', function(): void { - const data = '🔷🔷🔷🔷🔷'; - provider.setActiveVersion(6); - assert.equal(provider.getStringCellWidth(data), 5); - provider.setActiveVersion(11); - assert.equal(provider.getStringCellWidth(data), 10); - }); - }); -}); diff --git a/src/UnicodeProvider.ts b/src/UnicodeProvider.ts deleted file mode 100644 index 9add7cd31e..0000000000 --- a/src/UnicodeProvider.ts +++ /dev/null @@ -1,188 +0,0 @@ -/** - * Copyright (c) 2018 The xterm.js authors. All rights reserved. 
- * @license MIT - */ -import { IUnicodeImplementation, IUnicodeProvider } from './Types'; -import { v6 } from './unicode/v6'; -import { v11 } from './unicode/v11'; -import { Disposable } from './common/Lifecycle'; - -type RegisterCallback = [(version: number, provider: UnicodeProvider) => void, (version: number) => void]; - -/** - * Class to provide access to different unicode version implementations. - * - * The version related implementations are stored statically - * to avoid recreating them for every single instance. - * - * An instance of this class is meant to serve unicode specific implementations - * for a single terminal instance. This way multiple terminals can have - * different unicode settings active while still referring to the - * same underlying implementations. - */ -export class UnicodeProvider extends Disposable implements IUnicodeProvider { - public static versions: {[key: string]: IUnicodeImplementation} = {}; - private static _registerCallbacks: ((version: number) => void)[] = []; - - public static addRegisterListener(callback: (version: number) => void): void { - UnicodeProvider._registerCallbacks.push(callback); - } - - public static removeRegisterListener(callback: (version: number) => void): void { - const pos = UnicodeProvider._registerCallbacks.indexOf(callback); - if (pos !== -1) { - UnicodeProvider._registerCallbacks.splice(pos, 1); - } - } - - public static removeAllRegisterListener(): void { - UnicodeProvider._registerCallbacks = []; - } - - /** - * Register an unicode implementation. - * Possible entry point for unicode addons. - * In conjuction with `addRegisterListener` it can be used - * to load and use implementations lazy. 
- */ - public static registerVersion(impl: IUnicodeImplementation): void { - if (UnicodeProvider.versions[impl.version]) { - throw new Error(`unicode version "${impl.version}" already registered`); - } - UnicodeProvider.versions[impl.version] = impl; - UnicodeProvider._registerCallbacks.forEach(cb => cb(impl.version)); - } - - public static getRegisteredVersions(): number[] { - return Object.getOwnPropertyNames(UnicodeProvider.versions).map(parseFloat).sort((a, b) => a - b); - } - - private _version: number; - private _registerCallbacks: RegisterCallback[] = []; - public wcwidth: (ucs: number) => number; - - // defaults to the highest available version - constructor(version: number = 200) { - super(); - this.setActiveVersion(version); - } - - public dispose(): void { - this._registerCallbacks.forEach(el => UnicodeProvider.removeRegisterListener(el[1])); - this._registerCallbacks = null; - this.wcwidth = null; - } - - /** - * Callback to run when a version got registered. - * Gets the newly registered version and - * the `UnicodeProvider` instance as arguments. - */ - public addRegisterListener(callback: (version: number, provider: UnicodeProvider) => void): void { - const func: (version: number) => void = (version) => callback(version, this); - this._registerCallbacks.push([callback, func]); - UnicodeProvider.addRegisterListener(func); - } - - /** - * Remove register listener. - */ - public removeRegisterListener(callback: (version: number, provider: UnicodeProvider) => void): void { - let pos = -1; - for (let i = 0; i < this._registerCallbacks.length; ++i) { - if (this._registerCallbacks[i][0] === callback) { - pos = i; - break; - } - } - if (pos !== -1) { - UnicodeProvider.removeRegisterListener(this._registerCallbacks[pos][1]); - this._registerCallbacks.splice(pos, 1); - } - } - - /** - * Get a list of currently registered unicode versions. 
- */ - public getRegisteredVersions(): number[] { - return Object.getOwnPropertyNames(UnicodeProvider.versions).map(parseFloat).sort((a, b) => a - b); - } - - /** - * Get the currently active unicode version. - */ - public getActiveVersion(): number { - return this._version; - } - - /** - * Activate a registered unicode version. By default the closest version will be activated - * (can be higher or lower). Setting `mode` to 'next' tries to get at least that version, - * 'previous' tries to get the closest lower version. - * Unless there is no version registered this method will always succeed. - * Returns the activated version number. - */ - public setActiveVersion(version: number, mode?: 'exact' | 'closest' | 'next' | 'previous'): number { - if (!this.getRegisteredVersions().length) { - throw new Error('no unicode versions registered'); - } - - // find closest matching version - // Although not quite correct for typical versioning schemes 5.9 is treated closer to 6.0 than to 5.7. - // Typically we will not ship subversions so this approximation should be close enough. - const versions = this.getRegisteredVersions(); - const distances = versions.map(el => Math.abs(version - el)); - const closestIndex = distances.reduce((iMin, x, i, arr) => x < arr[iMin] ? 
i : iMin, 0); - let newVersion = versions[closestIndex]; - - if (mode === 'exact') { - // exact version match requested - if (version !== newVersion) { - throw new Error(`unicode version "${version}" not registered`); - } - } else { - // take the higher one if available - if (mode === 'next') { - if (newVersion < version && closestIndex < versions.length - 1) { - newVersion = versions[closestIndex + 1]; - } - // take the lower one if available - } else if (mode === 'previous') { - if (newVersion > version && closestIndex) { - newVersion = versions[closestIndex - 1]; - } - } - } - - // swap wcwidth impl - this.wcwidth = UnicodeProvider.versions[newVersion].wcwidth; - this._version = newVersion; - return this._version; - } - - /** - * Get the terminal cell width for a string. - */ - public getStringCellWidth(s: string): number { - let result = 0; - for (let i = 0; i < s.length; ++i) { - let code = s.charCodeAt(i); - if (0xD800 <= code && code <= 0xDBFF) { - const low = s.charCodeAt(i + 1); - if (isNaN(low)) { - return result; - } - code = ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000; - } - if (0xDC00 <= code && code <= 0xDFFF) { - continue; - } - result += this.wcwidth(code); - } - return result; - } -} - -// register statically shipped versions -UnicodeProvider.registerVersion(v6); -UnicodeProvider.registerVersion(v11); diff --git a/src/unicode/v11.ts b/src/unicode/v11.ts index dd710a93c3..1b9c02dcaf 100644 --- a/src/unicode/v11.ts +++ b/src/unicode/v11.ts @@ -2,9 +2,9 @@ * Copyright (c) 2018 The xterm.js authors. All rights reserved. 
* @license MIT */ -import { IUnicodeImplementation } from '../Types'; +import { IUnicodeVersionProvider } from '../Types'; -export const v11: IUnicodeImplementation = { +export const v11: IUnicodeVersionProvider = { version: 11, wcwidth: (function(opts: {nul: number, control: number}): (ucs: number) => number { // Generated: 2018-09-24T16:45:44.483077 diff --git a/src/unicode/v6.ts b/src/unicode/v6.ts index c440467482..3bf775079f 100644 --- a/src/unicode/v6.ts +++ b/src/unicode/v6.ts @@ -2,9 +2,9 @@ * Copyright (c) 2018 The xterm.js authors. All rights reserved. * @license MIT */ -import { IUnicodeImplementation } from '../Types'; +import { IUnicodeVersionProvider } from '../Types'; -export const v6: IUnicodeImplementation = { +export const v6: IUnicodeVersionProvider = { version: 6, wcwidth: (function(opts: {nul: number, control: number}): (ucs: number) => number { // extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c diff --git a/src/utils/TestUtils.test.ts b/src/utils/TestUtils.test.ts index 59a385ed05..ed08ae5969 100644 --- a/src/utils/TestUtils.test.ts +++ b/src/utils/TestUtils.test.ts @@ -4,13 +4,13 @@ */ import { IColorSet, IRenderer, IRenderDimensions, IColorManager } from '../renderer/Types'; -import { IInputHandlingTerminal, IViewport, ICompositionHelper, ITerminal, IBuffer, IBufferSet, IBrowser, ICharMeasure, ISelectionManager, ITerminalOptions, ILinkifier, IMouseHelper, ILinkMatcherOptions, CharacterJoinerHandler, IBufferLine, IBufferStringIterator, IUnicodeProvider } from '../Types'; +import { IInputHandlingTerminal, IViewport, ICompositionHelper, ITerminal, IBuffer, IBufferSet, IBrowser, ICharMeasure, ISelectionManager, ITerminalOptions, ILinkifier, IMouseHelper, ILinkMatcherOptions, CharacterJoinerHandler, IBufferLine, IBufferStringIterator, IUnicodeVersionManager } from '../Types'; import { ICircularList, XtermListener } from '../common/Types'; import { Buffer } from '../Buffer'; import * as Browser from '../shared/utils/Browser'; import 
{ ITheme, IDisposable, IMarker } from 'xterm'; import { Terminal } from '../Terminal'; -import { UnicodeProvider } from '../UnicodeProvider'; +import { UnicodeVersionManager } from '../UnicodeManager'; export class TestTerminal extends Terminal { writeSync(data: string): void { @@ -157,7 +157,7 @@ export class MockTerminal implements ITerminal { } registerCharacterJoiner(handler: CharacterJoinerHandler): number { return 0; } deregisterCharacterJoiner(joinerId: number): void { } - unicodeProvider: IUnicodeProvider = new UnicodeProvider(); + unicodeManager: IUnicodeVersionManager = new UnicodeVersionManager(); } export class MockCharMeasure implements ICharMeasure { @@ -199,7 +199,7 @@ export class MockInputHandlingTerminal implements IInputHandlingTerminal { buffer: IBuffer = new MockBuffer(); viewport: IViewport; selectionManager: ISelectionManager; - unicodeProvider: IUnicodeProvider; + unicodeManager: IUnicodeVersionManager; focus(): void { throw new Error('Method not implemented.'); } diff --git a/typings/xterm.d.ts b/typings/xterm.d.ts index c6b6b1e59f..52d20dd7a3 100644 --- a/typings/xterm.d.ts +++ b/typings/xterm.d.ts @@ -199,6 +199,12 @@ declare module 'xterm' { * The color theme of the terminal. */ theme?: ITheme; + + /** + * Set the unicode version. + * Defaults to the highest available version. 
+ */ + unicodeVersion?: string | number; } /** From 30734f010a70296d56671a8f8fe0f2f26392ae15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Thu, 8 Nov 2018 08:37:50 +0100 Subject: [PATCH 5/7] cleanup --- src/Types.ts | 3 ++- src/UnicodeManager.ts | 9 ++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Types.ts b/src/Types.ts index a13dd17fad..ee6fe20793 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -541,7 +541,8 @@ export interface IUnicodeVersionProvider { } export interface IUnicodeVersionManager { - addRegisterListener(callback: (version: number, provider: IUnicodeVersionManager) => void): void; + addRegisterListener(callback: (version: number, manager: IUnicodeVersionManager) => void): void; + removeRegisterListener(callback: (version: number, provider: IUnicodeVersionManager) => void): void; registeredVersions: number[]; activeVersion: number; wcwidth(ucs: number): number; diff --git a/src/UnicodeManager.ts b/src/UnicodeManager.ts index f254d0307c..0d288b5f03 100644 --- a/src/UnicodeManager.ts +++ b/src/UnicodeManager.ts @@ -40,10 +40,10 @@ export class UnicodeVersionManager extends Disposable implements IUnicodeVersion } /** - * Register an unicode implementation. + * Register an unicode version. * Possible entry point for unicode addons. * In conjuction with `addRegisterListener` it can be used - * to load and use implementations lazy. + * to load and use versions lazy. */ public static registerVersion(impl: IUnicodeVersionProvider): void { if (UnicodeVersionManager.versions[impl.version]) { @@ -79,7 +79,7 @@ export class UnicodeVersionManager extends Disposable implements IUnicodeVersion * Gets the newly registered version and * the `UnicodeProvider` instance as arguments. 
*/ - public addRegisterListener(callback: (version: number, provider: UnicodeVersionManager) => void): void { + public addRegisterListener(callback: (version: number, manager: IUnicodeVersionManager) => void): void { const func: (version: number) => void = (version) => callback(version, this); this._registerCallbacks.push([callback, func]); UnicodeVersionManager.addRegisterListener(func); @@ -88,7 +88,7 @@ export class UnicodeVersionManager extends Disposable implements IUnicodeVersion /** * Remove register listener. */ - public removeRegisterListener(callback: (version: number, provider: UnicodeVersionManager) => void): void { + public removeRegisterListener(callback: (version: number, manager: IUnicodeVersionManager) => void): void { let pos = -1; for (let i = 0; i < this._registerCallbacks.length; ++i) { if (this._registerCallbacks[i][0] === callback) { @@ -123,7 +123,6 @@ export class UnicodeVersionManager extends Disposable implements IUnicodeVersion if (!this.registeredVersions.length || !UnicodeVersionManager.versions[version]) { throw new Error(`unicode version "${version}" not registered`); } - // swap wcwidth impl this.wcwidth = UnicodeVersionManager.versions[version].wcwidth; this._version = version; From 8306da6e84e39770cec1aec5a19294d636c40d40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 24 Nov 2018 02:01:40 +0100 Subject: [PATCH 6/7] apply new wcwidth logic to versions; init lookup table on activate --- src/Types.ts | 1 + src/UnicodeManager.ts | 3 +- src/unicode/v11.test.ts | 566 +++++++++++++++++++++ src/unicode/v11.ts | 1067 +++++++++++++++++++-------------------- src/unicode/v6.test.ts | 186 +++++++ src/unicode/v6.ts | 295 +++++------ 6 files changed, 1418 insertions(+), 700 deletions(-) create mode 100644 src/unicode/v11.test.ts create mode 100644 src/unicode/v6.test.ts diff --git a/src/Types.ts b/src/Types.ts index 01fef1e57b..638ddfc9b1 100644 --- a/src/Types.ts +++ b/src/Types.ts @@ -537,6 +537,7 @@ export 
interface IBufferLineConstructor { */ export interface IUnicodeVersionProvider { version: number; + init(): void; wcwidth(ucs: number): number; } diff --git a/src/UnicodeManager.ts b/src/UnicodeManager.ts index 0d288b5f03..8aef8f10f1 100644 --- a/src/UnicodeManager.ts +++ b/src/UnicodeManager.ts @@ -123,7 +123,8 @@ export class UnicodeVersionManager extends Disposable implements IUnicodeVersion if (!this.registeredVersions.length || !UnicodeVersionManager.versions[version]) { throw new Error(`unicode version "${version}" not registered`); } - // swap wcwidth impl + // init lookup table and swap wcwidth impl + UnicodeVersionManager.versions[version].init(); this.wcwidth = UnicodeVersionManager.versions[version].wcwidth; this._version = version; } diff --git a/src/unicode/v11.test.ts b/src/unicode/v11.test.ts new file mode 100644 index 0000000000..ddb04b6909 --- /dev/null +++ b/src/unicode/v11.test.ts @@ -0,0 +1,566 @@ +/** + * Copyright (c) 2018 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { assert } from 'chai'; +import { v11 } from './v11'; + +// old implementation +const wcwidthOld = (function(opts: {nul: number, control: number}): (ucs: number) => number { + // Generated: 2018-09-24T16:45:44.483077 + // Source: DerivedGeneralCategory-11.0.0.txt + // Date: 2018-02-21, 05:34:04 GMT + const ZERO_WIDTH = [ + [0x0300, 0x036f], // Combining Grave Accent ..Combining Latin Small Le + [0x0483, 0x0489], // Combining Cyrillic Titlo..Combining Cyrillic Milli + [0x0591, 0x05bd], // Hebrew Accent Etnahta ..Hebrew Point Meteg + [0x05bf, 0x05bf], // Hebrew Point Rafe ..Hebrew Point Rafe + [0x05c1, 0x05c2], // Hebrew Point Shin Dot ..Hebrew Point Sin Dot + [0x05c4, 0x05c5], // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + [0x05c7, 0x05c7], // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata + [0x0610, 0x061a], // Arabic Sign Sallallahou ..Arabic Small Kasra + [0x064b, 0x065f], // Arabic Fathatan .. 
+ [0x0670, 0x0670], // Arabic Letter Superscrip..Arabic Letter Superscrip + [0x06d6, 0x06dc], // Arabic Small High Ligatu..Arabic Small High Seen + [0x06df, 0x06e4], // Arabic Small High Rounde..Arabic Small High Madda + [0x06e7, 0x06e8], // Arabic Small High Yeh ..Arabic Small High Noon + [0x06ea, 0x06ed], // Arabic Empty Centre Low ..Arabic Small Low Meem + [0x0711, 0x0711], // Syriac Letter Superscrip..Syriac Letter Superscrip + [0x0730, 0x074a], // Syriac Pthaha Above ..Syriac Barrekh + [0x07a6, 0x07b0], // Thaana Abafili ..Thaana Sukun + [0x07eb, 0x07f3], // Nko Combining Short High..Nko Combining Double Dot + [0x07fd, 0x07fd], // (nil) .. + [0x0816, 0x0819], // Samaritan Mark In ..Samaritan Mark Dagesh + [0x081b, 0x0823], // Samaritan Mark Epentheti..Samaritan Vowel Sign A + [0x0825, 0x0827], // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + [0x0829, 0x082d], // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + [0x0859, 0x085b], // (nil) .. + [0x08d3, 0x08e1], // (nil) .. + [0x08e3, 0x0902], // (nil) ..Devanagari Sign Anusvara + [0x093a, 0x093a], // (nil) .. + [0x093c, 0x093c], // Devanagari Sign Nukta ..Devanagari Sign Nukta + [0x0941, 0x0948], // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + [0x094d, 0x094d], // Devanagari Sign Virama ..Devanagari Sign Virama + [0x0951, 0x0957], // Devanagari Stress Sign U.. + [0x0962, 0x0963], // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + [0x0981, 0x0981], // Bengali Sign Candrabindu..Bengali Sign Candrabindu + [0x09bc, 0x09bc], // Bengali Sign Nukta ..Bengali Sign Nukta + [0x09c1, 0x09c4], // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + [0x09cd, 0x09cd], // Bengali Sign Virama ..Bengali Sign Virama + [0x09e2, 0x09e3], // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + [0x09fe, 0x09fe], // (nil) .. 
+ [0x0a01, 0x0a02], // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + [0x0a3c, 0x0a3c], // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + [0x0a41, 0x0a42], // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + [0x0a47, 0x0a48], // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + [0x0a4b, 0x0a4d], // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + [0x0a51, 0x0a51], // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + [0x0a70, 0x0a71], // Gurmukhi Tippi ..Gurmukhi Addak + [0x0a75, 0x0a75], // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + [0x0a81, 0x0a82], // Gujarati Sign Candrabind..Gujarati Sign Anusvara + [0x0abc, 0x0abc], // Gujarati Sign Nukta ..Gujarati Sign Nukta + [0x0ac1, 0x0ac5], // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + [0x0ac7, 0x0ac8], // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + [0x0acd, 0x0acd], // Gujarati Sign Virama ..Gujarati Sign Virama + [0x0ae2, 0x0ae3], // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + [0x0afa, 0x0aff], // (nil) .. + [0x0b01, 0x0b01], // Oriya Sign Candrabindu ..Oriya Sign Candrabindu + [0x0b3c, 0x0b3c], // Oriya Sign Nukta ..Oriya Sign Nukta + [0x0b3f, 0x0b3f], // Oriya Vowel Sign I ..Oriya Vowel Sign I + [0x0b41, 0x0b44], // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + [0x0b4d, 0x0b4d], // Oriya Sign Virama ..Oriya Sign Virama + [0x0b56, 0x0b56], // Oriya Ai Length Mark ..Oriya Ai Length Mark + [0x0b62, 0x0b63], // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + [0x0b82, 0x0b82], // Tamil Sign Anusvara ..Tamil Sign Anusvara + [0x0bc0, 0x0bc0], // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii + [0x0bcd, 0x0bcd], // Tamil Sign Virama ..Tamil Sign Virama + [0x0c00, 0x0c00], // (nil) .. + [0x0c04, 0x0c04], // (nil) .. 
+ [0x0c3e, 0x0c40], // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + [0x0c46, 0x0c48], // Telugu Vowel Sign E ..Telugu Vowel Sign Ai + [0x0c4a, 0x0c4d], // Telugu Vowel Sign O ..Telugu Sign Virama + [0x0c55, 0x0c56], // Telugu Length Mark ..Telugu Ai Length Mark + [0x0c62, 0x0c63], // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + [0x0c81, 0x0c81], // (nil) .. + [0x0cbc, 0x0cbc], // Kannada Sign Nukta ..Kannada Sign Nukta + [0x0cbf, 0x0cbf], // Kannada Vowel Sign I ..Kannada Vowel Sign I + [0x0cc6, 0x0cc6], // Kannada Vowel Sign E ..Kannada Vowel Sign E + [0x0ccc, 0x0ccd], // Kannada Vowel Sign Au ..Kannada Sign Virama + [0x0ce2, 0x0ce3], // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + [0x0d00, 0x0d01], // (nil) .. + [0x0d3b, 0x0d3c], // (nil) .. + [0x0d41, 0x0d44], // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + [0x0d4d, 0x0d4d], // Malayalam Sign Virama ..Malayalam Sign Virama + [0x0d62, 0x0d63], // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + [0x0dca, 0x0dca], // Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + [0x0dd2, 0x0dd4], // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + [0x0dd6, 0x0dd6], // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + [0x0e31, 0x0e31], // Thai Character Mai Han-a..Thai Character Mai Han-a + [0x0e34, 0x0e3a], // Thai Character Sara I ..Thai Character Phinthu + [0x0e47, 0x0e4e], // Thai Character Maitaikhu..Thai Character Yamakkan + [0x0eb1, 0x0eb1], // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + [0x0eb4, 0x0eb9], // Lao Vowel Sign I ..Lao Vowel Sign Uu + [0x0ebb, 0x0ebc], // Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + [0x0ec8, 0x0ecd], // Lao Tone Mai Ek ..Lao Niggahita + [0x0f18, 0x0f19], // Tibetan Astrological Sig..Tibetan Astrological Sig + [0x0f35, 0x0f35], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f37, 0x0f37], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f39, 0x0f39], // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + [0x0f71, 0x0f7e], // Tibetan 
Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + [0x0f80, 0x0f84], // Tibetan Vowel Sign Rever..Tibetan Mark Halanta + [0x0f86, 0x0f87], // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + [0x0f8d, 0x0f97], // (nil) ..Tibetan Subjoined Letter + [0x0f99, 0x0fbc], // Tibetan Subjoined Letter..Tibetan Subjoined Letter + [0x0fc6, 0x0fc6], // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + [0x102d, 0x1030], // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + [0x1032, 0x1037], // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + [0x1039, 0x103a], // Myanmar Sign Virama ..Myanmar Sign Asat + [0x103d, 0x103e], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1058, 0x1059], // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + [0x105e, 0x1060], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1071, 0x1074], // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + [0x1082, 0x1082], // Myanmar Consonant Sign S..Myanmar Consonant Sign S + [0x1085, 0x1086], // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + [0x108d, 0x108d], // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + [0x109d, 0x109d], // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton + [0x135d, 0x135f], // (nil) ..Ethiopic Combining Gemin + [0x1712, 0x1714], // Tagalog Vowel Sign I ..Tagalog Sign Virama + [0x1732, 0x1734], // Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + [0x1752, 0x1753], // Buhid Vowel Sign I ..Buhid Vowel Sign U + [0x1772, 0x1773], // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + [0x17b4, 0x17b5], // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + [0x17b7, 0x17bd], // Khmer Vowel Sign I ..Khmer Vowel Sign Ua + [0x17c6, 0x17c6], // Khmer Sign Nikahit ..Khmer Sign Nikahit + [0x17c9, 0x17d3], // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + [0x17dd, 0x17dd], // Khmer Sign Atthacan ..Khmer Sign Atthacan + [0x180b, 0x180d], // Mongolian Free Variation..Mongolian Free Variation + [0x1885, 0x1886], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x18a9, 
0x18a9], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x1920, 0x1922], // Limbu Vowel Sign A ..Limbu Vowel Sign U + [0x1927, 0x1928], // Limbu Vowel Sign E ..Limbu Vowel Sign O + [0x1932, 0x1932], // Limbu Small Letter Anusv..Limbu Small Letter Anusv + [0x1939, 0x193b], // Limbu Sign Mukphreng ..Limbu Sign Sa-i + [0x1a17, 0x1a18], // Buginese Vowel Sign I ..Buginese Vowel Sign U + [0x1a1b, 0x1a1b], // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae + [0x1a56, 0x1a56], // Tai Tham Consonant Sign ..Tai Tham Consonant Sign + [0x1a58, 0x1a5e], // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + [0x1a60, 0x1a60], // Tai Tham Sign Sakot ..Tai Tham Sign Sakot + [0x1a62, 0x1a62], // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai + [0x1a65, 0x1a6c], // Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + [0x1a73, 0x1a7c], // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + [0x1a7f, 0x1a7f], // Tai Tham Combining Crypt..Tai Tham Combining Crypt + [0x1ab0, 0x1abe], // (nil) .. + [0x1b00, 0x1b03], // Balinese Sign Ulu Ricem ..Balinese Sign Surang + [0x1b34, 0x1b34], // Balinese Sign Rerekan ..Balinese Sign Rerekan + [0x1b36, 0x1b3a], // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R + [0x1b3c, 0x1b3c], // Balinese Vowel Sign La L..Balinese Vowel Sign La L + [0x1b42, 0x1b42], // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + [0x1b6b, 0x1b73], // Balinese Musical Symbol ..Balinese Musical Symbol + [0x1b80, 0x1b81], // Sundanese Sign Panyecek ..Sundanese Sign Panglayar + [0x1ba2, 0x1ba5], // Sundanese Consonant Sign..Sundanese Vowel Sign Pan + [0x1ba8, 0x1ba9], // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan + [0x1bab, 0x1bad], // (nil) .. + [0x1be6, 0x1be6], // (nil) .. + [0x1be8, 0x1be9], // (nil) .. + [0x1bed, 0x1bed], // (nil) .. + [0x1bef, 0x1bf1], // (nil) .. 
+ [0x1c2c, 0x1c33], // Lepcha Vowel Sign E ..Lepcha Consonant Sign T + [0x1c36, 0x1c37], // Lepcha Sign Ran ..Lepcha Sign Nukta + [0x1cd0, 0x1cd2], // Vedic Tone Karshana ..Vedic Tone Prenkha + [0x1cd4, 0x1ce0], // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + [0x1ce2, 0x1ce8], // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + [0x1ced, 0x1ced], // Vedic Sign Tiryak ..Vedic Sign Tiryak + [0x1cf4, 0x1cf4], // (nil) .. + [0x1cf8, 0x1cf9], // (nil) .. + [0x1dc0, 0x1df9], // Combining Dotted Grave A.. + [0x1dfb, 0x1dff], // (nil) ..Combining Right Arrowhea + [0x20d0, 0x20f0], // Combining Left Harpoon A..Combining Asterisk Above + [0x2cef, 0x2cf1], // Coptic Combining Ni Abov..Coptic Combining Spiritu + [0x2d7f, 0x2d7f], // (nil) .. + [0x2de0, 0x2dff], // Combining Cyrillic Lette..Combining Cyrillic Lette + [0x302a, 0x302d], // Ideographic Level Tone M..Ideographic Entering Ton + [0x3099, 0x309a], // Combining Katakana-hirag..Combining Katakana-hirag + [0xa66f, 0xa672], // Combining Cyrillic Vzmet..Combining Cyrillic Thous + [0xa674, 0xa67d], // (nil) ..Combining Cyrillic Payer + [0xa69e, 0xa69f], // (nil) .. + [0xa6f0, 0xa6f1], // Bamum Combining Mark Koq..Bamum Combining Mark Tuk + [0xa802, 0xa802], // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva + [0xa806, 0xa806], // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + [0xa80b, 0xa80b], // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + [0xa825, 0xa826], // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + [0xa8c4, 0xa8c5], // Saurashtra Sign Virama .. + [0xa8e0, 0xa8f1], // Combining Devanagari Dig..Combining Devanagari Sig + [0xa8ff, 0xa8ff], // (nil) .. 
+ [0xa926, 0xa92d], // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + [0xa947, 0xa951], // Rejang Vowel Sign I ..Rejang Consonant Sign R + [0xa980, 0xa982], // Javanese Sign Panyangga ..Javanese Sign Layar + [0xa9b3, 0xa9b3], // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + [0xa9b6, 0xa9b9], // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + [0xa9bc, 0xa9bc], // Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe + [0xa9e5, 0xa9e5], // (nil) .. + [0xaa29, 0xaa2e], // Cham Vowel Sign Aa ..Cham Vowel Sign Oe + [0xaa31, 0xaa32], // Cham Vowel Sign Au ..Cham Vowel Sign Ue + [0xaa35, 0xaa36], // Cham Consonant Sign La ..Cham Consonant Sign Wa + [0xaa43, 0xaa43], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa4c, 0xaa4c], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa7c, 0xaa7c], // (nil) .. + [0xaab0, 0xaab0], // Tai Viet Mai Kang ..Tai Viet Mai Kang + [0xaab2, 0xaab4], // Tai Viet Vowel I ..Tai Viet Vowel U + [0xaab7, 0xaab8], // Tai Viet Mai Khit ..Tai Viet Vowel Ia + [0xaabe, 0xaabf], // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + [0xaac1, 0xaac1], // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + [0xaaec, 0xaaed], // (nil) .. + [0xaaf6, 0xaaf6], // (nil) .. + [0xabe5, 0xabe5], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabe8, 0xabe8], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabed, 0xabed], // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + [0xfb1e, 0xfb1e], // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + [0xfe00, 0xfe0f], // Variation Selector-1 ..Variation Selector-16 + [0xfe20, 0xfe2f], // Combining Ligature Left .. + [0x101fd, 0x101fd], // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + [0x102e0, 0x102e0], // (nil) .. + [0x10376, 0x1037a], // (nil) .. 
+ [0x10a01, 0x10a03], // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + [0x10a05, 0x10a06], // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + [0x10a0c, 0x10a0f], // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + [0x10a38, 0x10a3a], // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + [0x10a3f, 0x10a3f], // Kharoshthi Virama ..Kharoshthi Virama + [0x10ae5, 0x10ae6], // (nil) .. + [0x10d24, 0x10d27], // (nil) .. + [0x10f46, 0x10f50], // (nil) .. + [0x11001, 0x11001], // (nil) .. + [0x11038, 0x11046], // (nil) .. + [0x1107f, 0x11081], // (nil) ..Kaithi Sign Anusvara + [0x110b3, 0x110b6], // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + [0x110b9, 0x110ba], // Kaithi Sign Virama ..Kaithi Sign Nukta + [0x11100, 0x11102], // (nil) .. + [0x11127, 0x1112b], // (nil) .. + [0x1112d, 0x11134], // (nil) .. + [0x11173, 0x11173], // (nil) .. + [0x11180, 0x11181], // (nil) .. + [0x111b6, 0x111be], // (nil) .. + [0x111c9, 0x111cc], // (nil) .. + [0x1122f, 0x11231], // (nil) .. + [0x11234, 0x11234], // (nil) .. + [0x11236, 0x11237], // (nil) .. + [0x1123e, 0x1123e], // (nil) .. + [0x112df, 0x112df], // (nil) .. + [0x112e3, 0x112ea], // (nil) .. + [0x11300, 0x11301], // (nil) .. + [0x1133b, 0x1133c], // (nil) .. + [0x11340, 0x11340], // (nil) .. + [0x11366, 0x1136c], // (nil) .. + [0x11370, 0x11374], // (nil) .. + [0x11438, 0x1143f], // (nil) .. + [0x11442, 0x11444], // (nil) .. + [0x11446, 0x11446], // (nil) .. + [0x1145e, 0x1145e], // (nil) .. + [0x114b3, 0x114b8], // (nil) .. + [0x114ba, 0x114ba], // (nil) .. + [0x114bf, 0x114c0], // (nil) .. + [0x114c2, 0x114c3], // (nil) .. + [0x115b2, 0x115b5], // (nil) .. + [0x115bc, 0x115bd], // (nil) .. + [0x115bf, 0x115c0], // (nil) .. + [0x115dc, 0x115dd], // (nil) .. + [0x11633, 0x1163a], // (nil) .. + [0x1163d, 0x1163d], // (nil) .. + [0x1163f, 0x11640], // (nil) .. + [0x116ab, 0x116ab], // (nil) .. + [0x116ad, 0x116ad], // (nil) .. + [0x116b0, 0x116b5], // (nil) .. + [0x116b7, 0x116b7], // (nil) .. 
+ [0x1171d, 0x1171f], // (nil) .. + [0x11722, 0x11725], // (nil) .. + [0x11727, 0x1172b], // (nil) .. + [0x1182f, 0x11837], // (nil) .. + [0x11839, 0x1183a], // (nil) .. + [0x11a01, 0x11a0a], // (nil) .. + [0x11a33, 0x11a38], // (nil) .. + [0x11a3b, 0x11a3e], // (nil) .. + [0x11a47, 0x11a47], // (nil) .. + [0x11a51, 0x11a56], // (nil) .. + [0x11a59, 0x11a5b], // (nil) .. + [0x11a8a, 0x11a96], // (nil) .. + [0x11a98, 0x11a99], // (nil) .. + [0x11c30, 0x11c36], // (nil) .. + [0x11c38, 0x11c3d], // (nil) .. + [0x11c3f, 0x11c3f], // (nil) .. + [0x11c92, 0x11ca7], // (nil) .. + [0x11caa, 0x11cb0], // (nil) .. + [0x11cb2, 0x11cb3], // (nil) .. + [0x11cb5, 0x11cb6], // (nil) .. + [0x11d31, 0x11d36], // (nil) .. + [0x11d3a, 0x11d3a], // (nil) .. + [0x11d3c, 0x11d3d], // (nil) .. + [0x11d3f, 0x11d45], // (nil) .. + [0x11d47, 0x11d47], // (nil) .. + [0x11d90, 0x11d91], // (nil) .. + [0x11d95, 0x11d95], // (nil) .. + [0x11d97, 0x11d97], // (nil) .. + [0x11ef3, 0x11ef4], // (nil) .. + [0x16af0, 0x16af4], // (nil) .. + [0x16b30, 0x16b36], // (nil) .. + [0x16f8f, 0x16f92], // (nil) .. + [0x1bc9d, 0x1bc9e], // (nil) .. + [0x1d167, 0x1d169], // Musical Symbol Combining..Musical Symbol Combining + [0x1d17b, 0x1d182], // Musical Symbol Combining..Musical Symbol Combining + [0x1d185, 0x1d18b], // Musical Symbol Combining..Musical Symbol Combining + [0x1d1aa, 0x1d1ad], // Musical Symbol Combining..Musical Symbol Combining + [0x1d242, 0x1d244], // Combining Greek Musical ..Combining Greek Musical + [0x1da00, 0x1da36], // (nil) .. + [0x1da3b, 0x1da6c], // (nil) .. + [0x1da75, 0x1da75], // (nil) .. + [0x1da84, 0x1da84], // (nil) .. + [0x1da9b, 0x1da9f], // (nil) .. + [0x1daa1, 0x1daaf], // (nil) .. + [0x1e000, 0x1e006], // (nil) .. + [0x1e008, 0x1e018], // (nil) .. + [0x1e01b, 0x1e021], // (nil) .. + [0x1e023, 0x1e024], // (nil) .. + [0x1e026, 0x1e02a], // (nil) .. + [0x1e8d0, 0x1e8d6], // (nil) .. + [0x1e944, 0x1e94a], // (nil) .. 
+ [0xe0100, 0xe01ef] // Variation Selector-17 ..Variation Selector-256 + ]; + + // Generated: 2018-09-24T16:45:44.464578 + // Source: EastAsianWidth-11.0.0.txt + // Date: 2018-05-14, 09:41:59 GMT [KW, LI] + const WIDE_EASTASIAN = [ + [0x1100, 0x115f], // Hangul Choseong Kiyeok ..Hangul Choseong Filler + [0x231a, 0x231b], // Watch ..Hourglass + [0x2329, 0x232a], // Left-pointing Angle Brac..Right-pointing Angle Bra + [0x23e9, 0x23ec], // (nil) .. + [0x23f0, 0x23f0], // (nil) .. + [0x23f3, 0x23f3], // (nil) .. + [0x25fd, 0x25fe], // White Medium Small Squar..Black Medium Small Squar + [0x2614, 0x2615], // Umbrella With Rain Drops..Hot Beverage + [0x2648, 0x2653], // Aries ..Pisces + [0x267f, 0x267f], // Wheelchair Symbol ..Wheelchair Symbol + [0x2693, 0x2693], // Anchor ..Anchor + [0x26a1, 0x26a1], // High Voltage Sign ..High Voltage Sign + [0x26aa, 0x26ab], // Medium White Circle ..Medium Black Circle + [0x26bd, 0x26be], // Soccer Ball ..Baseball + [0x26c4, 0x26c5], // Snowman Without Snow ..Sun Behind Cloud + [0x26ce, 0x26ce], // (nil) .. + [0x26d4, 0x26d4], // No Entry ..No Entry + [0x26ea, 0x26ea], // Church ..Church + [0x26f2, 0x26f3], // Fountain ..Flag In Hole + [0x26f5, 0x26f5], // Sailboat ..Sailboat + [0x26fa, 0x26fa], // Tent ..Tent + [0x26fd, 0x26fd], // Fuel Pump ..Fuel Pump + [0x2705, 0x2705], // (nil) .. + [0x270a, 0x270b], // (nil) .. + [0x2728, 0x2728], // (nil) .. + [0x274c, 0x274c], // (nil) .. + [0x274e, 0x274e], // (nil) .. + [0x2753, 0x2755], // (nil) .. + [0x2757, 0x2757], // Heavy Exclamation Mark S..Heavy Exclamation Mark S + [0x2795, 0x2797], // (nil) .. + [0x27b0, 0x27b0], // (nil) .. + [0x27bf, 0x27bf], // (nil) .. 
+ [0x2b1b, 0x2b1c], // Black Large Square ..White Large Square + [0x2b50, 0x2b50], // White Medium Star ..White Medium Star + [0x2b55, 0x2b55], // Heavy Large Circle ..Heavy Large Circle + [0x2e80, 0x2e99], // Cjk Radical Repeat ..Cjk Radical Rap + [0x2e9b, 0x2ef3], // Cjk Radical Choke ..Cjk Radical C-simplified + [0x2f00, 0x2fd5], // Kangxi Radical One ..Kangxi Radical Flute + [0x2ff0, 0x2ffb], // Ideographic Description ..Ideographic Description + [0x3000, 0x303e], // Ideographic Space ..Ideographic Variation In + [0x3041, 0x3096], // Hiragana Letter Small A ..Hiragana Letter Small Ke + [0x3099, 0x30ff], // Combining Katakana-hirag..Katakana Digraph Koto + [0x3105, 0x312f], // Bopomofo Letter B .. + [0x3131, 0x318e], // Hangul Letter Kiyeok ..Hangul Letter Araeae + [0x3190, 0x31ba], // Ideographic Annotation L.. + [0x31c0, 0x31e3], // Cjk Stroke T ..Cjk Stroke Q + [0x31f0, 0x321e], // Katakana Letter Small Ku..Parenthesized Korean Cha + [0x3220, 0x3247], // Parenthesized Ideograph ..Circled Ideograph Koto + [0x3250, 0x32fe], // Partnership Sign ..Circled Katakana Wo + [0x3300, 0x4dbf], // Square Apaato .. + [0x4e00, 0xa48c], // Cjk Unified Ideograph-4e..Yi Syllable Yyr + [0xa490, 0xa4c6], // Yi Radical Qot ..Yi Radical Ke + [0xa960, 0xa97c], // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo + [0xac00, 0xd7a3], // Hangul Syllable Ga ..Hangul Syllable Hih + [0xf900, 0xfaff], // Cjk Compatibility Ideogr.. + [0xfe10, 0xfe19], // Presentation Form For Ve..Presentation Form For Ve + [0xfe30, 0xfe52], // Presentation Form For Ve..Small Full Stop + [0xfe54, 0xfe66], // Small Semicolon ..Small Equals Sign + [0xfe68, 0xfe6b], // Small Reverse Solidus ..Small Commercial At + [0xff01, 0xff60], // Fullwidth Exclamation Ma..Fullwidth Right White Pa + [0xffe0, 0xffe6], // Fullwidth Cent Sign ..Fullwidth Won Sign + [0x16fe0, 0x16fe1], // (nil) .. + [0x17000, 0x187f1], // (nil) .. + [0x18800, 0x18af2], // (nil) .. + [0x1b000, 0x1b11e], // (nil) .. 
+ [0x1b170, 0x1b2fb], // (nil) .. + [0x1f004, 0x1f004], // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon + [0x1f0cf, 0x1f0cf], // (nil) .. + [0x1f18e, 0x1f18e], // (nil) .. + [0x1f191, 0x1f19a], // (nil) .. + [0x1f200, 0x1f202], // Square Hiragana Hoka .. + [0x1f210, 0x1f23b], // Squared Cjk Unified Ideo.. + [0x1f240, 0x1f248], // Tortoise Shell Bracketed..Tortoise Shell Bracketed + [0x1f250, 0x1f251], // (nil) .. + [0x1f260, 0x1f265], // (nil) .. + [0x1f300, 0x1f320], // (nil) .. + [0x1f32d, 0x1f335], // (nil) .. + [0x1f337, 0x1f37c], // (nil) .. + [0x1f37e, 0x1f393], // (nil) .. + [0x1f3a0, 0x1f3ca], // (nil) .. + [0x1f3cf, 0x1f3d3], // (nil) .. + [0x1f3e0, 0x1f3f0], // (nil) .. + [0x1f3f4, 0x1f3f4], // (nil) .. + [0x1f3f8, 0x1f43e], // (nil) .. + [0x1f440, 0x1f440], // (nil) .. + [0x1f442, 0x1f4fc], // (nil) .. + [0x1f4ff, 0x1f53d], // (nil) .. + [0x1f54b, 0x1f54e], // (nil) .. + [0x1f550, 0x1f567], // (nil) .. + [0x1f57a, 0x1f57a], // (nil) .. + [0x1f595, 0x1f596], // (nil) .. + [0x1f5a4, 0x1f5a4], // (nil) .. + [0x1f5fb, 0x1f64f], // (nil) .. + [0x1f680, 0x1f6c5], // (nil) .. + [0x1f6cc, 0x1f6cc], // (nil) .. + [0x1f6d0, 0x1f6d2], // (nil) .. + [0x1f6eb, 0x1f6ec], // (nil) .. + [0x1f6f4, 0x1f6f9], // (nil) .. + [0x1f910, 0x1f93e], // (nil) .. + [0x1f940, 0x1f970], // (nil) .. + [0x1f973, 0x1f976], // (nil) .. + [0x1f97a, 0x1f97a], // (nil) .. + [0x1f97c, 0x1f9a2], // (nil) .. + [0x1f9b0, 0x1f9b9], // (nil) .. + [0x1f9c0, 0x1f9c2], // (nil) .. + [0x1f9d0, 0x1f9ff], // (nil) .. + [0x20000, 0x2fffd], // Cjk Unified Ideograph-20.. + [0x30000, 0x3fffd] // (nil) .. 
+ ]; + + // binary search + function bisearch(ucs: number, data: number[][]): boolean { + let min = 0; + let max = data.length - 1; + let mid; + if (ucs < data[0][0] || ucs > data[max][1]) { + return false; + } + while (max >= min) { + mid = (min + max) >> 1; + if (ucs > data[mid][1]) { + min = mid + 1; + } else if (ucs < data[mid][0]) { + max = mid - 1; + } else { + return true; + } + } + return false; + } + function wcwidthBMP(ucs: number): number { + // test for 8-bit control characters + if (ucs === 0) { + return opts.nul; + } + if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) { + return opts.control; + } + // binary search in table of non-spacing characters + if (bisearch(ucs, ZERO_WIDTH)) { + return 0; + } + // if we arrive here, ucs is not a combining or C0/C1 control character + if (isWideBMP(ucs)) { + return 2; + } + return 1; + } + function isWideBMP(ucs: number): boolean { + return ( + ucs >= 0x1100 && ( + ucs <= 0x115f || // Hangul Jamo init. consonants + ucs === 0x2329 || + ucs === 0x232a || + (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) || // CJK..Yi + (ucs >= 0xac00 && ucs <= 0xd7a3) || // Hangul Syllables + (ucs >= 0xf900 && ucs <= 0xfaff) || // CJK Compat Ideographs + (ucs >= 0xfe10 && ucs <= 0xfe19) || // Vertical forms + (ucs >= 0xfe30 && ucs <= 0xfe6f) || // CJK Compat Forms + (ucs >= 0xff00 && ucs <= 0xff60) || // Fullwidth Forms + (ucs >= 0xffe0 && ucs <= 0xffe6))); + } + function wcwidthHigh(ucs: number): 0 | 1 | 2 { + if (bisearch(ucs, ZERO_WIDTH)) { + return 0; + } + + if (bisearch(ucs, WIDE_EASTASIAN)) { + return 2; + } + + return 1; + } + const control = opts.control | 0; + let table: number[] | Uint32Array = null; + function initTable(): number[] | Uint32Array { + // lookup table for BMP + const CODEPOINTS = 65536; // BMP holds 65536 codepoints + const BITWIDTH = 2; // a codepoint can have a width of 0, 1 or 2 + const ITEMSIZE = 32; // using uint32_t + const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE; + const CODEPOINTS_PER_ITEM 
= ITEMSIZE / BITWIDTH; + table = (typeof Uint32Array === 'undefined') + ? new Array(CONTAINERSIZE) + : new Uint32Array(CONTAINERSIZE); + for (let i = 0; i < CONTAINERSIZE; ++i) { + let num = 0; + let pos = CODEPOINTS_PER_ITEM; + while (pos--) { + num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos); + } + table[i] = num; + } + return table; + } + // get width from lookup table + // position in container : num / CODEPOINTS_PER_ITEM + // ==> n = table[Math.floor(num / 16)] + // ==> n = table[num >> 4] + // 16 codepoints per number: FFEEDDCCBBAA99887766554433221100 + // position in number : (num % CODEPOINTS_PER_ITEM) * BITWIDTH + // ==> m = (num % 16) * 2 + // ==> m = (num & 15) << 1 + // right shift to position m + // ==> n = n >> m e.g. m=12 000000000000FFEEDDCCBBAA99887766 + // we are only interested in 2 LSBs, cut off higher bits + // ==> n = n & 3 e.g. 000000000000000000000000000000XX + return function (num: number): number { + num = num | 0; // get asm.js like optimization under V8 + if (num < 32) { + return control | 0; + } + if (num < 127) { + return 1; + } + if (num === 0x1F600) return 2; + const t = table || initTable(); + if (num < 65536) { + return t[num >> 4] >> ((num & 15) << 1) & 3; + } + // do a full search for high codepoints + return wcwidthHigh(num); + }; +})({nul: 0, control: 0}); // configurable options + +describe('unicode - v11', () => { + const versionProvider = v11; + versionProvider.init(); + it('wcwidth should match all values from the old implementation', () => { + // test full BMP range old vs new implementation + for (let i = 0; i < 65536; ++i) { + assert.equal(versionProvider.wcwidth(i), wcwidthOld(i), `mismatch for i: ${i}`); + } + }); +}); diff --git a/src/unicode/v11.ts b/src/unicode/v11.ts index 1b9c02dcaf..2ed0bdd430 100644 --- a/src/unicode/v11.ts +++ b/src/unicode/v11.ts @@ -3,553 +3,542 @@ * @license MIT */ import { IUnicodeVersionProvider } from '../Types'; +import { fill } from '../common/TypedArrayUtils'; -export const
v11: IUnicodeVersionProvider = { - version: 11, - wcwidth: (function(opts: {nul: number, control: number}): (ucs: number) => number { - // Generated: 2018-09-24T16:45:44.483077 - // Source: DerivedGeneralCategory-11.0.0.txt - // Date: 2018-02-21, 05:34:04 GMT - const ZERO_WIDTH = [ - [0x0300, 0x036f], // Combining Grave Accent ..Combining Latin Small Le - [0x0483, 0x0489], // Combining Cyrillic Titlo..Combining Cyrillic Milli - [0x0591, 0x05bd], // Hebrew Accent Etnahta ..Hebrew Point Meteg - [0x05bf, 0x05bf], // Hebrew Point Rafe ..Hebrew Point Rafe - [0x05c1, 0x05c2], // Hebrew Point Shin Dot ..Hebrew Point Sin Dot - [0x05c4, 0x05c5], // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot - [0x05c7, 0x05c7], // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata - [0x0610, 0x061a], // Arabic Sign Sallallahou ..Arabic Small Kasra - [0x064b, 0x065f], // Arabic Fathatan .. - [0x0670, 0x0670], // Arabic Letter Superscrip..Arabic Letter Superscrip - [0x06d6, 0x06dc], // Arabic Small High Ligatu..Arabic Small High Seen - [0x06df, 0x06e4], // Arabic Small High Rounde..Arabic Small High Madda - [0x06e7, 0x06e8], // Arabic Small High Yeh ..Arabic Small High Noon - [0x06ea, 0x06ed], // Arabic Empty Centre Low ..Arabic Small Low Meem - [0x0711, 0x0711], // Syriac Letter Superscrip..Syriac Letter Superscrip - [0x0730, 0x074a], // Syriac Pthaha Above ..Syriac Barrekh - [0x07a6, 0x07b0], // Thaana Abafili ..Thaana Sukun - [0x07eb, 0x07f3], // Nko Combining Short High..Nko Combining Double Dot - [0x07fd, 0x07fd], // (nil) .. - [0x0816, 0x0819], // Samaritan Mark In ..Samaritan Mark Dagesh - [0x081b, 0x0823], // Samaritan Mark Epentheti..Samaritan Vowel Sign A - [0x0825, 0x0827], // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U - [0x0829, 0x082d], // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa - [0x0859, 0x085b], // (nil) .. - [0x08d3, 0x08e1], // (nil) .. - [0x08e3, 0x0902], // (nil) ..Devanagari Sign Anusvara - [0x093a, 0x093a], // (nil) .. 
- [0x093c, 0x093c], // Devanagari Sign Nukta ..Devanagari Sign Nukta - [0x0941, 0x0948], // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai - [0x094d, 0x094d], // Devanagari Sign Virama ..Devanagari Sign Virama - [0x0951, 0x0957], // Devanagari Stress Sign U.. - [0x0962, 0x0963], // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo - [0x0981, 0x0981], // Bengali Sign Candrabindu..Bengali Sign Candrabindu - [0x09bc, 0x09bc], // Bengali Sign Nukta ..Bengali Sign Nukta - [0x09c1, 0x09c4], // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal - [0x09cd, 0x09cd], // Bengali Sign Virama ..Bengali Sign Virama - [0x09e2, 0x09e3], // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal - [0x09fe, 0x09fe], // (nil) .. - [0x0a01, 0x0a02], // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi - [0x0a3c, 0x0a3c], // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta - [0x0a41, 0x0a42], // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu - [0x0a47, 0x0a48], // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai - [0x0a4b, 0x0a4d], // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama - [0x0a51, 0x0a51], // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat - [0x0a70, 0x0a71], // Gurmukhi Tippi ..Gurmukhi Addak - [0x0a75, 0x0a75], // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash - [0x0a81, 0x0a82], // Gujarati Sign Candrabind..Gujarati Sign Anusvara - [0x0abc, 0x0abc], // Gujarati Sign Nukta ..Gujarati Sign Nukta - [0x0ac1, 0x0ac5], // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand - [0x0ac7, 0x0ac8], // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai - [0x0acd, 0x0acd], // Gujarati Sign Virama ..Gujarati Sign Virama - [0x0ae2, 0x0ae3], // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca - [0x0afa, 0x0aff], // (nil) .. 
- [0x0b01, 0x0b01], // Oriya Sign Candrabindu ..Oriya Sign Candrabindu - [0x0b3c, 0x0b3c], // Oriya Sign Nukta ..Oriya Sign Nukta - [0x0b3f, 0x0b3f], // Oriya Vowel Sign I ..Oriya Vowel Sign I - [0x0b41, 0x0b44], // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic - [0x0b4d, 0x0b4d], // Oriya Sign Virama ..Oriya Sign Virama - [0x0b56, 0x0b56], // Oriya Ai Length Mark ..Oriya Ai Length Mark - [0x0b62, 0x0b63], // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic - [0x0b82, 0x0b82], // Tamil Sign Anusvara ..Tamil Sign Anusvara - [0x0bc0, 0x0bc0], // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii - [0x0bcd, 0x0bcd], // Tamil Sign Virama ..Tamil Sign Virama - [0x0c00, 0x0c00], // (nil) .. - [0x0c04, 0x0c04], // (nil) .. - [0x0c3e, 0x0c40], // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii - [0x0c46, 0x0c48], // Telugu Vowel Sign E ..Telugu Vowel Sign Ai - [0x0c4a, 0x0c4d], // Telugu Vowel Sign O ..Telugu Sign Virama - [0x0c55, 0x0c56], // Telugu Length Mark ..Telugu Ai Length Mark - [0x0c62, 0x0c63], // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali - [0x0c81, 0x0c81], // (nil) .. - [0x0cbc, 0x0cbc], // Kannada Sign Nukta ..Kannada Sign Nukta - [0x0cbf, 0x0cbf], // Kannada Vowel Sign I ..Kannada Vowel Sign I - [0x0cc6, 0x0cc6], // Kannada Vowel Sign E ..Kannada Vowel Sign E - [0x0ccc, 0x0ccd], // Kannada Vowel Sign Au ..Kannada Sign Virama - [0x0ce2, 0x0ce3], // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal - [0x0d00, 0x0d01], // (nil) .. - [0x0d3b, 0x0d3c], // (nil) .. 
- [0x0d41, 0x0d44], // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc - [0x0d4d, 0x0d4d], // Malayalam Sign Virama ..Malayalam Sign Virama - [0x0d62, 0x0d63], // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc - [0x0dca, 0x0dca], // Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna - [0x0dd2, 0x0dd4], // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti - [0x0dd6, 0x0dd6], // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga - [0x0e31, 0x0e31], // Thai Character Mai Han-a..Thai Character Mai Han-a - [0x0e34, 0x0e3a], // Thai Character Sara I ..Thai Character Phinthu - [0x0e47, 0x0e4e], // Thai Character Maitaikhu..Thai Character Yamakkan - [0x0eb1, 0x0eb1], // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan - [0x0eb4, 0x0eb9], // Lao Vowel Sign I ..Lao Vowel Sign Uu - [0x0ebb, 0x0ebc], // Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo - [0x0ec8, 0x0ecd], // Lao Tone Mai Ek ..Lao Niggahita - [0x0f18, 0x0f19], // Tibetan Astrological Sig..Tibetan Astrological Sig - [0x0f35, 0x0f35], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung - [0x0f37, 0x0f37], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung - [0x0f39, 0x0f39], // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru - [0x0f71, 0x0f7e], // Tibetan Vowel Sign Aa ..Tibetan Sign Rjes Su Nga - [0x0f80, 0x0f84], // Tibetan Vowel Sign Rever..Tibetan Mark Halanta - [0x0f86, 0x0f87], // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags - [0x0f8d, 0x0f97], // (nil) ..Tibetan Subjoined Letter - [0x0f99, 0x0fbc], // Tibetan Subjoined Letter..Tibetan Subjoined Letter - [0x0fc6, 0x0fc6], // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda - [0x102d, 0x1030], // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu - [0x1032, 0x1037], // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below - [0x1039, 0x103a], // Myanmar Sign Virama ..Myanmar Sign Asat - [0x103d, 0x103e], // Myanmar Consonant Sign M..Myanmar Consonant Sign M - [0x1058, 0x1059], // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal - [0x105e, 0x1060], // Myanmar 
Consonant Sign M..Myanmar Consonant Sign M - [0x1071, 0x1074], // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah - [0x1082, 0x1082], // Myanmar Consonant Sign S..Myanmar Consonant Sign S - [0x1085, 0x1086], // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan - [0x108d, 0x108d], // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci - [0x109d, 0x109d], // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton - [0x135d, 0x135f], // (nil) ..Ethiopic Combining Gemin - [0x1712, 0x1714], // Tagalog Vowel Sign I ..Tagalog Sign Virama - [0x1732, 0x1734], // Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod - [0x1752, 0x1753], // Buhid Vowel Sign I ..Buhid Vowel Sign U - [0x1772, 0x1773], // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U - [0x17b4, 0x17b5], // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa - [0x17b7, 0x17bd], // Khmer Vowel Sign I ..Khmer Vowel Sign Ua - [0x17c6, 0x17c6], // Khmer Sign Nikahit ..Khmer Sign Nikahit - [0x17c9, 0x17d3], // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat - [0x17dd, 0x17dd], // Khmer Sign Atthacan ..Khmer Sign Atthacan - [0x180b, 0x180d], // Mongolian Free Variation..Mongolian Free Variation - [0x1885, 0x1886], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal - [0x18a9, 0x18a9], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal - [0x1920, 0x1922], // Limbu Vowel Sign A ..Limbu Vowel Sign U - [0x1927, 0x1928], // Limbu Vowel Sign E ..Limbu Vowel Sign O - [0x1932, 0x1932], // Limbu Small Letter Anusv..Limbu Small Letter Anusv - [0x1939, 0x193b], // Limbu Sign Mukphreng ..Limbu Sign Sa-i - [0x1a17, 0x1a18], // Buginese Vowel Sign I ..Buginese Vowel Sign U - [0x1a1b, 0x1a1b], // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae - [0x1a56, 0x1a56], // Tai Tham Consonant Sign ..Tai Tham Consonant Sign - [0x1a58, 0x1a5e], // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign - [0x1a60, 0x1a60], // Tai Tham Sign Sakot ..Tai Tham Sign Sakot - [0x1a62, 0x1a62], // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai - [0x1a65, 0x1a6c], // 
Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B - [0x1a73, 0x1a7c], // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue - [0x1a7f, 0x1a7f], // Tai Tham Combining Crypt..Tai Tham Combining Crypt - [0x1ab0, 0x1abe], // (nil) .. - [0x1b00, 0x1b03], // Balinese Sign Ulu Ricem ..Balinese Sign Surang - [0x1b34, 0x1b34], // Balinese Sign Rerekan ..Balinese Sign Rerekan - [0x1b36, 0x1b3a], // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R - [0x1b3c, 0x1b3c], // Balinese Vowel Sign La L..Balinese Vowel Sign La L - [0x1b42, 0x1b42], // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe - [0x1b6b, 0x1b73], // Balinese Musical Symbol ..Balinese Musical Symbol - [0x1b80, 0x1b81], // Sundanese Sign Panyecek ..Sundanese Sign Panglayar - [0x1ba2, 0x1ba5], // Sundanese Consonant Sign..Sundanese Vowel Sign Pan - [0x1ba8, 0x1ba9], // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan - [0x1bab, 0x1bad], // (nil) .. - [0x1be6, 0x1be6], // (nil) .. - [0x1be8, 0x1be9], // (nil) .. - [0x1bed, 0x1bed], // (nil) .. - [0x1bef, 0x1bf1], // (nil) .. - [0x1c2c, 0x1c33], // Lepcha Vowel Sign E ..Lepcha Consonant Sign T - [0x1c36, 0x1c37], // Lepcha Sign Ran ..Lepcha Sign Nukta - [0x1cd0, 0x1cd2], // Vedic Tone Karshana ..Vedic Tone Prenkha - [0x1cd4, 0x1ce0], // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash - [0x1ce2, 0x1ce8], // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda - [0x1ced, 0x1ced], // Vedic Sign Tiryak ..Vedic Sign Tiryak - [0x1cf4, 0x1cf4], // (nil) .. - [0x1cf8, 0x1cf9], // (nil) .. - [0x1dc0, 0x1df9], // Combining Dotted Grave A.. - [0x1dfb, 0x1dff], // (nil) ..Combining Right Arrowhea - [0x20d0, 0x20f0], // Combining Left Harpoon A..Combining Asterisk Above - [0x2cef, 0x2cf1], // Coptic Combining Ni Abov..Coptic Combining Spiritu - [0x2d7f, 0x2d7f], // (nil) .. 
- [0x2de0, 0x2dff], // Combining Cyrillic Lette..Combining Cyrillic Lette - [0x302a, 0x302d], // Ideographic Level Tone M..Ideographic Entering Ton - [0x3099, 0x309a], // Combining Katakana-hirag..Combining Katakana-hirag - [0xa66f, 0xa672], // Combining Cyrillic Vzmet..Combining Cyrillic Thous - [0xa674, 0xa67d], // (nil) ..Combining Cyrillic Payer - [0xa69e, 0xa69f], // (nil) .. - [0xa6f0, 0xa6f1], // Bamum Combining Mark Koq..Bamum Combining Mark Tuk - [0xa802, 0xa802], // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva - [0xa806, 0xa806], // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant - [0xa80b, 0xa80b], // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva - [0xa825, 0xa826], // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign - [0xa8c4, 0xa8c5], // Saurashtra Sign Virama .. - [0xa8e0, 0xa8f1], // Combining Devanagari Dig..Combining Devanagari Sig - [0xa8ff, 0xa8ff], // (nil) .. - [0xa926, 0xa92d], // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop - [0xa947, 0xa951], // Rejang Vowel Sign I ..Rejang Consonant Sign R - [0xa980, 0xa982], // Javanese Sign Panyangga ..Javanese Sign Layar - [0xa9b3, 0xa9b3], // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu - [0xa9b6, 0xa9b9], // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku - [0xa9bc, 0xa9bc], // Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe - [0xa9e5, 0xa9e5], // (nil) .. - [0xaa29, 0xaa2e], // Cham Vowel Sign Aa ..Cham Vowel Sign Oe - [0xaa31, 0xaa32], // Cham Vowel Sign Au ..Cham Vowel Sign Ue - [0xaa35, 0xaa36], // Cham Consonant Sign La ..Cham Consonant Sign Wa - [0xaa43, 0xaa43], // Cham Consonant Sign Fina..Cham Consonant Sign Fina - [0xaa4c, 0xaa4c], // Cham Consonant Sign Fina..Cham Consonant Sign Fina - [0xaa7c, 0xaa7c], // (nil) .. 
- [0xaab0, 0xaab0], // Tai Viet Mai Kang ..Tai Viet Mai Kang - [0xaab2, 0xaab4], // Tai Viet Vowel I ..Tai Viet Vowel U - [0xaab7, 0xaab8], // Tai Viet Mai Khit ..Tai Viet Vowel Ia - [0xaabe, 0xaabf], // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek - [0xaac1, 0xaac1], // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho - [0xaaec, 0xaaed], // (nil) .. - [0xaaf6, 0xaaf6], // (nil) .. - [0xabe5, 0xabe5], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - [0xabe8, 0xabe8], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign - [0xabed, 0xabed], // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek - [0xfb1e, 0xfb1e], // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani - [0xfe00, 0xfe0f], // Variation Selector-1 ..Variation Selector-16 - [0xfe20, 0xfe2f], // Combining Ligature Left .. - [0x101fd, 0x101fd], // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi - [0x102e0, 0x102e0], // (nil) .. - [0x10376, 0x1037a], // (nil) .. - [0x10a01, 0x10a03], // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo - [0x10a05, 0x10a06], // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O - [0x10a0c, 0x10a0f], // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga - [0x10a38, 0x10a3a], // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo - [0x10a3f, 0x10a3f], // Kharoshthi Virama ..Kharoshthi Virama - [0x10ae5, 0x10ae6], // (nil) .. - [0x10d24, 0x10d27], // (nil) .. - [0x10f46, 0x10f50], // (nil) .. - [0x11001, 0x11001], // (nil) .. - [0x11038, 0x11046], // (nil) .. - [0x1107f, 0x11081], // (nil) ..Kaithi Sign Anusvara - [0x110b3, 0x110b6], // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai - [0x110b9, 0x110ba], // Kaithi Sign Virama ..Kaithi Sign Nukta - [0x11100, 0x11102], // (nil) .. - [0x11127, 0x1112b], // (nil) .. - [0x1112d, 0x11134], // (nil) .. - [0x11173, 0x11173], // (nil) .. - [0x11180, 0x11181], // (nil) .. - [0x111b6, 0x111be], // (nil) .. - [0x111c9, 0x111cc], // (nil) .. - [0x1122f, 0x11231], // (nil) .. - [0x11234, 0x11234], // (nil) .. - [0x11236, 0x11237], // (nil) .. 
- [0x1123e, 0x1123e], // (nil) .. - [0x112df, 0x112df], // (nil) .. - [0x112e3, 0x112ea], // (nil) .. - [0x11300, 0x11301], // (nil) .. - [0x1133b, 0x1133c], // (nil) .. - [0x11340, 0x11340], // (nil) .. - [0x11366, 0x1136c], // (nil) .. - [0x11370, 0x11374], // (nil) .. - [0x11438, 0x1143f], // (nil) .. - [0x11442, 0x11444], // (nil) .. - [0x11446, 0x11446], // (nil) .. - [0x1145e, 0x1145e], // (nil) .. - [0x114b3, 0x114b8], // (nil) .. - [0x114ba, 0x114ba], // (nil) .. - [0x114bf, 0x114c0], // (nil) .. - [0x114c2, 0x114c3], // (nil) .. - [0x115b2, 0x115b5], // (nil) .. - [0x115bc, 0x115bd], // (nil) .. - [0x115bf, 0x115c0], // (nil) .. - [0x115dc, 0x115dd], // (nil) .. - [0x11633, 0x1163a], // (nil) .. - [0x1163d, 0x1163d], // (nil) .. - [0x1163f, 0x11640], // (nil) .. - [0x116ab, 0x116ab], // (nil) .. - [0x116ad, 0x116ad], // (nil) .. - [0x116b0, 0x116b5], // (nil) .. - [0x116b7, 0x116b7], // (nil) .. - [0x1171d, 0x1171f], // (nil) .. - [0x11722, 0x11725], // (nil) .. - [0x11727, 0x1172b], // (nil) .. - [0x1182f, 0x11837], // (nil) .. - [0x11839, 0x1183a], // (nil) .. - [0x11a01, 0x11a0a], // (nil) .. - [0x11a33, 0x11a38], // (nil) .. - [0x11a3b, 0x11a3e], // (nil) .. - [0x11a47, 0x11a47], // (nil) .. - [0x11a51, 0x11a56], // (nil) .. - [0x11a59, 0x11a5b], // (nil) .. - [0x11a8a, 0x11a96], // (nil) .. - [0x11a98, 0x11a99], // (nil) .. - [0x11c30, 0x11c36], // (nil) .. - [0x11c38, 0x11c3d], // (nil) .. - [0x11c3f, 0x11c3f], // (nil) .. - [0x11c92, 0x11ca7], // (nil) .. - [0x11caa, 0x11cb0], // (nil) .. - [0x11cb2, 0x11cb3], // (nil) .. - [0x11cb5, 0x11cb6], // (nil) .. - [0x11d31, 0x11d36], // (nil) .. - [0x11d3a, 0x11d3a], // (nil) .. - [0x11d3c, 0x11d3d], // (nil) .. - [0x11d3f, 0x11d45], // (nil) .. - [0x11d47, 0x11d47], // (nil) .. - [0x11d90, 0x11d91], // (nil) .. - [0x11d95, 0x11d95], // (nil) .. - [0x11d97, 0x11d97], // (nil) .. - [0x11ef3, 0x11ef4], // (nil) .. - [0x16af0, 0x16af4], // (nil) .. - [0x16b30, 0x16b36], // (nil) .. 
- [0x16f8f, 0x16f92], // (nil) .. - [0x1bc9d, 0x1bc9e], // (nil) .. - [0x1d167, 0x1d169], // Musical Symbol Combining..Musical Symbol Combining - [0x1d17b, 0x1d182], // Musical Symbol Combining..Musical Symbol Combining - [0x1d185, 0x1d18b], // Musical Symbol Combining..Musical Symbol Combining - [0x1d1aa, 0x1d1ad], // Musical Symbol Combining..Musical Symbol Combining - [0x1d242, 0x1d244], // Combining Greek Musical ..Combining Greek Musical - [0x1da00, 0x1da36], // (nil) .. - [0x1da3b, 0x1da6c], // (nil) .. - [0x1da75, 0x1da75], // (nil) .. - [0x1da84, 0x1da84], // (nil) .. - [0x1da9b, 0x1da9f], // (nil) .. - [0x1daa1, 0x1daaf], // (nil) .. - [0x1e000, 0x1e006], // (nil) .. - [0x1e008, 0x1e018], // (nil) .. - [0x1e01b, 0x1e021], // (nil) .. - [0x1e023, 0x1e024], // (nil) .. - [0x1e026, 0x1e02a], // (nil) .. - [0x1e8d0, 0x1e8d6], // (nil) .. - [0x1e944, 0x1e94a], // (nil) .. - [0xe0100, 0xe01ef] // Variation Selector-17 ..Variation Selector-256 - ]; +// Generated: 2018-09-24T16:45:44.483077 +// Source: DerivedGeneralCategory-11.0.0.txt +// Date: 2018-02-21, 05:34:04 GMT +const ZERO_WIDTH_BMP = [ + [0x0300, 0x036f], // Combining Grave Accent ..Combining Latin Small Le + [0x0483, 0x0489], // Combining Cyrillic Titlo..Combining Cyrillic Milli + [0x0591, 0x05bd], // Hebrew Accent Etnahta ..Hebrew Point Meteg + [0x05bf, 0x05bf], // Hebrew Point Rafe ..Hebrew Point Rafe + [0x05c1, 0x05c2], // Hebrew Point Shin Dot ..Hebrew Point Sin Dot + [0x05c4, 0x05c5], // Hebrew Mark Upper Dot ..Hebrew Mark Lower Dot + [0x05c7, 0x05c7], // Hebrew Point Qamats Qata..Hebrew Point Qamats Qata + [0x0610, 0x061a], // Arabic Sign Sallallahou ..Arabic Small Kasra + [0x064b, 0x065f], // Arabic Fathatan .. 
+ [0x0670, 0x0670], // Arabic Letter Superscrip..Arabic Letter Superscrip + [0x06d6, 0x06dc], // Arabic Small High Ligatu..Arabic Small High Seen + [0x06df, 0x06e4], // Arabic Small High Rounde..Arabic Small High Madda + [0x06e7, 0x06e8], // Arabic Small High Yeh ..Arabic Small High Noon + [0x06ea, 0x06ed], // Arabic Empty Centre Low ..Arabic Small Low Meem + [0x0711, 0x0711], // Syriac Letter Superscrip..Syriac Letter Superscrip + [0x0730, 0x074a], // Syriac Pthaha Above ..Syriac Barrekh + [0x07a6, 0x07b0], // Thaana Abafili ..Thaana Sukun + [0x07eb, 0x07f3], // Nko Combining Short High..Nko Combining Double Dot + [0x07fd, 0x07fd], // (nil) .. + [0x0816, 0x0819], // Samaritan Mark In ..Samaritan Mark Dagesh + [0x081b, 0x0823], // Samaritan Mark Epentheti..Samaritan Vowel Sign A + [0x0825, 0x0827], // Samaritan Vowel Sign Sho..Samaritan Vowel Sign U + [0x0829, 0x082d], // Samaritan Vowel Sign Lon..Samaritan Mark Nequdaa + [0x0859, 0x085b], // (nil) .. + [0x08d3, 0x08e1], // (nil) .. + [0x08e3, 0x0902], // (nil) ..Devanagari Sign Anusvara + [0x093a, 0x093a], // (nil) .. + [0x093c, 0x093c], // Devanagari Sign Nukta ..Devanagari Sign Nukta + [0x0941, 0x0948], // Devanagari Vowel Sign U ..Devanagari Vowel Sign Ai + [0x094d, 0x094d], // Devanagari Sign Virama ..Devanagari Sign Virama + [0x0951, 0x0957], // Devanagari Stress Sign U.. + [0x0962, 0x0963], // Devanagari Vowel Sign Vo..Devanagari Vowel Sign Vo + [0x0981, 0x0981], // Bengali Sign Candrabindu..Bengali Sign Candrabindu + [0x09bc, 0x09bc], // Bengali Sign Nukta ..Bengali Sign Nukta + [0x09c1, 0x09c4], // Bengali Vowel Sign U ..Bengali Vowel Sign Vocal + [0x09cd, 0x09cd], // Bengali Sign Virama ..Bengali Sign Virama + [0x09e2, 0x09e3], // Bengali Vowel Sign Vocal..Bengali Vowel Sign Vocal + [0x09fe, 0x09fe], // (nil) .. 
+ [0x0a01, 0x0a02], // Gurmukhi Sign Adak Bindi..Gurmukhi Sign Bindi + [0x0a3c, 0x0a3c], // Gurmukhi Sign Nukta ..Gurmukhi Sign Nukta + [0x0a41, 0x0a42], // Gurmukhi Vowel Sign U ..Gurmukhi Vowel Sign Uu + [0x0a47, 0x0a48], // Gurmukhi Vowel Sign Ee ..Gurmukhi Vowel Sign Ai + [0x0a4b, 0x0a4d], // Gurmukhi Vowel Sign Oo ..Gurmukhi Sign Virama + [0x0a51, 0x0a51], // Gurmukhi Sign Udaat ..Gurmukhi Sign Udaat + [0x0a70, 0x0a71], // Gurmukhi Tippi ..Gurmukhi Addak + [0x0a75, 0x0a75], // Gurmukhi Sign Yakash ..Gurmukhi Sign Yakash + [0x0a81, 0x0a82], // Gujarati Sign Candrabind..Gujarati Sign Anusvara + [0x0abc, 0x0abc], // Gujarati Sign Nukta ..Gujarati Sign Nukta + [0x0ac1, 0x0ac5], // Gujarati Vowel Sign U ..Gujarati Vowel Sign Cand + [0x0ac7, 0x0ac8], // Gujarati Vowel Sign E ..Gujarati Vowel Sign Ai + [0x0acd, 0x0acd], // Gujarati Sign Virama ..Gujarati Sign Virama + [0x0ae2, 0x0ae3], // Gujarati Vowel Sign Voca..Gujarati Vowel Sign Voca + [0x0afa, 0x0aff], // (nil) .. + [0x0b01, 0x0b01], // Oriya Sign Candrabindu ..Oriya Sign Candrabindu + [0x0b3c, 0x0b3c], // Oriya Sign Nukta ..Oriya Sign Nukta + [0x0b3f, 0x0b3f], // Oriya Vowel Sign I ..Oriya Vowel Sign I + [0x0b41, 0x0b44], // Oriya Vowel Sign U ..Oriya Vowel Sign Vocalic + [0x0b4d, 0x0b4d], // Oriya Sign Virama ..Oriya Sign Virama + [0x0b56, 0x0b56], // Oriya Ai Length Mark ..Oriya Ai Length Mark + [0x0b62, 0x0b63], // Oriya Vowel Sign Vocalic..Oriya Vowel Sign Vocalic + [0x0b82, 0x0b82], // Tamil Sign Anusvara ..Tamil Sign Anusvara + [0x0bc0, 0x0bc0], // Tamil Vowel Sign Ii ..Tamil Vowel Sign Ii + [0x0bcd, 0x0bcd], // Tamil Sign Virama ..Tamil Sign Virama + [0x0c00, 0x0c00], // (nil) .. + [0x0c04, 0x0c04], // (nil) .. 
+ [0x0c3e, 0x0c40], // Telugu Vowel Sign Aa ..Telugu Vowel Sign Ii + [0x0c46, 0x0c48], // Telugu Vowel Sign E ..Telugu Vowel Sign Ai + [0x0c4a, 0x0c4d], // Telugu Vowel Sign O ..Telugu Sign Virama + [0x0c55, 0x0c56], // Telugu Length Mark ..Telugu Ai Length Mark + [0x0c62, 0x0c63], // Telugu Vowel Sign Vocali..Telugu Vowel Sign Vocali + [0x0c81, 0x0c81], // (nil) .. + [0x0cbc, 0x0cbc], // Kannada Sign Nukta ..Kannada Sign Nukta + [0x0cbf, 0x0cbf], // Kannada Vowel Sign I ..Kannada Vowel Sign I + [0x0cc6, 0x0cc6], // Kannada Vowel Sign E ..Kannada Vowel Sign E + [0x0ccc, 0x0ccd], // Kannada Vowel Sign Au ..Kannada Sign Virama + [0x0ce2, 0x0ce3], // Kannada Vowel Sign Vocal..Kannada Vowel Sign Vocal + [0x0d00, 0x0d01], // (nil) .. + [0x0d3b, 0x0d3c], // (nil) .. + [0x0d41, 0x0d44], // Malayalam Vowel Sign U ..Malayalam Vowel Sign Voc + [0x0d4d, 0x0d4d], // Malayalam Sign Virama ..Malayalam Sign Virama + [0x0d62, 0x0d63], // Malayalam Vowel Sign Voc..Malayalam Vowel Sign Voc + [0x0dca, 0x0dca], // Sinhala Sign Al-lakuna ..Sinhala Sign Al-lakuna + [0x0dd2, 0x0dd4], // Sinhala Vowel Sign Ketti..Sinhala Vowel Sign Ketti + [0x0dd6, 0x0dd6], // Sinhala Vowel Sign Diga ..Sinhala Vowel Sign Diga + [0x0e31, 0x0e31], // Thai Character Mai Han-a..Thai Character Mai Han-a + [0x0e34, 0x0e3a], // Thai Character Sara I ..Thai Character Phinthu + [0x0e47, 0x0e4e], // Thai Character Maitaikhu..Thai Character Yamakkan + [0x0eb1, 0x0eb1], // Lao Vowel Sign Mai Kan ..Lao Vowel Sign Mai Kan + [0x0eb4, 0x0eb9], // Lao Vowel Sign I ..Lao Vowel Sign Uu + [0x0ebb, 0x0ebc], // Lao Vowel Sign Mai Kon ..Lao Semivowel Sign Lo + [0x0ec8, 0x0ecd], // Lao Tone Mai Ek ..Lao Niggahita + [0x0f18, 0x0f19], // Tibetan Astrological Sig..Tibetan Astrological Sig + [0x0f35, 0x0f35], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f37, 0x0f37], // Tibetan Mark Ngas Bzung ..Tibetan Mark Ngas Bzung + [0x0f39, 0x0f39], // Tibetan Mark Tsa -phru ..Tibetan Mark Tsa -phru + [0x0f71, 0x0f7e], // Tibetan 
Vowel Sign Aa ..Tibetan Sign Rjes Su Nga + [0x0f80, 0x0f84], // Tibetan Vowel Sign Rever..Tibetan Mark Halanta + [0x0f86, 0x0f87], // Tibetan Sign Lci Rtags ..Tibetan Sign Yang Rtags + [0x0f8d, 0x0f97], // (nil) ..Tibetan Subjoined Letter + [0x0f99, 0x0fbc], // Tibetan Subjoined Letter..Tibetan Subjoined Letter + [0x0fc6, 0x0fc6], // Tibetan Symbol Padma Gda..Tibetan Symbol Padma Gda + [0x102d, 0x1030], // Myanmar Vowel Sign I ..Myanmar Vowel Sign Uu + [0x1032, 0x1037], // Myanmar Vowel Sign Ai ..Myanmar Sign Dot Below + [0x1039, 0x103a], // Myanmar Sign Virama ..Myanmar Sign Asat + [0x103d, 0x103e], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1058, 0x1059], // Myanmar Vowel Sign Vocal..Myanmar Vowel Sign Vocal + [0x105e, 0x1060], // Myanmar Consonant Sign M..Myanmar Consonant Sign M + [0x1071, 0x1074], // Myanmar Vowel Sign Geba ..Myanmar Vowel Sign Kayah + [0x1082, 0x1082], // Myanmar Consonant Sign S..Myanmar Consonant Sign S + [0x1085, 0x1086], // Myanmar Vowel Sign Shan ..Myanmar Vowel Sign Shan + [0x108d, 0x108d], // Myanmar Sign Shan Counci..Myanmar Sign Shan Counci + [0x109d, 0x109d], // Myanmar Vowel Sign Aiton..Myanmar Vowel Sign Aiton + [0x135d, 0x135f], // (nil) ..Ethiopic Combining Gemin + [0x1712, 0x1714], // Tagalog Vowel Sign I ..Tagalog Sign Virama + [0x1732, 0x1734], // Hanunoo Vowel Sign I ..Hanunoo Sign Pamudpod + [0x1752, 0x1753], // Buhid Vowel Sign I ..Buhid Vowel Sign U + [0x1772, 0x1773], // Tagbanwa Vowel Sign I ..Tagbanwa Vowel Sign U + [0x17b4, 0x17b5], // Khmer Vowel Inherent Aq ..Khmer Vowel Inherent Aa + [0x17b7, 0x17bd], // Khmer Vowel Sign I ..Khmer Vowel Sign Ua + [0x17c6, 0x17c6], // Khmer Sign Nikahit ..Khmer Sign Nikahit + [0x17c9, 0x17d3], // Khmer Sign Muusikatoan ..Khmer Sign Bathamasat + [0x17dd, 0x17dd], // Khmer Sign Atthacan ..Khmer Sign Atthacan + [0x180b, 0x180d], // Mongolian Free Variation..Mongolian Free Variation + [0x1885, 0x1886], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x18a9, 
0x18a9], // Mongolian Letter Ali Gal..Mongolian Letter Ali Gal + [0x1920, 0x1922], // Limbu Vowel Sign A ..Limbu Vowel Sign U + [0x1927, 0x1928], // Limbu Vowel Sign E ..Limbu Vowel Sign O + [0x1932, 0x1932], // Limbu Small Letter Anusv..Limbu Small Letter Anusv + [0x1939, 0x193b], // Limbu Sign Mukphreng ..Limbu Sign Sa-i + [0x1a17, 0x1a18], // Buginese Vowel Sign I ..Buginese Vowel Sign U + [0x1a1b, 0x1a1b], // Buginese Vowel Sign Ae ..Buginese Vowel Sign Ae + [0x1a56, 0x1a56], // Tai Tham Consonant Sign ..Tai Tham Consonant Sign + [0x1a58, 0x1a5e], // Tai Tham Sign Mai Kang L..Tai Tham Consonant Sign + [0x1a60, 0x1a60], // Tai Tham Sign Sakot ..Tai Tham Sign Sakot + [0x1a62, 0x1a62], // Tai Tham Vowel Sign Mai ..Tai Tham Vowel Sign Mai + [0x1a65, 0x1a6c], // Tai Tham Vowel Sign I ..Tai Tham Vowel Sign Oa B + [0x1a73, 0x1a7c], // Tai Tham Vowel Sign Oa A..Tai Tham Sign Khuen-lue + [0x1a7f, 0x1a7f], // Tai Tham Combining Crypt..Tai Tham Combining Crypt + [0x1ab0, 0x1abe], // (nil) .. + [0x1b00, 0x1b03], // Balinese Sign Ulu Ricem ..Balinese Sign Surang + [0x1b34, 0x1b34], // Balinese Sign Rerekan ..Balinese Sign Rerekan + [0x1b36, 0x1b3a], // Balinese Vowel Sign Ulu ..Balinese Vowel Sign Ra R + [0x1b3c, 0x1b3c], // Balinese Vowel Sign La L..Balinese Vowel Sign La L + [0x1b42, 0x1b42], // Balinese Vowel Sign Pepe..Balinese Vowel Sign Pepe + [0x1b6b, 0x1b73], // Balinese Musical Symbol ..Balinese Musical Symbol + [0x1b80, 0x1b81], // Sundanese Sign Panyecek ..Sundanese Sign Panglayar + [0x1ba2, 0x1ba5], // Sundanese Consonant Sign..Sundanese Vowel Sign Pan + [0x1ba8, 0x1ba9], // Sundanese Vowel Sign Pam..Sundanese Vowel Sign Pan + [0x1bab, 0x1bad], // (nil) .. + [0x1be6, 0x1be6], // (nil) .. + [0x1be8, 0x1be9], // (nil) .. + [0x1bed, 0x1bed], // (nil) .. + [0x1bef, 0x1bf1], // (nil) .. 
+ [0x1c2c, 0x1c33], // Lepcha Vowel Sign E ..Lepcha Consonant Sign T + [0x1c36, 0x1c37], // Lepcha Sign Ran ..Lepcha Sign Nukta + [0x1cd0, 0x1cd2], // Vedic Tone Karshana ..Vedic Tone Prenkha + [0x1cd4, 0x1ce0], // Vedic Sign Yajurvedic Mi..Vedic Tone Rigvedic Kash + [0x1ce2, 0x1ce8], // Vedic Sign Visarga Svari..Vedic Sign Visarga Anuda + [0x1ced, 0x1ced], // Vedic Sign Tiryak ..Vedic Sign Tiryak + [0x1cf4, 0x1cf4], // (nil) .. + [0x1cf8, 0x1cf9], // (nil) .. + [0x1dc0, 0x1df9], // Combining Dotted Grave A.. + [0x1dfb, 0x1dff], // (nil) ..Combining Right Arrowhea + [0x20d0, 0x20f0], // Combining Left Harpoon A..Combining Asterisk Above + [0x2cef, 0x2cf1], // Coptic Combining Ni Abov..Coptic Combining Spiritu + [0x2d7f, 0x2d7f], // (nil) .. + [0x2de0, 0x2dff], // Combining Cyrillic Lette..Combining Cyrillic Lette + [0x302a, 0x302d], // Ideographic Level Tone M..Ideographic Entering Ton + [0x3099, 0x309a], // Combining Katakana-hirag..Combining Katakana-hirag + [0xa66f, 0xa672], // Combining Cyrillic Vzmet..Combining Cyrillic Thous + [0xa674, 0xa67d], // (nil) ..Combining Cyrillic Payer + [0xa69e, 0xa69f], // (nil) .. + [0xa6f0, 0xa6f1], // Bamum Combining Mark Koq..Bamum Combining Mark Tuk + [0xa802, 0xa802], // Syloti Nagri Sign Dvisva..Syloti Nagri Sign Dvisva + [0xa806, 0xa806], // Syloti Nagri Sign Hasant..Syloti Nagri Sign Hasant + [0xa80b, 0xa80b], // Syloti Nagri Sign Anusva..Syloti Nagri Sign Anusva + [0xa825, 0xa826], // Syloti Nagri Vowel Sign ..Syloti Nagri Vowel Sign + [0xa8c4, 0xa8c5], // Saurashtra Sign Virama .. + [0xa8e0, 0xa8f1], // Combining Devanagari Dig..Combining Devanagari Sig + [0xa8ff, 0xa8ff], // (nil) .. 
+ [0xa926, 0xa92d], // Kayah Li Vowel Ue ..Kayah Li Tone Calya Plop + [0xa947, 0xa951], // Rejang Vowel Sign I ..Rejang Consonant Sign R + [0xa980, 0xa982], // Javanese Sign Panyangga ..Javanese Sign Layar + [0xa9b3, 0xa9b3], // Javanese Sign Cecak Telu..Javanese Sign Cecak Telu + [0xa9b6, 0xa9b9], // Javanese Vowel Sign Wulu..Javanese Vowel Sign Suku + [0xa9bc, 0xa9bc], // Javanese Vowel Sign Pepe..Javanese Vowel Sign Pepe + [0xa9e5, 0xa9e5], // (nil) .. + [0xaa29, 0xaa2e], // Cham Vowel Sign Aa ..Cham Vowel Sign Oe + [0xaa31, 0xaa32], // Cham Vowel Sign Au ..Cham Vowel Sign Ue + [0xaa35, 0xaa36], // Cham Consonant Sign La ..Cham Consonant Sign Wa + [0xaa43, 0xaa43], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa4c, 0xaa4c], // Cham Consonant Sign Fina..Cham Consonant Sign Fina + [0xaa7c, 0xaa7c], // (nil) .. + [0xaab0, 0xaab0], // Tai Viet Mai Kang ..Tai Viet Mai Kang + [0xaab2, 0xaab4], // Tai Viet Vowel I ..Tai Viet Vowel U + [0xaab7, 0xaab8], // Tai Viet Mai Khit ..Tai Viet Vowel Ia + [0xaabe, 0xaabf], // Tai Viet Vowel Am ..Tai Viet Tone Mai Ek + [0xaac1, 0xaac1], // Tai Viet Tone Mai Tho ..Tai Viet Tone Mai Tho + [0xaaec, 0xaaed], // (nil) .. + [0xaaf6, 0xaaf6], // (nil) .. + [0xabe5, 0xabe5], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabe8, 0xabe8], // Meetei Mayek Vowel Sign ..Meetei Mayek Vowel Sign + [0xabed, 0xabed], // Meetei Mayek Apun Iyek ..Meetei Mayek Apun Iyek + [0xfb1e, 0xfb1e], // Hebrew Point Judeo-spani..Hebrew Point Judeo-spani + [0xfe00, 0xfe0f], // Variation Selector-1 ..Variation Selector-16 + [0xfe20, 0xfe2f] // Combining Ligature Left .. +]; + +const ZERO_WIDTH_HIGH = [ + [0x101fd, 0x101fd], // Phaistos Disc Sign Combi..Phaistos Disc Sign Combi + [0x102e0, 0x102e0], // (nil) .. + [0x10376, 0x1037a], // (nil) .. 
+ [0x10a01, 0x10a03], // Kharoshthi Vowel Sign I ..Kharoshthi Vowel Sign Vo + [0x10a05, 0x10a06], // Kharoshthi Vowel Sign E ..Kharoshthi Vowel Sign O + [0x10a0c, 0x10a0f], // Kharoshthi Vowel Length ..Kharoshthi Sign Visarga + [0x10a38, 0x10a3a], // Kharoshthi Sign Bar Abov..Kharoshthi Sign Dot Belo + [0x10a3f, 0x10a3f], // Kharoshthi Virama ..Kharoshthi Virama + [0x10ae5, 0x10ae6], // (nil) .. + [0x10d24, 0x10d27], // (nil) .. + [0x10f46, 0x10f50], // (nil) .. + [0x11001, 0x11001], // (nil) .. + [0x11038, 0x11046], // (nil) .. + [0x1107f, 0x11081], // (nil) ..Kaithi Sign Anusvara + [0x110b3, 0x110b6], // Kaithi Vowel Sign U ..Kaithi Vowel Sign Ai + [0x110b9, 0x110ba], // Kaithi Sign Virama ..Kaithi Sign Nukta + [0x11100, 0x11102], // (nil) .. + [0x11127, 0x1112b], // (nil) .. + [0x1112d, 0x11134], // (nil) .. + [0x11173, 0x11173], // (nil) .. + [0x11180, 0x11181], // (nil) .. + [0x111b6, 0x111be], // (nil) .. + [0x111c9, 0x111cc], // (nil) .. + [0x1122f, 0x11231], // (nil) .. + [0x11234, 0x11234], // (nil) .. + [0x11236, 0x11237], // (nil) .. + [0x1123e, 0x1123e], // (nil) .. + [0x112df, 0x112df], // (nil) .. + [0x112e3, 0x112ea], // (nil) .. + [0x11300, 0x11301], // (nil) .. + [0x1133b, 0x1133c], // (nil) .. + [0x11340, 0x11340], // (nil) .. + [0x11366, 0x1136c], // (nil) .. + [0x11370, 0x11374], // (nil) .. + [0x11438, 0x1143f], // (nil) .. + [0x11442, 0x11444], // (nil) .. + [0x11446, 0x11446], // (nil) .. + [0x1145e, 0x1145e], // (nil) .. + [0x114b3, 0x114b8], // (nil) .. + [0x114ba, 0x114ba], // (nil) .. + [0x114bf, 0x114c0], // (nil) .. + [0x114c2, 0x114c3], // (nil) .. + [0x115b2, 0x115b5], // (nil) .. + [0x115bc, 0x115bd], // (nil) .. + [0x115bf, 0x115c0], // (nil) .. + [0x115dc, 0x115dd], // (nil) .. + [0x11633, 0x1163a], // (nil) .. + [0x1163d, 0x1163d], // (nil) .. + [0x1163f, 0x11640], // (nil) .. + [0x116ab, 0x116ab], // (nil) .. + [0x116ad, 0x116ad], // (nil) .. + [0x116b0, 0x116b5], // (nil) .. + [0x116b7, 0x116b7], // (nil) .. 
+ [0x1171d, 0x1171f], // (nil) .. + [0x11722, 0x11725], // (nil) .. + [0x11727, 0x1172b], // (nil) .. + [0x1182f, 0x11837], // (nil) .. + [0x11839, 0x1183a], // (nil) .. + [0x11a01, 0x11a0a], // (nil) .. + [0x11a33, 0x11a38], // (nil) .. + [0x11a3b, 0x11a3e], // (nil) .. + [0x11a47, 0x11a47], // (nil) .. + [0x11a51, 0x11a56], // (nil) .. + [0x11a59, 0x11a5b], // (nil) .. + [0x11a8a, 0x11a96], // (nil) .. + [0x11a98, 0x11a99], // (nil) .. + [0x11c30, 0x11c36], // (nil) .. + [0x11c38, 0x11c3d], // (nil) .. + [0x11c3f, 0x11c3f], // (nil) .. + [0x11c92, 0x11ca7], // (nil) .. + [0x11caa, 0x11cb0], // (nil) .. + [0x11cb2, 0x11cb3], // (nil) .. + [0x11cb5, 0x11cb6], // (nil) .. + [0x11d31, 0x11d36], // (nil) .. + [0x11d3a, 0x11d3a], // (nil) .. + [0x11d3c, 0x11d3d], // (nil) .. + [0x11d3f, 0x11d45], // (nil) .. + [0x11d47, 0x11d47], // (nil) .. + [0x11d90, 0x11d91], // (nil) .. + [0x11d95, 0x11d95], // (nil) .. + [0x11d97, 0x11d97], // (nil) .. + [0x11ef3, 0x11ef4], // (nil) .. + [0x16af0, 0x16af4], // (nil) .. + [0x16b30, 0x16b36], // (nil) .. + [0x16f8f, 0x16f92], // (nil) .. + [0x1bc9d, 0x1bc9e], // (nil) .. + [0x1d167, 0x1d169], // Musical Symbol Combining..Musical Symbol Combining + [0x1d17b, 0x1d182], // Musical Symbol Combining..Musical Symbol Combining + [0x1d185, 0x1d18b], // Musical Symbol Combining..Musical Symbol Combining + [0x1d1aa, 0x1d1ad], // Musical Symbol Combining..Musical Symbol Combining + [0x1d242, 0x1d244], // Combining Greek Musical ..Combining Greek Musical + [0x1da00, 0x1da36], // (nil) .. + [0x1da3b, 0x1da6c], // (nil) .. + [0x1da75, 0x1da75], // (nil) .. + [0x1da84, 0x1da84], // (nil) .. + [0x1da9b, 0x1da9f], // (nil) .. + [0x1daa1, 0x1daaf], // (nil) .. + [0x1e000, 0x1e006], // (nil) .. + [0x1e008, 0x1e018], // (nil) .. + [0x1e01b, 0x1e021], // (nil) .. + [0x1e023, 0x1e024], // (nil) .. + [0x1e026, 0x1e02a], // (nil) .. + [0x1e8d0, 0x1e8d6], // (nil) .. + [0x1e944, 0x1e94a], // (nil) .. 
+ [0xe0100, 0xe01ef] // Variation Selector-17 ..Variation Selector-256 +]; + +/* + Ideally the following values should be used to create the lookup table, + needs to be tested before we activate this. + +// Generated: 2018-09-24T16:45:44.464578 +// Source: EastAsianWidth-11.0.0.txt +// Date: 2018-05-14, 09:41:59 GMT [KW, LI] +const WIDE_EASTASIAN_BMP = [ + [0x1100, 0x115f], // Hangul Choseong Kiyeok ..Hangul Choseong Filler + [0x231a, 0x231b], // Watch ..Hourglass + [0x2329, 0x232a], // Left-pointing Angle Brac..Right-pointing Angle Bra + [0x23e9, 0x23ec], // (nil) .. + [0x23f0, 0x23f0], // (nil) .. + [0x23f3, 0x23f3], // (nil) .. + [0x25fd, 0x25fe], // White Medium Small Squar..Black Medium Small Squar + [0x2614, 0x2615], // Umbrella With Rain Drops..Hot Beverage + [0x2648, 0x2653], // Aries ..Pisces + [0x267f, 0x267f], // Wheelchair Symbol ..Wheelchair Symbol + [0x2693, 0x2693], // Anchor ..Anchor + [0x26a1, 0x26a1], // High Voltage Sign ..High Voltage Sign + [0x26aa, 0x26ab], // Medium White Circle ..Medium Black Circle + [0x26bd, 0x26be], // Soccer Ball ..Baseball + [0x26c4, 0x26c5], // Snowman Without Snow ..Sun Behind Cloud + [0x26ce, 0x26ce], // (nil) .. + [0x26d4, 0x26d4], // No Entry ..No Entry + [0x26ea, 0x26ea], // Church ..Church + [0x26f2, 0x26f3], // Fountain ..Flag In Hole + [0x26f5, 0x26f5], // Sailboat ..Sailboat + [0x26fa, 0x26fa], // Tent ..Tent + [0x26fd, 0x26fd], // Fuel Pump ..Fuel Pump + [0x2705, 0x2705], // (nil) .. + [0x270a, 0x270b], // (nil) .. + [0x2728, 0x2728], // (nil) .. + [0x274c, 0x274c], // (nil) .. + [0x274e, 0x274e], // (nil) .. + [0x2753, 0x2755], // (nil) .. + [0x2757, 0x2757], // Heavy Exclamation Mark S..Heavy Exclamation Mark S + [0x2795, 0x2797], // (nil) .. + [0x27b0, 0x27b0], // (nil) .. + [0x27bf, 0x27bf], // (nil) .. 
+ [0x2b1b, 0x2b1c], // Black Large Square ..White Large Square + [0x2b50, 0x2b50], // White Medium Star ..White Medium Star + [0x2b55, 0x2b55], // Heavy Large Circle ..Heavy Large Circle + [0x2e80, 0x2e99], // Cjk Radical Repeat ..Cjk Radical Rap + [0x2e9b, 0x2ef3], // Cjk Radical Choke ..Cjk Radical C-simplified + [0x2f00, 0x2fd5], // Kangxi Radical One ..Kangxi Radical Flute + [0x2ff0, 0x2ffb], // Ideographic Description ..Ideographic Description + [0x3000, 0x303e], // Ideographic Space ..Ideographic Variation In + [0x3041, 0x3096], // Hiragana Letter Small A ..Hiragana Letter Small Ke + [0x3099, 0x30ff], // Combining Katakana-hirag..Katakana Digraph Koto + [0x3105, 0x312f], // Bopomofo Letter B .. + [0x3131, 0x318e], // Hangul Letter Kiyeok ..Hangul Letter Araeae + [0x3190, 0x31ba], // Ideographic Annotation L.. + [0x31c0, 0x31e3], // Cjk Stroke T ..Cjk Stroke Q + [0x31f0, 0x321e], // Katakana Letter Small Ku..Parenthesized Korean Cha + [0x3220, 0x3247], // Parenthesized Ideograph ..Circled Ideograph Koto + [0x3250, 0x32fe], // Partnership Sign ..Circled Katakana Wo + [0x3300, 0x4dbf], // Square Apaato .. + [0x4e00, 0xa48c], // Cjk Unified Ideograph-4e..Yi Syllable Yyr + [0xa490, 0xa4c6], // Yi Radical Qot ..Yi Radical Ke + [0xa960, 0xa97c], // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo + [0xac00, 0xd7a3], // Hangul Syllable Ga ..Hangul Syllable Hih + [0xf900, 0xfaff], // Cjk Compatibility Ideogr.. 
+ [0xfe10, 0xfe19], // Presentation Form For Ve..Presentation Form For Ve + [0xfe30, 0xfe52], // Presentation Form For Ve..Small Full Stop + [0xfe54, 0xfe66], // Small Semicolon ..Small Equals Sign + [0xfe68, 0xfe6b], // Small Reverse Solidus ..Small Commercial At + [0xff01, 0xff60], // Fullwidth Exclamation Ma..Fullwidth Right White Pa + [0xffe0, 0xffe6], // Fullwidth Cent Sign ..Fullwidth Won Sign +]; +*/ - // Generated: 2018-09-24T16:45:44.464578 - // Source: EastAsianWidth-11.0.0.txt - // Date: 2018-05-14, 09:41:59 GMT [KW, LI] - const WIDE_EASTASIAN = [ - [0x1100, 0x115f], // Hangul Choseong Kiyeok ..Hangul Choseong Filler - [0x231a, 0x231b], // Watch ..Hourglass - [0x2329, 0x232a], // Left-pointing Angle Brac..Right-pointing Angle Bra - [0x23e9, 0x23ec], // (nil) .. - [0x23f0, 0x23f0], // (nil) .. - [0x23f3, 0x23f3], // (nil) .. - [0x25fd, 0x25fe], // White Medium Small Squar..Black Medium Small Squar - [0x2614, 0x2615], // Umbrella With Rain Drops..Hot Beverage - [0x2648, 0x2653], // Aries ..Pisces - [0x267f, 0x267f], // Wheelchair Symbol ..Wheelchair Symbol - [0x2693, 0x2693], // Anchor ..Anchor - [0x26a1, 0x26a1], // High Voltage Sign ..High Voltage Sign - [0x26aa, 0x26ab], // Medium White Circle ..Medium Black Circle - [0x26bd, 0x26be], // Soccer Ball ..Baseball - [0x26c4, 0x26c5], // Snowman Without Snow ..Sun Behind Cloud - [0x26ce, 0x26ce], // (nil) .. - [0x26d4, 0x26d4], // No Entry ..No Entry - [0x26ea, 0x26ea], // Church ..Church - [0x26f2, 0x26f3], // Fountain ..Flag In Hole - [0x26f5, 0x26f5], // Sailboat ..Sailboat - [0x26fa, 0x26fa], // Tent ..Tent - [0x26fd, 0x26fd], // Fuel Pump ..Fuel Pump - [0x2705, 0x2705], // (nil) .. - [0x270a, 0x270b], // (nil) .. - [0x2728, 0x2728], // (nil) .. - [0x274c, 0x274c], // (nil) .. - [0x274e, 0x274e], // (nil) .. - [0x2753, 0x2755], // (nil) .. - [0x2757, 0x2757], // Heavy Exclamation Mark S..Heavy Exclamation Mark S - [0x2795, 0x2797], // (nil) .. - [0x27b0, 0x27b0], // (nil) .. 
- [0x27bf, 0x27bf], // (nil) .. - [0x2b1b, 0x2b1c], // Black Large Square ..White Large Square - [0x2b50, 0x2b50], // White Medium Star ..White Medium Star - [0x2b55, 0x2b55], // Heavy Large Circle ..Heavy Large Circle - [0x2e80, 0x2e99], // Cjk Radical Repeat ..Cjk Radical Rap - [0x2e9b, 0x2ef3], // Cjk Radical Choke ..Cjk Radical C-simplified - [0x2f00, 0x2fd5], // Kangxi Radical One ..Kangxi Radical Flute - [0x2ff0, 0x2ffb], // Ideographic Description ..Ideographic Description - [0x3000, 0x303e], // Ideographic Space ..Ideographic Variation In - [0x3041, 0x3096], // Hiragana Letter Small A ..Hiragana Letter Small Ke - [0x3099, 0x30ff], // Combining Katakana-hirag..Katakana Digraph Koto - [0x3105, 0x312f], // Bopomofo Letter B .. - [0x3131, 0x318e], // Hangul Letter Kiyeok ..Hangul Letter Araeae - [0x3190, 0x31ba], // Ideographic Annotation L.. - [0x31c0, 0x31e3], // Cjk Stroke T ..Cjk Stroke Q - [0x31f0, 0x321e], // Katakana Letter Small Ku..Parenthesized Korean Cha - [0x3220, 0x3247], // Parenthesized Ideograph ..Circled Ideograph Koto - [0x3250, 0x32fe], // Partnership Sign ..Circled Katakana Wo - [0x3300, 0x4dbf], // Square Apaato .. - [0x4e00, 0xa48c], // Cjk Unified Ideograph-4e..Yi Syllable Yyr - [0xa490, 0xa4c6], // Yi Radical Qot ..Yi Radical Ke - [0xa960, 0xa97c], // Hangul Choseong Tikeut-m..Hangul Choseong Ssangyeo - [0xac00, 0xd7a3], // Hangul Syllable Ga ..Hangul Syllable Hih - [0xf900, 0xfaff], // Cjk Compatibility Ideogr.. - [0xfe10, 0xfe19], // Presentation Form For Ve..Presentation Form For Ve - [0xfe30, 0xfe52], // Presentation Form For Ve..Small Full Stop - [0xfe54, 0xfe66], // Small Semicolon ..Small Equals Sign - [0xfe68, 0xfe6b], // Small Reverse Solidus ..Small Commercial At - [0xff01, 0xff60], // Fullwidth Exclamation Ma..Fullwidth Right White Pa - [0xffe0, 0xffe6], // Fullwidth Cent Sign ..Fullwidth Won Sign - [0x16fe0, 0x16fe1], // (nil) .. - [0x17000, 0x187f1], // (nil) .. - [0x18800, 0x18af2], // (nil) .. 
- [0x1b000, 0x1b11e], // (nil) .. - [0x1b170, 0x1b2fb], // (nil) .. - [0x1f004, 0x1f004], // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon - [0x1f0cf, 0x1f0cf], // (nil) .. - [0x1f18e, 0x1f18e], // (nil) .. - [0x1f191, 0x1f19a], // (nil) .. - [0x1f200, 0x1f202], // Square Hiragana Hoka .. - [0x1f210, 0x1f23b], // Squared Cjk Unified Ideo.. - [0x1f240, 0x1f248], // Tortoise Shell Bracketed..Tortoise Shell Bracketed - [0x1f250, 0x1f251], // (nil) .. - [0x1f260, 0x1f265], // (nil) .. - [0x1f300, 0x1f320], // (nil) .. - [0x1f32d, 0x1f335], // (nil) .. - [0x1f337, 0x1f37c], // (nil) .. - [0x1f37e, 0x1f393], // (nil) .. - [0x1f3a0, 0x1f3ca], // (nil) .. - [0x1f3cf, 0x1f3d3], // (nil) .. - [0x1f3e0, 0x1f3f0], // (nil) .. - [0x1f3f4, 0x1f3f4], // (nil) .. - [0x1f3f8, 0x1f43e], // (nil) .. - [0x1f440, 0x1f440], // (nil) .. - [0x1f442, 0x1f4fc], // (nil) .. - [0x1f4ff, 0x1f53d], // (nil) .. - [0x1f54b, 0x1f54e], // (nil) .. - [0x1f550, 0x1f567], // (nil) .. - [0x1f57a, 0x1f57a], // (nil) .. - [0x1f595, 0x1f596], // (nil) .. - [0x1f5a4, 0x1f5a4], // (nil) .. - [0x1f5fb, 0x1f64f], // (nil) .. - [0x1f680, 0x1f6c5], // (nil) .. - [0x1f6cc, 0x1f6cc], // (nil) .. - [0x1f6d0, 0x1f6d2], // (nil) .. - [0x1f6eb, 0x1f6ec], // (nil) .. - [0x1f6f4, 0x1f6f9], // (nil) .. - [0x1f910, 0x1f93e], // (nil) .. - [0x1f940, 0x1f970], // (nil) .. - [0x1f973, 0x1f976], // (nil) .. - [0x1f97a, 0x1f97a], // (nil) .. - [0x1f97c, 0x1f9a2], // (nil) .. - [0x1f9b0, 0x1f9b9], // (nil) .. - [0x1f9c0, 0x1f9c2], // (nil) .. - [0x1f9d0, 0x1f9ff], // (nil) .. - [0x20000, 0x2fffd], // Cjk Unified Ideograph-20.. - [0x30000, 0x3fffd] // (nil) .. - ]; +const WIDE_EASTASIAN_HIGH = [ + [0x16fe0, 0x16fe1], // (nil) .. + [0x17000, 0x187f1], // (nil) .. + [0x18800, 0x18af2], // (nil) .. + [0x1b000, 0x1b11e], // (nil) .. + [0x1b170, 0x1b2fb], // (nil) .. + [0x1f004, 0x1f004], // Mahjong Tile Red Dragon ..Mahjong Tile Red Dragon + [0x1f0cf, 0x1f0cf], // (nil) .. + [0x1f18e, 0x1f18e], // (nil) .. 
+ [0x1f191, 0x1f19a], // (nil) .. + [0x1f200, 0x1f202], // Square Hiragana Hoka .. + [0x1f210, 0x1f23b], // Squared Cjk Unified Ideo.. + [0x1f240, 0x1f248], // Tortoise Shell Bracketed..Tortoise Shell Bracketed + [0x1f250, 0x1f251], // (nil) .. + [0x1f260, 0x1f265], // (nil) .. + [0x1f300, 0x1f320], // (nil) .. + [0x1f32d, 0x1f335], // (nil) .. + [0x1f337, 0x1f37c], // (nil) .. + [0x1f37e, 0x1f393], // (nil) .. + [0x1f3a0, 0x1f3ca], // (nil) .. + [0x1f3cf, 0x1f3d3], // (nil) .. + [0x1f3e0, 0x1f3f0], // (nil) .. + [0x1f3f4, 0x1f3f4], // (nil) .. + [0x1f3f8, 0x1f43e], // (nil) .. + [0x1f440, 0x1f440], // (nil) .. + [0x1f442, 0x1f4fc], // (nil) .. + [0x1f4ff, 0x1f53d], // (nil) .. + [0x1f54b, 0x1f54e], // (nil) .. + [0x1f550, 0x1f567], // (nil) .. + [0x1f57a, 0x1f57a], // (nil) .. + [0x1f595, 0x1f596], // (nil) .. + [0x1f5a4, 0x1f5a4], // (nil) .. + [0x1f5fb, 0x1f64f], // (nil) .. + [0x1f680, 0x1f6c5], // (nil) .. + [0x1f6cc, 0x1f6cc], // (nil) .. + [0x1f6d0, 0x1f6d2], // (nil) .. + [0x1f6eb, 0x1f6ec], // (nil) .. + [0x1f6f4, 0x1f6f9], // (nil) .. + [0x1f910, 0x1f93e], // (nil) .. + [0x1f940, 0x1f970], // (nil) .. + [0x1f973, 0x1f976], // (nil) .. + [0x1f97a, 0x1f97a], // (nil) .. + [0x1f97c, 0x1f9a2], // (nil) .. + [0x1f9b0, 0x1f9b9], // (nil) .. + [0x1f9c0, 0x1f9c2], // (nil) .. + [0x1f9d0, 0x1f9ff], // (nil) .. + [0x20000, 0x2fffd], // Cjk Unified Ideograph-20.. + [0x30000, 0x3fffd] // (nil) .. 
+]; - // binary search - function bisearch(ucs: number, data: number[][]): boolean { - let min = 0; - let max = data.length - 1; - let mid; - if (ucs < data[0][0] || ucs > data[max][1]) { - return false; - } - while (max >= min) { - mid = (min + max) >> 1; - if (ucs > data[mid][1]) { - min = mid + 1; - } else if (ucs < data[mid][0]) { - max = mid - 1; - } else { - return true; - } - } - return false; +let lookupTable: Uint8Array | null = null; + +// binary search +function bisearch(ucs: number, data: number[][]): boolean { + let min = 0; + let max = data.length - 1; + let mid; + if (ucs < data[0][0] || ucs > data[max][1]) { + return false; + } + while (max >= min) { + mid = (min + max) >> 1; + if (ucs > data[mid][1]) { + min = mid + 1; + } else if (ucs < data[mid][0]) { + max = mid - 1; + } else { + return true; } - function wcwidthBMP(ucs: number): number { - // test for 8-bit control characters - if (ucs === 0) { - return opts.nul; - } - if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) { - return opts.control; - } - // binary search in table of non-spacing characters - if (bisearch(ucs, ZERO_WIDTH)) { - return 0; - } - // if we arrive here, ucs is not a combining or C0/C1 control character - if (isWideBMP(ucs)) { - return 2; - } + } + return false; +} + +function wcwidthHigh(ucs: number): 0 | 1 | 2 { + if (bisearch(ucs, ZERO_WIDTH_HIGH)) { + return 0; + } + if (bisearch(ucs, WIDE_EASTASIAN_HIGH)) { + return 2; + } + return 1; +} + + +export const v11: IUnicodeVersionProvider = { + version: 11, + wcwidth: (ucs: number): number => { + if (ucs < 32) { + return 0; + } + if (ucs < 127) { return 1; } - function isWideBMP(ucs: number): boolean { - return ( - ucs >= 0x1100 && ( - ucs <= 0x115f || // Hangul Jamo init. 
consonants - ucs === 0x2329 || - ucs === 0x232a || - (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) || // CJK..Yi - (ucs >= 0xac00 && ucs <= 0xd7a3) || // Hangul Syllables - (ucs >= 0xf900 && ucs <= 0xfaff) || // CJK Compat Ideographs - (ucs >= 0xfe10 && ucs <= 0xfe19) || // Vertical forms - (ucs >= 0xfe30 && ucs <= 0xfe6f) || // CJK Compat Forms - (ucs >= 0xff00 && ucs <= 0xff60) || // Fullwidth Forms - (ucs >= 0xffe0 && ucs <= 0xffe6))); + if (ucs < 65536) { + return lookupTable[ucs]; + } + // do a full search for high codepoints + return wcwidthHigh(ucs); + }, + init: () => { + // init only once + if (lookupTable) { + return; } - function wcwidthHigh(ucs: number): 0 | 1 | 2 { - if (bisearch(ucs, ZERO_WIDTH)) { - return 0; - } - if (bisearch(ucs, WIDE_EASTASIAN)) { - return 2; - } + // create lookup table for BMP plane + const table = new Uint8Array(65536); + fill(table, 1); + table[0] = 0; + // control chars + fill(table, 0, 1, 32); + fill(table, 0, 0x7f, 0xa0); - return 1; - } - const control = opts.control | 0; - let table: number[] | Uint32Array = null; - function initTable(): number[] | Uint32Array { - // lookup table for BMP - const CODEPOINTS = 65536; // BMP holds 65536 codepoints - const BITWIDTH = 2; // a codepoint can have a width of 0, 1 or 2 - const ITEMSIZE = 32; // using uint32_t - const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE; - const CODEPOINTS_PER_ITEM = ITEMSIZE / BITWIDTH; - table = (typeof Uint32Array === 'undefined') - ? 
new Array(CONTAINERSIZE) - : new Uint32Array(CONTAINERSIZE); - for (let i = 0; i < CONTAINERSIZE; ++i) { - let num = 0; - let pos = CODEPOINTS_PER_ITEM; - while (pos--) { - num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos); - } - table[i] = num; - } - return table; + // FIXME: after testing and use WIDE_EASTASIAN_BMP below instead + // apply wide char rules first + // wide chars + fill(table, 2, 0x1100, 0x1160); + table[0x2329] = 2; + table[0x232a] = 2; + fill(table, 2, 0x2e80, 0xa4d0); + table[0x303f] = 1; // wrongly in last line + + fill(table, 2, 0xac00, 0xd7a4); + fill(table, 2, 0xf900, 0xfb00); + fill(table, 2, 0xfe10, 0xfe1a); + fill(table, 2, 0xfe30, 0xfe70); + fill(table, 2, 0xff00, 0xff61); + fill(table, 2, 0xffe0, 0xffe7); + + // for (let r = 0; r < WIDE_EASTASIAN_BMP.length; ++r) { + // fill(table, 2, WIDE_EASTASIAN_BMP[r][0], WIDE_EASTASIAN_BMP[r][1] + 1); + // } + + // apply combining last to ensure we overwrite + // wrongly wide set chars: + // the original algo evals combining first and falls + // through to wide check so we simply do here the opposite + // combining 0 + for (let r = 0; r < ZERO_WIDTH_BMP.length; ++r) { + fill(table, 0, ZERO_WIDTH_BMP[r][0], ZERO_WIDTH_BMP[r][1] + 1); } - // get width from lookup table - // position in container : num / CODEPOINTS_PER_ITEM - // ==> n = table[Math.floor(num / 16)] - // ==> n = table[num >> 4] - // 16 codepoints per number: FFEEDDCCBBAA99887766554433221100 - // position in number : (num % CODEPOINTS_PER_ITEM) * BITWIDTH - // ==> m = (n % 16) * 2 - // ==> m = (num & 15) << 1 - // right shift to position m - // ==> n = n >> m e.g. m=12 000000000000FFEEDDCCBBAA99887766 - // we are only interested in 2 LSBs, cut off higher bits - // ==> n = n & 3 e.g. 
000000000000000000000000000000XX - return function (num: number): number { - num = num | 0; // get asm.js like optimization under V8 - if (num < 32) { - return control | 0; - } - if (num < 127) { - return 1; - } - if (num === 0x1F600) return 2; - const t = table || initTable(); - if (num < 65536) { - return t[num >> 4] >> ((num & 15) << 1) & 3; - } - // do a full search for high codepoints - return wcwidthHigh(num); - }; - })({nul: 0, control: 0}) // configurable options + + lookupTable = table; + } }; diff --git a/src/unicode/v6.test.ts b/src/unicode/v6.test.ts new file mode 100644 index 0000000000..d0f57273e8 --- /dev/null +++ b/src/unicode/v6.test.ts @@ -0,0 +1,186 @@ +/** + * Copyright (c) 2018 The xterm.js authors. All rights reserved. + * @license MIT + */ + +import { assert } from 'chai'; +import { v6 } from './v6'; + +// old implementation +const wcwidthOld = (function(opts: {nul: number, control: number}): (ucs: number) => number { + // extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c + // combining characters + const COMBINING_BMP = [ + [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489], + [0x0591, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], + [0x05C4, 0x05C5], [0x05C7, 0x05C7], [0x0600, 0x0603], + [0x0610, 0x0615], [0x064B, 0x065E], [0x0670, 0x0670], + [0x06D6, 0x06E4], [0x06E7, 0x06E8], [0x06EA, 0x06ED], + [0x070F, 0x070F], [0x0711, 0x0711], [0x0730, 0x074A], + [0x07A6, 0x07B0], [0x07EB, 0x07F3], [0x0901, 0x0902], + [0x093C, 0x093C], [0x0941, 0x0948], [0x094D, 0x094D], + [0x0951, 0x0954], [0x0962, 0x0963], [0x0981, 0x0981], + [0x09BC, 0x09BC], [0x09C1, 0x09C4], [0x09CD, 0x09CD], + [0x09E2, 0x09E3], [0x0A01, 0x0A02], [0x0A3C, 0x0A3C], + [0x0A41, 0x0A42], [0x0A47, 0x0A48], [0x0A4B, 0x0A4D], + [0x0A70, 0x0A71], [0x0A81, 0x0A82], [0x0ABC, 0x0ABC], + [0x0AC1, 0x0AC5], [0x0AC7, 0x0AC8], [0x0ACD, 0x0ACD], + [0x0AE2, 0x0AE3], [0x0B01, 0x0B01], [0x0B3C, 0x0B3C], + [0x0B3F, 0x0B3F], [0x0B41, 0x0B43], [0x0B4D, 0x0B4D], + [0x0B56, 0x0B56], [0x0B82, 
0x0B82], [0x0BC0, 0x0BC0], + [0x0BCD, 0x0BCD], [0x0C3E, 0x0C40], [0x0C46, 0x0C48], + [0x0C4A, 0x0C4D], [0x0C55, 0x0C56], [0x0CBC, 0x0CBC], + [0x0CBF, 0x0CBF], [0x0CC6, 0x0CC6], [0x0CCC, 0x0CCD], + [0x0CE2, 0x0CE3], [0x0D41, 0x0D43], [0x0D4D, 0x0D4D], + [0x0DCA, 0x0DCA], [0x0DD2, 0x0DD4], [0x0DD6, 0x0DD6], + [0x0E31, 0x0E31], [0x0E34, 0x0E3A], [0x0E47, 0x0E4E], + [0x0EB1, 0x0EB1], [0x0EB4, 0x0EB9], [0x0EBB, 0x0EBC], + [0x0EC8, 0x0ECD], [0x0F18, 0x0F19], [0x0F35, 0x0F35], + [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F71, 0x0F7E], + [0x0F80, 0x0F84], [0x0F86, 0x0F87], [0x0F90, 0x0F97], + [0x0F99, 0x0FBC], [0x0FC6, 0x0FC6], [0x102D, 0x1030], + [0x1032, 0x1032], [0x1036, 0x1037], [0x1039, 0x1039], + [0x1058, 0x1059], [0x1160, 0x11FF], [0x135F, 0x135F], + [0x1712, 0x1714], [0x1732, 0x1734], [0x1752, 0x1753], + [0x1772, 0x1773], [0x17B4, 0x17B5], [0x17B7, 0x17BD], + [0x17C6, 0x17C6], [0x17C9, 0x17D3], [0x17DD, 0x17DD], + [0x180B, 0x180D], [0x18A9, 0x18A9], [0x1920, 0x1922], + [0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193B], + [0x1A17, 0x1A18], [0x1B00, 0x1B03], [0x1B34, 0x1B34], + [0x1B36, 0x1B3A], [0x1B3C, 0x1B3C], [0x1B42, 0x1B42], + [0x1B6B, 0x1B73], [0x1DC0, 0x1DCA], [0x1DFE, 0x1DFF], + [0x200B, 0x200F], [0x202A, 0x202E], [0x2060, 0x2063], + [0x206A, 0x206F], [0x20D0, 0x20EF], [0x302A, 0x302F], + [0x3099, 0x309A], [0xA806, 0xA806], [0xA80B, 0xA80B], + [0xA825, 0xA826], [0xFB1E, 0xFB1E], [0xFE00, 0xFE0F], + [0xFE20, 0xFE23], [0xFEFF, 0xFEFF], [0xFFF9, 0xFFFB] + ]; + const COMBINING_HIGH = [ + [0x10A01, 0x10A03], [0x10A05, 0x10A06], [0x10A0C, 0x10A0F], + [0x10A38, 0x10A3A], [0x10A3F, 0x10A3F], [0x1D167, 0x1D169], + [0x1D173, 0x1D182], [0x1D185, 0x1D18B], [0x1D1AA, 0x1D1AD], + [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F], + [0xE0100, 0xE01EF] + ]; + // binary search + function bisearch(ucs: number, data: number[][]): boolean { + let min = 0; + let max = data.length - 1; + let mid; + if (ucs < data[0][0] || ucs > data[max][1]) { + return false; + } + while 
(max >= min) { + mid = (min + max) >> 1; + if (ucs > data[mid][1]) { + min = mid + 1; + } else if (ucs < data[mid][0]) { + max = mid - 1; + } else { + return true; + } + } + return false; + } + function wcwidthBMP(ucs: number): number { + // test for 8-bit control characters + if (ucs === 0) { + return opts.nul; + } + if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) { + return opts.control; + } + // binary search in table of non-spacing characters + if (bisearch(ucs, COMBINING_BMP)) { + return 0; + } + // if we arrive here, ucs is not a combining or C0/C1 control character + if (isWideBMP(ucs)) { + return 2; + } + return 1; + } + function isWideBMP(ucs: number): boolean { + return ( + ucs >= 0x1100 && ( + ucs <= 0x115f || // Hangul Jamo init. consonants + ucs === 0x2329 || + ucs === 0x232a || + (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) || // CJK..Yi + (ucs >= 0xac00 && ucs <= 0xd7a3) || // Hangul Syllables + (ucs >= 0xf900 && ucs <= 0xfaff) || // CJK Compat Ideographs + (ucs >= 0xfe10 && ucs <= 0xfe19) || // Vertical forms + (ucs >= 0xfe30 && ucs <= 0xfe6f) || // CJK Compat Forms + (ucs >= 0xff00 && ucs <= 0xff60) || // Fullwidth Forms + (ucs >= 0xffe0 && ucs <= 0xffe6))); + } + function wcwidthHigh(ucs: number): 0 | 1 | 2 { + if (bisearch(ucs, COMBINING_HIGH)) { + return 0; + } + if ((ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) { + return 2; + } + return 1; + } + const control = opts.control | 0; + let table: number[] | Uint32Array = null; + function initTable(): number[] | Uint32Array { + // lookup table for BMP + const CODEPOINTS = 65536; // BMP holds 65536 codepoints + const BITWIDTH = 2; // a codepoint can have a width of 0, 1 or 2 + const ITEMSIZE = 32; // using uint32_t + const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE; + const CODEPOINTS_PER_ITEM = ITEMSIZE / BITWIDTH; + table = (typeof Uint32Array === 'undefined') + ? 
new Array(CONTAINERSIZE) + : new Uint32Array(CONTAINERSIZE); + for (let i = 0; i < CONTAINERSIZE; ++i) { + let num = 0; + let pos = CODEPOINTS_PER_ITEM; + while (pos--) { + num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos); + } + table[i] = num; + } + return table; + } + // get width from lookup table + // position in container : num / CODEPOINTS_PER_ITEM + // ==> n = table[Math.floor(num / 16)] + // ==> n = table[num >> 4] + // 16 codepoints per number: FFEEDDCCBBAA99887766554433221100 + // position in number : (num % CODEPOINTS_PER_ITEM) * BITWIDTH + // ==> m = (n % 16) * 2 + // ==> m = (num & 15) << 1 + // right shift to position m + // ==> n = n >> m e.g. m=12 000000000000FFEEDDCCBBAA99887766 + // we are only interested in 2 LSBs, cut off higher bits + // ==> n = n & 3 e.g. 000000000000000000000000000000XX + return function (num: number): number { + num = num | 0; // get asm.js like optimization under V8 + if (num < 32) { + return control | 0; + } + if (num < 127) { + return 1; + } + const t = table || initTable(); + if (num < 65536) { + return t[num >> 4] >> ((num & 15) << 1) & 3; + } + // do a full search for high codepoints + return wcwidthHigh(num); + }; +})({nul: 0, control: 0}); // configurable options + +describe('unicode - v6', () => { + const versionProvider = v6; + versionProvider.init(); + it('wcwidth should match all values from the old implementation', () => { + // test full BMP range old vs new implmenetation + for (let i = 0; i < 65536; ++i) { + assert.equal(versionProvider.wcwidth(i), wcwidthOld(i), `mismatch for i: ${i}`); + } + }); +}); diff --git a/src/unicode/v6.ts b/src/unicode/v6.ts index 3bf775079f..18012c09b1 100644 --- a/src/unicode/v6.ts +++ b/src/unicode/v6.ts @@ -3,173 +3,148 @@ * @license MIT */ import { IUnicodeVersionProvider } from '../Types'; +import { fill } from '../common/TypedArrayUtils'; + +// extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c +const COMBINING_BMP = [ + [0x0300, 0x036F], [0x0483, 
0x0486], [0x0488, 0x0489], + [0x0591, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], + [0x05C4, 0x05C5], [0x05C7, 0x05C7], [0x0600, 0x0603], + [0x0610, 0x0615], [0x064B, 0x065E], [0x0670, 0x0670], + [0x06D6, 0x06E4], [0x06E7, 0x06E8], [0x06EA, 0x06ED], + [0x070F, 0x070F], [0x0711, 0x0711], [0x0730, 0x074A], + [0x07A6, 0x07B0], [0x07EB, 0x07F3], [0x0901, 0x0902], + [0x093C, 0x093C], [0x0941, 0x0948], [0x094D, 0x094D], + [0x0951, 0x0954], [0x0962, 0x0963], [0x0981, 0x0981], + [0x09BC, 0x09BC], [0x09C1, 0x09C4], [0x09CD, 0x09CD], + [0x09E2, 0x09E3], [0x0A01, 0x0A02], [0x0A3C, 0x0A3C], + [0x0A41, 0x0A42], [0x0A47, 0x0A48], [0x0A4B, 0x0A4D], + [0x0A70, 0x0A71], [0x0A81, 0x0A82], [0x0ABC, 0x0ABC], + [0x0AC1, 0x0AC5], [0x0AC7, 0x0AC8], [0x0ACD, 0x0ACD], + [0x0AE2, 0x0AE3], [0x0B01, 0x0B01], [0x0B3C, 0x0B3C], + [0x0B3F, 0x0B3F], [0x0B41, 0x0B43], [0x0B4D, 0x0B4D], + [0x0B56, 0x0B56], [0x0B82, 0x0B82], [0x0BC0, 0x0BC0], + [0x0BCD, 0x0BCD], [0x0C3E, 0x0C40], [0x0C46, 0x0C48], + [0x0C4A, 0x0C4D], [0x0C55, 0x0C56], [0x0CBC, 0x0CBC], + [0x0CBF, 0x0CBF], [0x0CC6, 0x0CC6], [0x0CCC, 0x0CCD], + [0x0CE2, 0x0CE3], [0x0D41, 0x0D43], [0x0D4D, 0x0D4D], + [0x0DCA, 0x0DCA], [0x0DD2, 0x0DD4], [0x0DD6, 0x0DD6], + [0x0E31, 0x0E31], [0x0E34, 0x0E3A], [0x0E47, 0x0E4E], + [0x0EB1, 0x0EB1], [0x0EB4, 0x0EB9], [0x0EBB, 0x0EBC], + [0x0EC8, 0x0ECD], [0x0F18, 0x0F19], [0x0F35, 0x0F35], + [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F71, 0x0F7E], + [0x0F80, 0x0F84], [0x0F86, 0x0F87], [0x0F90, 0x0F97], + [0x0F99, 0x0FBC], [0x0FC6, 0x0FC6], [0x102D, 0x1030], + [0x1032, 0x1032], [0x1036, 0x1037], [0x1039, 0x1039], + [0x1058, 0x1059], [0x1160, 0x11FF], [0x135F, 0x135F], + [0x1712, 0x1714], [0x1732, 0x1734], [0x1752, 0x1753], + [0x1772, 0x1773], [0x17B4, 0x17B5], [0x17B7, 0x17BD], + [0x17C6, 0x17C6], [0x17C9, 0x17D3], [0x17DD, 0x17DD], + [0x180B, 0x180D], [0x18A9, 0x18A9], [0x1920, 0x1922], + [0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193B], + [0x1A17, 0x1A18], [0x1B00, 0x1B03], [0x1B34, 0x1B34], + [0x1B36, 
0x1B3A], [0x1B3C, 0x1B3C], [0x1B42, 0x1B42], + [0x1B6B, 0x1B73], [0x1DC0, 0x1DCA], [0x1DFE, 0x1DFF], + [0x200B, 0x200F], [0x202A, 0x202E], [0x2060, 0x2063], + [0x206A, 0x206F], [0x20D0, 0x20EF], [0x302A, 0x302F], + [0x3099, 0x309A], [0xA806, 0xA806], [0xA80B, 0xA80B], + [0xA825, 0xA826], [0xFB1E, 0xFB1E], [0xFE00, 0xFE0F], + [0xFE20, 0xFE23], [0xFEFF, 0xFEFF], [0xFFF9, 0xFFFB] +]; + +const COMBINING_HIGH = [ + [0x10A01, 0x10A03], [0x10A05, 0x10A06], [0x10A0C, 0x10A0F], + [0x10A38, 0x10A3A], [0x10A3F, 0x10A3F], [0x1D167, 0x1D169], + [0x1D173, 0x1D182], [0x1D185, 0x1D18B], [0x1D1AA, 0x1D1AD], + [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F], + [0xE0100, 0xE01EF] +]; + +let lookupTable: Uint8Array | null = null; + +function bisearch(ucs: number, data: number[][]): boolean { + let min = 0; + let max = data.length - 1; + let mid; + if (ucs < data[0][0] || ucs > data[max][1]) { + return false; + } + while (max >= min) { + mid = (min + max) >> 1; + if (ucs > data[mid][1]) { + min = mid + 1; + } else if (ucs < data[mid][0]) { + max = mid - 1; + } else { + return true; + } + } + return false; +} + +function wcwidthHigh(ucs: number): 0 | 1 | 2 { + if (bisearch(ucs, COMBINING_HIGH)) { + return 0; + } + if ((ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) { + return 2; + } + return 1; +} export const v6: IUnicodeVersionProvider = { version: 6, - wcwidth: (function(opts: {nul: number, control: number}): (ucs: number) => number { - // extracted from https://www.cl.cam.ac.uk/%7Emgk25/ucs/wcwidth.c - // combining characters - const COMBINING_BMP = [ - [0x0300, 0x036F], [0x0483, 0x0486], [0x0488, 0x0489], - [0x0591, 0x05BD], [0x05BF, 0x05BF], [0x05C1, 0x05C2], - [0x05C4, 0x05C5], [0x05C7, 0x05C7], [0x0600, 0x0603], - [0x0610, 0x0615], [0x064B, 0x065E], [0x0670, 0x0670], - [0x06D6, 0x06E4], [0x06E7, 0x06E8], [0x06EA, 0x06ED], - [0x070F, 0x070F], [0x0711, 0x0711], [0x0730, 0x074A], - [0x07A6, 0x07B0], [0x07EB, 0x07F3], [0x0901, 0x0902], - [0x093C, 
0x093C], [0x0941, 0x0948], [0x094D, 0x094D], - [0x0951, 0x0954], [0x0962, 0x0963], [0x0981, 0x0981], - [0x09BC, 0x09BC], [0x09C1, 0x09C4], [0x09CD, 0x09CD], - [0x09E2, 0x09E3], [0x0A01, 0x0A02], [0x0A3C, 0x0A3C], - [0x0A41, 0x0A42], [0x0A47, 0x0A48], [0x0A4B, 0x0A4D], - [0x0A70, 0x0A71], [0x0A81, 0x0A82], [0x0ABC, 0x0ABC], - [0x0AC1, 0x0AC5], [0x0AC7, 0x0AC8], [0x0ACD, 0x0ACD], - [0x0AE2, 0x0AE3], [0x0B01, 0x0B01], [0x0B3C, 0x0B3C], - [0x0B3F, 0x0B3F], [0x0B41, 0x0B43], [0x0B4D, 0x0B4D], - [0x0B56, 0x0B56], [0x0B82, 0x0B82], [0x0BC0, 0x0BC0], - [0x0BCD, 0x0BCD], [0x0C3E, 0x0C40], [0x0C46, 0x0C48], - [0x0C4A, 0x0C4D], [0x0C55, 0x0C56], [0x0CBC, 0x0CBC], - [0x0CBF, 0x0CBF], [0x0CC6, 0x0CC6], [0x0CCC, 0x0CCD], - [0x0CE2, 0x0CE3], [0x0D41, 0x0D43], [0x0D4D, 0x0D4D], - [0x0DCA, 0x0DCA], [0x0DD2, 0x0DD4], [0x0DD6, 0x0DD6], - [0x0E31, 0x0E31], [0x0E34, 0x0E3A], [0x0E47, 0x0E4E], - [0x0EB1, 0x0EB1], [0x0EB4, 0x0EB9], [0x0EBB, 0x0EBC], - [0x0EC8, 0x0ECD], [0x0F18, 0x0F19], [0x0F35, 0x0F35], - [0x0F37, 0x0F37], [0x0F39, 0x0F39], [0x0F71, 0x0F7E], - [0x0F80, 0x0F84], [0x0F86, 0x0F87], [0x0F90, 0x0F97], - [0x0F99, 0x0FBC], [0x0FC6, 0x0FC6], [0x102D, 0x1030], - [0x1032, 0x1032], [0x1036, 0x1037], [0x1039, 0x1039], - [0x1058, 0x1059], [0x1160, 0x11FF], [0x135F, 0x135F], - [0x1712, 0x1714], [0x1732, 0x1734], [0x1752, 0x1753], - [0x1772, 0x1773], [0x17B4, 0x17B5], [0x17B7, 0x17BD], - [0x17C6, 0x17C6], [0x17C9, 0x17D3], [0x17DD, 0x17DD], - [0x180B, 0x180D], [0x18A9, 0x18A9], [0x1920, 0x1922], - [0x1927, 0x1928], [0x1932, 0x1932], [0x1939, 0x193B], - [0x1A17, 0x1A18], [0x1B00, 0x1B03], [0x1B34, 0x1B34], - [0x1B36, 0x1B3A], [0x1B3C, 0x1B3C], [0x1B42, 0x1B42], - [0x1B6B, 0x1B73], [0x1DC0, 0x1DCA], [0x1DFE, 0x1DFF], - [0x200B, 0x200F], [0x202A, 0x202E], [0x2060, 0x2063], - [0x206A, 0x206F], [0x20D0, 0x20EF], [0x302A, 0x302F], - [0x3099, 0x309A], [0xA806, 0xA806], [0xA80B, 0xA80B], - [0xA825, 0xA826], [0xFB1E, 0xFB1E], [0xFE00, 0xFE0F], - [0xFE20, 0xFE23], [0xFEFF, 0xFEFF], [0xFFF9, 
0xFFFB] - ]; - const COMBINING_HIGH = [ - [0x10A01, 0x10A03], [0x10A05, 0x10A06], [0x10A0C, 0x10A0F], - [0x10A38, 0x10A3A], [0x10A3F, 0x10A3F], [0x1D167, 0x1D169], - [0x1D173, 0x1D182], [0x1D185, 0x1D18B], [0x1D1AA, 0x1D1AD], - [0x1D242, 0x1D244], [0xE0001, 0xE0001], [0xE0020, 0xE007F], - [0xE0100, 0xE01EF] - ]; - // binary search - function bisearch(ucs: number, data: number[][]): boolean { - let min = 0; - let max = data.length - 1; - let mid; - if (ucs < data[0][0] || ucs > data[max][1]) { - return false; - } - while (max >= min) { - mid = (min + max) >> 1; - if (ucs > data[mid][1]) { - min = mid + 1; - } else if (ucs < data[mid][0]) { - max = mid - 1; - } else { - return true; - } - } - return false; + wcwidth: (ucs: number): number => { + if (ucs < 32) { + return 0; } - function wcwidthBMP(ucs: number): number { - // test for 8-bit control characters - if (ucs === 0) { - return opts.nul; - } - if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) { - return opts.control; - } - // binary search in table of non-spacing characters - if (bisearch(ucs, COMBINING_BMP)) { - return 0; - } - // if we arrive here, ucs is not a combining or C0/C1 control character - if (isWideBMP(ucs)) { - return 2; - } + if (ucs < 127) { return 1; } - function isWideBMP(ucs: number): boolean { - return ( - ucs >= 0x1100 && ( - ucs <= 0x115f || // Hangul Jamo init. 
consonants - ucs === 0x2329 || - ucs === 0x232a || - (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs !== 0x303f) || // CJK..Yi - (ucs >= 0xac00 && ucs <= 0xd7a3) || // Hangul Syllables - (ucs >= 0xf900 && ucs <= 0xfaff) || // CJK Compat Ideographs - (ucs >= 0xfe10 && ucs <= 0xfe19) || // Vertical forms - (ucs >= 0xfe30 && ucs <= 0xfe6f) || // CJK Compat Forms - (ucs >= 0xff00 && ucs <= 0xff60) || // Fullwidth Forms - (ucs >= 0xffe0 && ucs <= 0xffe6))); + if (ucs < 65536) { + return lookupTable[ucs]; } - function wcwidthHigh(ucs: number): 0 | 1 | 2 { - if (bisearch(ucs, COMBINING_HIGH)) { - return 0; - } - if ((ucs >= 0x20000 && ucs <= 0x2fffd) || (ucs >= 0x30000 && ucs <= 0x3fffd)) { - return 2; - } - return 1; + // do a full search for high codepoints + return wcwidthHigh(ucs); + }, + init: () => { + // init only once + if (lookupTable) { + return; } - const control = opts.control | 0; - let table: number[] | Uint32Array = null; - function initTable(): number[] | Uint32Array { - // lookup table for BMP - const CODEPOINTS = 65536; // BMP holds 65536 codepoints - const BITWIDTH = 2; // a codepoint can have a width of 0, 1 or 2 - const ITEMSIZE = 32; // using uint32_t - const CONTAINERSIZE = CODEPOINTS * BITWIDTH / ITEMSIZE; - const CODEPOINTS_PER_ITEM = ITEMSIZE / BITWIDTH; - table = (typeof Uint32Array === 'undefined') - ? 
new Array(CONTAINERSIZE) - : new Uint32Array(CONTAINERSIZE); - for (let i = 0; i < CONTAINERSIZE; ++i) { - let num = 0; - let pos = CODEPOINTS_PER_ITEM; - while (pos--) { - num = (num << 2) | wcwidthBMP(CODEPOINTS_PER_ITEM * i + pos); - } - table[i] = num; - } - return table; + + // create lookup table for BMP plane + const table = new Uint8Array(65536); + fill(table, 1); + table[0] = 0; + // control chars + fill(table, 0, 1, 32); + fill(table, 0, 0x7f, 0xa0); + + // apply wide char rules first + // wide chars + fill(table, 2, 0x1100, 0x1160); + table[0x2329] = 2; + table[0x232a] = 2; + fill(table, 2, 0x2e80, 0xa4d0); + table[0x303f] = 1; // wrongly in last line + + fill(table, 2, 0xac00, 0xd7a4); + fill(table, 2, 0xf900, 0xfb00); + fill(table, 2, 0xfe10, 0xfe1a); + fill(table, 2, 0xfe30, 0xfe70); + fill(table, 2, 0xff00, 0xff61); + fill(table, 2, 0xffe0, 0xffe7); + + // apply combining last to ensure we overwrite + // wrongly wide set chars: + // the original algo evals combining first and falls + // through to wide check so we simply do here the opposite + // combining 0 + for (let r = 0; r < COMBINING_BMP.length; ++r) { + fill(table, 0, COMBINING_BMP[r][0], COMBINING_BMP[r][1] + 1); } - // get width from lookup table - // position in container : num / CODEPOINTS_PER_ITEM - // ==> n = table[Math.floor(num / 16)] - // ==> n = table[num >> 4] - // 16 codepoints per number: FFEEDDCCBBAA99887766554433221100 - // position in number : (num % CODEPOINTS_PER_ITEM) * BITWIDTH - // ==> m = (n % 16) * 2 - // ==> m = (num & 15) << 1 - // right shift to position m - // ==> n = n >> m e.g. m=12 000000000000FFEEDDCCBBAA99887766 - // we are only interested in 2 LSBs, cut off higher bits - // ==> n = n & 3 e.g. 
000000000000000000000000000000XX - return function (num: number): number { - num = num | 0; // get asm.js like optimization under V8 - if (num < 32) { - return control | 0; - } - if (num < 127) { - return 1; - } - const t = table || initTable(); - if (num < 65536) { - return t[num >> 4] >> ((num & 15) << 1) & 3; - } - // do a full search for high codepoints - return wcwidthHigh(num); - }; - })({nul: 0, control: 0}) // configurable options + + lookupTable = table; + } }; From a1257a1184ad9d79a8db1174c4259978ba9b8c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Breitbart?= Date: Sat, 24 Nov 2018 02:14:52 +0100 Subject: [PATCH 7/7] fix indentation; fix test dummies --- demo/client.ts | 2 +- src/UnicodeManager.test.ts | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/demo/client.ts b/demo/client.ts index dc72fde3b3..baa1f11b4c 100644 --- a/demo/client.ts +++ b/demo/client.ts @@ -220,7 +220,7 @@ function initOptions(term: TerminalType): void { booleanOptions.push(o); break; case 'number': - numberOptions.push(o); + numberOptions.push(o); break; default: if (Object.keys(stringOptions).indexOf(o) === -1) { diff --git a/src/UnicodeManager.test.ts b/src/UnicodeManager.test.ts index 6d10cfaa2a..f666479191 100644 --- a/src/UnicodeManager.test.ts +++ b/src/UnicodeManager.test.ts @@ -8,11 +8,13 @@ import { IUnicodeVersionProvider } from './Types'; const VERSION_DUMMY1: IUnicodeVersionProvider = { version: 15, - wcwidth: (n: number) => n + wcwidth: (n: number) => n, + init: () => {} }; const VERSION_DUMMY2: IUnicodeVersionProvider = { version: 17, - wcwidth: (n: number) => n + wcwidth: (n: number) => n, + init: () => {} }; describe('UnicodeProvider', function(): void {