Skip to content

Commit

Permalink
drop zero-width character
Browse files Browse the repository at this point in the history
  • Loading branch information
poderosaproject committed Feb 20, 2023
1 parent 53a53f2 commit 9ca6e68
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 39 deletions.
3 changes: 3 additions & 0 deletions Core/GLine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1376,6 +1376,9 @@ public static GLine CreateSimpleGLine(string text, TextDecoration dec) {
if (!conv.Feed(originalChar, out unicodeChar)) {
continue;
}
if (unicodeChar.IsZeroWidth) {
continue; // drop
}

GChar gchar = new GChar(unicodeChar);
buff[offset].Set(gchar, attr);
Expand Down
121 changes: 82 additions & 39 deletions Core/Unicode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ namespace Poderosa.Document {
[Flags]
public enum UnicodeCharFlags : uint {
None = 0u,
/// <summary>Zero-width character (should not be displayed)</summary>
ZeroWidth = 1u << 29,
/// <summary>CJK character (should be displayed with the CJK font)</summary>
CJK = 1u << 30,
/// <summary>Wide-width character (should be displayed with two columns)</summary>
Expand All @@ -42,6 +44,7 @@ public struct UnicodeChar {

// bit 0..20 : Unicode Code Point
//
// bit 29 : zero width
// bit 30 : CJK
// bit 31 : wide width

Expand Down Expand Up @@ -87,12 +90,21 @@ internal uint RawData {
}
}

/// <summary>
/// Gets whether the zero-width flag bit is set for this character.
/// </summary>
public bool IsZeroWidth {
    get {
        uint zeroWidthBit = (uint)UnicodeCharFlags.ZeroWidth;
        return (_bits & zeroWidthBit) != 0u;
    }
}

/// <summary>
/// Whether this character is a wide-width character.
/// </summary>
public bool IsWideWidth {
get {
return Has(UnicodeCharFlags.WideWidth);
return (_bits & (uint)UnicodeCharFlags.WideWidth) != 0u;
}
}

Expand All @@ -101,7 +113,7 @@ public bool IsWideWidth {
/// </summary>
public bool IsCJK {
get {
return Has(UnicodeCharFlags.CJK);
return (_bits & (uint)UnicodeCharFlags.CJK) != 0u;
}
}

Expand All @@ -110,10 +122,8 @@ public bool IsCJK {
/// </summary>
/// <param name="ch">a character</param>
/// <param name="cjk">allow cjk mode</param>
public UnicodeChar(char ch, bool cjk) {
uint codePoint = (uint)ch;
UnicodeCharFlags flags = Unicode.DetermineUnicodeCharFlags(codePoint, cjk);
_bits = codePoint | (uint)flags;
public UnicodeChar(char ch, bool cjk)
: this((uint)ch, cjk) {
}

/// <summary>
Expand All @@ -122,10 +132,45 @@ public UnicodeChar(char ch, bool cjk) {
/// <param name="highSurrogate">high surrogate code</param>
/// <param name="lowSurrogate">low surrogate code</param>
/// <param name="cjk">allow cjk mode</param>
public UnicodeChar(char highSurrogate, char lowSurrogate, bool cjk) {
uint codePoint = Unicode.SurrogatePairToCodePoint(highSurrogate, lowSurrogate);
UnicodeCharFlags flags = Unicode.DetermineUnicodeCharFlags(codePoint, cjk);
_bits = codePoint | (uint)flags;
public UnicodeChar(char highSurrogate, char lowSurrogate, bool cjk)
: this(Unicode.SurrogatePairToCodePoint(highSurrogate, lowSurrogate), cjk) {
}

/// <summary>
/// Creates a character from a Unicode code point. The width and font type are
/// looked up with <see cref="Unicode.DetermineWidthAndFontType"/> and passed on
/// to the constructor that accepts a <see cref="Unicode.WidthAndFontType"/>.
/// </summary>
/// <param name="codePoint">Unicode code point</param>
/// <param name="cjk">allow cjk mode</param>
private UnicodeChar(uint codePoint, bool cjk)
: this(codePoint, Unicode.DetermineWidthAndFontType(codePoint, cjk)) {
}

/// <summary>
/// Creates a character from a Unicode code point and an already-determined
/// width / font-type pair. The pair is converted to <see cref="UnicodeCharFlags"/>
/// by <c>ToUnicodeCharFlags</c> and passed on to the (code point, flags) constructor.
/// </summary>
/// <param name="codePoint">Unicode code point</param>
/// <param name="widthAndFontType">character width and font-type</param>
private UnicodeChar(uint codePoint, Unicode.WidthAndFontType widthAndFontType)
: this(codePoint, ToUnicodeCharFlags(widthAndFontType)) {
}

/// <summary>
/// Converts a width / font-type pair into the corresponding <see cref="UnicodeCharFlags"/>.
/// </summary>
/// <param name="widthAndFontType">character width and font-type</param>
/// <returns>
/// <see cref="UnicodeCharFlags.ZeroWidth"/> for width 0, <see cref="UnicodeCharFlags.WideWidth"/> for width 2,
/// and no width flag for any other width; <see cref="UnicodeCharFlags.CJK"/> is added
/// when the CJK font is requested.
/// </returns>
private static UnicodeCharFlags ToUnicodeCharFlags(Unicode.WidthAndFontType widthAndFontType) {
    int width = widthAndFontType.Width;

    UnicodeCharFlags widthFlag;
    if (width == 0) {
        widthFlag = UnicodeCharFlags.ZeroWidth;
    }
    else if (width == 2) {
        widthFlag = UnicodeCharFlags.WideWidth;
    }
    else {
        widthFlag = UnicodeCharFlags.None;
    }

    // The font selection is independent of the width flag.
    return widthAndFontType.UseCJKFont
        ? (widthFlag | UnicodeCharFlags.CJK)
        : widthFlag;
}

/// <summary>
Expand All @@ -146,15 +191,6 @@ public UnicodeChar(uint codePoint, UnicodeCharFlags flags) {
public int WriteTo(char[] seq, int index) {
    // Mask with CodePointMask so that only the code-point bits are handed over.
    uint codePoint = this._bits & CodePointMask;
    return Unicode.WriteCodePointTo(codePoint, seq, index);
}

/// <summary>
/// Checks if the specified flags were set.
/// </summary>
/// <param name="flags"></param>
/// <returns>true if all of the specified flags were set.</returns>
public bool Has(UnicodeCharFlags flags) {
return (this._bits & (uint)flags) == (uint)flags;
}
}

/// <summary>
Expand Down Expand Up @@ -209,6 +245,23 @@ public bool Feed(char c, out UnicodeChar unicodeChar) {
/// </summary>
public static class Unicode {

/// <summary>
/// Immutable pair describing how a character is displayed:
/// its column width and whether the CJK font is used.
/// </summary>
public struct WidthAndFontType {
    /// <summary>
    /// Character width (0, 1 or 2)
    /// </summary>
    public readonly int Width;

    /// <summary>
    /// Whether the character should be displayed using CJK font
    /// </summary>
    public readonly bool UseCJKFont;

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="width">character width (0, 1 or 2)</param>
    /// <param name="useCJKFont">whether the character should be displayed using CJK font</param>
    internal WidthAndFontType(int width, bool useCJKFont) {
        this.Width = width;
        this.UseCJKFont = useCJKFont;
    }
}

private static readonly UnicodeWidthAndFontTypeTable _table = new UnicodeWidthAndFontTypeTable();

/// <summary>
Expand All @@ -223,21 +276,9 @@ public static void Initialize() {
/// Gets the character width and font type (<see cref="WidthAndFontType"/>) for a Unicode code point.
/// </summary>
/// <param name="codePoint">Unicode code point</param>
/// <param name="cjk">allow cjk mode</param>
/// <returns></returns>
public static UnicodeCharFlags DetermineUnicodeCharFlags(uint codePoint, bool cjk) {
int charWidth;
bool useCjkFont;
_table.GetWidthAndFontType(codePoint, cjk, out charWidth, out useCjkFont);

UnicodeCharFlags f = UnicodeCharFlags.None;
if (charWidth >= 2) {
f |= UnicodeCharFlags.WideWidth;
}
if (useCjkFont) {
f |= UnicodeCharFlags.CJK;
}
return f;
/// <param name="cjk">returns values for the CJK mode</param>
public static WidthAndFontType DetermineWidthAndFontType(uint codePoint, bool cjk) {
return _table.GetWidthAndFontType(codePoint, cjk);
}

/// <summary>
Expand Down Expand Up @@ -380,11 +421,10 @@ public UnicodeWidthAndFontTypeTable() {
/// </summary>
/// <param name="codePoint">code point</param>
/// <param name="cjk">returns values for the CJK mode</param>
/// <param name="charWidth">returns character width; 0 (invisible), 1 (narrow), or 2 (wide)</param>
/// <param name="useCjkFont">returns whether the character should be displayed using CJK font.</param>
public void GetWidthAndFontType(uint codePoint, bool cjk, out int charWidth, out bool useCjkFont) {
public Unicode.WidthAndFontType GetWidthAndFontType(uint codePoint, bool cjk) {
uint tableIndex = codePoint / SUB_TABLE_CHARS;

int charWidth;
if (tableIndex < _charWidthTables.Length) {
int w = _charWidthTables[tableIndex].GetValue(codePoint);
charWidth = (w <= 2) ? w : (cjk ? 2 : 1);
Expand All @@ -393,13 +433,16 @@ public void GetWidthAndFontType(uint codePoint, bool cjk, out int charWidth, out
charWidth = 1; // default width
}

bool useCjkFont;
if (tableIndex < _fontTypeTables.Length) {
int w = _fontTypeTables[tableIndex].GetValue(codePoint);
useCjkFont = w == 2 || (w > 2 && cjk);
int t = _fontTypeTables[tableIndex].GetValue(codePoint);
useCjkFont = t == 2 || (t > 2 && cjk);
}
else {
useCjkFont = false; // default
}

return new Unicode.WidthAndFontType(charWidth, useCjkFont);
}

/// <summary>
Expand Down
3 changes: 3 additions & 0 deletions TerminalEmulator/TerminalBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,9 @@ protected ProcessCharResult ProcessNormalChar(char ch) {
if (!base.UnicodeCharConverter.Feed(ch, out unicodeChar)) {
return ProcessCharResult.Processed;
}
if (unicodeChar.IsZeroWidth) {
return ProcessCharResult.Processed; // drop
}

return ProcessNormalUnicodeChar(unicodeChar);
}
Expand Down

0 comments on commit 9ca6e68

Please sign in to comment.