From 9eddb0f8c89673f446d21f3628fc0572d7823cfb Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 09:53:53 +0100 Subject: [PATCH 01/17] Format --- tests/shaping/aots.rs | 1 - tests/shaping/in_house.rs | 1 - tests/shaping/text_rendering_tests.rs | 1 - 3 files changed, 3 deletions(-) diff --git a/tests/shaping/aots.rs b/tests/shaping/aots.rs index 6dffc7df..8c44a219 100644 --- a/tests/shaping/aots.rs +++ b/tests/shaping/aots.rs @@ -4986,4 +4986,3 @@ fn lookupflag_ignore_marks_001() { 21" ); } - diff --git a/tests/shaping/in_house.rs b/tests/shaping/in_house.rs index adb47cde..482f390f 100644 --- a/tests/shaping/in_house.rs +++ b/tests/shaping/in_house.rs @@ -13686,4 +13686,3 @@ fn zero_width_marks_011() { A=2+1368" ); } - diff --git a/tests/shaping/text_rendering_tests.rs b/tests/shaping/text_rendering_tests.rs index 4f9e21ba..f8436289 100644 --- a/tests/shaping/text_rendering_tests.rs +++ b/tests/shaping/text_rendering_tests.rs @@ -13077,4 +13077,3 @@ fn shlana_9_006() { uni1A5D@2424,0" ); } - From 6cd7cf9d96907fb71065ad99b707974b2a539208 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 09:56:02 +0100 Subject: [PATCH 02/17] [ot-shape] Enabled two more features: 'Harf' and 'Buzz' https://github.com/harfbuzz/harfbuzz/commit/5a1a6f1fe --- src/plan.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/plan.rs b/src/plan.rs index 9a2663c4..a5c6909c 100644 --- a/src/plan.rs +++ b/src/plan.rs @@ -187,14 +187,18 @@ impl<'a> ShapePlanner<'a> { .enable_feature(Tag::from_bytes(b"trak"), FeatureFlags::HAS_FALLBACK, 1); self.ot_map - .enable_feature(Tag::from_bytes(b"HARF"), empty, 1); + .enable_feature(Tag::from_bytes(b"Harf"), empty, 1); // Considered required. + self.ot_map + .enable_feature(Tag::from_bytes(b"HARF"), empty, 1); // Considered discretionary. 
if let Some(func) = self.shaper.collect_features { func(self); } self.ot_map - .enable_feature(Tag::from_bytes(b"BUZZ"), empty, 1); + .enable_feature(Tag::from_bytes(b"Buzz"), empty, 1); // Considered required. + self.ot_map + .enable_feature(Tag::from_bytes(b"BUZZ"), empty, 1); // Considered discretionary. for &(tag, flags) in COMMON_FEATURES { self.ot_map.add_feature(tag, flags, 1); From 4a775279744ddfb5e3336e380c6eb5adf20c2a1e Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 10:10:01 +0100 Subject: [PATCH 03/17] [Unicode 14] Add new `hb_script_t` values https://github.com/harfbuzz/harfbuzz/commit/67f1aa4d2 --- src/common.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/common.rs b/src/common.rs index 8c4689bd..462aebd6 100644 --- a/src/common.rs +++ b/src/common.rs @@ -442,6 +442,12 @@ pub mod script { pub const DIVES_AKURU: Script = Script::from_bytes(b"Diak"); pub const KHITAN_SMALL_SCRIPT: Script = Script::from_bytes(b"Kits"); pub const YEZIDI: Script = Script::from_bytes(b"Yezi"); + // Since 14.0 + pub const CYPRO_MINOAN: Script = Script::from_bytes(b"Cpmn"); + pub const OLD_UYGHUR: Script = Script::from_bytes(b"Ougr"); + pub const TANGSA: Script = Script::from_bytes(b"Tnsa"); + pub const TOTO: Script = Script::from_bytes(b"Toto"); + pub const VITHKUQI: Script = Script::from_bytes(b"Vith"); // https://github.com/harfbuzz/harfbuzz/issues/1162 pub const MYANMAR_ZAWGYI: Script = Script::from_bytes(b"Qaag"); From d910a326f3d5c7cdd617fbd277dfacac1768cbe8 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 10:34:34 +0100 Subject: [PATCH 04/17] [Unicode 14] Update the script direction list https://github.com/harfbuzz/harfbuzz/commit/7b05eec05 --- src/common.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/common.rs b/src/common.rs index 462aebd6..1bc3c6f8 100644 --- a/src/common.rs +++ 
b/src/common.rs @@ -129,7 +129,10 @@ impl Direction { // Unicode-13.0 additions script::CHORASMIAN | - script::YEZIDI => { + script::YEZIDI | + + // Unicode-14.0 additions + script::OLD_UYGHUR => { Some(Direction::RightToLeft) } From 69ae0b0e06c35852ded52742fc56ec371555871a Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 10:56:31 +0100 Subject: [PATCH 05/17] [Unicode 14] Update the Indic table https://github.com/harfbuzz/harfbuzz/commit/11d5334d8 --- scripts/gen-indic-table.py | 2 +- src/complex/indic.rs | 2 ++ src/complex/indic_table.rs | 17 +++++++++-------- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/scripts/gen-indic-table.py b/scripts/gen-indic-table.py index 521ecb4b..d935233d 100755 --- a/scripts/gen-indic-table.py +++ b/scripts/gen-indic-table.py @@ -15,7 +15,7 @@ for dep in DEPENDENCIES: if not os.path.exists(dep): - urllib.request.urlretrieve('https://unicode.org/Public/12.0.0/ucd/' + dep, dep) + urllib.request.urlretrieve('https://unicode.org/Public/14.0.0/ucd/' + dep, dep) ALLOWED_SINGLES = [0x00A0, 0x25CC] ALLOWED_BLOCKS = [ diff --git a/src/complex/indic.rs b/src/complex/indic.rs index 6ced8249..50267808 100644 --- a/src/complex/indic.rs +++ b/src/complex/indic.rs @@ -143,6 +143,7 @@ pub enum MatraCategory { LeftAndRight, TopAndBottom, TopAndBottomAndRight, + TopAndBottomAndLeft, TopAndLeft, TopAndLeftAndRight, TopAndRight, @@ -2026,6 +2027,7 @@ pub fn get_category_and_position(u: u32) -> (Category, Position) { MatraCategory::LeftAndRight => position::POST_C, MatraCategory::TopAndBottom => position::BELOW_C, MatraCategory::TopAndBottomAndRight => position::POST_C, + MatraCategory::TopAndBottomAndLeft => position::BELOW_C, MatraCategory::TopAndLeft => position::ABOVE_C, MatraCategory::TopAndLeftAndRight => position::POST_C, MatraCategory::TopAndRight => position::POST_C, diff --git a/src/complex/indic_table.rs b/src/complex/indic_table.rs index 024a56ad..5302e36b 
100644 --- a/src/complex/indic_table.rs +++ b/src/complex/indic_table.rs @@ -52,6 +52,7 @@ use MatraCategory::Overstruck as IMC_O; use MatraCategory::Right as IMC_R; use MatraCategory::Top as IMC_T; use MatraCategory::TopAndBottom as IMC_TB; +use MatraCategory::TopAndBottomAndLeft as IMC_TBL; use MatraCategory::TopAndBottomAndRight as IMC_TBR; use MatraCategory::TopAndLeft as IMC_TL; use MatraCategory::TopAndLeftAndRight as IMC_TLR; @@ -164,7 +165,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0B38 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_N,IMC_B), (ISC_A,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_T), /* 0B40 */ (ISC_M,IMC_R), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_L), /* 0B48 */ (ISC_M,IMC_TL), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_LR),(ISC_M,IMC_TLR), (ISC_V,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), - /* 0B50 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_TR), + /* 0B50 */ (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_M, IMC_T), (ISC_M, IMC_T), (ISC_M, IMC_TR), /* 0B58 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), /* 0B60 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0B68 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), @@ -199,11 +200,11 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0C20 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0C28 */ (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0C30 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), 
(ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), - /* 0C38 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_A,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_T), + /* 0C38 */ (ISC_C, IMC_x), (ISC_C, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_N, IMC_B), (ISC_A, IMC_x), (ISC_M, IMC_T), (ISC_M, IMC_T), /* 0C40 */ (ISC_M,IMC_T), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_T), /* 0C48 */ (ISC_M,IMC_TB), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_V,IMC_T), (ISC_x,IMC_x), (ISC_x,IMC_x), /* 0C50 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_B), (ISC_x,IMC_x), - /* 0C58 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), + /* 0C58 */ (ISC_C, IMC_x), (ISC_C, IMC_x), (ISC_C, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_CD, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), /* 0C60 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0C68 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0C70 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), @@ -222,7 +223,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0CC0 */ (ISC_M,IMC_TR), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_TR), /* 0CC8 */ (ISC_M,IMC_TR), (ISC_x,IMC_x), (ISC_M,IMC_TR), (ISC_M,IMC_TR), (ISC_M,IMC_T), (ISC_V,IMC_T), (ISC_x,IMC_x), (ISC_x,IMC_x), /* 0CD0 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_x,IMC_x), - /* 0CD8 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), 
(ISC_x,IMC_x), + /* 0CD8 */ (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_CD, IMC_x), (ISC_C, IMC_x), (ISC_x, IMC_x), /* 0CE0 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0CE8 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0CF0 */ (ISC_x,IMC_x),(ISC_CWS,IMC_x),(ISC_CWS,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), @@ -230,7 +231,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* Malayalam */ - /* 0D00 */ (ISC_Bi,IMC_T), (ISC_Bi,IMC_T), (ISC_Bi,IMC_R), (ISC_Vs,IMC_R), (ISC_x,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), + /* 0D00 */ (ISC_Bi, IMC_T), (ISC_Bi, IMC_T), (ISC_Bi, IMC_R), (ISC_Vs, IMC_R), (ISC_Bi, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), /* 0D08 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_x,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 0D10 */ (ISC_VI,IMC_x), (ISC_x,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0D18 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), @@ -239,7 +240,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0D30 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0D38 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_PK,IMC_T), (ISC_PK,IMC_T), (ISC_A,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), /* 0D40 */ (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_M,IMC_L), (ISC_M,IMC_L), - /* 0D48 */ (ISC_M,IMC_L), (ISC_x,IMC_x), (ISC_M,IMC_LR), (ISC_M,IMC_LR), (ISC_M,IMC_LR), (ISC_V,IMC_T),(ISC_CPR,IMC_x), (ISC_x,IMC_x), + /* 0D48 */ (ISC_M, IMC_L), (ISC_x, IMC_x), 
(ISC_M, IMC_LR), (ISC_M, IMC_LR), (ISC_M, IMC_LR), (ISC_V, IMC_T), (ISC_CPR, IMC_T), (ISC_x, IMC_x), /* 0D50 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_CD,IMC_x), (ISC_CD,IMC_x), (ISC_CD,IMC_x), (ISC_M,IMC_R), /* 0D58 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_VI,IMC_x), /* 0D60 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), @@ -249,7 +250,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* Sinhala */ - /* 0D80 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Bi,IMC_R), (ISC_Vs,IMC_R), (ISC_x,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), + /* 0D80 */ (ISC_x, IMC_x), (ISC_Bi, IMC_T), (ISC_Bi, IMC_R), (ISC_Vs, IMC_R), (ISC_x, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), /* 0D88 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 0D90 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_x,IMC_x), /* 0D98 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), @@ -274,7 +275,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 1020 */ (ISC_C,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 1028 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_B), /* 1030 */ (ISC_M,IMC_B), (ISC_M,IMC_L), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_Bi,IMC_T), (ISC_TM,IMC_B), - /* 1038 */ (ISC_Vs,IMC_R), (ISC_IS,IMC_x), (ISC_PK,IMC_T), (ISC_CM,IMC_R), (ISC_CM,IMC_x), (ISC_CM,IMC_B), (ISC_CM,IMC_B), (ISC_C,IMC_x), + /* 1038 */ (ISC_Vs, IMC_R), (ISC_IS, IMC_x), (ISC_PK, IMC_T), (ISC_CM, IMC_R), (ISC_CM, 
IMC_TBL), (ISC_CM, IMC_B), (ISC_CM, IMC_B), (ISC_C, IMC_x), /* 1040 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 1048 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_x,IMC_x), (ISC_CP,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_CP,IMC_x), (ISC_x,IMC_x), /* 1050 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), From b9eb3f8916427d5cc92fbc8e837cc5886a8fe4f0 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 11:02:10 +0100 Subject: [PATCH 06/17] [Unicode 14] Update the vowel constraint table https://github.com/harfbuzz/harfbuzz/commit/ec5688f50 --- scripts/gen-vowel-constraints.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gen-vowel-constraints.py b/scripts/gen-vowel-constraints.py index a5a08dd4..046119f7 100755 --- a/scripts/gen-vowel-constraints.py +++ b/scripts/gen-vowel-constraints.py @@ -15,7 +15,7 @@ import urllib.request if not os.path.exists('Scripts.txt'): - urllib.request.urlretrieve('https://unicode.org/Public/12.0.0/ucd/Scripts.txt', 'Scripts.txt') + urllib.request.urlretrieve('https://unicode.org/Public/14.0.0/ucd/Scripts.txt', 'Scripts.txt') with io.open('Scripts.txt', encoding='utf-8') as f: scripts_header = [f.readline() for i in range(2)] From 74472fdbc414fd5ec69f9dd6c6e51d540644eda2 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 11:28:43 +0100 Subject: [PATCH 07/17] [Unicode 14] Update the USE table https://github.com/harfbuzz/harfbuzz/commit/20736d9bb --- scripts/gen-universal-table.py | 2 +- src/complex/universal_table.rs | 58 ++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/scripts/gen-universal-table.py b/scripts/gen-universal-table.py index f690f1b5..5999e10f 100755 --- 
a/scripts/gen-universal-table.py +++ b/scripts/gen-universal-table.py @@ -14,7 +14,7 @@ for f in files: if not os.path.exists(f): urllib.request.urlretrieve( - 'https://unicode.org/Public/13.0.0/ucd/' + f, f) + 'https://unicode.org/Public/14.0.0/ucd/' + f, f) files = [io.open(x, encoding='utf-8') for x in files] diff --git a/src/complex/universal_table.rs b/src/complex/universal_table.rs index e8c6861b..6054e0ee 100644 --- a/src/complex/universal_table.rs +++ b/src/complex/universal_table.rs @@ -103,7 +103,7 @@ const USE_TABLE: &[Category] = &[ /* 0C00 */ VMABV, VMPST, VMPST, VMPST, VMABV, B, B, B, B, B, B, B, B, O, B, B, /* 0C10 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 0C20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, - /* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, O, B, VABV, VABV, + /* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, CMBLW, B, VABV, VABV, /* 0C40 */ VABV, VPST, VPST, VPST, VPST, O, VABV, VABV, VABV, O, VABV, VABV, VABV, H, O, O, /* 0C50 */ O, O, O, O, O, VABV, VBLW, O, B, B, B, O, O, O, O, O, /* 0C60 */ B, B, VBLW, VBLW, O, O, B, B, B, B, B, B, B, B, B, B, @@ -173,13 +173,13 @@ const USE_TABLE: &[Category] = &[ /* Tagalog */ - /* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, O, B, B, - /* 1710 */ B, B, VABV, VBLW, VBLW, O, O, O, O, O, O, O, O, O, O, O, + /* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1710 */ B, B, VABV, VBLW, VBLW, VPST, O, O, O, O, O, O, O, O, O, B, /* Hanunoo */ /* 1720 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, - /* 1730 */ B, B, VABV, VBLW, VBLW, O, O, O, O, O, O, O, O, O, O, O, + /* 1730 */ B, B, VABV, VBLW, VPST, O, O, O, O, O, O, O, O, O, O, O, /* Buhid */ @@ -263,7 +263,7 @@ const USE_TABLE: &[Category] = &[ /* 1B10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1B30 */ B, B, B, B, CMABV, VPST, VABV, VABV, VBLW, VBLW, VBLW, VBLW, VABV, VABV, VPRE, VPRE, - /* 1B40 */ VPRE, VPRE, VABV, VABV, H, B, B, B, B, B, B, B, O, 
O, O, O, + /* 1B40 */ VPRE, VPRE, VABV, VABV, H, B, B, B, B, B, B, B, B, O, O, O, /* 1B50 */ B, B, B, B, B, B, B, B, B, B, O, GB, GB, O, O, GB, /* 1B60 */ O, S, GB, S, S, S, S, S, GB, S, S, SMABV, SMBLW, SMABV, SMABV, SMABV, /* 1B70 */ SMABV, SMABV, SMABV, SMABV, O, O, O, O, O, O, O, O, O, O, O, O, @@ -471,7 +471,7 @@ const USE_TABLE: &[Category] = &[ /* 11040 */ VBLW, VBLW, VABV, VABV, VABV, VABV, HVM, O, O, O, O, O, O, O, O, O, /* 11050 */ O, O, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 11060 */ N, N, N, N, N, N, B, B, B, B, B, B, B, B, B, B, - /* 11070 */ O, O, O, O, O, O, O, O, O, O, O, O, O, O, O, HN, + /* 11070 */ VABV, B, B, VABV, VABV, B, O, O, O, O, O, O, O, O, O, HN, /* Kaithi */ @@ -479,6 +479,7 @@ const USE_TABLE: &[Category] = &[ /* 11090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 110B0 */ VPST, VPRE, VPST, VBLW, VBLW, VABV, VABV, VPST, VPST, H, CMBLW, O, O, O, O, O, + /* 110C0 */ O, O, VBLW, O, O, O, O, O, /* Chakma */ @@ -599,6 +600,7 @@ const USE_TABLE: &[Category] = &[ /* 11710 */ B, B, B, B, B, B, B, B, B, B, B, O, O, MBLW, MPRE, MABV, /* 11720 */ VPST, VPST, VABV, VABV, VBLW, VBLW, VPRE, VABV, VBLW, VABV, VABV, VABV, O, O, O, O, /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, + /* 11740 */ B, B, B, B, B, B, B, O, /* Dogra */ @@ -878,25 +880,25 @@ const USE_OFFSET_0X10D00: usize = 4176; const USE_OFFSET_0X10E80: usize = 4240; const USE_OFFSET_0X10F30: usize = 4296; const USE_OFFSET_0X10FB0: usize = 4336; -const USE_OFFSET_0X11100: usize = 4608; -const USE_OFFSET_0X11280: usize = 4928; -const USE_OFFSET_0X11400: usize = 5176; -const USE_OFFSET_0X11580: usize = 5400; -const USE_OFFSET_0X11800: usize = 5848; -const USE_OFFSET_0X11900: usize = 5912; -const USE_OFFSET_0X119A0: usize = 6008; -const USE_OFFSET_0X11C00: usize = 6264; -const USE_OFFSET_0X11D00: usize = 6448; -const USE_OFFSET_0X11EE0: usize = 6624; -const USE_OFFSET_0X13000: usize = 6648; -const 
USE_OFFSET_0X16B00: usize = 7736; -const USE_OFFSET_0X16F00: usize = 7792; -const USE_OFFSET_0X16FE0: usize = 7944; -const USE_OFFSET_0X18B00: usize = 7952; -const USE_OFFSET_0X1BC00: usize = 8424; -const USE_OFFSET_0X1E100: usize = 8584; -const USE_OFFSET_0X1E2C0: usize = 8664; -const USE_OFFSET_0X1E900: usize = 8728; +const USE_OFFSET_0X11100: usize = 4616; +const USE_OFFSET_0X11280: usize = 4936; +const USE_OFFSET_0X11400: usize = 5184; +const USE_OFFSET_0X11580: usize = 5408; +const USE_OFFSET_0X11800: usize = 5864; +const USE_OFFSET_0X11900: usize = 5928; +const USE_OFFSET_0X119A0: usize = 6024; +const USE_OFFSET_0X11C00: usize = 6280; +const USE_OFFSET_0X11D00: usize = 6464; +const USE_OFFSET_0X11EE0: usize = 6640; +const USE_OFFSET_0X13000: usize = 6664; +const USE_OFFSET_0X16B00: usize = 7752; +const USE_OFFSET_0X16F00: usize = 7808; +const USE_OFFSET_0X16FE0: usize = 7960; +const USE_OFFSET_0X18B00: usize = 7968; +const USE_OFFSET_0X1BC00: usize = 8440; +const USE_OFFSET_0X1E100: usize = 8600; +const USE_OFFSET_0X1E2C0: usize = 8680; +const USE_OFFSET_0X1E900: usize = 8744; #[rustfmt::skip] pub fn get_category(u: u32) -> Category { @@ -936,14 +938,14 @@ pub fn get_category(u: u32) -> Category { if (0x10D00..=0x10D3F).contains(&u) { return USE_TABLE[u as usize - 0x10D00 + USE_OFFSET_0X10D00]; } if (0x10E80..=0x10EB7).contains(&u) { return USE_TABLE[u as usize - 0x10E80 + USE_OFFSET_0X10E80]; } if (0x10F30..=0x10F57).contains(&u) { return USE_TABLE[u as usize - 0x10F30 + USE_OFFSET_0X10F30]; } - if (0x10FB0..=0x110BF).contains(&u) { return USE_TABLE[u as usize - 0x10FB0 + USE_OFFSET_0X10FB0]; } + if (0x10FB0..=0x110C7).contains(&u) { return USE_TABLE[u as usize - 0x10FB0 + USE_OFFSET_0X10FB0]; } } 0x11 => { - if (0x10FB0..=0x110BF).contains(&u) { return USE_TABLE[u as usize - 0x10FB0 + USE_OFFSET_0X10FB0]; } + if (0x10FB0..=0x110C7).contains(&u) { return USE_TABLE[u as usize - 0x10FB0 + USE_OFFSET_0X10FB0]; } if (0x11100..=0x1123F).contains(&u) { return 
USE_TABLE[u as usize - 0x11100 + USE_OFFSET_0X11100]; } if (0x11280..=0x11377).contains(&u) { return USE_TABLE[u as usize - 0x11280 + USE_OFFSET_0X11280]; } if (0x11400..=0x114DF).contains(&u) { return USE_TABLE[u as usize - 0x11400 + USE_OFFSET_0X11400]; } - if (0x11580..=0x1173F).contains(&u) { return USE_TABLE[u as usize - 0x11580 + USE_OFFSET_0X11580]; } + if (0x11580..=0x11747).contains(&u) { return USE_TABLE[u as usize - 0x11580 + USE_OFFSET_0X11580]; } if (0x11800..=0x1183F).contains(&u) { return USE_TABLE[u as usize - 0x11800 + USE_OFFSET_0X11800]; } if (0x11900..=0x1195F).contains(&u) { return USE_TABLE[u as usize - 0x11900 + USE_OFFSET_0X11900]; } if (0x119A0..=0x11A9F).contains(&u) { return USE_TABLE[u as usize - 0x119A0 + USE_OFFSET_0X119A0]; } From 0263c565aaa1a5cf49c5e8db68d09f67100d2c74 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 11:36:50 +0100 Subject: [PATCH 08/17] [Unicode 14] Send all the new scripts to USE https://github.com/harfbuzz/harfbuzz/commit/2c3e4b279 --- src/complex/mod.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/complex/mod.rs b/src/complex/mod.rs index 922ed479..895bed7a 100644 --- a/src/complex/mod.rs +++ b/src/complex/mod.rs @@ -306,7 +306,14 @@ pub fn complex_categorize( // Unicode-13.0 additions | script::CHORASMIAN - | script::DIVES_AKURU => { + | script::DIVES_AKURU + + // Unicode-14.0 additions + | script::CYPRO_MINOAN + | script::OLD_UYGHUR + | script::TANGSA + | script::TOTO + | script::VITHKUQI => { // If the designer designed the font for the 'DFLT' script, // (or we ended up arbitrarily pick 'latn'), use the default shaper. // Otherwise, use the specific shaper. 
From f3b771a7f99d1dc1cceb122bb2e07d8d921986ad Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 11:41:23 +0100 Subject: [PATCH 09/17] [Unicode 14] Update the Arabic joining script list https://github.com/harfbuzz/harfbuzz/commit/f1493357f --- src/complex/universal.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/complex/universal.rs b/src/complex/universal.rs index 22b0df99..21bcd48b 100644 --- a/src/complex/universal.rs +++ b/src/complex/universal.rs @@ -506,6 +506,7 @@ fn has_arabic_joining(script: Script) -> bool { | script::MANICHAEAN | script::MONGOLIAN | script::NKO + | script::OLD_UYGHUR | script::PHAGS_PA | script::PSALTER_PAHLAVI | script::SOGDIAN From 3c4cc525948fdae2e0ecfa52c96f0c00b820e0a3 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 11:43:35 +0100 Subject: [PATCH 10/17] [USE] Send Khitan Small Script and Yezidi to USE https://github.com/harfbuzz/harfbuzz/commit/470686e8 --- src/complex/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/complex/mod.rs b/src/complex/mod.rs index 895bed7a..e673e075 100644 --- a/src/complex/mod.rs +++ b/src/complex/mod.rs @@ -307,6 +307,8 @@ pub fn complex_categorize( // Unicode-13.0 additions | script::CHORASMIAN | script::DIVES_AKURU + | script::KHITAN_SMALL_SCRIPT + | script::YEZIDI // Unicode-14.0 additions | script::CYPRO_MINOAN From 33d217cf8ce9fc2979aff10c8abbd5840d810222 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 13:47:07 +0100 Subject: [PATCH 11/17] Update unicode_norm to 14.0 --- scripts/gen-unicode-norm-table.py | 4 ++-- src/unicode_norm.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/gen-unicode-norm-table.py b/scripts/gen-unicode-norm-table.py index 4153563a..fbe970e0 100755 --- a/scripts/gen-unicode-norm-table.py +++ b/scripts/gen-unicode-norm-table.py @@ 
-3,7 +3,7 @@ import urllib.request import os -URL = 'https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt' +URL = 'https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt' FILE_NAME = 'UnicodeData.txt' @@ -22,7 +22,7 @@ def hex_to_char_rs(c): print('//! The current implementation is not the fastest one. Just good enough.') print() print('#[allow(dead_code)]') -print('pub const UNICODE_VERSION: (u8, u8, u8) = (13, 0, 0);') +print('pub const UNICODE_VERSION: (u8, u8, u8) = (14, 0, 0);') print() print('// Rust support `Option` layout optimization, so it will take only 4 bytes.') print('pub const DECOMPOSITION_TABLE: &[(char, char, Option<char>)] = &[') diff --git a/src/unicode_norm.rs b/src/unicode_norm.rs index 6bd233d8..f8614d9d 100644 --- a/src/unicode_norm.rs +++ b/src/unicode_norm.rs @@ -5,7 +5,7 @@ //! The current implementation is not the fastest one. Just good enough. #[allow(dead_code)] -pub const UNICODE_VERSION: (u8, u8, u8) = (13, 0, 0); +pub const UNICODE_VERSION: (u8, u8, u8) = (14, 0, 0); // Rust support `Option` layout optimization, so it will take only 4 bytes. 
pub const DECOMPOSITION_TABLE: &[(char, char, Option<char>)] = &[ From 0e8de0650a05611625dc61c6973c92d7023ea4d2 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:51:41 +0100 Subject: [PATCH 12/17] Update unicode norm to 14 in test --- src/unicode.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unicode.rs b/src/unicode.rs index 7b961bcd..18530af2 100644 --- a/src/unicode.rs +++ b/src/unicode.rs @@ -850,7 +850,7 @@ mod tests { assert_eq!(unicode_ccc::UNICODE_VERSION, (13, 0, 0)); assert_eq!(unicode_properties::UNICODE_VERSION, (15, 0, 0)); assert_eq!(unicode_script::UNICODE_VERSION, (15, 0, 0)); - assert_eq!(crate::unicode_norm::UNICODE_VERSION, (13, 0, 0)); + assert_eq!(crate::unicode_norm::UNICODE_VERSION, (14, 0, 0)); } } From c97ba6a39c45bdd41dbf213174679be1ea8a1758 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:52:57 +0100 Subject: [PATCH 13/17] [Unicode 14] Update the Arabic table https://github.com/harfbuzz/harfbuzz/commit/18a46eeaf --- scripts/gen-arabic-table.py | 177 ++++++++++++++++++++++++++++++++++++ src/complex/arabic_table.rs | 155 ++++++++++++++++--------------- 2 files changed, 254 insertions(+), 78 deletions(-) create mode 100644 scripts/gen-arabic-table.py diff --git a/scripts/gen-arabic-table.py b/scripts/gen-arabic-table.py new file mode 100644 index 00000000..c7909a6f --- /dev/null +++ b/scripts/gen-arabic-table.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 + +# Based on harfbuzz/src/gen-arabic-table.py + +import os +import urllib.request + +DEPENDENCIES = [ + "ArabicShaping.txt", + "UnicodeData.txt", + "Blocks.txt", +] + +for dep in DEPENDENCIES: + if not os.path.exists(dep): + urllib.request.urlretrieve("https://unicode.org/Public/14.0.0/ucd/" + dep, dep) + +files = [open(x, encoding="utf-8") for x in DEPENDENCIES] + +headers = [ + [files[0].readline(), files[0].readline()], + 
[files[2].readline(), files[2].readline()], + ["UnicodeData.txt does not have a header."], +] +while files[0].readline().find("##################") < 0: + pass + +blocks = {} + + +def read_blocks(f): + global blocks + for line in f: + j = line.find("#") + if j >= 0: + line = line[:j] + + fields = [x.strip() for x in line.split(";")] + if len(fields) == 1: + continue + + uu = fields[0].split("..") + start = int(uu[0], 16) + if len(uu) == 1: + end = start + else: + end = int(uu[1], 16) + + t = fields[1] + + for u in range(start, end + 1): + blocks[u] = t + + +def print_joining_table(f): + values = {} + for line in f: + if line[0] == "#": + continue + + fields = [x.strip() for x in line.split(";")] + if len(fields) == 1: + continue + + u = int(fields[0], 16) + + if fields[3] in ["ALAPH", "DALATH RISH"]: + value = "JOINING_GROUP_" + fields[3].replace(" ", "_") + else: + value = "JOINING_TYPE_" + fields[2] + values[u] = value + + short_value = {} + for value in sorted(set([v for v in values.values()] + ["JOINING_TYPE_X"])): + short = "".join(x[0] for x in value.split("_")[2:]) + assert short not in short_value.values() + + short_value[value] = short + + uu = sorted(values.keys()) + num = len(values) + all_blocks = set([blocks[u] for u in uu]) + + last = -100000 + ranges = [] + for u in uu: + if u - last <= 1 + 16 * 5: + ranges[-1][-1] = u + else: + ranges.append([u, u]) + last = u + + print("#[rustfmt::skip]") + print("pub const JOINING_TABLE: &[JoiningType] = &[") + last_block = None + offset = 0 + + join_offsets = [] + + for start, end in ranges: + join_offsets.append( + "const JOINING_OFFSET_0X%04X: usize = %d;" % (start, offset) + ) + + for u in range(start, end + 1): + block = blocks.get(u, last_block) + value = values.get(u, "JOINING_TYPE_X") + + if block != last_block or u == start: + if u != start: + print() + if block in all_blocks: + print("\n /* %s */" % block) + else: + print("\n /* FILLER */") + last_block = block + if u % 32 != 0: + print() + print(" /* 
%04X */" % (u // 32 * 32), " " * (u % 32), end="") + + if u % 32 == 0: + print() + print(" /* %04X */ " % u, end="") + + val = short_value[value] + + if val == "C": + val = "D" + + print("%s," % val, end="") + print() + + offset += end - start + 1 + print("];") + print() + + for offset in join_offsets: + print(offset) + + page_bits = 12 + print() + print("pub fn joining_type(u: char) -> JoiningType {") + print(" let u = u as u32;") + print(" match u >> %d {" % page_bits) + pages = set( + [u >> page_bits for u in [s for s, e in ranges] + [e for s, e in ranges]] + ) + for p in sorted(pages): + print(" 0x%0X => {" % p) + for start, end in ranges: + if p not in [start >> page_bits, end >> page_bits]: + continue + offset = "JOINING_OFFSET_0X%04X" % start + print(" if (0x%04X..=0x%04X).contains(&u) {" % (start, end)) + print( + " return JOINING_TABLE[u as usize - 0x%04X + %s]" + % (start, offset) + ) + print(" }") + print(" }") + print(" _ => {}") + print(" }") + print() + print(" X") + print("}") + print() + + +print("// WARNING: this file was generated by ../scripts/gen-arabic-table.py") +print() +print( + "use super::arabic::JoiningType::{self, GroupAlaph as A, GroupDalathRish as DR, D, L, R, T, U, X};" +) +print() + +read_blocks(files[2]) +print_joining_table(files[0]) diff --git a/src/complex/arabic_table.rs b/src/complex/arabic_table.rs index 0099f493..f19e6c7d 100644 --- a/src/complex/arabic_table.rs +++ b/src/complex/arabic_table.rs @@ -1,110 +1,108 @@ +// WARNING: this file was generated by ../scripts/gen-arabic-table.py + use super::arabic::JoiningType::{self, GroupAlaph as A, GroupDalathRish as DR, D, L, R, T, U, X}; #[rustfmt::skip] pub const JOINING_TABLE: &[JoiningType] = &[ /* Arabic */ - /* 0600 */ U,U,U,U,U,U,X,X,U,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 0620 */ D,U,R,R,R,R,D,R,D,R,D,D,D,D,D,R,R,R,R,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 0640 */ D,D,D,D,D,D,D,D,R,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 0660 */ 
X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,D,X,R,R,R,U,R,R,R,D,D,D,D,D,D,D,D, - /* 0680 */ D,D,D,D,D,D,D,D,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,D,D,D,D,D,D, - /* 06A0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 06C0 */ R,D,D,R,R,R,R,R,R,R,R,R,D,R,D,R,D,D,R,R,X,R,X,X,X,X,X,X,X,U,X,X, - /* 06E0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,R,R,X,X,X,X,X,X,X,X,X,X,D,D,D,X,X,D, - + /* 0600 */ U, U, U, U, U, U, X, X, U, X, X, U, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 0620 */ D, U, R, R, R, R, D, R, D, R, D, D, D, D, D, R, R, R, R, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 0640 */ D, D, D, D, D, D, D, D, R, D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 0660 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, D, D, X, R, R, R, U, R, R, R, D, D, D, D, D, D, D, D, + /* 0680 */ D, D, D, D, D, D, D, D, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, D, D, D, D, D, D, + /* 06A0 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 06C0 */ R, D, D, R, R, R, R, R, R, R, R, R, D, R, D, R, D, D, R, R, X, R, X, X, X, X, X, X, X, U, X, X, + /* 06E0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, R, R, X, X, X, X, X, X, X, X, X, X, D, D, D, X, X, D, /* Syriac */ - /* 0700 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,A,X,D,D,D,DR,DR,R,R,R,D,D,D,D,R,D, - /* 0720 */ D,D,D,D,D,D,D,D,R,D,DR,D,R,D,D,DR,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 0740 */ X,X,X,X,X,X,X,X,X,X,X,X,X,R,D,D, - + /* 0700 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, T, A, X, D, D, D, DR, DR, R, R, R, D, D, D, D, R, D, + /* 0720 */ D, D, D, D, D, D, D, D, R, D, DR, D, R, D, D, DR, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 0740 */ X, X, X, X, X, X, X, X, X, X, X, X, X, R, D, D, /* Arabic Supplement */ - /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D, - /* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D, - + /* 0740 */ D, D, D, D, D, D, D, D, D, R, R, R, D, D, D, D, + /* 0760 */ D, D, D, D, D, D, D, 
D, D, D, D, R, R, D, D, D, D, R, D, R, R, D, D, D, R, R, D, D, D, D, D, D, /* FILLER */ - /* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - + /* 0780 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 07A0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* NKo */ - /* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,X,X,X,X,X, - + /* 07C0 */ X, X, X, X, X, X, X, X, X, X, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 07E0 */ D, D, D, D, D, D, D, D, D, D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, D, X, X, X, X, X, /* FILLER */ - /* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - + /* 0800 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 0820 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* Mandaic */ - /* 0840 */ R,D,D,D,D,D,R,R,D,R,D,D,D,D,D,D,D,D,D,D,R,D,U,U,U,X,X,X,X,X,X,X, - + /* 0840 */ R, D, D, D, D, D, R, R, D, R, D, D, D, D, D, D, D, D, D, D, R, D, R, R, R, X, X, X, X, X, X, X, /* Syriac Supplement */ - /* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 0880 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0860 */ D, U, D, D, D, D, U, R, D, R, R, X, X, X, X, X, + /* Arabic Extended-B */ + /* 0860 */ R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, + /* 0880 */ R, R, R, D, D, D, D, U, U, D, D, D, D, D, R, X, U, U, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* Arabic Extended-A */ - /* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,X,D,D,D,R,D,D,D,D,X,X, - /* 08C0 */ 
X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 08E0 */ X,X,U, - + /* 08A0 */ D, D, D, D, D, D, D, D, D, D, R, R, R, U, R, D, D, R, R, D, D, D, D, D, D, R, D, D, D, D, D, D, + /* 08C0 */ D, D, D, D, D, D, D, D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 08E0 */ X, X, U, /* Mongolian */ - /* 1800 */ U,D,X,X,D,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 1820 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 1840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 1860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X, - /* 1880 */ U,U,U,U,U,T,T,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 18A0 */ D,D,D,D,D,D,D,D,D,X,D, - + /* 1800 */ U, D, X, X, D, X, X, X, U, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 1820 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 1840 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 1860 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, X, X, X, X, X, X, X, + /* 1880 */ U, U, U, U, U, T, T, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 18A0 */ D, D, D, D, D, D, D, D, D, X, D, /* General Punctuation */ - /* 2000 */ U,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 2020 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 2040 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 2060 */ X,X,X,X,X,X,U,U,U,U, - + /* 2000 */ U, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 2020 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, U, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 2040 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 2060 */ X, X, X, X, X, X, U, U, U, U, /* Phags-pa */ - /* A840 */ 
D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* A860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,L,U, - + /* A840 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* A860 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, L, U, /* Manichaean */ - /* 10AC0 */ D,D,D,D,D,R,U,R,U,R,R,U,U,L,R,R,R,R,R,D,D,D,D,L,D,D,D,D,D,R,D,D, - /* 10AE0 */ D,R,U,U,R,X,X,X,X,X,X,D,D,D,D,R, - + /* 10AC0 */ D, D, D, D, D, R, U, R, U, R, R, U, U, L, R, R, R, R, R, D, D, D, D, L, D, D, D, D, D, R, D, D, + /* 10AE0 */ D, R, U, U, R, X, X, X, X, X, X, D, D, D, D, R, /* Psalter Pahlavi */ - /* 10B80 */ D,R,D,R,R,R,D,D,D,R,D,D,R,D,R,R,D,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X, - /* 10BA0 */ X,X,X,X,X,X,X,X,X,R,R,R,R,D,D,U, - + /* 10B80 */ D, R, D, R, R, R, D, D, D, R, D, D, R, D, R, R, D, R, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 10BA0 */ X, X, X, X, X, X, X, X, X, R, R, R, R, D, D, U, /* Hanifi Rohingya */ - /* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 10D20 */ D,D,R,D, - + /* 10D00 */ L, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 10D20 */ D, D, R, D, /* Sogdian */ - /* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D, - /* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R, + /* 10F20 */ D, D, D, R, D, D, D, D, D, D, D, D, D, D, D, D, + /* 10F40 */ D, D, D, D, D, U, X, X, X, X, X, X, X, X, X, X, X, D, D, D, R, X, X, X, X, X, X, X, X, X, X, X, + /* 10F60 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* Old Uyghur */ - /* Kaithi */ + /* 10F60 */ D, D, D, D, R, R, D, D, D, D, D, D, D, D, D, D, + /* 10F80 */ D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* 10FA0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, + /* Chorasmian */ - /* 110A0 */ U,X,X, - /* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U, + /* 10FA0 */ D, U, D, D, R, R, R, U, D, R, R, D, D, R, D, D, + /* 10FC0 */ U, D, R, R, D, 
U, U, U, U, R, D, L, + /* Kaithi */ + /* 110A0 */ U, X, X, + /* 110C0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, U, /* Adlam */ - /* 1E900 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, - /* 1E940 */ D,D,D,D,X,X,X,X,X,X,X,T, + /* 1E900 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 1E920 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, + /* 1E940 */ D, D, D, D, X, X, X, X, X, X, X, T, ]; const JOINING_OFFSET_0X0600: usize = 0; @@ -115,54 +113,54 @@ const JOINING_OFFSET_0X10AC0: usize = 1050; const JOINING_OFFSET_0X10B80: usize = 1098; const JOINING_OFFSET_0X10D00: usize = 1146; const JOINING_OFFSET_0X10F30: usize = 1182; -const JOINING_OFFSET_0X110BD: usize = 1219; -const JOINING_OFFSET_0X1E900: usize = 1236; +const JOINING_OFFSET_0X110BD: usize = 1338; +const JOINING_OFFSET_0X1E900: usize = 1355; pub fn joining_type(u: char) -> JoiningType { let u = u as u32; match u >> 12 { 0x0 => { if (0x0600..=0x08E2).contains(&u) { - return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600]; + return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600] } } 0x1 => { if (0x1806..=0x18AA).contains(&u) { - return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806]; + return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806] } } 0x2 => { if (0x200C..=0x2069).contains(&u) { - return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C]; + return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C] } } 0xA => { if (0xA840..=0xA873).contains(&u) { - return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840]; + return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840] } } 0x10 => { if (0x10AC0..=0x10AEF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0]; + return JOINING_TABLE[u as usize - 
0x10AC0 + JOINING_OFFSET_0X10AC0] } if (0x10B80..=0x10BAF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80]; + return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80] } if (0x10D00..=0x10D23).contains(&u) { - return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00]; + return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00] } - if (0x10F30..=0x10F54).contains(&u) { - return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30]; + if (0x10F30..=0x10FCB).contains(&u) { + return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30] } } 0x11 => { if (0x110BD..=0x110CD).contains(&u) { - return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD]; + return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD] } } 0x1E => { if (0x1E900..=0x1E94B).contains(&u) { - return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900]; + return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900] } } _ => {} @@ -170,3 +168,4 @@ pub fn joining_type(u: char) -> JoiningType { X } + From 54ef9906ba2b05849cc66d5e0173db9566cf138a Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:53:10 +0100 Subject: [PATCH 14/17] Remove useless import in gen-indic-table.py --- scripts/gen-indic-table.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/gen-indic-table.py b/scripts/gen-indic-table.py index d935233d..26a9abee 100755 --- a/scripts/gen-indic-table.py +++ b/scripts/gen-indic-table.py @@ -4,7 +4,6 @@ import io import os -import sys import urllib.request DEPENDENCIES = [ From 457119af2add28edabfc4ec8afa87161ec089e8e Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:55:13 +0100 Subject: [PATCH 15/17] Bump Unicode crates --- Cargo.toml | 4 ++-- src/unicode.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.toml 
b/Cargo.toml index 616e13e8..57a79316 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,8 +16,8 @@ exclude = ["benches/", "tests/"] bitflags = "2.4.1" bytemuck = { version = "1.5", features = ["extern_crate_alloc"] } smallvec = "1.6" -unicode-bidi-mirroring = "0.1" -unicode-ccc = "0.1.2" +unicode-bidi-mirroring = "0.2" +unicode-ccc = "0.2" unicode-properties = { version = "0.1.0", default-features = false, features = ["general-category"] } unicode-script = "0.5.2" libm = { version = "0.2.2", optional = true } diff --git a/src/unicode.rs b/src/unicode.rs index 18530af2..1fbc0426 100644 --- a/src/unicode.rs +++ b/src/unicode.rs @@ -846,8 +846,8 @@ pub fn decompose_hangul(ab: char) -> Option<(char, char)> { mod tests { #[test] fn check_unicode_version() { - assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (13, 0, 0)); - assert_eq!(unicode_ccc::UNICODE_VERSION, (13, 0, 0)); + assert_eq!(unicode_bidi_mirroring::UNICODE_VERSION, (14, 0, 0)); + assert_eq!(unicode_ccc::UNICODE_VERSION, (14, 0, 0)); assert_eq!(unicode_properties::UNICODE_VERSION, (15, 0, 0)); assert_eq!(unicode_script::UNICODE_VERSION, (15, 0, 0)); assert_eq!(crate::unicode_norm::UNICODE_VERSION, (14, 0, 0)); From 383082005664f3d3249204b94bc8c3ca7bc9874d Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl <47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:57:33 +0100 Subject: [PATCH 16/17] Fix formatting --- src/complex/arabic_table.rs | 240 +++++++++++++++++++----------------- src/complex/indic_table.rs | 16 +-- 2 files changed, 138 insertions(+), 118 deletions(-) diff --git a/src/complex/arabic_table.rs b/src/complex/arabic_table.rs index f19e6c7d..27357240 100644 --- a/src/complex/arabic_table.rs +++ b/src/complex/arabic_table.rs @@ -4,105 +4,126 @@ use super::arabic::JoiningType::{self, GroupAlaph as A, GroupDalathRish as DR, D #[rustfmt::skip] pub const JOINING_TABLE: &[JoiningType] = &[ - /* Arabic */ - - /* 0600 */ U, U, U, U, U, U, X, X, U, X, X, U, X, X, X, X, X, X, X, X, X, X, X, X, 
X, X, X, X, X, X, X, X, - /* 0620 */ D, U, R, R, R, R, D, R, D, R, D, D, D, D, D, R, R, R, R, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 0640 */ D, D, D, D, D, D, D, D, R, D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 0660 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, D, D, X, R, R, R, U, R, R, R, D, D, D, D, D, D, D, D, - /* 0680 */ D, D, D, D, D, D, D, D, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, D, D, D, D, D, D, - /* 06A0 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 06C0 */ R, D, D, R, R, R, R, R, R, R, R, R, D, R, D, R, D, D, R, R, X, R, X, X, X, X, X, X, X, U, X, X, - /* 06E0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, R, R, X, X, X, X, X, X, X, X, X, X, D, D, D, X, X, D, - /* Syriac */ - - /* 0700 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, T, A, X, D, D, D, DR, DR, R, R, R, D, D, D, D, R, D, - /* 0720 */ D, D, D, D, D, D, D, D, R, D, DR, D, R, D, D, DR, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 0740 */ X, X, X, X, X, X, X, X, X, X, X, X, X, R, D, D, - /* Arabic Supplement */ - - /* 0740 */ D, D, D, D, D, D, D, D, D, R, R, R, D, D, D, D, - /* 0760 */ D, D, D, D, D, D, D, D, D, D, D, R, R, D, D, D, D, R, D, R, R, D, D, D, R, R, D, D, D, D, D, D, - /* FILLER */ - - /* 0780 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 07A0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* NKo */ - - /* 07C0 */ X, X, X, X, X, X, X, X, X, X, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 07E0 */ D, D, D, D, D, D, D, D, D, D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, D, X, X, X, X, X, - /* FILLER */ - - /* 0800 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 0820 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* Mandaic 
*/ - - /* 0840 */ R, D, D, D, D, D, R, R, D, R, D, D, D, D, D, D, D, D, D, D, R, D, R, R, R, X, X, X, X, X, X, X, - /* Syriac Supplement */ - - /* 0860 */ D, U, D, D, D, D, U, R, D, R, R, X, X, X, X, X, - /* Arabic Extended-B */ - - /* 0860 */ R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, R, - /* 0880 */ R, R, R, D, D, D, D, U, U, D, D, D, D, D, R, X, U, U, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* Arabic Extended-A */ - - /* 08A0 */ D, D, D, D, D, D, D, D, D, D, R, R, R, U, R, D, D, R, R, D, D, D, D, D, D, R, D, D, D, D, D, D, - /* 08C0 */ D, D, D, D, D, D, D, D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 08E0 */ X, X, U, - /* Mongolian */ - - /* 1800 */ U, D, X, X, D, X, X, X, U, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 1820 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 1840 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 1860 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, X, X, X, X, X, X, X, - /* 1880 */ U, U, U, U, U, T, T, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 18A0 */ D, D, D, D, D, D, D, D, D, X, D, - /* General Punctuation */ - - /* 2000 */ U, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 2020 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, U, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 2040 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 2060 */ X, X, X, X, X, X, U, U, U, U, - /* Phags-pa */ - - /* A840 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* A860 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, L, U, - /* Manichaean */ - - /* 10AC0 */ D, D, D, D, D, R, U, R, U, R, R, U, U, L, R, R, R, R, R, D, D, D, D, L, D, D, D, D, D, R, D, D, - /* 10AE0 */ D, R, U, U, R, X, X, X, X, 
X, X, D, D, D, D, R, - /* Psalter Pahlavi */ - - /* 10B80 */ D, R, D, R, R, R, D, D, D, R, D, D, R, D, R, R, D, R, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 10BA0 */ X, X, X, X, X, X, X, X, X, R, R, R, R, D, D, U, - /* Hanifi Rohingya */ - - /* 10D00 */ L, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 10D20 */ D, D, R, D, - /* Sogdian */ - - /* 10F20 */ D, D, D, R, D, D, D, D, D, D, D, D, D, D, D, D, - /* 10F40 */ D, D, D, D, D, U, X, X, X, X, X, X, X, X, X, X, X, D, D, D, R, X, X, X, X, X, X, X, X, X, X, X, - /* 10F60 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* Old Uyghur */ - - /* 10F60 */ D, D, D, D, R, R, D, D, D, D, D, D, D, D, D, D, - /* 10F80 */ D, D, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* 10FA0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, - /* Chorasmian */ - - /* 10FA0 */ D, U, D, D, R, R, R, U, D, R, R, D, D, R, D, D, - /* 10FC0 */ U, D, R, R, D, U, U, U, U, R, D, L, - /* Kaithi */ - - /* 110A0 */ U, X, X, - /* 110C0 */ X, X, X, X, X, X, X, X, X, X, X, X, X, U, - /* Adlam */ - - /* 1E900 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 1E920 */ D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, D, - /* 1E940 */ D, D, D, D, X, X, X, X, X, X, X, T, + + /* Arabic */ + + /* 0600 */ U,U,U,U,U,U,X,X,U,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0620 */ D,U,R,R,R,R,D,R,D,R,D,D,D,D,D,R,R,R,R,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 0640 */ D,D,D,D,D,D,D,D,R,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0660 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,D,X,R,R,R,U,R,R,R,D,D,D,D,D,D,D,D, + /* 0680 */ D,D,D,D,D,D,D,D,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,D,D,D,D,D,D, + /* 06A0 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 06C0 */ R,D,D,R,R,R,R,R,R,R,R,R,D,R,D,R,D,D,R,R,X,R,X,X,X,X,X,X,X,U,X,X, + /* 06E0 */ 
X,X,X,X,X,X,X,X,X,X,X,X,X,X,R,R,X,X,X,X,X,X,X,X,X,X,D,D,D,X,X,D, + + /* Syriac */ + + /* 0700 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,T,A,X,D,D,D,DR,DR,R,R,R,D,D,D,D,R,D, + /* 0720 */ D,D,D,D,D,D,D,D,R,D,DR,D,R,D,D,DR,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0740 */ X,X,X,X,X,X,X,X,X,X,X,X,X,R,D,D, + + /* Arabic Supplement */ + + /* 0740 */ D,D,D,D,D,D,D,D,D,R,R,R,D,D,D,D, + /* 0760 */ D,D,D,D,D,D,D,D,D,D,D,R,R,D,D,D,D,R,D,R,R,D,D,D,R,R,D,D,D,D,D,D, + + /* FILLER */ + + /* 0780 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 07A0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* NKo */ + + /* 07C0 */ X,X,X,X,X,X,X,X,X,X,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 07E0 */ D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,D,X,X,X,X,X, + + /* FILLER */ + + /* 0800 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 0820 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Mandaic */ + + /* 0840 */ R,D,D,D,D,D,R,R,D,R,D,D,D,D,D,D,D,D,D,D,R,D,R,R,R,X,X,X,X,X,X,X, + + /* Syriac Supplement */ + + /* 0860 */ D,U,D,D,D,D,U,R,D,R,R,X,X,X,X,X, + + /* Arabic Extended-B */ + + /* 0860 */ R,R,R,R,R,R,R,R,R,R,R,R,R,R,R,R, + /* 0880 */ R,R,R,D,D,D,D,U,U,D,D,D,D,D,R,X,U,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Arabic Extended-A */ + + /* 08A0 */ D,D,D,D,D,D,D,D,D,D,R,R,R,U,R,D,D,R,R,D,D,D,D,D,D,R,D,D,D,D,D,D, + /* 08C0 */ D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 08E0 */ X,X,U, + + /* Mongolian */ + + /* 1800 */ U,D,X,X,D,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 1820 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,X,X,X,X,X,X,X, + /* 1880 */ U,U,U,U,U,T,T,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 18A0 */ D,D,D,D,D,D,D,D,D,X,D, + + /* General Punctuation */ + + /* 2000 */ 
U,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2020 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,U,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2040 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 2060 */ X,X,X,X,X,X,U,U,U,U, + + /* Phags-pa */ + + /* A840 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* A860 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,L,U, + + /* Manichaean */ + + /* 10AC0 */ D,D,D,D,D,R,U,R,U,R,R,U,U,L,R,R,R,R,R,D,D,D,D,L,D,D,D,D,D,R,D,D, + /* 10AE0 */ D,R,U,U,R,X,X,X,X,X,X,D,D,D,D,R, + + /* Psalter Pahlavi */ + + /* 10B80 */ D,R,D,R,R,R,D,D,D,R,D,D,R,D,R,R,D,R,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 10BA0 */ X,X,X,X,X,X,X,X,X,R,R,R,R,D,D,U, + + /* Hanifi Rohingya */ + + /* 10D00 */ L,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 10D20 */ D,D,R,D, + + /* Sogdian */ + + /* 10F20 */ D,D,D,R,D,D,D,D,D,D,D,D,D,D,D,D, + /* 10F40 */ D,D,D,D,D,U,X,X,X,X,X,X,X,X,X,X,X,D,D,D,R,X,X,X,X,X,X,X,X,X,X,X, + /* 10F60 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Old Uyghur */ + + /* 10F60 */ D,D,D,D,R,R,D,D,D,D,D,D,D,D,D,D, + /* 10F80 */ D,D,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + /* 10FA0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,X,X,X, + + /* Chorasmian */ + + /* 10FA0 */ D,U,D,D,R,R,R,U,D,R,R,D,D,R,D,D, + /* 10FC0 */ U,D,R,R,D,U,U,U,U,R,D,L, + + /* Kaithi */ + + /* 110A0 */ U,X,X, + /* 110C0 */ X,X,X,X,X,X,X,X,X,X,X,X,X,U, + + /* Adlam */ + + /* 1E900 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1E920 */ D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D,D, + /* 1E940 */ D,D,D,D,X,X,X,X,X,X,X,T, ]; const JOINING_OFFSET_0X0600: usize = 0; @@ -121,46 +142,46 @@ pub fn joining_type(u: char) -> JoiningType { match u >> 12 { 0x0 => { if (0x0600..=0x08E2).contains(&u) { - return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600] + return JOINING_TABLE[u as usize - 0x0600 + JOINING_OFFSET_0X0600]; } } 0x1 => { if (0x1806..=0x18AA).contains(&u) { - return JOINING_TABLE[u as 
usize - 0x1806 + JOINING_OFFSET_0X1806] + return JOINING_TABLE[u as usize - 0x1806 + JOINING_OFFSET_0X1806]; } } 0x2 => { if (0x200C..=0x2069).contains(&u) { - return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C] + return JOINING_TABLE[u as usize - 0x200C + JOINING_OFFSET_0X200C]; } } 0xA => { if (0xA840..=0xA873).contains(&u) { - return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840] + return JOINING_TABLE[u as usize - 0xA840 + JOINING_OFFSET_0XA840]; } } 0x10 => { if (0x10AC0..=0x10AEF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0] + return JOINING_TABLE[u as usize - 0x10AC0 + JOINING_OFFSET_0X10AC0]; } if (0x10B80..=0x10BAF).contains(&u) { - return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80] + return JOINING_TABLE[u as usize - 0x10B80 + JOINING_OFFSET_0X10B80]; } if (0x10D00..=0x10D23).contains(&u) { - return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00] + return JOINING_TABLE[u as usize - 0x10D00 + JOINING_OFFSET_0X10D00]; } if (0x10F30..=0x10FCB).contains(&u) { - return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30] + return JOINING_TABLE[u as usize - 0x10F30 + JOINING_OFFSET_0X10F30]; } } 0x11 => { if (0x110BD..=0x110CD).contains(&u) { - return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD] + return JOINING_TABLE[u as usize - 0x110BD + JOINING_OFFSET_0X110BD]; } } 0x1E => { if (0x1E900..=0x1E94B).contains(&u) { - return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900] + return JOINING_TABLE[u as usize - 0x1E900 + JOINING_OFFSET_0X1E900]; } } _ => {} @@ -168,4 +189,3 @@ pub fn joining_type(u: char) -> JoiningType { X } - diff --git a/src/complex/indic_table.rs b/src/complex/indic_table.rs index 5302e36b..4e9fbd2b 100644 --- a/src/complex/indic_table.rs +++ b/src/complex/indic_table.rs @@ -165,7 +165,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0B38 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), 
(ISC_N,IMC_B), (ISC_A,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_T), /* 0B40 */ (ISC_M,IMC_R), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_L), /* 0B48 */ (ISC_M,IMC_TL), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_LR),(ISC_M,IMC_TLR), (ISC_V,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), - /* 0B50 */ (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_M, IMC_T), (ISC_M, IMC_T), (ISC_M, IMC_TR), + /* 0B50 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_TR), /* 0B58 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), /* 0B60 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0B68 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), @@ -200,11 +200,11 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0C20 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0C28 */ (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0C30 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), - /* 0C38 */ (ISC_C, IMC_x), (ISC_C, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_N, IMC_B), (ISC_A, IMC_x), (ISC_M, IMC_T), (ISC_M, IMC_T), + /* 0C38 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_N,IMC_B), (ISC_A,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_T), /* 0C40 */ (ISC_M,IMC_T), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_T), /* 0C48 */ (ISC_M,IMC_TB), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_V,IMC_T), (ISC_x,IMC_x), 
(ISC_x,IMC_x), /* 0C50 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_B), (ISC_x,IMC_x), - /* 0C58 */ (ISC_C, IMC_x), (ISC_C, IMC_x), (ISC_C, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_CD, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), + /* 0C58 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_CD,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), /* 0C60 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0C68 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0C70 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), @@ -223,7 +223,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0CC0 */ (ISC_M,IMC_TR), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_x,IMC_x), (ISC_M,IMC_T), (ISC_M,IMC_TR), /* 0CC8 */ (ISC_M,IMC_TR), (ISC_x,IMC_x), (ISC_M,IMC_TR), (ISC_M,IMC_TR), (ISC_M,IMC_T), (ISC_V,IMC_T), (ISC_x,IMC_x), (ISC_x,IMC_x), /* 0CD0 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_x,IMC_x), - /* 0CD8 */ (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_x, IMC_x), (ISC_CD, IMC_x), (ISC_C, IMC_x), (ISC_x, IMC_x), + /* 0CD8 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_CD,IMC_x), (ISC_C,IMC_x), (ISC_x,IMC_x), /* 0CE0 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0CE8 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 0CF0 */ (ISC_x,IMC_x),(ISC_CWS,IMC_x),(ISC_CWS,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), @@ -231,7 +231,7 
@@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* Malayalam */ - /* 0D00 */ (ISC_Bi, IMC_T), (ISC_Bi, IMC_T), (ISC_Bi, IMC_R), (ISC_Vs, IMC_R), (ISC_Bi, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), + /* 0D00 */ (ISC_Bi,IMC_T), (ISC_Bi,IMC_T), (ISC_Bi,IMC_R), (ISC_Vs,IMC_R), (ISC_Bi,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 0D08 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_x,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 0D10 */ (ISC_VI,IMC_x), (ISC_x,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0D18 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), @@ -240,7 +240,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 0D30 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), /* 0D38 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_PK,IMC_T), (ISC_PK,IMC_T), (ISC_A,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), /* 0D40 */ (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_M,IMC_L), (ISC_M,IMC_L), - /* 0D48 */ (ISC_M, IMC_L), (ISC_x, IMC_x), (ISC_M, IMC_LR), (ISC_M, IMC_LR), (ISC_M, IMC_LR), (ISC_V, IMC_T), (ISC_CPR, IMC_T), (ISC_x, IMC_x), + /* 0D48 */ (ISC_M,IMC_L), (ISC_x,IMC_x), (ISC_M,IMC_LR), (ISC_M,IMC_LR), (ISC_M,IMC_LR), (ISC_V,IMC_T),(ISC_CPR,IMC_T), (ISC_x,IMC_x), /* 0D50 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_CD,IMC_x), (ISC_CD,IMC_x), (ISC_CD,IMC_x), (ISC_M,IMC_R), /* 0D58 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_VI,IMC_x), /* 0D60 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_B), (ISC_M,IMC_B), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), @@ -250,7 +250,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 
Sinhala */ - /* 0D80 */ (ISC_x, IMC_x), (ISC_Bi, IMC_T), (ISC_Bi, IMC_R), (ISC_Vs, IMC_R), (ISC_x, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), (ISC_VI, IMC_x), + /* 0D80 */ (ISC_x,IMC_x), (ISC_Bi,IMC_T), (ISC_Bi,IMC_R), (ISC_Vs,IMC_R), (ISC_x,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 0D88 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 0D90 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_x,IMC_x), /* 0D98 */ (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_C,IMC_x), @@ -275,7 +275,7 @@ const TABLE: &[(SyllabicCategory, MatraCategory)] = &[ /* 1020 */ (ISC_C,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), /* 1028 */ (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_B), /* 1030 */ (ISC_M,IMC_B), (ISC_M,IMC_L), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_M,IMC_T), (ISC_Bi,IMC_T), (ISC_TM,IMC_B), - /* 1038 */ (ISC_Vs, IMC_R), (ISC_IS, IMC_x), (ISC_PK, IMC_T), (ISC_CM, IMC_R), (ISC_CM, IMC_TBL), (ISC_CM, IMC_B), (ISC_CM, IMC_B), (ISC_C, IMC_x), + /* 1038 */ (ISC_Vs,IMC_R), (ISC_IS,IMC_x), (ISC_PK,IMC_T), (ISC_CM,IMC_R),(ISC_CM,IMC_TBL), (ISC_CM,IMC_B), (ISC_CM,IMC_B), (ISC_C,IMC_x), /* 1040 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), /* 1048 */ (ISC_Nd,IMC_x), (ISC_Nd,IMC_x), (ISC_x,IMC_x), (ISC_CP,IMC_x), (ISC_x,IMC_x), (ISC_x,IMC_x), (ISC_CP,IMC_x), (ISC_x,IMC_x), /* 1050 */ (ISC_C,IMC_x), (ISC_C,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_VI,IMC_x), (ISC_M,IMC_R), (ISC_M,IMC_R), From c8db555f1275ad7b48b014f9fd7b25fe7e932e19 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl 
<47084093+LaurenzV@users.noreply.github.com> Date: Wed, 7 Feb 2024 16:26:10 +0100 Subject: [PATCH 17/17] Fix formatting --- src/complex/universal_table.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/complex/universal_table.rs b/src/complex/universal_table.rs index 6054e0ee..1e016491 100644 --- a/src/complex/universal_table.rs +++ b/src/complex/universal_table.rs @@ -103,7 +103,7 @@ const USE_TABLE: &[Category] = &[ /* 0C00 */ VMABV, VMPST, VMPST, VMPST, VMABV, B, B, B, B, B, B, B, B, O, B, B, /* 0C10 */ B, O, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 0C20 */ B, B, B, B, B, B, B, B, B, O, B, B, B, B, B, B, - /* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, CMBLW, B, VABV, VABV, + /* 0C30 */ B, B, B, B, B, B, B, B, B, B, O, O, CMBLW, B, VABV, VABV, /* 0C40 */ VABV, VPST, VPST, VPST, VPST, O, VABV, VABV, VABV, O, VABV, VABV, VABV, H, O, O, /* 0C50 */ O, O, O, O, O, VABV, VBLW, O, B, B, B, O, O, O, O, O, /* 0C60 */ B, B, VBLW, VBLW, O, O, B, B, B, B, B, B, B, B, B, B, @@ -173,13 +173,13 @@ const USE_TABLE: &[Category] = &[ /* Tagalog */ - /* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, - /* 1710 */ B, B, VABV, VBLW, VBLW, VPST, O, O, O, O, O, O, O, O, O, B, + /* 1700 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, + /* 1710 */ B, B, VABV, VBLW, VBLW, VPST, O, O, O, O, O, O, O, O, O, B, /* Hanunoo */ /* 1720 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, - /* 1730 */ B, B, VABV, VBLW, VPST, O, O, O, O, O, O, O, O, O, O, O, + /* 1730 */ B, B, VABV, VBLW, VPST, O, O, O, O, O, O, O, O, O, O, O, /* Buhid */ @@ -263,7 +263,7 @@ const USE_TABLE: &[Category] = &[ /* 1B10 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1B20 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 1B30 */ B, B, B, B, CMABV, VPST, VABV, VABV, VBLW, VBLW, VBLW, VBLW, VABV, VABV, VPRE, VPRE, - /* 1B40 */ VPRE, VPRE, VABV, VABV, H, B, B, B, B, B, B, B, B, O, O, O, + /* 1B40 */ VPRE, VPRE, VABV, VABV, H, B, B, B, B, B, B, B, B, O, O, O, 
/* 1B50 */ B, B, B, B, B, B, B, B, B, B, O, GB, GB, O, O, GB, /* 1B60 */ O, S, GB, S, S, S, S, S, GB, S, S, SMABV, SMBLW, SMABV, SMABV, SMABV, /* 1B70 */ SMABV, SMABV, SMABV, SMABV, O, O, O, O, O, O, O, O, O, O, O, O, @@ -471,7 +471,7 @@ const USE_TABLE: &[Category] = &[ /* 11040 */ VBLW, VBLW, VABV, VABV, VABV, VABV, HVM, O, O, O, O, O, O, O, O, O, /* 11050 */ O, O, N, N, N, N, N, N, N, N, N, N, N, N, N, N, /* 11060 */ N, N, N, N, N, N, B, B, B, B, B, B, B, B, B, B, - /* 11070 */ VABV, B, B, VABV, VABV, B, O, O, O, O, O, O, O, O, O, HN, + /* 11070 */ VABV, B, B, VABV, VABV, B, O, O, O, O, O, O, O, O, O, HN, /* Kaithi */ @@ -479,7 +479,7 @@ const USE_TABLE: &[Category] = &[ /* 11090 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 110A0 */ B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, B, /* 110B0 */ VPST, VPRE, VPST, VBLW, VBLW, VABV, VABV, VPST, VPST, H, CMBLW, O, O, O, O, O, - /* 110C0 */ O, O, VBLW, O, O, O, O, O, + /* 110C0 */ O, O, VBLW, O, O, O, O, O, /* Chakma */ @@ -600,7 +600,7 @@ const USE_TABLE: &[Category] = &[ /* 11710 */ B, B, B, B, B, B, B, B, B, B, B, O, O, MBLW, MPRE, MABV, /* 11720 */ VPST, VPST, VABV, VABV, VBLW, VBLW, VPRE, VABV, VBLW, VABV, VABV, VABV, O, O, O, O, /* 11730 */ B, B, B, B, B, B, B, B, B, B, B, B, O, O, O, O, - /* 11740 */ B, B, B, B, B, B, B, O, + /* 11740 */ B, B, B, B, B, B, B, O, /* Dogra */