diff --git a/Ultima/Helpers/BwtCompress.cs b/Ultima/Helpers/BwtCompress.cs
new file mode 100644
index 0000000..ab8f2ec
--- /dev/null
+++ b/Ultima/Helpers/BwtCompress.cs
@@ -0,0 +1,98 @@
+/* EA Cliloc Compression
+ * Author: Tecmo
+ * Date: 2024.11.26
+ * Note: Based on BwtDecompress provided by ClassicUO
+ */
+
+using System;
+using System.IO;
+
+namespace Ultima.Helpers
+{
+    /// <summary>
+    /// Encoder intended as the counterpart of BwtDecompress for cliloc data.
+    /// NOTE(review): the transform step is still a placeholder, so the output is just
+    /// the first input byte followed by the unmodified input. Confirm before relying
+    /// on it to produce files that BwtDecompress can read back.
+    /// </summary>
+    public static class BwtCompress
+    {
+        /// <summary>
+        /// Encodes <paramref name="input"/> as one leading byte followed by the transformed payload.
+        /// </summary>
+        /// <param name="input">Raw bytes to encode.</param>
+        /// <returns>
+        /// An empty array for empty input; otherwise the first input byte followed by
+        /// the data returned by the (placeholder) transform.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
+        public static byte[] Compress(byte[] input)
+        {
+            if (input == null)
+            {
+                throw new ArgumentNullException(nameof(input));
+            }
+
+            // The transform reads input[0], so empty input has nothing to encode.
+            if (input.Length == 0)
+            {
+                return Array.Empty<byte>();
+            }
+
+            // stackalloc memory is not guaranteed to be zeroed; clear it before counting.
+            Span<int> frequency = stackalloc int[256];
+            frequency.Clear();
+            BuildFrequencyTable(input, frequency);
+
+            var transformedData = PerformBwtTransform(input, frequency);
+
+            using (var memoryStream = new MemoryStream())
+            using (var writer = new BinaryWriter(memoryStream))
+            {
+                // Leading byte first, then the payload.
+                writer.Write(transformedData.FirstChar);
+                writer.Write(transformedData.Data);
+
+                return memoryStream.ToArray();
+            }
+        }
+
+        /// <summary>
+        /// Placeholder for the BWT step: returns the input unchanged together with its first byte.
+        /// </summary>
+        /// <param name="input">Non-empty source bytes.</param>
+        /// <param name="frequency">Per-byte occurrence counts; not used yet.</param>
+        /// <returns>The first input byte and the (currently untransformed) data.</returns>
+        private static TransformedData PerformBwtTransform(byte[] input, Span<int> frequency)
+        {
+            // TODO: implement the actual block sorting; until then nothing is transformed.
+            return new TransformedData
+            {
+                FirstChar = input[0],
+                Data = input
+            };
+        }
+
+        /// <summary>
+        /// Counts how often each byte value occurs in <paramref name="input"/>.
+        /// </summary>
+        /// <param name="input">Bytes to count.</param>
+        /// <param name="frequency">256-entry table incremented in place; the caller zeroes it first.</param>
+        private static void BuildFrequencyTable(byte[] input, Span<int> frequency)
+        {
+            foreach (var b in input)
+            {
+                frequency[b]++;
+            }
+        }
+
+        /// <summary>
+        /// Output of the transform step plus the byte written ahead of it.
+        /// </summary>
+        private struct TransformedData
+        {
+            public byte FirstChar;
+            public byte[] Data;
+        }
+    }
+}
diff --git a/Ultima/Helpers/BwtDecompress.cs b/Ultima/Helpers/BwtDecompress.cs new file mode 100644 index 0000000..b955257 --- /dev/null +++ 
b/Ultima/Helpers/BwtDecompress.cs
@@ -0,0 +1,238 @@
+using System;
+using System.IO;
+using System.Runtime.InteropServices;
+
+namespace Ultima.Helpers
+{
+    /// <summary>
+    /// Decoder for BWT-compressed cliloc buffers.
+    /// </summary>
+    public static class BwtDecompress
+    {
+        /// <summary>
+        /// Decodes a compressed buffer: skips the 4-byte header, undoes the move-to-front
+        /// coding of the remaining bytes and passes the result to the block decoder.
+        /// </summary>
+        /// <param name="buffer">Compressed content, starting with its 4-byte header.</param>
+        /// <returns>The decoded bytes.</returns>
+        public static byte[] Decompress(byte[] buffer)
+        {
+            using (var reader = new BinaryReader(new MemoryStream(buffer)))
+            {
+                // The header value is not needed for decoding; reading it only skips it.
+                reader.ReadUInt32();
+
+                // 0 makes InternalDecompress derive the output length from the count table.
+                var len = 0u;
+
+                var firstChar = reader.ReadByte();
+
+                Span<ushort> table = new ushort[256 * 256];
+                BuildTable(table, firstChar);
+
+                var list = new byte[reader.BaseStream.Length - 4];
+                var i = 0;
+                while (reader.BaseStream.Position < reader.BaseStream.Length)
+                {
+                    // Move-to-front: emit the entry at the current index and rotate it to slot 0.
+                    var currentValue = firstChar;
+                    var value = table[currentValue];
+                    if (currentValue > 0)
+                    {
+                        do
+                        {
+                            table[currentValue] = table[currentValue - 1];
+                        } while (--currentValue > 0);
+                    }
+
+                    table[0] = value;
+
+                    list[i++] = (byte)value;
+                    firstChar = reader.ReadByte();
+                }
+
+                return InternalDecompress(list, len);
+            }
+        }
+
+        /// <summary>
+        /// Fills the table with 256 * 256 values built from a low byte that starts at
+        /// <paramref name="startValue"/> and a high byte that advances whenever the low byte
+        /// wraps to 0, then sorts the table in ascending order.
+        /// </summary>
+        /// <param name="table">Destination with room for 256 * 256 entries.</param>
+        /// <param name="startValue">Initial value of the low byte.</param>
+        private static void BuildTable(Span<ushort> table, byte startValue)
+        {
+            int index = 0;
+            byte firstByte = startValue;
+            byte secondByte = 0;
+            for (int i = 0; i < 256 * 256; i++)
+            {
+                var val = (ushort)(firstByte + (secondByte << 8));
+                table[index++] = val;
+
+                firstByte++;
+                if (firstByte == 0)
+                {
+                    secondByte++;
+                }
+            }
+
+            table.Sort();
+        }
+
+        /// <summary>
+        /// Rebuilds the original bytes from the move-to-front decoded stream.
+        /// </summary>
+        /// <param name="input">
+        /// Decoded stream. Its first 1024 bytes are copied over the first 256 ints of the work
+        /// table (one count per byte value); symbol data is read from offset 1024 onwards.
+        /// </param>
+        /// <param name="len">Expected output length, or 0 to use the sum of the 256 counts.</param>
+        /// <returns>
+        /// The reconstructed bytes, or an empty array when the counts do not sum to the expected length.
+        /// </returns>
+        private static byte[] InternalDecompress(Span<byte> input, uint len)
+        {
+            Span<char> symbolTable = stackalloc char[256];
+            Span<char> frequency = stackalloc char[256];
+            Span<int> partialInput = stackalloc int[256 * 3];
+
+            // stackalloc memory is not guaranteed to be zeroed.
+            frequency.Clear();
+            partialInput.Clear();
+
+            for (var i = 0; i < 256; i++)
+            {
+                symbolTable[i] = (char)i;
+            }
+
+            input.Slice(0, 1024).CopyTo(MemoryMarshal.AsBytes(partialInput));
+
+            var sum = 0;
+            for (var i = 0; i < 256; i++)
+            {
+                sum += partialInput[i];
+            }
+
+            if (len == 0)
+            {
+                len = (uint)sum;
+            }
+
+            if (sum != len)
+            {
+                return Array.Empty<byte>();
+            }
+
+            var output = new byte[len];
+
+            var count = 0;
+            var nonZeroCount = 0;
+
+            for (var i = 0; i < 256; i++)
+            {
+                if (partialInput[i] != 0)
+                {
+                    nonZeroCount++;
+                }
+            }
+
+            Frequency(partialInput, frequency);
+
+            // Work table layout: [0..255] counts, [256..511] read cursor, [512..767] end of range.
+            for (int i = 0, m = 0; i < nonZeroCount; ++i)
+            {
+                var freq = (byte)frequency[i];
+                symbolTable[input[m + 1024]] = (char)freq;
+                partialInput[freq + 256] = m + 1;
+                m += partialInput[freq];
+                partialInput[freq + 512] = m;
+            }
+
+            var val = (byte)symbolTable[0];
+
+            while (count < len)
+            {
+                ref var firstValRef = ref partialInput[val + 256];
+                output[count] = val;
+
+                if (firstValRef >= partialInput[val + 512])
+                {
+                    // This symbol's range is used up: drop it from the front of the symbol table.
+                    if (nonZeroCount-- > 0)
+                    {
+                        ShiftLeft(symbolTable, nonZeroCount);
+                        val = (byte)symbolTable[0];
+                    }
+                }
+                else
+                {
+                    var idx = (char)input[firstValRef + 1024];
+                    firstValRef++;
+
+                    if (idx != 0)
+                    {
+                        ShiftLeft(symbolTable, idx);
+                        symbolTable[(byte)idx] = (char)val;
+                        val = (byte)symbolTable[0];
+                    }
+                }
+
+                count++;
+            }
+
+            return output;
+        }
+
+        /// <summary>
+        /// Lists the byte values that have a positive count, highest count first
+        /// (ties go to the lower byte value).
+        /// </summary>
+        /// <param name="input">Table whose first 256 entries are the counts; it is not modified.</param>
+        /// <param name="output">Receives one byte value per positive count; other entries are left untouched.</param>
+        private static void Frequency(Span<int> input, Span<char> output)
+        {
+            Span<int> tmp = stackalloc int[256];
+            input.Slice(0, tmp.Length).CopyTo(tmp);
+
+            for (var i = 0; i < 256; i++)
+            {
+                uint value = 0;
+                byte index = 0;
+
+                for (var j = 0; j < 256; j++)
+                {
+                    if (tmp[j] > value)
+                    {
+                        index = (byte)j;
+                        value = (uint)tmp[j];
+                    }
+                }
+
+                if (value == 0)
+                {
+                    break;
+                }
+
+                output[i] = (char)index;
+                tmp[index] = 0;
+            }
+        }
+
+        /// <summary>
+        /// Moves the elements at indices 1..<paramref name="max"/> one slot towards the front,
+        /// overwriting index 0.
+        /// </summary>
+        /// <param name="input">Buffer that is shifted in place.</param>
+        /// <param name="max">Number of elements to move.</param>
+        private static void ShiftLeft(Span<char> input, int max)
+        {
+            for (var i = 0; i < max; ++i)
+            {
+                input[i] = input[i + 1];
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ultima/StringList.cs b/Ultima/StringList.cs index a2f102d..b3b32ca 100644 --- a/Ultima/StringList.cs +++ b/Ultima/StringList.cs @@ -1,6 +1,7 @@ using System.Collections.Generic; using System.IO; using System.Text; +using Ultima.Helpers; namespace Ultima { @@ -8,6 +9,7 @@ public sealed class StringList { private int _header1; private short _header2; + private bool _compression;//Store compression status of opened file public List Entries { get; private set; } public string Language { get; } @@ -44,34 +46,46 @@ private void LoadEntry(string path) Entries = new List(0); return; } + Entries = new List(); _stringTable = new Dictionary(); _entryTable = new Dictionary(); - using (var bin = new BinaryReader(new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))) + using (var fileStream = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read)) { - _header1 = bin.ReadInt32(); - _header2 = bin.ReadInt16(); + // Read the entire file into a buffer + byte[] buf = new byte[fileStream.Length]; + fileStream.Read(buf, 0, buf.Length); + + //Check if the file is BWT compressed and decompress if necessary + _compression = buf[3] == 0x8E; + byte[] output = _compression ? 
BwtDecompress.Decompress(buf) : buf; - while (bin.BaseStream.Length != bin.BaseStream.Position) + using (var reader = new BinaryReader(new MemoryStream(output))) { - int number = bin.ReadInt32(); - byte flag = bin.ReadByte(); - int length = bin.ReadInt16(); + _header1 = reader.ReadInt32(); + _header2 = reader.ReadInt16(); - if (length > _buffer.Length) + while (reader.BaseStream.Length != reader.BaseStream.Position) { - _buffer = new byte[(length + 1023) & ~1023]; - } + int number = reader.ReadInt32(); + byte flag = reader.ReadByte(); + int length = reader.ReadInt16(); - bin.Read(_buffer, 0, length); - string text = Encoding.UTF8.GetString(_buffer, 0, length); + if (length > _buffer.Length) + { + _buffer = new byte[(length + 1023) & ~1023]; + } - var se = new StringEntry(number, text, flag); - Entries.Add(se); + reader.Read(_buffer, 0, length); + string text = Encoding.UTF8.GetString(_buffer, 0, length); - _stringTable[number] = text; - _entryTable[number] = se; + var se = new StringEntry(number, text, flag); + Entries.Add(se); + + _stringTable[number] = text; + _entryTable[number] = se; + } } } } @@ -82,15 +96,14 @@ private void LoadEntry(string path) /// public void SaveStringList(string fileName) { - using (var fs = new FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.Write)) + using (var memoryStream = new MemoryStream()) { - using (var bin = new BinaryWriter(fs)) + using (var bin = new BinaryWriter(memoryStream)) { - bin.Write(_header1); - bin.Write(_header2); - + // Sort entries by number Entries.Sort(new NumberComparer(false)); + // Write each entry to the memory stream foreach (StringEntry entry in Entries) { bin.Write(entry.Number); @@ -101,9 +114,29 @@ public void SaveStringList(string fileName) bin.Write(utf8String); } } + + // Get the data buffer + byte[] data = memoryStream.ToArray(); + + if (_compression) + { + data = BwtCompress.Compress(data); + } + + // Write the final output to the file + using (var fileStream = new 
FileStream(fileName, FileMode.Create, FileAccess.Write, FileShare.None)) + using (var bin = new BinaryWriter(fileStream)) + { + // Write the headers at the beginning + bin.Write(_header1); + bin.Write(_header2); + + bin.Write(data); + } } } + public string GetString(int number) { return _stringTable?.ContainsKey(number) != true ? null : _stringTable[number];