From 21b1a82f0fb7f2f0c332b15e47f571b056f5463e Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Fri, 10 Jan 2020 17:50:49 -0800 Subject: [PATCH 01/10] Fix for malformed utf8 sequence --- .../Integration/VectorBad.cs | 5 ++--- .../Internals/Text/UnicodeStream.cs | 22 +++++++++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/Amazon.IonDotnet.Tests/Integration/VectorBad.cs b/Amazon.IonDotnet.Tests/Integration/VectorBad.cs index 14a95586..8240e0ef 100644 --- a/Amazon.IonDotnet.Tests/Integration/VectorBad.cs +++ b/Amazon.IonDotnet.Tests/Integration/VectorBad.cs @@ -29,9 +29,8 @@ public class VectorBad { private static readonly HashSet Excludes = new HashSet { - "shortUtf8Sequence_1.ion", - "shortUtf8Sequence_2.ion", - "shortUtf8Sequence_3.ion" + // To exclude a test file of ion-test submodule from running, add the + // test file with its extension here. For example: "test.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); diff --git a/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs b/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs index 66b3d665..ee3e8569 100644 --- a/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs +++ b/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs @@ -11,6 +11,7 @@ internal class UnicodeStream : TextStream { private readonly StreamReader _streamReader; private readonly Stack _unreadStack; + private long remainingChars; public UnicodeStream(Stream inputStream) : this(inputStream, Encoding.UTF8) { @@ -20,9 +21,9 @@ public UnicodeStream(Stream inputStream, Encoding encoding) { if (!inputStream.CanRead) throw new ArgumentException("Input stream must be readable", nameof(inputStream)); - _streamReader = new StreamReader(inputStream, encoding); _unreadStack = new Stack(); + remainingChars = inputStream.Length; } public UnicodeStream(Stream inputStream, Span readBytes) @@ -42,6 +43,7 @@ public UnicodeStream(Stream inputStream, Encoding encoding, Span readBytes throw new ArgumentException("Input stream must be readable", nameof(inputStream)); _streamReader = new StreamReader(inputStream, encoding); + remainingChars = inputStream.Length; if (inputStream.CanSeek) { InputStream.Seek(-readBytes.Length, SeekOrigin.Current); @@ -57,7 +59,15 @@ public UnicodeStream(Stream inputStream, Encoding encoding, Span readBytes public override int Read() { - return _unreadStack.Count > 0 ? _unreadStack.Pop() : _streamReader.Read(); + var value = _unreadStack.Count > 0 ? _unreadStack.Pop() : _streamReader.Read(); + remainingChars--; + + if (_streamReader.CurrentEncoding == Encoding.UTF8) + { + IsValidUTF8Character(); + } + + return value; } public override void Unread(int c) @@ -72,5 +82,13 @@ public override void Unread(int c) } private Stream InputStream => _streamReader.BaseStream; + + private void IsValidUTF8Character() + { + if (remainingChars > 0 && _streamReader.Peek() == -1) + { + throw new IonException("Input stream is not a valid UTF-8 stream."); + } + } } } From 5981a03f970b12c6f56ad9cfb794d97f1c09a3ff Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:33:00 -0800 Subject: [PATCH 02/10] Update excluded tests --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index 578b49e9..af4cc551 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -41,8 +41,8 @@ public class Vector private static readonly HashSet Excludes = new HashSet { "subfieldVarInt.ion", - "localSymbolTableAppend.ion", - "clobNewlines.ion" + "whitespace.ion", + "localSymbolTableAppend.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From 4822661aeed7afcc6cc839aa53f987ca32e2c1b0 Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:38:46 -0800 Subject: [PATCH 03/10] Exclude the failing test --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index af4cc551..54bf0899 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -42,7 +42,8 @@ public class Vector { "subfieldVarInt.ion", "whitespace.ion", - "localSymbolTableAppend.ion" + "localSymbolTableAppend.ion", + "stringUtf8.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From e915fda62c159bf6ee7ad3a83da873ddd5b17caf Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:33:00 -0800 Subject: [PATCH 04/10] Update excluded tests --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index 54bf0899..af4cc551 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -42,8 +42,7 @@ public class Vector { "subfieldVarInt.ion", "whitespace.ion", - "localSymbolTableAppend.ion", - "stringUtf8.ion" + "localSymbolTableAppend.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From 729f364aab70fe58b8c0bebf2f81d0060a774885 Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:38:46 -0800 Subject: [PATCH 05/10] Exclude the failing test --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index af4cc551..54bf0899 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -42,7 +42,8 @@ public class Vector { "subfieldVarInt.ion", "whitespace.ion", - "localSymbolTableAppend.ion" + "localSymbolTableAppend.ion", + "stringUtf8.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From 5dd3d6611c2f0a739e947c80a8c869ceb8c0af0d Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:33:00 -0800 Subject: [PATCH 06/10] Update excluded tests --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index 54bf0899..af4cc551 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -42,8 +42,7 @@ public class Vector { "subfieldVarInt.ion", "whitespace.ion", - "localSymbolTableAppend.ion", - "stringUtf8.ion" + "localSymbolTableAppend.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From 83ae5573e0e97764e6f190be2f54663fee87cac4 Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:38:46 -0800 Subject: [PATCH 07/10] Exclude the failing test --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index af4cc551..54bf0899 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -42,7 +42,8 @@ public class Vector { "subfieldVarInt.ion", "whitespace.ion", - "localSymbolTableAppend.ion" + "localSymbolTableAppend.ion", + "stringUtf8.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From 1d106c295a701e8b2ae46ef05ecee7ad65b45639 Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:33:00 -0800 Subject: [PATCH 08/10] Update excluded tests --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index 54bf0899..af4cc551 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -42,8 +42,7 @@ public class Vector { "subfieldVarInt.ion", "whitespace.ion", - "localSymbolTableAppend.ion", - "stringUtf8.ion" + "localSymbolTableAppend.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From 145a38b7babcc58e2dd5f6de6c513c6c6cfbd486 Mon Sep 17 00:00:00 2001 From: Armanbqt Date: Mon, 13 Jan 2020 10:38:46 -0800 Subject: [PATCH 09/10] Exclude the failing test --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index af4cc551..54bf0899 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -42,7 +42,8 @@ public class Vector { "subfieldVarInt.ion", "whitespace.ion", - "localSymbolTableAppend.ion" + "localSymbolTableAppend.ion", + "stringUtf8.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); From aaee9683de1aaf6ab9bdf013f6e71af51d8192f4 Mon Sep 17 00:00:00 2001 From: Arman A Date: Mon, 30 Mar 2020 15:08:09 -0700 Subject: [PATCH 10/10] Check for implementations of Stream without support of Stream.Length --- Amazon.IonDotnet.Tests/Integration/Vector.cs | 3 +-- .../Internals/Text/UnicodeStream.cs | 22 +++++++++++-------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Amazon.IonDotnet.Tests/Integration/Vector.cs b/Amazon.IonDotnet.Tests/Integration/Vector.cs index 54bf0899..578b49e9 100644 --- a/Amazon.IonDotnet.Tests/Integration/Vector.cs +++ b/Amazon.IonDotnet.Tests/Integration/Vector.cs @@ -41,9 +41,8 @@ public class Vector private static readonly HashSet Excludes = new HashSet { "subfieldVarInt.ion", - "whitespace.ion", "localSymbolTableAppend.ion", - "stringUtf8.ion" + "clobNewlines.ion" }; private static readonly DirectoryInfo IonTestDir = DirStructure.IonTestDir(); diff --git a/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs b/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs index ee3e8569..6c226c83 100644 --- a/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs +++ b/Amazon.IonDotnet/Internals/Text/UnicodeStream.cs @@ -1,9 +1,7 @@ using System; using System.Collections.Generic; -using System.Diagnostics; using System.IO; using System.Text; -using Amazon.IonDotnet.Utils; namespace Amazon.IonDotnet.Internals.Text { @@ -11,7 +9,7 @@ internal class UnicodeStream : TextStream { private readonly StreamReader _streamReader; private readonly Stack _unreadStack; - private long remainingChars; + private long? remainingChars = null; public UnicodeStream(Stream inputStream) : this(inputStream, Encoding.UTF8) { @@ -23,7 +21,10 @@ public UnicodeStream(Stream inputStream, Encoding encoding) throw new ArgumentException("Input stream must be readable", nameof(inputStream)); _streamReader = new StreamReader(inputStream, encoding); _unreadStack = new Stack(); - remainingChars = inputStream.Length; + if (inputStream.CanSeek) + { + remainingChars = inputStream.Length; + } } public UnicodeStream(Stream inputStream, Span readBytes) @@ -43,9 +44,9 @@ public UnicodeStream(Stream inputStream, Encoding encoding, Span readBytes throw new ArgumentException("Input stream must be readable", nameof(inputStream)); _streamReader = new StreamReader(inputStream, encoding); - remainingChars = inputStream.Length; if (inputStream.CanSeek) { + remainingChars = inputStream.Length; InputStream.Seek(-readBytes.Length, SeekOrigin.Current); return; } @@ -60,11 +61,14 @@ public UnicodeStream(Stream inputStream, Encoding encoding, Span readBytes public override int Read() { var value = _unreadStack.Count > 0 ? _unreadStack.Pop() : _streamReader.Read(); - remainingChars--; - - if (_streamReader.CurrentEncoding == Encoding.UTF8) + + if (remainingChars.HasValue) { - IsValidUTF8Character(); + remainingChars--; + if (_streamReader.CurrentEncoding == Encoding.UTF8) + { + IsValidUTF8Character(); + } } return value;