From 2f2d7cf8a8389d714be771ac06e83fe570c894b8 Mon Sep 17 00:00:00 2001 From: Tom Maenan Read Cutting Date: Sat, 27 Aug 2022 09:59:05 +0100 Subject: [PATCH] Fix carriage return crash in multiline string Follow the guidance of #38: > However CR directly before NL is interpreted as only a newline and not part of the multiline string. zig fmt will delete the CR. Zig fmt already had code for deleting carriage returns, but would still crash - now it no longer does so. Carriage returns encountered before line-feeds are now appropriately removed on program compilation as well. --- doc/langref.html.in | 3 ++- lib/std/zig/tokenizer.zig | 2 +- src/AstGen.zig | 26 ++++++++++++++++++++++---- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/doc/langref.html.in b/doc/langref.html.in index 162ba447003b..13c4c4f39651 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -11708,7 +11708,8 @@ fn readU32Be() u32 {}

Each LF may be immediately preceded by a single CR (byte value 0x0d, code point U+000d, {#syntax#}'\r'{#endsyntax#}) - to form a Windows style line ending, but this is discouraged. + to form a Windows style line ending, but this is discouraged. Note that in multiline strings, CRLF sequences will + be encoded as LF when compiled into a Zig program. A CR in any other context is not allowed.

diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index f43aa9d79336..9718ebc3cadb 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -935,7 +935,7 @@ pub const Tokenizer = struct { self.index += 1; break; }, - '\t' => {}, + '\t', '\r' => {}, else => self.checkLiteralCharacter(), }, diff --git a/src/AstGen.zig b/src/AstGen.zig index 1502b970178a..1687e2a7a79f 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -2227,7 +2227,7 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod .assign_add_wrap => try assignOp(gz, scope, statement, .addwrap), .assign_mul => try assignOp(gz, scope, statement, .mul), .assign_mul_wrap => try assignOp(gz, scope, statement, .mulwrap), - + .grouped_expression => { inner_node = node_data[statement].lhs; continue; @@ -9977,16 +9977,34 @@ fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice { { const slice = tree.tokenSlice(tok_i); const line_bytes = slice[2 .. slice.len - 1]; - try string_bytes.appendSlice(gpa, line_bytes); + const carriage_return_count = mem.count(u8, line_bytes, "\r"); + if (carriage_return_count > 0) { + try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len - carriage_return_count); + for (line_bytes) |line_byte| { + if (line_byte == '\r') continue; + string_bytes.appendAssumeCapacity(line_byte); + } + } else { + try string_bytes.appendSlice(gpa, line_bytes); + } tok_i += 1; } // Following lines: each line prepends a newline. while (tok_i <= end) : (tok_i += 1) { const slice = tree.tokenSlice(tok_i); const line_bytes = slice[2 .. 
slice.len - 1]; - try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len + 1); + + const carriage_return_count = mem.count(u8, line_bytes, "\r"); + try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len - carriage_return_count + 1); string_bytes.appendAssumeCapacity('\n'); - string_bytes.appendSliceAssumeCapacity(line_bytes); + if (carriage_return_count > 0) { + for (line_bytes) |line_byte| { + if (line_byte == '\r') continue; + string_bytes.appendAssumeCapacity(line_byte); + } + } else { + string_bytes.appendSliceAssumeCapacity(line_bytes); + } } const len = string_bytes.items.len - str_index; try string_bytes.append(gpa, 0);