Skip to content

Commit

Permalink
Fix carriage return crash in multiline string
Browse files Browse the repository at this point in the history
Follow the guidance of #38:

> However CR directly before NL is interpreted as only a newline and not part of the multiline string. zig fmt will delete the CR.

Zig fmt already had code for deleting carriage returns, but would still
crash - now it no longer does so. Carriage returns encountered before
line-feeds are now appropriately removed on program compilation as well.
  • Loading branch information
moosichu committed Sep 5, 2022
1 parent 533901a commit 2f2d7cf
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 6 deletions.
3 changes: 2 additions & 1 deletion doc/langref.html.in
Original file line number Diff line number Diff line change
Expand Up @@ -11708,7 +11708,8 @@ fn readU32Be() u32 {}
</p>
<p>
Each LF may be immediately preceded by a single CR (byte value 0x0d, code point U+000d, {#syntax#}'\r'{#endsyntax#})
to form a Windows style line ending, but this is discouraged.
to form a Windows style line ending, but this is discouraged. Note that in multiline strings, CRLF sequences will
be encoded as LF when compiled into a Zig program.
A CR in any other context is not allowed.
</p>
<p>
Expand Down
2 changes: 1 addition & 1 deletion lib/std/zig/tokenizer.zig
Original file line number Diff line number Diff line change
Expand Up @@ -935,7 +935,7 @@ pub const Tokenizer = struct {
self.index += 1;
break;
},
'\t' => {},
'\t', '\r' => {},
else => self.checkLiteralCharacter(),
},

Expand Down
26 changes: 22 additions & 4 deletions src/AstGen.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2227,7 +2227,7 @@ fn blockExprStmts(gz: *GenZir, parent_scope: *Scope, statements: []const Ast.Nod
.assign_add_wrap => try assignOp(gz, scope, statement, .addwrap),
.assign_mul => try assignOp(gz, scope, statement, .mul),
.assign_mul_wrap => try assignOp(gz, scope, statement, .mulwrap),

.grouped_expression => {
inner_node = node_data[statement].lhs;
continue;
Expand Down Expand Up @@ -9977,16 +9977,34 @@ fn strLitNodeAsString(astgen: *AstGen, node: Ast.Node.Index) !IndexSlice {
{
const slice = tree.tokenSlice(tok_i);
const line_bytes = slice[2 .. slice.len - 1];
try string_bytes.appendSlice(gpa, line_bytes);
const carriage_return_count = mem.count(u8, line_bytes, "\r");
if (carriage_return_count > 0) {
try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len - carriage_return_count);
for (line_bytes) |line_byte| {
if (line_byte == '\r') continue;
string_bytes.appendAssumeCapacity(line_byte);
}
} else {
try string_bytes.appendSlice(gpa, line_bytes);
}
tok_i += 1;
}
// Following lines: each line prepends a newline.
while (tok_i <= end) : (tok_i += 1) {
const slice = tree.tokenSlice(tok_i);
const line_bytes = slice[2 .. slice.len - 1];
try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len + 1);

const carriage_return_count = mem.count(u8, line_bytes, "\r");
try string_bytes.ensureUnusedCapacity(gpa, line_bytes.len - carriage_return_count + 1);
string_bytes.appendAssumeCapacity('\n');
string_bytes.appendSliceAssumeCapacity(line_bytes);
if (carriage_return_count > 0) {
for (line_bytes) |line_byte| {
if (line_byte == '\r') continue;
string_bytes.appendAssumeCapacity(line_byte);
}
} else {
string_bytes.appendSliceAssumeCapacity(line_bytes);
}
}
const len = string_bytes.items.len - str_index;
try string_bytes.append(gpa, 0);
Expand Down

0 comments on commit 2f2d7cf

Please sign in to comment.