Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Safe memoization during parse graph flattening #2136

Open
wants to merge 18 commits into
base: feat/error-recovery
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (c) 2024, NWO-I Centrum Wiskunde & Informatica (CWI)
* Copyright (c) 2024-2025, NWO-I Centrum Wiskunde & Informatica (CWI)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
Expand All @@ -17,9 +17,7 @@ import lang::rascal::tests::concrete::recovery::RecoveryTestSupport;
import IO;
import ValueIO;
import util::Benchmark;
import util::SystemAPI;
import String;
import List;

// Void wrapper around testRecoveryC.
void runTestC() {
    testRecoveryC();
}
// Void wrapper around testRecoveryDiff.
void runTestDiff() {
    testRecoveryDiff();
}
Expand All @@ -37,6 +35,8 @@ FileStats testRecoveryJson() = testErrorRecovery(|std:///lang/json/syntax/JSON.r
// Runs testErrorRecovery with the Pico grammar (top sort "Program") on the fac.pico example.
FileStats testRecoveryPico() {
    return testErrorRecovery(|std:///lang/pico/syntax/Main.rsc|, "Program", |std:///lang/pico/examples/fac.pico|);
}
// Runs testErrorRecovery with the Rascal grammar (top sort "Module") on ImportGraph.rsc.
FileStats testRecoveryRascal() {
    return testErrorRecovery(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///lang/rascal/vis/ImportGraph.rsc|);
}

// Runs testErrorRecovery with the Rascal grammar (top sort "Module") on PostParseFilter.rsc.
// NOTE(review): judging by the name this input reproduced a memoization bug - confirm.
FileStats testMemoBug() {
    return testErrorRecovery(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///lang/rascal/tests/concrete/PostParseFilter.rsc|);
}

void runLanguageTests() {
testRecoveryC();
testRecoveryDiff();
Expand All @@ -47,42 +47,59 @@ void runLanguageTests() {
testRecoveryRascal();
}

void runRascalBatchTest(int maxFiles=1000, int minFileSize=0, int maxFileSize=4000, int fromFile=0) {
void runRascalBatchTest(RecoveryTestConfig config) {
int startTime = realTime();

map[str,str] env = getSystemEnvironment();
loc statFile = "STATFILE" in env ? readTextValueString(#loc, env["STATFILE"]) : |unknown:///|;

println("Writing stats to <statFile>");

TestStats stats = batchRecoveryTest(|std:///lang/rascal/syntax/Rascal.rsc|, "Module", |std:///|, ".rsc", maxFiles, minFileSize, maxFileSize, fromFile, statFile);
TestStats stats = batchRecoveryTest(config);
int duration = realTime() - startTime;
println();
println("================================================================");
println("Rascal batch test done in <duration/1000> seconds, total result:");
printStats(stats);
}

// Usage: ErrorRecoveryBenchmark [\<max-files\> [\<min-file-size\> [\<max-file-size\> [\<from-file\>]]]]
// Usage: ErrorRecoveryBenchmark <base-loc> [max-files=<n>] [min-file-size=<n>] [max-file-size=<n>] [from-file=<n>] [stat-file=<loc>] [memo-verification-timeout=<n>] [abort-on-no-memo-timeout]
int main(list[str] args) {
loc baseLoc = readTextValueString(#loc, args[0]);
int maxFiles = 1000;
int maxFileSize = 1000000;
int minFileSize = 0;
int fromFile = 0;
if (size(args) > 0) {
maxFiles = toInt(args[0]);
}
if (size(args) > 1) {
minFileSize = toInt(args[1]);
}
if (size(args) > 2) {
maxFileSize = toInt(args[2]);
}
if (size(args) > 3) {
fromFile = toInt(args[3]);
loc statFile = |tmp:///error-recovery-test.stats|; // |unknown:///| to disable stat writing
int memoVerificationTimeout = 0;
bool abortOnNoMemoTimeout = false;

for (str arg <- args) {
if (/<name:[^=]*>=<val:.*>/ := arg) {
switch (toLowerCase(name)) {
case "max-files": maxFiles = toInt(val);
case "max-file-size": maxFileSize = toInt(val);
case "min-file-size": minFileSize = toInt(val);
case "from-file": fromFile = toInt(val);
case "stat-file": statFile = readTextValueString(#loc, val);
case "memo-verification-timeout": memoVerificationTimeout = toInt(val);
}
println("arg: <arg>");
} else switch (toLowerCase(arg)) {
case "abort-on-no-memo-timeout": abortOnNoMemoTimeout = true;
}
}

runRascalBatchTest(maxFiles=maxFiles, minFileSize=minFileSize, maxFileSize=maxFileSize, fromFile=fromFile);
RecoveryTestConfig config = recoveryTestConfig(
syntaxFile=|std:///lang/rascal/syntax/Rascal.rsc|,
topSort="Module",
dir=baseLoc,
ext=".rsc",
maxFiles=maxFiles,
minFileSize=minFileSize,
maxFileSize=maxFileSize,
fromFile=fromFile,
statFile=statFile
);
runRascalBatchTest(config);

return 0;
}
}

// Quick run of main over |std:///|: at most 3 files, each at most 500 in size.
int rascalSmokeTest() {
    return main(["|std:///|", "max-files=3", "max-file-size=500"]);
}
// Standard run of main over |std:///|: at most 1000 files, each at most 5120 in size.
int rascalStandardTest() {
    return main(["|std:///|", "max-files=1000", "max-file-size=5120"]);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/**
* Copyright (c) 2025, NWO-I Centrum Wiskunde & Informatica (CWI)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/
module lang::rascal::tests::concrete::recovery::ParseBenchmark

import IO;
import String;
import ValueIO;
import ParseTree;
import Grammar;
import util::Benchmark;
import lang::rascal::\syntax::Rascal;
import lang::rascal::grammar::definition::Modules;

// Settings for one parse benchmark run. Every field is a keyword parameter
// with a default, so `parseBenchmarkConfig()` yields a complete configuration.
public data ParseBenchmarkConfig = parseBenchmarkConfig(
// Grammar module that is parsed and converted into the benchmarked parser.
loc syntaxFile = |std:///lang/rascal/syntax/Rascal.rsc|,
// Name of the start sort that must be present in the grammar's start symbols.
str topSort = "Module",
// Path suffix a file must end with to be included in the benchmark.
str extension = ".rsc",
// Directory that is searched recursively for input files.
loc files = |unknown:///|,
// CSV output; overwritten with an "input,size,duration" header, then one line is appended per measured file.
loc statFile = |tmp:///parse-benchmark-stats.csv|,
// Number of parses performed before any measurement is taken.
int warmupIterations = 100,
// Number of times each file is parsed inside a single timed measurement.
int parseIterations = 10
);

// Recursively collects every file below `dir` whose path ends with `ext`.
// Entries are visited in the order `listEntries` reports them; the contents
// of a sub-directory are inserted at the position of that sub-directory.
list[loc] gatherFiles(loc dir, str ext) {
    list[loc] found = [];

    for (name <- listEntries(dir)) {
        loc candidate = dir + name;

        if (isFile(candidate)) {
            // Plain file: keep it only when the suffix matches.
            found += endsWith(candidate.path, ext) ? [candidate] : [];
        } else if (isDirectory(candidate)) {
            // Directory: descend and splice in whatever is found there.
            found += gatherFiles(candidate, ext);
        }
    }

    return found;
}

// Runs the parser `iterations` times before any measurement is taken.
//
// The files are used round-robin: iteration `i` parses `files[i % size(files)]`.
// A file that fails with a ParseError is reported and still counts as an
// iteration. When `files` is empty there is nothing to parse, so the warmup
// is reported as skipped instead of failing on the modulo by zero.
//
// benchmarkParser: the parse function to exercise; its result is discarded.
// files:           candidate inputs, may be empty.
// iterations:      number of warmup parses to perform.
private void warmupParser(&T (value input, loc origin) benchmarkParser, list[loc] files, int iterations) {
    println("Warming up parser (<iterations>)");

    if (files == []) {
        println("No files to warm up with, skipping warmup");
        return;
    }

    for (int i <- [0..iterations]) {
        loc file = files[i % size(files)];
        try {
            benchmarkParser(readFile(file), file);
        } catch ParseError(_): {
            println("Skipping warmup for file with parse errors: <file>");
        }
    }
}

// Measures one file: reads it once, parses the content `config.parseIterations`
// times and appends a "<file>,<size>,<duration>" line to `config.statFile`.
// The duration is the realTime() difference over all iterations together.
// If any parse raises a ParseError, nothing is written for this file.
private void runBenchmark(ParseBenchmarkConfig config, &T (value input, loc origin) benchmarkParser, loc file) {
    println("Benchmarking <file>");

    str source = readFile(file);
    int sourceSize = size(source);
    int remaining = config.parseIterations;

    try {
        int startedAt = realTime();
        while (remaining > 0) {
            benchmarkParser(source, file);
            remaining -= 1;
        }
        int elapsed = realTime() - startedAt;

        appendToFile(config.statFile, "<file>,<sourceSize>,<elapsed>\n");
    } catch ParseError(_): {
        println("Ignoring file with parse errors: <file>");
    }
}

// Measures every file in `files`, in list order, using the single-file overload.
private void runBenchmark(ParseBenchmarkConfig config, &T (value input, loc origin) benchmarkParser, list[loc] files) {
    for (loc target <- files) {
        // One stats line (or one "ignored" message) per file.
        runBenchmark(config, benchmarkParser, target);
    }
}

// Derives a module name from the path of a syntax file: the first character
// of the path is dropped, the last ".rsc" is removed and every "/" becomes "::".
private str syntaxLocToModuleName(loc syntaxFile) {
    str withoutFirstChar = substring(syntaxFile.path, 1);
    str withoutExtension = replaceLast(withoutFirstChar, ".rsc", "");
    return replaceAll(withoutExtension, "/", "::");
}


// Runs a complete benchmark for `config`:
//   1. overwrites the stat file with the CSV header,
//   2. parses the syntax file and turns it into a grammar,
//   3. builds a parser for the configured top sort,
//   4. gathers the input files, warms the parser up and measures each file.
// Throws a string when the grammar has no start symbol for the top sort.
private void benchmark(ParseBenchmarkConfig config) {
    writeFile(config.statFile, "input,size,duration\n");

    Module grammarModule = parse(#start[Module], config.syntaxFile).top;
    Grammar gram = modules2grammar(syntaxLocToModuleName(config.syntaxFile), {grammarModule});

    // Bound before the match so that the pattern below compares against its value.
    str topSort = config.topSort;
    if (startSymbol:\start(\sort(topSort)) <- gram.starts) {
        type[value] reifiedStart = type(startSymbol, gram.rules);
        benchmarkParser = parser(reifiedStart);
        list[loc] inputs = gatherFiles(config.files, config.extension);

        warmupParser(benchmarkParser, inputs, config.warmupIterations);
        runBenchmark(config, benchmarkParser, inputs);
    } else {
        throw "Cannot find top sort <topSort> in <gram>";
    }
}

// Command-line entry point of the parse benchmark.
//
// Every argument has the form `name=value`; names are case-insensitive:
//   syntax=<loc>   grammar module to benchmark    (config.syntaxFile)
//   sort=<str>     start sort of the grammar      (config.topSort)
//   files=<loc>    directory with input files     (config.files)
//   ext=<str>      suffix of the input files      (config.extension)
//   stats=<loc>    CSV file receiving the results (config.statFile)
//   warmup=<int>   number of warmup parses        (config.warmupIterations)
//   parses=<int>   parses per timed measurement   (config.parseIterations)
//
// Arguments that are not recognized are reported and otherwise ignored, so
// that a misspelled option does not silently fall back to its default.
// Always returns 0.
int main(list[str] args) {
    ParseBenchmarkConfig config = parseBenchmarkConfig();

    for (str arg <- args) {
        if (/<name:[^=]*>=<val:.*>/ := arg) {
            switch (toLowerCase(name)) {
                case "syntax": config.syntaxFile = readTextValueString(#loc, val);
                case "sort": config.topSort = val;
                case "files": config.files = readTextValueString(#loc, val);
                case "ext": config.extension = val;
                case "stats": config.statFile = readTextValueString(#loc, val);
                case "warmup": config.warmupIterations = toInt(val);
                case "parses": config.parseIterations = toInt(val);
                default: println("Ignoring unknown option: <arg>");
            }
        } else {
            println("Ignoring argument that is not of the form name=value: <arg>");
        }
    }

    benchmark(config);

    return 0;
}

// Runs the benchmark with all defaults, taking the input files from |std:///|.
public int benchmarkRascal() {
    return main(["files=|std:///|"]);
}
Loading
Loading