feat: Verification logger (TRX only) (#1552)

This change adds a new `/verificationLogger` option, which enables logging verification results in convenient formats for further analysis. It is meant to imitate the `--logger` option that can be passed to `dotnet test` to choose a logger for test results, and the implementation reuses the same test results model and logger interface. The motivation behind this is to provide two end-user benefits: 1. Enable reporting verification results in a easy-to-read format, especially in CI for Dafny code bases, through existing tools that support common test result formats (e.g. https://github.com/dorny/test-reporter#supported-formats) 2. Recording the verification time needed for each procedure, to enable identifying hot spots. We have observed that proof conditions that take a long time to verify are also prone to timing out or failing to verify when Dafny code is changed over time, and once this PR is merged I will add a wiki page calling attention to this and how to use this new option to pay attention to verification time in the development process. The only currently supported `/verificationLogger` option is `trx`, which produces the widely-supported VSTest TRX file format. The mapping of verification results to "test results" is experimental and still subject to improvement, but here is a sample of what it looks like: ``` <?xml version="1.0" encoding="utf-8"?> <TestRun id="782a53bd-962c-460a-9c1e-347392c7cfea" name="@a483e79acc1e 2021-11-02 21:18:24" xmlns="http://microsoft.com/schemas/VisualStudio/TeamTest/2010"> <Times creation="2021-11-02T21:18:24.7568110-07:00" queuing="2021-11-02T21:18:24.7568110-07:00" start="2021-11-02T21:18:24.7373890-07:00" finish="2021-11-02T21:18:24.7872770-07:00" /> <TestSettings name="default" id="28f50e45-c211-49b3-9464-1878b090a068"> <Deployment runDeploymentRoot="_a483e79acc1e_2021-11-02_21_18_24" /> </TestSettings> <Results> <UnitTestResult executionId="284b9663-40ec-46bd-b5d7-a3b92cf7fdac" testId="b346bd77-ae8b-e18e-94e7-4711e20a10b8" testName="Impl$$_module.__default.Same2" computerName="a483e79acc1e" duration="00:00:00.1680000" startTime="2021-11-02T21:18:24.0000000-07:00" endTime="2021-11-02T21:18:24.0000000-07:00" testType="13cdc9d9-ddb5-4fa4-a97d-d965ccfc6d4b" outcome="Passed" testListId="8c84fa94-04c1-424b-9868-57a2d4851a1d" relativeResultsDirectory="284b9663-40ec-46bd-b5d7-a3b92cf7fdac" /> <UnitTestResult executionId="309ab74b-92b6-4963-a346-681f26f11ed4" testId="72f213c0-00a3-454b-d21a-61094039a2b0" testName="CheckWellformed$$_module.__default.IRP__Alt" computerName="a483e79acc1e" duration="00:00:00.0870000" startTime="2021-11-02T21:18:23.0000000-07:00" endTime="2021-11-02T21:18:23.0000000-07:00" testType="13cdc9d9-ddb5-4fa4-a97d-d965ccfc6d4b" outcome="Passed" testListId="8c84fa94-04c1-424b-9868-57a2d4851a1d" relativeResultsDirectory="309ab74b-92b6-4963-a346-681f26f11ed4" /> ... </Results> <TestDefinitions> <UnitTest name="Impl$$_module.__default.Prefix" storage="test/verifythis2015/problem1.dfy" id="20f05010-f9dc-e8d1-a417-9bb05176768a"> <Execution id="6a99bc61-e035-4f86-a20b-99b03318997e" /> <TestMethod codeBase="Test/VerifyThis2015/Problem1.dfy" adapterTypeName="executor://dafnyverifier/v1" className="Impl$$_module.__default" name="Prefix" /> </UnitTest> <UnitTest name="Impl$$_module.__default.Same1" storage="test/verifythis2015/problem1.dfy" id="6fb25bcd-2493-278f-b561-850c81ed8687"> <Execution id="bc130fd9-dbbd-4b83-84d3-fd6592ed1049" /> <TestMethod codeBase="Test/VerifyThis2015/Problem1.dfy" adapterTypeName="executor://dafnyverifier/v1" className="Impl$$_module.__default" name="Same1" /> </UnitTest> ... </TestDefinitions> <TestEntries> <TestEntry testId="b346bd77-ae8b-e18e-94e7-4711e20a10b8" executionId="284b9663-40ec-46bd-b5d7-a3b92cf7fdac" testListId="8c84fa94-04c1-424b-9868-57a2d4851a1d" /> <TestEntry testId="72f213c0-00a3-454b-d21a-61094039a2b0" executionId="309ab74b-92b6-4963-a346-681f26f11ed4" testListId="8c84fa94-04c1-424b-9868-57a2d4851a1d" /> ... </TestEntries> <TestLists> <TestList name="Results Not in a List" id="8c84fa94-04c1-424b-9868-57a2d4851a1d" /> <TestList name="All Loaded Results" id="19431567-8539-422a-85d7-44ee4e166bda" /> </TestLists> <ResultSummary outcome="Completed"> <Counters total="18" executed="18" passed="18" failed="0" error="0" timeout="0" aborted="0" inconclusive="0" passedButRunAborted="0" notRunnable="0" notExecuted="0" disconnected="0" warning="0" completed="0" inProgress="0" pending="0" /> </ResultSummary> </TestRun> ``` Note in particular the `duration` field on `UnitTestResult` nodes. Disappointingly, most tools that handle TRX files do not seem to support sorting by duration, which is how I'd expect users to identify hot spots. I will offer suggestions in the wiki for how to extract this data. Known issues: * I still need to add an integration test to sanity check this option is working, but need to find a portable way to search for a substring in an output TRX file. We weren't using OutputCheck when we were on lit and the xUnit test runner intentionally doesn't support it.
dafny-lang · Nov 4, 2021 · 71d8782 · 71d8782
1 parent 13660ea
commit 71d8782
Show file tree

Hide file tree

Showing 5 changed files with 586 additions and 6 deletions.
diff --git a/Source/Dafny/DafnyOptions.cs b/Source/Dafny/DafnyOptions.cs
@@ -103,6 +103,9 @@ public enum IncludesModes { None, Immediate, Transitive }
     public bool WarningsAsErrors = false;
     [CanBeNull] private TestGenerationOptions testGenOptions = null;
     public bool ExtractCounterexample = false;
+    public string VerificationLoggerConfig = null;
+    // Working around the fact that xmlFilename is private
+    public string BoogieXmlFilename = null;
 
     public virtual TestGenerationOptions TestGenOptions =>
       testGenOptions ??= new TestGenerationOptions();
@@ -134,7 +137,7 @@ protected override bool ParseOption(string name, Bpl.CommandLineOptionEngine.Com
             } else if (args[ps.i].Equals("DllEmbed")) {
               PrintMode = PrintModes.DllEmbed;
             } else {
-              throw new Exception("Invalid value for printMode");
+              InvalidArgumentError(name, ps);
             }
           }
           return true;
@@ -176,7 +179,7 @@ protected override bool ParseOption(string name, Bpl.CommandLineOptionEngine.Com
             } else if (args[ps.i].Equals("php")) {
               CompileTarget = CompilationTarget.Php;
             } else {
-              throw new Exception("Invalid value for compileTarget");
+              InvalidArgumentError(name, ps);
             }
           }
           return true;
@@ -385,7 +388,7 @@ protected override bool ParseOption(string name, Bpl.CommandLineOptionEngine.Com
             } else if (args[ps.i].Equals("Transitive")) {
               PrintIncludesMode = IncludesModes.Transitive;
             } else {
-              throw new Exception("Invalid value for includesMode");
+              InvalidArgumentError(name, ps);
             }
 
             if (PrintIncludesMode == IncludesModes.Immediate || PrintIncludesMode == IncludesModes.Transitive) {
@@ -407,7 +410,7 @@ protected override bool ParseOption(string name, Bpl.CommandLineOptionEngine.Com
               } else if (args[ps.i].Equals("1")) {
                 ShowSnippets = true;
               } else {
-                throw new Exception("Invalid value for showSnippets");
+                InvalidArgumentError(name, ps);
               }
             }
             return true;
@@ -420,15 +423,37 @@ protected override bool ParseOption(string name, Bpl.CommandLineOptionEngine.Com
         case "extractCounterexample":
           ExtractCounterexample = true;
           return true;
+
+        case "verificationLogger":
+          if (ps.ConfirmArgumentCount(1)) {
+            if (args[ps.i] == "trx") {
+              VerificationLoggerConfig = args[ps.i];
+            } else {
+              InvalidArgumentError(name, ps);
+            }
+          }
+          return true;
       }
 
       // Unless this is an option for test generation, defer to superclass
       return TestGenOptions.ParseOption(name, ps) || base.ParseOption(name, ps);
     }
 
+    protected void InvalidArgumentError(string name, CommandLineParseState ps) {
+      ps.Error("Invalid argument \"{0}\" to option {1}", ps.args[ps.i], name);
+    }
+
     public override void ApplyDefaultOptions() {
       base.ApplyDefaultOptions();
 
+      if (VerificationLoggerConfig != null) {
+        if (XmlSink != null) {
+          throw new Exception("The /verificationLogger and /xml options cannot be used at the same time.");
+        }
+        BoogieXmlFilename = Path.GetTempFileName();
+        XmlSink = new Bpl.XmlSink(BoogieXmlFilename);
+      }
+
       // expand macros in filenames, now that LogPrefix is fully determined
       ExpandFilename(ref DafnyPrelude, LogPrefix, FileTimestamp);
       ExpandFilename(ref DafnyPrintFile, LogPrefix, FileTimestamp);
@@ -876,6 +901,12 @@ Treat warnings as errors.
     If verification fails, report a detailed counterexample for the first
     failing assertion. Requires specifying the /mv option as well as
     /proverOpt:0:model_compress=false and /proverOpt:0:model.completion=true.
+/verificationLogger:<configuration string>
+    Logs verification results to the given test result logger.
+    The only currently supported value is ""trx"", the XML-based format
+    commonly used for test results for .NET languages.
+    The exact mapping of verification concepts to the TRX format is
+    experimental and subject to change!
 {TestGenOptions.Help}
 
 Dafny generally accepts Boogie options and passes these on to Boogie. However,

diff --git a/Source/DafnyDriver/BoogieXmlConvertor.cs b/Source/DafnyDriver/BoogieXmlConvertor.cs
@@ -0,0 +1,106 @@
+//-----------------------------------------------------------------------------
+//
+// Copyright (C) Microsoft Corporation.  All Rights Reserved.
+// Copyright by the contributors to the Dafny Project
+// SPDX-License-Identifier: MIT
+//
+//-----------------------------------------------------------------------------
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Xml.Linq;
+using Microsoft.VisualStudio.TestPlatform.Extensions.TrxLogger;
+using Microsoft.VisualStudio.TestPlatform.ObjectModel;
+using Microsoft.VisualStudio.TestPlatform.ObjectModel.Client;
+using Microsoft.VisualStudio.TestPlatform.ObjectModel.Logging;
+
+namespace Microsoft.Dafny {
+
+  /// <summary>
+  /// Utility to parse the XML format produced by Boogie's /xml option and emit the
+  /// results therein as test logger events, allowing us to deliver verification results
+  /// as test results through common loggers on the .NET platform. For now we are just supporting
+  /// outputting TRX files, which can be understood and visualized by various tools.
+  /// </summary>
+  public static class BoogieXmlConvertor {
+
+    public static void RaiseTestLoggerEvents(string fileName, string loggerConfig) {
+      // The only supported value for now
+      if (loggerConfig != "trx") {
+        throw new ArgumentException($"Unsupported verification logger config: {loggerConfig}");
+      }
+
+      var events = new LocalTestLoggerEvents();
+      var logger = new TrxLogger();
+      // Provide just enough configuration for the TRX logger to work
+      logger.Initialize(events, new Dictionary<string, string> {
+        ["TestRunDirectory"] = Constants.DefaultResultsDirectory
+      });
+      events.EnableEvents();
+
+      // Sort failures to the top, and then slower procedures first.
+      // Loggers may not maintain this ordering unfortunately.
+      var results = ReadTestResults(fileName)
+        .OrderBy(r => r.Outcome == TestOutcome.Passed)
+        .ThenByDescending(r => r.Duration);
+      foreach (var result in results) {
+        events.RaiseTestResult(new TestResultEventArgs(result));
+      }
+
+      events.RaiseTestRunComplete(new TestRunCompleteEventArgs(
+        new TestRunStatistics(),
+        false, false, null, null, new TimeSpan()
+      ));
+    }
+
+    public static IEnumerable<TestResult> ReadTestResults(string xmlFileName) {
+      string currentFileFragment = null;
+      var root = XElement.Load(xmlFileName);
+      var testResults = new List<TestResult>();
+      foreach (var child in root.Nodes().OfType<XElement>()) {
+        switch (child.Name.LocalName) {
+          case "fileFragment":
+            currentFileFragment = child.Attribute("name")!.Value;
+            break;
+          case "method":
+            testResults.Add(ToTestResult(child, currentFileFragment));
+            break;
+        }
+      }
+
+      return testResults;
+    }
+
+    private static TestResult ToTestResult(XElement node, string currentFileFragment) {
+      var name = node.Attribute("name")!.Value;
+      var startTime = node.Attribute("startTime")!.Value;
+      var conclusionNode = node.Nodes()
+                                       .OfType<XElement>()
+                                       .Single(n => n.Name.LocalName == "conclusion");
+      var endTime = conclusionNode.Attribute("endTime")!.Value;
+      var duration = float.Parse(conclusionNode.Attribute("duration")!.Value);
+      var outcome = conclusionNode.Attribute("outcome")!.Value;
+
+      var testCase = new TestCase {
+        FullyQualifiedName = name,
+        ExecutorUri = new Uri("executor://dafnyverifier/v1"),
+        Source = currentFileFragment
+      };
+
+      var testResult = new TestResult(testCase) {
+        StartTime = DateTimeOffset.Parse(startTime),
+        EndTime = DateTimeOffset.Parse(endTime),
+        Duration = TimeSpan.FromMilliseconds((long)(duration * 1000))
+      };
+
+      if (outcome == "correct") {
+        testResult.Outcome = TestOutcome.Passed;
+      } else {
+        testResult.Outcome = TestOutcome.Failed;
+        testResult.ErrorMessage = outcome;
+      }
+
+      return testResult;
+    }
+  }
+}
diff --git a/Source/DafnyDriver/DafnyDriver.cs b/Source/DafnyDriver/DafnyDriver.cs
@@ -10,9 +10,7 @@
 //       - main program for taking a Dafny program and verifying it
 //---------------------------------------------------------------------------------------------
 
-using System.Security;
 using DafnyServer.CounterexampleGeneration;
-using DafnyTestGeneration;
 
 namespace Microsoft.Dafny {
   using System;
@@ -66,6 +64,9 @@ public static int ThreadMain(string[] args) {
 
       if (CommandLineOptions.Clo.XmlSink != null) {
         CommandLineOptions.Clo.XmlSink.Close();
+        if (DafnyOptions.O.VerificationLoggerConfig != null) {
+          BoogieXmlConvertor.RaiseTestLoggerEvents(DafnyOptions.O.BoogieXmlFilename, DafnyOptions.O.VerificationLoggerConfig);
+        }
       }
       if (CommandLineOptions.Clo.Wait) {
         Console.WriteLine("Press Enter to exit.");

diff --git a/Source/DafnyDriver/DafnyDriver.csproj b/Source/DafnyDriver/DafnyDriver.csproj
@@ -14,6 +14,22 @@
     <AppendTargetFrameworkToOutputPath>false</AppendTargetFrameworkToOutputPath>
     <AppendRuntimeIdentifierToOutputPath>false</AppendRuntimeIdentifierToOutputPath>
   </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="Microsoft.TestPlatform.Extensions.TrxLogger" Version="17.0.0" />
+    <PackageReference Include="Microsoft.TestPlatform.TestHost" Version="16.11.0" />
+
+    <!-- Locking down versions that would otherwise be downgraded -->
+    <PackageReference Include="System.Security.Principal.Windows" Version="4.5.0" />
+    <PackageReference Include="System.Collections" Version="4.3.0" />
+    <PackageReference Include="System.Diagnostics.Debug" Version="4.3.0" />
+    <PackageReference Include="System.IO.FileSystem.Primitives" Version="4.3.0" />
+    <PackageReference Include="System.Runtime.Handles" Version="4.3.0" />
+    <PackageReference Include="System.Runtime.InteropServices" Version="4.3.0" />
+    <PackageReference Include="System.Text.Encoding.Extensions" Version="4.3.0" />
+    <PackageReference Include="System.Threading" Version="4.3.0" />
+  </ItemGroup>
+
   <ItemGroup>
     <ProjectReference Include="..\Dafny\DafnyPipeline.csproj" />
     <ProjectReference Include="..\DafnyRuntime\DafnyRuntime.csproj" />