diff --git a/src/Directory.Packages.props b/src/Directory.Packages.props
index b756fd0..311b398 100644
--- a/src/Directory.Packages.props
+++ b/src/Directory.Packages.props
@@ -5,6 +5,7 @@
+
@@ -13,6 +14,6 @@
-
+
\ No newline at end of file
diff --git a/src/LinkValidator.Tests/RelativeUriSpecs.cs b/src/LinkValidator.Tests/RelativeUriSpecs.cs
new file mode 100644
index 0000000..0d5667d
--- /dev/null
+++ b/src/LinkValidator.Tests/RelativeUriSpecs.cs
@@ -0,0 +1,56 @@
+using FluentAssertions;
+using LinkValidator.Actors;
+
+namespace LinkValidator.Tests;
+
+/// <summary>
+/// Specs for <see cref="RelativeUri"/>: construction guard, equality and string rendering.
+/// </summary>
+public class RelativeUriSpecs
+{
+    /// <summary>Shared instance wrapping the relative path <c>/path</c>.</summary>
+    public RelativeUri Uri1 { get; } = new(new Uri("/path", UriKind.Relative));
+
+    /// <summary>Wrapping an absolute URI must be rejected by the constructor.</summary>
+    [Fact]
+    public void RelativeUri_should_throw_when_not_relative()
+    {
+        // Arrange
+        var uri = new Uri("http://example.com", UriKind.RelativeOrAbsolute);
+
+        // Act
+        Action act = () => new RelativeUri(uri);
+
+        // Assert
+        // NOTE(review): Throw needs an exception type argument. The concrete exception thrown by
+        // the RelativeUri constructor is not visible from this file, so the base type is used;
+        // tighten it to the specific exception once confirmed.
+        act.Should().Throw<Exception>();
+    }
+
+    /// <summary>Two instances built from the same path are equal and share a hash code.</summary>
+    [Fact]
+    public void RelativeUri_should_equal_copy_of_itself()
+    {
+        // Arrange
+        var uri2 = new RelativeUri(new Uri(Uri1.Value.ToString(), UriKind.Relative));
+
+        // Assert
+        uri2.Should().Be(Uri1);
+        Uri1.GetHashCode().Should().Be(uri2.GetHashCode());
+    }
+
+    /// <summary>ToString() yields the relative path that was wrapped.</summary>
+    [Fact]
+    public void RelativeUri_should_print_path()
+    {
+        // Arrange
+        var uri = new RelativeUri(new Uri("/path-to-file.html", UriKind.Relative));
+
+        // Act
+        var result = uri.ToString();
+
+        // Assert
+        result.Should().Be("/path-to-file.html");
+    }
+}
\ No newline at end of file
diff --git a/src/LinkValidator/Actors/IndexerActor.cs b/src/LinkValidator/Actors/IndexerActor.cs
index 24348aa..c9c3464 100644
--- a/src/LinkValidator/Actors/IndexerActor.cs
+++ b/src/LinkValidator/Actors/IndexerActor.cs
@@ -1,7 +1,9 @@
-using System.Net;
+using System.Collections.Immutable;
+using System.Net;
using Akka.Actor;
using Akka.Event;
using Akka.Routing;
+using LinkValidator.Util;
namespace LinkValidator.Actors;
@@ -51,10 +53,11 @@ private ReportStatistics() {}
private readonly ILoggingAdapter _log = Context.GetLogger();
private readonly CrawlConfiguration _crawlConfiguration;
private IActorRef _crawlers = ActorRefs.Nobody;
-
- public IndexerActor(CrawlConfiguration crawlConfiguration)
+    private readonly TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>> _completionSource; // completed with the final URL -> status map when the crawl finishes
+    public IndexerActor(CrawlConfiguration crawlConfiguration, TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>> completionSource)
     {
         _crawlConfiguration = crawlConfiguration;
+        _completionSource = completionSource; // supplied by the caller, which awaits its Task
     }
public Dictionary IndexedDocuments { get; } = new();
@@ -83,6 +86,13 @@ protected override void OnReceive(object message)
if (IsCrawlComplete)
{
_log.Info("Crawl complete!");
+
+ var finalOutput = IndexedDocuments
+ .Where(x => x.Value.status == CrawlStatus.Visited)
+ .ToImmutableSortedDictionary(x => UriHelpers.ToRelativeUri(_crawlConfiguration.BaseUrl, x.Key).ToString(), x => x.Value.Item2 ?? HttpStatusCode.NotFound);
+
+ _completionSource.SetResult(finalOutput);
+
Context.Stop(Self);
}
break;
diff --git a/src/LinkValidator/Actors/UriTypes.cs b/src/LinkValidator/Actors/UriTypes.cs
index 5fbb918..521924d 100644
--- a/src/LinkValidator/Actors/UriTypes.cs
+++ b/src/LinkValidator/Actors/UriTypes.cs
@@ -1,3 +1,5 @@
+using System.Text;
+
namespace LinkValidator.Actors;
public record struct AbsoluteUri
@@ -25,4 +27,10 @@ public RelativeUri(Uri value)
     }
     public Uri Value { get; }
+
+    /// <summary>Returns the string form of the wrapped <see cref="Value"/>.</summary>
+    public override string ToString()
+    {
+        return Value.ToString();
+    }
 }
\ No newline at end of file
diff --git a/src/LinkValidator/LinkValidator.csproj b/src/LinkValidator/LinkValidator.csproj
index daa27bb..d74b746 100644
--- a/src/LinkValidator/LinkValidator.csproj
+++ b/src/LinkValidator/LinkValidator.csproj
@@ -9,6 +9,7 @@
+
diff --git a/src/LinkValidator/Program.cs b/src/LinkValidator/Program.cs
index c2036bd..a039d85 100644
--- a/src/LinkValidator/Program.cs
+++ b/src/LinkValidator/Program.cs
@@ -1,9 +1,101 @@
-namespace LinkValidator;
+using System.Collections.Immutable;
+using System.CommandLine;
+using System.Net;
+using Akka.Actor;
+using LinkValidator.Actors;
+using static LinkValidator.Util.DiffHelper;
+using static LinkValidator.Util.MarkdownHelper;
+
+namespace LinkValidator;
 class Program
 {
-    static void Main(string[] args)
+    /// <summary>
+    /// Parses the command line, crawls the requested site, emits the sitemap as Markdown and,
+    /// when <c>--diff</c> is given, prints the differences against a previous sitemap.
+    /// </summary>
+    /// <param name="args">Raw command-line arguments.</param>
+    /// <returns>
+    /// The exit code reported by the command-line invocation, or 1 when <c>--strict</c> is set
+    /// and the comparison found errors.
+    /// </returns>
+    public static async Task<int> Main(string[] args)
+    {
+        var urlOption = new Option<string>("--url", "The URL to crawl") { IsRequired = true };
+        var outputOption = new Option<string?>("--output", "Optional output file path for the sitemap");
+        var diffOption = new Option<string?>("--diff", "Previous sitemap file to compare against");
+        var strictOption = new Option<bool>("--strict", () => false,
+            "Return error code if pages are missing or returning 400+ status codes");
+
+        var rootCommand = new RootCommand("Website crawler and sitemap generator")
+        {
+            urlOption,
+            outputOption,
+            diffOption,
+            strictOption
+        };
+
+        // Recorded by the handler and turned into the exit code below. Calling
+        // Environment.Exit from inside the handler would end the process before the
+        // handler's `using` scope is left.
+        var strictFailure = false;
+
+        rootCommand.SetHandler(async (url, output, diff, strict) =>
+        {
+            using var system = ActorSystem.Create("CrawlerSystem", "akka.loglevel = INFO");
+            var absoluteUri = new AbsoluteUri(new Uri(url));
+            var results = await CrawlWebsite(system, absoluteUri);
+            var markdown = GenerateMarkdown(absoluteUri, results);
+
+            // The crawl is finished; start shutting the actor system down without waiting for it.
+            _ = system.Terminate();
+
+            if (output is not null)
+            {
+                await File.WriteAllTextAsync(output, markdown);
+            }
+            else
+            {
+                Console.WriteLine(markdown);
+            }
+
+            if (string.IsNullOrEmpty(diff))
+            {
+                return;
+            }
+
+            var previousMarkdown = await File.ReadAllTextAsync(diff);
+            var (differences, hasErrors) = CompareSitemapsWithErrors(previousMarkdown, markdown);
+            foreach (var difference in differences)
+            {
+                Console.WriteLine(difference);
+            }
+
+            strictFailure = strict && hasErrors;
+        }, urlOption, outputOption, diffOption, strictOption);
+
+        var exitCode = await rootCommand.InvokeAsync(args);
+        return exitCode != 0 ? exitCode : (strictFailure ? 1 : 0);
+    }
+
+    /// <summary>
+    /// Starts an <see cref="IndexerActor"/> for <paramref name="url"/> and waits for it to publish its results.
+    /// </summary>
+    /// <param name="system">Actor system that hosts the indexer.</param>
+    /// <param name="url">Absolute URL the crawl configuration is built from.</param>
+    /// <returns>The map the indexer passes to the completion source it is given.</returns>
+    private static async Task<ImmutableSortedDictionary<string, HttpStatusCode>> CrawlWebsite(ActorSystem system, AbsoluteUri url)
     {
-        Console.WriteLine("Hello, World!");
+        // NOTE(review): 10 and 5 s are presumably a concurrency limit and a request timeout - confirm against CrawlConfiguration.
+        var crawlSettings = new CrawlConfiguration(url, 10, TimeSpan.FromSeconds(5));
+
+        // The indexer completes this source from its own message handler; running continuations
+        // asynchronously keeps the awaiting code from executing inline inside that handler.
+        var tcs = new TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>>(
+            TaskCreationOptions.RunContinuationsAsynchronously);
+
+        var indexer = system.ActorOf(Props.Create(() => new IndexerActor(crawlSettings, tcs)), "indexer");
+        indexer.Tell(IndexerActor.BeginIndexing.Instance);
+        return await tcs.Task;
     }
 }
\ No newline at end of file
diff --git a/src/LinkValidator/Util/DiffHelper.cs b/src/LinkValidator/Util/DiffHelper.cs
new file mode 100644
index 0000000..987f393
--- /dev/null
+++ b/src/LinkValidator/Util/DiffHelper.cs
@@ -0,0 +1,69 @@
+using System.Text.RegularExpressions;
+
+namespace LinkValidator.Util;
+
+/// <summary>
+/// Line-based comparison of two Markdown sitemaps.
+/// </summary>
+public static partial class DiffHelper
+{
+    /// <summary>
+    /// Reports rows that exist in only one of the two sitemaps.
+    /// </summary>
+    /// <param name="previous">Markdown of the earlier sitemap.</param>
+    /// <param name="current">Markdown of the newly generated sitemap.</param>
+    /// <returns>
+    /// One "Missing: ..." entry per row found only in <paramref name="previous"/> and one "New: ..." entry
+    /// per row found only in <paramref name="current"/>. <c>HasErrors</c> is true when any row is missing or
+    /// when a new row carries a status of 400 or above.
+    /// </returns>
+    public static (IReadOnlyList<string> Differences, bool HasErrors) CompareSitemapsWithErrors(string previous, string current)
+    {
+        var previousRows = ExtractRows(previous);
+        var currentRows = ExtractRows(current);
+
+        var differences = new List<string>();
+        var hasErrors = false;
+
+        // A row that disappeared is always an error.
+        foreach (var row in previousRows.Except(currentRows))
+        {
+            differences.Add($"Missing: {row}");
+            hasErrors = true;
+        }
+
+        // A row that appeared is reported, but is only an error when its status is 400 or above.
+        foreach (var row in currentRows.Except(previousRows))
+        {
+            differences.Add($"New: {row}");
+            hasErrors |= HasErrorStatus(row);
+        }
+
+        return (differences, hasErrors);
+    }
+
+    // Skips the first two lines, trims trailing whitespace (so CRLF and LF files compare
+    // equal) and drops blank lines.
+    // NOTE(review): rows are compared as text; if the Markdown generator pads cells to align
+    // columns, a column-width change would make every row differ - confirm.
+    private static List<string> ExtractRows(string markdown) =>
+        markdown.Split('\n')
+            .Skip(2)
+            .Select(line => line.TrimEnd())
+            .Where(line => line.Length > 0)
+            .ToList();
+
+    // True when the row's last cell is an HTTP status of 400 or above. Enum.TryParse accepts
+    // both member names ("NotFound") and numeric text ("404").
+    private static bool HasErrorStatus(string row)
+    {
+        var cell = StatusCellRegex().Match(row);
+        return cell.Success
+               && Enum.TryParse<System.Net.HttpStatusCode>(cell.Groups[1].Value, ignoreCase: true, out var status)
+               && (int)status >= 400;
+    }
+
+    // Captures the content of the last table cell on a row.
+    [GeneratedRegex(@"\|\s*(\w+)\s*\|\s*$")]
+    private static partial Regex StatusCellRegex();
+}
\ No newline at end of file
diff --git a/src/LinkValidator/Util/MarkdownHelper.cs b/src/LinkValidator/Util/MarkdownHelper.cs
new file mode 100644
index 0000000..d4b4588
--- /dev/null
+++ b/src/LinkValidator/Util/MarkdownHelper.cs
@@ -0,0 +1,34 @@
+using System.Collections.Immutable;
+using System.Net;
+using Grynwald.MarkdownGenerator;
+using LinkValidator.Actors;
+
+namespace LinkValidator.Util;
+
+/// <summary>Renders crawl results as a Markdown sitemap.</summary>
+public static class MarkdownHelper
+{
+    /// <summary>
+    /// Builds a Markdown document made of a level-1 heading naming <paramref name="baseUri"/>
+    /// and a two-column table (URL, StatusCode) with one row per entry of <paramref name="results"/>.
+    /// </summary>
+    /// <param name="baseUri">Base URL shown in the heading.</param>
+    /// <param name="results">Map from URL text to the HTTP status recorded for it.</param>
+    /// <returns>The document serialized with GFM-style tables.</returns>
+    public static string GenerateMarkdown(AbsoluteUri baseUri, ImmutableSortedDictionary<string, HttpStatusCode> results)
+    {
+        var heading = new MdHeading(1, $"Sitemap for [{baseUri.Value.ToString()}]");
+        var headerRow = new MdTableRow(new MdTextSpan("URL"), new MdTextSpan("StatusCode"));
+
+        // The status is written through the enum's ToString(), so defined values appear by name (for example NotFound).
+        var rows = results.Select(entry =>
+            new MdTableRow(new MdCodeSpan(entry.Key), new MdTextSpan(entry.Value.ToString())));
+
+        var document = new MdDocument();
+        document.Root.Add(heading);
+        document.Root.Add(new MdTable(headerRow, rows));
+
+        var options = new MdSerializationOptions { TableStyle = MdTableStyle.GFM };
+        return document.ToString(options);
+    }
+}
\ No newline at end of file