Skip to content

Commit

Permalink
Added real CLI (#4)
Browse files Browse the repository at this point in the history
* completed first pass at the CLI itself

* added Uri printing

* escape links in markdown
  • Loading branch information
Aaronontheweb authored Jan 3, 2025
1 parent e7072eb commit 6ff645c
Show file tree
Hide file tree
Showing 8 changed files with 209 additions and 7 deletions.
3 changes: 2 additions & 1 deletion src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<ItemGroup Label="App">
<PackageVersion Include="Akka" Version="1.5.33" />
<PackageVersion Include="FluentAssertions" Version="7.0.0" />
<PackageVersion Include="Grynwald.MarkdownGenerator" Version="3.0.106" />
<PackageVersion Include="HtmlAgilityPack" Version="1.11.72" />
<PackageVersion Include="System.CommandLine" Version="2.0.0-beta4.22272.1" />
</ItemGroup>
Expand All @@ -13,6 +14,6 @@
<PackageVersion Include="coverlet.collector" Version="6.0.0" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageVersion Include="xunit" Version="2.9.0" />
<PackageVersion Include="xunit.runner.visualstudio" Version="2.9.0" />
<PackageVersion Include="xunit.runner.visualstudio" Version="2.8.2" />
</ItemGroup>
</Project>
46 changes: 46 additions & 0 deletions src/LinkValidator.Tests/RelativeUriSpecs.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using FluentAssertions;
using LinkValidator.Actors;

namespace LinkValidator.Tests;

/// <summary>
/// Specs for <see cref="RelativeUri"/>: the constructor guard, equality of copies,
/// and the string output.
/// </summary>
public class RelativeUriSpecs
{
    /// <summary>Shared instance wrapping the relative path "/path".</summary>
    public RelativeUri Uri1 { get; } = new(new Uri("/path", UriKind.Relative));

    [Fact]
    public void RelativeUri_should_throw_when_not_relative()
    {
        // An absolute http URL is expected to be rejected at construction time.
        var absolute = new Uri("http://example.com", UriKind.RelativeOrAbsolute);

        Action construct = () => new RelativeUri(absolute);

        construct.Should().Throw<ArgumentException>();
    }

    [Fact]
    public void RelativeUri_should_equal_copy_of_itself()
    {
        // Rebuild an independent instance from the textual form of the shared one.
        var originalText = Uri1.Value.ToString();
        var copy = new RelativeUri(new Uri(originalText, UriKind.Relative));

        // Both equality and hash code must agree between the two instances.
        copy.Should().Be(Uri1);
        Uri1.GetHashCode().Should().Be(copy.GetHashCode());
    }

    [Fact]
    public void RelativeUri_should_print_path()
    {
        const string path = "/path-to-file.html";

        var printed = new RelativeUri(new Uri(path, UriKind.Relative)).ToString();

        // ToString must round-trip the path the instance was built from.
        printed.Should().Be(path);
    }
}
16 changes: 13 additions & 3 deletions src/LinkValidator/Actors/IndexerActor.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using System.Net;
using System.Collections.Immutable;
using System.Net;
using Akka.Actor;
using Akka.Event;
using Akka.Routing;
using LinkValidator.Util;

namespace LinkValidator.Actors;

Expand Down Expand Up @@ -51,10 +53,11 @@ private ReportStatistics() {}
private readonly ILoggingAdapter _log = Context.GetLogger();
private readonly CrawlConfiguration _crawlConfiguration;
private IActorRef _crawlers = ActorRefs.Nobody;

public IndexerActor(CrawlConfiguration crawlConfiguration)
private readonly TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>> _completionSource;
/// <summary>
/// Creates the indexer.
/// </summary>
/// <param name="crawlConfiguration">Settings for the crawl; stored for later use by the actor.</param>
/// <param name="completionSource">
/// Completion source that this actor resolves with the final results once the crawl is complete.
/// </param>
public IndexerActor(CrawlConfiguration crawlConfiguration, TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>> completionSource)
    => (_crawlConfiguration, _completionSource) = (crawlConfiguration, completionSource);

public Dictionary<AbsoluteUri, (CrawlStatus status, HttpStatusCode?)> IndexedDocuments { get; } = new();
Expand Down Expand Up @@ -83,6 +86,13 @@ protected override void OnReceive(object message)
if (IsCrawlComplete)
{
_log.Info("Crawl complete!");

var finalOutput = IndexedDocuments
.Where(x => x.Value.status == CrawlStatus.Visited)
.ToImmutableSortedDictionary(x => UriHelpers.ToRelativeUri(_crawlConfiguration.BaseUrl, x.Key).ToString(), x => x.Value.Item2 ?? HttpStatusCode.NotFound);

_completionSource.SetResult(finalOutput);

Context.Stop(Self);
}
break;
Expand Down
4 changes: 4 additions & 0 deletions src/LinkValidator/Actors/UriTypes.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using System.Text;

namespace LinkValidator.Actors;

public record struct AbsoluteUri
Expand Down Expand Up @@ -25,4 +27,6 @@ public RelativeUri(Uri value)
}

public Uri Value { get; }

/// <summary>Returns the string form of the wrapped <see cref="Value"/>.</summary>
public override string ToString()
{
    return Value.ToString();
}
}
1 change: 1 addition & 0 deletions src/LinkValidator/LinkValidator.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

<ItemGroup>
<PackageReference Include="Akka" />
<PackageReference Include="Grynwald.MarkdownGenerator" />
<PackageReference Include="HtmlAgilityPack" />
<PackageReference Include="System.CommandLine" />
</ItemGroup>
Expand Down
72 changes: 69 additions & 3 deletions src/LinkValidator/Program.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,75 @@
namespace LinkValidator;
using System.Collections.Immutable;
using System.CommandLine;
using System.Net;
using Akka.Actor;
using LinkValidator.Actors;
using static LinkValidator.Util.DiffHelper;
using static LinkValidator.Util.MarkdownHelper;

namespace LinkValidator;

class Program
{
static void Main(string[] args)
/// <summary>
/// CLI entry point. Declares the command-line options, crawls the requested URL,
/// emits the resulting markdown sitemap (to a file or to the console) and, when a
/// previous sitemap is supplied, prints the differences against it.
/// </summary>
/// <param name="args">Raw command-line arguments.</param>
/// <returns>The exit code produced by invoking the root command.</returns>
public static async Task<int> Main(string[] args)
{
    var urlOption = new Option<string>("--url", "The URL to crawl") { IsRequired = true };
    var outputOption = new Option<string?>("--output", "Optional output file path for the sitemap");
    var diffOption = new Option<string?>("--diff", "Previous sitemap file to compare against");
    var strictOption = new Option<bool>(
        "--strict",
        () => false,
        "Return error code if pages are missing or returning 400+ status codes");

    var rootCommand = new RootCommand("Website crawler and sitemap generator");
    rootCommand.Add(urlOption);
    rootCommand.Add(outputOption);
    rootCommand.Add(diffOption);
    rootCommand.Add(strictOption);

    rootCommand.SetHandler(async (url, output, diff, strict) =>
    {
        using var system = ActorSystem.Create("CrawlerSystem", "akka.loglevel = INFO");
        var baseUri = new AbsoluteUri(new Uri(url));
        var crawlResults = await CrawlWebsite(system, baseUri);
        var sitemap = GenerateMarkdown(baseUri, crawlResults);

        // Termination is started but deliberately not awaited.
        _ = system.Terminate();

        // Without an output path the sitemap goes to the console.
        if (output == null)
        {
            Console.WriteLine(sitemap);
        }
        else
        {
            await File.WriteAllTextAsync(output, sitemap);
        }

        // Nothing left to do unless a previous sitemap was provided.
        if (string.IsNullOrEmpty(diff))
        {
            return;
        }

        var baseline = await File.ReadAllTextAsync(diff);
        var comparison = CompareSitemapsWithErrors(baseline, sitemap);
        foreach (var entry in comparison.Differences)
        {
            Console.WriteLine(entry);
        }

        // Strict mode turns detected errors into a non-zero process exit code.
        if (comparison.HasErrors && strict)
        {
            Environment.Exit(1);
        }
    }, urlOption, outputOption, diffOption, strictOption);

    return await rootCommand.InvokeAsync(args);
}

/// <summary>
/// Starts an <see cref="IndexerActor"/> for <paramref name="url"/> and asynchronously
/// waits until the actor completes the shared completion source with the crawl results.
/// </summary>
/// <param name="system">Actor system that hosts the indexer actor.</param>
/// <param name="url">Absolute URL at which the crawl begins.</param>
/// <returns>The results the indexer publishes: a sorted map from URL string to HTTP status code.</returns>
private static async Task<ImmutableSortedDictionary<string, HttpStatusCode>> CrawlWebsite(ActorSystem system, AbsoluteUri url)
{
    // NOTE(review): 10 and the 5-second TimeSpan are passed straight to CrawlConfiguration;
    // presumably a crawler count and a request timeout - confirm against its definition.
    var crawlSettings = new CrawlConfiguration(url, 10, TimeSpan.FromSeconds(5));

    // RunContinuationsAsynchronously keeps the awaiting continuation below from running
    // inline inside the actor's SetResult call.
    var tcs = new TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>>(
        TaskCreationOptions.RunContinuationsAsynchronously);

    // No console output here: stdout is where the markdown sitemap itself is printed.
    var indexer = system.ActorOf(Props.Create(() => new IndexerActor(crawlSettings, tcs)), "indexer");
    indexer.Tell(IndexerActor.BeginIndexing.Instance);

    return await tcs.Task;
}
}
47 changes: 47 additions & 0 deletions src/LinkValidator/Util/DiffHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using System.Text.RegularExpressions;

namespace LinkValidator.Util;

/// <summary>
/// Compares two markdown sitemaps line by line and reports which table rows
/// disappeared or appeared between them.
/// </summary>
public static partial class DiffHelper
{
    /// <summary>
    /// Diffs a previous sitemap against the current one.
    /// </summary>
    /// <param name="previous">Markdown text of the earlier sitemap.</param>
    /// <param name="current">Markdown text of the newly generated sitemap.</param>
    /// <returns>
    /// <c>Differences</c>: one "Missing: ..." entry per row only present in
    /// <paramref name="previous"/> followed by one "New: ..." entry per row only present in
    /// <paramref name="current"/>. <c>HasErrors</c>: true when any row is missing, or when a
    /// new row carries a status of 400 or above.
    /// </returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static (IReadOnlyList<string> Differences, bool HasErrors) CompareSitemapsWithErrors(string previous, string current)
    {
        ArgumentNullException.ThrowIfNull(previous);
        ArgumentNullException.ThrowIfNull(current);

        var previousLines = ExtractContentLines(previous);
        var currentLines = ExtractContentLines(current);

        var differences = new List<string>();
        var hasErrors = false;

        // Rows that existed before but are gone now always count as errors.
        foreach (var line in previousLines.Except(currentLines))
        {
            differences.Add($"Missing: {line}");
            hasErrors = true;
        }

        // Rows that are new only count as errors when their status is 400+.
        foreach (var line in currentLines.Except(previousLines))
        {
            differences.Add($"New: {line}");

            if (HasErrorStatus(line))
            {
                hasErrors = true;
            }
        }

        return (differences, hasErrors);
    }

    /// <summary>
    /// Splits a sitemap into lines, drops the first two lines and all blank lines.
    /// </summary>
    private static List<string> ExtractContentLines(string markdown)
    {
        return markdown.Split('\n')
            .Skip(2)
            // Strip the '\r' left behind by CRLF files so that the same row compares equal
            // regardless of the line-ending style of either input.
            .Select(l => l.TrimEnd('\r'))
            .Where(l => !string.IsNullOrWhiteSpace(l))
            .ToList();
    }

    /// <summary>
    /// Returns true when the last cell of a table row is an HTTP status of 400 or above.
    /// </summary>
    private static bool HasErrorStatus(string tableRow)
    {
        var statusCell = StatusCellRegex().Match(tableRow);

        // Enum.TryParse accepts both the enum name ("NotFound") and the numeric form ("404"),
        // so rows written with either representation are recognised.
        return statusCell.Success
               && Enum.TryParse<System.Net.HttpStatusCode>(statusCell.Groups[1].Value, ignoreCase: true, out var status)
               && (int)status >= 400;
    }

    // Captures the content of the final "| value |" cell of a table row.
    [GeneratedRegex(@"\|\s*(\w+)\s*\|\s*$")]
    private static partial Regex StatusCellRegex();
}
27 changes: 27 additions & 0 deletions src/LinkValidator/Util/MarkdownHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using System.Collections.Immutable;
using System.Net;
using Grynwald.MarkdownGenerator;
using LinkValidator.Actors;

namespace LinkValidator.Util;

/// <summary>
/// Renders crawl results as a markdown sitemap document.
/// </summary>
public static class MarkdownHelper
{
    /// <summary>
    /// Builds a markdown document consisting of a level-1 heading naming
    /// <paramref name="baseUri"/> and a two-column table (URL, StatusCode) with one row
    /// per entry of <paramref name="results"/>.
    /// </summary>
    /// <param name="baseUri">Base address shown in the heading.</param>
    /// <param name="results">Crawled URLs mapped to the HTTP status code recorded for each.</param>
    /// <returns>The serialized markdown, using the GFM table style.</returns>
    public static string GenerateMarkdown(AbsoluteUri baseUri, ImmutableSortedDictionary<string, HttpStatusCode> results)
    {
        var title = new MdHeading(1, $"Sitemap for [{baseUri.Value.ToString()}]");

        // Column captions, then one row per result: URL as a code span, status as plain text.
        var columns = new MdTableRow(new MdTextSpan("URL"), new MdTextSpan("StatusCode"));
        var rows =
            from entry in results
            select new MdTableRow(new MdCodeSpan(entry.Key), new MdTextSpan(entry.Value.ToString()));

        var document = new MdDocument();
        document.Root.Add(title);
        document.Root.Add(new MdTable(columns, rows));

        var serialization = new MdSerializationOptions { TableStyle = MdTableStyle.GFM };
        return document.ToString(serialization);
    }
}

0 comments on commit 6ff645c

Please sign in to comment.