Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added real CLI #4

Merged
merged 3 commits into from
Jan 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<ItemGroup Label="App">
<PackageVersion Include="Akka" Version="1.5.33" />
<PackageVersion Include="FluentAssertions" Version="7.0.0" />
<PackageVersion Include="Grynwald.MarkdownGenerator" Version="3.0.106" />
<PackageVersion Include="HtmlAgilityPack" Version="1.11.72" />
<PackageVersion Include="System.CommandLine" Version="2.0.0-beta4.22272.1" />
</ItemGroup>
Expand All @@ -13,6 +14,6 @@
<PackageVersion Include="coverlet.collector" Version="6.0.0" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="17.8.0" />
<PackageVersion Include="xunit" Version="2.9.0" />
<PackageVersion Include="xunit.runner.visualstudio" Version="2.9.0" />
<PackageVersion Include="xunit.runner.visualstudio" Version="2.8.2" />
</ItemGroup>
</Project>
46 changes: 46 additions & 0 deletions src/LinkValidator.Tests/RelativeUriSpecs.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using FluentAssertions;
using LinkValidator.Actors;

namespace LinkValidator.Tests;

/// <summary>
/// Specs for <see cref="RelativeUri"/>: rejection of absolute URIs, value equality,
/// and string formatting.
/// </summary>
public class RelativeUriSpecs
{
    /// <summary>Reference instance wrapping the relative path <c>/path</c>.</summary>
    public RelativeUri Uri1 { get; } = new RelativeUri(new Uri("/path", UriKind.Relative));

    [Fact]
    public void RelativeUri_should_throw_when_not_relative()
    {
        // Arrange: an absolute URL, parsed with the permissive RelativeOrAbsolute kind.
        var absolute = new Uri("http://example.com", UriKind.RelativeOrAbsolute);

        // Act
        Action construct = () => new RelativeUri(absolute);

        // Assert
        construct.Should().Throw<ArgumentException>();
    }

    [Fact]
    public void RelativeUri_should_equal_copy_of_itself()
    {
        // Arrange: rebuild an independent instance from the reference instance's text.
        var originalText = Uri1.Value.ToString();
        var copy = new RelativeUri(new Uri(originalText, UriKind.Relative));

        // Assert: equal values must also agree on their hash codes.
        copy.Should().Be(Uri1);
        Uri1.GetHashCode().Should().Be(copy.GetHashCode());
    }

    [Fact]
    public void RelativeUri_should_print_path()
    {
        // Arrange
        var pageUri = new RelativeUri(new Uri("/path-to-file.html", UriKind.Relative));

        // Act
        var printed = pageUri.ToString();

        // Assert
        printed.Should().Be("/path-to-file.html");
    }
}
16 changes: 13 additions & 3 deletions src/LinkValidator/Actors/IndexerActor.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
using System.Net;
using System.Collections.Immutable;
using System.Net;
using Akka.Actor;
using Akka.Event;
using Akka.Routing;
using LinkValidator.Util;

namespace LinkValidator.Actors;

Expand Down Expand Up @@ -51,10 +53,11 @@ private ReportStatistics() {}
private readonly ILoggingAdapter _log = Context.GetLogger();
private readonly CrawlConfiguration _crawlConfiguration;
private IActorRef _crawlers = ActorRefs.Nobody;

public IndexerActor(CrawlConfiguration crawlConfiguration)
private readonly TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>> _completionSource;
/// <summary>
/// Creates the indexer, storing the crawl configuration and the completion source
/// that is handed the final results once the crawl completes.
/// </summary>
/// <param name="crawlConfiguration">Crawl settings kept for the lifetime of the actor.</param>
/// <param name="completionSource">Completion source kept so the crawl output can be published through it.</param>
public IndexerActor(CrawlConfiguration crawlConfiguration, TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>> completionSource)
{
    (_crawlConfiguration, _completionSource) = (crawlConfiguration, completionSource);
}

public Dictionary<AbsoluteUri, (CrawlStatus status, HttpStatusCode?)> IndexedDocuments { get; } = new();
Expand Down Expand Up @@ -83,6 +86,13 @@ protected override void OnReceive(object message)
if (IsCrawlComplete)
{
_log.Info("Crawl complete!");

var finalOutput = IndexedDocuments
.Where(x => x.Value.status == CrawlStatus.Visited)
.ToImmutableSortedDictionary(x => UriHelpers.ToRelativeUri(_crawlConfiguration.BaseUrl, x.Key).ToString(), x => x.Value.Item2 ?? HttpStatusCode.NotFound);

_completionSource.SetResult(finalOutput);

Context.Stop(Self);
}
break;
Expand Down
4 changes: 4 additions & 0 deletions src/LinkValidator/Actors/UriTypes.cs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
using System.Text;

namespace LinkValidator.Actors;

public record struct AbsoluteUri
Expand Down Expand Up @@ -25,4 +27,6 @@ public RelativeUri(Uri value)
}

public Uri Value { get; }

/// <summary>Returns the string form of the wrapped <see cref="Value"/>.</summary>
public override string ToString()
{
    return Value.ToString();
}
}
1 change: 1 addition & 0 deletions src/LinkValidator/LinkValidator.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

<ItemGroup>
<PackageReference Include="Akka" />
<PackageReference Include="Grynwald.MarkdownGenerator" />
<PackageReference Include="HtmlAgilityPack" />
<PackageReference Include="System.CommandLine" />
</ItemGroup>
Expand Down
72 changes: 69 additions & 3 deletions src/LinkValidator/Program.cs
Original file line number Diff line number Diff line change
@@ -1,9 +1,75 @@
namespace LinkValidator;
using System.Collections.Immutable;
using System.CommandLine;
using System.Net;
using Akka.Actor;
using LinkValidator.Actors;
using static LinkValidator.Util.DiffHelper;
using static LinkValidator.Util.MarkdownHelper;

namespace LinkValidator;

class Program
{
static void Main(string[] args)
/// <summary>
/// Command-line entry point. Declares the <c>--url</c>, <c>--output</c>, <c>--diff</c> and
/// <c>--strict</c> options, crawls the requested site, emits the sitemap as markdown
/// (to a file when <c>--output</c> is given, otherwise to the console), and optionally
/// compares it against a previous sitemap file.
/// </summary>
/// <param name="args">Raw command-line arguments passed to the root command.</param>
/// <returns>The exit code returned by invoking the root command.</returns>
public static async Task<int> Main(string[] args)
{
    var urlOption = new Option<string>("--url", "The URL to crawl") { IsRequired = true };
    var outputOption = new Option<string?>("--output", "Optional output file path for the sitemap");
    var diffOption = new Option<string?>("--diff", "Previous sitemap file to compare against");
    var strictOption = new Option<bool>("--strict", () => false,
        "Return error code if pages are missing or returning 400+ status codes");

    var rootCommand = new RootCommand("Website crawler and sitemap generator");
    rootCommand.Add(urlOption);
    rootCommand.Add(outputOption);
    rootCommand.Add(diffOption);
    rootCommand.Add(strictOption);

    rootCommand.SetHandler(async (targetUrl, outputPath, baselinePath, failOnErrors) =>
    {
        using var system = ActorSystem.Create("CrawlerSystem", "akka.loglevel = INFO");
        var siteRoot = new AbsoluteUri(new Uri(targetUrl));
        var crawlResults = await CrawlWebsite(system, siteRoot);
        var sitemap = GenerateMarkdown(siteRoot, crawlResults);

        // Shutdown is started but deliberately not awaited.
        _ = system.Terminate();

        if (outputPath is null)
        {
            Console.WriteLine(sitemap);
        }
        else
        {
            await File.WriteAllTextAsync(outputPath, sitemap);
        }

        // Without a baseline file there is nothing to compare; note that --strict is
        // only evaluated as part of the comparison below.
        if (string.IsNullOrEmpty(baselinePath))
        {
            return;
        }

        var baseline = await File.ReadAllTextAsync(baselinePath);
        var (differences, hasErrors) = CompareSitemapsWithErrors(baseline, sitemap);
        foreach (var difference in differences)
        {
            Console.WriteLine(difference);
        }

        if (failOnErrors && hasErrors)
        {
            Environment.Exit(1);
        }
    }, urlOption, outputOption, diffOption, strictOption);

    return await rootCommand.InvokeAsync(args);
}

/// <summary>
/// Starts an <see cref="IndexerActor"/> for <paramref name="url"/> and asynchronously waits
/// for the crawl results it publishes through a <see cref="TaskCompletionSource{TResult}"/>.
/// </summary>
/// <param name="system">Actor system that hosts the indexer actor.</param>
/// <param name="url">Absolute URL the crawl configuration is built from.</param>
/// <returns>The sorted map of page path to HTTP status code produced by the indexer.</returns>
private static async Task<ImmutableSortedDictionary<string, HttpStatusCode>> CrawlWebsite(ActorSystem system, AbsoluteUri url)
{
    // No placeholder console output here: stdout is where the markdown sitemap is written
    // when no --output file is given, so anything printed here would end up in that output.
    var crawlSettings = new CrawlConfiguration(url, 10, TimeSpan.FromSeconds(5));

    // The indexer completes this source from inside its message handler. Forcing asynchronous
    // continuations keeps the awaiting CLI code from running inline on the actor's thread.
    var tcs = new TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>>(
        TaskCreationOptions.RunContinuationsAsynchronously);

    var indexer = system.ActorOf(Props.Create(() => new IndexerActor(crawlSettings, tcs)), "indexer");
    indexer.Tell(IndexerActor.BeginIndexing.Instance);
    return await tcs.Task;
}
}
47 changes: 47 additions & 0 deletions src/LinkValidator/Util/DiffHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using System.Text.RegularExpressions;

namespace LinkValidator.Util;

/// <summary>
/// Compares two markdown sitemaps line by line and reports pages that disappeared or appeared.
/// </summary>
public static partial class DiffHelper
{
    /// <summary>Number of leading lines dropped from each document before rows are compared.</summary>
    private const int SkippedLeadingLines = 2;

    /// <summary>Lowest status code value that counts as an error.</summary>
    private const int FirstErrorStatusCode = 400;

    /// <summary>
    /// Diffs the rows of <paramref name="previous"/> against <paramref name="current"/>.
    /// </summary>
    /// <param name="previous">Markdown text of the earlier sitemap.</param>
    /// <param name="current">Markdown text of the newly generated sitemap.</param>
    /// <returns>
    /// <c>Differences</c>: one <c>Missing: …</c> entry per row only present in
    /// <paramref name="previous"/>, followed by one <c>New: …</c> entry per row only present in
    /// <paramref name="current"/>. <c>HasErrors</c>: true when any row is missing, or when a new
    /// row carries a status of 400 or above (written either as a number or as an
    /// <see cref="System.Net.HttpStatusCode"/> name such as <c>NotFound</c>).
    /// </returns>
    public static (IReadOnlyList<string> Differences, bool HasErrors) CompareSitemapsWithErrors(string previous, string current)
    {
        var previousLines = ExtractRows(previous);
        var currentLines = ExtractRows(current);

        var differences = new List<string>();
        var hasErrors = false;

        // Check for missing pages
        foreach (var line in previousLines.Except(currentLines))
        {
            differences.Add($"Missing: {line}");
            hasErrors = true;
        }

        // Check for new pages
        foreach (var line in currentLines.Except(previousLines))
        {
            differences.Add($"New: {line}");

            // Check if new page has error status code
            if (ContainsErrorStatus(line))
            {
                hasErrors = true;
            }
        }

        return (differences, hasErrors);
    }

    /// <summary>
    /// Splits a sitemap into its non-blank lines after the skipped leading lines.
    /// A trailing carriage return is removed so CRLF and LF documents compare equal.
    /// </summary>
    private static List<string> ExtractRows(string sitemap) =>
        sitemap.Split('\n')
            .Skip(SkippedLeadingLines)
            .Select(line => line.TrimEnd('\r'))
            .Where(line => !string.IsNullOrWhiteSpace(line))
            .ToList();

    /// <summary>Returns true when any table cell of <paramref name="line"/> holds an error status.</summary>
    private static bool ContainsErrorStatus(string line)
    {
        foreach (Match cell in StatusCellRegex().Matches(line))
        {
            if (IsErrorStatus(cell.Groups[1].Value))
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Interprets a cell as a numeric status code or as an <see cref="System.Net.HttpStatusCode"/>
    /// name; text that is neither (for example a column header) is not an error.
    /// </summary>
    private static bool IsErrorStatus(string cellText)
    {
        if (int.TryParse(cellText, out var numericCode))
        {
            return numericCode >= FirstErrorStatusCode;
        }

        return Enum.TryParse<System.Net.HttpStatusCode>(cellText, out var namedCode)
               && (int)namedCode >= FirstErrorStatusCode;
    }

    // Matches a table cell holding only three digits or only letters. The closing pipe is a
    // lookahead so that it can also open the next cell's match.
    [GeneratedRegex(@"\|\s*(\d{3}|[A-Za-z]+)\s*(?=\|)")]
    private static partial Regex StatusCellRegex();
}
27 changes: 27 additions & 0 deletions src/LinkValidator/Util/MarkdownHelper.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
using System.Collections.Immutable;
using System.Net;
using Grynwald.MarkdownGenerator;
using LinkValidator.Actors;

namespace LinkValidator.Util;

/// <summary>
/// Renders crawl results as a markdown sitemap document.
/// </summary>
public static class MarkdownHelper
{
    /// <summary>
    /// Builds a markdown document made of a level-1 heading naming <paramref name="baseUri"/>
    /// followed by a two-column table (URL, StatusCode) with one row per crawl result.
    /// </summary>
    /// <param name="baseUri">Base URI whose string form is shown in the heading.</param>
    /// <param name="results">Map of URL to status code; each entry becomes one table row.</param>
    /// <returns>The document serialized with the GFM table style.</returns>
    public static string GenerateMarkdown(AbsoluteUri baseUri, ImmutableSortedDictionary<string, HttpStatusCode> results)
    {
        var siteRoot = baseUri.Value.ToString();
        var heading = new MdHeading(1, $"Sitemap for [{siteRoot}]");

        var columnTitles = new MdTableRow(new MdTextSpan("URL"), new MdTextSpan("StatusCode"));

        // The URL is rendered as a code span; the status is the enum value's text form.
        var pageRows =
            from entry in results
            select new MdTableRow(new MdCodeSpan(entry.Key), new MdTextSpan(entry.Value.ToString()));

        var document = new MdDocument();
        document.Root.Add(heading);
        document.Root.Add(new MdTable(columnTitles, pageRows));

        var serialization = new MdSerializationOptions
        {
            TableStyle = MdTableStyle.GFM
        };
        return document.ToString(serialization);
    }
}
Loading