-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* completed first pass at the CLI itself * added Uri printing * escape links in markdown
- Loading branch information
1 parent
e7072eb
commit 6ff645c
Showing
8 changed files
with
209 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
using FluentAssertions; | ||
using LinkValidator.Actors; | ||
|
||
namespace LinkValidator.Tests; | ||
|
||
/// <summary>
/// Specs for <see cref="RelativeUri"/>: construction guard, equality and string rendering.
/// </summary>
public class RelativeUriSpecs
{
    /// <summary>Shared fixture wrapping the relative path <c>/path</c>.</summary>
    public RelativeUri Uri1 { get; } = new(new Uri("/path", UriKind.Relative));

    [Fact]
    public void RelativeUri_should_throw_when_not_relative()
    {
        // Arrange: an absolute address, which the wrapper is expected to reject
        var absolute = new Uri("http://example.com", UriKind.RelativeOrAbsolute);

        // Act
        Action construct = () => new RelativeUri(absolute);

        // Assert
        construct.Should().Throw<ArgumentException>();
    }

    [Fact]
    public void RelativeUri_should_equal_copy_of_itself()
    {
        // Arrange: rebuild the fixture from its own textual form
        var rawPath = Uri1.Value.ToString();
        var copy = new RelativeUri(new Uri(rawPath, UriKind.Relative));

        // Assert: equal values must also agree on their hash code
        copy.Should().Be(Uri1);
        Uri1.GetHashCode().Should().Be(copy.GetHashCode());
    }

    [Fact]
    public void RelativeUri_should_print_path()
    {
        // Arrange
        var pageUri = new RelativeUri(new Uri("/path-to-file.html", UriKind.Relative));

        // Act
        var printed = pageUri.ToString();

        // Assert
        printed.Should().Be("/path-to-file.html");
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,75 @@ | ||
namespace LinkValidator; | ||
using System.Collections.Immutable; | ||
using System.CommandLine; | ||
using System.Net; | ||
using Akka.Actor; | ||
using LinkValidator.Actors; | ||
using static LinkValidator.Util.DiffHelper; | ||
using static LinkValidator.Util.MarkdownHelper; | ||
|
||
namespace LinkValidator; | ||
|
||
/// <summary>
/// Command-line entry point: crawls a website, renders the crawl results as a Markdown sitemap
/// and optionally compares that sitemap against a previously saved one.
/// </summary>
class Program
{
    /// <summary>
    /// Parses the command line, runs the crawl and emits the sitemap to a file or to stdout.
    /// </summary>
    /// <param name="args">Raw command-line arguments.</param>
    /// <returns>
    /// The exit code reported by the command-line parser, or <c>1</c> when <c>--strict</c> is set
    /// and the comparison against the <c>--diff</c> sitemap reported errors.
    /// </returns>
    public static async Task<int> Main(string[] args)
    {
        var urlOption = new Option<string>("--url", "The URL to crawl") { IsRequired = true };
        var outputOption = new Option<string?>("--output", "Optional output file path for the sitemap");
        var diffOption = new Option<string?>("--diff", "Previous sitemap file to compare against");
        var strictOption = new Option<bool>("--strict", () => false,
            "Return error code if pages are missing or returning 400+ status codes");

        var rootCommand = new RootCommand("Website crawler and sitemap generator")
        {
            urlOption,
            outputOption,
            diffOption,
            strictOption
        };

        // Recorded by the handler and turned into the exit code below. Calling Environment.Exit
        // inside the handler would end the process before the actor system leaves its using scope.
        var strictFailure = false;

        rootCommand.SetHandler(async (url, output, diff, strict) =>
        {
            using var system = ActorSystem.Create("CrawlerSystem", "akka.loglevel = INFO");
            var absoluteUri = new AbsoluteUri(new Uri(url));
            var results = await CrawlWebsite(system, absoluteUri);
            var markdown = GenerateMarkdown(absoluteUri, results);

            // The crawl is finished; wait for shutdown instead of discarding the task.
            await system.Terminate();

            if (!string.IsNullOrEmpty(output))
            {
                await File.WriteAllTextAsync(output, markdown);
            }
            else
            {
                Console.WriteLine(markdown);
            }

            // NOTE(review): --strict only has an effect together with --diff; without a previous
            // sitemap no error code is ever produced. Confirm that this is the intended contract.
            if (!string.IsNullOrEmpty(diff))
            {
                var previousMarkdown = await File.ReadAllTextAsync(diff);
                var (differences, hasErrors) = CompareSitemapsWithErrors(previousMarkdown, markdown);
                foreach (var difference in differences)
                {
                    Console.WriteLine(difference);
                }

                if (strict && hasErrors)
                {
                    strictFailure = true;
                }
            }
        }, urlOption, outputOption, diffOption, strictOption);

        var exitCode = await rootCommand.InvokeAsync(args);
        if (exitCode != 0)
        {
            return exitCode;
        }

        return strictFailure ? 1 : 0;
    }

    /// <summary>
    /// Starts an <see cref="IndexerActor"/> for <paramref name="url"/> and waits for it to publish
    /// the crawl results through a <see cref="TaskCompletionSource{TResult}"/>.
    /// </summary>
    /// <param name="system">Actor system that hosts the indexer actor.</param>
    /// <param name="url">Absolute URL at which the crawl starts.</param>
    /// <returns>The crawled URLs mapped to the HTTP status code observed for each.</returns>
    private static async Task<ImmutableSortedDictionary<string, HttpStatusCode>> CrawlWebsite(ActorSystem system, AbsoluteUri url)
    {
        // NOTE(review): 10 and 5 seconds are presumably a concurrency limit and a request timeout;
        // confirm against CrawlConfiguration.
        var crawlSettings = new CrawlConfiguration(url, 10, TimeSpan.FromSeconds(5));

        // Continuations must not run inline on whichever thread completes the source; otherwise
        // the rest of the command handler could execute on that thread.
        var tcs = new TaskCompletionSource<ImmutableSortedDictionary<string, HttpStatusCode>>(
            TaskCreationOptions.RunContinuationsAsynchronously);

        var indexer = system.ActorOf(Props.Create(() => new IndexerActor(crawlSettings, tcs)), "indexer");
        indexer.Tell(IndexerActor.BeginIndexing.Instance);
        return await tcs.Task;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
using System.Text.RegularExpressions; | ||
|
||
namespace LinkValidator.Util; | ||
|
||
/// <summary>
/// Compares two Markdown sitemaps line by line and reports the rows that differ.
/// </summary>
public static partial class DiffHelper
{
    /// <summary>
    /// Compares a previously generated sitemap with the current one.
    /// </summary>
    /// <param name="previous">Markdown text of the earlier sitemap.</param>
    /// <param name="current">Markdown text of the newly generated sitemap.</param>
    /// <returns>
    /// <c>Differences</c> holds one <c>"Missing: …"</c> entry per row found only in
    /// <paramref name="previous"/> and one <c>"New: …"</c> entry per row found only in
    /// <paramref name="current"/>. <c>HasErrors</c> is <c>true</c> when any row is missing or when
    /// a new row carries a status of 400 or above.
    /// </returns>
    public static (IReadOnlyList<string> Differences, bool HasErrors) CompareSitemapsWithErrors(string previous, string current)
    {
        var differences = new List<string>();
        var hasErrors = false;

        var previousLines = ExtractRows(previous);
        var currentLines = ExtractRows(current);

        // Check for missing pages
        foreach (var line in previousLines.Except(currentLines))
        {
            differences.Add($"Missing: {line}");
            hasErrors = true;
        }

        // Check for new pages
        foreach (var line in currentLines.Except(previousLines))
        {
            differences.Add($"New: {line}");

            // Check if new page has error status code
            if (HasErrorStatus(line))
            {
                hasErrors = true;
            }
        }

        return (differences, hasErrors);
    }

    // Returns the non-blank lines after the first two lines of the document, with a trailing
    // '\r' removed so that CRLF and LF copies of the same sitemap compare equal.
    private static List<string> ExtractRows(string sitemap) =>
        sitemap.Split('\n')
            .Skip(2)
            .Select(l => l.TrimEnd('\r'))
            .Where(l => !string.IsNullOrWhiteSpace(l))
            .ToList();

    // True when the final table cell of the row holds an HTTP status of 400 or above. The cell
    // may be numeric ("404") or an HttpStatusCode name ("NotFound"), which is the form produced
    // by HttpStatusCode.ToString(); Enum.TryParse accepts both.
    private static bool HasErrorStatus(string row)
    {
        var match = StatusCellRegex().Match(row);
        return match.Success
               && Enum.TryParse<System.Net.HttpStatusCode>(match.Groups[1].Value, ignoreCase: true, out var status)
               && (int)status >= 400;
    }

    // Captures the content of the last cell of a table row: three digits or a bare word.
    [GeneratedRegex(@"\|\s*(\d{3}|[A-Za-z]+)\s*\|\s*$")]
    private static partial Regex StatusCellRegex();
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
using System.Collections.Immutable; | ||
using System.Net; | ||
using Grynwald.MarkdownGenerator; | ||
using LinkValidator.Actors; | ||
|
||
namespace LinkValidator.Util; | ||
|
||
/// <summary>
/// Renders crawl results as a Markdown sitemap document.
/// </summary>
public static class MarkdownHelper
{
    /// <summary>
    /// Builds a Markdown document consisting of a level-1 heading naming the crawled site and a
    /// two-column table listing every URL with its status code.
    /// </summary>
    /// <param name="baseUri">Root address of the crawled site; shown in the heading.</param>
    /// <param name="results">Crawled URLs mapped to the HTTP status code recorded for each.</param>
    /// <returns>The document serialized with GFM-style tables.</returns>
    public static string GenerateMarkdown(AbsoluteUri baseUri, ImmutableSortedDictionary<string, HttpStatusCode> results)
    {
        var siteAddress = baseUri.Value.ToString();
        var sitemap = new MdDocument();

        // Heading
        sitemap.Root.Add(new MdHeading(1, $"Sitemap for [{siteAddress}]"));

        // Table: one row per crawled URL, URL rendered as a code span
        var columnTitles = new MdTableRow(new MdTextSpan("URL"), new MdTextSpan("StatusCode"));
        var pageRows =
            from entry in results
            select new MdTableRow(new MdCodeSpan(entry.Key), new MdTextSpan(entry.Value.ToString()));
        sitemap.Root.Add(new MdTable(columnTitles, pageRows));

        var serialization = new MdSerializationOptions()
        {
            TableStyle = MdTableStyle.GFM
        };
        return sitemap.ToString(serialization);
    }
}