Skip to content

Commit

Permalink
#72: Cache StreamWriter instances for text downloading
Browse files Browse the repository at this point in the history
Stores StreamWriter instances in a Dictionary and reuses them for recurring text appends when downloading text posts.
This prevents massive seek I/O in large blog downloads.
  • Loading branch information
johanneszab committed Jun 13, 2020
1 parent 4276c44 commit 3a2980c
Show file tree
Hide file tree
Showing 10 changed files with 35 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ private async Task RunCrawlerTasksAsync(PauseToken pt, CancellationToken ct)

ICrawler crawler = _crawlerFactory.GetCrawler(blog, new Progress<DownloadProgress>(), pt, ct);
crawler.IsBlogOnlineAsync().Wait(4000);
crawler.Dispose();

if (_crawlerService.ActiveItems.Any(item =>
item.Blog.Name.Equals(nextQueueItem.Blog.Name) &&
Expand Down Expand Up @@ -225,6 +226,7 @@ private async Task StartSiteSpecificDownloaderAsync(QueueListItem queueListItem,

ICrawler crawler = _crawlerFactory.GetCrawler(blog, progress, pt, ct);
await crawler.CrawlAsync();
crawler.Dispose();

Monitor.Enter(_lockObject);
QueueOnDispatcher.CheckBeginInvokeOnUI(() => _crawlerService.RemoveActiveItem(queueListItem));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,7 @@ private async Task CheckStatusOfBlogsAsync(SemaphoreSlim semaphoreSlim, IBlog bl
ICrawler crawler = _crawlerFactory.GetCrawler(blog, new Progress<DownloadProgress>(), new PauseToken(),
new CancellationToken());
await crawler.IsBlogOnlineAsync();
crawler.Dispose();
}
finally
{
Expand Down Expand Up @@ -677,6 +678,7 @@ private async Task UpdateMetaInformationAsync(IBlog blog)
new CancellationToken());

await crawler.UpdateMetaInformationAsync();
crawler.Dispose();
}

private IBlog CheckIfCrawlableBlog(string blogUrl)
Expand Down
5 changes: 3 additions & 2 deletions src/TumblThree/TumblThree.Applications/Crawler/ICrawler.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
using System.Threading.Tasks;
using System;
using System.Threading.Tasks;

namespace TumblThree.Applications.Crawler
{
public interface ICrawler
public interface ICrawler : IDisposable
{
Task CrawlAsync();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,7 @@ protected virtual void Dispose(bool disposing)
if (disposing)
{
semaphoreSlim?.Dispose();
downloader.Dispose();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,7 @@ protected virtual void Dispose(bool disposing)
if (disposing)
{
semaphoreSlim?.Dispose();
downloader.Dispose();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ protected virtual void Dispose(bool disposing)
if (disposing)
{
semaphoreSlim?.Dispose();
downloader.Dispose();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ protected virtual void Dispose(bool disposing)
if (disposing)
{
semaphoreSlim?.Dispose();
downloader.Dispose();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ protected virtual void Dispose(bool disposing)
if (disposing)
{
semaphoreSlim.Dispose();
downloader.Dispose();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public abstract class AbstractDownloader : IDownloader, IDisposable

private SemaphoreSlim concurrentConnectionsSemaphore;
private SemaphoreSlim concurrentVideoConnectionsSemaphore;
private readonly Dictionary<string, StreamWriter> streamWriters = new Dictionary<string, StreamWriter>();

protected AbstractDownloader(IShellService shellService, IManagerService managerService, CancellationToken ct, PauseToken pt, IProgress<DownloadProgress> progress, IPostQueue<TumblrPost> postQueue, FileDownloader fileDownloader, ICrawlerService crawlerService = null, IBlog blog = null, IFiles files = null)
{
Expand Down Expand Up @@ -122,12 +123,9 @@ protected virtual bool AppendToTextFile(string fileLocation, string text)
{
lock (lockObjectDownload)
{
using (var sw = new StreamWriter(fileLocation, true))
{
sw.WriteLine(text);
}
StreamWriter sw = GetTextAppenderStreamWriter(fileLocation);
sw.WriteLine(text);
}

return true;
}
catch (IOException ex) when ((ex.HResult & 0xFFFF) == 0x27 || (ex.HResult & 0xFFFF) == 0x70)
Expand All @@ -143,6 +141,18 @@ protected virtual bool AppendToTextFile(string fileLocation, string text)
}
}

/// <summary>
/// Returns the cached <see cref="StreamWriter"/> for the given file path, creating
/// and caching a new one (opened in append mode) on first use.
/// </summary>
/// <param name="key">Path of the text file to append to; also used as the cache key.</param>
/// <returns>The writer stored in <c>streamWriters</c> for <paramref name="key"/>.</returns>
/// <remarks>
/// Writers handed out here stay open and are owned by the cache, so callers must not
/// dispose them. This method does no locking of its own.
/// </remarks>
private StreamWriter GetTextAppenderStreamWriter(string key)
{
    // Single lookup instead of ContainsKey + indexer.
    StreamWriter sw;
    if (streamWriters.TryGetValue(key, out sw))
    {
        return sw;
    }

    // Second argument 'true' opens the file for appending rather than overwriting.
    sw = new StreamWriter(key, true);
    streamWriters.Add(key, sw);

    return sw;
}

public virtual async Task<bool> DownloadBlogAsync()
{
concurrentConnectionsSemaphore = new SemaphoreSlim(shellService.Settings.ConcurrentConnections / crawlerService.ActiveItems.Count);
Expand Down Expand Up @@ -369,6 +379,11 @@ protected virtual void Dispose(bool disposing)
{
concurrentConnectionsSemaphore?.Dispose();
concurrentVideoConnectionsSemaphore?.Dispose();

foreach (var sw in streamWriters.Values)
{
sw.Dispose();
}
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
using System.Threading.Tasks;
using System;
using System.Threading.Tasks;

namespace TumblThree.Applications.Downloader
{
public interface IDownloader
public interface IDownloader : IDisposable
{
Task<bool> DownloadBlogAsync();

Expand Down

0 comments on commit 3a2980c

Please sign in to comment.