Skip to content

Commit

Permalink
Add issue #125 Crawl posts from search results under "Recent" sort
Browse files Browse the repository at this point in the history
- Until now, the search crawler could only download posts in the default sort order ("most popular"); there was no way to download them in the other sort order, "recent".
- Now the URL field also accepts search URLs ending in "/recent". Because of quirks in Tumblr's JSON responses (the same property is sometimes an object and sometimes an array), the Json.NET framework has been introduced.
  • Loading branch information
thomas694 committed Mar 3, 2021
1 parent f9732cf commit 28cd8b1
Show file tree
Hide file tree
Showing 9 changed files with 1,706 additions and 99 deletions.
24 changes: 24 additions & 0 deletions src/TumblThree/TumblThree.Applications/Converter/PropertyCopier.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
namespace TumblThree.Applications.Converter
{
    /// <summary>
    /// Copies the values of public properties from one object to another,
    /// pairing properties that have the same name and the same property type.
    /// </summary>
    /// <typeparam name="TSrc">Type of the object the values are read from.</typeparam>
    /// <typeparam name="TDst">Type of the object the values are written to.</typeparam>
    public static class PropertyCopier<TSrc, TDst> where TSrc : class
                                                   where TDst : class
    {
        /// <summary>
        /// Copies every readable public property of <paramref name="src"/> to the first
        /// public property of <paramref name="dst"/> that has the same name and type.
        /// Properties are looked up on the runtime types of both arguments.
        /// Properties that cannot be copied (indexers, write-only source properties,
        /// destination properties without a setter) are skipped instead of aborting the copy.
        /// </summary>
        /// <param name="src">Object to read the property values from.</param>
        /// <param name="dst">Object to write the property values to.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="src"/> or <paramref name="dst"/> is <c>null</c>.
        /// </exception>
        public static void Copy(TSrc src, TDst dst)
        {
            if (src == null)
            {
                throw new System.ArgumentNullException(nameof(src));
            }

            if (dst == null)
            {
                throw new System.ArgumentNullException(nameof(dst));
            }

            var srcProperties = src.GetType().GetProperties();
            var dstProperties = dst.GetType().GetProperties();

            foreach (var srcProperty in srcProperties)
            {
                // A plain GetValue(src) call fails for write-only properties and for indexers.
                if (!srcProperty.CanRead || srcProperty.GetIndexParameters().Length > 0)
                {
                    continue;
                }

                foreach (var dstProperty in dstProperties)
                {
                    if (srcProperty.Name != dstProperty.Name || srcProperty.PropertyType != dstProperty.PropertyType)
                    {
                        continue;
                    }

                    // The first property matching by name and type decides; a destination
                    // without a setter is left untouched rather than raising an exception.
                    if (dstProperty.CanWrite && dstProperty.GetIndexParameters().Length == 0)
                    {
                        dstProperty.SetValue(dst, srcProperty.GetValue(src));
                    }

                    break;
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System;
using System.Collections.Generic;

namespace TumblThree.Applications.Converter
{
    /// <summary>
    /// Json.NET converter for values that appear in the JSON either as a single item
    /// or as an array of items. Both shapes are read into a <see cref="List{T}"/>.
    /// The converter is read-only: <see cref="CanWrite"/> is <c>false</c>.
    /// </summary>
    /// <typeparam name="T">Element type of the resulting list.</typeparam>
    public class SingleOrArrayConverter<T> : JsonConverter
    {
        /// <summary>
        /// Reports whether this converter handles <paramref name="objectType"/>.
        /// </summary>
        /// <param name="objectType">Type of the member being deserialized.</param>
        /// <returns><c>true</c> only for exactly <see cref="List{T}"/>.</returns>
        public override bool CanConvert(Type objectType)
        {
            return objectType == typeof(List<T>);
        }

        /// <summary>
        /// Reads the current JSON value and returns it as a <see cref="List{T}"/>.
        /// </summary>
        /// <param name="reader">Reader positioned on the value to convert.</param>
        /// <param name="objectType">Target type; not used.</param>
        /// <param name="existingValue">Existing value of the target; not used.</param>
        /// <param name="serializer">Calling serializer; not used.</param>
        /// <returns>
        /// The deserialized list for a JSON array, <c>null</c> for a JSON null,
        /// otherwise a list containing the single deserialized value.
        /// </returns>
        public override object ReadJson(JsonReader reader, Type objectType, object existingValue, JsonSerializer serializer)
        {
            JToken token = JToken.Load(reader);

            // NOTE(review): ToObject is called without `serializer`; passing it would presumably
            // re-enter this converter when it is registered on the serializer - confirm before changing.
            switch (token.Type)
            {
                case JTokenType.Array:
                    return token.ToObject<List<T>>();

                case JTokenType.Null:
                    // A JSON null means "no value"; do not wrap it into a list holding default(T).
                    return null;

                default:
                    return new List<T> { token.ToObject<T>() };
            }
        }

        /// <summary>
        /// Always <c>false</c>: this converter only takes part in reading JSON.
        /// </summary>
        public override bool CanWrite
        {
            get { return false; }
        }

        /// <summary>
        /// Not implemented; <see cref="CanWrite"/> returns <c>false</c>.
        /// </summary>
        /// <exception cref="NotImplementedException">Always thrown.</exception>
        public override void WriteJson(JsonWriter writer, object value, JsonSerializer serializer)
        {
            throw new NotImplementedException();
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
using System.Threading;
using System.Threading.Tasks;
using System.Web;

using TumblThree.Applications.Converter;
using TumblThree.Applications.DataModels;
using TumblThree.Applications.DataModels.TumblrPosts;
using TumblThree.Applications.Downloader;
Expand Down Expand Up @@ -135,6 +135,11 @@ protected async Task<string> RequestDataAsync(string url, Dictionary<string, str

return responseDetails.Response;
}
catch (Exception e)
{
Logger.Error("AbstractCrawler.RequestDataAsync: {0}", e);
throw;
}
finally
{
requestRegistration.Dispose();
Expand Down Expand Up @@ -185,6 +190,29 @@ protected async Task<string> RequestApiDataAsync(string url, string bearerToken,
}
}

/// <summary>
/// Deserializes <paramref name="json"/> into an instance of <typeparamref name="T"/> using Json.NET,
/// with a <see cref="SingleOrArrayConverter{T}"/> registered on the serializer.
/// </summary>
/// <typeparam name="T">Target type; must have a parameterless constructor.</typeparam>
/// <param name="json">JSON text to deserialize.</param>
/// <returns>
/// The deserialized object. If Json.NET raises a <see cref="Newtonsoft.Json.JsonException"/>,
/// the failure is logged and shown via the shell service, and a new <typeparamref name="T"/> is returned.
/// </returns>
public virtual T ConvertJsonToClassNew<T>(string json) where T : new()
{
    try
    {
        // The text is fed to the reader as a UTF-8 encoded in-memory stream.
        byte[] utf8Payload = Encoding.UTF8.GetBytes(json);

        var serializer = new Newtonsoft.Json.JsonSerializer();
        serializer.Converters.Add(new SingleOrArrayConverter<T>());

        using (var payloadStream = new MemoryStream(utf8Payload))
        using (var textReader = new StreamReader(payloadStream))
        using (var jsonReader = new Newtonsoft.Json.JsonTextReader(textReader))
        {
            T result = serializer.Deserialize<T>(jsonReader);
            return result;
        }
    }
    catch (Newtonsoft.Json.JsonException serializationException)
    {
        // Parsing problems are reported, and the caller receives an empty instance instead of an exception.
        Logger.Error("AbstractCrawler:ConvertJsonToClassNew<T>: {0}", "Could not parse data");
        ShellService.ShowError(serializationException, Resources.PostNotParsable, Blog.Name);
        return new T();
    }
}

protected static string UrlEncode(IDictionary<string, string> parameters)
{
var sb = new StringBuilder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,11 @@ public ICrawler GetCrawler(IBlog blog, IProgress<DownloadProgress> progress, Pau
imgurParser, gfycatParser, GetWebmshareParser(), GetMixtapeParser(), GetUguuParser(),
GetSafeMoeParser(), GetLoliSafeParser(), GetCatBoxParser(), postQueue, blog, progress, pt, ct);
case BlogTypes.tumblrsearch:
IPostQueue<TumblrCrawlerData<DataModels.TumblrSearchJson.Datum>> jsonQueue = GetJsonQueue<DataModels.TumblrSearchJson.Datum>();
return new TumblrSearchCrawler(shellService, crawlerService, webRequestFactory,
cookieService, GetTumblrDownloader(progress, blog, files, postQueue, pt, ct), GetTumblrParser(),
imgurParser, gfycatParser, GetWebmshareParser(), GetMixtapeParser(), GetUguuParser(),
GetSafeMoeParser(), GetLoliSafeParser(), GetCatBoxParser(), postQueue, blog, progress, pt, ct);
cookieService, GetTumblrDownloader(progress, blog, files, postQueue, pt, ct), GetTumblrJsonDownloader(jsonQueue, blog, pt, ct),
GetTumblrParser(), imgurParser, gfycatParser, GetWebmshareParser(), GetMixtapeParser(), GetUguuParser(),
GetSafeMoeParser(), GetLoliSafeParser(), GetCatBoxParser(), postQueue, jsonQueue, blog, progress, pt, ct);
case BlogTypes.tumblrtagsearch:
IPostQueue<TumblrCrawlerData<DataModels.TumblrTaggedSearchJson.Datum>> jsonTagSearchQueue =
GetJsonQueue<DataModels.TumblrTaggedSearchJson.Datum>();
Expand Down
Loading

0 comments on commit 28cd8b1

Please sign in to comment.