Skip to content

Commit

Permalink
Add issue #56/#104 Save photo sets with similar filenames
Browse files Browse the repository at this point in the history
- Since Tumblr introduced a new file naming scheme that gives each file a unique GUID, the images of one post are no longer listed side by side as they were before. Old files and blogs still use the old filenames starting with "tumblr_", which include a "group part" for photo sets.
- There is a new option "group photo sets" in the settings. If a blog has this option enabled and a normal or reblogged post contains a photo set with new-style filenames, the files are renamed to "<post id>_<index>_<original filename>".
- When updating an old backup with both the "force rescan" and "group photo sets" options enabled, already-downloaded files with new-style filenames are renamed as well.
  • Loading branch information
thomas694 committed Jan 17, 2021
1 parent cc5122b commit cac6556
Show file tree
Hide file tree
Showing 40 changed files with 390 additions and 237 deletions.
Empty file added src/TumblThree/.editorconfig
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,7 @@ protected void AddWebmshareUrl(string post, string timestamp)
{
if (CheckIfSkipGif(imageUrl)) { continue; }

AddToDownloadList(new VideoPost(imageUrl, WebmshareParser.GetWebmshareId(imageUrl),
timestamp));
AddToDownloadList(new VideoPost(imageUrl, WebmshareParser.GetWebmshareId(imageUrl), timestamp));
}
}

Expand Down Expand Up @@ -232,7 +231,7 @@ protected void AddTumblrPhotoUrl(string post)
url = ResizeTumblrImageUrl(url);
url = RetrieveOriginalImageUrl(url, 2000, 3000);
// TODO: postID
AddToDownloadList(new PhotoPost(url, Guid.NewGuid().ToString("N")));
AddToDownloadList(new PhotoPost(url, Guid.NewGuid().ToString("N"), -1));
}
}

Expand Down Expand Up @@ -272,7 +271,7 @@ protected void AddGenericPhotoUrl(string post)
if (TumblrParser.IsTumblrUrl(imageUrl)) { continue; }
if (CheckIfSkipGif(imageUrl)) { continue; }

AddToDownloadList(new PhotoPost(imageUrl, Guid.NewGuid().ToString("N")));
AddToDownloadList(new PhotoPost(imageUrl, Guid.NewGuid().ToString("N"), -1));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,10 @@ private void AddPhotoUrl(Post post)
string imageUrl = ParseImageUrl(post);
if (CheckIfSkipGif(imageUrl)) return;

AddToDownloadList(new PhotoPost(imageUrl, post.Id, post.UnixTimestamp.ToString()));
int index = -1;
if (post.Photos?.Count > 0 && post.PhotoUrl1280 == post.Photos[0].PhotoUrl1280 && !post.Photos[0].PhotoUrl1280.Split('/').Last().StartsWith("tumblr_")) index = 1;

AddToDownloadList(new PhotoPost(imageUrl, post.Id, index, post.UnixTimestamp.ToString()));
AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
}

Expand All @@ -730,10 +733,13 @@ private void AddPhotoSetUrl(Post post)
return;
}

int i = 1;
if (post.Photos[0].PhotoUrl1280.Split('/').Last().StartsWith("tumblr_")) i = -1;
foreach (string imageUrl in post.Photos.Select(ParseImageUrl).Where(imgUrl => !CheckIfSkipGif(imgUrl)))
{
AddToDownloadList(new PhotoPost(imageUrl, post.Id, post.UnixTimestamp.ToString()));
AddToDownloadList(new PhotoPost(imageUrl, post.Id, i, post.UnixTimestamp.ToString()));
AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
if (i != -1) i++;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,8 @@ private void AddPhotoUrlToDownloadList(Post post)
private void AddPhotoUrl(Post post)
{
string postId = post.Id;
int i = 1;
if (post.Photos.Count != 0 && post.Photos[0].AltSizes.FirstOrDefault().Url.Split('/').Last().StartsWith("tumblr_")) i = -1;
foreach (Photo photo in post.Photos)
{
string imageUrl = photo.AltSizes.Where(url => url.Width == int.Parse(ImageSizeForSearching())).Select(url => url.Url)
Expand All @@ -464,8 +466,9 @@ private void AddPhotoUrl(Post post)

if (CheckIfSkipGif(imageUrl)) { continue; }

AddToDownloadList(new PhotoPost(imageUrl, postId, post.Timestamp.ToString()));
AddToDownloadList(new PhotoPost(imageUrl, postId, i, post.Timestamp.ToString()));
AddToJsonQueue(new TumblrCrawlerData<Post>(Path.ChangeExtension(imageUrl.Split('/').Last(), ".json"), post));
if (i != -1) i++;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ private void DownloadMedia(Content content, String id, long timestamp, IList<str
if (Blog.DownloadPhoto)
{
url = RetrieveOriginalImageUrl(url, 2000, 3000);
AddToDownloadList(new PhotoPost(url, id, timestamp.ToString()));
AddToDownloadList(new PhotoPost(url, id, -1, timestamp.ToString()));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class AnswerPost : TumblrPost
{
public AnswerPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedAnswers";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class AudioMetaPost : TumblrPost
{
public AudioMetaPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedAudioMetas";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class AudioPost : TumblrPost
{
public AudioPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Binary;
DbType = "DownloadedAudios";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class ConversationPost : TumblrPost
{
public ConversationPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedConversations";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class ExternalPhotoPost : TumblrPost
{
public ExternalPhotoPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Binary;
DbType = "DownloadedPhotos";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class ExternalVideoPost : TumblrPost
{
public ExternalVideoPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Binary;
DbType = "DownloadedVideos";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class LinkPost : TumblrPost
{
public LinkPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedLinks";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class PhotoMetaPost : TumblrPost
{
public PhotoMetaPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedPhotoMetas";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
{
public class PhotoPost : TumblrPost
{
public PhotoPost(string url, string id, string date)
: base(url, id, date)
public PhotoPost(string url, string id, int index, string date)
: base(url, id, index, date)
{
PostType = PostType.Binary;
DbType = "DownloadedPhotos";
TextFileLocation = Resources.FileNamePhotos;
}

public PhotoPost(string url, string id)
: this(url, id, string.Empty)
public PhotoPost(string url, string id, int index)
: this(url, id, index, string.Empty)
{
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class QuotePost : TumblrPost
{
public QuotePost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedQuotes";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class TextPost : TumblrPost
{
public TextPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedTexts";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,19 @@ public abstract class TumblrPost

public string Id { get; }

public int Index { get; }

public string Date { get; }

public string DbType { get; protected set; }

public string TextFileLocation { get; protected set; }

protected TumblrPost(string url, string id, string date)
protected TumblrPost(string url, string id, int index, string date)
{
Url = url;
Id = id;
Index = index;
Date = date;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class VideoMetaPost : TumblrPost
{
public VideoMetaPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Text;
DbType = "DownloadedVideoMetas";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace TumblThree.Applications.DataModels.TumblrPosts
public class VideoPost : TumblrPost
{
public VideoPost(string url, string id, string date)
: base(url, id, date)
: base(url, id, -1, date)
{
PostType = PostType.Binary;
DbType = "DownloadedVideos";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,21 +250,20 @@ public virtual async Task<string> DownloadPageAsync(string url)

protected virtual async Task<bool> DownloadBinaryPostAsync(TumblrPost downloadItem)
{
string url = Url(downloadItem);
if (CheckIfFileExistsInDB(url))
if (CheckIfFileExistsInDB(downloadItem))
{
string fileName = FileName(downloadItem);
string fileName = FileNameNew(downloadItem) ?? FileName(downloadItem);
UpdateProgressQueueInformation(Resources.ProgressSkipFile, fileName);
}
else
{
string blogDownloadLocation = blog.DownloadLocation();
string fileName = FileName(downloadItem);
string fileName = FileNameNew(downloadItem) ?? FileName(downloadItem);
string fileLocation = FileLocation(blogDownloadLocation, fileName);
string fileLocationUrlList = FileLocationLocalized(blogDownloadLocation, downloadItem.TextFileLocation);
DateTime postDate = PostDate(downloadItem);
UpdateProgressQueueInformation(Resources.ProgressDownloadImage, fileName);
if (!await DownloadBinaryFileAsync(fileLocation, fileLocationUrlList, url))
if (!await DownloadBinaryFileAsync(fileLocation, fileLocationUrlList, Url(downloadItem)))
{
return false;
}
Expand Down Expand Up @@ -293,20 +292,22 @@ protected virtual async Task<bool> DownloadBinaryPostAsync(TumblrPost downloadIt
return true;
}

private bool CheckIfFileExistsInDB(string url)
private bool CheckIfFileExistsInDB(TumblrPost downloadItem)
{
string filename = FileName(downloadItem);
string filenameNew = FileNameNew(downloadItem);
if (shellService.Settings.LoadAllDatabases)
{
return managerService.CheckIfFileExistsInDB(url);
return managerService.CheckIfFileExistsInDB(filename, filenameNew, blog.Name);
}

return files.CheckIfFileExistsInDB(url) || blog.CheckIfBlogShouldCheckDirectory(GetCoreImageUrl(url));
return files.CheckIfFileExistsInDB(filename, filenameNew, true) || blog.CheckIfBlogShouldCheckDirectory(filename, filenameNew);
}

private void DownloadTextPost(TumblrPost downloadItem)
{
string postId = PostId(downloadItem);
if (CheckIfFileExistsInDB(postId))
if (files.CheckIfFileExistsInDB(postId, null, false))
{
UpdateProgressQueueInformation(Resources.ProgressSkipFile, postId);
}
Expand Down Expand Up @@ -350,6 +351,11 @@ private static string FileName(TumblrPost downloadItem)
return downloadItem.Url.Split('/').Last();
}

/// <summary>
/// Builds the grouped filename for a download item.
/// </summary>
/// <param name="downloadItem">The post whose Id, Index and Url are used to build the name.</param>
/// <returns>
/// "{Id}_{Index}_{FileName}" when the blog has GroupPhotoSets enabled and the item's
/// Index is not -1; otherwise <c>null</c>.
/// </returns>
private string FileNameNew(TumblrPost downloadItem)
{
// No grouped name when the option is off or the item carries the -1 "no index" marker.
if (!blog.GroupPhotoSets || downloadItem.Index == -1)
{
return null;
}

string originalName = FileName(downloadItem);
return $"{downloadItem.Id}_{downloadItem.Index}_{originalName}";
}

protected static string FileLocation(string blogDownloadLocation, string fileName)
{
return Path.Combine(blogDownloadLocation, fileName);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ protected override async Task<bool> DownloadBinaryPostAsync(TumblrPost downloadI
foreach (string host in shellService.Settings.TumblrHosts)
{
url = BuildRawImageUrl(url, host);
if (await base.DownloadBinaryPostAsync(new PhotoPost(url, downloadItem.Id, downloadItem.Date)))
if (await base.DownloadBinaryPostAsync(new PhotoPost(url, downloadItem.Id, downloadItem.Index, downloadItem.Date)))
{
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,9 @@ public AppSettings()
[DataMember]
public string LogLevel { get; set; }

/// <summary>
/// Application-wide value of the "group photo sets" option.
/// Set to <c>false</c> in Initialize() and copied onto a blog by TransferGlobalSettingsToBlog.
/// </summary>
[DataMember]
public bool GroupPhotoSets { get; set; }

[DataMember]
public int SettingsTabIndex { get; set; }

Expand Down Expand Up @@ -464,6 +467,7 @@ private void Initialize()
#else
LogLevel = nameof(System.Diagnostics.TraceLevel.Info);
#endif
GroupPhotoSets = false;
ColumnSettings = new Dictionary<object, Tuple<int, double, Visibility>>();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public interface IManagerService

IEnumerable<IFiles> Databases { get; }

bool CheckIfFileExistsInDB(string url);
bool CheckIfFileExistsInDB(string filename, string filenameNew, string blogName);

void RemoveDatabase(IFiles database);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,13 @@ public ManagerService()

public IEnumerable<IFiles> Databases => databases;

public bool CheckIfFileExistsInDB(string url)
public bool CheckIfFileExistsInDB(string filename, string filenameNew, string blogName)
{
lock (databasesLock)
{
foreach (IFiles db in databases)
{
if (db.CheckIfFileExistsInDB(url)) return true;
if (db.CheckIfFileExistsInDB(filename, filenameNew, db.Name == blogName)) return true;
}
return false;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ public IBlog TransferGlobalSettingsToBlog(IBlog blog)
blog.DumpCrawlerData = shellService.Settings.DumpCrawlerData;
blog.RegExPhotos = shellService.Settings.RegExPhotos;
blog.RegExVideos = shellService.Settings.RegExVideos;
blog.GroupPhotoSets = shellService.Settings.GroupPhotoSets;
return blog;
}
}
Expand Down
Loading

0 comments on commit cac6556

Please sign in to comment.