Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow custom file extensions for directory-based journal #1789

Draft
wants to merge 2 commits into
base: develop
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 17 additions & 9 deletions jrnl/journals/FolderJournal.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import codecs
import os
import pathlib
import re
from typing import TYPE_CHECKING

from jrnl import time
Expand All @@ -17,7 +18,6 @@
DIGIT_PATTERN = "[0123456789]"
YEAR_PATTERN = DIGIT_PATTERN * 4
MONTH_PATTERN = "[01]" + DIGIT_PATTERN
DAY_PATTERN = "[0123]" + DIGIT_PATTERN + ".txt"


class Folder(Journal):
Expand All @@ -34,7 +34,7 @@ def open(self) -> "Folder":
self.entries = []

if os.path.exists(self.config["journal"]):
filenames = Folder._get_files(self.config["journal"])
filenames = Folder._get_files(self, self.config["journal"])
for filename in filenames:
with codecs.open(filename, "r", "utf-8") as f:
journal = f.read()
Expand All @@ -45,6 +45,8 @@ def open(self) -> "Folder":

def write(self) -> None:
"""Writes only the entries that have been modified into proper files."""
if self.config["extension"] is not None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is one of the reasons the tests are failing -- the "extension" key isn't present in every config, so a KeyError is raised when trying to retrieve it. Checking self.config.get("extension") instead of self.config["extension"] should do the trick.

EXTENSION = self.config["extension"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A little style note - could you keep this extension variable lower case? Upper case is meant for constants.

# Create a list of dates of modified entries. Start with diff_entry_dates
modified_dates = self._diff_entry_dates
seen_dates = set(self._diff_entry_dates)
Expand All @@ -63,7 +65,7 @@ def write(self) -> None:
self.config["journal"],
d.strftime("%Y"),
d.strftime("%m"),
d.strftime("%d") + ".txt",
d.strftime("%d") + "." + EXTENSION,
)
dirname = os.path.dirname(filename)
# create directory if it doesn't exist
Expand All @@ -81,7 +83,7 @@ def write(self) -> None:
journal_file.write(journal)
# look for and delete empty files
filenames = []
filenames = Folder._get_files(self.config["journal"])
filenames = Folder._get_files(self, self.config["journal"])
for filename in filenames:
if os.stat(filename).st_size <= 0:
os.remove(filename)
Expand Down Expand Up @@ -121,12 +123,14 @@ def parse_editable_str(self, edited: str) -> None:
self.entries = mod_entries

@staticmethod
def _get_files(journal_path: str) -> list[str]:
def _get_files(self, journal_path: str) -> list[str]:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is a static method, it shouldn't take self. Instead, I think it would be better to pass the extension as an argument with a default value of None, so the _get_files method doesn't have to be "aware" of how the config works.

"""Searches through sub directories starting with journal_path and find all text
files that look like entries"""
if self.config["extension"] is not None:
EXTENSION = self.config["extension"]
for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
for month_folder in Folder._get_month_folders(year_folder):
yield from Folder._get_day_files(month_folder)
yield from Folder._get_day_files(self, EXTENSION, month_folder)

@staticmethod
def _get_year_folders(path: pathlib.Path) -> list[pathlib.Path]:
Expand All @@ -143,10 +147,14 @@ def _get_month_folders(path: pathlib.Path) -> list[pathlib.Path]:
return

@staticmethod
def _get_day_files(path: pathlib.Path) -> list[str]:
for child in path.glob(DAY_PATTERN):
def _get_day_files(self, extension, path: pathlib.Path) -> list[str]:
EXTENSION = extension
DAY_PATTERN = "[0-3][0-9]." + EXTENSION
for child in path.iterdir():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer to keep the glob method rather than using iterdir and regex, for performance reasons. We've seen issues in the past where unpredictable things can happen for users when pointing jrnl to a large directory, and iterdir has been the culprit since it iterates through all files instead of just the expected files.

We can still use the glob approach here by adding the configured extension to the day pattern at the start of this method, though.

match = re.fullmatch(DAY_PATTERN, str(child.name))
if (
int(child.stem) > 0
match is not None
and int(child.stem) > 0
and int(child.stem) <= 31
and time.is_valid_date(
year=int(path.parent.name),
Expand Down