Skip to content

Commit

Permalink
Minor improvements to type inference and readability, not enough to w…
Browse files Browse the repository at this point in the history
…arrant a new release
  • Loading branch information
mangiucugna committed Jan 23, 2024
1 parent 6e17875 commit f31be6e
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 22 deletions.
27 changes: 7 additions & 20 deletions src/json_repair/json_repair.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,17 +207,18 @@ def parse_string(self, use_single_quotes=False) -> str:
if fixed_quotes:
if self.context == "object_key" and (char == ":" or char.isspace()):
break
elif self.context == "object_value" and (char == "," or char == "}"):
elif self.context == "object_value" and char in [",", "}"]:
break
self.index += 1
char = self.get_char_at()
# ChatGPT sometimes forgets to quote links in markdown like: { "content": "[LINK]("https://google.com")" }
if (
char == string_terminator
and self.get_next_char() != ","
# Next character is not a comma
and self.get_char_at(1) != ","
and (
fix_broken_markdown_link
or (self.get_prev_char(2) + self.get_prev_char()) == "]("
or (self.get_char_at(-2) == "]" and self.get_char_at(-1)) == "("
)
):
fix_broken_markdown_link = not fix_broken_markdown_link
Expand All @@ -239,7 +240,7 @@ def parse_string(self, use_single_quotes=False) -> str:

return self.json_str[start:end]

def parse_number(self) -> Union[float, int]:
def parse_number(self) -> Union[float, int, str]:
# <number> is a valid real number expressed in one of a number of given formats
number_str = ""
number_chars = set("0123456789-.eE")
Expand All @@ -257,7 +258,7 @@ def parse_number(self) -> Union[float, int]:
# This is a string then
return self.parse_string()

def parse_boolean_or_null(self) -> Union[bool, None]:
def parse_boolean_or_null(self) -> Union[bool, str, None]:
# <boolean> is one of the literal strings 'true', 'false', or 'null' (unquoted)
boolean_map = {"true": (True, 4), "false": (False, 5), "null": (None, 4)}
for key, (value, length) in boolean_map.items():
Expand All @@ -272,21 +273,7 @@ def insert_char_at(self, char: str) -> None:
self.json_str = self.json_str[: self.index] + char + self.json_str[self.index :]
self.index += 1

def get_char_at(self) -> Union[str, bool]:
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
try:
return self.json_str[self.index]
except IndexError:
return False

def get_prev_char(self, count=1):
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
try:
return self.json_str[self.index - count]
except IndexError:
return False

def get_next_char(self, count=1):
def get_char_at(self, count: int = 0) -> Union[str, bool]:
# Why not use something simpler? Because we might be out of bounds and doing this check all the time is annoying
try:
return self.json_str[self.index + count]
Expand Down
4 changes: 2 additions & 2 deletions tests/test_performance.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ def test_true_true(benchmark):
mean_time = benchmark.stats.get("median")

# Define your time threshold in seconds (microseconds in this case)
max_time = 1.1 / 10 ** 6 # 1.1 microsecond
max_time = 1.2 / 10 ** 6 # 1.2 microsecond

# Assert that the median time is below the threshold
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
Expand All @@ -304,7 +304,7 @@ def test_false_true(benchmark):
mean_time = benchmark.stats.get("median")

# Define your time threshold in seconds (ms in this case)
max_time = 1 / 10 ** 3 # 1 millisecond
max_time = 1.2 / 10 ** 3 # 1.2 millisecond

# Assert that the median time is below the threshold
assert mean_time < max_time, f"Benchmark exceeded threshold: {mean_time:.3f}s > {max_time:.3f}s"
Expand Down

0 comments on commit f31be6e

Please sign in to comment.