Skip to content

Commit

Permalink
Add logs during register_upper
Browse files Browse the repository at this point in the history
Logs whether all of the available new uppercase entries could be registered.
  • Loading branch information
OrianeN committed Oct 7, 2024
1 parent 8b162cb commit 97a25ac
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions pie/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,14 @@ def register_upper(self):
if self.max_size:
t = len(self.inverse_table)
r = len(self.reserved)
if (self.max_size - t - r) > 0:
new_chars = new_chars[:min(len(new_chars), self.max_size-t-r)]
slots_left = self.max_size - t - r
if slots_left > 0:
new_chars = new_chars[:min(len(new_chars), slots_left)]
if slots_left < len(new_chars):
logger.info(f"Could not register all available uppercase vocab entries "
f"({slots_left} slots < {len(new_chars)} upper chars)")
else:
logger.info(f"All uppercase ({self.name}) vocab registered ({len(new_chars)} new entries)")
else:
return # We have too much in the vocab already

Expand Down

0 comments on commit 97a25ac

Please sign in to comment.