diff --git a/pie/data/dataset.py b/pie/data/dataset.py
index dee3b41..dc7b35b 100644
--- a/pie/data/dataset.py
+++ b/pie/data/dataset.py
@@ -248,8 +248,15 @@ def register_upper(self):
         if self.max_size:
             t = len(self.inverse_table)
             r = len(self.reserved)
-            if (self.max_size - t - r) > 0:
-                new_chars = new_chars[:min(len(new_chars), self.max_size-t-r)]
+            slots_left = self.max_size - t - r
+            if slots_left > 0:
+                # Compare *before* truncating: once sliced, len(new_chars) can
+                # never exceed slots_left, so the warning would be unreachable.
+                if slots_left < len(new_chars):
+                    logger.info(f"Could not register all available uppercase vocab entries "
+                                f"({slots_left} slots < {len(new_chars)} upper chars)")
+                else:
+                    logger.info(f"All uppercase ({self.name}) vocab registered ({len(new_chars)} new entries)")
+                new_chars = new_chars[:slots_left]
             else:
                 return  # We have too much in the vocab already
 