Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Include subsegment information when aggregating split statistics #433

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# splat Release Notes

### 0.32.1
* Include subsegment information when aggregating split statistics.

### 0.32.0

* Tag `PSYQ` as a compiler that uses `j` instructions as branches.
Expand Down
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,8 @@ build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src/splat"]

[tool.hatch.envs.dev]
features = ["dev"]

[project.scripts]
splat = "splat.__main__:splat_main"
12 changes: 6 additions & 6 deletions src/splat/scripts/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,11 +271,9 @@ def do_scan(
for segment in scan_bar:
assert isinstance(segment, Segment)
scan_bar.set_description(f"Scanning {brief_seg_name(segment, 20)}")
typ = segment.type
if segment.type == "bin" and segment.is_name_default():
typ = "unk"

stats.add_size(typ, segment.size)
for ty, sub_stats in segment.statistics.items():
stats.add_size(ty, sub_stats.size)

if segment.should_scan():
# Check cache but don't write anything
Expand All @@ -287,7 +285,8 @@ def do_scan(

processed_segments.append(segment)

stats.count_split(typ)
for ty, sub_stats in segment.statistics.items():
stats.count_split(ty, sub_stats.count)

symbols.mark_c_funcs_as_defined()
return processed_segments
Expand All @@ -305,7 +304,8 @@ def do_split(
split_bar.set_description(f"Splitting {brief_seg_name(segment, 20)}")

if cache.check_cache_hit(segment, True):
stats.count_cached(segment.type)
for ty, sub_stats in segment.statistics.items():
stats.count_cached(ty, sub_stats.count)
continue

if segment.should_split():
Expand Down
9 changes: 8 additions & 1 deletion src/splat/segtypes/common/bin.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from ...util import log, options

from .segment import CommonSegment
from .segment import CommonSegment, SegmentType


class CommonSegBin(CommonSegment):
Expand Down Expand Up @@ -33,3 +33,10 @@ def split(self, rom_bytes):

f.write(rom_bytes[self.rom_start : self.rom_end])
self.log(f"Wrote {self.name} to {path}")

@property
def statistics_type(self) -> SegmentType:
stats_type = self.type
if self.is_name_default():
stats_type = "unk"
Comment on lines +40 to +41
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The old code was checking for bin types too.
I'm not sure if we should keep that check tho, naming the section itself could be considered as it being known, idk.
What do you think @ethteck ?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah I think considering it known if it's named is totally fine, personally

return stats_type
10 changes: 9 additions & 1 deletion src/splat/segtypes/common/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from ...util import log

from .segment import CommonSegment
from ..segment import Segment
from ..segment import empty_statistics, Segment, SegmentStatistics


class CommonSegGroup(CommonSegment):
Expand Down Expand Up @@ -124,6 +124,14 @@ def needs_symbols(self) -> bool:
return True
return False

@property
def statistics(self) -> SegmentStatistics:
stats = empty_statistics()
for sub in self.subsegments:
for ty, info in sub.statistics.items():
stats[ty] = stats[ty].merge(info)
return stats

def get_linker_entries(self):
return [entry for sub in self.subsegments for entry in sub.get_linker_entries()]

Expand Down
2 changes: 1 addition & 1 deletion src/splat/segtypes/common/segment.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from ...segtypes.segment import Segment
from ...segtypes.segment import Segment, SegmentType

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason for this change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a type alias to make the types for the code I introduced more self-documenting — certainly not necessary

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh I hadn't understood the full context of it. thank you!


class CommonSegment(Segment):
Expand Down
34 changes: 34 additions & 0 deletions src/splat/segtypes/segment.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import collections
import dataclasses
import importlib
import importlib.util
from pathlib import Path
Expand Down Expand Up @@ -67,6 +69,27 @@ def parse_segment_section_order(segment: Union[dict, list]) -> List[str]:
return default


SegmentType = str


@dataclasses.dataclass
class SegmentStatisticsInfo:
size: int
count: int

def merge(self, other: "SegmentStatisticsInfo") -> "SegmentStatisticsInfo":
return SegmentStatisticsInfo(
size=self.size + other.size, count=self.count + other.count
)


SegmentStatistics = dict[SegmentType, SegmentStatisticsInfo]


def empty_statistics() -> SegmentStatistics:
return collections.defaultdict(lambda: SegmentStatisticsInfo(size=0, count=0))


class Segment:
require_unique_name = True

Expand Down Expand Up @@ -520,6 +543,17 @@ def size(self) -> Optional[int]:
else:
return None

@property
def statistics(self) -> SegmentStatistics:
stats = empty_statistics()
if self.size is not None:
stats[self.statistics_type] = SegmentStatisticsInfo(size=self.size, count=1)
return stats

@property
def statistics_type(self) -> SegmentType:
return self.type

@property
def vram_end(self) -> Optional[int]:
if self.vram_start is not None and self.size is not None:
Expand Down
8 changes: 4 additions & 4 deletions src/splat/util/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ def add_size(self, typ: str, size: Optional[int]):
self.seg_sizes[typ] = 0
self.seg_sizes[typ] += 0 if size is None else size

def count_split(self, typ: str):
def count_split(self, typ: str, count: int = 1):
if typ not in self.seg_split:
self.seg_split[typ] = 0
self.seg_split[typ] += 1
self.seg_split[typ] += count

def count_cached(self, typ: str):
def count_cached(self, typ: str, count: int = 1):
if typ not in self.seg_cached:
self.seg_cached[typ] = 0
self.seg_cached[typ] += 1
self.seg_cached[typ] += count

def print_statistics(self, total_size: int):
unk_size = self.seg_sizes.get("unk", 0)
Expand Down
Loading