Fix/phy (#35)
* fix: use physical_position only when exists

* feat: validate the file when specified to speed up the startup
WinChua authored Jun 16, 2024
1 parent 21df69a commit 9406e6c
Showing 4 changed files with 46 additions and 34 deletions.
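Before the per-file diffs: the commit threads a new opt-in flag through the main click group, so any subcommand can trigger the up-front page validation. Below is a hedged sketch of driving it through click's test runner; only --validate-first and the pyinnodb.cli.main module come from this commit, while the t.ibd path, the search invocation style, and its --primary-key option name are assumptions for illustration.

from click.testing import CliRunner

from pyinnodb.cli.main import main

runner = CliRunner()
# Group-level options precede the positional ibd file and the subcommand;
# assumes a t.ibd file exists in the working directory.
result = runner.invoke(main, ["--validate-first", "t.ibd", "search", "--primary-key", "1"])
print(result.exit_code)
print(result.output)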
1 change: 1 addition & 0 deletions src/pyinnodb/cli/iter_record.py
@@ -33,6 +33,7 @@ def list_first_page(ctx, pageno):
 def search(ctx, primary_key, pageno, hidden_col, with_hist):
     ''' search the primary-key(int support only now) '''
     f = ctx.obj["fn"]
+    #print("search start cost:", time.time() - ctx.obj["start_time"])
     fsp_page: MFspPage = ctx.obj["fsp_page"]
     f.seek(fsp_page.sdi_page_no * const.PAGE_SIZE)
     sdi_page = MSDIPage.parse_stream(f)
34 changes: 22 additions & 12 deletions src/pyinnodb/cli/main.py
@@ -5,6 +5,8 @@
 from pyinnodb import const
 from io import BytesIO
 import sys
+import os
+import time

 import dataclasses

@@ -16,8 +18,11 @@
 @click.option(
     "--log-level", type=click.Choice(["DEBUG", "ERROR", "INFO"]), default="ERROR"
 )
+@click.option(
+    "--validate-first/--no-validate-first", type=click.BOOL, default=False
+)
 @click.pass_context
-def main(ctx, fn, log_level):
+def main(ctx, fn, log_level, validate_first):
     '''A ibd file parser for MySQL 8.0 above, help you to know innodb better.

     It offer several function bellow:
@@ -30,6 +35,10 @@ def main(ctx, fn, log_level):
     many other function to explore your ibd file
     '''
+    # pid = os.getpid()
+    # start_time = os.stat(f"/proc/{pid}").st_ctime
+    # print("cost to startup:", time.time() - start_time)
+    # ctx.obj["start_time"] = start_time
     logging.basicConfig(
         format="[%(levelname)s]-[%(filename)s:%(lineno)d] %(message)s", level=log_level
     )
@@ -38,17 +47,18 @@
     try:
         fsp_page = MFspPage.parse_stream(fn)
         ctx.obj["fsp_page"] = fsp_page
-        for pn in range(fsp_page.fsp_header.highest_page_number):
-            fn.seek(const.PAGE_SIZE * pn)
-            page_data = fn.read(const.PAGE_SIZE)
-            fil = MFil.parse(page_data)
-            if fil.page_type == const.FIL_PAGE_TYPE_ALLOCATED:
-                continue
-            checksum = const.page_checksum_crc32c(page_data)
-            if checksum != fil.checksum:
-                print(f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]")
-                print("use validate to get a more detail output of the validation")
-                sys.exit(1)
+        if validate_first:
+            for pn in range(fsp_page.fsp_header.highest_page_number):
+                fn.seek(const.PAGE_SIZE * pn)
+                page_data = fn.read(const.PAGE_SIZE)
+                fil = MFil.parse(page_data)
+                if fil.page_type == const.FIL_PAGE_TYPE_ALLOCATED:
+                    continue
+                checksum = const.page_checksum_crc32c(page_data)
+                if checksum != fil.checksum:
+                    print(f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]")
+                    print("use validate to get a more detail output of the validation")
+                    sys.exit(1)
     except Exception as e:
         print(e)
         print("the file parse faile")
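For readers outside the codebase, here is a self-contained sketch of what the guarded loop does. PAGE_SIZE, the header offsets, and the injected crc32c callable are simplified stand-ins for pyinnodb's MFil/const helpers, not the project's API; in InnoDB's FIL header the checksum sits at byte offset 0 and the page type at offset 24, and freshly allocated pages (type 0) are skipped because they carry no checksum yet.

import struct
import sys

PAGE_SIZE = 16 * 1024            # InnoDB default page size
FIL_PAGE_TYPE_ALLOCATED = 0      # freshly allocated page, not yet checksummed

def validate_pages(f, highest_page_number, crc32c):
    """Walk every page, comparing the stored FIL checksum with a recomputed one."""
    for pn in range(highest_page_number):
        f.seek(PAGE_SIZE * pn)
        page_data = f.read(PAGE_SIZE)
        stored = struct.unpack_from(">I", page_data, 0)[0]      # stored checksum
        page_type = struct.unpack_from(">H", page_data, 24)[0]  # FIL page type
        if page_type == FIL_PAGE_TYPE_ALLOCATED:
            continue
        # crc32c must implement InnoDB's page checksum, which excludes the
        # checksum fields themselves from the checksummed byte ranges.
        if crc32c(page_data) != stored:
            print(f"PAGE {pn}'s checksum is invalid")
            sys.exit(1)

Making this full-file walk opt-in is what restores fast startup on large files; the separate validate command referenced in the error message remains the place for a detailed report.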
6 changes: 0 additions & 6 deletions src/pyinnodb/disk_struct/index.py
@@ -121,12 +121,6 @@ def value_parser(rh: MRecordHeader, f):
     cols_disk_layout = [d for d in primary_data_layout_col if d[0].version_valid(data_schema_version)]
     logger.debug("primary data layout is %s", ",".join(c[0].name for c in primary_data_layout_col))

-    if rh.instant == 1:
-        f.seek(-1, 1)
-        extra_byte = int.from_bytes(f.read(1), "big")
-        logger.debug("instant col extra byte is %s, &0x80 is %s, len(cols) is %d", hex(extra_byte), extra_byte & 0x80,
-                     len(cols_disk_layout))
-        cols_disk_layout = cols_disk_layout[:extra_byte]

     if rh.instant == 1:
         f.seek(-1, 1)
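The six deleted lines were an exact duplicate of the instant-ADD-COLUMN handling that still follows them (its first two lines are visible above the fold). As a hedged illustration of what that surviving block does with the byte just before the cursor — the sample bytes and column list here are made up:

from io import BytesIO

cols_disk_layout = ["id", "a", "b", "c"]

f = BytesIO(bytes([0x03]) + b"record-body")
f.seek(1)                          # cursor sits just past the count byte
f.seek(-1, 1)                      # rewind one byte, as value_parser does
extra_byte = int.from_bytes(f.read(1), "big")
# The real code only logs the high bit (extra_byte & 0x80); the value is the
# column count in effect when this row was written, so the layout is truncated.
cols_disk_layout = cols_disk_layout[:extra_byte]
print(cols_disk_layout)            # ['id', 'a', 'b']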
39 changes: 23 additions & 16 deletions src/pyinnodb/sdi/table.py
@@ -697,23 +697,30 @@ def get_column_schema_version(self, version) -> typing.List[Column]:
             cols.append(col)
         return cols

+    @cache
     def get_disk_data_layout(self):
-        c_l = {}
-        for idx in self.indexes:
-            if idx.name != "PRIMARY":
-                continue
-            for ie in idx.elements:
-                col = self.columns[ie.column_opx]
-                prekey_len, ok = col.index_prefix(ie)
-                if ok:
-                    c_l[ie.column_opx] = prekey_len
-                else:
-                    c_l[ie.column_opx] = ie.length
-        data_layout_col = []
-        for i, c in enumerate(self.columns):
-            data_layout_col.append((c, c_l.get(i, 4294967295)))
-        data_layout_col.sort(key = lambda c: c[0].private_data.get("physical_pos", 0))
-        return data_layout_col
+        phsical_post_exists = False
+        for c in self.columns:
+            if "phsical_pos" in c.private_data:
+                phsical_post_exists = True
+                break
+        if phsical_post_exists:
+            c_l = {}
+            for idx in self.indexes:
+                if idx.name != "PRIMARY":
+                    continue
+                for ie in idx.elements:
+                    col = self.columns[ie.column_opx]
+                    prekey_len, ok = col.index_prefix(ie)
+                    if ok:
+                        c_l[ie.column_opx] = prekey_len
+                    else:
+                        c_l[ie.column_opx] = ie.length
+            data_layout_col = []
+            for i, c in enumerate(self.columns):
+                data_layout_col.append((c, c_l.get(i, 4294967295)))
+            data_layout_col.sort(key = lambda c: c[0].private_data.get("physical_pos", 0))
+            return data_layout_col
         data_layout_col = []
         for idx in self.indexes:
             if (
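Reduced to a runnable sketch, the fix guards the physical_pos ordering behind an existence check. Column below is a stand-in for pyinnodb's SDI column type; only the private_data key and the fallback behaviour come from the diff. Note that any() expresses the commit's manual flag-and-break loop more compactly:

import dataclasses
import typing

@dataclasses.dataclass
class Column:                      # stand-in for pyinnodb's SDI column type
    name: str
    private_data: typing.Dict[str, int]

columns = [
    Column("b", {"physical_pos": 2}),
    Column("id", {"physical_pos": 0}),
    Column("a", {"physical_pos": 1}),
]

# Only trust physical_pos when some column actually carries it; otherwise fall
# back to the index-element ordering (the branch elided above).
if any("physical_pos" in c.private_data for c in columns):
    layout = sorted(columns, key=lambda c: c.private_data.get("physical_pos", 0))
else:
    layout = columns
print([c.name for c in layout])    # ['id', 'a', 'b']

The @cache decorator on get_disk_data_layout additionally memoizes the computed layout per table object, which lines up with the commit's startup-speed goal.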
