Fix/phy (#35)
* fix: use physical_position only when exists

* feat: validate the file when specified to speed up the startup
WinChua authored Jun 16, 2024
1 parent 21df69a commit 9406e6c
Showing 4 changed files with 46 additions and 34 deletions.
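Before the per-file diffs: the commit threads a new opt-in flag through the main click group, so any subcommand can trigger the up-front page validation. Below is a hedged sketch of driving it through click's test runner; only --validate-first and the pyinnodb.cli.main module come from this commit, while the t.ibd path, the search invocation style, and its --primary-key option name are assumptions for illustration.

from click.testing import CliRunner

from pyinnodb.cli.main import main

runner = CliRunner()
# Group-level options precede the positional ibd file and the subcommand;
# assumes a t.ibd file exists in the working directory.
result = runner.invoke(main, ["--validate-first", "t.ibd", "search", "--primary-key", "1"])
print(result.exit_code)
print(result.output)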
1 change: 1 addition & 0 deletions src/pyinnodb/cli/iter_record.py
@@ -33,6 +33,7 @@ def list_first_page(ctx, pageno):
 def search(ctx, primary_key, pageno, hidden_col, with_hist):
     ''' search the primary-key(int support only now) '''
     f = ctx.obj["fn"]
+    #print("search start cost:", time.time() - ctx.obj["start_time"])
     fsp_page: MFspPage = ctx.obj["fsp_page"]
     f.seek(fsp_page.sdi_page_no * const.PAGE_SIZE)
     sdi_page = MSDIPage.parse_stream(f)
34 changes: 22 additions & 12 deletions src/pyinnodb/cli/main.py
@@ -5,6 +5,8 @@
 from pyinnodb import const
 from io import BytesIO
 import sys
+import os
+import time

 import dataclasses

@@ -16,8 +18,11 @@
 @click.option(
     "--log-level", type=click.Choice(["DEBUG", "ERROR", "INFO"]), default="ERROR"
 )
+@click.option(
+    "--validate-first/--no-validate-first", type=click.BOOL, default=False
+)
 @click.pass_context
-def main(ctx, fn, log_level):
+def main(ctx, fn, log_level, validate_first):
     '''A ibd file parser for MySQL 8.0 above, help you to know innodb better.

     It offer several function bellow:
@@ -30,6 +35,10 @@ def main(ctx, fn, log_level):
     many other function to explore your ibd file
     '''
+    # pid = os.getpid()
+    # start_time = os.stat(f"/proc/{pid}").st_ctime
+    # print("cost to startup:", time.time() - start_time)
+    # ctx.obj["start_time"] = start_time
     logging.basicConfig(
         format="[%(levelname)s]-[%(filename)s:%(lineno)d] %(message)s", level=log_level
     )
@@ -38,17 +47,18 @@
     try:
         fsp_page = MFspPage.parse_stream(fn)
         ctx.obj["fsp_page"] = fsp_page
-        for pn in range(fsp_page.fsp_header.highest_page_number):
-            fn.seek(const.PAGE_SIZE * pn)
-            page_data = fn.read(const.PAGE_SIZE)
-            fil = MFil.parse(page_data)
-            if fil.page_type == const.FIL_PAGE_TYPE_ALLOCATED:
-                continue
-            checksum = const.page_checksum_crc32c(page_data)
-            if checksum != fil.checksum:
-                print(f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]")
-                print("use validate to get a more detail output of the validation")
-                sys.exit(1)
+        if validate_first:
+            for pn in range(fsp_page.fsp_header.highest_page_number):
+                fn.seek(const.PAGE_SIZE * pn)
+                page_data = fn.read(const.PAGE_SIZE)
+                fil = MFil.parse(page_data)
+                if fil.page_type == const.FIL_PAGE_TYPE_ALLOCATED:
+                    continue
+                checksum = const.page_checksum_crc32c(page_data)
+                if checksum != fil.checksum:
+                    print(f"PAGE {pn}'s checksum is invalid, stored[{hex(fil.checksum)}] != calculate[{hex(checksum)}]")
+                    print("use validate to get a more detail output of the validation")
+                    sys.exit(1)
     except Exception as e:
         print(e)
         print("the file parse faile")
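For readers outside the codebase, here is a self-contained sketch of what the guarded loop does. PAGE_SIZE, the header offsets, and the injected crc32c callable are simplified stand-ins for pyinnodb's MFil/const helpers, not the project's API; in InnoDB's FIL header the checksum sits at byte offset 0 and the page type at offset 24, and freshly allocated pages (type 0) are skipped because they carry no checksum yet.

import struct
import sys

PAGE_SIZE = 16 * 1024            # InnoDB default page size
FIL_PAGE_TYPE_ALLOCATED = 0      # freshly allocated page, not yet checksummed

def validate_pages(f, highest_page_number, crc32c):
    """Walk every page, comparing the stored FIL checksum with a recomputed one."""
    for pn in range(highest_page_number):
        f.seek(PAGE_SIZE * pn)
        page_data = f.read(PAGE_SIZE)
        stored = struct.unpack_from(">I", page_data, 0)[0]      # stored checksum
        page_type = struct.unpack_from(">H", page_data, 24)[0]  # FIL page type
        if page_type == FIL_PAGE_TYPE_ALLOCATED:
            continue
        # crc32c must implement InnoDB's page checksum, which excludes the
        # checksum fields themselves from the checksummed byte ranges.
        if crc32c(page_data) != stored:
            print(f"PAGE {pn}'s checksum is invalid")
            sys.exit(1)

Making this full-file walk opt-in is what restores fast startup on large files; the separate validate command referenced in the error message remains the place for a detailed report.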
6 changes: 0 additions & 6 deletions src/pyinnodb/disk_struct/index.py
@@ -121,12 +121,6 @@ def value_parser(rh: MRecordHeader, f):
     cols_disk_layout = [d for d in primary_data_layout_col if d[0].version_valid(data_schema_version)]
     logger.debug("primary data layout is %s", ",".join(c[0].name for c in primary_data_layout_col))

-    if rh.instant == 1:
-        f.seek(-1, 1)
-        extra_byte = int.from_bytes(f.read(1), "big")
-        logger.debug("instant col extra byte is %s, &0x80 is %s, len(cols) is %d", hex(extra_byte), extra_byte & 0x80,
-                     len(cols_disk_layout))
-        cols_disk_layout = cols_disk_layout[:extra_byte]

     if rh.instant == 1:
         f.seek(-1, 1)
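The six deleted lines were an exact duplicate of the instant-ADD-COLUMN handling that still follows them (its first two lines are visible above the fold). As a hedged illustration of what that surviving block does with the byte just before the cursor — the sample bytes and column list here are made up:

from io import BytesIO

cols_disk_layout = ["id", "a", "b", "c"]

f = BytesIO(bytes([0x03]) + b"record-body")
f.seek(1)                          # cursor sits just past the count byte
f.seek(-1, 1)                      # rewind one byte, as value_parser does
extra_byte = int.from_bytes(f.read(1), "big")
# The real code only logs the high bit (extra_byte & 0x80); the value is the
# column count in effect when this row was written, so the layout is truncated.
cols_disk_layout = cols_disk_layout[:extra_byte]
print(cols_disk_layout)            # ['id', 'a', 'b']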
39 changes: 23 additions & 16 deletions src/pyinnodb/sdi/table.py
@@ -697,23 +697,30 @@ def get_column_schema_version(self, version) -> typing.List[Column]:
             cols.append(col)
         return cols

+    @cache
     def get_disk_data_layout(self):
-        c_l = {}
-        for idx in self.indexes:
-            if idx.name != "PRIMARY":
-                continue
-            for ie in idx.elements:
-                col = self.columns[ie.column_opx]
-                prekey_len, ok = col.index_prefix(ie)
-                if ok:
-                    c_l[ie.column_opx] = prekey_len
-                else:
-                    c_l[ie.column_opx] = ie.length
-        data_layout_col = []
-        for i, c in enumerate(self.columns):
-            data_layout_col.append((c, c_l.get(i, 4294967295)))
-        data_layout_col.sort(key = lambda c: c[0].private_data.get("physical_pos", 0))
-        return data_layout_col
+        phsical_post_exists = False
+        for c in self.columns:
+            if "phsical_pos" in c.private_data:
+                phsical_post_exists = True
+                break
+        if phsical_post_exists:
+            c_l = {}
+            for idx in self.indexes:
+                if idx.name != "PRIMARY":
+                    continue
+                for ie in idx.elements:
+                    col = self.columns[ie.column_opx]
+                    prekey_len, ok = col.index_prefix(ie)
+                    if ok:
+                        c_l[ie.column_opx] = prekey_len
+                    else:
+                        c_l[ie.column_opx] = ie.length
+            data_layout_col = []
+            for i, c in enumerate(self.columns):
+                data_layout_col.append((c, c_l.get(i, 4294967295)))
+            data_layout_col.sort(key = lambda c: c[0].private_data.get("physical_pos", 0))
+            return data_layout_col
         data_layout_col = []
         for idx in self.indexes:
             if (
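Reduced to a runnable sketch, the fix guards the physical_pos ordering behind an existence check. Column below is a stand-in for pyinnodb's SDI column type; only the private_data key and the fallback behaviour come from the diff. Note that any() expresses the commit's manual flag-and-break loop more compactly:

import dataclasses
import typing

@dataclasses.dataclass
class Column:                      # stand-in for pyinnodb's SDI column type
    name: str
    private_data: typing.Dict[str, int]

columns = [
    Column("b", {"physical_pos": 2}),
    Column("id", {"physical_pos": 0}),
    Column("a", {"physical_pos": 1}),
]

# Only trust physical_pos when some column actually carries it; otherwise fall
# back to the index-element ordering (the branch elided above).
if any("physical_pos" in c.private_data for c in columns):
    layout = sorted(columns, key=lambda c: c.private_data.get("physical_pos", 0))
else:
    layout = columns
print([c.name for c in layout])    # ['id', 'a', 'b']

The @cache decorator on get_disk_data_layout additionally memoizes the computed layout per table object, which lines up with the commit's startup-speed goal.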
