python-openxml · mepwang · Dec 16, 2024 · Dec 27, 2024
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
 lxml>=3.1.0
 typing-extensions
+python-magic
diff --git a/src/docx/__init__.py b/src/docx/__init__.py
@@ -13,7 +13,7 @@
 if TYPE_CHECKING:
     from docx.opc.part import Part
 
-__version__ = "1.1.2"
+__version__ = "1.1.2.2"
 
 
 __all__ = ["Document"]

diff --git a/src/docx/opc/pkgreader.py b/src/docx/opc/pkgreader.py
@@ -51,7 +51,14 @@ def _load_serialized_parts(phys_reader, pkg_srels, content_types):
         sparts = []
         part_walker = PackageReader._walk_phys_parts(phys_reader, pkg_srels)
         for partname, blob, reltype, srels in part_walker:
-            content_type = content_types[partname]
+            try:
+                content_type = content_types[partname]
+            except KeyError:
+                # -- No content-type is recorded for this part; fall back to sniffing a MIME
+                # -- type from its bytes. `magic` depends on the native libmagic library, so it
+                # -- is imported only here to keep `import docx` working where that is absent. --
+                import magic
+                content_type = magic.from_buffer(blob, mime=True)
             spart = _SerializedPart(partname, content_type, reltype, blob, srels)
             sparts.append(spart)
         return tuple(sparts)

diff --git a/src/docx/oxml/table.py b/src/docx/oxml/table.py
@@ -82,20 +82,21 @@ def tc_at_grid_offset(self, grid_offset: int) -> CT_Tc:
         Raises ValueError when this `w:tr` contains no `w:tc` with exact starting `grid_offset`.
         """
         # -- account for omitted cells at the start of the row --
-        remaining_offset = grid_offset - self.grid_before
-
+        if grid_offset < self.grid_before:
+            raise ValueError(f"no `tc` element at grid_offset={grid_offset}")
+
+        # -- Walk the cells left to right, tracking the grid column where each one starts. The
+        # -- first cell whose span extends past `grid_offset` covers it, so an offset that falls
+        # -- inside a multi-column span resolves to the spanning cell rather than raising. --
+        span_start = self.grid_before
         for tc in self.tc_lst:
-            # -- We've gone past grid_offset without finding a tc, no sense searching further. --
-            if remaining_offset < 0:
-                break
-            # -- We've arrived at grid_offset, this is the `w:tc` we're looking for. --
-            if remaining_offset == 0:
-                return tc
-            # -- We're not there yet, skip forward the number of layout-grid cells this cell
-            # -- occupies.
-            remaining_offset -= tc.grid_span
-
-        raise ValueError(f"no `tc` element at grid_offset={grid_offset}")
+            span_end = span_start + tc.grid_span
+            if grid_offset < span_end:
+                return tc
+            span_start = span_end
+
+        # -- ran out of cells before reaching `grid_offset` --
+        raise ValueError(f"no `tc` element at grid_offset={grid_offset}")
 
     @property
     def tr_idx(self) -> int: