Skip to content

Commit

Permalink
More use of Inventory
Browse files Browse the repository at this point in the history
  • Loading branch information
zimeon committed Nov 1, 2024
1 parent 6f88b11 commit d2d3961
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 61 deletions.
49 changes: 38 additions & 11 deletions ocfl/inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def __init__(self, data=None, filepath=None):
elif isinstance(data, Inventory):
self.data = copy.deepcopy(data.data)
elif isinstance(data, dict):
self.data = copy.deepcopy(data)
self.data = data
else:
raise InventoryException("Bad data type supplied to Inventory() creator, " + str(type(data)))

Expand Down Expand Up @@ -161,8 +161,8 @@ def content(self):
digests for each file. Essentially an inversion of the manifest.
"""
files = {}
for digest in self.manifest:
for file in self.manifest[digest]:
for digest, files_for_digest in self.manifest.items():
for file in files_for_digest:
files[file] = digest
return files

Expand Down Expand Up @@ -265,24 +265,27 @@ def content_path_for_digest(self, digest):
return paths[0]
return None

def add_version(self, vdir=None):
def add_version(self, vdir=None, metadata=None):
"""Add new version object to the versions block.
Arguments:
vdir: string with the version directory name (e.g. "v1"). If None
then will create the next version in sequence
metadata: dict to initialize version metadata with, else None to
create empty
Returns a Version object to access version properties.
"""
highest_version = 0
for vvdir in self.version_directories:
highest_version = max(highest_version,
parse_version_directory(vvdir))
# FIXME - Need to deal with zero padding
vdir = "v" + str(highest_version + 1)
if vdir is None:
highest_version = 0
for vvdir in self.version_directories:
highest_version = max(highest_version,
parse_version_directory(vvdir))
# FIXME - Need to deal with zero padding
vdir = "v" + str(highest_version + 1)
if "versions" not in self.data:
self.data["versions"] = {}
self.data["versions"][vdir] = {}
self.data["versions"][vdir] = {} if metadata is None else metadata
self.data["head"] = vdir
return self.version(vdir)

Expand Down Expand Up @@ -311,6 +314,14 @@ def as_json(self):
"""Serialize JSON representation."""
return json.dumps(self.data, sort_keys=True, indent=2)

def write_json(self, fh):
    """Write this inventory's data to an open file handle as JSON.

    Arguments:
        fh: writable text filehandle that receives the JSON output

    The output is formatted with sorted keys and a 2-space indent.
    """
    # Keep the formatting options together so the layout is easy to see
    json_options = {"sort_keys": True, "indent": 2}
    json.dump(self.data, fh, **json_options)

def init_manifest_and_versions(self):
"""Initialize manifest and versions blocks for building new inventory."""
self.manifest = {}
Expand All @@ -329,6 +340,22 @@ def add_fixity_type(self, digest_algorithm):
self.data["fixity"] = {}
self.data["fixity"][digest_algorithm] = {}

def add_fixity_data(self, digest_algorithm, digest, filepath):
    """Add fixity information for a file.

    Arguments:
        digest_algorithm: string of the digest algorithm specifying this
            fixity type
        digest: string digest value to record the file under
        filepath: path to append to the list of files for this digest

    Assumes that there is already fixity block and within that a block for
    the specific digest_algorithm. Neither block is created here; the
    lookup of a missing digest_algorithm presumably fails with KeyError
    (depends on how self.fixity is implemented - confirm).
    """
    # setdefault starts a new list the first time a digest is seen and
    # otherwise returns the existing list, so one append covers both cases
    self.fixity[digest_algorithm].setdefault(digest, []).append(filepath)


class Version():
"""Version class to represent version information in an Inventory.
Expand Down
57 changes: 30 additions & 27 deletions ocfl/object.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,9 @@ def add_version(self, *, inventory, src_fs, src_dir, vdir, metadata=None):
"""Add to inventory data for new version based on files in srcdir.
Arguments:
inventory: the inventory up to (vdir-1) which must include blocks
for ['manifest'] and ['versions']. It must also include
a ['fixity'][algorithm] block for every algorithm in self.fixity
inventory: an Inventory object with data up to version (vdir-1) which
must include blocks for the manifest and versions. It must also
include a fixity block for every algorithm in self.fixity
src_fs: pyfs filesystem where this new version exists
src_dir: the version directory in src_fs that files are being added
from
Expand All @@ -210,7 +210,7 @@ def add_version(self, *, inventory, src_fs, src_dir, vdir, metadata=None):
content for this new version.
"""
state = {} # state for this new version
manifest = inventory['manifest']
manifest = inventory.manifest
digests_in_version = {}
manifest_to_srcfile = {}
# Go through all files to find new files in manifest and state for this version
Expand Down Expand Up @@ -245,17 +245,14 @@ def add_version(self, *, inventory, src_fs, src_dir, vdir, metadata=None):
# Add extra fixity entries if required
if self.fixity is not None:
for fixity_type in self.fixity:
fixities = inventory['fixity'][fixity_type]
for digest, vfilepaths in digests_in_version.items():
for vfilepath in vfilepaths:
fixity_digest = file_digest(manifest_to_srcfile[vfilepath], fixity_type, pyfs=src_fs)
if fixity_digest not in fixities:
fixities[fixity_digest] = [vfilepath]
else:
fixities[fixity_digest].append(vfilepath)
# Set head to this latest version, and add this version to inventory
inventory['head'] = vdir
inventory['versions'][vdir] = metadata.as_dict(state=state)
inventory.add_fixity_data(digest_algorithm=fixity_type,
digest=fixity_digest,
filepath=vfilepath)
# Add this new version to inventory (also updates head)
inventory.add_version(vdir=vdir, metadata=metadata.as_dict(state=state))
return manifest_to_srcfile

def build_inventory(self, src_fs, versions_metadata=None):
Expand All @@ -274,7 +271,7 @@ def build_inventory(self, src_fs, versions_metadata=None):
"""
if versions_metadata is None:
versions_metadata = {}
inventory = self.start_inventory().data
inventory = self.start_inventory()
# Find the versions
versions = {}
for vdir in src_fs.listdir('/'):
Expand All @@ -297,7 +294,7 @@ def build_inventory(self, src_fs, versions_metadata=None):
src_dir=vdir,
vdir=vdir,
metadata=metadata)
yield (vdir, inventory, manifest_to_srcfile)
yield (vdir, inventory.data, manifest_to_srcfile)

def object_declaration_object(self):
"""NAMASTE object declaration Namaste object."""
Expand All @@ -310,21 +307,27 @@ def write_object_declaration(self):
"""
self.object_declaration_object().write(pyfs=self.obj_fs)

def write_inventory_and_sidecar(self, inventory, vdir='', write_inventory=True):
def write_inventory_and_sidecar(self, inventory=None, vdir=''):
"""Write inventory and sidecar to vdir in the current object.
Arguments:
inventory: an Inventory object to write the inventory, else None
if only the sidecar should be written (default)
vdir: string of the directory name within self.obj_fs that the
inventory and sidecar should be written to. Default is ''
Assumes self.obj_fs is open for this object. Will create vdir if that
does not exist. If vdir is not specified then will write to root of
the object.
the object filesystem.
Returns the inventory sidecar filename.
"""
if not self.obj_fs.exists(vdir):
self.obj_fs.makedir(vdir)
invfile = fs.path.join(vdir, INVENTORY_FILENAME)
if write_inventory:
if inventory is not None:
with self.obj_fs.open(invfile, 'w') as fh:
json.dump(inventory, fh, sort_keys=True, indent=2)
inventory.write_json(fh)
digest = file_digest(invfile, self.digest_algorithm, pyfs=self.obj_fs)
sidecar = fs.path.join(vdir, INVENTORY_FILENAME + '.' + self.digest_algorithm)
with self.obj_fs.open(sidecar, 'w') as fh:
Expand All @@ -336,7 +339,7 @@ def write_inventory_sidecar(self):
Returns the inventory sidecar filename.
"""
return self.write_inventory_and_sidecar(None, write_inventory=False)
return self.write_inventory_and_sidecar(inventory=None)

def build(self, srcdir, versions_metadata=None, objdir=None):
"""Build an OCFL object with multiple versions.
Expand Down Expand Up @@ -367,15 +370,15 @@ def build(self, srcdir, versions_metadata=None, objdir=None):
for (vdir, inventory, manifest_to_srcfile) in self.build_inventory(src_fs, versions_metadata):
num_versions += 1
if objdir is not None:
self.write_inventory_and_sidecar(inventory, vdir)
self.write_inventory_and_sidecar(Inventory(inventory), vdir)
# Copy files into this version
for (path, srcfile) in manifest_to_srcfile.items():
self.copy_into_object(src_fs, srcfile, path, create_dirs=True)
# Finally populate the object root
if objdir is not None:
# Write object declaration, inventory and sidecar
self.write_object_declaration()
self.write_inventory_and_sidecar(inventory)
self.write_inventory_and_sidecar(Inventory(inventory))
logging.info("Built object %s at %s with %s versions", self.id, objdir, num_versions)
# Whether object written or not, return the last inventory
return inventory
Expand All @@ -397,16 +400,16 @@ def create(self, srcdir, metadata=None, objdir=None):
self.open_fs(objdir, create=True)
inventory = self.start_inventory()
vdir = 'v1'
manifest_to_srcfile = self.add_version(inventory=inventory.data, src_fs=src_fs,
manifest_to_srcfile = self.add_version(inventory=inventory, src_fs=src_fs,
src_dir='', vdir=vdir,
metadata=metadata)
if objdir is None:
return inventory.data
# Write out v1 object
self.write_inventory_and_sidecar(inventory.data, vdir)
self.write_inventory_and_sidecar(inventory, vdir)
# Write object root with object declaration, inventory and sidecar
self.write_object_declaration()
self.write_inventory_and_sidecar(inventory.data)
self.write_inventory_and_sidecar(inventory)
# Write version files
for path in inventory.content_paths:
srcfile = manifest_to_srcfile[path]
Expand Down Expand Up @@ -501,15 +504,15 @@ def update(self, objdir, srcdir=None, metadata=None):
inventory['versions'][head] = metadata.as_dict(state=state)
else:
src_fs = open_fs(srcdir)
manifest_to_srcfile = self.add_version(inventory=inventory, src_fs=src_fs,
manifest_to_srcfile = self.add_version(inventory=Inventory(inventory), src_fs=src_fs,
src_dir='', vdir=head,
metadata=metadata)
# Copy files into this version
for (path, srcfile) in manifest_to_srcfile.items():
self.copy_into_object(src_fs, srcfile, path, create_dirs=True)
# Write inventory in both root and head version
self.write_inventory_and_sidecar(inventory, head)
self.write_inventory_and_sidecar(inventory)
self.write_inventory_and_sidecar(Inventory(inventory), head)
self.write_inventory_and_sidecar(Inventory(inventory))
# Delete old root inventory sidecar if we changed digest algorithm
if digest_algorithm != old_digest_algorithm:
self.obj_fs.remove(INVENTORY_FILENAME + '.' + old_digest_algorithm)
Expand Down
46 changes: 23 additions & 23 deletions tests/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import fs
import fs.tempfs

from ocfl.inventory import Inventory
from ocfl.object import Object, ObjectException
from ocfl.version_metadata import VersionMetadata

Expand Down Expand Up @@ -69,42 +70,41 @@ def test05_add_version(self):
"""Test add_version method."""
self.maxDiff = None
oo = Object(digest_algorithm="md5")
inventory = {'manifest': {}, 'versions': {}}
inventory = Inventory({'manifest': {}, 'versions': {}})
with open('fixtures/1.0/content/spec-ex-full/v1_inventory.json', 'r', encoding="utf-8") as fh:
v_inventory = json.load(fh)
metadata = VersionMetadata(inventory=v_inventory, version='v1')
src_fs = fs.open_fs('fixtures/1.0/content/spec-ex-full')
oo.add_version(inventory=inventory, src_fs=src_fs,
src_dir='v1', vdir='v1', metadata=metadata)
self.assertEqual(inventory['head'], 'v1')
self.assertEqual(inventory['manifest'],
self.assertEqual(inventory.head, 'v1')
self.assertEqual(inventory.manifest,
{'184f84e28cbe75e050e9c25ea7f2e939': ['v1/content/foo/bar.xml'],
'c289c8ccd4bab6e385f5afdd89b5bda2': ['v1/content/image.tiff'],
'd41d8cd98f00b204e9800998ecf8427e': ['v1/content/empty.txt']})
self.assertEqual(inventory['versions'],
{"v1":
{'created': '2018-01-01T01:01:01Z',
'message': 'Initial import',
'state': {
'184f84e28cbe75e050e9c25ea7f2e939': ['foo/bar.xml'],
'c289c8ccd4bab6e385f5afdd89b5bda2': ['image.tiff'],
'd41d8cd98f00b204e9800998ecf8427e': ['empty.txt']},
'user': {'address': '[email protected]', 'name': 'Alice'}}})
self.assertNotIn('fixity', inventory)
self.assertEqual(inventory.versiondata("v1"),
{'created': '2018-01-01T01:01:01Z',
'message': 'Initial import',
'state': {
'184f84e28cbe75e050e9c25ea7f2e939': ['foo/bar.xml'],
'c289c8ccd4bab6e385f5afdd89b5bda2': ['image.tiff'],
'd41d8cd98f00b204e9800998ecf8427e': ['empty.txt']},
'user': {'address': '[email protected]', 'name': 'Alice'}})
self.assertNotIn('fixity', inventory.data)
# Now add second version to check forward delta
with open('fixtures/1.0/content/spec-ex-full/v2_inventory.json', 'r', encoding="utf-8") as fh:
v_inventory = json.load(fh)
metadata = VersionMetadata(inventory=v_inventory, version='v2')
src_fs = fs.open_fs('fixtures/1.0/content/spec-ex-full/v2')
oo.add_version(inventory=inventory, src_fs=src_fs,
src_dir='', vdir='v2', metadata=metadata)
self.assertEqual(inventory['head'], 'v2')
self.assertEqual(inventory['manifest'],
self.assertEqual(inventory.head, 'v2')
self.assertEqual(inventory.manifest,
{'184f84e28cbe75e050e9c25ea7f2e939': ['v1/content/foo/bar.xml'],
'2673a7b11a70bc7ff960ad8127b4adeb': ['v2/content/foo/bar.xml'],
'c289c8ccd4bab6e385f5afdd89b5bda2': ['v1/content/image.tiff'],
'd41d8cd98f00b204e9800998ecf8427e': ['v1/content/empty.txt']})
self.assertEqual(inventory['versions']['v2'],
self.assertEqual(inventory.versiondata('v2'),
{'created': '2018-02-02T02:02:02Z',
'message': 'Fix bar.xml, remove image.tiff, add empty2.txt',
'state': {
Expand All @@ -113,7 +113,7 @@ def test05_add_version(self):
'user': {'address': '[email protected]', 'name': 'Bob'}})
# Now with fixity
oo = Object(digest_algorithm="md5", fixity=['sha1'])
inventory = {'manifest': {}, 'versions': {}, 'fixity': {'sha1': {}}}
inventory = Inventory({'manifest': {}, 'versions': {}, 'fixity': {'sha1': {}}})
md1 = VersionMetadata()
with open('fixtures/1.0/content/spec-ex-full/v1_inventory.json', 'r', encoding="utf-8") as fh:
v_inventory = json.load(fh)
Expand All @@ -126,10 +126,10 @@ def test05_add_version(self):
'v1/content/empty.txt': 'empty.txt',
'v1/content/foo/bar.xml': 'foo/bar.xml'
})
self.assertEqual(len(inventory['fixity']['sha1']), 3)
self.assertEqual(len(inventory.fixity['sha1']), 3)
# Test dedupe=False and forward_delta=False settings
oo = Object(dedupe=False, forward_delta=False, fixity=['md5'])
inventory = {'manifest': {}, 'versions': {}, 'fixity': {'md5': {}}}
inventory = Inventory({'manifest': {}, 'versions': {}, 'fixity': {'md5': {}}})
md1 = VersionMetadata(inventory={
"id": "http://example.org/dedupe_content",
"versions": {
Expand All @@ -150,7 +150,7 @@ def test05_add_version(self):
'v1/content/empty1.txt': 'v1/empty1.txt',
'v1/content/empty2.txt': 'v1/empty2.txt',
'v1/content/empty3.txt': 'v1/empty3.txt'})
self.assertEqual(inventory['fixity']['md5'], {"d41d8cd98f00b204e9800998ecf8427e": [
self.assertEqual(inventory.fixity['md5'], {"d41d8cd98f00b204e9800998ecf8427e": [
"v1/content/empty1.txt", "v1/content/empty2.txt", "v1/content/empty3.txt"]})
# Add a second version which will test for forward_delta=False
md2 = VersionMetadata(inventory={
Expand All @@ -170,7 +170,7 @@ def test05_add_version(self):
# Because of forward_delta=False we will have an additional copy of the empty file
self.assertEqual(manifest_to_srcfile, {
'v2/content/empty4.txt': 'v2/empty4.txt'})
self.assertEqual(inventory['fixity']['md5'], {"d41d8cd98f00b204e9800998ecf8427e": [
self.assertEqual(inventory.fixity['md5'], {"d41d8cd98f00b204e9800998ecf8427e": [
"v1/content/empty1.txt", "v1/content/empty2.txt",
"v1/content/empty3.txt", "v2/content/empty4.txt"]})

Expand Down Expand Up @@ -201,7 +201,7 @@ def test08_write_inventory_and_sidecar(self):
"""Test write_inventory_and_sidecar."""
tmpfs = fs.tempfs.TempFS(identifier='test_write_inventory_and_sidecar')
oo = Object(obj_fs=tmpfs)
oo.write_inventory_and_sidecar({'abc': 'def'})
oo.write_inventory_and_sidecar(Inventory({'abc': 'def'}))
self.assertEqual(set(tmpfs.listdir('')),
set(['inventory.json', 'inventory.json.sha512']))
with tmpfs.open('inventory.json') as fh:
Expand All @@ -212,7 +212,7 @@ def test08_write_inventory_and_sidecar(self):
# and now making directory
oo = Object(obj_fs=tmpfs)
invdir = 'xxx'
oo.write_inventory_and_sidecar({'gh': 'ik'}, invdir)
oo.write_inventory_and_sidecar(Inventory({'gh': 'ik'}), invdir)
self.assertEqual(set(tmpfs.listdir(invdir)),
set(['inventory.json', 'inventory.json.sha512']))
with tmpfs.open(fs.path.join(invdir, 'inventory.json')) as fh:
Expand Down

0 comments on commit d2d3961

Please sign in to comment.