Skip to content

Commit

Permalink
check-payload-headers: fix comparison order
Browse files Browse the repository at this point in the history
It turns out that the ordering of the archive entries doesn't reliably
match the file entries; RPM basically just unpacks the archive and then
applies the header information to the given path(s).

This fixes check-payload-headers so that it iterates through the payload
entries and compares the metadata from each of those to the
corresponding header metadata by matching the full file paths.

As at the time of this commit I've run this comparison for each file in
the payload of 400,000 RPMs from CentOS 7, RHEL 7, and Fedora 25 through
29, and found no problems/mismatches, so that's good!

(This also confirms my suspicion that the cpio headers are totally
redundant; rpm2cpio could generate cpio headers from the rpm header
metadata with no loss of data.)
  • Loading branch information
wgwoods committed May 2, 2019
1 parent 0390b45 commit 0102a02
Showing 1 changed file with 28 additions and 29 deletions.
57 changes: 28 additions & 29 deletions check-payload-headers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,32 +25,25 @@ def checkprobs(probspecs):
probs.append(prob)
return probs

def cpio_sorted(hdrinfo):
hardlinks = []
for i in hdrinfo:
# %ghost files don't show up in the payload, obviously
if Attrs.GHOST in i.flags:
continue
# hardlinks get shuffled to the end of the archive
if i.nlink > 1:
hardlinks.append(i)
# spit out everything else in header order
else:
yield i
# spit out hardlinks at the end
while hardlinks:
yield hardlinks.pop(0)

def compare_payload_to_hdr(rpmfn):
r = rpm(rpmfn)
probcount = 0
probs = []
names = []
with r.payload_reader() as payload:
for payi, hdri in zip(payload, cpio_sorted(r.payloadinfo())):
names.append(hdri.name)
p = checkprobs([
('name', '.'+hdri.name, payi.name),

rpmfileinfo = {"."+f.name:f for f in r.payloadinfo()}
if len(rpmfileinfo) != r.nfiles():
probs.append("rpm has multiple files with the same name")

for payi in r.payload_iter():
names.append(payi.name)
p = []
if payi.name not in rpmfileinfo:
p.append("not listed in RPM headers!")
else:
hdri = rpmfileinfo[payi.name]
# TODO: pop this from rpmfileinfo; emit probs at end for orphan hdris
p += checkprobs([
('mtime', hdri.stat.mtime, payi.mtime),
('mode', hdri.stat.mode, payi.mode),
('rdev', hdri.extra.get('rdev',0), makedev(payi.rdevmajor, payi.rdevminor)),
Expand All @@ -63,14 +56,17 @@ def compare_payload_to_hdr(rpmfn):
('ctime', 0, payi.ctime),
('birthtime', 0, payi.birthtime),
])
# CPIO doesn't store size for directories or hardlinks, I guess?
if not (payi.isdir or (hdri.nlink > 1 and not payi.islnk)):
# CPIO doesn't store size for directories and only stores the size
# of one file of a set of hardlinks, so skip the size check if
# this is a directory or if it's a hardlink with no size listed
if not (payi.isdir or (hdri.nlink > 1 and not payi.size)):
p += checkprobs([('size', hdri.stat.size, payi.size)])
# Stash list of problems for this file
probs.append(p)
# Increase counter if we found any problems
if p:
probcount += 1

# Stash list of problems for this file
probs.append(p)
# Increase counter if we found any problems
if p:
probcount += 1

print("{}: {} files checked, {} problems".format(rpmfn, len(names), probcount))
if probcount:
Expand All @@ -79,8 +75,11 @@ def compare_payload_to_hdr(rpmfn):
print(" "+fname+":")
for p in fprobs:
print(" "+p)
return probcount

if __name__ == '__main__':
import sys
nprobs = 0
for fn in sys.argv[1:]:
compare_payload_to_hdr(fn)
nprobs += compare_payload_to_hdr(fn)
raise SystemExit(1 if nprobs else 0)

0 comments on commit 0102a02

Please sign in to comment.