Skip to content

Commit

Permalink
Basic reporting
Browse files Browse the repository at this point in the history
Example output: https://dpaste.com/FWAYRXSVM

Several TODO's left, but perhaps functional enough to merge?

Towards #9
  • Loading branch information
raboof committed Mar 16, 2024
1 parent 38f35d3 commit a2b1e9c
Show file tree
Hide file tree
Showing 9 changed files with 256 additions and 6 deletions.
48 changes: 48 additions & 0 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,51 @@ Run the server with `uvicorn web:app --reload`
};
```

### Reporting

At the time of writing only reports on run-time closures are supported.
Reporting is experimental and still expected to evolve, change, and
grow support for build-time closures as well.

#### Defining a report

You define a report by uploading a JSON CycloneDX SBOM as produced by
[nix-runtime-tree-to-sbom](https://codeberg.org/raboof/nix-runtime-tree-to-sbom):

```
$ nix-store -q --tree $(nix-build '<nixpkgs/nixos/release-combined.nix>' -A nixos.iso_gnome.x86_64-linux) > tree.txt
$ cat tree.txt | ~/dev/nix-runtime-tree-to-sbom/tree-to-cyclonedx.py > sbom.cdx.json
$ export HASH_COLLECTION_TOKEN=XYX # your token
$ curl -X PUT --data @sbom.cdx.json "http://localhost:8000/reports/gnome-iso-runtime" -H "Content-Type: application/json" -H "Authorization: Bearer $HASH_COLLECTION_TOKEN"
```

#### Populating the report

If you want to populate the report with hashes from different builders (e.g. from
cache.nixos.org and from your own rebuilds), use separate tokens for the different
sources.

##### With hashes from cache.nixos.org

```
$ nix shell .#utils
$ export HASH_COLLECTION_TOKEN=XYX # your token for the cache.nixos.org import
$ ./fetch-from-cache.sh gnome-iso-runtime
```

This script is still very much WIP, and will enter an infinite loop retrying failed fetches.

##### By rebuilding

Make sure you have the post-build hook and diff hook configured as documented above.

TODO you have to make sure all derivations are available for building on your system -
is there a smart way to do that?

```
$ export HASH_COLLECTION_TOKEN=XYX # your token for your own rebuilds
$ ./rebuilder.sh gnome-iso-runtime
```

This script is still very much WIP, and will enter an infinite loop retrying failed rebuilds.
You can run multiple rebuilders in parallel.
19 changes: 19 additions & 0 deletions fetch-from-cache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
#
# Repeatedly ask the hash collection server which output paths of a report
# still need attestations, and import each of them with `copy-from-cache`.
#
# Usage: fetch-from-cache.sh <report-name>
#
# Environment:
#   HASH_COLLECTION_TOKEN   bearer token sent to the server
#   HASH_COLLECTION_SERVER  server base URL (default: http://localhost:8000)
#
# NOTE: this loops forever; stop it with Ctrl-C.

REPORT=$1
# Keep a server the caller already configured; fall back to the local dev server.
export HASH_COLLECTION_SERVER="${HASH_COLLECTION_SERVER:-http://localhost:8000}"

if [ -z "$REPORT" ]; then
  echo "Usage: $0 <report-name>" >&2
  exit 1
fi

while true; do
  # --fail: on an HTTP error curl prints nothing to stdout, so an error body
  # such as {"detail": "Report not found"} is never fed to copy-from-cache.
  # jq -r emits the raw strings, so no quote stripping is needed afterwards.
  curl --fail -H "Authorization: Bearer $HASH_COLLECTION_TOKEN" \
      "$HASH_COLLECTION_SERVER/reports/$REPORT/suggested" \
    | jq -r '.[]' | head -50 | while read -r out
  do
    echo "$out"
    # TODO some/most of these can probably also be found in the
    # local cache (with a cache.nixos.org signature), so perhaps take them from there?
    copy-from-cache "$out"
  done
done
6 changes: 3 additions & 3 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions rebuilder.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
#
# Repeatedly ask the hash collection server which output paths of a report
# still need rebuilding, look up the derivation(s) for each path and rebuild
# them with `nix-build --check`.
#
# Usage: rebuilder.sh <report-name>
#
# Environment:
#   HASH_COLLECTION_TOKEN   bearer token sent to the server
#   HASH_COLLECTION_SERVER  server base URL (default: http://localhost:8000)
#
# NOTE: this loops forever; stop it with Ctrl-C.

REPORT=$1
SERVER="${HASH_COLLECTION_SERVER:-http://localhost:8000}"

if [ -z "$REPORT" ]; then
  echo "Usage: $0 <report-name>" >&2
  exit 1
fi

while true; do
  # --fail: on an HTTP error curl prints nothing to stdout, so an error body
  # is never interpreted as a list of store paths.
  curl --fail -H "Authorization: Bearer $HASH_COLLECTION_TOKEN" \
      "$SERVER/reports/$REPORT/suggested" \
    | jq -r '.[]' | head | while read -r out
  do
    # The keys of `nix derivation show` output are the .drv paths.
    # `keys[]` is used instead of `keys.[]` because older jq releases appear
    # to reject the dotted form (NOTE(review): confirm against your jq version).
    # If nix fails here, jq gets empty input and the inner loop does nothing.
    nix derivation show "$out" | jq -r 'keys[]' | while read -r drv
    do
      # TODO select the right output to rebuild?
      nix-build "$drv" --check
    done
  done
done
5 changes: 5 additions & 0 deletions utils/src/bin/copy-from-cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ async fn fetch<'a>(out_path: &'a str) -> (String, OutputAttestation<'a>) {
.text()
.await.expect("Fetching the response body");

// TODO handle 404

// TODO Deriver is not populated for static inputs, and may be super useful:
// the same output may have multiple derivers even for non-FOD derivations.
// Should we make it optional in the data model / API as well?
let deriver = Regex::new(r"(?m)Deriver: (.*).drv").unwrap()
.captures(&response).unwrap().get(1).unwrap().as_str().to_owned();
let nar_hash = Regex::new(r"(?m)NarHash: (.*)").unwrap()
Expand Down
92 changes: 91 additions & 1 deletion web/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import random
import typing as t
from fastapi import Depends, FastAPI, HTTPException
from fastapi import Depends, FastAPI, HTTPException, Response
from fastapi.security.http import HTTPAuthorizationCredentials, HTTPBearer
from fastapi.middleware.cors import CORSMiddleware
from sqlalchemy.orm import Session
Expand Down Expand Up @@ -77,6 +79,23 @@ def get_drv(drv_hash: str,
def get_drv_recap(drv_hash: str, db: Session = Depends(get_db)) -> schemas.DerivationAttestation:
return get_drv_recap_or_404(db, drv_hash)

# Suggested rebuilds
@app.get("/reports/{name}/suggested")
def derivations_suggested_for_rebuilding(
    name: str,
    token: str = Depends(get_token),
    db: Session = Depends(get_db),
):
    """Return up to 50 output paths of report ``name`` suggested for rebuilding.

    The candidate paths are the ``nix:out_path`` values of the stored report;
    ``crud.suggest`` decides which of them are still worth building for the
    user that owns ``token``.

    Raises:
        HTTPException: 404 when no report with this name exists.
    """
    report = crud.report(db, name)
    if report is None:
        raise HTTPException(status_code=404, detail="Report not found")
    paths = report_out_paths(report)

    user = crud.get_user_with_token(db, token)
    suggestions = crud.suggest(db, paths, user)
    # Randomise before truncating so repeated calls return different slices
    # (presumably so that parallel rebuilders do not all pick the same paths).
    random.shuffle(suggestions)
    return suggestions[:50]

@app.post("/attestation/{drv_hash}")
def record_attestation(
drv_hash: str,
Expand All @@ -93,4 +112,75 @@ def record_attestation(
"Attestation accepted"
}

@app.get("/attestations/by-output/{output_path}")
def attestations_by_out(output_path: str, db: Session = Depends(get_db)):
    """Return all attestations recorded for one store path.

    ``output_path`` is the store path without its ``/nix/store/`` prefix;
    the prefix is added back before querying.
    """
    full_path = "/nix/store/" + output_path
    matching = db.query(models.Attestation).filter_by(output_path=full_path)
    return matching.all()

def report_out_paths(report):
    """Collect the ``nix:out_path`` property values of all report components.

    Args:
        report: the parsed report definition (a dict with an optional
            ``components`` list whose items may carry a ``properties`` list
            of ``{"name": ..., "value": ...}`` dicts).

    Returns:
        The values of every ``nix:out_path`` property, in document order.
        Components without properties, and a report without components,
        contribute nothing instead of raising ``KeyError``.
    """
    paths = []
    for component in report.get('components') or []:
        for prop in component.get('properties') or []:
            if prop.get('name') == "nix:out_path" and 'value' in prop:
                paths.append(prop['value'])
    return paths

@app.get("/reports")
def reports(db: Session = Depends(get_db)):
    """Return the names of all stored reports."""
    return [row.name for row in db.query(models.Report).all()]

def printtree(root, deps, results, cur_indent=0, seen=None):
    """Render the dependency tree below ``root`` as indented plain text.

    Each node is printed on its own line, indented two spaces per level,
    with the first 11 characters (the ``/nix/store/`` prefix) stripped and
    followed by its entry from ``results`` when there is one. A node that
    was already printed anywhere earlier in the tree is replaced by a
    ``...`` line, so shared subtrees are shown once and dependency cycles
    terminate.

    Args:
        root: reference of the node to start from.
        deps: list of dicts with a ``ref`` and an optional ``dependsOn``
            list of child references.
        results: mapping from reference to a summary string.
        cur_indent: indentation (in spaces) of the ``root`` line.
        seen: mapping of references that were already printed; it is
            updated in place. Defaults to a fresh, empty mapping.

    Returns:
        The rendered tree; every line ends with a newline.
    """
    if seen is None:
        seen = {}

    # Index the edges once so looking up a node's children does not require
    # scanning the whole dependency list for every node.
    children = {}
    for dep in deps:
        if 'dependsOn' in dep:
            children.setdefault(dep['ref'], []).extend(dep['dependsOn'])

    def render(node, indent):
        prefix = " " * indent
        if node in seen:
            return [prefix + "...\n"]
        # The same `seen` mapping is shared by the whole traversal; that is
        # what makes the "already printed" check effective.
        seen[node] = True

        line = prefix + node[11:]
        if node in results:
            line += " " + results[node]
        lines = [line + "\n"]
        for child in children.get(node, ()):
            lines.extend(render(child, indent + 2))
        return lines

    return "".join(render(root, cur_indent))

@app.get("/reports/{name}")
def report(
    name: str,
    db: Session = Depends(get_db),
):
    """Render report ``name`` as a plain-text dependency tree.

    The tree starts at the ``bom-ref`` of the report's metadata component
    and every node is annotated with the summary computed by
    ``crud.path_summaries`` for the report's output paths.

    Raises:
        HTTPException: 404 when no report with this name exists.
    """
    definition = crud.report(db, name)
    if definition is None:
        raise HTTPException(status_code=404, detail="Report not found")

    paths = report_out_paths(definition)

    root = definition['metadata']['component']['bom-ref']
    results = crud.path_summaries(db, paths)

    # A definition without a dependency section still renders its root.
    dependencies = definition.get('dependencies', [])

    # TODO return as something more suitable for displaying ;)
    return Response(
        content=printtree(root, dependencies, results),
        media_type='text/plain')

@app.put("/reports/{name}")
def define_report(
    name: str,
    definition: schemas.ReportDefinition,
    token: str = Depends(get_token),
    db: Session = Depends(get_db),
):
    """Store the uploaded JSON document as the definition of report ``name``.

    Raises:
        HTTPException: 401 when the bearer token does not belong to a user.
    """
    user = crud.get_user_with_token(db, token)
    if user is None:
        raise HTTPException(status_code=401, detail="User not found")
    crud.define_report(db, name, definition.root)
    # A one-element set literal; the response shape is kept unchanged.
    return {
        "Report defined"
    }
62 changes: 61 additions & 1 deletion web/crud.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from sqlalchemy import values
import json

from sqlalchemy import distinct, func, select, values
from sqlalchemy.dialects.sqlite import insert
from sqlalchemy.orm import Session
from sqlalchemy.sql.functions import user
Expand All @@ -25,8 +27,66 @@ def create_attestation(db: Session, drv_hash: str, output_hash_map: list[schemas
))
db.commit()

def report(db: Session, name: str):
    """Load the parsed definition of the report called ``name``.

    Report names are not declared unique in the model, so several rows may
    share a name (e.g. after the same report was uploaded twice). Instead of
    failing on such duplicates, the row with the highest id is used
    (presumably the most recent upload — ids are assumed to grow over time).

    Returns:
        The JSON-decoded definition, or ``None`` when no such report exists.
    """
    row = (
        db.query(models.Report)
        .filter_by(name=name)
        .order_by(models.Report.id.desc())
        .first()
    )
    if row is None:
        return None
    return json.loads(row.definition)

def suggest(db: Session, paths, user_id):
    """Return the subset of ``paths`` that is still worth (re)building.

    A path is dropped when
      * ``user_id`` is given and that user already attested it, or
      * it already has more than one attestation in total.

    Args:
        db: database session.
        paths: output paths to consider; this list is not modified.
        user_id: value matched against ``Attestation.user_id``; ``None``
            skips the per-user filter.

    Returns:
        A new list with the remaining paths, in their original order.
    """
    # Derivations in the database might not match derivations on the rebuilder system.
    # TODO: can this happen only for FODs or also for other derivations?
    # TODO: Add enough metadata to the report so you know what to nix-instantiate to get all relevant drvs
    excluded = set()

    # Test the parameter itself: the bare name `user` in this module refers
    # to the SQL function imported from sqlalchemy, which is always truthy.
    if user_id is not None:
        own_attestations = (
            db.query(models.Attestation)
            .filter(models.Attestation.output_path.in_(paths))
            .filter_by(user_id=user_id)
        )
        for attestation in own_attestations:
            excluded.add(attestation.output_path)

    remaining = [path for path in paths if path not in excluded]

    # TODO don't consider attestations that have been built twice by the same user
    # as 'rebuilt'
    stmt = (
        select(models.Attestation.output_path)
        .where(models.Attestation.output_path.in_(remaining))
        .group_by(models.Attestation.output_path)
        .having(func.count(models.Attestation.id) > 1)
    )
    for row in db.execute(stmt):
        excluded.add(row._mapping['output_path'])

    # Filtering (rather than list.remove) also drops repeated occurrences of
    # an excluded path and leaves the caller's list untouched.
    return [path for path in remaining if path not in excluded]

# TODO ideally this should take into account derivation paths as well as
# output paths, as for example for a fixed-output derivation we'd want
# to rebuild it with each different collection of inputs, not just once.
# OTOH, it seems caches may also have different derivers for non-FODs?
# To look into further: https://github.com/NixOS/nix/issues/7562
def _describe_builds(n_results, distinct_results):
    """Map attestation counts to a summary label, or ``None`` if none applies."""
    if n_results == 1:
        return "One build"
    if distinct_results == 1:
        return "✅ Successfully reproduced"
    if distinct_results < n_results:
        return "Partially reproduced"
    if distinct_results == n_results:
        return "Consistently nondeterministic"
    return None


def path_summaries(db: Session, paths):
    """Summarise, per output path, how reproducible its attestations are.

    For every path the number of attestations and the number of distinct
    output hashes among them are counted and turned into a human-readable
    label. Paths without any attestation are reported as "No builds".

    Returns:
        A dict mapping each path in ``paths`` to its label.
    """
    # TODO make sure multiple identical results from the same submitter
    # don't get counted as 'successfully reproduced'
    attestation = models.Attestation
    total = func.count(attestation.id)
    unique_hashes = func.count(distinct(attestation.output_hash))
    query = (
        select(attestation.output_path, total, unique_hashes)
        .where(attestation.output_path.in_(paths))
        .group_by(attestation.output_path)
    )

    summaries = dict.fromkeys(paths, "No builds")
    for output_path, n_results, distinct_results in db.execute(query):
        label = _describe_builds(n_results, distinct_results)
        if label is not None:
            summaries[output_path] = label
    return summaries

def define_report(db: Session, name: str, definition: dict):
    """Create or replace the report called ``name``.

    The definition is stored as a JSON string. Any existing rows with the
    same name are deleted first, so uploading a report again replaces it
    instead of leaving several rows that share one name.

    Args:
        db: database session; the change is committed before returning.
        name: report name.
        definition: JSON-serialisable report definition.
    """
    # Serialise up front so a non-serialisable definition fails before
    # anything is deleted.
    serialized = json.dumps(definition)
    db.query(models.Report).filter_by(name=name).delete()
    db.execute(
        insert(models.Report).values({
            "name": name,
            "definition": serialized,
        }))
    db.commit()

def get_user_with_token(db: Session, token_val: str):
token = db.query(models.Token).filter_by(value=token_val).one_or_none()
Expand Down
8 changes: 8 additions & 0 deletions web/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,11 @@ class Attestation(Base):
derivation: Mapped["Derivation"] = relationship(back_populates="attestations")
output_hash: Mapped[str] = mapped_column()

class Report(Base):
    """A named report definition, stored in the ``reports`` table."""
    __tablename__ = "reports"
    # Surrogate primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    # Name used to address the report; note that it is not declared unique here.
    name: Mapped[str] = mapped_column()
    # For now we store the definition in a CycloneDX JSON blob,
    # later we might want to normalize it into its own database
    # structure.
    definition: Mapped[str] = mapped_column()
3 changes: 2 additions & 1 deletion web/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,6 @@ class DerivationAttestation(RootModel):
}
}


class ReportDefinition(RootModel):
    """Request body of a report upload: an arbitrary JSON object.

    The object is exposed as ``root`` and is not validated any further here.
    """
    root: dict

0 comments on commit a2b1e9c

Please sign in to comment.