-
Notifications
You must be signed in to change notification settings - Fork 19
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
landing_worker: delete patch content after landing (bug 1879536) #374
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -439,6 +439,11 @@ def run_job( | |
LandingJobAction.LAND, commit_id=commit_id, commit=True, db=db | ||
) | ||
|
||
# Patches are no longer necessary, delete them. | ||
for revision in job.revisions: | ||
revision.patch_bytes = b"" | ||
db.session.commit() | ||
|
||
Comment on lines
+442
to
+446
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we should be deleting these right away, it's useful to have the patch contents around for debugging and other archaeological purposes. It would be a pain if we needed to inspect the patch contents but we couldn't do so, because we want to save a few MB of disk space. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We discussed this previously and concluded that since the commit hash is present, then it's not going to be much added value to have the raw patch after landing (for successful landings, at least). I agree that there could be some benefit to keeping those around, however, these are taking a bit more than a few MBs at this time (a good portion of the 2.3GB as of today) and appear to be increasing dramatically in the last few weeks. As I understand we have a cap of 10GB (or at least, in the current tier). The time limit approach might be better though regardless. Let me think some more about this. |
||
mots_path = Path(hgrepo.path) / "mots.yaml" | ||
if mots_path.exists(): | ||
logger.info(f"{mots_path} found, setting reviewer data.") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -245,6 +245,23 @@ def test_try_api_success_hgexport( | |
worker = LandingWorker(sleep_seconds=0.01) | ||
hgrepo = HgRepo(hg_clone.strpath) | ||
|
||
assert len(job.revisions) == 1, "Job should be landing a single revision." | ||
revision = job.revisions[0] | ||
assert revision.patch_bytes == ( | ||
b"# HG changeset patch\n" | ||
b"# User Test User <[email protected]>\n" | ||
b"# Date 0 +0000\n" | ||
b"# Diff Start Line 6\n" | ||
b"add another file.\n" | ||
b"\n" | ||
b"diff --git a/test.txt b/test.txt\n" | ||
b"--- a/test.txt\n" | ||
b"+++ b/test.txt\n" | ||
b"@@ -1,1 +1,2 @@\n" | ||
b" TEST\n" | ||
b"+adding another line" | ||
), "Patch diff should be parsed from patch body." | ||
|
||
assert worker.run_job(job, repo, hgrepo, treestatus) | ||
assert job.status == LandingJobStatus.LANDED | ||
assert len(job.landed_commit_id) == 40 | ||
|
@@ -267,20 +284,7 @@ def test_try_api_success_hgexport( | |
assert ( | ||
revision.patch_data["timestamp"] == "0" | ||
), "Timestamp should be parsed from `Date` header." | ||
assert revision.patch_bytes == ( | ||
b"# HG changeset patch\n" | ||
b"# User Test User <[email protected]>\n" | ||
b"# Date 0 +0000\n" | ||
b"# Diff Start Line 6\n" | ||
b"add another file.\n" | ||
b"\n" | ||
b"diff --git a/test.txt b/test.txt\n" | ||
b"--- a/test.txt\n" | ||
b"+++ b/test.txt\n" | ||
b"@@ -1,1 +1,2 @@\n" | ||
b" TEST\n" | ||
b"+adding another line" | ||
), "Patch diff should be parsed from patch body." | ||
assert revision.patch_bytes == b"", "Patch diff should be cleared after landing." | ||
|
||
|
||
def test_try_api_success_gitformatpatch( | ||
|
@@ -334,6 +338,26 @@ def test_try_api_success_gitformatpatch( | |
worker = LandingWorker(sleep_seconds=0.01) | ||
hgrepo = HgRepo(hg_clone.strpath) | ||
|
||
assert len(job.revisions) == 1, "Job should be landing a single revision." | ||
revision = job.revisions[0] | ||
|
||
assert revision.patch_bytes == ( | ||
b"# HG changeset patch\n" | ||
b"# User Connor Sheehan <[email protected]>\n" | ||
b"# Date 1657139769 +0000\n" | ||
b"# Diff Start Line 8\n" | ||
b"add another file\n" | ||
b"\n" | ||
b"add another file to the repo.\n" | ||
b"\n" | ||
b"diff --git a/test.txt b/test.txt\n" | ||
b"--- a/test.txt\n" | ||
b"+++ b/test.txt\n" | ||
b"@@ -1,1 +1,2 @@\n" | ||
b" TEST\n" | ||
b"+adding another line" | ||
), "Patch diff should be parsed from patch body." | ||
|
||
# Assert the job landed against the expected commit hash. | ||
assert worker.run_job(job, repo, hgrepo, treestatus) | ||
assert job.status == LandingJobStatus.LANDED | ||
|
@@ -343,8 +367,6 @@ def test_try_api_success_gitformatpatch( | |
), "Target changeset should match the passed value." | ||
|
||
# Test the revision content matches expected. | ||
assert len(job.revisions) == 1, "Job should have landed a single revision." | ||
revision = job.revisions[0] | ||
assert ( | ||
revision.patch_data["author_name"] == "Connor Sheehan" | ||
), "Patch author should be parsed from `From` header." | ||
|
@@ -357,19 +379,4 @@ def test_try_api_success_gitformatpatch( | |
assert ( | ||
revision.patch_data["timestamp"] == "1657139769" | ||
), "Timestamp should be parsed from `Date` header." | ||
assert revision.patch_bytes == ( | ||
b"# HG changeset patch\n" | ||
b"# User Connor Sheehan <[email protected]>\n" | ||
b"# Date 1657139769 +0000\n" | ||
b"# Diff Start Line 8\n" | ||
b"add another file\n" | ||
b"\n" | ||
b"add another file to the repo.\n" | ||
b"\n" | ||
b"diff --git a/test.txt b/test.txt\n" | ||
b"--- a/test.txt\n" | ||
b"+++ b/test.txt\n" | ||
b"@@ -1,1 +1,2 @@\n" | ||
b" TEST\n" | ||
b"+adding another line" | ||
), "Patch diff should be parsed from patch body." | ||
assert revision.patch_bytes == b"", "Patch diff should be cleared after landing." |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should make the query only consider patches that are older than a certain date, with a CLI flag to set some specific time.