-
Notifications
You must be signed in to change notification settings - Fork 21
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
create test data for quickrun submitted in parallel (#1040)
- Loading branch information
Showing
3 changed files
with
260 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,245 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "8d1899bc-337a-4024-9fa3-9cfbc452e091", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import json \n", | ||
"from gufe.tokenization import JSON_HANDLER\n", | ||
"import numpy as np\n", | ||
"import os \n", | ||
"import shutil\n", | ||
"from pathlib import Path" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "a82b8123-521a-4ca3-a2cf-f73b6504fa14", | ||
"metadata": {}, | ||
"source": [ | ||
"for this dataset, we know we have 3 replicates run in serial for each leg. We want to manipulate the data so that it is equivalent to the output if we re-ran this dataset with each leg run in parallel, with the following directory structure:\n", | ||
"\n", | ||
"```\n", | ||
"results/\n", | ||
" transformations_0/\n", | ||
" rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", | ||
" shared_[hashA]_attempt_0/\n", | ||
" rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", | ||
" transformations_1/\n", | ||
" rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", | ||
" shared_[hashB]_attempt_0/\n", | ||
" rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", | ||
" transformations_2/\n", | ||
" rbfe_lig_ejm_31_complex_lig_ejm_42_complex/\n", | ||
" shared_[hashC]_attempt_0/\n", | ||
" rbfe_lig_ejm_31_complex_lig_ejm_42_complex.json\n", | ||
"```" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "1c6ed7fe-b42c-4781-b356-85799e25356f", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def load_json(fpath):\n", | ||
" return json.load(open(fpath, 'r'), cls=JSON_HANDLER.decoder)\n", | ||
"\n", | ||
"def dump_json(data, fpath):\n", | ||
" with open(fpath, \"w\") as f:\n", | ||
" json.dump(data, f, cls=JSON_HANDLER.encoder)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "8eba246a-6123-4d8e-8fd8-2de516fbf881", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"orig_dir = Path(\"results/\")\n", | ||
"new_dir = Path(\"results_parallel/\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "ab4f2587-9b15-422d-9faa-e11ff98fd491", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"leg_names = []\n", | ||
"for name in os.listdir(orig_dir):\n", | ||
" if name.endswith(\".json\"):\n", | ||
" continue\n", | ||
" leg_names.append(name)\n", | ||
"leg_names" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "311a7f0e-9c91-47ae-9e09-0e1bef03aca8", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"! rm -rf $new_dir\n", | ||
"for leg in leg_names:\n", | ||
" json_data = load_json(orig_dir/f\"{leg}.json\")\n", | ||
" srckey_to_protocol = {}\n", | ||
" srckey_to_unit_results = {}\n", | ||
" srckey_to_estimate = {}\n", | ||
" ## collect results on a per-replicate basis\n", | ||
" for k in json_data['protocol_result']['data']: \n", | ||
" rep_source_key = json_data['protocol_result']['data'][k][0]['source_key']\n", | ||
" \n", | ||
" # keep only the data for this replicate\n", | ||
" rep_result = json_data['protocol_result'].copy()\n", | ||
" rep_result['data']={k:json_data['protocol_result']['data'][k]}\n", | ||
" srckey_to_protocol[rep_source_key] = rep_result\n", | ||
"\n", | ||
" # pull just the estimate value so we can put it at the top of the output\n", | ||
" srckey_to_estimate[rep_source_key] = rep_result['data'][k][0]['outputs']['unit_estimate']\n", | ||
" \n", | ||
" for k in json_data['unit_results']:\n", | ||
" rep_source_key = json_data['unit_results'][k]['source_key']\n", | ||
"\n", | ||
" rep_unit_result = json_data['unit_results'].copy()\n", | ||
" rep_unit_result = {k: json_data['unit_results'][k]}\n", | ||
" srckey_to_unit_results[rep_source_key] = rep_unit_result\n", | ||
" \n", | ||
" assert srckey_to_protocol.keys() == srckey_to_unit_results.keys()\n", | ||
" \n", | ||
" ## write to the new directory\n", | ||
" for n, sk in enumerate(sorted(srckey_to_protocol.keys())):\n", | ||
" rep_dir = new_dir/f\"replicate_{n}\"\n", | ||
" os.makedirs(rep_dir/leg)\n", | ||
" \n", | ||
" # build up the data for this replicate\n", | ||
" replicate_data = {'estimate': srckey_to_estimate[sk],\n", | ||
" 'uncertainty': np.std(srckey_to_estimate[sk]),\n", | ||
" 'protocol_result': srckey_to_protocol[sk],\n", | ||
" 'unit_results': srckey_to_unit_results[sk]}\n", | ||
" \n", | ||
" # write!\n", | ||
" dump_json(replicate_data, rep_dir/f\"{leg}.json\")\n", | ||
" working_dir_name = f\"shared_{sk}_attempt_0\"\n", | ||
" ## TODO: make this work for arbitrary number of attempts \n", | ||
" # os.symlink(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n", | ||
" shutil.copytree(orig_dir/leg/working_dir_name, rep_dir/leg/working_dir_name)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"id": "f864dcb3-bebf-425b-9154-bffc2b0e3f07", | ||
"metadata": {}, | ||
"source": [ | ||
"## check that objects reload correctly" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "6c20639c-8ba7-457a-bf8a-76c64aef4a38", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import openfe" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9cba8316-5500-4d5e-a84d-d72d09ba2a42", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"json_reloaded = load_json(\"results_parallel/replicate_0/easy_rbfe_lig_ejm_31_solvent_lig_ejm_47_solvent.json\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c0e90b45-ae83-41c1-8748-0a8c1466b378", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"json_reloaded['estimate'], json_reloaded['uncertainty']" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c0ce2bc6-d960-4521-b71c-316be0557e9d", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pr_reloaded = openfe.ProtocolResult.from_dict(json_reloaded['protocol_result'])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "a2fbb695-d4ef-45bd-af53-2ef9d0bc8e0a", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pr_reloaded.data" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "19662eaa-46de-4eb0-8c78-ddd6c68b12db", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"first_pur_key = list(json_reloaded['unit_results'].keys())[0]\n", | ||
"pur_reloaded = openfe.ProtocolUnit.from_dict(json_reloaded['unit_results'][first_pur_key])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "0154fda2-4c1a-4064-8bcc-03aeecf13365", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"pur_reloaded" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "3f2bbc84-f59c-40b9-a176-9a733ff275c1", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Binary file not shown.