Skip to content

Commit

Permalink
Add experiment date (#46)
Browse files Browse the repository at this point in the history
* added date_of_experiment column

* regen test data

* edit authors page

* check that dtype of datetime cols is correct

* fixed dtype issue

* black

---------

Co-authored-by: Daniel Wigh <[email protected]>
  • Loading branch information
Joearrowsmith and dswigh authored Apr 14, 2023
1 parent fdc206e commit 3adc792
Show file tree
Hide file tree
Showing 27 changed files with 1,769 additions and 1,608 deletions.
11 changes: 11 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# This is the official list of ORDly authors for copyright purposes.
#
# Names should be added to this file as:
# Name or Organization <email address>
# The email address is not required for organizations.

Daniel S. Wigh <[email protected]>
Joe Arrowsmith <[email protected]>
Alexander Pomberger <[email protected]>
Alexei A. Lapkin <[email protected]>
University of Cambridge
206 changes: 150 additions & 56 deletions inspections.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,6 @@
"# inspect df"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -23,7 +16,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -112,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": 39,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -124,7 +117,7 @@
},
{
"cell_type": "code",
"execution_count": 45,
"execution_count": 20,
"metadata": {},
"outputs": [
{
Expand All @@ -137,7 +130,7 @@
"]"
]
},
"execution_count": 45,
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -146,6 +139,152 @@
"data.reactions[0].outcomes[0].products[0].measurements"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'07/01/2008'"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.reactions[0].provenance.experiment_start.value"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"07/01/2008\n"
]
},
{
"data": {
"text/plain": [
"pandas._libs.tslibs.timestamps.Timestamp"
]
},
"execution_count": 44,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"i = 0\n",
"d = data.reactions[i].provenance.experiment_start.value\n",
"print(d)\n",
"type(pd.to_datetime(d, format=\"%m/%d/%Y\"))\n"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.DataFrame({'a': [1, 2, 3], 'b': [None, None, None]})"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"ename": "AttributeError",
"evalue": "'Series' object has no attribute 'isempty'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[49], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m df[\u001b[39m'\u001b[39;49m\u001b[39mb\u001b[39;49m\u001b[39m'\u001b[39;49m]\u001b[39m.\u001b[39;49mdropna()\u001b[39m.\u001b[39;49misempty()\n",
"File \u001b[0;32m~/opt/anaconda3/envs/chemistry/lib/python3.10/site-packages/pandas/core/generic.py:5902\u001b[0m, in \u001b[0;36mNDFrame.__getattr__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 5895\u001b[0m \u001b[39mif\u001b[39;00m (\n\u001b[1;32m 5896\u001b[0m name \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_internal_names_set\n\u001b[1;32m 5897\u001b[0m \u001b[39mand\u001b[39;00m name \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_metadata\n\u001b[1;32m 5898\u001b[0m \u001b[39mand\u001b[39;00m name \u001b[39mnot\u001b[39;00m \u001b[39min\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_accessors\n\u001b[1;32m 5899\u001b[0m \u001b[39mand\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_info_axis\u001b[39m.\u001b[39m_can_hold_identifiers_and_holds_name(name)\n\u001b[1;32m 5900\u001b[0m ):\n\u001b[1;32m 5901\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m[name]\n\u001b[0;32m-> 5902\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mobject\u001b[39;49m\u001b[39m.\u001b[39;49m\u001b[39m__getattribute__\u001b[39;49m(\u001b[39mself\u001b[39;49m, name)\n",
"\u001b[0;31mAttributeError\u001b[0m: 'Series' object has no attribute 'isempty'"
]
}
],
"source": [
"df['b'].dropna()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Timestamp('2008-07-01 00:00:00')"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.Timestamp(data.reactions[0].provenance.experiment_start.value)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"07/01/2008\n",
"05/16/2008\n",
"05/06/2008\n",
"05/08/2008\n",
"05/15/2008\n",
"09/11/2008\n",
"05/01/2008\n",
"09/05/2008\n",
"04/15/2010\n",
"05/18/2009\n"
]
}
],
"source": [
"for i in range(10):\n",
" print(data.reactions[i].provenance.experiment_start.value)"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -1026,51 +1165,6 @@
"item_frequencies = series.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Na+] 425\n",
"[K+] 221\n",
"other 203\n",
"Cl 133\n",
"[Pd] 71\n",
"[Na] 37\n",
"O=S(=O)(O)O 34\n",
"O 29\n",
"[Li+] 22\n",
"O=C([O-])/C=C/C(=O)[O-] 18\n",
"[Li]CCCC 18\n",
"C[NH2+]C 16\n",
"[Al+3] 15\n",
"Br 15\n",
"Cc1ccc(S(=O)(=O)O)cc1 15\n",
"[Ni] 15\n",
"C(=NC1CCCCC1)=NC1CCCCC1 13\n",
"Cl[Ti](Cl)(Cl)Cl 10\n",
"[Mg] 8\n",
"[NH4+] 6\n",
"CC(=O)[O-] 4\n",
"[Cl-] 2\n",
"[OH-] 1\n",
"[H-] 1\n",
"[H][H] 1\n",
"Name: agent_0, dtype: int64"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"item_frequencies"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
Loading

0 comments on commit 3adc792

Please sign in to comment.