Skip to content

Commit

Permalink
Update tutorial_code.ipynb
Browse files: browse the repository at this point in the history
  • Loading branch information
stephengruppetta committed Jan 3, 2025
1 parent 8190985 commit f1f6f29
Showing 1 changed file with 27 additions and 62 deletions.
89 changes: 27 additions & 62 deletions polars-missing-data/tutorial_code.ipynb
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,7 +15,7 @@
"metadata": {},
"outputs": [],
"source": [
"!python -m pip install polars\n"
"!python -m pip install polars"
]
},
{
Expand All @@ -29,7 +29,7 @@
"\n",
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
"tips.collect()\n"
"tips.collect()"
]
},
{
Expand All @@ -39,10 +39,7 @@
"metadata": {},
"outputs": [],
"source": [
"(\n",
" tips\n",
" .null_count()\n",
").collect()\n"
"(tips.null_count()).collect()"
]
},
{
Expand All @@ -64,12 +61,7 @@
"\n",
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
"(\n",
" tips\n",
" .filter(\n",
" pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()\n",
" )\n",
").collect()\n"
"(tips.filter(pl.col(\"total\").is_null() & pl.col(\"tip\").is_null())).collect()"
]
},
{
Expand All @@ -80,12 +72,10 @@
"outputs": [],
"source": [
"(\n",
" tips\n",
" .drop_nulls(pl.col(\"total\"))\n",
" .filter(\n",
" tips.drop_nulls(pl.col(\"total\")).filter(\n",
" pl.col(\"total\").is_null() & pl.col(\"tip\").is_null()\n",
" )\n",
").collect()\n"
").collect()"
]
},
{
Expand All @@ -99,7 +89,7 @@
" tips.drop_nulls(pl.col(\"total\"))\n",
" .with_columns(pl.col(\"tip\").fill_null(0))\n",
" .filter(pl.col(\"tip\").is_null())\n",
").collect()\n"
").collect()"
]
},
{
Expand All @@ -121,7 +111,7 @@
"\n",
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
"(tips.filter(pl.col(\"time\").is_null())).collect()\n"
"(tips.filter(pl.col(\"time\").is_null())).collect()"
]
},
{
Expand All @@ -131,12 +121,7 @@
"metadata": {},
"outputs": [],
"source": [
"(\n",
" tips\n",
" .filter(\n",
" pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16])\n",
" )\n",
").collect()\n"
"(tips.filter(pl.col(\"record_id\").is_in([2, 3, 4, 14, 15, 16]))).collect()"
]
},
{
Expand All @@ -147,12 +132,11 @@
"outputs": [],
"source": [
"(\n",
" tips\n",
" .drop_nulls(\"total\")\n",
" tips.drop_nulls(\"total\")\n",
" .with_columns(pl.col(\"tip\").fill_null(0))\n",
" .with_columns(pl.col(\"time\").fill_null(strategy=\"forward\"))\n",
" .filter(pl.col(\"record_id\").is_in([3, 15]))\n",
").collect()\n"
").collect()"
]
},
{
Expand All @@ -174,12 +158,7 @@
"\n",
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
"(\n",
" tips\n",
" .filter(\n",
" pl.all_horizontal(pl.col(\"total\", \"tip\").is_null())\n",
" )\n",
").collect()\n"
"(tips.filter(pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()"
]
},
{
Expand All @@ -191,12 +170,7 @@
"source": [
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
"(\n",
" tips\n",
" .filter(\n",
" ~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null())\n",
" )\n",
").collect()\n"
"(tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))).collect()"
]
},
{
Expand All @@ -211,13 +185,10 @@
"tips = pl.scan_parquet(\"tips.parquet\")\n",
"\n",
"(\n",
" tips\n",
" .filter(\n",
" ~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null())\n",
" )\n",
" tips.filter(~pl.all_horizontal(pl.col(\"total\", \"tip\").is_null()))\n",
" .with_columns(pl.col(\"tip\").fill_null(0))\n",
" .with_columns(pl.col(\"time\").fill_null(strategy=\"forward\"))\n",
").null_count().collect()\n"
").null_count().collect()"
]
},
{
Expand Down Expand Up @@ -247,7 +218,7 @@
" }\n",
")\n",
"\n",
"scientists.collect()\n"
"scientists.collect()"
]
},
{
Expand All @@ -263,7 +234,7 @@
" scientists.with_columns(cs.string().fill_null(\"Unknown\")).with_columns(\n",
" cs.integer().fill_null(0)\n",
" )\n",
").collect()\n"
").collect()"
]
},
{
Expand All @@ -285,7 +256,7 @@
"\n",
"sales_trends = pl.scan_csv(\"sales_trends.csv\")\n",
"\n",
"sales_trends.collect()\n"
"sales_trends.collect()"
]
},
{
Expand All @@ -296,13 +267,12 @@
"outputs": [],
"source": [
"(\n",
" sales_trends\n",
" .with_columns(\n",
" sales_trends.with_columns(\n",
" pl.col(\"next_year\").replace(\n",
" [float(\"inf\"), -float(\"inf\"), float(\"NaN\")], None\n",
" )\n",
" )\n",
").collect()\n"
").collect()"
]
},
{
Expand All @@ -313,19 +283,17 @@
"outputs": [],
"source": [
"(\n",
" sales_trends\n",
" .with_columns(\n",
" sales_trends.with_columns(\n",
" pl.col(\"next_year\").replace(\n",
" [float(\"inf\"), -float(\"inf\"), float(\"NaN\")], None\n",
" )\n",
" )\n",
" .with_columns(\n",
" ).with_columns(\n",
" pl.col(\"next_year\").fill_null(\n",
" pl.col(\"current_year\")\n",
" + (pl.col(\"current_year\") - pl.col(\"last_year\"))\n",
" )\n",
" )\n",
").collect()\n"
").collect()"
]
},
{
Expand All @@ -347,7 +315,7 @@
"\n",
"episodes = pl.scan_parquet(\"ft_exercise.parquet\")\n",
"\n",
"episodes.null_count().collect()\n"
"episodes.null_count().collect()"
]
},
{
Expand All @@ -362,8 +330,7 @@
"episodes = pl.scan_parquet(\"ft_exercise.parquet\")\n",
"\n",
"(\n",
" episodes\n",
" .with_columns(\n",
" episodes.with_columns(\n",
" pl.when(pl.col(\"episode\") == 6)\n",
" .then(pl.col(\"series\").fill_null(strategy=\"forward\"))\n",
" .otherwise(pl.col(\"series\").fill_null(strategy=\"backward\"))\n",
Expand All @@ -373,10 +340,8 @@
" .then(pl.col(\"title\").fill_null(\"The Hotel Inspectors\"))\n",
" .otherwise(pl.col(\"title\").fill_null(\"Waldorf Salad\"))\n",
" )\n",
" .with_columns(\n",
" pl.col(\"original_date\").interpolate()\n",
" )\n",
").null_count().collect()\n"
" .with_columns(pl.col(\"original_date\").interpolate())\n",
").null_count().collect()"
]
}
],
Expand Down

0 comments on commit f1f6f29

Please sign in to comment.