Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed Python Notebook Examples #226

Merged
merged 19 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

747 changes: 594 additions & 153 deletions cifar10_transformation_with_pca/cifar-10-pca-py.ipynb

Large diffs are not rendered by default.

506 changes: 506 additions & 0 deletions contact_tracing_clustering_with_dbscan/contact_tracing_dbscan_py.ipynb

Large diffs are not rendered by default.

200 changes: 100 additions & 100 deletions customer_personality_clustering/customer_personality_clustering_py.ipynb

Large diffs are not rendered by default.

225 changes: 125 additions & 100 deletions forest_covertype_prediction_with_random_forests/covertype-rf-py.ipynb
Original file line number Diff line number Diff line change
@@ -1,146 +1,171 @@
{
"metadata":{
"language_info":{
"name":"python",
"version":"3.7.6",
"mimetype":"text/x-python",
"codemirror_mode":{
"name":"ipython",
"version":3
},
"pygments_lexer":"ipython3",
"nbconvert_exporter":"python",
"file_extension":".py"
},
"kernelspec":{
"name":"python3",
"display_name":"Python 3",
"language":"python"
}
},
"nbformat_minor":4,
"nbformat":4,
"cells":[
"cells": [
{
"cell_type":"markdown",
"source":"[![Binder](https://mybinder.org/badge_logo.svg)](https://lab.mlpack.org/v2/gh/mlpack/examples/master?urlpath=lab%2Ftree%2Fforest_covertype_prediction_with_random_forests%2Fcovertype-rf-py.ipynb)",
"metadata":{

}
"cell_type": "markdown",
"metadata": {},
"source": [
"[![Binder](https://mybinder.org/badge_logo.svg)](https://lab.mlpack.org/v2/gh/mlpack/examples/master?urlpath=lab%2Ftree%2Fforest_covertype_prediction_with_random_forests%2Fcovertype-rf-py.ipynb)"
]
},
{
"cell_type":"code",
"source":"# @file covertype-rf-py.ipynb\n#\n# Classification using Random Forest on the Covertype dataset.",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 11,
"metadata": {
"trusted": true
},
"execution_count":11,
"outputs":[

"outputs": [],
"source": [
"# @file covertype-rf-py.ipynb\n",
"#\n",
"# Classification using Random Forest on the Covertype dataset."
]
},
{
"cell_type":"code",
"source":"import mlpack\nimport pandas as pd\nimport numpy as np",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 12,
"metadata": {
"trusted": true
},
"execution_count":12,
"outputs":[

"outputs": [],
"source": [
"import mlpack\n",
"import pandas as pd\n",
"import numpy as np\n",
"import ssl"
]
},
{
"cell_type":"code",
"source":"# Load the dataset from an online URL.\ndf = pd.read_csv('https://lab.mlpack.org/data/covertype-small.csv.gz')",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 13,
"metadata": {
"trusted": true
},
"execution_count":13,
"outputs":[

"outputs": [],
"source": [
"# Load the dataset from an online URL.\n",
"ssl._create_default_https_context = ssl._create_unverified_context\n",
"df = pd.read_csv('https://lab.mlpack.org/data/covertype-small.csv.gz')"
]
},
{
"cell_type":"code",
"source":"# Split the labels.\nlabels = df['label']\ndataset = df.drop('label', 1)",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 14,
"metadata": {
"trusted": true
},
"execution_count":14,
"outputs":[

"outputs": [],
"source": [
"# Split the labels.\n",
"labels = df['label']\n",
"dataset = df.drop('label', axis=1)"
]
},
{
"cell_type":"code",
"source":"# Split the dataset using mlpack. The output comes back as a dictionary, which\n# we'll unpack for clarity of code.\noutput = mlpack.preprocess_split(input=dataset, input_labels=labels, test_ratio=0.3)",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 15,
"metadata": {
"trusted": true
},
"execution_count":15,
"outputs":[

"outputs": [],
"source": [
"# Split the dataset using mlpack. The output comes back as a dictionary, which\n",
"# we'll unpack for clarity of code.\n",
"output = mlpack.preprocess_split(input_=dataset, input_labels=labels, test_ratio=0.3)"
]
},
{
"cell_type":"code",
"source":"training_set = output['training']\ntraining_labels = output['training_labels']\ntest_set = output['test']\ntest_labels = output['test_labels']",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 16,
"metadata": {
"trusted": true
},
"execution_count":16,
"outputs":[

"outputs": [],
"source": [
"training_set = output['training']\n",
"training_labels = output['training_labels']\n",
"test_set = output['test']\n",
"test_labels = output['test_labels']"
]
},
{
"cell_type":"code",
"source":"# Train a random forest.\noutput = mlpack.random_forest(training=training_set, labels=training_labels,\n print_training_accuracy=True, num_trees=10, minimum_leaf_size=3)",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 17,
"metadata": {
"trusted": true
},
"execution_count":17,
"outputs":[

"outputs": [],
"source": [
"# Train a random forest.\n",
"output = mlpack.random_forest(training=training_set, labels=training_labels,\n",
" print_training_accuracy=True, num_trees=10, minimum_leaf_size=3)"
]
},
{
"cell_type":"code",
"source":"random_forest = output['output_model']",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 18,
"metadata": {
"trusted": true
},
"execution_count":18,
"outputs":[

"outputs": [],
"source": [
"random_forest = output['output_model']"
]
},
{
"cell_type":"code",
"source":"# Predict the labels of the test points.\noutput = mlpack.random_forest(input_model=random_forest, test=test_set)",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 19,
"metadata": {
"trusted": true
},
"execution_count":19,
"outputs":[

"outputs": [],
"source": [
"# Predict the labels of the test points.\n",
"output = mlpack.random_forest(input_model=random_forest, test=test_set)"
]
},
{
"cell_type":"code",
"source":"# Now print the accuracy. The 'probabilities' output could also be used to\n# generate an ROC curve.\ncorrect = np.sum(output['predictions'] == test_labels.flatten())\nprint(str(correct) + ' correct out of ' + str(len(test_labels)) +\n ' (' + str(100 * float(correct) / float(len(test_labels))) + '%).')",
"metadata":{
"trusted":true
"cell_type": "code",
"execution_count": 20,
"metadata": {
"trusted": true
},
"execution_count":20,
"outputs":[
"outputs": [
{
"name":"stdout",
"text":"24513 correct out of 30000 (81.71%).\n",
"output_type":"stream"
"name": "stdout",
"output_type": "stream",
"text": [
"24513 correct out of 30000 (81.71%).\n"
]
}
],
"source": [
"# Now print the accuracy. The 'probabilities' output could also be used to\n",
"# generate an ROC curve.\n",
"correct = np.sum(output['predictions'] == test_labels.flatten())\n",
"print(str(correct) + ' correct out of ' + str(len(test_labels)) +\n",
" ' (' + str(100 * float(correct) / float(len(test_labels))) + '%).')"
]
}
]
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

Large diffs are not rendered by default.

30 changes: 15 additions & 15 deletions iris-classification/iris-classification-py.ipynb

Large diffs are not rendered by default.

Loading