Skip to content

Commit

Permalink
enh: implement a pipeline running xgboost regression
Browse files Browse the repository at this point in the history
enh: implement a pipeline running a naive regression model
  • Loading branch information
celprov committed Apr 24, 2024
1 parent c9bf3ed commit 04849d5
Showing 1 changed file with 64 additions and 1 deletion.
65 changes: 64 additions & 1 deletion mriqc_learn/models/production.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
from joblib import load
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.dummy import DummyRegressor
from xgboost import XGBRegressor
from mriqc_learn.models import preprocess as pp


Expand All @@ -34,6 +36,19 @@ def load_model():


def init_pipeline():
    """
    Initialize the default pipeline.

    This is a thin entry point that delegates to :func:`init_pipeline_rfc`
    and hands its result back unchanged.

    Returns
    -------
    object
        Whatever :func:`init_pipeline_rfc` returns.
    """
    default_pipeline = init_pipeline_rfc()
    return default_pipeline


def init_pipeline_rfc():
"""
Initialize a pipeline running a random forest classifier.
Parameters
----------
model_type : str
The model to use. Only 'rfc' and 'xgboost' are supported.
"""

steps = [
(
"drop_ft",
Expand All @@ -53,7 +68,7 @@ def init_pipeline():
("winnow", pp.NoiseWinnowFeatSelect(use_classifier=True)),
("drop_site", pp.DropColumns(drop=["site"])),
(
"rfc",
"model",
RFC(
bootstrap=True,
class_weight=None,
Expand All @@ -70,5 +85,53 @@ def init_pipeline():
),
),
]
return Pipeline(steps)


def init_pipeline_xgboost(
    n_estimators=50,
    max_depth=2,
    eta=0.1,
    subsample=1.0,
    learning_rate=0.1,
    colsample_bytree=1.0,
):
    """
    Initialize a pipeline whose final step is an XGBoost regressor.

    The pipeline has three steps, in this order: ``"drop_ft"`` (drops the
    ``size_{x,y,z}`` and ``spacing_{x,y,z}`` columns), ``"winnow"``
    (``NoiseWinnowFeatSelect``) and ``"model"`` (``XGBRegressor``).

    Parameters
    ----------
    n_estimators : int
        Forwarded to ``XGBRegressor`` as ``n_estimators``.
    max_depth : int
        Forwarded to ``XGBRegressor`` as ``max_depth``.
    eta : float
        Forwarded to ``XGBRegressor`` as ``eta``.
    subsample : float
        Forwarded to ``XGBRegressor`` as ``subsample``.
    learning_rate : float
        Forwarded to ``XGBRegressor`` as ``learning_rate``.
    colsample_bytree : float
        Forwarded to ``XGBRegressor`` as ``colsample_bytree``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The assembled, unfitted pipeline.
    """
    # Image-geometry columns removed before feature selection:
    # size_x, size_y, size_z, spacing_x, spacing_y, spacing_z.
    geometry_columns = [
        f"{prefix}_{ax}" for prefix in ("size", "spacing") for ax in "xyz"
    ]
    drop_geometry = pp.DropColumns(drop=geometry_columns)

    # NOTE(review): the winnowing step is configured with use_classifier=True
    # although the final estimator is a regressor -- confirm this is intended.
    winnow = pp.NoiseWinnowFeatSelect(use_classifier=True)

    # NOTE(review): XGBoost documents ``eta`` as an alias of
    # ``learning_rate``; both are forwarded here, so passing different values
    # for the two is presumably ambiguous -- confirm which one should win.
    regressor_params = {
        "n_estimators": n_estimators,
        "max_depth": max_depth,
        "eta": eta,
        "subsample": subsample,
        "learning_rate": learning_rate,
        "colsample_bytree": colsample_bytree,
        "n_jobs": 1,  # the regressor is always built with a single job
    }
    regressor = XGBRegressor(**regressor_params)

    return Pipeline(
        [
            ("drop_ft", drop_geometry),
            ("winnow", winnow),
            ("model", regressor),
        ]
    )


def init_pipeline_naive(strategy="mean"):
    """
    Initialize a pipeline whose final step is a naive (dummy) regressor.

    The pipeline has three steps, in this order: ``"drop_ft"`` (drops the
    ``size_{x,y,z}`` and ``spacing_{x,y,z}`` columns), ``"winnow"``
    (``NoiseWinnowFeatSelect``) and ``"model"`` (``DummyRegressor``).

    Parameters
    ----------
    strategy : str
        Forwarded to ``DummyRegressor`` as ``strategy``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        The assembled, unfitted pipeline.
    """
    # Image-geometry columns removed before feature selection:
    # size_x, size_y, size_z, spacing_x, spacing_y, spacing_z.
    geometry_columns = [
        f"{prefix}_{ax}" for prefix in ("size", "spacing") for ax in "xyz"
    ]
    drop_geometry = pp.DropColumns(drop=geometry_columns)

    # NOTE(review): the winnowing step is configured with use_classifier=True
    # although the final estimator is a regressor -- confirm this is intended.
    winnow = pp.NoiseWinnowFeatSelect(use_classifier=True)

    baseline = DummyRegressor(strategy=strategy)

    return Pipeline(
        [
            ("drop_ft", drop_geometry),
            ("winnow", winnow),
            ("model", baseline),
        ]
    )

0 comments on commit 04849d5

Please sign in to comment.