Examples
- # It will automatically identify the probability columns
-# if passed a model fitted with tidymodels
-cal_estimate_beta ( segment_logistic , Class )
+ if ( rlang :: is_installed ( "betacal" ) ) {
+ # It will automatically identify the probability columns
+ # if passed a model fitted with tidymodels
+ cal_estimate_beta ( segment_logistic , Class )
+}
#>
#> ── Probability Calibration
#> Method: Beta calibration
diff --git a/dev/reference/cal_validate_beta.html b/dev/reference/cal_validate_beta.html
index 0c68e832..54748fe1 100644
--- a/dev/reference/cal_validate_beta.html
+++ b/dev/reference/cal_validate_beta.html
@@ -163,9 +163,11 @@
On this page
diff --git a/dev/search.json b/dev/search.json
index f331763f..2a0185af 100644
--- a/dev/search.json
+++ b/dev/search.json
@@ -1 +1 @@
-[{"path":[]},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-pledge","dir":"","previous_headings":"","what":"Our Pledge","title":"Contributor Covenant Code of Conduct","text":"members, contributors, leaders pledge make participation community harassment-free experience everyone, regardless age, body size, visible invisible disability, ethnicity, sex characteristics, gender identity expression, level experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, sexual identity orientation. pledge act interact ways contribute open, welcoming, diverse, inclusive, healthy community.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"our-standards","dir":"","previous_headings":"","what":"Our Standards","title":"Contributor Covenant Code of Conduct","text":"Examples behavior contributes positive environment community include: Demonstrating empathy kindness toward people respectful differing opinions, viewpoints, experiences Giving gracefully accepting constructive feedback Accepting responsibility apologizing affected mistakes, learning experience Focusing best just us individuals, overall community Examples unacceptable behavior include: use sexualized language imagery, sexual attention advances kind Trolling, insulting derogatory comments, personal political attacks Public private harassment Publishing others’ private information, physical email address, without explicit permission conduct reasonably considered inappropriate professional setting","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-responsibilities","dir":"","previous_headings":"","what":"Enforcement Responsibilities","title":"Contributor Covenant Code of Conduct","text":"Community leaders responsible clarifying enforcing standards acceptable behavior take appropriate fair corrective action response behavior deem inappropriate, threatening, offensive, harmful. Community leaders right responsibility remove, edit, reject comments, commits, code, wiki edits, issues, contributions aligned Code Conduct, communicate reasons moderation decisions appropriate.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"scope","dir":"","previous_headings":"","what":"Scope","title":"Contributor Covenant Code of Conduct","text":"Code Conduct applies within community spaces, also applies individual officially representing community public spaces. Examples representing community include using official e-mail address, posting via official social media account, acting appointed representative online offline event.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement","dir":"","previous_headings":"","what":"Enforcement","title":"Contributor Covenant Code of Conduct","text":"Instances abusive, harassing, otherwise unacceptable behavior may reported community leaders responsible enforcement codeofconduct@posit.co. complaints reviewed investigated promptly fairly. community leaders obligated respect privacy security reporter incident.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"enforcement-guidelines","dir":"","previous_headings":"","what":"Enforcement Guidelines","title":"Contributor Covenant Code of Conduct","text":"Community leaders follow Community Impact Guidelines determining consequences action deem violation Code Conduct:","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_1-correction","dir":"","previous_headings":"Enforcement Guidelines","what":"1. Correction","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Use inappropriate language behavior deemed unprofessional unwelcome community. Consequence: private, written warning community leaders, providing clarity around nature violation explanation behavior inappropriate. public apology may requested.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_2-warning","dir":"","previous_headings":"Enforcement Guidelines","what":"2. Warning","title":"Contributor Covenant Code of Conduct","text":"Community Impact: violation single incident series actions. Consequence: warning consequences continued behavior. interaction people involved, including unsolicited interaction enforcing Code Conduct, specified period time. includes avoiding interactions community spaces well external channels like social media. Violating terms may lead temporary permanent ban.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_3-temporary-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"3. Temporary Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: serious violation community standards, including sustained inappropriate behavior. Consequence: temporary ban sort interaction public communication community specified period time. public private interaction people involved, including unsolicited interaction enforcing Code Conduct, allowed period. Violating terms may lead permanent ban.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"id_4-permanent-ban","dir":"","previous_headings":"Enforcement Guidelines","what":"4. Permanent Ban","title":"Contributor Covenant Code of Conduct","text":"Community Impact: Demonstrating pattern violation community standards, including sustained inappropriate behavior, harassment individual, aggression toward disparagement classes individuals. Consequence: permanent ban sort public interaction within community.","code":""},{"path":"https://probably.tidymodels.org/dev/CODE_OF_CONDUCT.html","id":"attribution","dir":"","previous_headings":"","what":"Attribution","title":"Contributor Covenant Code of Conduct","text":"Code Conduct adapted Contributor Covenant, version 2.1, available https://www.contributor-covenant.org/version/2/1/code_of_conduct.html. Community Impact Guidelines inspired [Mozilla’s code conduct enforcement ladder][https://github.com/mozilla/inclusion]. answers common questions code conduct, see FAQ https://www.contributor-covenant.org/faq. Translations available https://www.contributor-covenant.org/translations.","code":""},{"path":"https://probably.tidymodels.org/dev/LICENSE.html","id":null,"dir":"","previous_headings":"","what":"MIT License","title":"MIT License","text":"Copyright (c) 2023 probably authors Permission hereby granted, free charge, person obtaining copy software associated documentation files (“Software”), deal Software without restriction, including without limitation rights use, copy, modify, merge, publish, distribute, sublicense, /sell copies Software, permit persons Software furnished , subject following conditions: copyright notice permission notice shall included copies substantial portions Software. SOFTWARE PROVIDED “”, WITHOUT WARRANTY KIND, EXPRESS IMPLIED, INCLUDING LIMITED WARRANTIES MERCHANTABILITY, FITNESS PARTICULAR PURPOSE NONINFRINGEMENT. EVENT SHALL AUTHORS COPYRIGHT HOLDERS LIABLE CLAIM, DAMAGES LIABILITY, WHETHER ACTION CONTRACT, TORT OTHERWISE, ARISING , CONNECTION SOFTWARE USE DEALINGS SOFTWARE.","code":""},{"path":"https://probably.tidymodels.org/dev/articles/equivocal-zones.html","id":"equivocal-zones","dir":"Articles","previous_headings":"","what":"Equivocal zones","title":"Equivocal zones","text":"fields, class probability predictions must meet certain standards firm decision can made using . fail standards, prediction can marked equivocal, just means unsure true result. might want investigate equivocal values, rerun whatever process generated proceeding. example, binary model, prediction returned probability values 52% Yes 48% , really sure isn’t just random noise? case, use buffer surrounding threshold 50% determine whether model sure predictions, mark values unsure equivocal. Another example come Bayesian perspective, prediction comes probability distribution. model might predict 80% Yes, standard deviation around +/- 20%. case, set maximum allowed standard deviation cutoff whether mark values equivocal. work equivocal zones, probably provides new class hard class predictions similar factor, allows mark certain values equivocal. reportable rate fraction values equivocal, relative total number. , can see reportable rate started 100%, soon single value marked equivocal, value dropped 75%. fields equivocal zones used, often tradeoff marking values equivocal keeping certain minimum reportable rate. Generally, won’t create class_pred objects directly, instead create indirectly converting class probabilities class predictions make_class_pred() make_two_class_pred(). buffer used, equivocal zone created around threshold threshold +/- buffer values inside zone automatically marked equivocal. Equivocal values class_pred objects converted NA object converted factor. ’s also worth noting [EQ] label treated separate level. NA behavior feeds probably can used yardstick. Generally, equivocal values removed completely performance evaluation. converting NA leaving default na_rm = TRUE yardstick metric removes consideration. seen , removing equivocal values using simple threshold generally improves performance values model unsure removed. don’t fooled! give cases extra consideration, remember reportable rate decreased removing . production, ’ll likely something predictions!","code":"x <- factor(c(\"Yes\", \"No\", \"Yes\", \"Yes\")) # Create a class_pred object from a factor class_pred(x) #> [1] Yes No Yes Yes #> Levels: No Yes #> Reportable: 100% # Say you aren't sure about that 2nd \"Yes\" value. # You could mark it as equivocal. class_pred(x, which = 3) #> [1] Yes No [EQ] Yes #> Levels: No Yes #> Reportable: 75% library(dplyr) data(\"segment_logistic\") segment_logistic #> # A tibble: 1,010 × 3 #> .pred_poor .pred_good Class #> * #> 1 0.986 0.0142 poor #> 2 0.897 0.103 poor #> 3 0.118 0.882 good #> 4 0.102 0.898 good #> 5 0.991 0.00914 poor #> 6 0.633 0.367 good #> 7 0.770 0.230 good #> 8 0.00842 0.992 good #> 9 0.995 0.00458 poor #> 10 0.765 0.235 poor #> # ℹ 1,000 more rows # Convert probabilities into predictions # > 0.5 = good # < 0.5 = poor segment_logistic_thresh <- segment_logistic %>% mutate( .pred = make_two_class_pred( estimate = .pred_good, levels = levels(Class), threshold = 0.5 ) ) segment_logistic_thresh #> # A tibble: 1,010 × 4 #> .pred_poor .pred_good Class .pred #> #> 1 0.986 0.0142 poor poor #> 2 0.897 0.103 poor poor #> 3 0.118 0.882 good good #> 4 0.102 0.898 good good #> 5 0.991 0.00914 poor poor #> 6 0.633 0.367 good poor #> 7 0.770 0.230 good poor #> 8 0.00842 0.992 good good #> 9 0.995 0.00458 poor poor #> 10 0.765 0.235 poor poor #> # ℹ 1,000 more rows # Convert probabilities into predictions # x > 0.55 = good # x < 0.45 = poor # 0.45 < x < 0.55 = equivocal segment_pred <- segment_logistic %>% mutate( .pred = make_two_class_pred( estimate = .pred_good, levels = levels(Class), threshold = 0.5, buffer = 0.05 ) ) segment_pred %>% count(.pred) #> # A tibble: 3 × 2 #> .pred n #> #> 1 [EQ] 45 #> 2 good 340 #> 3 poor 625 segment_pred %>% summarise(reportable = reportable_rate(.pred)) #> # A tibble: 1 × 1 #> reportable #> #> 1 0.955 segment_pred %>% mutate(.pred_fct = as.factor(.pred)) %>% count(.pred, .pred_fct) #> # A tibble: 3 × 3 #> .pred .pred_fct n #> #> 1 [EQ] NA 45 #> 2 good good 340 #> 3 poor poor 625 levels(segment_pred$.pred) #> [1] \"good\" \"poor\" library(yardstick) # No equivocal zone segment_logistic_thresh %>% mutate(.pred_fct = as.factor(.pred)) %>% precision(Class, .pred_fct) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 precision binary 0.680 # Equivocal zone segment_pred %>% mutate(.pred_fct = as.factor(.pred)) %>% precision(Class, .pred_fct) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 precision binary 0.694"},{"path":"https://probably.tidymodels.org/dev/articles/where-to-use.html","id":"introduction","dir":"Articles","previous_headings":"","what":"Introduction","title":"Where does probably fit in?","text":"obvious question regarding probably might : fit rest tidymodels ecosystem? Like pieces ecosystem, probably designed modular, plays well tidymodels packages. Regarding placement modeling workflow, probably best fits post processing step model fit, model performance calculated.","code":""},{"path":"https://probably.tidymodels.org/dev/articles/where-to-use.html","id":"example","dir":"Articles","previous_headings":"","what":"Example","title":"Where does probably fit in?","text":"example, ’ll use parsnip fit logistic regression Lending Club https://www.lendingclub.com/ loan data, use probably investigate happens performance vary threshold “good” loan . Let’s split 75% training 25% testing something predict . anything, let’s look counts going predicting, Class loan. Clearly large imbalance number good bad loans. probably good thing bank, poses interesting issue us might want ensure sensitive bad loans overwhelmed number good ones. One thing might downsample number good loans total number line number bad loans. fitting model using themis::step_downsample(), now, let’s continue data unchanged. ’ll use parsnip’s logistic_reg() create model specification logistic regression, set engine glm actually fit model using data model formula. output parsnip fit() call parsnip model_fit object, underlying print method glm fit used. Now let’s predict testing set, use type = \"prob\" get class probabilities back rather hard predictions. use probably investigate performance. class probabilities hand, can use make_two_class_pred() convert probabilities hard predictions using threshold. threshold 0.5 just says predicted probability 0.5, classify prediction “good” loan, otherwise, bad. Hmm, 0.5 threshold, almost loans predicted “good”. Perhaps something large class imbalance. hand, bank might want stringent classified “good” loan, might require probability 0.75 threshold. case, 4 bad loans correctly classified bad, good loans also misclassified bad now. tradeoff , can somewhat captured metrics sensitivity specificity. metrics max value 1. sensitivity - proportion predicted “good” loans “good” loans specificity - proportion predicted “bad” loans “bad” loans example, increased specificity (capturing 4 bad loans higher threshold), lowered sensitivity (incorrectly reclassifying good loans bad). nice combination metrics represent tradeoff. Luckily, j_index exactly . j_index=sens+spec−1 j\\_index = sens + spec - 1 j_index maximum value 1 false positives false negatives. can used justification whether increase threshold value worth . increasing threshold results increase specificity decrease sensitivity, can see j_index. Now, way optimize things. care low false positives, might interested keeping sensitivity high, wouldn’t best way tackle problem. now, let’s see can use probably optimize j_index. threshold_perf() recalculate number metrics across varying thresholds. One j_index. ggplot2, can easily visualize varying performance find optimal threshold maximizing j_index. ’s clear visual optimal threshold high, exactly 0.945. pretty high, , optimization method won’t useful cases. wrap , test set metrics threshold value.","code":"library(parsnip) library(probably) library(dplyr) library(rsample) library(modeldata) data(\"lending_club\") # I think it makes more sense to have \"good\" as the first level # By default it comes as the second level lending_club <- lending_club %>% mutate(Class = relevel(Class, \"good\")) # There are a number of columns in this data set, but we will only use a few # for this example lending_club <- select(lending_club, Class, annual_inc, verification_status, sub_grade) lending_club #> # A tibble: 9,857 × 4 #> Class annual_inc verification_status sub_grade #> #> 1 good 35000 Not_Verified C4 #> 2 good 72000 Verified C1 #> 3 good 72000 Source_Verified D1 #> 4 good 101000 Verified C3 #> 5 good 50100 Source_Verified A4 #> 6 good 32000 Source_Verified B5 #> 7 good 65000 Not_Verified A1 #> 8 good 188000 Not_Verified B2 #> 9 good 89000 Source_Verified B3 #> 10 good 48000 Not_Verified C2 #> # ℹ 9,847 more rows # 75% train, 25% test set.seed(123) split <- initial_split(lending_club, prop = 0.75) lending_train <- training(split) lending_test <- testing(split) count(lending_train, Class) #> # A tibble: 2 × 2 #> Class n #> #> 1 good 7008 #> 2 bad 384 logi_reg <- logistic_reg() logi_reg_glm <- logi_reg %>% set_engine(\"glm\") # A small model specification that defines the type of model you are # using and the engine logi_reg_glm #> Logistic Regression Model Specification (classification) #> #> Computational engine: glm # Fit the model logi_reg_fit <- fit( logi_reg_glm, formula = Class ~ annual_inc + verification_status + sub_grade, data = lending_train ) logi_reg_fit #> parsnip model object #> #> #> Call: stats::glm(formula = Class ~ annual_inc + verification_status + #> sub_grade, family = stats::binomial, data = data) #> #> Coefficients: #> (Intercept) annual_inc #> -5.670e+00 1.915e-06 #> verification_statusSource_Verified verification_statusVerified #> 4.324e-02 3.364e-01 #> sub_gradeA2 sub_gradeA3 #> 9.508e-02 1.149e+00 #> sub_gradeA4 sub_gradeA5 #> -5.591e-02 1.510e+00 #> sub_gradeB1 sub_gradeB2 #> 1.637e+00 1.177e+00 #> sub_gradeB3 sub_gradeB4 #> 1.467e+00 1.975e+00 #> sub_gradeB5 sub_gradeC1 #> 2.125e+00 2.234e+00 #> sub_gradeC2 sub_gradeC3 #> 2.176e+00 2.380e+00 #> sub_gradeC4 sub_gradeC5 #> 2.724e+00 3.084e+00 #> sub_gradeD1 sub_gradeD2 #> 3.105e+00 2.816e+00 #> sub_gradeD3 sub_gradeD4 #> 3.165e+00 3.125e+00 #> sub_gradeD5 sub_gradeE1 #> 3.507e+00 3.621e+00 #> sub_gradeE2 sub_gradeE3 #> 3.272e+00 3.542e+00 #> sub_gradeE4 sub_gradeE5 #> 3.428e+00 3.468e+00 #> sub_gradeF1 sub_gradeF2 #> 3.717e+00 4.096e+00 #> sub_gradeF3 sub_gradeF4 #> 3.681e+00 3.662e+00 #> sub_gradeF5 sub_gradeG1 #> 3.586e+00 4.168e+00 #> sub_gradeG2 sub_gradeG3 #> 4.162e+00 4.422e+00 #> sub_gradeG4 sub_gradeG5 #> 5.102e+00 -8.226e+00 #> #> Degrees of Freedom: 7391 Total (i.e. Null); 7354 Residual #> Null Deviance: 3019 #> Residual Deviance: 2716 AIC: 2792 predictions <- logi_reg_fit %>% predict(new_data = lending_test, type = \"prob\") head(predictions, n = 2) #> # A tibble: 2 × 2 #> .pred_good .pred_bad #> #> 1 0.969 0.0311 #> 2 0.965 0.0353 lending_test_pred <- bind_cols(predictions, lending_test) lending_test_pred #> # A tibble: 2,465 × 6 #> .pred_good .pred_bad Class annual_inc verification_status sub_grade #> #> 1 0.969 0.0311 good 32000 Source_Verified B5 #> 2 0.965 0.0353 good 73400 Source_Verified C2 #> 3 0.960 0.0405 good 175000 Source_Verified B5 #> 4 0.972 0.0276 good 70000 Not_Verified B4 #> 5 0.874 0.126 good 36000 Source_Verified E1 #> 6 0.944 0.0560 good 40000 Source_Verified C4 #> 7 0.996 0.00385 good 60000 Not_Verified A1 #> 8 0.951 0.0486 good 65000 Verified C1 #> 9 0.963 0.0370 good 52000 Verified B4 #> 10 0.983 0.0173 good 61000 Verified B2 #> # ℹ 2,455 more rows hard_pred_0.5 <- lending_test_pred %>% mutate( .pred = make_two_class_pred( estimate = .pred_good, levels = levels(Class), threshold = .5 ) ) %>% select(Class, contains(\".pred\")) hard_pred_0.5 %>% count(.truth = Class, .pred) #> # A tibble: 2 × 3 #> .truth .pred n #> #> 1 good good 2332 #> 2 bad good 133 hard_pred_0.75 <- lending_test_pred %>% mutate( .pred = make_two_class_pred( estimate = .pred_good, levels = levels(Class), threshold = .75 ) ) %>% select(Class, contains(\".pred\")) hard_pred_0.75 %>% count(.truth = Class, .pred) #> # A tibble: 4 × 3 #> .truth .pred n #> #> 1 good good 2320 #> 2 good bad 12 #> 3 bad good 129 #> 4 bad bad 4 library(yardstick) sens(hard_pred_0.5, Class, .pred) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sens binary 1 spec(hard_pred_0.5, Class, .pred) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 spec binary 0 sens(hard_pred_0.75, Class, .pred) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 sens binary 0.995 spec(hard_pred_0.75, Class, .pred) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 spec binary 0.0301 j_index(hard_pred_0.5, Class, .pred) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 j_index binary 0 j_index(hard_pred_0.75, Class, .pred) #> # A tibble: 1 × 3 #> .metric .estimator .estimate #> #> 1 j_index binary 0.0249 threshold_data <- lending_test_pred %>% threshold_perf(Class, .pred_good, thresholds = seq(0.5, 1, by = 0.0025)) threshold_data %>% filter(.threshold %in% c(0.5, 0.6, 0.7)) #> # A tibble: 12 × 4 #> .threshold .metric .estimator .estimate #> #> 1 0.5 sensitivity binary 1 #> 2 0.6 sensitivity binary 0.999 #> 3 0.7 sensitivity binary 0.998 #> 4 0.5 specificity binary 0 #> 5 0.6 specificity binary 0.0226 #> 6 0.7 specificity binary 0.0226 #> 7 0.5 j_index binary 0 #> 8 0.6 j_index binary 0.0217 #> 9 0.7 j_index binary 0.0208 #> 10 0.5 distance binary 1 #> 11 0.6 distance binary 0.955 #> 12 0.7 distance binary 0.955 library(ggplot2) threshold_data <- threshold_data %>% filter(.metric != \"distance\") %>% mutate(group = case_when( .metric == \"sens\" | .metric == \"spec\" ~ \"1\", TRUE ~ \"2\" )) max_j_index_threshold <- threshold_data %>% filter(.metric == \"j_index\") %>% filter(.estimate == max(.estimate)) %>% pull(.threshold) ggplot(threshold_data, aes(x = .threshold, y = .estimate, color = .metric, alpha = group)) + geom_line() + theme_minimal() + scale_color_viridis_d(end = 0.9) + scale_alpha_manual(values = c(.4, 1), guide = \"none\") + geom_vline(xintercept = max_j_index_threshold, alpha = .6, color = \"grey30\") + labs( x = \"'Good' Threshold\\n(above this value is considered 'good')\", y = \"Metric Estimate\", title = \"Balancing performance by varying the threshold\", subtitle = \"Sensitivity or specificity alone might not be enough!\\nVertical line = Max J-Index\" ) threshold_data %>% filter(.threshold == max_j_index_threshold) #> # A tibble: 3 × 5 #> .threshold .metric .estimator .estimate group #> #> 1 0.945 sensitivity binary 0.687 2 #> 2 0.945 specificity binary 0.692 2 #> 3 0.945 j_index binary 0.379 2"},{"path":"https://probably.tidymodels.org/dev/authors.html","id":null,"dir":"","previous_headings":"","what":"Authors","title":"Authors and Citation","text":"Max Kuhn. Author, maintainer. Davis Vaughan. Author. Edgar Ruiz. Author. . Copyright holder, funder.","code":""},{"path":"https://probably.tidymodels.org/dev/authors.html","id":"citation","dir":"","previous_headings":"","what":"Citation","title":"Authors and Citation","text":"Kuhn M, Vaughan D, Ruiz E (2024). probably: Tools Post-Processing Predicted Values. R package version 1.0.3.9001, https://probably.tidymodels.org, https://github.com/tidymodels/probably.","code":"@Manual{, title = {probably: Tools for Post-Processing Predicted Values}, author = {Max Kuhn and Davis Vaughan and Edgar Ruiz}, year = {2024}, note = {R package version 1.0.3.9001, https://probably.tidymodels.org}, url = {https://github.com/tidymodels/probably}, }"},{"path":[]},{"path":"https://probably.tidymodels.org/dev/index.html","id":"introduction","dir":"","previous_headings":"","what":"Introduction","title":"Tools for Post-Processing Predicted Values","text":"probably contains tools facilitate activities : Conversion probabilities discrete class predictions. Investigating estimating optimal probability thresholds. Calibration assessments remediation classification regression models. Inclusion equivocal zones probabilities uncertain report prediction.","code":""},{"path":"https://probably.tidymodels.org/dev/index.html","id":"installation","dir":"","previous_headings":"","what":"Installation","title":"Tools for Post-Processing Predicted Values","text":"can install probably CRAN : can install development version probably GitHub :","code":"install.packages(\"probably\") # install.packages(\"pak\") pak::pak(\"tidymodels/probably\")"},{"path":"https://probably.tidymodels.org/dev/index.html","id":"examples","dir":"","previous_headings":"","what":"Examples","title":"Tools for Post-Processing Predicted Values","text":"Good places look examples using probably vignettes. vignette(\"equivocal-zones\", \"probably\") discusses new class_pred class probably provides working equivocal zones. vignette(\"--use\", \"probably\") discusses probably fits rest tidymodels ecosystem, provides example optimizing class probability thresholds.","code":""},{"path":"https://probably.tidymodels.org/dev/index.html","id":"contributing","dir":"","previous_headings":"","what":"Contributing","title":"Tools for Post-Processing Predicted Values","text":"project released Contributor Code Conduct. contributing project, agree abide terms. questions discussions tidymodels packages, modeling, machine learning, please post RStudio Community. think encountered bug, please submit issue. Either way, learn create share reprex (minimal, reproducible example), clearly communicate code. Check details contributing guidelines tidymodels packages get help.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/append_class_pred.html","id":null,"dir":"Reference","previous_headings":"","what":"Add a class_pred column — append_class_pred","title":"Add a class_pred column — append_class_pred","text":"function similar make_class_pred(), useful large number class probability columns want use tidyselect helpers. appends new class_pred vector column original data frame.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/append_class_pred.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Add a class_pred column — append_class_pred","text":"","code":"append_class_pred( .data, ..., levels, ordered = FALSE, min_prob = 1/length(levels), name = \".class_pred\" )"},{"path":"https://probably.tidymodels.org/dev/reference/append_class_pred.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Add a class_pred column — append_class_pred","text":".data data frame tibble. ... One unquoted expressions separated commas capture columns .data containing class probabilities. can treat variable names like positions, can use expressions like x:y select ranges variables use selector functions choose columns. make_class_pred, columns class probabilities selected (order levels object). two_class_pred, vector class probabilities selected. levels character vector class levels. length number selections made ..., length 2 make_two_class_pred(). ordered single logical determine levels regarded ordered (order given). results class_pred object flagged ordered. min_prob single numeric value. probabilities less value (row), row marked equivocal. name single character value name appended class_pred column.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/append_class_pred.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Add a class_pred column — append_class_pred","text":".data extra class_pred column appended onto .","code":""},{"path":"https://probably.tidymodels.org/dev/reference/append_class_pred.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Add a class_pred column — append_class_pred","text":"","code":"# The following two examples are equivalent and demonstrate # the helper, append_class_pred() library(dplyr) #> #> Attaching package: ‘dplyr’ #> The following objects are masked from ‘package:stats’: #> #> filter, lag #> The following objects are masked from ‘package:base’: #> #> intersect, setdiff, setequal, union species_probs %>% mutate( .class_pred = make_class_pred( .pred_bobcat, .pred_coyote, .pred_gray_fox, levels = levels(Species), min_prob = .5 ) ) #> # A tibble: 110 × 5 #> Species .pred_bobcat .pred_coyote .pred_gray_fox .class_pred #> #> 1 gray_fox 0.0976 0.0530 0.849 gray_fox #> 2 gray_fox 0.155 0.139 0.706 gray_fox #> 3 bobcat 0.501 0.0880 0.411 bobcat #> 4 gray_fox 0.256 0 0.744 gray_fox #> 5 gray_fox 0.463 0.287 0.250 [EQ] #> 6 bobcat 0.811 0 0.189 bobcat #> 7 bobcat 0.911 0.0888 0 bobcat #> 8 bobcat 0.898 0.0517 0.0500 bobcat #> 9 bobcat 0.771 0.229 0 bobcat #> 10 bobcat 0.623 0.325 0.0517 bobcat #> # ℹ 100 more rows lvls <- levels(species_probs$Species) append_class_pred( .data = species_probs, contains(\".pred_\"), levels = lvls, min_prob = .5 ) #> # A tibble: 110 × 5 #> Species .pred_bobcat .pred_coyote .pred_gray_fox .class_pred #> #> 1 gray_fox 0.0976 0.0530 0.849 gray_fox #> 2 gray_fox 0.155 0.139 0.706 gray_fox #> 3 bobcat 0.501 0.0880 0.411 bobcat #> 4 gray_fox 0.256 0 0.744 gray_fox #> 5 gray_fox 0.463 0.287 0.250 [EQ] #> 6 bobcat 0.811 0 0.189 bobcat #> 7 bobcat 0.911 0.0888 0 bobcat #> 8 bobcat 0.898 0.0517 0.0500 bobcat #> 9 bobcat 0.771 0.229 0 bobcat #> 10 bobcat 0.623 0.325 0.0517 bobcat #> # ℹ 100 more rows"},{"path":"https://probably.tidymodels.org/dev/reference/as_class_pred.html","id":null,"dir":"Reference","previous_headings":"","what":"Coerce to a class_pred object — as_class_pred","title":"Coerce to a class_pred object — as_class_pred","text":"as_class_pred() provides coercion class_pred existing objects.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/as_class_pred.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Coerce to a class_pred object — as_class_pred","text":"","code":"as_class_pred(x, which = integer(), equivocal = \"[EQ]\")"},{"path":"https://probably.tidymodels.org/dev/reference/as_class_pred.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Coerce to a class_pred object — as_class_pred","text":"x factor ordered factor. integer vector specifying locations x declare equivocal. equivocal single character specifying equivocal label used printing.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/as_class_pred.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Coerce to a class_pred object — as_class_pred","text":"","code":"x <- factor(c(\"Yes\", \"No\", \"Yes\", \"Yes\")) as_class_pred(x) #> [1] Yes No Yes Yes #> Levels: No Yes #> Reportable: 100%"},{"path":"https://probably.tidymodels.org/dev/reference/boosting_predictions.html","id":null,"dir":"Reference","previous_headings":"","what":"Boosted regression trees predictions — boosting_predictions","title":"Boosted regression trees predictions — boosting_predictions","text":"Boosted regression trees predictions","code":""},{"path":"https://probably.tidymodels.org/dev/reference/boosting_predictions.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Boosted regression trees predictions — boosting_predictions","text":"boosting_predictions_oob,boosting_predictions_test tibbles","code":""},{"path":"https://probably.tidymodels.org/dev/reference/boosting_predictions.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Boosted regression trees predictions — boosting_predictions","text":"data set holdout predictions 10-fold cross-validation separate collection test set predictions boosted tree model. data generated using sim_regression function modeldata package.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/boosting_predictions.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Boosted regression trees predictions — boosting_predictions","text":"","code":"data(boosting_predictions_oob) #> Warning: data set ‘boosting_predictions_oob’ not found str(boosting_predictions_oob) #> tibble [2,000 × 3] (S3: tbl_df/tbl/data.frame) #> $ outcome: num [1:2000] -13.45 43.85 6.14 26.85 -7.43 ... #> $ .pred : num [1:2000] 3.13 32.37 10.3 17.79 12.28 ... #> $ id : chr [1:2000] \"Fold01\" \"Fold01\" \"Fold01\" \"Fold01\" ... str(boosting_predictions_test) #> tibble [500 × 2] (S3: tbl_df/tbl/data.frame) #> $ outcome: num [1:500] -4.65 1.12 14.7 36.28 14.08 ... #> $ .pred : num [1:500] 4.12 1.83 13.05 19.07 14.93 ..."},{"path":"https://probably.tidymodels.org/dev/reference/bound_prediction.html","id":null,"dir":"Reference","previous_headings":"","what":"Truncate a numeric prediction column — bound_prediction","title":"Truncate a numeric prediction column — bound_prediction","text":"user-defined lower_limit /upper_limit bound, ensure values .pred column coerced bounds.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/bound_prediction.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Truncate a numeric prediction column — bound_prediction","text":"","code":"bound_prediction( x, lower_limit = -Inf, upper_limit = Inf, call = rlang::current_env() )"},{"path":"https://probably.tidymodels.org/dev/reference/bound_prediction.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Truncate a numeric prediction column — bound_prediction","text":"x data frame contains numeric column named .pred. lower_limit, upper_limit Single numerics (NA) define constraints .pred. call call displayed warnings errors.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/bound_prediction.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Truncate a numeric prediction column — bound_prediction","text":"x potentially adjusted values.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/bound_prediction.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Truncate a numeric prediction column — bound_prediction","text":"","code":"data(solubility_test, package = \"yardstick\") names(solubility_test) <- c(\"solubility\", \".pred\") bound_prediction(solubility_test, lower_limit = -1) #> solubility .pred #> 1 0.93 0.36775219 #> 2 0.85 -0.15032196 #> 3 0.81 -0.50518438 #> 4 0.74 0.53981158 #> 5 0.61 -0.47927183 #> 6 0.58 0.73772216 #> 7 0.57 0.45857371 #> 8 0.56 0.58613930 #> 9 0.52 0.09587705 #> 10 0.45 -0.39248631 #> 11 0.40 -0.39248631 #> 12 0.36 -1.00000000 #> 13 0.22 -0.42208506 #> 14 0.08 -0.11257503 #> 15 0.07 -0.55242508 #> 16 0.02 -0.97431079 #> 17 0.00 -0.15320302 #> 18 -0.01 -0.37951441 #> 19 -0.07 0.88317691 #> 20 -0.12 -1.00000000 #> 21 -0.17 0.03893535 #> 22 -0.29 -0.02510738 #> 23 -0.38 -0.51435942 #> 24 -0.38 -0.69655564 #> 25 -0.39 -0.83937780 #> 26 -0.42 -0.62948972 #> 27 -0.44 -0.92769946 #> 28 -0.46 0.54452824 #> 29 -0.48 -1.00000000 #> 30 -0.60 -1.00000000 #> 31 -0.63 -1.00000000 #> 32 -0.66 -0.87822537 #> 33 -0.72 -0.45310275 #> 34 -0.72 -0.49465265 #> 35 -0.80 -0.56171857 #> 36 -0.80 -0.89522719 #> 37 -0.82 0.42570777 #> 38 -0.82 -0.87822537 #> 39 -0.84 0.05436526 #> 40 -0.85 -1.00000000 #> 41 -0.85 -0.61627916 #> 42 -0.87 -1.00000000 #> 43 -0.89 -1.00000000 #> 44 -0.90 0.36172934 #> 45 -0.96 -1.00000000 #> 46 -0.96 -0.69388075 #> 47 -0.99 -0.72904783 #> 48 -1.01 -0.87822537 #> 49 -1.09 -1.00000000 #> 50 -1.12 -1.00000000 #> 51 -1.14 -0.47101466 #> 52 -1.17 -1.00000000 #> 53 -1.19 -1.00000000 #> 54 -1.22 -1.00000000 #> 55 -1.27 -1.00000000 #> 56 -1.28 -1.00000000 #> 57 -1.32 -1.00000000 #> 58 -1.38 -1.00000000 #> 59 -1.39 -1.00000000 #> 60 -1.42 -1.00000000 #> 61 -1.47 -1.00000000 #> 62 -1.47 -1.00000000 #> 63 -1.50 -0.59558842 #> 64 -1.52 -0.85624767 #> 65 -1.54 -1.00000000 #> 66 -1.55 -1.00000000 #> 67 -1.56 -1.00000000 #> 68 -1.57 -1.00000000 #> 69 -1.60 -1.00000000 #> 70 -1.60 -1.00000000 #> 71 -1.62 -1.00000000 #> 72 -1.64 -1.00000000 #> 73 -1.67 -1.00000000 #> 74 -1.70 -1.00000000 #> 75 -1.70 -1.00000000 #> 76 -1.71 -1.00000000 #> 77 -1.71 -1.00000000 #> 78 -1.75 -1.00000000 #> 79 -1.78 -1.00000000 #> 80 -1.78 -1.00000000 #> 81 -1.82 -1.00000000 #> 82 -1.87 -1.00000000 #> 83 -1.89 -1.00000000 #> 84 -1.92 -1.00000000 #> 85 -1.92 -1.00000000 #> 86 -1.92 -1.00000000 #> 87 -1.94 -1.00000000 #> 88 -1.99 -1.00000000 #> 89 -2.00 -1.00000000 #> 90 -2.05 -1.00000000 #> 91 -2.06 -1.00000000 #> 92 -2.08 -1.00000000 #> 93 -2.10 -1.00000000 #> 94 -2.11 -0.94582854 #> 95 -2.12 -0.95282378 #> 96 -2.17 -1.00000000 #> 97 -2.21 -1.00000000 #> 98 -2.24 -1.00000000 #> 99 -2.24 -1.00000000 #> 100 -2.29 -1.00000000 #> 101 -2.31 -1.00000000 #> 102 -2.32 -1.00000000 #> 103 -2.35 -1.00000000 #> 104 -2.35 -1.00000000 #> 105 -2.36 -1.00000000 #> 106 -2.36 -1.00000000 #> 107 -2.38 -1.00000000 #> 108 -2.42 -1.00000000 #> 109 -2.43 -1.00000000 #> 110 -2.44 -1.00000000 #> 111 -2.52 -1.00000000 #> 112 -2.53 -1.00000000 #> 113 -2.57 -1.00000000 #> 114 -2.62 -1.00000000 #> 115 -2.62 -1.00000000 #> 116 -2.64 -1.00000000 #> 117 -2.64 -1.00000000 #> 118 -2.70 -1.00000000 #> 119 -2.82 -1.00000000 #> 120 -2.88 -1.00000000 #> 121 -2.89 -1.00000000 #> 122 -2.92 -1.00000000 #> 123 -2.93 -1.00000000 #> 124 -2.96 -1.00000000 #> 125 -2.98 -1.00000000 #> 126 -3.01 -1.00000000 #> 127 -3.01 -1.00000000 #> 128 -3.02 -1.00000000 #> 129 -3.07 -1.00000000 #> 130 -3.09 -1.00000000 #> 131 -3.11 -1.00000000 #> 132 -3.13 -1.00000000 #> 133 -3.14 -1.00000000 #> 134 -3.15 -1.00000000 #> 135 -3.22 -1.00000000 #> 136 -3.26 -1.00000000 #> 137 -3.27 -1.00000000 #> 138 -3.27 -1.00000000 #> 139 -3.30 -1.00000000 #> 140 -3.31 -1.00000000 #> 141 -3.33 -1.00000000 #> 142 -3.37 -1.00000000 #> 143 -3.43 -1.00000000 #> 144 -3.43 -1.00000000 #> 145 -3.48 -1.00000000 #> 146 -3.51 -1.00000000 #> 147 -3.59 -1.00000000 #> 148 -3.61 -1.00000000 #> 149 -3.63 -1.00000000 #> 150 -3.63 -1.00000000 #> 151 -3.68 -1.00000000 #> 152 -3.71 -1.00000000 #> 153 -3.74 -1.00000000 #> 154 -3.75 -1.00000000 #> 155 -3.75 -1.00000000 #> 156 -3.77 -1.00000000 #> 157 -3.77 -1.00000000 #> 158 -3.78 -1.00000000 #> 159 -3.81 -1.00000000 #> 160 -3.95 -1.00000000 #> 161 -3.96 -1.00000000 #> 162 -3.96 -1.00000000 #> 163 -4.00 -1.00000000 #> 164 -4.02 -1.00000000 #> 165 -4.04 -1.00000000 #> 166 -4.12 -1.00000000 #> 167 -4.15 -1.00000000 #> 168 -4.16 -1.00000000 #> 169 -4.17 -1.00000000 #> 170 -4.21 -1.00000000 #> 171 -4.23 -1.00000000 #> 172 -4.25 -1.00000000 #> 173 -4.30 -1.00000000 #> 174 -4.31 -1.00000000 #> 175 -4.35 -1.00000000 #> 176 -4.40 -1.00000000 #> 177 -4.40 -1.00000000 #> 178 -4.43 -1.00000000 #> 179 -4.46 -1.00000000 #> 180 -4.47 -1.00000000 #> 181 -4.51 -1.00000000 #> 182 -4.60 -1.00000000 #> 183 -4.64 -1.00000000 #> 184 -4.69 -1.00000000 #> 185 -4.71 -1.00000000 #> 186 -4.77 -1.00000000 #> 187 -4.95 -1.00000000 #> 188 -4.98 -1.00000000 #> 189 -5.21 -1.00000000 #> 190 -5.22 -1.00000000 #> 191 -5.28 -1.00000000 #> 192 -5.31 -1.00000000 #> 193 -5.35 -1.00000000 #> 194 -5.37 -1.00000000 #> 195 -5.40 -1.00000000 #> 196 -5.43 -1.00000000 #> 197 -5.65 -1.00000000 #> 198 -5.66 -1.00000000 #> 199 -6.70 -1.00000000 #> 200 -5.72 -1.00000000 #> 201 -6.00 -1.00000000 #> 202 -6.25 -1.00000000 #> 203 -6.26 -1.00000000 #> 204 -6.27 -1.00000000 #> 205 -6.35 -1.00000000 #> 206 -6.57 -1.00000000 #> 207 -6.62 -1.00000000 #> 208 -6.96 -1.00000000 #> 209 -7.02 -1.00000000 #> 210 -7.20 -1.00000000 #> 211 -7.28 -1.00000000 #> 212 -7.32 -1.00000000 #> 213 -7.39 -1.00000000 #> 214 -7.82 -1.00000000 #> 215 -8.23 -1.00000000 #> 216 -8.94 -1.00000000 #> 217 1.07 0.04202675 #> 218 0.43 -0.02565046 #> 219 0.32 0.25187579 #> 220 0.00 -0.38765781 #> 221 -0.40 -1.00000000 #> 222 -0.52 -0.48786224 #> 223 -0.55 -1.00000000 #> 224 -0.60 -0.83526706 #> 225 -0.62 -1.00000000 #> 226 -0.85 -1.00000000 #> 227 -0.89 -1.00000000 #> 228 -0.93 -1.00000000 #> 229 -0.96 -0.37328992 #> 230 -1.06 -1.00000000 #> 231 -1.10 -1.00000000 #> 232 -1.12 -0.56163091 #> 233 -1.15 -0.82058137 #> 234 -1.28 -0.22588800 #> 235 -1.30 -1.00000000 #> 236 -1.31 -1.00000000 #> 237 -1.35 -1.00000000 #> 238 -1.39 -1.00000000 #> 239 -1.41 -1.00000000 #> 240 -1.41 -1.00000000 #> 241 -1.42 -0.76974442 #> 242 -1.46 -1.00000000 #> 243 -1.50 -1.00000000 #> 244 -1.50 -1.00000000 #> 245 -1.52 -1.00000000 #> 246 -1.52 -1.00000000 #> 247 -1.59 -1.00000000 #> 248 -1.61 -1.00000000 #> 249 -1.63 -1.00000000 #> 250 -1.71 -1.00000000 #> 251 -1.83 -1.00000000 #> 252 -2.05 -1.00000000 #> 253 -2.06 -1.00000000 #> 254 -2.07 -1.00000000 #> 255 -2.15 -1.00000000 #> 256 -2.16 -1.00000000 #> 257 -1.99 0.13067110 #> 258 -2.36 -1.00000000 #> 259 -2.38 -1.00000000 #> 260 -2.39 -1.00000000 #> 261 -2.46 -1.00000000 #> 262 -2.49 -1.00000000 #> 263 -2.54 -1.00000000 #> 264 -2.55 -1.00000000 #> 265 -2.63 -1.00000000 #> 266 -2.64 -1.00000000 #> 267 -2.67 -1.00000000 #> 268 -2.68 -1.00000000 #> 269 -2.77 -1.00000000 #> 270 -2.78 -1.00000000 #> 271 -2.82 -1.00000000 #> 272 -2.92 -1.00000000 #> 273 -3.03 -1.00000000 #> 274 -3.12 -1.00000000 #> 275 -3.16 -1.00000000 #> 276 -3.19 -1.00000000 #> 277 -3.54 -1.00000000 #> 278 -3.54 -1.00000000 #> 279 -3.59 -1.00000000 #> 280 -3.66 -1.00000000 #> 281 -3.68 -1.00000000 #> 282 -3.75 -1.00000000 #> 283 -3.76 -1.00000000 #> 284 -3.78 -1.00000000 #> 285 -3.80 -1.00000000 #> 286 -3.80 -1.00000000 #> 287 -3.85 -1.00000000 #> 288 -3.89 -1.00000000 #> 289 -3.95 -1.00000000 #> 290 -4.29 -1.00000000 #> 291 -4.42 -1.00000000 #> 292 -4.48 -1.00000000 #> 293 -4.48 -1.00000000 #> 294 -4.53 -1.00000000 #> 295 -4.63 -1.00000000 #> 296 -4.73 -1.00000000 #> 297 -4.84 -1.00000000 #> 298 -4.89 -1.00000000 #> 299 -4.89 -1.00000000 #> 300 -5.26 -1.00000000 #> 301 -6.09 -1.00000000 #> 302 -6.29 -1.00000000 #> 303 -6.29 -1.00000000 #> 304 -6.89 -1.00000000 #> 305 -6.96 -1.00000000 #> 306 -7.00 -1.00000000 #> 307 -7.05 -1.00000000 #> 308 -8.30 -1.00000000 #> 309 -8.66 -1.00000000 #> 310 -9.03 -1.00000000 #> 311 -10.41 -1.00000000 #> 312 -7.89 -1.00000000 #> 313 -2.32 -1.00000000 #> 314 0.39 -1.00000000 #> 315 -2.90 -1.00000000 #> 316 -2.47 -1.00000000"},{"path":"https://probably.tidymodels.org/dev/reference/cal_apply.html","id":null,"dir":"Reference","previous_headings":"","what":"Applies a calibration to a set of existing predictions — cal_apply","title":"Applies a calibration to a set of existing predictions — cal_apply","text":"Applies calibration set existing predictions","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_apply.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Applies a calibration to a set of existing predictions — cal_apply","text":"","code":"cal_apply(.data, object, pred_class = NULL, parameters = NULL, ...) # S3 method for class 'data.frame' cal_apply(.data, object, pred_class = NULL, parameters = NULL, ...) # S3 method for class 'tune_results' cal_apply(.data, object, pred_class = NULL, parameters = NULL, ...) # S3 method for class 'cal_object' cal_apply(.data, object, pred_class = NULL, parameters = NULL, ...)"},{"path":"https://probably.tidymodels.org/dev/reference/cal_apply.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Applies a calibration to a set of existing predictions — cal_apply","text":".data object can process calibration object. object calibration object (cal_object). pred_class (Optional, classification ) Column identifier hard class predictions (factor vector). column adjusted based changes calibrated probability columns. parameters (Optional) optional tibble tuning parameter values can used filter predicted values processing. Applies tune_results objects. ... Optional arguments; currently unused.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_apply.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Applies a calibration to a set of existing predictions — cal_apply","text":"cal_apply() currently supports data.frames . extracts truth estimate columns names calibration object.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_apply.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Applies a calibration to a set of existing predictions — cal_apply","text":"","code":"# ------------------------------------------------------------------------------ # classification example w_calibration <- cal_estimate_logistic(segment_logistic, Class) cal_apply(segment_logistic, w_calibration) #> # A tibble: 1,010 × 3 #> .pred_poor .pred_good Class #> #> 1 0.974 0.0258 poor #> 2 0.930 0.0700 poor #> 3 0.220 0.780 good #> 4 0.205 0.795 good #> 5 0.976 0.0244 poor #> 6 0.590 0.410 good #> 7 0.777 0.223 good #> 8 0.135 0.865 good #> 9 0.977 0.0231 poor #> 10 0.770 0.230 poor #> # ℹ 1,000 more rows"},{"path":"https://probably.tidymodels.org/dev/reference/cal_binary_tables.html","id":null,"dir":"Reference","previous_headings":"","what":"Probability Calibration table — .cal_table_breaks","title":"Probability Calibration table — .cal_table_breaks","text":"Calibration table functions. require data.frame contains predictions probability columns. output another tibble segmented data compares accuracy probability actual outcome.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_binary_tables.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Probability Calibration table — .cal_table_breaks","text":"","code":".cal_table_breaks( .data, truth = NULL, estimate = NULL, .by = NULL, num_breaks = 10, conf_level = 0.9, event_level = c(\"auto\", \"first\", \"second\"), ... ) .cal_table_logistic( .data, truth = NULL, estimate = NULL, .by = NULL, conf_level = 0.9, smooth = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... ) .cal_table_windowed( .data, truth = NULL, estimate = NULL, .by = NULL, window_size = 0.1, step_size = window_size/2, conf_level = 0.9, event_level = c(\"auto\", \"first\", \"second\"), ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_binary_tables.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Probability Calibration table — .cal_table_breaks","text":".data ungrouped data frame object containing predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place. num_breaks number segments group probabilities. defaults 10. conf_level Confidence level use visualization. defaults 0.9. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". Defaults \"auto\", allows function decide one use based type model (binary, multi-class linear) ... Additional arguments passed tune_results object.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_binary_tables.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Probability Calibration table — .cal_table_breaks","text":".cal_table_breaks() - Splits data bins, based number breaks provided (num_breaks). bins even ranges, starting 0, ending 1. .cal_table_logistic() - Fits logistic spline regression (GAM) data. creates table predictions based 100 probabilities starting 0, ending 1. .cal_table_windowed() - Creates running percentage probability moves across proportion events.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_binary_tables.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Probability Calibration table — .cal_table_breaks","text":"","code":".cal_table_breaks( segment_logistic, Class, .pred_good ) #> # A tibble: 10 × 6 #> predicted_midpoint event_rate events total lower upper #> #> 1 0.05 0.0350 12 343 0.0208 0.0570 #> 2 0.15 0.0841 9 107 0.0461 0.145 #> 3 0.25 0.324 24 74 0.236 0.426 #> 4 0.35 0.366 26 71 0.272 0.471 #> 5 0.45 0.538 28 52 0.416 0.656 #> 6 0.55 0.473 26 55 0.357 0.591 #> 7 0.65 0.491 27 55 0.374 0.608 #> 8 0.75 0.691 38 55 0.572 0.790 #> 9 0.85 0.722 70 97 0.636 0.794 #> 10 0.95 0.851 86 101 0.779 0.905 .cal_table_logistic( segment_logistic, Class, .pred_good ) #> # A tibble: 101 × 4 #> estimate prob lower upper #> #> 1 0 0.0219 0.0143 0.0335 #> 2 0.01 0.0246 0.0165 0.0365 #> 3 0.02 0.0276 0.0190 0.0399 #> 4 0.03 0.0310 0.0219 0.0437 #> 5 0.04 0.0347 0.0250 0.0479 #> 6 0.05 0.0389 0.0286 0.0527 #> 7 0.06 0.0435 0.0325 0.0580 #> 8 0.07 0.0487 0.0369 0.0640 #> 9 0.08 0.0544 0.0418 0.0706 #> 10 0.09 0.0608 0.0472 0.0780 #> # ℹ 91 more rows .cal_table_windowed( segment_logistic, Class, .pred_good ) #> # A tibble: 21 × 6 #> predicted_midpoint event_rate events total lower upper #> #> 1 0.025 0.0233 6 258 0.0108 0.0468 #> 2 0.05 0.0350 12 343 0.0208 0.0570 #> 3 0.1 0.0559 8 143 0.0293 0.101 #> 4 0.15 0.0841 9 107 0.0461 0.145 #> 5 0.2 0.195 17 87 0.130 0.280 #> 6 0.25 0.324 24 74 0.236 0.426 #> 7 0.3 0.343 24 70 0.251 0.448 #> 8 0.35 0.366 26 71 0.272 0.471 #> 9 0.4 0.433 29 67 0.331 0.540 #> 10 0.45 0.538 28 52 0.416 0.656 #> # ℹ 11 more rows"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_beta.html","id":null,"dir":"Reference","previous_headings":"","what":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","title":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","text":"Uses Beta calibration model calculate new probabilities","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_beta.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","text":"","code":"cal_estimate_beta( .data, truth = NULL, shape_params = 2, location_params = 1, estimate = dplyr::starts_with(\".pred_\"), parameters = NULL, ... ) # S3 method for class 'data.frame' cal_estimate_beta( .data, truth = NULL, shape_params = 2, location_params = 1, estimate = dplyr::starts_with(\".pred_\"), parameters = NULL, ..., .by = NULL ) # S3 method for class 'tune_results' cal_estimate_beta( .data, truth = NULL, shape_params = 2, location_params = 1, estimate = dplyr::starts_with(\".pred_\"), parameters = NULL, ... ) # S3 method for class 'grouped_df' cal_estimate_beta( .data, truth = NULL, shape_params = 2, location_params = 1, estimate = NULL, parameters = NULL, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_beta.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","text":".data ungrouped data.frame object, tune_results object, contains predictions probability columns. truth column identifier true class results (factor). unquoted column name. shape_params Number shape parameters use. Accepted values 1 2. Defaults 2. location_params Number location parameters use. Accepted values 1 0. Defaults 1. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. parameters (Optional) optional tibble tuning parameter values can used filter predicted values processing. Applies tune_results objects. ... Additional arguments passed models routines used calculate new probabilities. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_beta.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","text":"function uses betacal::beta_calibration() function, retains resulting model.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_beta.html","id":"multiclass-extension","dir":"Reference","previous_headings":"","what":"Multiclass Extension","title":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","text":"method designed work two classes. multiclass, creates set \"one versus \" calibrations class. applied data, probability estimates re-normalized add one. final step might compromise calibration.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_beta.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","text":"Meelis Kull, Telmo M. Silva Filho, Peter Flach \"Beyond sigmoids: obtain well-calibrated probabilities binary classifiers beta calibration,\" Electronic Journal Statistics 11(2), 5052-5080, (2017)","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_beta.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uses a Beta calibration model to calculate new probabilities — cal_estimate_beta","text":"","code":"# It will automatically identify the probability columns # if passed a model fitted with tidymodels cal_estimate_beta(segment_logistic, Class) #> #> ── Probability Calibration #> Method: Beta calibration #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic.html","id":null,"dir":"Reference","previous_headings":"","what":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","title":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","text":"Uses Isotonic regression model calibrate model predictions.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","text":"","code":"cal_estimate_isotonic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), parameters = NULL, ... ) # S3 method for class 'data.frame' cal_estimate_isotonic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), parameters = NULL, ..., .by = NULL ) # S3 method for class 'tune_results' cal_estimate_isotonic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), parameters = NULL, ... ) # S3 method for class 'grouped_df' cal_estimate_isotonic( .data, truth = NULL, estimate = NULL, parameters = NULL, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","text":".data ungrouped data.frame object, tune_results object, contains predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. parameters (Optional) optional tibble tuning parameter values can used filter predicted values processing. Applies tune_results objects. ... Additional arguments passed models routines used calculate new probabilities. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","text":"function uses stats::isoreg() create obtain calibration values binary classification numeric regression.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic.html","id":"multiclass-extension","dir":"Reference","previous_headings":"","what":"Multiclass Extension","title":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","text":"method designed work two classes. multiclass, creates set \"one versus \" calibrations class. applied data, probability estimates re-normalized add one. final step might compromise calibration.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic.html","id":"references","dir":"Reference","previous_headings":"","what":"References","title":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","text":"Zadrozny, Bianca Elkan, Charles. (2002). Transforming Classifier Scores Accurate Multiclass Probability Estimates. Proceedings ACM SIGKDD International Conference Knowledge Discovery Data Mining.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uses an Isotonic regression model to calibrate model predictions. — cal_estimate_isotonic","text":"","code":"# ------------------------------------------------------------------------------ # Binary Classification # It will automatically identify the probability columns # if passed a model fitted with tidymodels cal_estimate_isotonic(segment_logistic, Class) #> #> ── Probability Calibration #> Method: Isotonic regression #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Unique Predicted Values: 90 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor # Specify the variable names in a vector of unquoted names cal_estimate_isotonic(segment_logistic, Class, c(.pred_poor, .pred_good)) #> #> ── Probability Calibration #> Method: Isotonic regression #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Unique Predicted Values: 80 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor # dplyr selector functions are also supported cal_estimate_isotonic(segment_logistic, Class, dplyr::starts_with(\".pred_\")) #> #> ── Probability Calibration #> Method: Isotonic regression #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Unique Predicted Values: 215 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor # ------------------------------------------------------------------------------ # Regression (numeric outcomes) cal_estimate_isotonic(boosting_predictions_oob, outcome, .pred) #> #> ── Probability Calibration #> Method: Isotonic regression #> Type: Regression #> Source class: Data Frame #> Data points: 2,000 #> Unique Predicted Values: 39 #> Truth variable: `outcome` #> Estimate variables: #> `.pred` ==> predictions"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic_boot.html","id":null,"dir":"Reference","previous_headings":"","what":"Uses a bootstrapped Isotonic regression model to calibrate probabilities — cal_estimate_isotonic_boot","title":"Uses a bootstrapped Isotonic regression model to calibrate probabilities — cal_estimate_isotonic_boot","text":"Uses bootstrapped Isotonic regression model calibrate probabilities","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic_boot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uses a bootstrapped Isotonic regression model to calibrate probabilities — cal_estimate_isotonic_boot","text":"","code":"cal_estimate_isotonic_boot( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), times = 10, parameters = NULL, ... ) # S3 method for class 'data.frame' cal_estimate_isotonic_boot( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), times = 10, parameters = NULL, ..., .by = NULL ) # S3 method for class 'tune_results' cal_estimate_isotonic_boot( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), times = 10, parameters = NULL, ... ) # S3 method for class 'grouped_df' cal_estimate_isotonic_boot( .data, truth = NULL, estimate = NULL, times = 10, parameters = NULL, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic_boot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uses a bootstrapped Isotonic regression model to calibrate probabilities — cal_estimate_isotonic_boot","text":".data ungrouped data.frame object, tune_results object, contains predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. times Number bootstraps. parameters (Optional) optional tibble tuning parameter values can used filter predicted values processing. Applies tune_results objects. ... Additional arguments passed models routines used calculate new probabilities. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic_boot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Uses a bootstrapped Isotonic regression model to calibrate probabilities — cal_estimate_isotonic_boot","text":"function uses stats::isoreg() create obtain calibration values. runs stats::isoreg() multiple times, time different seed. results saved inside returned cal_object.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic_boot.html","id":"multiclass-extension","dir":"Reference","previous_headings":"","what":"Multiclass Extension","title":"Uses a bootstrapped Isotonic regression model to calibrate probabilities — cal_estimate_isotonic_boot","text":"method designed work two classes. multiclass, creates set \"one versus \" calibrations class. applied data, probability estimates re-normalized add one. final step might compromise calibration.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_isotonic_boot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uses a bootstrapped Isotonic regression model to calibrate probabilities — cal_estimate_isotonic_boot","text":"","code":"# It will automatically identify the probability columns # if passed a model fitted with tidymodels cal_estimate_isotonic_boot(segment_logistic, Class) #> #> ── Probability Calibration #> Method: Bootstrapped isotonic regression #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor # Specify the variable names in a vector of unquoted names cal_estimate_isotonic_boot(segment_logistic, Class, c(.pred_poor, .pred_good)) #> #> ── Probability Calibration #> Method: Bootstrapped isotonic regression #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor # dplyr selector functions are also supported cal_estimate_isotonic_boot(segment_logistic, Class, dplyr::starts_with(\".pred\")) #> #> ── Probability Calibration #> Method: Bootstrapped isotonic regression #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_linear.html","id":null,"dir":"Reference","previous_headings":"","what":"Uses a linear regression model to calibrate numeric predictions — cal_estimate_linear","title":"Uses a linear regression model to calibrate numeric predictions — cal_estimate_linear","text":"Uses linear regression model calibrate numeric predictions","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_linear.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uses a linear regression model to calibrate numeric predictions — cal_estimate_linear","text":"","code":"cal_estimate_linear( .data, truth = NULL, estimate = dplyr::matches(\"^.pred$\"), smooth = TRUE, parameters = NULL, ..., .by = NULL ) # S3 method for class 'data.frame' cal_estimate_linear( .data, truth = NULL, estimate = dplyr::matches(\"^.pred$\"), smooth = TRUE, parameters = NULL, ..., .by = NULL ) # S3 method for class 'tune_results' cal_estimate_linear( .data, truth = NULL, estimate = dplyr::matches(\"^.pred$\"), smooth = TRUE, parameters = NULL, ... ) # S3 method for class 'grouped_df' cal_estimate_linear( .data, truth = NULL, estimate = NULL, smooth = TRUE, parameters = NULL, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_linear.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uses a linear regression model to calibrate numeric predictions — cal_estimate_linear","text":".data ungrouped data.frame object, tune_results object, contains prediction column. truth column identifier observed outcome data (numeric). unquoted column name. estimate Column identifier predicted values smooth Applies linear models. switches generalized additive model using spline terms TRUE, simple linear regression FALSE. parameters (Optional) optional tibble tuning parameter values can used filter predicted values processing. Applies tune_results objects. ... Additional arguments passed models routines used calculate new predictions. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_linear.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Uses a linear regression model to calibrate numeric predictions — cal_estimate_linear","text":"function uses existing modeling functions packages create calibration: stats::glm() used smooth set FALSE mgcv::gam() used smooth set TRUE methods estimate relationship unmodified predicted values remove trend cal_apply() invoked.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_linear.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uses a linear regression model to calibrate numeric predictions — cal_estimate_linear","text":"","code":"library(dplyr) library(ggplot2) head(boosting_predictions_test) #> # A tibble: 6 × 2 #> outcome .pred #> #> 1 -4.65 4.12 #> 2 1.12 1.83 #> 3 14.7 13.1 #> 4 36.3 19.1 #> 5 14.1 14.9 #> 6 -4.22 8.10 # ------------------------------------------------------------------------------ # Before calibration y_rng <- extendrange(boosting_predictions_test$outcome) boosting_predictions_test %>% ggplot(aes(outcome, .pred)) + geom_abline(lty = 2) + geom_point(alpha = 1 / 2) + geom_smooth(se = FALSE, col = \"blue\", linewidth = 1.2, alpha = 3 / 4) + coord_equal(xlim = y_rng, ylim = y_rng) + ggtitle(\"Before calibration\") #> `geom_smooth()` using method = 'loess' and formula = 'y ~ x' # ------------------------------------------------------------------------------ # Smoothed trend removal smoothed_cal <- boosting_predictions_oob %>% # It will automatically identify the predicted value columns when the # standard tidymodels naming conventions are used. cal_estimate_linear(outcome) smoothed_cal #> #> ── Regression Calibration #> Method: Generalized additive model #> Source class: Data Frame #> Data points: 2,000 #> Truth variable: `outcome` #> Estimate variable: `.pred` boosting_predictions_test %>% cal_apply(smoothed_cal) %>% ggplot(aes(outcome, .pred)) + geom_abline(lty = 2) + geom_point(alpha = 1 / 2) + geom_smooth(se = FALSE, col = \"blue\", linewidth = 1.2, alpha = 3 / 4) + coord_equal(xlim = y_rng, ylim = y_rng) + ggtitle(\"After calibration\") #> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_logistic.html","id":null,"dir":"Reference","previous_headings":"","what":"Uses a logistic regression model to calibrate probabilities — cal_estimate_logistic","title":"Uses a logistic regression model to calibrate probabilities — cal_estimate_logistic","text":"Uses logistic regression model calibrate probabilities","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_logistic.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uses a logistic regression model to calibrate probabilities — cal_estimate_logistic","text":"","code":"cal_estimate_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), smooth = TRUE, parameters = NULL, ... ) # S3 method for class 'data.frame' cal_estimate_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), smooth = TRUE, parameters = NULL, ..., .by = NULL ) # S3 method for class 'tune_results' cal_estimate_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), smooth = TRUE, parameters = NULL, ... ) # S3 method for class 'grouped_df' cal_estimate_logistic( .data, truth = NULL, estimate = NULL, smooth = TRUE, parameters = NULL, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_logistic.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uses a logistic regression model to calibrate probabilities — cal_estimate_logistic","text":".data ungrouped data.frame object, tune_results object, contains predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. smooth Applies logistic models. switches logistic spline TRUE, simple logistic regression FALSE. parameters (Optional) optional tibble tuning parameter values can used filter predicted values processing. Applies tune_results objects. ... Additional arguments passed models routines used calculate new probabilities. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_logistic.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Uses a logistic regression model to calibrate probabilities — cal_estimate_logistic","text":"function uses existing modeling functions packages create calibration: stats::glm() used smooth set FALSE mgcv::gam() used smooth set TRUE","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_logistic.html","id":"multiclass-extension","dir":"Reference","previous_headings":"","what":"Multiclass Extension","title":"Uses a logistic regression model to calibrate probabilities — cal_estimate_logistic","text":"method extended multiclass outcomes. However, natural multiclass extension cal_estimate_multinomial().","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_logistic.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uses a logistic regression model to calibrate probabilities — cal_estimate_logistic","text":"","code":"# It will automatically identify the probability columns # if passed a model fitted with tidymodels cal_estimate_logistic(segment_logistic, Class) #> #> ── Probability Calibration #> Method: Generalized additive model #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor # Specify the variable names in a vector of unquoted names cal_estimate_logistic(segment_logistic, Class, c(.pred_poor, .pred_good)) #> #> ── Probability Calibration #> Method: Generalized additive model #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor # dplyr selector functions are also supported cal_estimate_logistic(segment_logistic, Class, dplyr::starts_with(\".pred_\")) #> #> ── Probability Calibration #> Method: Generalized additive model #> Type: Binary #> Source class: Data Frame #> Data points: 1,010 #> Truth variable: `Class` #> Estimate variables: #> `.pred_good` ==> good #> `.pred_poor` ==> poor"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_multinomial.html","id":null,"dir":"Reference","previous_headings":"","what":"Uses a Multinomial calibration model to calculate new probabilities — cal_estimate_multinomial","title":"Uses a Multinomial calibration model to calculate new probabilities — cal_estimate_multinomial","text":"Uses Multinomial calibration model calculate new probabilities","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_multinomial.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Uses a Multinomial calibration model to calculate new probabilities — cal_estimate_multinomial","text":"","code":"cal_estimate_multinomial( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), smooth = TRUE, parameters = NULL, ... ) # S3 method for class 'data.frame' cal_estimate_multinomial( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), smooth = TRUE, parameters = NULL, ..., .by = NULL ) # S3 method for class 'tune_results' cal_estimate_multinomial( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), smooth = TRUE, parameters = NULL, ... ) # S3 method for class 'grouped_df' cal_estimate_multinomial( .data, truth = NULL, estimate = NULL, smooth = TRUE, parameters = NULL, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_multinomial.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Uses a Multinomial calibration model to calculate new probabilities — cal_estimate_multinomial","text":".data ungrouped data.frame object, tune_results object, contains predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. smooth Applies logistic models. switches logistic spline TRUE, simple logistic regression FALSE. parameters (Optional) optional tibble tuning parameter values can used filter predicted values processing. Applies tune_results objects. ... Additional arguments passed models routines used calculate new probabilities. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_multinomial.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Uses a Multinomial calibration model to calculate new probabilities — cal_estimate_multinomial","text":"smooth = FALSE, nnet::multinom() function used estimate model, otherwise mgcv::gam() used.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_estimate_multinomial.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Uses a Multinomial calibration model to calculate new probabilities — cal_estimate_multinomial","text":"","code":"library(modeldata) library(parsnip) library(dplyr) f <- list( ~ -0.5 + 0.6 * abs(A), ~ ifelse(A > 0 & B > 0, 1.0 + 0.2 * A / B, -2), ~ -0.6 * A + 0.50 * B - A * B ) set.seed(1) tr_dat <- sim_multinomial(500, eqn_1 = f[[1]], eqn_2 = f[[2]], eqn_3 = f[[3]]) cal_dat <- sim_multinomial(500, eqn_1 = f[[1]], eqn_2 = f[[2]], eqn_3 = f[[3]]) te_dat <- sim_multinomial(500, eqn_1 = f[[1]], eqn_2 = f[[2]], eqn_3 = f[[3]]) set.seed(2) rf_fit <- rand_forest() %>% set_mode(\"classification\") %>% set_engine(\"randomForest\") %>% fit(class ~ ., data = tr_dat) cal_pred <- predict(rf_fit, cal_dat, type = \"prob\") %>% bind_cols(cal_dat) te_pred <- predict(rf_fit, te_dat, type = \"prob\") %>% bind_cols(te_dat) cal_plot_windowed(cal_pred, truth = class, window_size = 0.1, step_size = 0.03) smoothed_mn <- cal_estimate_multinomial(cal_pred, truth = class) new_test_pred <- cal_apply(te_pred, smoothed_mn) cal_plot_windowed(new_test_pred, truth = class, window_size = 0.1, step_size = 0.03)"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_breaks.html","id":null,"dir":"Reference","previous_headings":"","what":"Probability calibration plots via binning — cal_plot_breaks","title":"Probability calibration plots via binning — cal_plot_breaks","text":"plot created assess whether observed rate event predicted probability event model. sequence even, mutually exclusive bins created zero one. bin, data whose predicted probability falls within range bin used calculate observed event rate (along confidence intervals event rate). predictions well calibrated, fitted curve align diagonal line.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_breaks.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Probability calibration plots via binning — cal_plot_breaks","text":"","code":"cal_plot_breaks( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), num_breaks = 10, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... ) # S3 method for class 'data.frame' cal_plot_breaks( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), num_breaks = 10, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ..., .by = NULL ) # S3 method for class 'tune_results' cal_plot_breaks( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), num_breaks = 10, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... ) # S3 method for class 'grouped_df' cal_plot_breaks( .data, truth = NULL, estimate = NULL, num_breaks = 10, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_breaks.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Probability calibration plots via binning — cal_plot_breaks","text":".data ungrouped data frame object containing predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. num_breaks number segments group probabilities. defaults 10. conf_level Confidence level use visualization. defaults 0.9. include_ribbon Flag indicates ribbon layer included. defaults TRUE. include_rug Flag indicates Rug layer included. defaults TRUE. plot, top side shows frequency event occurring, bottom frequency event occurring. include_points Flag indicates point layer included. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". Defaults \"auto\", allows function decide one use based type model (binary, multi-class linear) ... Additional arguments passed tune_results object. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_breaks.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Probability calibration plots via binning — cal_plot_breaks","text":"ggplot object.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_breaks.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Probability calibration plots via binning — cal_plot_breaks","text":"","code":"library(ggplot2) library(dplyr) cal_plot_breaks( segment_logistic, Class, .pred_good ) cal_plot_logistic( segment_logistic, Class, .pred_good ) cal_plot_windowed( segment_logistic, Class, .pred_good ) # The functions support dplyr groups model <- glm(Class ~ .pred_good, segment_logistic, family = \"binomial\") preds <- predict(model, segment_logistic, type = \"response\") gl <- segment_logistic %>% mutate(.pred_good = 1 - preds, source = \"glm\") combined <- bind_rows(mutate(segment_logistic, source = \"original\"), gl) combined %>% cal_plot_logistic(Class, .pred_good, .by = source) # The grouping can be faceted in ggplot2 combined %>% cal_plot_logistic(Class, .pred_good, .by = source) + facet_wrap(~source) + theme(legend.position = \"\")"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_logistic.html","id":null,"dir":"Reference","previous_headings":"","what":"Probability calibration plots via logistic regression — cal_plot_logistic","title":"Probability calibration plots via logistic regression — cal_plot_logistic","text":"logistic regression model fit original outcome data used outcome estimated class probabilities one class used predictor. smooth = TRUE, generalized additive model fit using mgcv::gam() default smoothing method. Otherwise, simple logistic regression used. predictions well calibrated, fitted curve align diagonal line. Confidence intervals fitted line also shown.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_logistic.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Probability calibration plots via logistic regression — cal_plot_logistic","text":"","code":"cal_plot_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), conf_level = 0.9, smooth = TRUE, include_rug = TRUE, include_ribbon = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... ) # S3 method for class 'data.frame' cal_plot_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), conf_level = 0.9, smooth = TRUE, include_rug = TRUE, include_ribbon = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ..., .by = NULL ) # S3 method for class 'tune_results' cal_plot_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), conf_level = 0.9, smooth = TRUE, include_rug = TRUE, include_ribbon = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... ) # S3 method for class 'grouped_df' cal_plot_logistic( .data, truth = NULL, estimate = NULL, conf_level = 0.9, smooth = TRUE, include_rug = TRUE, include_ribbon = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_logistic.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Probability calibration plots via logistic regression — cal_plot_logistic","text":".data ungrouped data frame object containing predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. conf_level Confidence level use visualization. defaults 0.9. smooth logical using generalized additive model smooth terms predictor via mgcv::gam() mgcv::s(). include_rug Flag indicates Rug layer included. defaults TRUE. plot, top side shows frequency event occurring, bottom frequency event occurring. include_ribbon Flag indicates ribbon layer included. defaults TRUE. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". Defaults \"auto\", allows function decide one use based type model (binary, multi-class linear) ... Additional arguments passed tune_results object. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_logistic.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Probability calibration plots via logistic regression — cal_plot_logistic","text":"ggplot object.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_logistic.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Probability calibration plots via logistic regression — cal_plot_logistic","text":"","code":"library(ggplot2) library(dplyr) cal_plot_logistic( segment_logistic, Class, .pred_good ) cal_plot_logistic( segment_logistic, Class, .pred_good, smooth = FALSE )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_regression.html","id":null,"dir":"Reference","previous_headings":"","what":"Regression calibration plots — cal_plot_regression","title":"Regression calibration plots — cal_plot_regression","text":"scatter plot observed predicted values computed axes . smooth = TRUE, generalized additive model fit shown. predictions well calibrated, fitted curve align diagonal line.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_regression.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Regression calibration plots — cal_plot_regression","text":"","code":"cal_plot_regression(.data, truth = NULL, estimate = NULL, smooth = TRUE, ...) # S3 method for class 'data.frame' cal_plot_regression( .data, truth = NULL, estimate = NULL, smooth = TRUE, ..., .by = NULL ) # S3 method for class 'tune_results' cal_plot_regression(.data, truth = NULL, estimate = NULL, smooth = TRUE, ...) # S3 method for class 'grouped_df' cal_plot_regression(.data, truth = NULL, estimate = NULL, smooth = TRUE, ...)"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_regression.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Regression calibration plots — cal_plot_regression","text":".data ungrouped data frame object containing prediction column. truth column identifier true results (numeric). unquoted column name. estimate column identifier predictions. unquoted column name smooth logical: smoother curve added. ... Additional arguments passed ggplot2::geom_point(). .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_regression.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Regression calibration plots — cal_plot_regression","text":"ggplot object.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_regression.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Regression calibration plots — cal_plot_regression","text":"","code":"cal_plot_regression(boosting_predictions_oob, outcome, .pred) cal_plot_regression(boosting_predictions_oob, outcome, .pred, alpha = 1 / 6, cex = 3, smooth = FALSE ) cal_plot_regression(boosting_predictions_oob, outcome, .pred, .by = id, alpha = 1 / 6, cex = 3, smooth = FALSE )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_windowed.html","id":null,"dir":"Reference","previous_headings":"","what":"Probability calibration plots via moving windows — cal_plot_windowed","title":"Probability calibration plots via moving windows — cal_plot_windowed","text":"plot created assess whether observed rate event sample predicted probability event model. similar cal_plot_breaks(), except bins overlapping. sequence bins created zero one. bin, data whose predicted probability falls within range bin used calculate observed event rate (along confidence intervals event rate). predictions well calibrated, fitted curve align diagonal line.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_windowed.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Probability calibration plots via moving windows — cal_plot_windowed","text":"","code":"cal_plot_windowed( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), window_size = 0.1, step_size = window_size/2, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... ) # S3 method for class 'data.frame' cal_plot_windowed( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), window_size = 0.1, step_size = window_size/2, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ..., .by = NULL ) # S3 method for class 'tune_results' cal_plot_windowed( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), window_size = 0.1, step_size = window_size/2, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... ) # S3 method for class 'grouped_df' cal_plot_windowed( .data, truth = NULL, estimate = NULL, window_size = 0.1, step_size = window_size/2, conf_level = 0.9, include_ribbon = TRUE, include_rug = TRUE, include_points = TRUE, event_level = c(\"auto\", \"first\", \"second\"), ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_windowed.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Probability calibration plots via moving windows — cal_plot_windowed","text":".data ungrouped data frame object containing predictions probability columns. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. window_size size segments. Used windowed probability calculations. defaults 10% segments. step_size gap segments. Used windowed probability calculations. defaults half size window_size conf_level Confidence level use visualization. defaults 0.9. include_ribbon Flag indicates ribbon layer included. defaults TRUE. include_rug Flag indicates Rug layer included. defaults TRUE. plot, top side shows frequency event occurring, bottom frequency event occurring. include_points Flag indicates point layer included. event_level single string. Either \"first\" \"second\" specify level truth consider \"event\". Defaults \"auto\", allows function decide one use based type model (binary, multi-class linear) ... Additional arguments passed tune_results object. .column identifier grouping variable. single unquoted column name selects qualitative variable grouping. Default NULL. .= NULL grouping take place.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_windowed.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Probability calibration plots via moving windows — cal_plot_windowed","text":"ggplot object.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_plot_windowed.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Probability calibration plots via moving windows — cal_plot_windowed","text":"","code":"library(ggplot2) library(dplyr) cal_plot_windowed( segment_logistic, Class, .pred_good ) # More breaks cal_plot_windowed( segment_logistic, Class, .pred_good, window_size = 0.05 )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_beta.html","id":null,"dir":"Reference","previous_headings":"","what":"Measure performance with and without using Beta calibration — cal_validate_beta","title":"Measure performance with and without using Beta calibration — cal_validate_beta","text":"function uses resampling measure effect calibrating predicted values.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_beta.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measure performance with and without using Beta calibration — cal_validate_beta","text":"","code":"cal_validate_beta( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'resample_results' cal_validate_beta( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'rset' cal_validate_beta( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'tune_results' cal_validate_beta( .data, truth = NULL, estimate = NULL, metrics = NULL, save_pred = FALSE, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_beta.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measure performance with and without using Beta calibration — cal_validate_beta","text":".data rset object results tune::fit_resamples() .predictions column. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. metrics set metrics passed created via yardstick::metric_set() save_pred Indicates whether column post-calibration predictions. ... Options pass cal_estimate_beta(), shape_params location_params arguments.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_beta.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measure performance with and without using Beta calibration — cal_validate_beta","text":"original object .metrics_cal column , optionally, additional .predictions_cal column. class cal_rset also added.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_beta.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measure performance with and without using Beta calibration — cal_validate_beta","text":"functions designed calculate performance without calibration. use resampling measure --sample effectiveness. two ways pass data : data frame predictions, rset object can created via rsample functions. See example . already made resampling object original data used tune::fit_resamples(), can pass object calibration function use resampling scheme. different resampling scheme used, run tune::collect_predictions() object use process previous bullet point. Please note functions apply tune_result objects. notion \"validation\" implies tuning parameter selection resolved. collect_predictions() can used aggregate metrics analysis.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_beta.html","id":"performance-metrics","dir":"Reference","previous_headings":"","what":"Performance Metrics","title":"Measure performance with and without using Beta calibration — cal_validate_beta","text":"default, average Brier scores returned. appropriate yardstick::metric_set() can used. validation function compares average metrics , calibration.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_beta.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measure performance with and without using Beta calibration — cal_validate_beta","text":"","code":"library(dplyr) segment_logistic %>% rsample::vfold_cv() %>% cal_validate_beta(Class) #> # 10-fold cross-validation #> # A tibble: 10 × 4 #> splits id .metrics .metrics_cal #> #> 1 Fold01 #> 2 Fold02 #> 3 Fold03 #> 4 Fold04 #> 5 Fold05 #> 6 Fold06 #> 7 Fold07 #> 8 Fold08 #> 9 Fold09 #> 10 Fold10 "},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic.html","id":null,"dir":"Reference","previous_headings":"","what":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","title":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","text":"function uses resampling measure effect calibrating predicted values.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","text":"","code":"cal_validate_isotonic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'resample_results' cal_validate_isotonic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'rset' cal_validate_isotonic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'tune_results' cal_validate_isotonic( .data, truth = NULL, estimate = NULL, metrics = NULL, save_pred = FALSE, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","text":".data rset object results tune::fit_resamples() .predictions column. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. metrics set metrics passed created via yardstick::metric_set() save_pred Indicates whether column post-calibration predictions. ... Options pass cal_estimate_logistic(), smooth argument.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","text":"original object .metrics_cal column , optionally, additional .predictions_cal column. class cal_rset also added.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","text":"functions designed calculate performance without calibration. use resampling measure --sample effectiveness. two ways pass data : data frame predictions, rset object can created via rsample functions. See example . already made resampling object original data used tune::fit_resamples(), can pass object calibration function use resampling scheme. different resampling scheme used, run tune::collect_predictions() object use process previous bullet point. Please note functions apply tune_result objects. notion \"validation\" implies tuning parameter selection resolved. collect_predictions() can used aggregate metrics analysis.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic.html","id":"performance-metrics","dir":"Reference","previous_headings":"","what":"Performance Metrics","title":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","text":"default, average Brier scores (classification calibration) root mean squared error (regression) returned. appropriate yardstick::metric_set() can used. validation function compares average metrics , calibration.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measure performance with and without using isotonic regression calibration — cal_validate_isotonic","text":"","code":"library(dplyr) segment_logistic %>% rsample::vfold_cv() %>% cal_validate_isotonic(Class) #> # 10-fold cross-validation #> # A tibble: 10 × 4 #> splits id .metrics .metrics_cal #> #> 1 Fold01 #> 2 Fold02 #> 3 Fold03 #> 4 Fold04 #> 5 Fold05 #> 6 Fold06 #> 7 Fold07 #> 8 Fold08 #> 9 Fold09 #> 10 Fold10 "},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic_boot.html","id":null,"dir":"Reference","previous_headings":"","what":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","title":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","text":"function uses resampling measure effect calibrating predicted values.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic_boot.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","text":"","code":"cal_validate_isotonic_boot( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'resample_results' cal_validate_isotonic_boot( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'rset' cal_validate_isotonic_boot( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'tune_results' cal_validate_isotonic_boot( .data, truth = NULL, estimate = NULL, metrics = NULL, save_pred = FALSE, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic_boot.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","text":".data rset object results tune::fit_resamples() .predictions column. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. metrics set metrics passed created via yardstick::metric_set() save_pred Indicates whether column post-calibration predictions. ... Options pass cal_estimate_isotonic_boot(), times argument.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic_boot.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","text":"original object .metrics_cal column , optionally, additional .predictions_cal column. class cal_rset also added.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic_boot.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","text":"functions designed calculate performance without calibration. use resampling measure --sample effectiveness. two ways pass data : data frame predictions, rset object can created via rsample functions. See example . already made resampling object original data used tune::fit_resamples(), can pass object calibration function use resampling scheme. different resampling scheme used, run tune::collect_predictions() object use process previous bullet point. Please note functions apply tune_result objects. notion \"validation\" implies tuning parameter selection resolved. collect_predictions() can used aggregate metrics analysis.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic_boot.html","id":"performance-metrics","dir":"Reference","previous_headings":"","what":"Performance Metrics","title":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","text":"default, average Brier scores (classification calibration) root mean squared error (regression) returned. appropriate yardstick::metric_set() can used. validation function compares average metrics , calibration.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_isotonic_boot.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measure performance with and without using bagged isotonic regression calibration — cal_validate_isotonic_boot","text":"","code":"library(dplyr) segment_logistic %>% rsample::vfold_cv() %>% cal_validate_isotonic_boot(Class) #> # 10-fold cross-validation #> # A tibble: 10 × 4 #> splits id .metrics .metrics_cal #> #> 1 Fold01 #> 2 Fold02 #> 3 Fold03 #> 4 Fold04 #> 5 Fold05 #> 6 Fold06 #> 7 Fold07 #> 8 Fold08 #> 9 Fold09 #> 10 Fold10 "},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_linear.html","id":null,"dir":"Reference","previous_headings":"","what":"Measure performance with and without using linear regression calibration — cal_validate_linear","title":"Measure performance with and without using linear regression calibration — cal_validate_linear","text":"Measure performance without using linear regression calibration","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_linear.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measure performance with and without using linear regression calibration — cal_validate_linear","text":"","code":"cal_validate_linear( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'resample_results' cal_validate_linear( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'rset' cal_validate_linear( .data, truth = NULL, estimate = dplyr::starts_with(\".pred\"), metrics = NULL, save_pred = FALSE, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_linear.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measure performance with and without using linear regression calibration — cal_validate_linear","text":".data rset object results tune::fit_resamples() .predictions column. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. metrics set metrics passed created via yardstick::metric_set() save_pred Indicates whether column post-calibration predictions. ... Options pass cal_estimate_logistic(), smooth argument.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_linear.html","id":"performance-metrics","dir":"Reference","previous_headings":"","what":"Performance Metrics","title":"Measure performance with and without using linear regression calibration — cal_validate_linear","text":"default, average root mean square error (RMSE) returned. appropriate yardstick::metric_set() can used. validation function compares average metrics , calibration.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_linear.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measure performance with and without using linear regression calibration — cal_validate_linear","text":"","code":"library(dplyr) library(yardstick) library(rsample) head(boosting_predictions_test) #> # A tibble: 6 × 2 #> outcome .pred #> #> 1 -4.65 4.12 #> 2 1.12 1.83 #> 3 14.7 13.1 #> 4 36.3 19.1 #> 5 14.1 14.9 #> 6 -4.22 8.10 reg_stats <- metric_set(rmse, ccc) set.seed(828) boosting_predictions_oob %>% # Resample with 10-fold cross-validation vfold_cv() %>% cal_validate_linear(truth = outcome, smooth = FALSE, metrics = reg_stats) #> # 10-fold cross-validation #> # A tibble: 10 × 4 #> splits id .metrics .metrics_cal #> #> 1 Fold01 #> 2 Fold02 #> 3 Fold03 #> 4 Fold04 #> 5 Fold05 #> 6 Fold06 #> 7 Fold07 #> 8 Fold08 #> 9 Fold09 #> 10 Fold10 "},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_logistic.html","id":null,"dir":"Reference","previous_headings":"","what":"Measure performance with and without using logistic calibration — cal_validate_logistic","title":"Measure performance with and without using logistic calibration — cal_validate_logistic","text":"function uses resampling measure effect calibrating predicted values.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_logistic.html","id":"ref-usage","dir":"Reference","previous_headings":"","what":"Usage","title":"Measure performance with and without using logistic calibration — cal_validate_logistic","text":"","code":"cal_validate_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'resample_results' cal_validate_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'rset' cal_validate_logistic( .data, truth = NULL, estimate = dplyr::starts_with(\".pred_\"), metrics = NULL, save_pred = FALSE, ... ) # S3 method for class 'tune_results' cal_validate_logistic( .data, truth = NULL, estimate = NULL, metrics = NULL, save_pred = FALSE, ... )"},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_logistic.html","id":"arguments","dir":"Reference","previous_headings":"","what":"Arguments","title":"Measure performance with and without using logistic calibration — cal_validate_logistic","text":".data rset object results tune::fit_resamples() .predictions column. truth column identifier true class results (factor). unquoted column name. estimate vector column identifiers, one dplyr selector functions choose variables contains class probabilities. defaults prefix used tidymodels (.pred_). order identifiers considered order levels truth variable. metrics set metrics passed created via yardstick::metric_set() save_pred Indicates whether column post-calibration predictions. ... Options pass cal_estimate_logistic(), smooth argument.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_logistic.html","id":"value","dir":"Reference","previous_headings":"","what":"Value","title":"Measure performance with and without using logistic calibration — cal_validate_logistic","text":"original object .metrics_cal column , optionally, additional .predictions_cal column. class cal_rset also added.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_logistic.html","id":"details","dir":"Reference","previous_headings":"","what":"Details","title":"Measure performance with and without using logistic calibration — cal_validate_logistic","text":"functions designed calculate performance without calibration. use resampling measure --sample effectiveness. two ways pass data : data frame predictions, rset object can created via rsample functions. See example . already made resampling object original data used tune::fit_resamples(), can pass object calibration function use resampling scheme. different resampling scheme used, run tune::collect_predictions() object use process previous bullet point. Please note functions apply tune_result objects. notion \"validation\" implies tuning parameter selection resolved. collect_predictions() can used aggregate metrics analysis.","code":""},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_logistic.html","id":"performance-metrics","dir":"Reference","previous_headings":"","what":"Performance Metrics","title":"Measure performance with and without using logistic calibration — cal_validate_logistic","text":"default, average Brier scores returned. appropriate yardstick::metric_set() can used. validation function compares average metrics , calibration.","code":""},{"path":[]},{"path":"https://probably.tidymodels.org/dev/reference/cal_validate_logistic.html","id":"ref-examples","dir":"Reference","previous_headings":"","what":"Examples","title":"Measure performance with and without using logistic calibration — cal_validate_logistic","text":"","code":"library(dplyr) # --------------------------------------------------------------------------- # classification example segment_logistic %>% rsample::vfold_cv() %>% cal_validate_logistic(Class) #> # 10-fold cross-validation #> # A tibble: 10 × 4 #> splits id .metrics .metrics_cal #> #> 1 Fold01 #> 2 Fold02 #> 3 Fold03 #> 4 Fold04 #> 5