-
Notifications
You must be signed in to change notification settings - Fork 96
/
Copy pathboxinfl.sas
143 lines (139 loc) · 5.04 KB
/
boxinfl.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/*------------------------------------------------------------------*/
/* NAME: boxinfl.sas */
/* PURPOSE: Computes Atkinson's score-test for power transformation */
/* and produces a constructed-variable influence plot for */
/* the impact of observations on the choise of power. */
/*------------------------------------------------------------------*/
/* Author: Michael Friendly <[email protected]> *
* Created: 12 Nov 1991 11:21:20 *
* Revised: 10 Dec 1991 16:43:59 *
* Version: 1.1 *
* *
*-------------------------------------------------------------------*/
%macro boxinfl(
resp=, /* name of response variable */
model=, /* independent variables in regression */
data=_last_, /* input dataset */
id=, /* ID variable for observations */
class=, /* names of class variables if any */
gplot=INFL, /* Graphic plot? */
pplot=INFL); /* Printer plot? */
data _cvar_;
set &data;
if &resp ^=. ;
logy = log(&resp); *--find geometric mean =mean log(y);
%if %length(&id)=0 %then %do;
%let id =_id_;
_id_ = _n_;
%end;
proc means noprint;
var logy;
output out=_gm_ mean=gmean;
data _cvar_;
set _cvar_;
drop gmean;
if _n_=1 then do;
set _gm_;
gmean = exp(gmean);
end;
g = &resp * ( log ( &resp / gmean ) - 1);
label g='Constructed variable';
/* Handle class variables via dummy variables from GLMMOD
Change the terms in the model to reflect the COL1-COLn
variables produced by GLMMOD.
*/
%if %length(&class)>0 %then %do;
proc glmmod data=_cvar_ outdesign=_cmat_ outparm=_parm_ noprint;
class &class;
model &resp = g &model / noint;
data _null_; set _parm_ end=eof;
by effname notsorted;
length modl $100;
retain modl ' ';
if effname='G' then return;
else do;
keepit = not last.effname;
effname= ' COL'||trim(left(put(_colnum_,2.)));
end;
if keepit then modl = trim(modl) || effname;
if eof then call symput('model',modl);
run;
data _cvar_;
merge _cmat_(rename=(col1 = g))
_cvar_(keep=&id);
%end;
/* produce values for partial-regression plot of residuals from
/ &resp vs. residuals from constructed variable. 1-slope =
/ power for transformation based on score test
/ -------------------------------------------------------------*/
proc reg data=_cvar_ outest=_parm_ ;
id &id;
m0: model &resp=&model g; * y = Xb + lambda g;
output out=m0 rstudent=_resy_ cookd=_infl_;
*proc print data=_parm_;
proc reg data=_cvar_ noprint;
id &id;
m1: model &resp=&model;
output out=m1 r=_resx_; * y - Xb;
m2: model g =&model;
output out=m2 r=_resg_; * g - Xb;
data _part_;
keep _resx_ _resg_ &id _resy_ _infl_;
merge m0 m1 m2;
proc means noprint data=_part_;
var _resg_;
output out=_gm_ min=rg_min max=rg_max;
data _slope_;
set _parm_(keep=_model_ g);
length function $8 text $20;
if (_model_='M0');
xsys='1'; ysys='1';
x=8; y=90;
function = 'LABEL';
position='3'; size=1.2;
text = 'Slope: '|| put(g,best5.); output;
position='F';
text = 'Power: '|| put(round(1-g,.25),best5.); output;
call symput('slope',put(g,best5.));
call symput('power',put(round(1-g,.25),best5.));
run;
%if %index(&pplot,INFL)>0 %then %do;
proc plot data=_part_;
plot _resx_ * _resg_ = &id / vref=0;
label _resx_ ="Partial &resp"
_resg_ ='Partial Constructed Variable';
title 'Partial Regression Influence plot for Box-Cox power';
title2 "Slope: &slope Power: &power";
run;quit;
%end;
%if %index(&gplot,INFL)>0 %then %do;
data _anno_;
set _part_ nobs=n;
length text $16;
if _n_=1 then set _gm_;
if abs(_resg_/(rg_max-rg_min)) > .5
| abs(_resy_) > 3
| _infl_> 4/(n-1);
xsys='2'; ysys='2';
x = _resg_; y=_resx_ ;
function='LABEL';
if _resg_ > 0 then position='1';
else position='3';
text=&id;
data _anno_;
set _slope_ _anno_;
proc gplot data=_part_;
plot _resx_ * _resg_ /
vaxis=axis1 haxis=axis1 vminor=1 hminor=1
vref=0 lvref=34 anno=_anno_;
axis1 label=(h=1.3) value=(h=1.2);
symbol i=rl h=1.3 v=circle;
label _resx_ ="Partial &resp"
_resg_ ='Partial Constructed Variable';
title h=1.4 'Partial Regression Influence plot for Box-Cox power';
run;quit;
%end;
proc datasets nolist;
delete _cvar_ _parm_ m1 m2 _slope_;
quit;
%mend;