로그인   |  회원가입  |  사이트맵  |  Contact Us
  아이디 저장하기
 
홈 > SAS Tech & Tip > SAS 고수
[결측값 대체] 결측값 대체 후 SCORE 산출(mi 와 mianalyze 프로시져 사용) 2017.05.30
백승민 110 0
http://www.mysas.co.kr/SAS_tiptech/a_all.asp?b_no=7308&gotopage=1&con=subject&keyword=&cmd=content&bd_no=33&gubun=

/* Missing-value imputation using continuous variables. */

/* Build Fitness1 from in-stream data.
   Each observation is an (Oxygen, RunTime, RunPulse) triple; a lone period
   denotes a missing value. The trailing @@ holds the current data line so
   that more than one observation can be read from it. */
data Fitness1;
   input Oxygen RunTime RunPulse @@;
   datalines;
44.609  11.37  178     45.313  10.07  185
54.297   8.65  156     59.571    .      .
49.874   9.22    .     44.811  11.63  176
  .     11.95  176          .  10.85    .
39.442  13.08  174     60.055   8.63  170
50.541    .      .     37.388  14.03  186
44.754  11.12  176     47.273    .      .
51.855  10.33  166     49.156   8.95  180
40.836  10.95  168     46.672  10.00    .
46.774  10.25    .     50.388  10.08  168
39.407  12.63  174     46.080  11.17  156
45.441   9.63  164       .      8.92    .
45.118  11.08    .     39.203  12.88  168
45.790  10.47  186     50.545   9.93  148
48.673   9.40  186     47.920  11.50  170
47.467  10.50  170
;

 

 

/* Multiply impute the missing values of Oxygen, RunTime and RunPulse.
   SEED= fixes the random number stream, NOPRINT suppresses displayed output,
   and the imputed data sets are written to outmi. */
proc mi data=Fitness1 out=outmi seed=3237851 noprint;
   var Oxygen RunTime RunPulse;
run;

 

/* Regress Oxygen on RunTime and RunPulse once per imputed data set
   (BY _Imputation_). The INVERSE option requests the X'X inverse matrix.
   ODS OUTPUT saves the parameter estimates to glmparms and the X'X inverse
   to glmxpxi for later use by PROC MIANALYZE. */
proc glm data=outmi;
   by _Imputation_;
   model Oxygen = RunTime RunPulse / inverse;
   ods output ParameterEstimates=glmparms InvXPX=glmxpxi;
quit;

 

/* List the first six rows of the per-imputation GLM parameter estimates. */
proc print data=glmparms(obs=6);
   title 'GLM Model Coefficients (First Two Imputations)';
   var _Imputation_ Parameter Estimate StdErr;
run;

 

/* List the first eight rows of the per-imputation X'X inverse matrices. */
proc print data=glmxpxi(obs=8);
   title 'GLM X''X Inverse Matrices (First Two Imputations)';
   var _Imputation_ Parameter Intercept RunTime RunPulse;
run;

 

/* Combine the per-imputation GLM results (estimates from glmparms, X'X
   inverses from glmxpxi) using 28 complete-data error degrees of freedom,
   and save the combined parameter estimates to parms_1. */
proc mianalyze parms=glmparms xpxi=glmxpxi edf=28;
   ods output ParameterEstimates=parms_1;
   modeleffects Intercept RunTime RunPulse;
run;

 

/* Note from the original author: this could also be built from the parms_1
   data set. Here the coefficients are typed in by hand. */

/* One-row scoring data set for PROC SCORE (TYPE=PARMS): the _TYPE_ and
   _MODEL_ identifiers plus the intercept and the two slope coefficients. */
data parms_2;
   length _MODEL_ $7 _TYPE_ $5;
   _MODEL_ = 'missing';
   _TYPE_  = 'PARMS';

   Intercept = 92.700420;
   RunTime   = -3.030325;
   RunPulse  = -0.079621;
run;

 

/* Score Fitness1 with the coefficients stored in parms_2 (TYPE=PARMS),
   using RunTime and RunPulse as inputs; results are written to RScoreP. */
proc score data=Fitness1 score=parms_2 type=parms out=RScoreP;
   var RunTime RunPulse;
run;



/* Missing-value imputation that includes a categorical variable. */

/* Build Fish2 from in-stream data.
   Each observation is Species (character) followed by Length, Height and
   Width; a lone period denotes a missing value. The trailing @@ holds the
   current data line so that two observations can be read from it. */
title 'Fish Measurement Data';
data Fish2;
   input Species $ Length Height Width @@;
   datalines;
Bream   30.0  11.520  4.020         .   31.2  12.480  4.306
Bream   31.1  12.378  4.696     Bream   33.5  12.730  4.456
    .   34.0  12.444   .        Bream   34.7  13.602  4.927
Bream   34.5  14.180  5.279     Bream   35.0  12.670  4.690
Bream   35.1  14.005  4.844     Bream   36.2  14.227  4.959
    .   36.2  14.263   .        Bream   36.2  14.371  4.815
Bream   36.4  13.759  4.368     Bream   37.3  13.913  5.073
Bream   37.2  14.954  5.171     Bream   37.2  15.438  5.580
Bream   38.3  14.860  5.285     Bream   38.5  14.938  5.198
    .   38.6  15.633  5.134     Bream   38.7  14.474  5.728
Bream   39.5  15.129  5.570         .   39.2  15.994   .
Bream   39.7  15.523  5.280     Bream   40.6  15.469  6.131
    .   40.5    .      .        Bream   40.9  16.360  6.053
Bream   40.6  16.362  6.090     Bream   41.5  16.517  5.852
Bream   41.6  16.890  6.198     Bream   42.6  18.957  6.603
Bream   44.1  18.037  6.306     Bream   44.0  18.084  6.292
Bream   45.3  18.754  6.750     Bream   45.9  18.635  6.747
Bream   46.5  17.624  6.371
Pike    34.8   5.568  3.376     Pike    37.8   5.708  4.158
Pike    38.8   5.936  4.384        .    39.8    .      .
Pike    40.5   7.290  4.577     Pike    41.0   6.396  3.977
   .    45.5   7.280  4.323     Pike    45.5   6.825  4.459
Pike    45.8   7.786  5.130     Pike    48.0   6.960  4.896
Pike    48.7   7.792  4.870     Pike    51.2   7.680  5.376
Pike    55.1   8.926  6.171        .    59.7  10.686   .
Pike    64.0   9.600  6.144     Pike    64.0   9.600  6.144
Pike    68.0  10.812  7.480
;


/* Multiply impute Fish2 with monotone methods: regression for Width and a
   discriminant function for the classification variable Species, using
   Length, Height and Width as its covariates. Imputed data go to outfish. */
proc mi data=Fish2 out=outfish seed=1305417;
   class Species;
   monotone reg(Width) discrim(Species = Length Height Width);
   var Length Height Width Species;
run;

/* Fit Length on Species, Height and Width once per imputed data set.
   SOLUTION and COVB request the fixed-effect estimates and their covariance
   matrix, which ODS OUTPUT saves to mxparms and mxcovb. */
proc mixed data=outfish;
   by _Imputation_;
   class Species;
   model Length = Species Height Width / solution covb;
   ods output SolutionF=mxparms CovB=mxcovb;
run;

/* List the first ten rows of the per-imputation MIXED fixed-effect estimates. */
proc print data=mxparms(obs=10);
   title 'MIXED Model Coefficients (First Two Imputations)';
   var _Imputation_ Effect Species Estimate StdErr;
run;

/* Combine the per-imputation MIXED estimates in mxparms (class levels are
   identified with CLASSVAR=FULL) and save the combined parameter estimates
   to parms_1. */
proc mianalyze parms(classvar=full)=mxparms;
   class Species;
   ods output ParameterEstimates=parms_1;
   modeleffects Intercept Species Height Width;
run;


/* Turn every combined estimate in parms_1 into one term of a DATA step
   scoring expression, stored in pred_1:
     - Intercept           ->  <estimate>
     - continuous effect   ->  <signed estimate>*<effect name>
     - class-level effect  ->  <signed estimate>*(<effect name>='<level>')
   The terms are later concatenated, so every term after the intercept must
   carry an explicit sign. */
data parms_2;
    set parms_1;
    length pred_1 $3000 coef_txt $40;
    drop coef_txt;

    /* Convert the estimate explicitly. Relying on implicit numeric-to-
       character conversion (COMPRESS/CATS on a numeric) rounds the value to
       BEST12. and writes a conversion NOTE to the log. */
    coef_txt = strip(put(Estimate, best32.));

    /* Negative values already start with '-'; non-negative ones need a '+'
       unless they are the leading intercept term. */
    if Parm ne 'Intercept' and Estimate >= 0 then coef_txt = cats('+', coef_txt);

    if Parm = 'Intercept' then
        pred_1 = coef_txt;
    else if Species = '' then
        /* Continuous effect: coefficient times the variable itself. */
        pred_1 = cats(coef_txt, '*', compress(Parm));
    else
        /* Class level: coefficient times a 0/1 indicator for that level. */
        pred_1 = cats(coef_txt, '*(', Parm, "='", compress(Species), "')");
run;


/* Pivot the expression terms in pred_1 into a single row (one column per
   term) in parms_3, keeping only rows whose estimate is not zero. */
proc transpose data=parms_2(where=(Estimate ne 0)) out=parms_3;
   var pred_1;
run;

/* Concatenate the transposed expression terms (COL1, COL2, ...) into one
   string and store it in macro variable pred3. */
data _null_;
    set parms_3;
    length expr $3000;
    expr = catt(of col:);
    /* SYMPUTX strips leading/trailing blanks, so pred3 is not padded out to
       the 3000-character length of expr. */
    call symputx('pred3', expr);
run;

/* Echo the expression only after the step has run. A %PUT placed inside the
   DATA step is resolved while the step is still being compiled, i.e. before
   CALL SYMPUTX has assigned pred3. */
%put &pred3;

/* Evaluate the scoring expression held in macro variable pred3 for every
   row of Fish2 and store the result in NEW_Length. */
data MISSING;
   set Fish2;
   NEW_Length = &pred3;
run;

 
 
 
 
 [HELP] Sashelp Data Sets 리스트
 SAS Programmer의 변천사 와 추세(SASMater)