Lesson 40:  Bootstrap for Model Selection Frequencies


 Video:  Bootstrap for Regression

/*
  Session setup for the bootstrap macro.

  Names that must NOT already be in use:
    - variables in the source data : i, idr
    - datasets in the WORK library : temp, rands, sub
      (the macro below also writes est, summ and summ2 to WORK)
*/

/* Library that holds the source dataset. */
libname s "c:\stat510";

/* Suppress NOTE lines in the log; the macro runs many steps in a loop. */
options nonotes;

/* Name of the dataset to be analyzed. */
%let ds=s.manp;

/* Number of observations in the source dataset. */
%let n=25;
%macro boot(reps=100);
  /*
    Bootstrap estimate of model-selection frequencies.

    For each of &reps bootstrap replicates the macro
      1. draws &n observations, with replacement, from &ds,
      2. runs PROC REG with adjusted R-square selection (best single model)
         of y on x1-x7 and captures the chosen model in an OUTEST= dataset,
      3. appends that one-row result to the summary dataset SUMM.
    After the loop the selected models are turned into text labels and
    tabulated with PROC FREQ.

    Parameters
      reps  number of bootstrap replicates (default 100).

    Global macro variables read
      ds    name of the dataset to be analyzed
      n     number of observations in that dataset

    WORK datasets created or replaced: temp, rands, sub, est, summ, summ2.
    The source data must not already contain variables named i or idr.
  */
  %local j;

  /* Read the source data and add idr, a 1..n row identifier that the
     bootstrap join below matches against. */
  data temp;
    set &ds;
    idr = _n_;
  run;

  %do j = 1 %to &reps;

    /* Draw &n row identifiers uniformly from 1..&n, with replacement.
       RAND("uniform") lies strictly between 0 and 1, so CEIL yields 1..&n. */
    data rands(drop=i);
      do i = 1 to &n;
        idr = ceil(&n * rand('uniform'));
        output;
      end;
    run;

    /* Build the bootstrap sample: every drawn idr pulls in its source row,
       so a row drawn k times appears k times in SUB. */
    proc sql;
      create table sub as
        select temp.* from temp, rands where temp.idr = rands.idr;
    quit;

    /* Here is where the analysis goes.  Depending on the task, send the
       output to an output dataset, then pull it into a summary dataset
       (probably via SQL).  Finally, the summary dataset must be summarized
       to generate the desired bootstrap result.

       Example: PROC REG with adjusted R-square selection, used to evaluate
       model selection frequencies. */
    proc reg data=sub outest=est noprint;
      model y = x1-x7 / selection=adjrsq best=1;
    run;
    quit;

    %if &j = 1 %then %do;
      /* First replicate: create the summary dataset. */
      data summ;
        set est;
      run;
    %end;
    %else %do;
      /* Later replicates: append to the summary dataset. */
      proc sql;
        insert into summ select * from est;
      quit;
    %end;

  %end;

  /* Process the summary dataset: build a label such as " x1-x4-x6-" listing
     the regressors with a non-missing estimate in each selected model.
     The full seven-variable label is 22 characters long (1 leading blank
     plus 7 x 3), so modl needs length 22 to avoid dropping the final dash. */
  data summ2(keep=modl);
    set summ;
    length modl $22;
    if x1 ne . then modl = " x1-";
    if x2 ne . then modl = trim(modl)||"x2-";
    if x3 ne . then modl = trim(modl)||"x3-";
    if x4 ne . then modl = trim(modl)||"x4-";
    if x5 ne . then modl = trim(modl)||"x5-";
    if x6 ne . then modl = trim(modl)||"x6-";
    if x7 ne . then modl = trim(modl)||"x7-";
  run;

  /* Frequency of each selected model across the bootstrap replicates. */
  proc freq data=summ2;
    tables modl;
  run;
%mend boot;




