**********************************************************************;
* *;
* SAS Macro: The Maximum a posteriori Estimate using the Monte Carlo *;
* Expectation-Maximization Algorithm. Paul Johnson [Dec '98] (7k) *;
* *;
**********************************************************************;
/*-------------------------------------------------------------------*
| |
| Seeking the Maximum a posteriori Estimate (MAP) using the |
| Monte Carlo Expectation-Maximization (MCEM) Algorithm |
| Performing the Bootsrap at each Monte Carlo E-Step Iteration |
| |
| The MCEM Algorithm: Small values of M are initially use and M |
| is increased as the algorithm moves closer to convergence. |
| 50 Iterations are carried out. The user can change this if |
| needed. A Monte Carlo E-Step is carried out, followed by the |
| M-Step. This results in the MCEM Estimate. When M = 1 and the |
| randomly drawn Z(1) is replaced by the mean of p(Z|y,x) and if |
| the complete-data log likelihood is linear in z, then we have |
| the EM Algorithm. The MCEM Estimate is obtained by calculating |
| the mean of the average of the last 10 iterations, with the |
| average of the last 5, together with the average of the last 3, |
| the last 2 and the final iteration. This is done in order to |
| compensate for the random flctuations that occur due to the |
| Monte Carlo approach at the E-Step. The final MCEM estimate is |
| the estimate of the parameter (or vector of parameters) of |
| interest, for a Monte Carlo Expectation-Maximization Estimate. |
| The final estimate is compared to the mean of the last 5 |
| in order to get some form of idea and measure of the random |
| fluctuation. |
| |
| DATA: The data used is from a genetic linkage model examined by |
| Tanner and Wong (1987). The model is a multinomial with |
| four categories, having observed counts of (125,18,20,34). |
| |
| In this example a Beta(v1,v2) prior was used. The user inputs |
| the values for v1 and v2. If v1 = v2 = 1, then in this example |
| the MCEM (MAP) Estimate is the same as the MCEM (MLE) Estimate. |
| 100 Bootstraps are carried out at each of the iterated E-Steps. |
| |
| References: |
| Wei, G.C.G., and Tanner, M.A. (1990). "A Monte Carlo |
| Implementation of the EM Algorithm and the Poor Man's |
| Augmentation Algorithms," JASA, 85: 699-704. |
| |
| McLachlan, G.J., and Thriyambakam Krishnan (1997) The EM |
| Algorithm and Extensions. New York: Wiley. |
| |
| Tanner, M.A., and Wong, W.H. (1987) "The Calculation of |
| Posterior Distributions by Data Augmentation," JASA, 82: 528-550. |
| |
| |
| Author: Paul Johnson [JohnsonP12@aol.com] December, 1998 |
| |
| This macro can be freely used for non-commercial purposes and |
| can be freely distributed. The author is willing to help people |
| who have problems, Paul Johnson (JohnsonP12@aol.com) [Dec 98]. |
| |
--------------------------------------------------------------------*/;
OPTIONS NODATE NONUMBER PAGESIZE = 60 LINESIZE =132;
data a;
y1 = 125;
y2 = 18;
y3 = 20;
y4 = 34;
v1 = 1;
v2 = 1;
init = 0.057;
%macro mcem;
%let m = 3;
x1 = init;
%do l = 2 %to 50;
seed2&l=int(ranuni(6724)*12*&l);
%let j = %EVAL(&l-1);
p&j = (0.25*x&j)/(0.5 + 0.25*x&j);
%let m = %eval(&m+1);
%do k = 1 %to &m;
seed&k=floor((1000000000+(&m*&k))*(sqrt(time())-floor(sqrt(time()))))
+ seed2&l;
z&k = ranbin(seed&k,y1,p&j);
%end;
proc transpose data = a out = aa2;
var z1-z&m;
data aa2;set aa2;z = col1;keep z;
%macro boot;
%do i=1 %to 100;
data a2;scan: set aa2 end=last;
n+1;
if not last then goto scan;
do i=1 to n;
seed=floor(1000000000*(sqrt(time())-floor(sqrt(time()))));
k=ceil(ranuni(seed)*n);
set aa2 point=k;
if _error_ then abort;
output;
end;
stop;
data a2;set a2;
proc means data=a2 noprint; var z; output out = a3 mean = mean_z;
data tt54&i;set a3;
%end;
%do j = 2 %to 100;
proc append base = tt541 data = tt54&j;
%end;
%mend boot;
%boot
proc means data = tt541 noprint;var mean_z;
output out = c mean = meanz;
data a;set c;
y1 = 125;
y2 = 18;
y3 = 20;
y4 = 34;
v1 = 1;
v2 = 1;
x&l = (meanz + y4 + v1 - 1)/(meanz + y2 + y3 + y4 + v1 + v2 - 2);
data b&l;set a;dummy = 'a';
proc sort data =b&l;by dummy;
data a;set a;
%end;
%mend;
%mcem;
data b1;x1 = 0.057;dummy = 'a';
data d;merge b1 b2 b3 b4 b5 b6 b7 b8 b9 b10
b11 b12 b13 b14 b15 b16 b17 b18 b19 b20
b21 b22 b23 b24 b25 b26 b27 b28 b29 b30
b31 b32 b33 b34 b35 b36 b37 b38 b39 b40
b41 b42 b43 b44 b45 b46 b47 b48 b49 b50;
by dummy;
proc print data = d; var x1-x50;
TITLE1 'MAP Parameter Estimates Using the MCEM Algorithm';
data d;set d;
sum1 = x41 + x42 + x43 + x44 + x45 + x46 + x47 + x48 + x49 + x50;
sum2 = x46 + x47 + x48 + x49 + x50;
sum3 = x48 + x49 + x50;
sum4 = x49 + x50;
sum_mcem = (sum1/10) + (sum2/5) + (sum3/3) + (sum4/2) + x50;
mcem = sum_mcem/5;
proc print data = d; var mcem;
TITLE1 'The Final MCEM [MAP] Parameter Estimate';
data b;set d; mcem_5 = sum2/5;
proc print data = b; var v1 v2 mcem_5;
TITLE1 'The MCEM [MAP] Estimate using the average of the last 5 Iterations';
TITLE2 'Compare to the Final Estimate, in order to gain a measure of the';
TITLE3 'Random Fluctuation for the Monte Carlo Expectation-Maximization';
TITLE4 'Maximum a posteriori Estimate - with the use of the Bootstrap';
TITLE5 'If V1 = 1 and V2 = 1 the the MCEM-MAP is the same as the MLE';
run;