/**************************************************************************** * Macro name : poisreg * * Version : 12.02 * * Author : Bart Spiessens * * Date : 20DEC2004 * * ------------------------------------------------------------------------ * * Revisions : Versions Date Author * * * * 12.00 20DEC2004 Bart Spiessens * * Creation of the macro * * 12.01 02FEB2005 Bart Spiessens * * Improvement: Replace &data by wk_pois * * 12.02 21AUG2006 Bart Spiessens * * Improvement: option to print and save covariance matrix * * and correlation matrix of parameter estimates* * ------------------------------------------------------------------------ * * Description : This macro fits a Poisson (case series) * * regression model with the possibility to eliminate a * * factor from the model, to reduce the computation time * * Reference : R. THOMPSON, Biometrics, 1977, 33, 485-495 'The estimation * * of heritability with unbalanced data. I. Observations * * available on parents and offspring * * ------------------------------------------------------------------------ * * INPUT - parameters * * * * data : Input dataset * * y : Response variable for poisson regression (count data) * * covar : Covariates in model that are not in class or elim * * class : Class variables in model * * elim : Variable that should be eliminated from the model * * i.e., this variable is in the model but not estimated * * e.g., pid * * offset : offset variable * * beta0 : starting values, if empty * * outdata : dataset with parameter estimates (DEFAULT =out) * * eps : convergence criteria (DEFAULT=1e-08), the maximum of the * * absolute value of the first derivative is calculated, if * * smaller than eps, the algorithm stops * * alpha : significance level for confidence interval (DEFAULT=0.05) * * prntyn : print results (DEFAULT=Y) * * * * INPUT - macros variables (other than standards) * * * * * * INPUT - datasets from lib_data * * * * * * INPUT - datasets from work * * * * ------------------------------------------------------------------------ * * OUTPUT - macros variables * * * * * * OUTPUT datasets in work * * * * ------------------------------------------------------------------------ * * UPDATE - table * * * ****************************************************************************/ %MACRO poisreg(data=,y=,covar=,class=,elim=,offset=,beta0=,outdata=out,eps=1e-08,alpha=0.05,prntyn=Y,covb=N,title=) /* / STORE DES='Poisson regression with elimination factor' */ ; TITLE &title; %LET covb=%UPCASE(%SUBSTR(&covb,1,1)); DATA wk_pois; SET &data; RUN; /* Create matrix UIT with number of measurements per level of &elim */ /* This matrix is read in IML later */ PROC SORT DATA=wk_pois; BY &elim; RUN; PROC FREQ DATA=wk_pois; TABLES &elim / out=uit NOPRINT; RUN; /* Start IML procedure*/ PROC IML; /* Write PUT statements to the SAS log */ FILE log; /* Module INVDERIV calculates the inverse of minus the second derivative matrix times the first derivative to update the parameter in the Newton-Raphson procedure */ START invderiv(aantal,W,mu,deriv) GLOBAL(invSa); /* A is diagonal matrix, instead of using the whole matrix, we work only with the diagnonal */ *A = t(U)*diag(mu)*U; p1 = nrow(aantal)-1; p2 = ncol(W); p = p1 + p2; /* Calculate diagonal of A */ first = 1; DO i=1 TO p1; ni = aantal[i]; last = first + ni - 1; hulp = sum(mu[first:last,]); dA = dA//hulp; first = last + 1; END; /* Calculate B */ muW = mu#W; first = 1; DO i=1 TO p1; ni = aantal[i]; last = first + ni - 1; muWi = muW[first:last,]; muWi = muWi[+,]; B = B//muWi; first = last + 1; END; C = t(B); D = t(W)*(mu#W); invdA = 1/dA; Sa = D-C*(invdA#B); invSa = inv(Sa); IA = invdA#deriv[1:p1] + (invdA#B)*(invSa*((C#t(invdA))*deriv[1:p1])); IB = -(invdA#B)*(invSa*deriv[(p1+1):p]); IC = -invSa*(C#t(invdA))*deriv[1:p1]; ID = invSa*deriv[(p1+1):p]; out = (IA+IB)//(IC+ID); RETURN(out); FINISH; /* Read in necessary vectors and matrices */ USE uit; READ all var {count} INTO aantal; CLOSE uit; USE wk_pois; READ all var {&y} INTO y; %IF &covar NE %THEN %DO; READ all var {&covar} INTO W1; %END; %IF &class NE %THEN %DO; READ all var {&class} INTO W2; %END; %IF &offset NE %THEN %DO; READ all var {&offset} INTO offset; %END; CLOSE wk_pois; /* Calculate the log of the offset */ %IF &offset NE %THEN %DO; l_off = log(offset); %END; %ELSE %DO; l_off = 0; %END; W = W1; %IF &class NE %THEN %DO; DO i=1 TO ncol(W2); hulp = design(W2[,i]); nclass = nclass//ncol(hulp); W = W||hulp[,1:ncol(hulp)-1]; END; %END; /* Convergence criteria */ eps = &eps; /* n = number of observations */ n = nrow(W); /* p1 = number of categories in &elim - 1 = number of dummy variables for &elim */ p1 = nrow(aantal)-1; /* p2 = number of parameters in the rest of the model */ p2 = ncol(W); /* p = total number of parameters */ p = p1 + p2; /* If starting values are not given than initialize beta0 to 0 */ %IF &beta0 NE %THEN %DO; hulp = &beta0; beta0 = repeat(0,p1,1)//hulp; %END; %ELSE %DO; beta0 = repeat(0,p,1); %END; /* We want to calculate mu = exp(X*beta + l_off) where X = (U|W) and U is a design matrix containing the design vectors for &elim. This is a huge matrix (s x s-1) where s = number of subjects and contains a lot of zeros. All calculations will be done using the number of measurements for each level of &elim instead of U. */ /* We split the calculation of X*beta as U*beta1 + W*beta2 where beta1 is p1x1 and beta2 = p2x1. U*beta1 is a nx1 column vector where the beta1[1] is repeated aantal[1] times, ... */ DO i=1 TO p1; Ubeta0 = Ubeta0//repeat(beta0[i],aantal[i],1); END; /* At the end we have to add zeros because of the reference subject */ Ubeta0 = Ubeta0//repeat(0,aantal[p1+1],1); /* Add W part to calculation */ mu0 = exp(Ubeta0+W*beta0[(p1+1):p] + l_off); /* Calculate the first derivative */ /* deriv1 = t(X)*(y-mu) and this can be written as t(U)*(y-mu)//t(W)*(y-mu) */ /* t(U)*x is a p1x1 vector with elements x[1] + ... + x[n1], x[n1+1]+...+x[n1+n2],... where ni is the number of measurements for the ith level of the factor &elim */ first = 1; DO i=1 TO p1; ni = aantal[i]; last = first + ni - 1; hulp = sum(y[first:last,]-mu0[first:last,]); Uderiv1 = Uderiv1//hulp; first = last + 1; END; /* Add W part to calculation */ deriv1 = Uderiv1//(t(W)*(y-mu0)); mu = mu0; beta = beta0; /* tel counts the number of iterations */ tel = 0; /* Convergence criteria is the maximum of the absolute value of the first derivative */ DO WHILE (max(abs(deriv1)) > eps); tel = tel + 1; /* put tel; */ hulp = max(abs(deriv1)); /* put hulp; */ /* Update parameter using Newton-Raphson */ beta = beta + invderiv(aantal,W,mu,deriv1); /* Initialize Ubeta matrix */ Ubeta = loc(0); /* See above for explanation of update */ DO i=1 TO p1; Ubeta = Ubeta//repeat(beta[i],aantal[i],1); END; Ubeta = Ubeta//repeat(0,aantal[p1+1],1); mu = exp(Ubeta+W*beta[(p1+1):p] + l_off); /* Initialize Uderiv1 matrix */ Uderiv1 = loc(0); /* See above for explanation of update */ first = 1; DO i=1 TO p1; ni = aantal[i]; last = first + ni - 1; hulp = sum(y[first:last,]-mu[first:last,]); Uderiv1 = Uderiv1//hulp; first = last + 1; END; deriv1 = Uderiv1//(t(W)*(y-mu)); END; /* Print output */ ll = sum(-mu + y#log(mu)); stderr = sqrt(vecdiag(invSa)); %IF &prntyn = Y %THEN %DO; print "Log-likelihood"; print ll; %END; *print "Parameters"; params = beta[(p1+1):p]; *print params; *print "Standard errors"; *print stderr; mat = params[1:ncol(W1)]||stderr[1:ncol(W1)]; %IF &class NE %THEN %DO; first = ncol(W1)+1; DO i = 1 TO ncol(W2); ni = nclass[i]-1; last = first+ni-1; hulp=(params[first:last]||stderr[first:last])//repeat(0,1,2); mat=mat//hulp; first = last + 1; END; %END; *mat = params||stderr; %IF &covar NE %THEN %DO; rownm = t({&covar}); %END; %IF &class NE %THEN %DO; rownm2 = {&class}; DO i = 1 TO ncol(rownm2); hulp = t(char(unique(W2[,i]))); rownm = rownm//concat(repeat(rownm2[,i],nclass[i],1),hulp); *print rownm; END; %END; colnm = "Estimate"||"StErr"; CREATE labels FROM rownm [COLNAME={Params}]; APPEND FROM rownm; CREATE &outdata FROM mat [COLNAME=colnm]; APPEND FROM mat; %IF &prntyn = Y AND &covb = Y %THEN %DO; print "Estimated covariance matrix"; print invSa; CREATE _covb FROM invSa; APPEND FROM invSa; %END; QUIT; DATA &outdata; MERGE labels &outdata; z = estimate/sterr; chisq = (z)**2; pvalue = 1-probchi(chisq,1); ll = estimate - probit(1-&alpha/2)*sterr; ul = estimate + probit(1-&alpha/2)*sterr; expest = exp(estimate); expll = exp(ll); expul = exp(ul); RUN; %LET conf = %SYSEVALF(100*(1-&alpha)); %IF &prntyn = Y %THEN %DO; PROC REPORT DATA=&outdata SPLIT="#" NOWINDOWS; COLUMN params estimate sterr ("Wald &conf% Confidence Limits" ll ul) chisq pvalue expest ("Exp &conf% Confidence Limits" expll expul); DEFINE params / ID WIDTH=20 "Parameter"; DEFINE estimate / WIDTH=12 FORMAT=8.4 "Estimate"; DEFINE sterr / WIDTH=12 FORMAT=8.4 "Standard#Error"; DEFINE ll / WIDTH=12 FORMAT=8.4 "Lower"; DEFINE ul / WIDTH=12 FORMAT=8.4 "Upper"; DEFINE chisq / WIDTH=12 FORMAT=8.2 "Chi-square"; DEFINE pvalue / WIDTH=12 FORMAT=pvalue8.4 "Pr>ChiSq"; DEFINE expest / WIDTH=12 FORMAT=8.4 "Exp Estimate"; DEFINE expll / WIDTH=12 FORMAT=8.4 "Lower"; DEFINE expul / WIDTH=12 FORMAT=8.4 "Upper"; RUN; TITLE ; %END; %MEND poisreg;