# Random-Effects Logit outsourced to R # (glmer function in lme4 package) # (test code at the bottom) # Sven Schreiber function bundle RElogitR(series y, list X, \ int quadpoints[0::1], int test[-2::-1]) /* This function needs R (apparently version >= 3.1.2), and the "lme4" package/library must be installed inside R. A constant term is not explicitly added by the function, but if it exists it is moved to the top of the list! Interpretation of 'quadpoints': - default 1 invokes the glmer default of Laplacian, - setting it to zero invokes our default of 32 for GHQ, - and any other positive number is passed verbatim. Interpretation of 'test': tests the RE spec against pooled logit; - if non-zero does an LR test (although fishy because param boundary) - default -1 also does Hausman test on all coefficients (jointly) except const - -2 Hausman test on all coeffs including the constant - 0 suppresses the test - any positive number: Hausman test on first n vars (excl. the const) Contents of return bundle: - beta (matrix): coefficients of X - vcov (matrix): VCov-matrix of beta - yname (string): name of the y variable - Xnames (string): comma-separated string of X names - LL (scalar): log likelihood - sigma (scalar): resid. variance (as given by R's lme4 of course) - ngroups (scalar): number of panel units/groups in sample - quadpoints (scalar): effective number of quadrature points used - REsd (scalar): Std.Dev. of the random effects (sigma_u) - rho (scalar): variance portion from RE (sigma_u^2 / (sigma_u^2 / (\pi/3)) --- The following only if test != 0: - Hstat (scalar): Hausman test stat pooled vs. RE - Hpv (scalar): Hausman test p-value - lstat (scalar): LR test stat pooled vs. RE - lpv (scalar: LR p-value (approx.) */ set echo off set messages off if !isdummy(y) funcerr "Dep. var must be a dummy!" endif catch series groups = $unit if $error funcerr "Is this really a panel dataset?" endif # make sure the constant term comes first if it exists list Xn = X - const # hopefully works even w/o const if nelem(Xn) == nelem(X) - 1 # there was a constant list X = const Xn cflag = 1 else cflag = 0 endif # pass the quadpoints option to R quadpoints = quadpoints==0 ? 32 : quadpoints matrix mquadp = {quadpoints} mwrite(mquadp, "RElogitR_mquadp.mat", 1) # prepare the data for passing to R list pass = y X groups # setopt foreign --send-data=pass # needs very recent snapshot foreign language=R # --send-data=pass # needs very recent snapshot q <- gretl.loadmat("RElogitR_mquadp.mat") library(lme4) # use all vars in the model except "groups" (and "y"), # and include random effects based on "groups" m <- glmer(y ~ . - groups + (1|groups), data=gretldata, family = binomial(link=logit), nAGQ=q) summary(m) # retrieve output/results rbeta <- as.matrix(fixef(m)) # coeffs of standard regressors rvcov <- as.matrix(vcov(m, correlation=F)) rLL <- as.matrix(logLik(m)) rsigma <- as.matrix(sigma(m)) rngroups <- as.matrix(ngrps(m)) tmp <- VarCorr(m) # appends RE variances to the model rREsd <- as.matrix(as.data.frame(tmp, row.names="groups")[5]) # (5 for StdErr position in the frame) gretl.export(rbeta) gretl.export(rvcov) gretl.export(rLL) gretl.export(rsigma) gretl.export(rngroups) gretl.export(rREsd) end foreign --send-data=pass bundle b string b.yname = argname(y) string b.Xnames = varname(X) scalar b.quadpoints = quadpoints loop foreach i beta vcov -q # the actual matrix stuff matrix b.$i = mread("r$i.mat", 1) endloop loop foreach i LL sigma ngroups REsd -q # the scalar stuff matrix m$i = mread("r$i.mat", 1) scalar s$i = m$i[1,1] # these copies necessary to get scalars scalar b.$i = s$i # (dito) endloop # also calculate proportion of RE variance: \rho = su^2 / (su^2 + \pi^2/3) su2 = sREsd^2 b.rho = su2 / (su2 + ($pi^2)/3) if test != 0 # the pooled logit logit y X --quiet matrix contrast = b.beta - $coeff matrix Vdiff = b.vcov - $vcv if test == -2 string msg = "(based on all coeffs including constant)" elif test == -1 string msg = "(based on all coeffs except constant)" matrix contrast = contrast[1+cflag: ] matrix Vdiff = Vdiff[1+cflag: , 1+cflag:] catch matrix iVdiff = inv(Vdiff) elif test > 0 sprintf msg "(based on the first %d coeffs except constant)", test matrix contrast = contrast[1+cflag : test+cflag] matrix Vdiff = Vdiff[1+cflag : test+cflag, 1+cflag : test+cflag] endif dim = rows(Vdiff) catch matrix iVdiff = inv(Vdiff) if $error print "Sorry, Hausman test failed." r = rank(Vdiff) printf "(Rank of Cov diff.: %d (dim: %d))\n", r, dim else scalar b.Hstat = qform(contrast', iVdiff) printf "Hausman test RE vs. pooled logit %s:\n", msg scalar b.Hpv = pvalue(X, dim, b.Hstat) printf "stat: %f (for %d coeffs), p-value: %f\n", b.Hstat, dim, b.Hpv endif # RE LR test stuff: pooLL = $lnl b.lstat = 2 * (b.LL - pooLL) b.lpv = pvalue(X, 1, b.lstat) printf "Likelihoods: RE %f, pooled %f\nLRstat %f, (half of naive) p-val %f\n", \ b.LL, pooLL, b.lstat, 0.5*b.lpv # (this 0.5*pval comes from approx. mixing a chi2(0) and a chi2(1)) endif # Hausman test block return b end function ## test code test = 1 if test open abdata.gdt # --preserve series binary = (INDOUTPT > INDOUTPT(-1)) list rhs = const EMP WAGE bundle myREl = RElogitR(binary, rhs, 20) loop foreach i beta vcov LL # matrices matrix mtemp = myREl.$i print mtemp endloop loop foreach i LL sigma ngroups REsd rho # scalars scalar temp = myREl.$i print temp endloop matrix mtemp = sqrt(diag(myREl.vcov)) print mtemp endif