******************
* Stata programs *
******************

/*
    collapsetofile (sum)|(mean) varlist [aweight] [if] [in] using filename,
        by(varlist) [cw replace nomacros]

    Computes by-group sums or means of the listed variables and appends the
    collapsed values to a Mata matrix file (fopen/fputmatrix) instead of
    replacing the data in memory.

    Requirements and behaviour visible in this file:
      - the data must already be sorted by the by() variables;
      - the file must not exist unless replace is given;
      - one row per by-group is written, but only for groups that have at
        least one observation in the estimation sample;
      - cw requests casewise deletion across all listed variables.

    NOTE(review): the nomacros option is parsed but never referenced in the
    code visible here - confirm whether it is meant to suppress the macro
    snapshot written by getstatacharacteristics().
*/
program collapsetofile
    /* newvar oldvar wtype wvar byvars */
    syntax anything [aweight/] [if] [in] using/, by(varlist) [cw replace nomacros]

    // parse syntax: first token is the statistic, the rest is the varlist
    gettoken fcn rest : anything, parse(" ") bind
    if "`fcn'" != "(sum)" & "`fcn'" != "(mean)" {
        di as error "must specify (sum) or (mean)"
        di as text "(you specified `fcn')"
        error 10
    }
    // strip the parentheses so that fcn can be used to build the name of
    // the worker program (_sum or _mean)
    local fcn : subinstr local fcn "(" "", all
    local fcn : subinstr local fcn ")" "", all

    unab varlist : `rest'
    cap confirm variable `varlist'
    if _rc {
        di as error "`varlist' not a varlist"
        error 10
    }

    local aweight `exp'

    // confirm using doesn't exist
    cap confirm new file `"`using'"'
    if _rc != 0 {
        if "`replace'" == "replace" {
            rm `"`using'"'
            noi di as text `"file `using' erased"'
        }
        else {
            noi di as error "file `using' already exists"
            error 10
        }
    }

    // check sort order
    // A trailing blank is appended to both sides so that the comparison is
    // made on whole variable names: by(id) must not be satisfied by data
    // sorted on a variable that merely starts with the same letters (id2).
    local sortvars : sortedby
    if !strmatch("`sortvars' ","`by' *") {
        di as error "data not sorted"
        di as text "(data sorted by `sortvars', but should be sorted by `by')"
        error 10
    }

    // create touse variable common to all vars in varlist
    // (without cw, missing values are handled variable by variable in the
    // worker programs, so the varlist is not marked out here)
    if "`cw'" == "" local novarlist novarlist
    marksample touse, `novarlist'

    // rather than (by.. keep if _n == _N), just tag these observations
    // select is 1 on the last observation of every by-group that contains
    // at least one observation in the sample, 0 elsewhere
    tempvar select
    by `by': gen byte `select' = (_n == _N) * (sum(`touse')!=0)

    // save metadata to file
    qui count if `select'
    local obs = r(N)
    foreach var of local by {
        local types `types' `: type `var'' // keep original types of byvars
    }
    foreach var of local varlist {
        // collapsed variables are recorded as double when the source is
        // long or double, and as float otherwise
        local typ : type `var'
        if "`typ'" == "long" | "`typ'" == "double" {
            local types `types' double
        }
        else {
            local types `types' float
        }
    }
    mata: varcharstofile("`by' `varlist'","`types'","`using'",`obs')

    // now calculate means and write collapsed data to file
    // (one matrix is appended per variable: by-variables first, then the
    // collapsed variables in varlist order)
    foreach byvar of local by {
        mata : storetofile("`byvar'","`select'","`using'")
    }
    foreach x of local varlist {
        tempvar newvar
        _`fcn' `newvar' `x' 0 `""' `"`aweight'"' `"`by'"' `"`touse'"'
        mata : storetofile("`newvar'","`select'","`using'")
        drop `newvar'
    }
end

/*
    _mean: running weighted mean of x within by-groups.

    Positional arguments:
        y             name of the variable to create
        x             source variable
        sortpreserve  not used in this program
        wt            not used in this program
        w             weight expression; empty means unweighted (weight 1)
        by            by-variables; empty means no grouping
        touse         0/1 sample marker

    The value on the last observation of each group is the group mean; it is
    missing when the group has no usable observation (0/0).
*/
program _mean
    /* newvar oldvar wtype wvar byvars */
    args y x sortpreserve wt w by touse
    if (`"`w'"'=="") local w 1
    if (`"`by'"' != "") local by `"by `by':"'

    // get appropriate data type
    local ty : type `x'
    if (`"`ty'"'=="double" | `"`ty'"'=="long") local ty "double"
    else local ty /* erase macro */

    // create touse variable specific to this x variable
    tempvar touse_x
    gen byte `touse_x' = `touse'
    markout `touse_x' `x'

    // calculate mean
    // sum() treats missing terms as zero, so excluded observations add
    // nothing to the numerator; the denominator counts only their weights
    quietly `by' gen `ty' `y' = sum(`touse_x'*`w'*`x')/sum(cond(`touse_x',`w',0))
end

/*
    _sum: running (optionally weighted) sum of x within by-groups.

    Positional arguments are the same as for _mean. When a weight is given,
    it is first rescaled by remakew so that the weights of the usable
    observations in a group add up to the number of those observations.

    The value on the last observation of each group is the group total.
*/
program _sum
    args y x sortpreserve wt w by touse
    if (`"`w'"'=="") local w 1
    if (`"`by'"' != "") local by `"by `by':"'

    // create touse variable specific to this x variable
    tempvar touse_x
    gen byte `touse_x' = `touse'
    markout `touse_x' `x'

    quietly {
        if `"`w'"'!="1" {
            tempvar new
            remakew `x' `w' `new' `"`by'"' `"`touse_x'"'
            local w `"`new'"'
        }
        // calculate sum
        // Observations outside the sample must contribute zero. Previously
        // this relied on the rescaled weight being missing, which left the
        // unweighted case (w equal to 1) summing every observation in the
        // group regardless of if/in.
        `by' gen double `y' = sum(cond(`touse_x', `w'*`x', 0))
    }
end

/* utility for if weights specified with (sum) */
/*
    remakew: builds the rescaled weight variable used by _sum.

    Positional arguments:
        x      source variable (not used in the computation below)
        w      original weight expression
        new    name of the weight variable to create
        by     by-prefix
        touse  0/1 sample marker

    new = w * (number of usable obs in group) / (sum of w over usable obs),
    and missing for observations outside the sample.
*/
program remakew
    /* xvar oldw neww by-prefix */
    args x w new by touse
    /* by is either "" or "by vn vn ...:" */
    tempvar sum obs
    `by' gen long `obs' = sum(`touse')
    `by' gen double `sum' = sum(cond(`touse',`w',0))
    `by' gen double `new' = cond(`touse', `w'*`obs'[_N]/`sum'[_N], .)
end

/*
    recover filename

    Checks that the file exists and hands it to the Mata function
    recovercollapsedfile(), which is not defined in this part of the file.
*/
program define recover
    syntax anything
    confirm file "`anything'"
    qui mata: recovercollapsedfile("`anything'")
end

******************
* mata functions *
******************

mata

// Appends the values of vars, restricted to observations where the Stata
// variable named by select is nonzero, to fileloc as one matrix.
void storetofile(string matrix vars, string scalar select, string scalar fileloc)
{
    st_view(v=., ., vars, select)
    fh = fopen(fileloc, "a")
    fputmatrix(fh, v)
    fclose(fh)
}

// Creates fileloc and writes the three header matrices, in this order:
// the number of collapsed observations, the per-variable metadata, and the
// snapshot of macros/scalars/matrices/dataset characteristics.
void varcharstofile(string scalar varlist, string scalar types, string scalar fileloc, real scalar obs)
{
    fh = fopen(fileloc, "w")
    // this is the first matrix being written to file
    fputmatrix(fh, obs)
    fputmatrix(fh, getvarcharacteristics(tokens(varlist)',tokens(types)'))
    fputmatrix(fh, getstatacharacteristics())
    fclose(fh)
}

// Returns a rows(varnames) x 7 pointer matrix describing each variable:
//   1 name, 2 storage type, 3 display format, 4 variable label,
//   5 value-label name, 6 value-label values, 7 value-label text.
// Columns 6 and 7 stay NULL when the variable has no value label.
// vartypes is optional; when omitted the type currently in the data is used.
pointer matrix getvarcharacteristics(string colvector varnames, |string colvector vartypes)
{
    metadata = J(rows(varnames), 7, NULL)
    for (i=1;i<=rows(varnames);i++) {
        // store varname, vartype, and varformat
        metadata[i,1] = &varnames[i,1]
        if (args() == 1) metadata[i,2] = &st_vartype(varnames[i,1])
        else metadata[i,2] = &vartypes[i,1]
        metadata[i,3] = &st_varformat(varnames[i,1])
        metadata[i,4] = &st_varlabel(varnames[i,1])
        metadata[i,5] = &st_varvaluelabel(varnames[i,1])
        // value label values and text
        if (*metadata[i,5] != "") {
            metadata[i,6] = &vlload_labels(*metadata[i,5],"vals")
            metadata[i,7] = &vlload_labels(*metadata[i,5],"text")
        }
    }
    return(metadata)
}

// Returns a 1 x 6 pointer rowvector; each element points to a two-column
// string matrix of (name, contents) pairs for:
//   1 local macros, 2 global macros, 3 numeric scalars, 4 string scalars,
//   5 matrices, 6 _dta characteristics.
pointer matrix getstatacharacteristics()
{
    pointer rowvector metadata

    local_macros = st_dir("local","macro","*")
    N = rows(local_macros)
    local_macros = local_macros, J(N,1,"")
    for (i=1;i<=N;i++) {
        local_macros[i,2] = st_local(local_macros[i,1])
    }

    global_macros = st_dir("global","macro","*")
    N = rows(global_macros)
    global_macros = global_macros, J(N,1,"")
    for (i=1;i<=N;i++) {
        global_macros[i,2] = st_global(global_macros[i,1])
    }

    // numeric scalars are stored as text via strofreal()
    global_numscalars = st_dir("global","numscalar","*")
    N = rows(global_numscalars)
    global_numscalars = global_numscalars, J(N,1,"")
    for (i=1;i<=N;i++) {
        global_numscalars[i,2] = strofreal(st_numscalar(global_numscalars[i,1]))
    }

    global_strscalars = st_dir("global","strscalar","*")
    N = rows(global_strscalars)
    global_strscalars = global_strscalars, J(N,1,"")
    for (i=1;i<=N;i++) {
        global_strscalars[i,2] = st_strscalar(global_strscalars[i,1])
    }

    // NOTE(review): st_matrix() returns a real matrix but the second column
    // here is string, so this assignment looks like it fails with a type
    // mismatch whenever at least one Stata matrix exists. Left unchanged
    // because the reader (recovercollapsedfile) is not visible here and the
    // stored layout must match what it expects - confirm and fix together.
    global_matrixs = st_dir("global","matrix","*")
    N = rows(global_matrixs)
    global_matrixs = global_matrixs, J(N,1,"")
    for (i=1;i<=N;i++) {
        global_matrixs[i,2] = st_matrix(global_matrixs[i,1])
    }

    char__dtas = st_dir("char","_dta","*")
    N = rows(char__dtas)
    char__dtas = char__dtas, J(N,1,"")
    for (i=1;i<=N;i++) {
        char__dtas[i,2] = st_global("_dta[" + char__dtas[i,1] + "]")
    }

    metadata = J(1, 6, NULL)
    metadata[1,1] = &local_macros
    metadata[1,2] = &global_macros
    metadata[1,3] = &global_numscalars
    metadata[1,4] = &global_strscalars
    metadata[1,5] = &global_matrixs
    metadata[1,6] = &char__dtas

    return(metadata)
}

// Loads value label thisvar and returns either its numeric values
// (valsvstext == "vals") or its text (valsvstext == "text").
// Any other selector falls through to a bare return.
matrix vlload_labels(thisvar, valsvstext)
{
    real colvector vlload_vals
    string colvector vlload_text
    st_vlload(thisvar, vlload_vals, vlload_text)
    if (valsvstext == "vals") return(vlload_vals)
    else if (valsvstext == "text") return(vlload_text)
    else return
}

end