******************
* Stata programs *
******************
program collapsetofile /* newvar oldvar wtype wvar byvars */
syntax anything [aweight/] [if] [in] using/, by(varlist) [cw replace nomacros]
// parse syntax
gettoken fcn rest : anything, parse(" ") bind
if "`fcn'" != "(sum)" & "`fcn'" != "(mean)" {
di as error "must specify (sum) or (mean)"
di as text "(you specified `fcn')"
error 10
}
local fcn : subinstr local fcn "(" "", all
local fcn : subinstr local fcn ")" "", all
unab varlist : `rest'
cap confirm variable `varlist'
if _rc {
di as error "`varlist' not a varlist"
error 10
}
local aweight `exp'
// confirm using doesn't exist
cap confirm new file `"`using'"'
if _rc != 0 {
if "`replace'" == "replace" {
rm `"`using'"'
noi di as text `"file `using' erased"'
}
else {
noi di as error "file `using' already exists"
error 10
}
}
// check sort order
local sortvars : sortedby
if !strmatch("`sortvars'","`by'*") {
di as error "data not sorted"
di as text "(data sorted by `sortvars', but should be sorted by `by')"
error 10
}
// create touse variable common to all vars in varlist
if "`cw'" == "" local novarlist novarlist
marksample touse, `novarlist'
// rather than (by.. keep if _n == _N), just tag these observations
tempvar select
by `by': gen byte `select' = (_n == _N) * (sum(`touse')!=0)
// save metadata to file
qui count if `select'
local obs = r(N)
foreach var of local by {
local types `types' `: type `var'' // keep original types of byvars
}
foreach var of local varlist {
local typ : type `var'
if "`typ'" == "long" | "`typ'" == "double" {
local types `types' double
}
else {
local types `types' float
}
}
mata: varcharstofile("`by' `varlist'","`types'","`using'",`obs')
// now calculate means and write collapsed data to file
foreach byvar of local by {
mata : storetofile("`byvar'","`select'","`using'")
}
foreach x of local varlist {
tempvar newvar
_`fcn' `newvar' `x' 0 `""' `"`aweight'"' `"`by'"' `"`touse'"'
mata : storetofile("`newvar'","`select'","`using'")
drop `newvar'
}
end
program _mean /* newvar oldvar wtype wvar byvars */
args y x sortpreserve wt w by touse
if (`"`w'"'=="") local w 1
if (`"`by'"' != "") local by `"by `by':"'
// get appropriate data type
local ty : type `x'
if (`"`ty'"'=="double" | `"`ty'"'=="long") local ty "double"
else local ty /* erase macro */
// create touse variable specific to this x variable
tempvar touse_x
gen byte `touse_x' = `touse'
markout `touse_x' `x'
// calculate mean
quietly `by' gen `ty' `y' = sum(`touse_x'*`w'*`x')/sum(cond(`touse_x',`w',0))
end
program _sum
args y x sortpreserve wt w by touse
if (`"`w'"'=="") local w 1
if (`"`by'"' != "") local by `"by `by':"'
// create touse variable specific to this x variable
tempvar touse_x
gen byte `touse_x' = `touse'
markout `touse_x' `x'
quietly {
if `"`w'"'!="1" {
tempvar new
remakew `x' `w' `new' `"`by'"' `"`touse_x'"'
local w `"`new'"'
}
// calculate sum
`by' gen double `y' = sum(`w'*`x')
}
end
/* utility for if weights specified with (sum) */
program remakew /* xvar oldw neww by-prefix */
args x w new by touse
/* by is either "" or "by vn vn ...:" */
tempvar sum obs
`by' gen long `obs' = sum(`touse')
`by' gen double `sum' = sum(cond(`touse',`w',0))
`by' gen double `new' = cond(`touse', `w'*`obs'[_N]/`sum'[_N], .)
end
program define recover
syntax anything
confirm file "`anything'"
qui mata: recovercollapsedfile("`anything'")
end
******************
* mata functions *
******************
mata
void storetofile(string matrix vars, string scalar select, string scalar fileloc)
{
st_view(v=., ., vars, select)
fh = fopen(fileloc, "a")
fputmatrix(fh, v)
fclose(fh)
}
void varcharstofile(string scalar varlist, string scalar types, string scalar fileloc, real scalar obs) {
fh = fopen(fileloc, "w") // this is the first matrix being written to file
fputmatrix(fh, obs)
fputmatrix(fh, getvarcharacteristics(tokens(varlist)',tokens(types)'))
fputmatrix(fh, getstatacharacteristics())
fclose(fh)
}
pointer matrix getvarcharacteristics(string colvector varnames, |string colvector vartypes) {
metadata = J(rows(varnames), 7, NULL)
for (i=1;i<=rows(varnames);i++) {
// store varname, vartype, and varformat
metadata[i,1] = &varnames[i,1]
if (args() == 1) metadata[i,2] = &st_vartype(varnames[i,1])
else metadata[i,2] = &vartypes[i,1]
metadata[i,3] = &st_varformat(varnames[i,1])
metadata[i,4] = &st_varlabel(varnames[i,1])
metadata[i,5] = &st_varvaluelabel(varnames[i,1])
// value label values and text
if (*metadata[i,5] != "") {
metadata[i,6] = &vlload_labels(*metadata[i,5],"vals")
metadata[i,7] = &vlload_labels(*metadata[i,5],"text")
}
}
return(metadata)
}
pointer matrix getstatacharacteristics() {
pointer rowvector metadata
local_macros = st_dir("local","macro","*")
N = rows(local_macros)
local_macros = local_macros, J(N,1,"")
for (i=1;i<=N;i++) {
local_macros[i,2] = st_local(local_macros[i,1])
}
global_macros = st_dir("global","macro","*")
N = rows(global_macros)
global_macros = global_macros, J(N,1,"")
for (i=1;i<=N;i++) {
global_macros[i,2] = st_global(global_macros[i,1])
}
global_numscalars = st_dir("global","numscalar","*")
N = rows(global_numscalars)
global_numscalars = global_numscalars, J(N,1,"")
for (i=1;i<=N;i++) {
global_numscalars[i,2] = strofreal(st_numscalar(global_numscalars[i,1]))
}
global_strscalars = st_dir("global","strscalar","*")
N = rows(global_strscalars)
global_strscalars = global_strscalars, J(N,1,"")
for (i=1;i<=N;i++) {
global_strscalars[i,2] = st_strscalar(global_strscalars[i,1])
}
global_matrixs = st_dir("global","matrix","*")
N = rows(global_matrixs)
global_matrixs = global_matrixs, J(N,1,"")
for (i=1;i<=N;i++) {
global_matrixs[i,2] = st_matrix(global_matrixs[i,1])
}
char__dtas = st_dir("char","_dta","*")
N = rows(char__dtas)
char__dtas = char__dtas, J(N,1,"")
for (i=1;i<=N;i++) {
char__dtas[i,2] = st_global("_dta[" + char__dtas[i,1] + "]")
}
metadata = J(1, 6, NULL)
metadata[1,1] = &local_macros
metadata[1,2] = &global_macros
metadata[1,3] = &global_numscalars
metadata[1,4] = &global_strscalars
metadata[1,5] = &global_matrixs
metadata[1,6] = &char__dtas
return(metadata)
}
matrix vlload_labels(thisvar, valsvstext)
{
real colvector vlload_vals
string colvector vlload_text
st_vlload(thisvar, vlload_vals, vlload_text)
if (valsvstext == "vals") return(vlload_vals)
else if (valsvstext == "text") return(vlload_text)
else return
}
end