[ create a new paste ] login | about

Link: http://codepad.org/DcwtvDEb    [ raw code | fork ]

Plain Text, pasted on Feb 28:
******************
* Stata programs *
******************

program collapsetofile /* newvar oldvar wtype wvar byvars */
	syntax anything [aweight/] [if] [in] using/, by(varlist) [cw replace nomacros]
	
	// parse syntax
	gettoken fcn rest : anything, parse(" ") bind
	if "`fcn'" != "(sum)" & "`fcn'" != "(mean)" {
		di as error "must specify (sum) or (mean)"
		di as text "(you specified `fcn')"
		error 10
	}
	local fcn : subinstr local fcn "(" "", all
	local fcn : subinstr local fcn ")" "", all
	unab varlist : `rest'
	cap confirm variable `varlist' 
	if _rc {
		di as error "`varlist' not a varlist"
		error 10
	}
	local aweight `exp'
	
	// confirm using doesn't exist
	cap confirm new file `"`using'"'
	if _rc != 0 {
		if "`replace'" == "replace" {
			rm `"`using'"'
			noi di as text `"file `using' erased"'
		}
		else {
			noi di as error "file `using' already exists"
			error 10
		}
	}
	
	// check sort order
	local sortvars : sortedby
	if !strmatch("`sortvars'","`by'*") {
		di as error "data not sorted"
		di as text "(data sorted by `sortvars', but should be sorted by `by')"
		error 10
	}
		
	// create touse variable common to all vars in varlist
	if "`cw'" == ""  local novarlist novarlist
	marksample touse, `novarlist'
	
	// rather than (by.. keep if _n == _N), just tag these observations
	tempvar select
	by `by': gen byte `select' = (_n == _N) * (sum(`touse')!=0)
	
	// save metadata to file
	qui count if `select'
	local obs = r(N)
	foreach var of local by {
		local types `types' `: type `var'' // keep original types of byvars
	}
	foreach var of local varlist {
		local typ : type `var'
		if "`typ'" == "long" | "`typ'" == "double" {
			local types `types' double
		}
		else {
			local types `types' float
		}
	}
	mata: varcharstofile("`by' `varlist'","`types'","`using'",`obs')
		
	// now calculate means and write collapsed data to file
	foreach byvar of local by {
		mata : storetofile("`byvar'","`select'","`using'")
	}
	foreach x of local varlist {
		tempvar newvar
		_`fcn' `newvar' `x' 0 `""' `"`aweight'"' `"`by'"' `"`touse'"'
		mata : storetofile("`newvar'","`select'","`using'")
		drop `newvar'
	}
	
end

program _mean /* newvar oldvar wtype wvar byvars */
	args y x sortpreserve wt w by touse

	if (`"`w'"'=="") local w 1
	if (`"`by'"' != "") local by `"by `by':"'
	
	// get appropriate data type
	local ty : type `x'
	if (`"`ty'"'=="double" | `"`ty'"'=="long") local ty "double"
	else	local ty 			/* erase macro */

	// create touse variable specific to this x variable
	tempvar touse_x
	gen byte `touse_x' = `touse'
	markout `touse_x' `x'
	
	// calculate mean
	quietly `by' gen `ty' `y' = sum(`touse_x'*`w'*`x')/sum(cond(`touse_x',`w',0))
	
end

program _sum
	args y x sortpreserve wt w by touse

	if (`"`w'"'=="") local w 1
	if (`"`by'"' != "") local by `"by `by':"'
	
	// create touse variable specific to this x variable
	tempvar touse_x
	gen byte `touse_x' = `touse'
	markout `touse_x' `x'
	
	quietly {
		if `"`w'"'!="1" {
			tempvar new
			remakew `x' `w' `new' `"`by'"' `"`touse_x'"'
			local w `"`new'"'
		}
		
		// calculate sum
		`by' gen double `y' = sum(`w'*`x')
	}

end

/* utility for if weights specified with (sum) */
program remakew /* xvar oldw neww by-prefix */
	args x w new by touse
			/* by is either "" or "by vn vn ...:"	*/
	tempvar sum obs
	`by' gen long `obs' = sum(`touse')
	`by' gen double `sum' = sum(cond(`touse',`w',0))
	`by' gen double `new' = cond(`touse', `w'*`obs'[_N]/`sum'[_N], .)
end

program define recover 
	syntax anything 
	confirm file "`anything'"
	qui mata: recovercollapsedfile("`anything'")
end







******************
* mata functions *
******************


mata
void storetofile(string matrix vars, string scalar select, string scalar fileloc)
{
	st_view(v=., ., vars, select)

	fh = fopen(fileloc, "a")
	fputmatrix(fh, v)
	fclose(fh)
}

void varcharstofile(string scalar varlist, string scalar types, string scalar fileloc, real scalar obs) {
	fh = fopen(fileloc, "w") // this is the first matrix being written to file
	fputmatrix(fh, obs)
	fputmatrix(fh, getvarcharacteristics(tokens(varlist)',tokens(types)'))
	fputmatrix(fh, getstatacharacteristics())
	fclose(fh)
}

pointer matrix getvarcharacteristics(string colvector varnames, |string colvector vartypes) {
	metadata = J(rows(varnames), 7, NULL)

	for (i=1;i<=rows(varnames);i++) {
		// store varname, vartype, and varformat
		metadata[i,1] = &varnames[i,1]
		if (args() == 1) metadata[i,2] = &st_vartype(varnames[i,1])
		else metadata[i,2] = &vartypes[i,1]
		metadata[i,3] = &st_varformat(varnames[i,1])
		metadata[i,4] = &st_varlabel(varnames[i,1])
		metadata[i,5] = &st_varvaluelabel(varnames[i,1])
		
		// value label values and text
		if (*metadata[i,5] != "") {
			metadata[i,6] = &vlload_labels(*metadata[i,5],"vals")
			metadata[i,7] = &vlload_labels(*metadata[i,5],"text")
		}
	}
	return(metadata)
}

pointer matrix getstatacharacteristics() {
		
		pointer rowvector metadata
		
		local_macros = st_dir("local","macro","*")
		N = rows(local_macros)
		local_macros = local_macros, J(N,1,"")
		for (i=1;i<=N;i++) {
			local_macros[i,2] = st_local(local_macros[i,1])
		}
		
		global_macros = st_dir("global","macro","*")
		N = rows(global_macros)
		global_macros = global_macros, J(N,1,"")
		for (i=1;i<=N;i++) {
			global_macros[i,2] = st_global(global_macros[i,1])
		}
		
		global_numscalars = st_dir("global","numscalar","*")
		N = rows(global_numscalars)
		global_numscalars = global_numscalars, J(N,1,"")
		for (i=1;i<=N;i++) {
			global_numscalars[i,2] = strofreal(st_numscalar(global_numscalars[i,1]))
		}
		
		global_strscalars = st_dir("global","strscalar","*")
		N = rows(global_strscalars)
		global_strscalars = global_strscalars, J(N,1,"")
		for (i=1;i<=N;i++) {
			global_strscalars[i,2] = st_strscalar(global_strscalars[i,1])
		}
		
		global_matrixs = st_dir("global","matrix","*")
		N = rows(global_matrixs)
		global_matrixs = global_matrixs, J(N,1,"")
		for (i=1;i<=N;i++) {
			global_matrixs[i,2] = st_matrix(global_matrixs[i,1])
		}
		
		char__dtas = st_dir("char","_dta","*")
		N = rows(char__dtas)
		char__dtas = char__dtas, J(N,1,"")
		for (i=1;i<=N;i++) {
			char__dtas[i,2] = st_global("_dta[" + char__dtas[i,1] + "]")
		}
		
		metadata = J(1, 6, NULL)
		metadata[1,1] = &local_macros
		metadata[1,2] = &global_macros
		metadata[1,3] = &global_numscalars
		metadata[1,4] = &global_strscalars
		metadata[1,5] = &global_matrixs
		metadata[1,6] = &char__dtas
		
		return(metadata)
}

matrix vlload_labels(thisvar, valsvstext) 
{
	real colvector vlload_vals
	string colvector vlload_text

	st_vlload(thisvar, vlload_vals, vlload_text)
	if (valsvstext == "vals") return(vlload_vals)
	else if (valsvstext == "text") return(vlload_text)
	else return
}

end






Create a new paste based on this one


Comments: