[ create a new paste ] login | about

Link: http://codepad.org/csZhQvb0    [ raw code | fork ]

Plain Text, pasted on Feb 28:
******************
* Stata programs *
******************

program define recover 
	// Load a dataset from a file that the Mata routine recovercollapsedfile()
	// can read.
	//
	// Syntax:  recover filename [, clear]
	//   filename  path of the file to read; may be wrapped in double quotes
	//   clear     drop the data currently in memory before loading
	//
	// Exits with return code 10 when data are in memory and -clear- was not
	// specified, and with -confirm file-'s error when the file is not found.
	syntax anything(id="filename") [, clear]
	
	// Macro assignment drops one pair of enclosing double quotes, so both
	// -recover myfile- and -recover "my file"- leave a bare path here.
	local filename `anything'
	
	// must start with empty dataset
	// c(k) is the number of variables in memory; testing it directly avoids
	// treating an unrelated -unab- failure as "no data" and then losing data.
	if "`clear'" == "clear" {
		clear
	}
	else if c(k) > 0 {
		di as error "no; data in memory would be lost"
		// return code 10 kept unchanged for callers that inspect _rc
		error 10
	}
	
	// file must exist (compound quotes tolerate quotes inside the path)
	confirm file `"`filename'"'
	
	// Hand the path to Mata through st_local() instead of pasting it into a
	// Mata string literal, so quotes or backslashes in the path cannot break
	// or alter the Mata statement.
	qui mata: recovercollapsedfile(st_local("filename"))
	
end


******************
* mata functions *
******************

mata
// Rebuild the Stata dataset stored in fileloc.
//
// The file is read with fgetmatrix() in this order:
//   1. obs          - number of observations, passed to st_addobs()
//   2. metadata     - pointer matrix, one row per variable; columns are
//                     1 name, 2 storage type, 3 display format,
//                     4 variable label, 5 value-label name,
//                     6 value-label values, 7 value-label text
//   3. moremetadata - passed unchanged to givestatacharacteristics()
//   4. one matrix per variable, in the same order as the rows of metadata
//
// Side effects: clears the data in memory, creates the variables and
// observations, and restores whatever givestatacharacteristics() restores.
// Aborts with the usual Mata error if the file cannot be opened or read.
void recovercollapsedfile(string fileloc) {
	
	// must start with a clear dataset
	stata("clear")
	
	// recover header matrices
	fh = fopen(fileloc, "r")
	obs = fgetmatrix(fh)
	metadata = fgetmatrix(fh)
	moremetadata = fgetmatrix(fh)
	
	nvars = rows(metadata)
	
	// construct dataset: populate an empty dataset with varnames, labels, etc
	for (i=1;i<=nvars;i++) {
		
		// extract variable metadata from file
		thisvarname = *metadata[i,1]  // eg contains "date"
		thisvartype = *metadata[i,2]  // eg contains "int"
		thisvallabel = *metadata[i,5] // "" when no value label is attached
		
		st_addvar(thisvartype,thisvarname)
		st_varformat(thisvarname,*metadata[i,3])
		st_varlabel(thisvarname,*metadata[i,4])
		
		// Attach and recover the value label only when there is one. A newly
		// added variable has no value label, so skipping the call for ""
		// changes nothing for numeric variables and avoids calling
		// st_varvaluelabel() on string variables, which it rejects.
		if (thisvallabel != "") {
			st_varvaluelabel(thisvarname,thisvallabel)
			st_vlmodify(thisvallabel,*metadata[i,6],*metadata[i,7])
		}
		
	}
	
	// populate dataset one column at a time, so at most one column is held
	// outside the dataset at any moment. st_store()/st_sstore() are chosen
	// per variable because a single numeric view over all variables cannot
	// include string variables.
	// NOTE(review): assumes string variables were written as string
	// matrices by the routine that created the file - confirm there.
	st_addobs(obs)
	for (i=1;i<=nvars;i++) {
		if (st_isstrvar(i)) st_sstore(., i, fgetmatrix(fh))
		else st_store(., i, fgetmatrix(fh))
	}
	
	// everything has been read; release the handle before touching macros,
	// scalars, and characteristics so it is not left open if that step aborts
	fclose(fh)
	
	// populate metadata
	givestatacharacteristics(moremetadata)
	
}

// Push saved Stata settings back into Stata.
//
// metadata is a pointer rowvector with six entries; each points to a matrix
// with one row per item, name in column 1 and contents in column 2:
//   1 local macros, 2 global macros, 3 numeric scalars (contents stored as
//   text and converted with strtoreal()), 4 string scalars, 5 matrices,
//   6 dataset characteristics (restored as _dta[name]).
// A section with zero rows is skipped.
void givestatacharacteristics(pointer rowvector metadata) {
		
		local_macros = *metadata[1,1]
		global_macros = *metadata[1,2]
		global_numscalars = *metadata[1,3]
		global_strscalars = *metadata[1,4]
		global_matrixs = *metadata[1,5]
		char__dtas = *metadata[1,6]
		
		// NOTE(review): st_local() sets the macro in the Stata program that
		// invoked Mata; if that program then ends, the locals presumably go
		// out of scope with it - confirm this is the intended behavior.
		for (i=1;i<=rows(local_macros);i++) {
			st_local(local_macros[i,1],local_macros[i,2])
		}
		for (i=1;i<=rows(global_macros);i++) {
			st_global(global_macros[i,1],global_macros[i,2])
		}
		for (i=1;i<=rows(global_numscalars);i++) {
			st_numscalar(global_numscalars[i,1],strtoreal(global_numscalars[i,2]))
		}
		for (i=1;i<=rows(global_strscalars);i++) {
			st_strscalar(global_strscalars[i,1],global_strscalars[i,2])
		}
		// NOTE(review): st_matrix() takes a string name and a real matrix;
		// how the saved matrices are laid out in this section is not visible
		// here - verify against the routine that writes the file.
		for (i=1;i<=rows(global_matrixs);i++) {
			st_matrix(global_matrixs[i,1],global_matrixs[i,2])
		}
		// dataset characteristics: iterate over char__dtas, the variable
		// assigned above (the loop previously referred to a name that was
		// never assigned, so no characteristic was restored)
		for (i=1;i<=rows(char__dtas);i++) {
			st_global("_dta[" + char__dtas[i,1] + "]",char__dtas[i,2])
		}
		
}
end






Create a new paste based on this one


Comments: