******************
* Stata programs *
******************
program define recover
    // recover filename [, clear]
    //
    // Rebuilds a dataset from the file `filename' by handing it to the Mata
    // routine recovercollapsedfile().
    //   filename : path of the file to read (may be enclosed in quotes)
    //   clear    : permit replacing the data currently in memory
    // Exits with an error if data are in memory and -clear- was not given,
    // or if the file does not exist.
    syntax anything(id="filename"), [clear]

    // Must start with an empty dataset unless -clear- was specified.
    // -unab- on _all returns _rc == 0 only when variables are in memory.
    cap unab varlist : _all
    if "`clear'" == "" & !_rc {
        di as error "no; data in memory would be lost"
        error 10
    }

    // Drop any enclosing quotes the user typed so the name can be re-quoted
    // safely below (a quoted name would otherwise expand to ""name"").
    local fname `anything'

    // The file must exist.  Check this BEFORE clearing, so that a mistyped
    // filename does not destroy the data in memory.
    confirm file `"`fname'"'

    if "`clear'" == "clear" {
        clear
    }

    // Pass the name through st_local() rather than macro-expanding it into
    // Mata source, so quotes or spaces in the path cannot break the call.
    qui mata: recovercollapsedfile(st_local("fname"))
end
******************
* mata functions *
******************
mata
// Rebuild the Stata dataset stored in the file `fileloc`.
//
// The file is read with fgetmatrix() in this order:
//   1. obs          - number of observations to create
//   2. metadata     - pointer matrix, one row per variable; columns point to
//                     (1) name, (2) storage type, (3) display format,
//                     (4) variable label, (5) value-label name,
//                     (6) value-label values, (7) value-label text
//   3. moremetadata - pointers handed on to givestatacharacteristics()
//   4. one matrix per variable holding that variable's data column
//
// Any data in memory are cleared first.  Returns nothing.
// (A leading `locals` record and a trailing givelocals() call were commented
// out in the original and remain disabled.)
void recovercollapsedfile(string fileloc) {
    real scalar   fh, i
    real matrix   X
    transmorphic  obs, metadata, moremetadata
    transmorphic  thisvarname, thisvartype

    // must start with a clear dataset
    stata("clear")

    // read the header records
    fh = fopen(fileloc, "r")
    obs          = fgetmatrix(fh)
    metadata     = fgetmatrix(fh)
    moremetadata = fgetmatrix(fh)

    // construct the (still empty) dataset: one variable per metadata row
    for (i=1; i<=rows(metadata); i++) {
        thisvarname = *metadata[i,1]    // eg contains "date"
        thisvartype = *metadata[i,2]    // eg contains "int"

        st_addvar(thisvartype, thisvarname)
        st_varformat(thisvarname, *metadata[i,3])
        st_varlabel(thisvarname, *metadata[i,4])
        st_varvaluelabel(thisvarname, *metadata[i,5])

        // recover the value-label definition, if the variable has one
        if (*metadata[i,5] != "") {
            st_vlmodify(*metadata[i,5], *metadata[i,6], *metadata[i,7])
        }
    }

    // populate dataset.
    // Each column is read straight into a view onto the Stata data, so the
    // data never need to be held twice in memory (the worse alternative
    // would be Z = fgetmatrix(fh); X[.,.] = Z).
    // NOTE(review): st_view() creates a numeric view; string variables would
    // presumably need st_sview() - TODO confirm the file never holds strings.
    st_addobs(obs)
    st_view(X=., ., .)
    for (i=1; i<=rows(metadata); i++) {
        X[.,i] = fgetmatrix(fh)
    }

    // Everything has been read: close the file now, so the handle is not
    // leaked if restoring the characteristics below aborts with an error.
    fclose(fh)

    // populate macros, scalars, matrices and characteristics
    givestatacharacteristics(moremetadata)
}
// Restore saved Stata state from `metadata`, a pointer rowvector whose
// elements point to two-column matrices (column 1 = name, column 2 = value):
//   [1] local macros       [2] global macros
//   [3] numeric scalars    [4] string scalars
//   [5] matrices           [6] _dta[] dataset characteristics
// A matrix with zero rows restores nothing for that category.
// Returns nothing.
void givestatacharacteristics(pointer rowvector metadata) {
    real scalar   i
    transmorphic  local_macros, global_macros
    transmorphic  global_numscalars, global_strscalars
    transmorphic  global_matrixs, char__dtas

    local_macros      = *metadata[1,1]
    global_macros     = *metadata[1,2]
    global_numscalars = *metadata[1,3]
    global_strscalars = *metadata[1,4]
    global_matrixs    = *metadata[1,5]
    char__dtas        = *metadata[1,6]

    // local macros
    for (i=1; i<=rows(local_macros); i++) {
        st_local(local_macros[i,1], local_macros[i,2])
    }

    // global macros
    for (i=1; i<=rows(global_macros); i++) {
        st_global(global_macros[i,1], global_macros[i,2])
    }

    // numeric scalars: the value is converted with strtoreal() before use
    for (i=1; i<=rows(global_numscalars); i++) {
        st_numscalar(global_numscalars[i,1], strtoreal(global_numscalars[i,2]))
    }

    // string scalars
    for (i=1; i<=rows(global_strscalars); i++) {
        st_strscalar(global_strscalars[i,1], global_strscalars[i,2])
    }

    // matrices
    // NOTE(review): st_matrix() expects a real matrix as its second argument,
    // while the other records here are string matrices - confirm how the
    // writer stores matrices before relying on this loop.
    for (i=1; i<=rows(global_matrixs); i++) {
        st_matrix(global_matrixs[i,1], global_matrixs[i,2])
    }

    // dataset characteristics, addressed as _dta[name].
    // The original loop read an unassigned variable (global__dtas) instead of
    // char__dtas, so the characteristics were never restored.
    for (i=1; i<=rows(char__dtas); i++) {
        st_global("_dta[" + char__dtas[i,1] + "]", char__dtas[i,2])
    }
}
end