******************
* Stata programs *
******************

* recover <filename> [, clear]
*
* Rebuilds the dataset in memory from a file in the layout that the Mata
* routine recovercollapsedfile() (below) reads.
*
*   filename  path of the file to read (may be double-quoted)
*   clear     allow any data currently in memory to be discarded
*
* Without -clear-, the command refuses to run when variables are in memory.
program define recover
    syntax anything(id="filename"), [clear]

    // -syntax anything- keeps any double quotes the user typed; strip the
    // outer pair so that paths containing spaces work further down
    local filename `anything'

    // must start with empty dataset
    // (-unab ... : _all- fails, i.e. _rc != 0, when no variables exist)
    cap unab varlist : _all
    if "`clear'" == "" & !_rc {
        di as error "no; data in memory would be lost"
        error 10
    }
    else if "`clear'" == "clear" {
        clear
    }

    // file must exist
    confirm file `"`filename'"'

    // hand the path over via st_local() rather than splicing it into a Mata
    // string literal, so quotes in the path cannot break the Mata call
    qui mata: recovercollapsedfile(st_local("filename"))
end

******************
* mata functions *
******************

mata

// recovercollapsedfile(fileloc)
//
// Reads the file at `fileloc` and reconstructs the Stata dataset from it.
// The file is read with successive fgetmatrix() calls in this order:
//   1. obs          - number of observations to create
//   2. metadata     - one row per variable; each cell is dereferenced below
//                     as: name, type, format, variable label, value-label
//                     name, value-label values, value-label text
//   3. moremetadata - passed on to givestatacharacteristics()
//   4. one matrix per variable (one per row of metadata) holding its data
//
// Any data in memory is cleared first.
void recovercollapsedfile(string fileloc)
{
    // must start with a clear dataset
    stata("clear")

    // recover data
    fh = fopen(fileloc, "r")
    //locals     = fgetmatrix(fh)
    obs          = fgetmatrix(fh)
    metadata     = fgetmatrix(fh)
    moremetadata = fgetmatrix(fh)

    // construct dataset
    for (i=1; i<=rows(metadata); i++) {

        // extract variable metadata from file
        thisvarname = *metadata[i,1]    // eg contains "date"
        thisvartype = *metadata[i,2]    // eg contains "int"

        // populate an empty dataset with varnames, labels, etc
        st_addvar(thisvartype, thisvarname)
        st_varformat(thisvarname, *metadata[i,3])
        st_varlabel(thisvarname, *metadata[i,4])
        st_varvaluelabel(thisvarname, *metadata[i,5])

        // recover value labels (only when the variable has one attached)
        if (*metadata[i,5] != "") {
            st_vlmodify(*metadata[i,5], *metadata[i,6], *metadata[i,7])
        }
    }

    // populate dataset
    st_addobs(obs)
    st_view(X=., ., .)

    // here's the cool part - read data directly into the view matrix,
    // one variable (column) at a time. this way the data doesn't need to be
    // stored twice in memory.
    // (eg worse alternative would be Z = fgetmatrix(fh); X[.,.] = Z;)
    // NOTE(review): st_view() is used for every column - presumably the
    // stored variables are all numeric; confirm against the writer.
    for (i=1; i<=rows(metadata); i++) {
        X[.,i] = fgetmatrix(fh)
    }

    // populate metadata
    givestatacharacteristics(moremetadata)

    fclose(fh)

    // send any locals from original data back to stata
    // givelocals(locals)
}

// givestatacharacteristics(metadata)
//
// Restores Stata-side state stored next to the data. `metadata` is a pointer
// rowvector whose six elements are dereferenced, in order, as:
//   1. local macros     (name, contents)
//   2. global macros    (name, contents)
//   3. numeric scalars  (name, value as string -> strtoreal())
//   4. string scalars   (name, contents)
//   5. matrices         (name, value)
//   6. _dta[] dataset characteristics (name, contents)
// Columns 1 and 2 of each element are used as shown in parentheses.
void givestatacharacteristics(pointer rowvector metadata)
{
    local_macros      = *metadata[1,1]
    global_macros     = *metadata[1,2]
    global_numscalars = *metadata[1,3]
    global_strscalars = *metadata[1,4]
    global_matrixs    = *metadata[1,5]
    char__dtas        = *metadata[1,6]

    // NOTE(review): st_local() sets the macro in the calling Stata program's
    // scope - verify the restored locals end up where callers expect them.
    for (i=1; i<=rows(local_macros); i++) {
        st_local(local_macros[i,1], local_macros[i,2])
    }

    for (i=1; i<=rows(global_macros); i++) {
        st_global(global_macros[i,1], global_macros[i,2])
    }

    for (i=1; i<=rows(global_numscalars); i++) {
        st_numscalar(global_numscalars[i,1], strtoreal(global_numscalars[i,2]))
    }

    for (i=1; i<=rows(global_strscalars); i++) {
        st_strscalar(global_strscalars[i,1], global_strscalars[i,2])
    }

    // NOTE(review): st_matrix() needs a string name and a real matrix value;
    // confirm how the writer encodes global_matrixs so both cells fit.
    for (i=1; i<=rows(global_matrixs); i++) {
        st_matrix(global_matrixs[i,1], global_matrixs[i,2])
    }

    // dataset characteristics: iterate over char__dtas (the value read from
    // metadata[1,6]); the earlier code looped over an unassigned variable
    // instead of this one
    for (i=1; i<=rows(char__dtas); i++) {
        st_global("_dta[" + char__dtas[i,1] + "]", char__dtas[i,2])
    }
}

end