#include <cstddef>
#include <iostream>
#include <new>
#include <boost/timer.hpp>
#include <boost/random/linear_congruential.hpp>
#include <boost/random/uniform_int.hpp>
#include <boost/random/uniform_real.hpp>
#include <boost/random/variate_generator.hpp>
#include <boost/generator_iterator.hpp>
#include <boost/program_options.hpp>
// This is a basic application program which tries
// to show the performance difference between an
// NRC storage with optimized element access and
// a classic one.
//
// The computation result must be the same between
// NRC and classic access.
//
// Here are the program options
// Allowed options:
// --help produce help message
// --width arg (=255) width of the array
// --height arg (=255) height of the array
// --loop arg (=1000) number of run loop
// --seed arg (=42) seed for random generator
//
//
// To compile this application, you'll need
// the boost program options library. (This is the
// libboost-program-options-dev under debian)
//
// I've only tried to compile this program with gcc:
// g++ array.cpp -W -Wall -O3 -lboost_program_options
// Pseudo-random engine shared by the whole program.
// It is constructed with a fixed seed (42) so that results can be
// reproduced; main() re-seeds it from the --seed option before each
// call to fill_array().
typedef boost::minstd_rand base_generator_type ;
base_generator_type generator(42u) ;
// 2d array allocation using NRC storage.
//
// Layout: one table of h row pointers plus a single contiguous buffer of
// h*w elements. m[i] points at element i*w of that buffer, so m[i][j] and
// m[0][i*w + j] designate the same element.
//
// Parameters:
//   h  number of rows (must be >= 1, because m[0] owns the element buffer)
//   w  number of columns (must be >= 0)
//
// Returns the row pointer table, or 0 when the dimensions are invalid, the
// element count does not fit in std::size_t, or one of the two allocations
// fails. Elements are default-initialized. The result must be released with
// release_array().
template <typename T>
T** alloc_array(int h, int w)
{
  typedef T* ptr_type ;
  typedef ptr_type* return_type ;

  // h == 0 would make the write to m[0] below go out of bounds
  if (h <= 0 || w < 0) return 0 ;

  const std::size_t rows = static_cast<std::size_t>(h) ;
  const std::size_t cols = static_cast<std::size_t>(w) ;
  // the element count is computed in size_t: w*h may overflow an int
  if (cols != 0 && rows > static_cast<std::size_t>(-1) / cols) return 0 ;

  // plain new throws instead of returning null; the nothrow form is what
  // makes the null checks (and the "return 0 on failure" contract) real
  return_type m = new (std::nothrow) ptr_type[rows] ;
  if (!m) return 0 ;

  m[0] = new (std::nothrow) T[rows * cols] ;
  if (!m[0]) { delete[] m ; return 0 ; }

  // every following row starts w elements after the previous one
  for (std::size_t i = 1 ; i < rows ; ++i) m[i] = m[i-1] + cols ;
  return m ;
}
// Helper function to release the memory of an array built by alloc_array().
//
// Frees the contiguous element buffer (reachable through array[0]) and then
// the row pointer table itself. A null array is accepted and ignored, like
// delete[] on a null pointer, so the value alloc_array() returns on failure
// can be passed safely.
template <typename T>
void release_array(T** array)
{
  if (!array) return ; // nothing was allocated: array[0] must not be read
  delete[] array[0] ;
  delete[] array ;
}
// fill array with value between bv & uv
template <typename T>
void fill_array(T** array, int h, int w, T bv=-10, T uv=10)
{
boost::uniform_real<T> uni_dist(bv, uv) ;
boost::variate_generator<base_generator_type&, boost::uniform_real<T> > uni(generator, uni_dist) ;
for (int i = 0 ; i < h ; ++i)
for (int j = 0 ; j < w ; ++j) {
array[i][j] = uni() ;
}
}
// Simulated workload used to time element access.
//
// Runs `loop` passes over the h x w array. Every pass resets the
// accumulator to zero and then adds all elements in row-major order, so
// the value returned is the sum computed by the last pass (0 when no pass
// is executed).
//
// Both access modes visit the elements in the same order:
//   ncr_access == true  : through the row pointer table, array[row][col]
//   ncr_access == false : through a flat pointer to the first element,
//                         flat[w*row + col] ("classic" one-dimensional
//                         access, as if the buffer came from new T[w*h])
// The flat mode relies on array[0] addressing w*h contiguous elements.
template <typename T>
T simulate_work(T** array, int h, int w, int loop, bool ncr_access=true)
{
  T sum = 0 ;

  if (!ncr_access) {
    // naive access: the offset of each element is computed by hand
    for (int remaining = loop ; remaining > 0 ; --remaining) {
      T* flat = array[0] ;
      sum = 0 ;
      for (int row = 0 ; row < h ; ++row) {
        for (int col = 0 ; col < w ; ++col) {
          sum += flat[w*row + col] ; // classic element access
        }
      }
    }
    return sum ;
  }

  // NRC access: the row pointer table gives the start of each row
  for (int remaining = loop ; remaining > 0 ; --remaining) {
    sum = 0 ;
    for (int row = 0 ; row < h ; ++row) {
      for (int col = 0 ; col < w ; ++col) {
        sum += array[row][col] ;
      }
    }
  }
  return sum ;
}
int main(int argc, char* argv[])
{
namespace po = boost::program_options ;
// we will use a 2d float array
typedef float value_type ;
typedef value_type** Array ;
int width, height, bench_loop ;
unsigned int seed ;
// options parsing
po::options_description desc("Allowed options") ;
desc.add_options()
("help", "produce help message")
("width", po::value<int>(&width)->default_value(255), "width of the array")
("height", po::value<int>(&height)->default_value(255), "height of the array")
("loop", po::value<int>(&bench_loop)->default_value(1000), "number of run loop")
("seed", po::value<unsigned int>(&seed)->default_value(42u), "seed for random generator") ;
po::variables_map vm ;
try {
po::store(po::parse_command_line(argc, argv, desc), vm) ;
po::notify(vm) ;
}
catch (po::unknown_option& e) {
std::cout << desc << '\n' ;
return 1 ;
}
if (vm.count("help")) {
std::cout << desc << '\n' ;
return 1;
}
// array allocation
Array ncr_array = alloc_array<value_type>(height, width) ;
if (!ncr_array) {
std::cerr << "Error during array allocation\n" ;
return -1 ;
}
// we fill the array with some random value
generator.seed(seed) ;
fill_array(ncr_array, height, width) ;
value_type res_nsr, res_classic ;
double tt,ttu;
boost::timer timer ;
{
res_nsr = simulate_work(ncr_array, height, width, bench_loop, true) ;
tt = timer.elapsed() ;
std::cout << "NRC access : " << tt << " s\n" ;
}
// cleanup
release_array(ncr_array) ;
// reallocation
ncr_array = alloc_array<value_type>(height, width) ;
if (!ncr_array) {
std::cerr << "Error during array allocation\n" ;
return -1 ;
}
// again, we will the array with some random value
generator.seed(seed) ;
fill_array(ncr_array, height, width) ;
{
timer.restart() ;
res_classic = simulate_work(ncr_array, height, width, bench_loop, false) ;
ttu = timer.elapsed() ;
std::cout << "Naive access: " << ttu << " s\n" ;
}
std::cout << "Ratio: " << ((tt-ttu)/ttu)*100 << " percent \n" ;
if (res_classic == res_nsr) {
std::cout << "Computation is ok\n" ;
}
else {
std::cout << "Result between nsr access and classic access are different. Not normal.\n" ;
}
release_array(ncr_array) ;
}