#include <iostream>
#include <iterator>
#include <mkl_lapack.h>
#include <windows.h>
// \see Numerical Recipes 3rd edition section 2.4
// The problem then looks like:
//
// b0 c0 0 0 0 | | u0 | r0
// a1 b1 c1 0 0 | | u1 | r1
// 0 a2 b2 c2 0 | | u2 | r2
// | * | =
// 0 0 0 0 an-2 bn-2 cn-2 | | un-2 | rn-2
// 0 0 0 0 0 an-1 bn-1 | | un-1 | rn-1
//
// the iterator to a is used as reference
// Requires all containers a b c r and u to have the same size = aend - abegin
//
// Investigated the possibility to use LAPACK routines from the Intel MKL
// however the size and simplicity of the problem argues against this
//
template <typename Ran, typename ConstRan>
bool TriDiagonal( Ran u,
ConstRan abegin, ConstRan aend, ConstRan b, ConstRan c,
ConstRan r)
{
typedef typename std::iterator_traits<Ran>::value_type uType;
typedef typename std::iterator_traits<ConstRan>::value_type vType;
if ( b[0] == static_cast<vType>(0) )
{
return false; // ill-posed problem, rewrite it as a set of n-1 eqns instead of n
}
const size_t n = aend - abegin;
//boost::scoped_array<uType> gam( new uType[n] ); // will be deleted when we go out of scope
if (n>=32)
return false;
uType gam[32];
vType bet = b[0];
u[0] = r[0]/bet;
for (size_t j=1; j<n; ++j) // Decomposition and forward substitution
{
gam[j] = c[j-1] / bet;
bet = b[j] - abegin[j]*gam[j];
if (bet == static_cast<vType>(0))
{
return false; // see Numerical Recipes
}
u[j] = (r[j] - abegin[j]*u[j-1])/bet;
}
for (size_t j=(n-2); j>0; --j)
{
u[j] -= gam[j+1]*u[j+1]; // Backsubstitution
}
u[0] -= gam[1]*u[1]; // only reason this iteration is outside the loop is
// size_t is typically an unsigned type and therefore
// a j>=0 test results in a warning
return true;
}
int main()
{
MKL_INT nrhs = 1;
const MKL_INT n = 16384;
MKL_INT nn =n;
double du[n];
double d[n];
double a[n];
double* dl = a+1;
double b[n];
double x[n];
MKL_INT info;
for (size_t s=0; s<n; ++s)
{
du[s] = s+1;
d[s] = s;
a[s] = s-1;
b[s] = n-s;
}
LARGE_INTEGER t0, t1;
QueryPerformanceCounter( &t0 );
TriDiagonal( x, a, a+n, d, du, b);
QueryPerformanceCounter( &t1 );
std::cout<< (t1.QuadPart - t0.QuadPart) <<std::endl;
QueryPerformanceCounter( &t0 );
dgtsv( &nn, &nrhs, dl, d, du, b, &nn, &info );
QueryPerformanceCounter( &t1 );
std::cout<< (t1.QuadPart - t0.QuadPart) <<std::endl;
return 0;
}