import std.stdio: put = writef, putr = writefln;
import std.conv: toInt;
version (Win32) {
    import std.c.windows.windows: QueryPerformanceCounter, QueryPerformanceFrequency;

    // Value reported by QueryPerformanceFrequency; written once by the
    // module constructor below and used by clock() as the divisor.
    long queryPerformanceFrequency;

    // Module constructor: fetch the counter frequency before main() runs.
    static this() {
        QueryPerformanceFrequency(&queryPerformanceFrequency);
    }

    // Returns the current performance-counter reading divided by the
    // counter frequency (i.e. a timestamp in seconds, as a double).
    double clock() {
        long ticks;
        QueryPerformanceCounter(&ticks);
        double ticksAsReal = cast(double)ticks;
        return ticksAsReal / queryPerformanceFrequency;
    }
}
version (linux) {
    import std.c.linux.linux: gettimeofday, time, timeval;

    // Returns the current wall-clock time in seconds as a double, with
    // microsecond resolution.
    //
    // time(null) only ticks once per second, which makes the sub-second
    // timings this benchmark reports meaningless; gettimeofday() supplies
    // the fractional part.
    double clock() {
        timeval now;
        // With a valid timeval pointer and a null timezone the call is not
        // expected to fail; fall back to whole seconds if it ever does.
        if (gettimeofday(&now, null) != 0)
            return cast(double)time(null);
        return cast(double)now.tv_sec + cast(double)now.tv_usec / 1_000_000.0;
    }
}
/*
Benchmark entry point: times the element-wise operation a3 = a1 <myop> a2 on
float arrays, either as a D array operation or as an explicit element loop.

Positional command-line arguments (all optional):
  args[1]  array length, before it is multiplied by 8 (default 10)
  args[2]  number of times the whole-array operation is repeated (default 1)
  args[3]  0 selects the explicit loop, non-zero the array operation
           (default: array operation)

toInt throws if an argument is not a valid integer.
*/
void main(string[] args) {
    int n = args.length >= 2 ? toInt(args[1]) : 10;
    // A negative length, or one that wraps when scaled by 8, would turn into
    // a bogus allocation size below; refuse it up front.
    if (n < 0 || n > int.max / 8) {
        putr("array length must be between 0 and ", int.max / 8);
        return;
    }
    n *= 8; // to avoid problems with SSE2
    int nloops = args.length >= 3 ? toInt(args[2]) : 1;
    // ">= 4" (not "== 4") so that extra trailing arguments do not silently
    // reset the flag to its default, matching the two checks above.
    bool use_vec = args.length >= 4 ? cast(bool)toInt(args[3]) : true;
    putr("array len= ", n, " nloops= ", nloops, " Use vec ops: ", use_vec);

    alias float T;
    // Operator under test; spliced into the statements below via string mixins.
    const string myop = r" / ";
    putr("operation: ", myop);

    // I don't know if the following ones are well aligned for SSE2
    auto a1 = new T[n]; // void? (every element is overwritten just below)
    auto a2 = new T[n]; // void? (every element is overwritten just below)
    auto a3 = new T[n];

    // Fill the operands; a2 starts at 1, so it contains no zero divisor.
    foreach (i, ref el; a1)
        el = i * 7 + 1;
    foreach (i, ref el; a2)
        el = i + 1;

    auto t = clock();
    if (use_vec) {
        // Whole-array (vector) operation.
        for (int j = 0; j < nloops; j++)
            mixin("a3[] = a1[] " ~ myop ~ " a2[];");
    } else {
        // Explicit element-by-element loop, kept as the comparison baseline.
        for (int j = 0; j < nloops; j++)
            for (int i; i < a3.length; i++)
                mixin("a3[i] = a1[i] " ~ myop ~ " a2[i];");
    }
    putr("time= ", clock() - t, " s");

    // Only dump the result for small arrays.
    if (a3.length < 300)
        putr("\nResult:\n", a3);
}
/*
D code with /:
C:\>array_benchmark.exe 10000 10000 0
array len= 80000 nloops= 10000 Use vec ops: false
time= 7.10563 s
C:\>array_benchmark.exe 10000 10000 1
array len= 80000 nloops= 10000 Use vec ops: true
time= 7.222 s
C:\>array_benchmark.exe 12000000 1 0
array len= 96000000 nloops= 1 Use vec ops: false
time= 0.654696 s
C:\>array_benchmark.exe 12000000 1 1
array len= 96000000 nloops= 1 Use vec ops: true
time= 0.655401 s
D code with *:
C:\>array_benchmark.exe 10000 10000 0
array len= 80000 nloops= 10000 Use vec ops: false
time= 7.10615 s
C:\>array_benchmark.exe 10000 10000 1
array len= 80000 nloops= 10000 Use vec ops: true
time= 7.21904 s
C:\>array_benchmark.exe 12000000 1 0
array len= 96000000 nloops= 1 Use vec ops: false
time= 0.65515 s
C:\>array_benchmark.exe 12000000 1 1
array len= 96000000 nloops= 1 Use vec ops: true
time= 0.65566 s
(Note: the 0.65566 > 0.65515 difference is not due to measurement noise.)
D code with +:
C:\>array_benchmark.exe 10000 10000 0
array len= 80000 nloops= 10000 Use vec ops: false
time= 7.10848 s
C:\>array_benchmark.exe 10000 10000 1
array len= 80000 nloops= 10000 Use vec ops: true
time= 7.22527 s
C:\>array_benchmark.exe 12000000 1 0
array len= 96000000 nloops= 1 Use vec ops: false
time= 0.654797 s
C:\>array_benchmark.exe 12000000 1 1
array len= 96000000 nloops= 1 Use vec ops: true
time= 0.654991 s
*/