import std.stdio : putr = writefln;
import tango.time.StopWatch;
/**
 * Computes element-wise square roots of `a` into `b`, four floats at a time,
 * using the SSE `sqrtps` instruction (x86 32-bit inline assembly).
 *
 * Params:
 *   a = input values; its length must be a multiple of 4
 *   b = output buffer; must have the same length as `a`
 *
 * Throws:
 *   Exception if the lengths differ or the length is not a multiple of 4.
 */
void fastsqrt( float[] a, float[] b )
{
    if(a.length != b.length || a.length % 4 != 0)
        throw new Exception("fsqrt bad params!");

    // `loop` decrements ECX *before* testing it, so entering the loop with a
    // count of zero would wrap to 0xFFFFFFFF iterations. Nothing to do anyway.
    if(a.length == 0)
        return;

    float* pa = a.ptr, pb = b.ptr;
    uint times = a.length>>2;   // number of 4-float groups
    asm
    {
        mov ECX, times;
        mov EAX, [pa];          // EAX = source pointer
        mov EBX, [pb];          // EBX = destination pointer
    packedLoop:
        movups XMM0, [EAX];     // unaligned load of 4 floats
        sqrtps XMM0, XMM0;      // 4 square roots at once
        movups [EBX], XMM0;     // unaligned store of 4 results
        add EAX, 16;
        add EBX, 16;
        loop packedLoop;        // --ECX; repeat while ECX != 0
    }
}
/**
 * Computes element-wise square roots of `a` into `b`, one float at a time,
 * using the x87 `fsqrt` instruction (x86 32-bit inline assembly).
 *
 * Params:
 *   a = input values
 *   b = output buffer; must have the same length as `a`
 *
 * Throws:
 *   Exception if the lengths differ.
 */
void sqrt( float[] a, float[] b )
{
    if(a.length != b.length)
        throw new Exception("fsqrt bad params!");

    // `loop` decrements ECX *before* testing it, so entering the loop with a
    // count of zero would wrap to 0xFFFFFFFF iterations. Nothing to do anyway.
    if(a.length == 0)
        return;

    float* pa = a.ptr, pb = b.ptr;
    uint times = a.length;      // copied to a local so the asm block can read it
    asm
    {
        mov EAX, [pa];          // EAX = source pointer
        mov EBX, [pb];          // EBX = destination pointer
        mov ECX, times;
    scalarLoop:
        // Load the current element onto the FPU stack. (`fldpi` takes no
        // operand and pushes the constant pi, which is not what is wanted.)
        fld float ptr [EAX];
        fsqrt;                  // ST(0) = sqrt(ST(0))
        fstp float ptr [EBX];   // store the result and pop the FPU stack
        add EAX, 4;
        add EBX, 4;
        loop scalarLoop;        // --ECX; repeat while ECX != 0
    }
}
/**
 * Benchmark driver: fills an array with 0, 1, 2, ..., runs both square-root
 * routines over it, checks that their outputs agree element for element, and
 * prints the two values returned by the stopwatch.
 */
void main()
{
    float[40_000] input, packedOut, scalarOut;

    // input[n] = n
    foreach( n, ref slot; input )
        slot = cast(float)n;

    double packedTime, scalarTime;
    StopWatch timer;

    // Time the SSE (sqrtps) version.
    timer.start;
    fastsqrt( input, packedOut );
    packedTime = timer.stop;

    // Time the x87 (fsqrt) version.
    timer.start;
    sqrt( input, scalarOut );
    scalarTime = timer.stop;

    //putr(input, "\n", packedOut, "\n", scalarOut);

    // Both implementations must produce identical results.
    for( size_t n = 0; n < packedOut.length; ++n )
        assert( packedOut[n] == scalarOut[n] );

    putr("SQRTPS:%.6f FSQRT:%.6f", packedTime, scalarTime);
}