[ create a new paste ] login | about

Link: http://codepad.org/oqq5jsbJ    [ raw code | output | fork ]

wyverex - D, pasted on Aug 15:
import std.stdio : putr = writefln;
import tango.time.StopWatch;


/**
 * Element-wise square root using the SSE `sqrtps` instruction.
 *
 * Reads every element of `a` and stores its square root at the same index
 * of `b`, handling four floats per loop iteration. The inline assembly uses
 * 32-bit registers, so this is x86 (32-bit) only.
 *
 * Params:
 *   a = input values; length must be a multiple of 4
 *   b = output buffer; must have the same length as `a`
 *
 * Throws: Exception when the lengths differ or are not a multiple of 4.
 */
void fastsqrt( float[] a, float[] b )
{
  if(a.length != b.length || a.length % 4 != 0)
    throw new Exception("fsqrt bad params!");

  float* pa = a.ptr, pb = b.ptr;
  uint times = a.length>>2;   // number of 4-float groups

  // `loop` decrements ECX *before* testing it, so entering the loop with
  // ECX == 0 would wrap to 0xFFFFFFFF and run off the end of the arrays.
  if(times == 0)
    return;

  asm
  {
    mov ECX, times;
    mov EAX, [pa];
    mov EBX, [pb];

  REP:
    movups XMM0, [EAX];     // unaligned load of 4 floats
    sqrtps XMM0, XMM0;      // 4 square roots at once
    movups [EBX], XMM0;     // unaligned store of 4 results
    add EAX, 16;            // advance both pointers by 4 floats
    add EBX, 16;
    loop REP;
  }
}

/**
 * Element-wise square root using the x87 `fsqrt` instruction.
 *
 * Reads every element of `a` and stores its square root at the same index
 * of `b`, one float per loop iteration. The inline assembly uses 32-bit
 * registers, so this is x86 (32-bit) only.
 *
 * Params:
 *   a = input values
 *   b = output buffer; must have the same length as `a`
 *
 * Throws: Exception when the lengths differ.
 */
void sqrt( float[] a, float[] b )
{
  if(a.length != b.length)
    throw new Exception("fsqrt bad params!");

  float* pa = a.ptr, pb = b.ptr;
  uint times = a.length;  // copied to a local: a.length is not accepted as an asm operand

  // `loop` decrements ECX *before* testing it, so entering the loop with
  // ECX == 0 would wrap to 0xFFFFFFFF and run off the end of the arrays.
  if(times == 0)
    return;

  asm
  {
    mov EAX, [pa];
    mov EBX, [pb];
    mov ECX, times;

  REP2:
    fld float ptr [EAX];    // push the input element (fldpi would push pi instead)
    fsqrt;                  // ST(0) = sqrt(ST(0))
    fstp float ptr [EBX];   // store the result and pop, keeping the FPU stack balanced
    add EAX, 4;             // advance both pointers by one float
    add EBX, 4;
    loop REP2;
  }
}

/**
 * Benchmark driver: runs both square-root routines over the same input,
 * checks that their outputs agree element by element, and prints the two
 * stopwatch readings.
 */
void main()
{
  // One shared input and one output buffer per implementation.
  float[40_000] input, viaSse, viaFpu;

  // Fill the input with 0, 1, 2, ...
  for( size_t idx = 0; idx < input.length; ++idx )
    input[idx] = cast(float)idx;

  StopWatch clock;
  double sseTime, fpuTime;

  // Time the SSE version.
  clock.start;
  fastsqrt( input, viaSse );
  sseTime = clock.stop;

  // Time the x87 version.
  clock.start;
  sqrt( input, viaFpu );
  fpuTime = clock.stop;

  // Debug dump, kept disabled:
  //putr(input, "\n", viaSse, "\n", viaFpu);

  // Both implementations must produce identical results.
  for( size_t idx = 0; idx < viaSse.length; ++idx )
    assert( viaSse[idx] == viaFpu[idx] );

  putr("SQRTPS:%.6f  FSQRT:%.6f", sseTime, fpuTime);
}


Output:
1
SQRTPS:0.000291  FSQRT:0.000634


Create a new paste based on this one


Comments: