[ create a new paste ] login | about

Link: http://codepad.org/JQ8nAKDf    [ raw code | fork ]

C, pasted on May 8:
// We hang in SoundEngineGC::PrepLoad because __AXOutDspReady is always
// 1 ("READY"), which is caused by the HLE DSP being too fast. The loop
// checks that it can spin more than 50 times with the DSP idle before
// the DSP becomes busy. It relies on seeing the 0 ("BUSY") state to
// break out of the counter loop.
//
// Audio data is double-buffered: as one frame is processed by the
// DSP, the previous frame is being flushed to the output.
//
// New command lists are submitted to the DSP as triggered by the AID
// interrupt, which indicates more audio data is needed for output.
// The bug occurs because the DSP triggers INT_DSP with DSP_YIELD
// much too quickly. In fact, it triggers before the AID interrupt
// handler completes. Because of this, immediately upon "rfi"
// (return from interrupt), we will enter the __DSPHandler, which
// will call _AXDSPResumeCallback and set the state back to READY.
//
// (The cached-interpreter is unaffected because it doesn't check
// for pending exceptions/interrupts on rfi.)
//
// This prevents the code in SoundEngineGC::PrepLoad from ever
// observing the "BUSY" value. Even though it is momentarily written
// to __AXOutDspReady, it changes back to "READY" immediately.
//
// The problem in Dolphin is that AXUCode::SignalWorkEnd is invoked
// immediately after processing. In the absence of a DSP thread, this
// makes the result pretty much immediate, and in the presence of a
// DSP thread it makes the result pretty unpredictable (?)
//
// I suspect we should synchronously attempt to count the number of
// cycles taken by a command-list and schedule the DSP_INT based on
// that, rather than triggering it immediately.
//
// A workaround (that other people seem to have noticed?) is to make
// GenerateDSPInterruptFromDSPEmu schedule the event 814 ticks in
// the future, rather than zero. This might be too slow for other
// very fast operations, although it's only ~135 DSP cycles, so I
// wouldn't be entirely surprised if it worked everywhere.
//
// We could also attempt to estimate when to trigger it from the
// DSP thread, or pass a flag so that only mail from SignalWorkEnd
// gets delayed by 814 ticks (which is probably _more_ accurate,
// but not really very accurate at all).

// Values taken by the DSP hand-off flag that the handlers below test
// and update. The numeric values are kept explicit because the flag is
// also compared against these constants through a raw memory read.
enum AXOutDspReadyState {
  BUSY  = 0,  // the DSP is currently processing data
  READY = 1,  // the DSP is idle

  // the DSP is busy and must output a new frame as soon as it
  // finishes; _AXOsTime holds the time at which that frame should
  // have started
  LATE  = 2,
};

// Decompiled tail of SoundEngineGC::PrepLoad (the start of the body is
// snipped). The visible part busy-waits on the word at AXOutPtr + 4,
// annotated as __AXOutDspReady, and returns only after:
//   1. the word has been polled as READY more than 50 times in a row
//      and was then re-read as something other than LATE, and
//   2. the word has subsequently been read as READY again.
// If the word never leaves READY, the inner loop below never exits.
void __cdecl SoundEngineGC::PrepLoad()
{
  int v2; // r28@1
  unsigned int ticks; // r26@11

  /* ... snip ... */

  // v2 is presumably computed in the snipped code above -- not visible here
  OSReport("maxStreak %d\n", v2);
  AXRegisterCallback(0);
  // AXOutPtr + 4 is the address polled below (annotated __AXOutDspReady)
  AXOutPtr = (int)&_AXOutFrame;
  do
  {
    // count how many consecutive polls observe READY
    ticks = 0;
    // this loop hangs, because the __AXOutDspReady is always ready
    while ( *(_DWORD *)(AXOutPtr + 4) == READY )  // __AXOutDspReady
    {
      ++ticks;
      OSGetTick();  // result is discarded
    }
    // the flag is re-read here; a LATE state discards the streak so the
    // outer loop starts counting again
    if ( *(_DWORD *)(AXOutPtr + 4) == LATE )       // __AXOutDspReady
      ticks = 0;
  }
  while ( ticks <= 50 );  // repeat until a streak of more than 50 was seen
  // finally wait for the flag to read READY again
  while ( *(_DWORD *)(AXOutPtr + 4) != READY );     // __AXOutDspReady
}

// This function is triggered whenever we run out of audio data.
// If the DSP is idle, it calls _AXOutNewFrame to send the previous
// frame of audio data to the output and start processing the next
// one; otherwise it marks the frame as late.
void __cdecl _AXOutAiCallback()
{
  // Time to swap the audio buffers via _AXOutNewFrame, if the DSP allows.

  // DSP still working: we can neither output its result nor hand it new
  // data yet, so remember when this frame should have started.
  if ( _AXOutDspReady == BUSY )
    _AXOsTime = OSGetTime();

  if ( _AXOutDspReady != READY )
  {
    // Running late: flag it and make sure the AX ucode task is the one
    // currently running on the DSP.
    _AXOutDspReady = LATE; // waiting for the task
    DSPAssertTask((dsp_task *)&task);
    return;
  }

  // DSP idle: mark it busy, then swap buffers to give it new work.
  _AXOutDspReady = BUSY; // copying new content
  _AXOutNewFrame(0);
}

// This function runs on DSP_YIELD, to indicate the DSP has
// completed its task.
void __fastcall _AXDSPResumeCallback()
{
  // Normal case: no frame is waiting on us, so just report the DSP idle.
  if ( _AXOutDspReady != LATE )
  {
    _AXOutDspReady = READY;
    return;
  }

  // A frame was requested while the DSP was busy: start it right away,
  // passing the time elapsed since _AXOsTime was recorded, shifted
  // right by two.
  _AXOutDspReady = BUSY;
  _AXOutNewFrame((OSGetTime() - _AXOsTime) >> 2);
}


// Starts a new audio frame: syncs parameter blocks, mails the current
// command list to the DSP, runs the callback/aux stages while recording
// a timestamp after each, builds the next frame, flips the output
// buffer index and starts the output DMA.
//
// lateness_: forwarded unchanged to _AXSyncPBs. The callers in this file
//            pass 0, or (OSGetTime() - _AXOsTime) >> 2 when running late.
void __fastcall _AXOutNewFrame(int lateness_)
{
  int lateness; // r30@1
  int v2; // r3@1
  char *v3; // r30@1
  _BYTE *v4; // r3@7
  signed int v5; // ctr@8
  __int64 *v6; // r4@8
  char v7; // r0@9

  lateness = lateness_;
  // first profiling timestamp: frame start
  _AXLocalProfile = OSGetTime();
  v2 = _AXSyncPBs(lateness);
  _AXPrintStudio(v2);

  // send a command list to the DSP for processing
  // the DSP_YIELD should not come back before we return
  // from our caller's interrupt handler

  v3 = _AXGetCommandListAddress(); // swaps command lists
  // two mails: a command word, then the command list address; after
  // each one, spin while DSPCheckMailToDSP() returns non-zero
  DSPSendMailToDSP(0xBABE0000 | 0x180);
  while ( DSPCheckMailToDSP() )
    ;
  DSPSendMailToDSP(v3);
  while ( DSPCheckMailToDSP() )
    ;

  // run the callback stack, aux processing and the optional user frame
  // callback, storing a timestamp after each stage
  _AXServiceCallbackStack();
  qword_80638A28 = OSGetTime();
  _AXProcessAux();
  qword_80638A30 = OSGetTime();
  qword_80638A38 = OSGetTime();
  if ( _AXUserFrameCallback )
    _AXUserFrameCallback();
  qword_80638A40 = OSGetTime();

  // write out the command list for next time
  _AXNextFrame((int)&_AXOutSBuffer, (int)&_AXOutBuffer + 640 * _AXOutFrame);
  // flip the buffer index between 0 and 1
  ++_AXOutFrame;
  _AXOutFrame &= 1u;

  // send the previous frame to the output
  // (0x280 == 640, the same size used as the per-buffer stride above)
  AIInitDMA((int)&_AXOutBuffer + 640 * _AXOutFrame, 0x280u);
  qword_80638A48 = OSGetTime();
  dword_80638A50 = _AXGetNumVoices();
  v4 = (_BYTE *)_AXGetCurrentProfile();
  if ( v4 )
  {
    // byte-wise copy of 7 * 8 = 56 bytes starting at &_AXLocalProfile
    // into the buffer returned by _AXGetCurrentProfile().
    // NOTE(review): presumably the timestamps and voice count stored
    // above sit contiguously after _AXLocalProfile -- confirm layout.
    v5 = 7;
    v6 = &_AXLocalProfile;
    do
    {
      *v4 = *(_BYTE *)v6;
      v4[1] = *((_BYTE *)v6 + 1);
      v4[2] = *((_BYTE *)v6 + 2);
      v4[3] = *((_BYTE *)v6 + 3);
      v4[4] = *((_BYTE *)v6 + 4);
      v4[5] = *((_BYTE *)v6 + 5);
      v4[6] = *((_BYTE *)v6 + 6);
      v7 = *((_BYTE *)v6 + 7);
      ++v6;          // advance source by one 8-byte element
      v4[7] = v7;
      v4 += 8;       // advance destination by 8 bytes
      --v5;
    }
    while ( v5 );
  }
}


Create a new paste based on this one


Comments: