// We hang in SoundEngineGC::PrepLoad because __AXOutDspReady is always
// 1 ("READY") which is caused by HLE DSP being too fast. The loop is
// checking that it can run 50 times with the DSP being idle, before
// the DSP becomes busy. It relies on seeing the 0 ("BUSY") state to
// break out of the counter loop.
//
// Audio data is double-buffered: as one frame is processed by the
// DSP, the previous frame is being flushed to the output.
//
// New command lists are submitted to the DSP as triggered by the AID
// interrupt, which indicates more audio data is needed for output.
// The bug occurs because the DSP triggers INT_DSP with DSP_YIELD
// much too quickly. In fact, it triggers before the AID interrupt
// handler completes. Because of this, immediately upon "rfi"
// (return from interrupt), we will enter the __DSPHandler, which
// will call _AXDSPResumeCallback and set the state back to READY.
//
// (The cached-interpreter is unaffected because it doesn't check
// for pending exceptions/interrupts on rfi.)
//
// This prevents the code in SoundEngineGC::PrepLoad from ever
// observing the "BUSY" value. Even though it is momentarily written
// to __AXOutDspReady, it changes back to "READY" immediately.
//
// The problem in Dolphin is that AXUCode::SignalWorkEnd is invoked
// immediately after processing. In the absence of a DSP thread, this
// makes the result pretty much immediate, and in the presence of a
// DSP thread it makes the result pretty unpredictable (?)
//
// I suspect we should synchronously attempt to count the number of
// cycles taken by a command-list and schedule INT_DSP based on
// that, rather than triggering it immediately.
//
// A workaround (that other people seem to have noticed?) is to make
// GenerateDSPInterruptFromDSPEmu schedule the event 814 ticks in
// the future, rather than zero. This might be too slow for other
// very fast operations, although it's only ~135 DSP cycles, so I
// wouldn't be entirely surprised if it worked everywhere.
//
// We could also attempt to estimate when to trigger it from the
// DSP thread, or pass a flag so that only mail from SignalWorkEnd
// gets delayed by 814 ticks (which is probably _more_ accurate,
// but not really very accurate at all).
// Values taken by _AXOutDspReady, the flag the AX callbacks below use
// to track what the DSP is doing with the current audio frame.
enum AXOutDspReadyState {
    // The DSP is in the middle of processing data.
    BUSY = 0,

    // The DSP is idle.
    READY = 1,

    // The DSP is still busy and has to output a new frame the moment
    // it finishes; _AXOsTime holds the time at which that frame
    // should have started.
    LATE = 2,
};
// Decompiled excerpt of SoundEngineGC::PrepLoad; part of the body is
// elided at the "snip" marker, so only the final wait sequence is shown.
//
// The visible tail polls the word at AXOutPtr + 4 (annotated below as
// __AXOutDspReady). It only proceeds once it has counted more than 50
// consecutive READY polls that end in a state other than READY or LATE,
// and then waits for the state to read READY again.
void __cdecl SoundEngineGC::PrepLoad()
{
int v2; // r28@1
unsigned int ticks; // r26@11
/* ... snip ... */
OSReport("maxStreak %d\n", v2);
AXRegisterCallback(0);
// all of the polling below goes through this pointer, at offset +4
AXOutPtr = (int)&_AXOutFrame;
do
{
// restart the idle-poll count for this attempt
ticks = 0;
// this loop hangs, because the __AXOutDspReady is always ready:
// it can only exit once a poll observes a non-READY value
while ( *(_DWORD *)(AXOutPtr + 4) == READY ) // __AXOutDspReady
{
++ticks;
// the returned tick value is discarded
OSGetTick();
}
// an idle streak that ended in LATE does not count: force another attempt
if ( *(_DWORD *)(AXOutPtr + 4) == LATE ) // __AXOutDspReady
ticks = 0;
}
// retry until a streak of more than 50 polls ended in a non-LATE state
while ( ticks <= 50 );
// finally, spin until the state reads READY again
while ( *(_DWORD *)(AXOutPtr + 4) != READY ); // __AXOutDspReady
}
// This function is triggered whenever we run out of audio data.
// If the DSP is idle it calls _AXOutNewFrame, which sends the
// previous frame of audio data to the output and hands the DSP a
// new command list to process. Otherwise the frame is marked LATE
// so that _AXDSPResumeCallback starts it once the DSP yields.
void __cdecl _AXOutAiCallback()
{
    // The DSP is still busy, so the swap cannot happen now: record
    // the time at which this frame should have started.
    if ( _AXOutDspReady == BUSY )
        _AXOsTime = OSGetTime();

    if ( _AXOutDspReady != READY )
    {
        // We're late - flag it, and ensure the AX ucode is
        // currently running on the DSP.
        _AXOutDspReady = LATE; // waiting for the task
        DSPAssertTask((dsp_task *)&task);
        return;
    }

    // Idle DSP: swap the buffers, giving the DSP new stuff to work
    // on, which makes it busy.
    _AXOutDspReady = BUSY; // copying new content
    _AXOutNewFrame(0);
}
// Handler for DSP_YIELD, which indicates that the DSP has completed
// its task. Either marks the DSP as idle or, when a frame is already
// overdue, starts that frame straight away.
void __fastcall _AXDSPResumeCallback()
{
    if ( _AXOutDspReady != LATE )
    {
        // nothing is waiting: the DSP is simply idle again
        _AXOutDspReady = READY;
        return;
    }

    // Running late: output the new frame immediately, passing along
    // the time elapsed since _AXOsTime was recorded (shifted right by 2).
    _AXOutDspReady = BUSY;
    _AXOutNewFrame((OSGetTime() - _AXOsTime) >> 2);
}
// Starts a new audio frame: submits a command list to the DSP, runs the
// per-frame callbacks, builds the next frame's data, flips the output
// buffer index and starts the AI DMA for the other buffer.
//
// lateness_ is forwarded unchanged to _AXSyncPBs. The callers visible in
// this file pass 0 when the frame starts on time, or
// (OSGetTime() - _AXOsTime) >> 2 when it starts late.
//
// OSGetTime() is sampled at several points into _AXLocalProfile and the
// qword_80638Axx globals; these look like profiling timestamps (see the
// copy at the end of the function) - TODO confirm.
void __fastcall _AXOutNewFrame(int lateness_)
{
int lateness; // r30@1
int v2; // r3@1
char *v3; // r30@1
_BYTE *v4; // r3@7
signed int v5; // ctr@8
__int64 *v6; // r4@8
char v7; // r0@9
lateness = lateness_;
// timestamp taken on entry
_AXLocalProfile = OSGetTime();
v2 = _AXSyncPBs(lateness);
_AXPrintStudio(v2);
// send a command list to the DSP for processing
// the DSP_YIELD should not come back before we return
// from our caller's interrupt handler
v3 = _AXGetCommandListAddress(); // swaps command lists
// first mail: the constant 0xBABE0000 | 0x180; spin until
// DSPCheckMailToDSP() returns zero
DSPSendMailToDSP(0xBABE0000 | 0x180);
while ( DSPCheckMailToDSP() )
;
// second mail: the command list address, followed by the same wait
DSPSendMailToDSP(v3);
while ( DSPCheckMailToDSP() )
;
// do stuff
_AXServiceCallbackStack();
qword_80638A28 = OSGetTime();
_AXProcessAux();
// two back-to-back samples with no work in between
qword_80638A30 = OSGetTime();
qword_80638A38 = OSGetTime();
// the user frame callback is optional
if ( _AXUserFrameCallback )
_AXUserFrameCallback();
qword_80638A40 = OSGetTime();
// write out the command list for next time
// (the second argument is the address of _AXOutBuffer plus a byte offset
// of 640 * _AXOutFrame)
_AXNextFrame((int)&_AXOutSBuffer, (int)&_AXOutBuffer + 640 * _AXOutFrame);
// flip _AXOutFrame between 0 and 1
++_AXOutFrame;
_AXOutFrame &= 1u;
// send the previous frame to the output
// (0x280 == 640 bytes, starting at the buffer half selected by the
// flipped index)
AIInitDMA((int)&_AXOutBuffer + 640 * _AXOutFrame, 0x280u);
qword_80638A48 = OSGetTime();
dword_80638A50 = _AXGetNumVoices();
v4 = (_BYTE *)_AXGetCurrentProfile();
// only copy when a current profile record is returned
if ( v4 )
{
// byte-wise copy of 7 * 8 = 56 bytes starting at &_AXLocalProfile.
// NOTE(review): presumably this covers _AXLocalProfile plus the
// qword_80638A28..A48 / dword_80638A50 values written above, assuming
// they are laid out contiguously after it - confirm against the binary.
v5 = 7;
v6 = &_AXLocalProfile;
do
{
*v4 = *(_BYTE *)v6;
v4[1] = *((_BYTE *)v6 + 1);
v4[2] = *((_BYTE *)v6 + 2);
v4[3] = *((_BYTE *)v6 + 3);
v4[4] = *((_BYTE *)v6 + 4);
v4[5] = *((_BYTE *)v6 + 5);
v4[6] = *((_BYTE *)v6 + 6);
v7 = *((_BYTE *)v6 + 7);
// advance the source by one 8-byte element
++v6;
v4[7] = v7;
// advance the destination by the same 8 bytes
v4 += 8;
--v5;
}
while ( v5 );
}
}