/// Bit reversal of a 32-bit word: "halves" variant, finished with bswap.
///
/// Three swap stages exchange neighbouring bits, then neighbouring 2-bit
/// pairs, then the two nibbles of every byte.  At that point each byte is
/// bit-reversed in place; the closing bswap reverses the byte order, so
/// bit i of arg ends up at bit (31 - i) of the result.
///
/// The result is left in EAX and the function ends without a return
/// statement: it relies on the compiler handing back whatever the __asm
/// block left in EAX.  ECX is used as the scratch register.
unsigned rbH4 (unsigned arg)
{
    __asm
    {
        mov     eax, arg

        // stage 1: swap adjacent single bits
        lea     ecx, [eax+eax]          // ecx = x << 1 (lea leaves eax intact)
        shr     eax, 1                  // eax = x >> 1
        and     ecx, 0xAAAAAAAA         // keep the bits that moved up
        and     eax, 0x55555555         // keep the bits that moved down
        or      eax, ecx

        // stage 2: swap adjacent 2-bit pairs
        lea     ecx, [eax+eax]          // ecx = x << 1
        shr     eax, 2                  // eax = x >> 2
        add     ecx, ecx                // ecx = x << 2
        and     ecx, 0xCCCCCCCC
        and     eax, 0x33333333
        or      eax, ecx

        // stage 3: swap the nibbles inside each byte
        mov     ecx, eax
        shl     ecx, 4                  // ecx = x << 4
        shr     eax, 4                  // eax = x >> 4
        and     ecx, 0xF0F0F0F0
        and     eax, 0x0F0F0F0F
        or      eax, ecx

        // stage 4: reverse the order of the four bytes
        bswap   eax
    }
    // no return statement on purpose: the result is already in eax
}