// non-recursive, OpenMP multi-threaded bitonic sort
template <class RanIt>
void parBitonicSort (RanIt begin, RanIt end)
{
const Index thr = omp_get_max_threads(),
len = end - begin;
for (Index i = 2; i <= len; i *= 2)
{
for (Index j = i; 1 < j; j /= 2)
{
const Index half_j = j / 2;
if (len > thr * j)
{
#pragma omp parallel for
for (Index k = 0; k < len; k += j)
{
const bool up = (k & i) == 0;
for (RanIt p = begin + k, pe = p + half_j; p != pe; ++p)
if (up == p[half_j] < *p)
std::swap (*p, p[half_j]);
}
}
else
{
for (Index k = 0; k < len; k += j)
{
const bool up = (k & i) == 0;
RanIt p = begin + k;
#pragma omp parallel for
for (Index l = 0; l < half_j; ++l)
if (up == p[l + half_j] < p[l])
std::swap (p[l], p[l + half_j]);
}
}
}
}
}