C++ code by Evetro - 63 lines

Link:
Evetro - C++, pasted on Mar 1:
// bucket sort 3/p
// (based on variant 1) -- no merge, each thread has its own buckets

template <class RanIt>
void parBucketSort3 (RanIt begin, RanIt end)
{
  using namespace Detail::BucketSort;
  typedef typename std::iterator_traits <RanIt>::value_type T;

  const Index sz = end - begin;
  if (sz < 2) return;
    
  // find min and max
  T minv, maxv;
  parallel_min_and_max (begin, end, minv, maxv);

  if (minv == maxv) return;

  // prepare buckets
  typedef std::vector <T> Bucket;
  typedef std::vector <Bucket> Buckets;

  const int threads = omp_get_max_threads();

  const Index bsz =
    std::max (static_cast <Index> (threads),
              static_cast <Index> ((sz / BucketSize <T>::value / threads) * threads));

  const T inv = T (bsz - 1) / (maxv - minv);
  const Index groups = bsz / threads;
  Buckets B (bsz);
  
  #pragma omp parallel for
  for (int tid = 0; tid < threads; ++tid)
  {
    const Index base = tid * groups;
    for (Index b = base, bend = base + groups; b < bend; ++b)
      B[b].reserve (BucketSize <T>::value + BucketSize <T>::value / 2);

    // scatter
    for (RanIt p = begin; p != end; ++p)
    {
      const Index index = static_cast <Index> ((*p - minv) * inv);
      if (base <= index && (index - groups) < base)
        B[index].push_back (*p);
    }

    // sort
    for (Index b = base, bend = base + groups; b < bend; ++b)
      std::sort (B[b].begin(), B[b].end());
  }

  // calculate offsets
  std::unique_ptr <Index[]> offset (new Index[bsz + 1]);
  offset[0] = 0;
  for (Index i = 1; i <= bsz; ++i)
    offset[i] = B[i - 1].size() + offset[i - 1];

  // gather
  #pragma omp parallel for
  for (Index b = 0; b < bsz; ++b)
    std::copy (B[b].begin(), B[b].end(), begin + offset[b]);
}
Create a new paste based on this one

Comments: