[ create a new paste ] login | about

Link: http://codepad.org/Uz67xxYj    [ raw code | fork ]

Evetro - C++, pasted on Mar 1:
// bucket sort 4
// (based on variant 2) -- no merge, each thread has its own buckets

template <class RanIt>
void parBucketSort4 (RanIt begin, RanIt end)
{
  using namespace Detail::BucketSort;
  typedef typename std::iterator_traits <RanIt>::value_type T;

  const Index sz = end - begin;
  if (sz < 2) return;
  
  // find min and max
  T minv, maxv;
  parallel_min_and_max (begin, end, minv, maxv);

  if (minv == maxv) return;

  // prepare buckets
  const int threads = omp_get_max_threads();
  const Index bsz =
    std::max (static_cast <Index> (threads),
              static_cast <Index> ((sz / BucketSize <T>::value / threads) * threads));

  const T inv = T (bsz - 1) / (maxv - minv);
  const Index groups = bsz / threads;

  std::unique_ptr <T[]> B (new T[sz]); // temporary store
  // offset[i+1] corresponds to end of the i-th bucket
  // pos[i] corresponds current writing position (size) of i-th bucket
  std::unique_ptr <Index[]> offset (new Index[bsz + 1]),
                                     pos (new Index[bsz]);

  offset[0] = 0;

  #pragma omp parallel for
  for (int tid = 0; tid < threads; ++tid)
  {
    const Index base = tid * groups;
    std::fill (offset.get() + base + 1, offset.get() + base + groups + 1, 0);

    // calculate sizes
    for (RanIt p = begin; p != end; ++p)
    {
      const Index index = static_cast <Index> ((*p - minv) * inv);
      if (base <= index && (index - groups) < base)
        offset[index + 1]++;
    }
  }

  // finish offset and pos
  for (Index i = 0; i < bsz; ++i)
  {
    pos[i] = offset[i];
    offset[i + 1] += offset[i];
  }

  #pragma omp parallel for
  for (int tid = 0; tid < threads; ++tid)
  {
    const Index base = tid * groups;

    // scatter
    for (RanIt p = begin; p != end; ++p)
    {
      const Index index = static_cast <Index> ((*p - minv) * inv);
      if (base <= index && (index - groups) < base)
        B[pos[index]++] = *p;
    }

    // sort
    for (Index b = base, bend = base + groups; b < bend; ++b)
      std::sort (B.get() + offset[b], B.get() + offset[b + 1]);
  }

  // copy data back, it seems today we have no profit from multithreaded copy, so
  std::copy (B.get(), B.get() + sz, begin); // just memcpy (rep movs) behind
}


Create a new paste based on this one


Comments: