[ create a new paste ] login | about

Link: http://codepad.org/xJM805gX    [ raw code | fork ]

Evetro - C++, pasted on Jan 10:
/**
 * Parallel Code Count
 * parallel.h
 * Parallel (OpenMP) code count implementation.
 */
#pragma once
#include <memory>
#include <omp.h>
#include "reference.h"

/**
 * Functor that counts how many times each byte value occurs in a string,
 * distributing the scan of the text over an OpenMP thread team.
 */
struct ParallelCountCodes
{
  /**
   * Counts the occurrences of every character of `text`.
   *
   * @param text    input string; each char is read as an unsigned char and used
   *                as an index into the counter array.
   * @param amount  output array of COUNTERS ints. Its previous contents are
   *                overwritten, so the caller does not have to zero it.
   *
   * NOTE(review): indexing by unsigned char assumes COUNTERS covers every byte
   * value (i.e. COUNTERS >= 256) - confirm against reference.h.
   */
  void operator()(const std::string &text, int amount[COUNTERS]) const
  {
    // Two phases:
    // 1) each thread accumulates into its own private counter array while
    //    scanning its share of the text,
    // 2) the private arrays are summed into `amount`.
    // Phase 2 is kept sequential: the arrays are small, so a parallel reduction
    // is unlikely to pay off and could suffer from false sharing.
    struct Amount
    {
      int amount[COUNTERS];
      char padding[128]; // spacer between neighbouring threads' counters (intended to limit false sharing)
    };

    // Upper bound on the team size of the parallel region below.
    const int threads = omp_get_max_threads();

    // The trailing "()" value-initialises (zero-fills) every slot. This matters
    // because the runtime may start fewer threads than `threads`; slots that no
    // thread touches must still contribute zero to the reduction.
    std::unique_ptr<Amount[]> amounts(new Amount[threads]());

    const std::ptrdiff_t size = static_cast<std::ptrdiff_t>(text.size());

    // phase 1: parallel
    // num_threads pins the requested team size to the number of allocated
    // slots, so omp_get_thread_num() is always a valid index into `amounts`.
    #pragma omp parallel num_threads(threads)
    {
      int * const myAmount = amounts[omp_get_thread_num()].amount;

      // signed loop index: accepted by every OpenMP version for "omp for"
      #pragma omp for
      for (std::ptrdiff_t i = 0; i < size; ++i)
        myAmount[static_cast<unsigned char>(text[i])]++;
    }

    // phase 2: sequential
    // start from the first thread's counters (overwrites whatever was in `amount`)
    for (int c = 0; c < COUNTERS; ++c)
      amount[c] = amounts[0].amount[c];
    // add the remaining threads' counters
    for (int t = 1; t < threads; ++t)
    {
      const int * const added = amounts[t].amount;
      for (int c = 0; c < COUNTERS; ++c)
        amount[c] += added[c];
    }
  }
};


Create a new paste based on this one


Comments: