#include <mmintrin.h>
#include <vector>
#include <string>
#include <chrono>
#include <iostream>
#include <random>
#include <algorithm>
#include <boost/align/aligned_allocator.hpp>
using namespace std;
using boost::alignment::aligned_allocator;
struct float2 {
float x, y;
};
struct float3 {
float x, y, z;
friend float3 operator-( const float3 & lhs, const float3 & rhs ) {
return float3{ lhs.x - rhs.x, lhs.y - rhs.y, lhs.z - rhs.z };
}
friend float3 operator+( const float3 & lhs, const float3 & rhs ) {
return float3{ lhs.x + rhs.x, lhs.y + rhs.y, lhs.z + rhs.z };
}
friend float3 operator*( const float3 & lhs, float rhs ) {
return float3{ lhs.x * rhs, lhs.y * rhs, lhs.z * rhs };
}
};
struct float4 {
float x, y, z, w;
};
struct particle_t {
float3 position;
float3 velocity;
float3 acceleration;
float2 size;
float4 color;
float energy;
bool alive;
};
uniform_real_distribution<float> pdist( -10.f, 10.f );
uniform_real_distribution<float> vxzdist( -5.f, 5.f );
uniform_real_distribution<float> vydist( 4.f, 20.f );
uniform_real_distribution<float> adist( -1.f, 1.f );
uniform_real_distribution<float> sdist( 0.1f, 5.f );
uniform_real_distribution<float> cdist( 0.f, 1.f );
uniform_real_distribution<float> edist( 10.f, 1000.f );
constexpr float gravity = -9.8f;
constexpr float dt = 1.0f;
struct aos_emitter_t {
vector<particle_t> particles;
void generate( size_t n, mt19937 & rng ) {
particles.resize( n );
for ( size_t i = 0; i < n; ++i ) {
particles[i].position = float3{ pdist( rng ), pdist( rng ), pdist( rng ) };
particles[i].velocity = float3{ vxzdist( rng ), vydist( rng ), vxzdist( rng ) };
particles[i].acceleration = float3{ adist( rng ), adist( rng ), adist( rng ) };
particles[i].size = float2{ sdist( rng ), sdist( rng ) };
particles[i].color = float4{ cdist( rng ), cdist( rng ), cdist( rng ), cdist( rng ) };
particles[i].energy = edist( rng );
particles[i].alive = true;
}
}
void update() {
for ( auto & p : particles ) {
auto & v = p.velocity;
v = v + (p.acceleration * dt);
v.y += gravity * dt;
p.position = p.position + (v * dt);
p.energy -= dt;
if ( p.energy <= 0 ) {
p.alive = false;
}
}
}
};
struct soa_emitter_t {
vector<float3> position;
vector<float3> velocity;
vector<float3> acceleration;
vector<float2> size;
vector<float4> color;
vector<float> energy;
vector<char> alive;
void generate( size_t n, mt19937 & rng ) {
position.resize( n );
velocity.resize( n );
acceleration.resize( n );
size.resize( n );
color.resize( n );
energy.resize( n );
alive.resize( n, true );
for ( size_t i = 0; i < n; ++i ) {
position[i] = float3{ pdist( rng ), pdist( rng ), pdist( rng ) };
velocity[i] = float3{ vxzdist( rng ), vydist( rng ), vxzdist( rng ) };
acceleration[i] = float3{ adist( rng ), adist( rng ), adist( rng ) };
size[i] = float2{ sdist( rng ), sdist( rng ) };
color[i] = float4{ cdist( rng ), cdist( rng ), cdist( rng ), cdist( rng ) };
energy[i] = edist( rng );
}
}
void update() {
size_t n = position.size();
for ( size_t i = 0; i < n; ++i ) {
auto & v = velocity[i];
v = v + (acceleration[i] * dt);
v.y += gravity * dt;
position[i] = position[i] + (v * dt);
energy[i] -= dt;
if ( energy[i] <= 0 ) {
alive[i] = false;
}
}
}
};
template< typename T >
using sse_vector = vector<T, aligned_allocator<T,16> >;
struct soa_emitter_sse_t {
sse_vector<float> position_x;
sse_vector<float> position_y;
sse_vector<float> position_z;
sse_vector<float> velocity_x;
sse_vector<float> velocity_y;
sse_vector<float> velocity_z;
sse_vector<float> acceleration_x;
sse_vector<float> acceleration_y;
sse_vector<float> acceleration_z;
vector<float2> size;
vector<float4> color;
sse_vector<float> energy;
vector<char> alive;
void generate( size_t n, mt19937 & rng ) {
position_x.resize( n );
position_y.resize( n );
position_z.resize( n );
velocity_x.resize( n );
velocity_y.resize( n );
velocity_z.resize( n );
acceleration_x.resize( n );
acceleration_y.resize( n );
acceleration_z.resize( n );
size.resize( n );
color.resize( n );
energy.resize( n );
alive.resize( n, true );
for ( size_t i = 0; i < n; ++i ) {
position_x[i] = pdist( rng );
position_y[i] = pdist( rng );
position_z[i] = pdist( rng );
velocity_x[i] = vxzdist( rng );
velocity_y[i] = vxzdist( rng );
velocity_z[i] = vxzdist( rng );
acceleration_x[i] = adist( rng );
acceleration_y[i] = adist( rng );
acceleration_z[i] = adist( rng );
size[i] = float2{ sdist( rng ), sdist( rng ) };
color[i] = float4{ cdist( rng ), cdist( rng ), cdist( rng ), cdist( rng ) };
energy[i] = edist( rng );
}
}
void update() {
size_t n = position_x.size();
__m128 vx, vy, vz;
__m128 t = _mm_set1_ps( dt );
__m128 g = _mm_set1_ps( gravity * dt );
__m128 zero = _mm_setzero_ps();
for ( size_t i = 0; i < n; i += 4 ) {
vx = _mm_add_ps( _mm_load_ps( velocity_x.data()+i ), _mm_mul_ps( t, _mm_load_ps( acceleration_x.data()+i ) ) );
vy = _mm_add_ps( _mm_load_ps( velocity_y.data()+i ), _mm_mul_ps( t, _mm_load_ps( acceleration_y.data()+i ) ) );
vz = _mm_add_ps( _mm_load_ps( velocity_z.data()+i ), _mm_mul_ps( t, _mm_load_ps( acceleration_z.data()+i ) ) );
vy = _mm_add_ps( vy, g );
_mm_store_ps( position_x.data()+i, _mm_add_ps(_mm_load_ps(position_x.data()+i), _mm_mul_ps(vx, t)) );
_mm_store_ps( position_y.data()+i, _mm_add_ps(_mm_load_ps(position_y.data()+i), _mm_mul_ps(vy, t)) );
_mm_store_ps( position_z.data()+i, _mm_add_ps(_mm_load_ps(position_z.data()+i), _mm_mul_ps(vz, t)) );
_mm_store_ps( velocity_x.data()+i, vx );
_mm_store_ps( velocity_y.data()+i, vy );
_mm_store_ps( velocity_z.data()+i, vz );
_mm_store_ps( energy.data()+i, _mm_sub_ps(_mm_load_ps(energy.data()+i), t) );
auto a = _mm_movemask_ps( _mm_cmple_ps( _mm_load_ps( energy.data() + i ), zero ) );
for ( int j = 0; j < 4; ++j ) {
alive[i+j] = (a & (1 << j));
}
}
}
};
template< typename emitter_t >
chrono::duration<double> run_test() {
using clock_t = chrono::high_resolution_clock;
constexpr size_t count = 250'000 * 4;
constexpr size_t frames = 1'000;
const auto seed_val = mt19937::default_seed;
mt19937 rng( seed_val );
emitter_t emitter;
emitter.generate( count, rng );
auto start = clock_t::now();
for ( size_t i = 0; i < frames; ++i ) {
emitter.update();
}
auto finish = clock_t::now();
return finish - start;
}
int main() {
auto aos_duration = run_test<aos_emitter_t>();
auto soa_duration = run_test<soa_emitter_t>();
auto soa_sse_duration = run_test<soa_emitter_sse_t>();
cout << "AoS in " << aos_duration.count() << " seconds" << std::endl;
cout << "SoA in " << soa_duration.count() << " seconds" << std::endl;
cout << "SoA SSE in " << soa_sse_duration.count() << " seconds" << std::endl;
return 0;
}