C,
pasted
on Sep 19:
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Print the first 16 bytes of each of a, b and c as signed decimal
 * integers, one labelled line per array ("a = ...", "b = ...", "c = ...").
 * Every value is followed by ", " (including the last one on the line).
 */
static void print_abc16(char* a, char* b, char* c)
{
    const char *labels[3] = { "a = ", "b = ", "c = " };
    char *rows[3];
    int row;
    int col;

    rows[0] = a;
    rows[1] = b;
    rows[2] = c;

    for (row = 0; row < 3; row++) {
        fputs(labels[row], stdout);
        for (col = 0; col < 16; col++) {
            printf("%d, ", rows[row][col]);
        }
        putchar('\n');
    }
}
/*
 * Scalar reference implementation: for each of the 16 bytes,
 *
 *     a[i] ^= (b[i] + c[i]) mod 16
 *
 * a, b and c must each point to at least 16 readable bytes; a is
 * updated in place.
 *
 * The reduction uses a bit mask instead of the `%` operator. In C, `%`
 * truncates toward zero, so where `char` is signed a negative sum would
 * yield a negative remainder (e.g. -1 % 16 == -1) and flip the high bits
 * of a[i]. Masking with 0x0F always yields a value in 0..15, which is
 * also what the `pand` with 0x0F in sse_add_mod16_xor computes.
 */
void normal_add_mod16_xor(char* a, char* b, char* c)
{
    int i;

    for (i = 0; i < 16; i++) {
        a[i] ^= (char)((b[i] + c[i]) & 0x0F);
    }
}
/*
 * SSE2 implementation of a[i] ^= (b[i] + c[i]) mod 16 over 16 bytes,
 * processed as a single 128-bit vector.
 *
 * a, b and c must each point to 16 bytes aligned on a 16-byte boundary,
 * because every load and store below is an aligned `movdqa`.
 * a is updated in place. x86 with SSE2 and a GCC-compatible compiler
 * are required.
 */
void sse_add_mod16_xor(char* a, char* b, char* c)
{
    /* Per-byte mask keeping the low 4 bits, i.e. reduction mod 16. */
    static const char __attribute__ ((aligned (16))) msk[16] = {
        0x0F,0x0F,0x0F,0x0F, 0x0F,0x0F,0x0F,0x0F,
        0x0F,0x0F,0x0F,0x0F, 0x0F,0x0F,0x0F,0x0F,
    };

    /*
     * __asm__/__volatile__ (rather than plain `asm`) so the statement is
     * also accepted in strict ISO modes such as -std=c11.
     */
    __asm__ __volatile__(
        "movdqa (%1), %%xmm0\n\t"      /* xmm0 = b                      */
        "movdqa (%2), %%xmm1\n\t"      /* xmm1 = c                      */
        "paddb  %%xmm1, %%xmm0\n\t"    /* xmm0 = b + c (bytewise, wraps) */
        "movdqa (%3), %%xmm1\n\t"      /* xmm1 = 0x0F mask              */
        "pand   %%xmm1, %%xmm0\n\t"    /* xmm0 &= mask  -> mod 16       */
        "movdqa (%0), %%xmm1\n\t"      /* xmm1 = a                      */
        "pxor   %%xmm1, %%xmm0\n\t"    /* xmm0 ^= a                     */
        "movdqa %%xmm0, (%0)\n\t"      /* a = xmm0                      */
        :
        : "r"(a), "r"(b), "r"(c), "r"(msk)
        /*
         * "memory": the asm reads and writes through the pointers.
         * xmm0/xmm1 are overwritten, so they must be listed as clobbers
         * or the compiler may assume values it keeps there survive.
         */
        : "memory", "xmm0", "xmm1"
    );
}
/*
 * Run the scalar and the SSE implementation on identical inputs and
 * print the resulting arrays so the two outputs can be compared by eye.
 *
 * Declared as `int main(void)` with an explicit return value: `void main()`
 * is not one of the standard signatures for a hosted C program.
 */
int main(void)
{
    // scalar reference version
    char __attribute__ ((aligned (16))) a[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    char __attribute__ ((aligned (16))) b[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    char __attribute__ ((aligned (16))) c[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    normal_add_mod16_xor(a,b,c);
    print_abc16(a,b,c); putchar('\n');

    // SSE version; the arrays are 16-byte aligned because it uses movdqa
    char __attribute__ ((aligned (16))) A[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    char __attribute__ ((aligned (16))) B[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    char __attribute__ ((aligned (16))) C[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
    sse_add_mod16_xor(A,B,C);
    print_abc16(A,B,C); putchar('\n');

    return 0;
}
|
Output:
|
a = 0, 3, 6, 5, 12, 15, 10, 9, 8, 11, 14, 13, 4, 7, 2, 1,
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
c = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
a = 0, 3, 6, 5, 12, 15, 10, 9, 8, 11, 14, 13, 4, 7, 2, 1,
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
c = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
|