[ create a new paste ] login | about

Link: http://codepad.org/UHWwncjm    [ raw code | output | fork ]

C, pasted on Sep 19:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>



/*
 * Print the first 16 bytes of each of the three buffers to stdout.
 *
 * One line is written per buffer, in the order a, b, c. Each line starts
 * with the buffer's label ("a = ", "b = ", "c = "), followed by every byte
 * formatted as a decimal integer and trailed by ", ", then a newline.
 *
 * a, b, c: buffers of at least 16 readable bytes each.
 */
static void print_abc16(char* a, char* b, char* c)
{
	const char *labels[3] = { "a = ", "b = ", "c = " };
	char *rows[3];
	int row;
	int col;

	rows[0] = a;
	rows[1] = b;
	rows[2] = c;

	for (row = 0; row < 3; row++) {
		fputs(labels[row], stdout);
		for (col = 0; col < 16; col++) {
			/* The char is promoted to int, which matches %d. */
			printf("%d, ", rows[row][col]);
		}
		printf("\n");
	}
}



/*
 * Plain C reference: for each of the 16 bytes,
 *
 *     a[i] ^= (b[i] + c[i]) mod 16
 *
 * The sum is formed from the bytes reinterpreted as unsigned values and
 * reduced with a 0x0F mask, so the value XORed into a[i] is always in the
 * range 0..15. Using C's % operator on plain (possibly signed) char would
 * give a negative remainder for a negative sum (e.g. -1 % 16 == -1), which
 * is neither a true modulo nor what the SSE variant (byte-wise add followed
 * by an AND with 0x0F) produces.
 *
 * a: 16-byte buffer, updated in place.
 * b, c: 16-byte buffers, read only.
 */
void normal_add_mod16_xor(char* a, char* b, char* c)
{
	int i;
	for (i=0; i<16; i++) {
		/* Only the low 4 bits of the sum matter, so no 8-bit wrap is needed. */
		unsigned int sum = (unsigned int)(unsigned char)b[i]
		                 + (unsigned int)(unsigned char)c[i];
		a[i] ^= (char)(sum & 0x0Fu);
	}
}



/*
 * SSE2 version of the 16-byte "a ^= (b + c) mod 16" operation.
 *
 * All 16 bytes are processed at once: b and c are added byte-wise (paddb,
 * wrapping modulo 256), the low nibble of every byte is kept with a 0x0F
 * mask (pand), and the result is XORed into a (pxor) and stored back.
 *
 * a: 16-byte buffer, updated in place.
 * b, c: 16-byte buffers, read only.
 *
 * All three pointers must be 16-byte aligned: the loads and the store use
 * movdqa, which faults on unaligned addresses. x86 with SSE2 only.
 */
void sse_add_mod16_xor(char* a, char* b, char* c)
{
	/* Constant mask; static so it is not rebuilt on the stack on each call. */
	static const char __attribute__ ((aligned (16))) msk[16]={
		0x0F,0x0F,0x0F,0x0F,	0x0F,0x0F,0x0F,0x0F,
		0x0F,0x0F,0x0F,0x0F,	0x0F,0x0F,0x0F,0x0F,
	};

	/*
	 * __asm__/__volatile__ (rather than bare asm/volatile) also compile in
	 * strict ISO modes such as -std=c11.
	 */
	__asm__ __volatile__(
		"movdqa (%1), %%xmm0\n\t"	/* xmm0 = b[0..15]                 */
		"movdqa (%2), %%xmm1\n\t"	/* xmm1 = c[0..15]                 */
		"paddb  %%xmm1, %%xmm0\n\t"	/* xmm0 = b + c (per byte, wraps)  */
		"movdqa (%3), %%xmm1\n\t"	/* xmm1 = 0x0F mask                */
		"pand   %%xmm1, %%xmm0\n\t"	/* xmm0 = (b + c) mod 16           */
		"movdqa (%0), %%xmm1\n\t"	/* xmm1 = a[0..15]                 */
		"pxor   %%xmm1, %%xmm0\n\t"	/* xmm0 = a ^ ((b + c) mod 16)     */
		"movdqa %%xmm0, (%0)"		/* a[0..15] = xmm0                 */
		:
		:"r"(a),"r"(b),"r"(c),"r"(msk)
		/*
		 * "memory": the asm reads and writes through the pointer operands.
		 * xmm0/xmm1 are overwritten as scratch and must be declared so the
		 * compiler does not keep live values in them across the statement.
		 */
		:"memory","xmm0","xmm1"
	);
}



/*
 * Demo driver: runs the plain C implementation and the SSE implementation
 * on identical inputs and prints the three buffers after each call so the
 * results can be compared by eye.
 *
 * Returns 0. The standard requires main to return int; the original
 * "void main()" left the process exit status unspecified.
 */
int main(void)
{
	// plain C reference implementation
	char __attribute__ ((aligned (16))) a[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
	char __attribute__ ((aligned (16))) b[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
	char __attribute__ ((aligned (16))) c[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};

	normal_add_mod16_xor(a,b,c);
	
	print_abc16(a,b,c); putchar('\n');

	
	
	// SSE implementation; its buffers are aligned to 16 bytes, as declared here
	char __attribute__ ((aligned (16))) A[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
	char __attribute__ ((aligned (16))) B[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
	char __attribute__ ((aligned (16))) C[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
	
	sse_add_mod16_xor(A,B,C);
	
	print_abc16(A,B,C); putchar('\n');

	return 0;
}


Output:
1
2
3
4
5
6
7
8
a = 0, 3, 6, 5, 12, 15, 10, 9, 8, 11, 14, 13, 4, 7, 2, 1, 
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
c = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 

a = 0, 3, 6, 5, 12, 15, 10, 9, 8, 11, 14, 13, 4, 7, 2, 1, 
b = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 
c = 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 



Create a new paste based on this one


Comments: