diff --git a/x86_64/sha256_sse4_amd64.asm b/x86_64/sha256_sse4_amd64.asm
index a86d2db..d4088f3 100644
--- a/x86_64/sha256_sse4_amd64.asm
+++ b/x86_64/sha256_sse4_amd64.asm
@@ -145,6 +145,8 @@ LAB_CALC:
pop temp
mov rax, 0
+ ; r8 = cursor into g_4sha256_k, advanced in step with rax. r8 is caller-saved in both the SysV and Win64 ABIs, so it is deliberately not pushed/popped: a second push after `push rbx` would leave rsp at 8 mod 16 and fault the win64 `movdqa [rsp + 16*n], xmmN` spills.
+ mov r8, qword g_4sha256_k ; NOTE(review): r8 is an argument register on win64 - confirm it is not aliased to hash/data/init/temp there
; Load the init values of the message into the hash.
@@ -166,9 +168,9 @@ LAB_LOOP:
%macro lab_loop_blk 0
movntdqa xmm6, [data+rax*4]
- movntdqa xmm1, g_4sha256_k[rax*4]
- paddd xmm6, xmm1
+ paddd xmm6, [r8] ; same address as the old g_4sha256_k[rax*4]; the memory operand must be 16-byte aligned; xmm1 is no longer used as a temporary here
add rax, 4
+ add r8, 16 ; 4 dwords consumed: keeps r8 == g_4sha256_k + rax*4
paddd xmm6, xmm10 ; +h
diff --git a/x86_64/sha256_xmm_amd64.asm b/x86_64/sha256_xmm_amd64.asm
index 6c203e8..08a5f8d 100644
--- a/x86_64/sha256_xmm_amd64.asm
+++ b/x86_64/sha256_xmm_amd64.asm
@@ -314,7 +314,7 @@ sha256_sse2_64_new:
movdqa [hash1+7*16], rH
mov data, hash1
- mov init, sha256_init
+ mov init, qword sha256_init ; explicit qword requests the 64-bit immediate form for the address of sha256_init. NOTE(review): presumably the untyped form was being assembled as a 32-bit immediate - confirm against the assembler in use
SHA_256