summaryrefslogtreecommitdiff
path: root/libs/libsodium/src/crypto_scalarmult/curve25519/sandy2x/fe51_mul.S
blob: 33e0347277938cb23c5396d1c45f588a4d1f0e56 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#ifdef IN_SANDY2X

/*
   This file is basically amd64-51/fe25519_mul.s.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
.text
.p2align 5
#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL fe51_mul
ASM_HIDE_SYMBOL _fe51_mul
#endif
.globl fe51_mul
.globl _fe51_mul
#ifdef __ELF__
.type fe51_mul, @function
.type _fe51_mul, @function
#endif
fe51_mul:
_fe51_mul:
mov %rsp,%r11
and $31,%r11
add $96,%r11
sub %r11,%rsp
movq %r11,0(%rsp)
movq %r12,8(%rsp)
movq %r13,16(%rsp)
movq %r14,24(%rsp)
movq %r15,32(%rsp)
movq %rbx,40(%rsp)
movq %rbp,48(%rsp)
movq %rdi,56(%rsp)
mov  %rdx,%rcx
movq   24(%rsi),%rdx
imulq  $19,%rdx,%rax
movq %rax,64(%rsp)
mulq  16(%rcx)
mov  %rax,%r8
mov  %rdx,%r9
movq   32(%rsi),%rdx
imulq  $19,%rdx,%rax
movq %rax,72(%rsp)
mulq  8(%rcx)
add  %rax,%r8
adc %rdx,%r9
movq   0(%rsi),%rax
mulq  0(%rcx)
add  %rax,%r8
adc %rdx,%r9
movq   0(%rsi),%rax
mulq  8(%rcx)
mov  %rax,%r10
mov  %rdx,%r11
movq   0(%rsi),%rax
mulq  16(%rcx)
mov  %rax,%r12
mov  %rdx,%r13
movq   0(%rsi),%rax
mulq  24(%rcx)
mov  %rax,%r14
mov  %rdx,%r15
movq   0(%rsi),%rax
mulq  32(%rcx)
mov  %rax,%rbx
mov  %rdx,%rbp
movq   8(%rsi),%rax
mulq  0(%rcx)
add  %rax,%r10
adc %rdx,%r11
movq   8(%rsi),%rax
mulq  8(%rcx)
add  %rax,%r12
adc %rdx,%r13
movq   8(%rsi),%rax
mulq  16(%rcx)
add  %rax,%r14
adc %rdx,%r15
movq   8(%rsi),%rax
mulq  24(%rcx)
add  %rax,%rbx
adc %rdx,%rbp
movq   8(%rsi),%rdx
imulq  $19,%rdx,%rax
mulq  32(%rcx)
add  %rax,%r8
adc %rdx,%r9
movq   16(%rsi),%rax
mulq  0(%rcx)
add  %rax,%r12
adc %rdx,%r13
movq   16(%rsi),%rax
mulq  8(%rcx)
add  %rax,%r14
adc %rdx,%r15
movq   16(%rsi),%rax
mulq  16(%rcx)
add  %rax,%rbx
adc %rdx,%rbp
movq   16(%rsi),%rdx
imulq  $19,%rdx,%rax
mulq  24(%rcx)
add  %rax,%r8
adc %rdx,%r9
movq   16(%rsi),%rdx
imulq  $19,%rdx,%rax
mulq  32(%rcx)
add  %rax,%r10
adc %rdx,%r11
movq   24(%rsi),%rax
mulq  0(%rcx)
add  %rax,%r14
adc %rdx,%r15
movq   24(%rsi),%rax
mulq  8(%rcx)
add  %rax,%rbx
adc %rdx,%rbp
movq 64(%rsp),%rax
mulq  24(%rcx)
add  %rax,%r10
adc %rdx,%r11
movq 64(%rsp),%rax
mulq  32(%rcx)
add  %rax,%r12
adc %rdx,%r13
movq   32(%rsi),%rax
mulq  0(%rcx)
add  %rax,%rbx
adc %rdx,%rbp
movq 72(%rsp),%rax
mulq  16(%rcx)
add  %rax,%r10
adc %rdx,%r11
movq 72(%rsp),%rax
mulq  24(%rcx)
add  %rax,%r12
adc %rdx,%r13
movq 72(%rsp),%rax
mulq  32(%rcx)
add  %rax,%r14
adc %rdx,%r15
movq REDMASK51(%rip),%rsi
shld $13,%r8,%r9
and  %rsi,%r8
shld $13,%r10,%r11
and  %rsi,%r10
add  %r9,%r10
shld $13,%r12,%r13
and  %rsi,%r12
add  %r11,%r12
shld $13,%r14,%r15
and  %rsi,%r14
add  %r13,%r14
shld $13,%rbx,%rbp
and  %rsi,%rbx
add  %r15,%rbx
imulq  $19,%rbp,%rdx
add  %rdx,%r8
mov  %r8,%rdx
shr  $51,%rdx
add  %r10,%rdx
mov  %rdx,%rcx
shr  $51,%rdx
and  %rsi,%r8
add  %r12,%rdx
mov  %rdx,%r9
shr  $51,%rdx
and  %rsi,%rcx
add  %r14,%rdx
mov  %rdx,%rax
shr  $51,%rdx
and  %rsi,%r9
add  %rbx,%rdx
mov  %rdx,%r10
shr  $51,%rdx
and  %rsi,%rax
imulq  $19,%rdx,%rdx
add  %rdx,%r8
and  %rsi,%r10
movq   %r8,0(%rdi)
movq   %rcx,8(%rdi)
movq   %r9,16(%rdi)
movq   %rax,24(%rdi)
movq   %r10,32(%rdi)
movq 0(%rsp),%r11
movq 8(%rsp),%r12
movq 16(%rsp),%r13
movq 24(%rsp),%r14
movq 32(%rsp),%r15
movq 40(%rsp),%rbx
movq 48(%rsp),%rbp
add %r11,%rsp
mov %rdi,%rax
mov %rsi,%rdx
ret

#endif