diff options
Diffstat (limited to 'plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S')
-rw-r--r-- | plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S | 457 |
1 files changed, 457 insertions, 0 deletions
diff --git a/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S new file mode 100644 index 0000000000..e2dd184ba3 --- /dev/null +++ b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S @@ -0,0 +1,457 @@ +/* Intel Pentium-4 mpn_lshift -- left shift. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + * + * P4 Willamette, Northwood: 1.75 cycles/limb + * P4 Prescott: 2.0 cycles/limb + */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_lshift) +C_SYMBOL_NAME(_gcry_mpih_lshift:) + + + pushl %ebx + pushl %edi + + + movl 20(%esp), %eax + movl 12(%esp), %edx + + movl 16(%esp), %ebx + movl 24(%esp), %ecx + + cmp $5, %eax + jae .Lunroll + + movl -4(%ebx,%eax,4), %edi + decl %eax + + jnz .Lsimple + + shldl %cl, %edi, %eax + + shll %cl, %edi + + movl %edi, (%edx) + popl %edi + + popl %ebx + + ret + + + + + +.Lsimple: + + + + + + + + + + movd (%ebx,%eax,4), %mm5 + + movd %ecx, %mm6 + negl %ecx + + psllq %mm6, %mm5 + addl $32, %ecx + + movd %ecx, %mm7 + psrlq $32, %mm5 + + +.Lsimple_top: + + + + + + + + + + + + + movq -4(%ebx,%eax,4), %mm0 + decl %eax + + psrlq %mm7, %mm0 + + + + movd %mm0, 4(%edx,%eax,4) + jnz .Lsimple_top + + + movd (%ebx), %mm0 + + movd %mm5, %eax + psllq %mm6, %mm0 + + popl %edi + popl %ebx + + movd %mm0, (%edx) + + emms + + ret + + + + + + .align 8, 0x90 +.Lunroll: + + + + + + + + + + movd -4(%ebx,%eax,4), %mm5 + leal (%ebx,%eax,4), %edi + + movd %ecx, %mm6 + andl $4, %edi + + psllq %mm6, %mm5 + jz .Lstart_src_aligned + + + + + + + + + + + + + + + + + + + + movq -8(%ebx,%eax,4), %mm0 + + psllq %mm6, %mm0 + decl %eax + + psrlq $32, %mm0 + + + + movd %mm0, (%edx,%eax,4) +.Lstart_src_aligned: + + movq -8(%ebx,%eax,4), %mm1 + leal (%edx,%eax,4), %edi + + andl $4, %edi + psrlq $32, %mm5 + + movq -16(%ebx,%eax,4), %mm3 + jz .Lstart_dst_aligned + + + + + + + + + + + + + + + + + + + + + movq %mm1, %mm0 + addl $32, %ecx + + psllq %mm6, %mm0 + + movd %ecx, %mm6 + psrlq $32, %mm0 + + + + movd %mm0, -4(%edx,%eax,4) + subl $4, %edx +.Lstart_dst_aligned: + + + psllq %mm6, %mm1 + negl %ecx + + addl $64, %ecx + movq %mm3, %mm2 + + movd %ecx, %mm7 + subl $8, %eax + + psrlq %mm7, %mm3 + + por %mm1, %mm3 + jc .Lfinish + + + + + .align 8, 0x90 +.Lunroll_loop: + + + + + + + + + + + + + + + + + movq 8(%ebx,%eax,4), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) + por %mm2, %mm0 + + movq (%ebx,%eax,4), %mm3 + psllq %mm6, %mm1 + + movq %mm0, 16(%edx,%eax,4) + movq %mm3, %mm2 + + psrlq %mm7, %mm3 + subl $4, %eax + + por %mm1, %mm3 + jnc .Lunroll_loop + + + +.Lfinish: + + + testb $2, %al + + jz .Lfinish_no_two + + movq 8(%ebx,%eax,4), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) + por %mm2, %mm0 + + movq %mm1, %mm2 + movq %mm0, %mm3 + + subl $2, %eax +.Lfinish_no_two: + + + + + + + + testb $1, %al + movd %mm5, %eax + + popl %edi + jz .Lfinish_zero + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movd (%ebx), %mm0 + psllq %mm6, %mm2 + + movq %mm3, 12(%edx) + psllq $32, %mm0 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + por %mm2, %mm0 + psllq %mm6, %mm1 + + movq %mm0, 4(%edx) + psrlq $32, %mm1 + + andl $32, %ecx + popl %ebx + + jz .Lfinish_one_unaligned + + movd %mm1, (%edx) +.Lfinish_one_unaligned: + + emms + + ret + + + + +.Lfinish_zero: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movq %mm3, 8(%edx) + andl $32, %ecx + + psllq %mm6, %mm2 + jz .Lfinish_zero_unaligned + + movq %mm2, (%edx) +.Lfinish_zero_unaligned: + + psrlq $32, %mm2 + popl %ebx + + movd %mm5, %eax + + movd %mm2, 4(%edx) + + emms + + ret |