diff options
author | René Schümann <white06tiger@gmail.com> | 2015-03-14 19:56:55 +0000 |
---|---|---|
committer | René Schümann <white06tiger@gmail.com> | 2015-03-14 19:56:55 +0000 |
commit | c60aed5432e9cda277b9351de51e82dfb8e02475 (patch) | |
tree | 97ccd1ea8e2544f6a9673ee7d04c18b714877a35 /plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx | |
parent | d2b26b1f86326362f56540b5185fa09ab5f2779c (diff) |
MirOTR: part one of many file/folder structure changes
git-svn-id: http://svn.miranda-ng.org/main/trunk@12402 1316c22d-e87f-b044-9b9b-93d7a3e3ba9c
Diffstat (limited to 'plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx')
3 files changed, 912 insertions, 0 deletions
diff --git a/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/distfiles b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/distfiles new file mode 100644 index 0000000000..8f0ea426db --- /dev/null +++ b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/distfiles @@ -0,0 +1,2 @@ +mpih-lshift.S +mpih-rshift.S diff --git a/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S new file mode 100644 index 0000000000..e2dd184ba3 --- /dev/null +++ b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-lshift.S @@ -0,0 +1,457 @@ +/* Intel Pentium-4 mpn_lshift -- left shift. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_lshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + * + * P4 Willamette, Northwood: 1.75 cycles/limb + * P4 Prescott: 2.0 cycles/limb + */ /* Left shift of the usize-limb operand at up by cnt bits; the result is stored at wp and the bits shifted out of the most significant limb are returned in eax. Three paths: usize of 1 uses plain integer shifts, usize below 5 uses the one-limb-per-pass MMX loop at .Lsimple, usize of 5 or more uses the unrolled MMX loop at .Lunroll. Limbs are walked from the most significant end downwards. NOTE(review): the code appears to assume usize >= 1 and 1 <= cnt <= 31; confirm against the callers. */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_lshift) +C_SYMBOL_NAME(_gcry_mpih_lshift:) + + + pushl %ebx + pushl %edi /* two registers pushed, so every argument now sits 8 bytes further from esp than the offsets listed in the header comment */ + + + movl 20(%esp), %eax /* eax = usize */ + movl 12(%esp), %edx /* edx = wp (destination) */ + + movl 16(%esp), %ebx /* ebx = up (source) */ + movl 24(%esp), %ecx /* ecx = cnt (shift count) */ + + cmp $5, %eax + jae .Lunroll /* usize >= 5: unrolled path */ + + movl -4(%ebx,%eax,4), %edi /* edi = most significant source limb */ + decl %eax + + jnz .Lsimple /* usize was not 1: MMX loop with eax = usize - 1 */ + + shldl %cl, %edi, %eax /* single limb: eax is 0 here and receives the bits shifted out of edi, which is the return value */ + + shll %cl, %edi + + movl %edi, (%edx) + popl %edi + + popl %ebx + + ret /* no emms needed, no MMX register was touched on this path */ + + + + + +.Lsimple: /* entry state: eax = usize - 1, ebx = up, ecx = cnt, edx = wp. The setup below leaves mm5 = return value, mm6 = cnt, mm7 = 32 - cnt. */ + + + + + + + + + + movd (%ebx,%eax,4), %mm5 + + movd %ecx, %mm6 + negl %ecx + + psllq %mm6, %mm5 + addl $32, %ecx + + movd %ecx, %mm7 + psrlq $32, %mm5 /* mm5 = bits shifted out of the top limb, kept until the final movd into eax */ + + +.Lsimple_top: /* each pass loads source limbs eax - 1 and eax as one 64-bit value, shifts it right by 32 - cnt and stores the low 32 bits as destination limb eax (index taken before the decl) */ + + + + + + + + + + + + + movq -4(%ebx,%eax,4), %mm0 + decl %eax + + psrlq %mm7, %mm0 + + + + movd %mm0, 4(%edx,%eax,4) + jnz .Lsimple_top /* flags are still those of the decl: MMX instructions do not modify EFLAGS */ + + + movd (%ebx), %mm0 /* lowest source limb has no lower neighbour, shift it on its own */ + + movd %mm5, %eax /* return value */ + psllq %mm6, %mm0 + + popl %edi + popl %ebx + + movd %mm0, (%edx) + + emms /* leave MMX state clean for x87 code in the caller */ + + ret + + + + + + .align 8, 0x90 +.Lunroll: /* entry state: eax = usize, ebx = up, ecx = cnt, edx = wp. mm5 collects the return value, mm6 = shift count. */ + + + + + + + + + + movd -4(%ebx,%eax,4), %mm5 + leal (%ebx,%eax,4), %edi /* edi = address just past the source operand */ + + movd %ecx, %mm6 + andl $4, %edi /* zero when the end of the source is 8-byte aligned */ + + psllq %mm6, %mm5 + jz .Lstart_src_aligned /* tests the andl result */ + + + + + + + + + + + + + + + + + + + + movq -8(%ebx,%eax,4), %mm0 /* source end not 8-byte aligned: produce the top destination limb separately and drop it from the count */ + + psllq %mm6, %mm0 + decl %eax + + psrlq $32, %mm0 + + + + movd %mm0, (%edx,%eax,4) +.Lstart_src_aligned: + + movq -8(%ebx,%eax,4), %mm1 + leal (%edx,%eax,4), %edi /* edi = address just past the remaining destination */ + + andl $4, %edi /* zero when that destination end is 8-byte aligned */ + psrlq $32, %mm5 /* mm5 now holds the final return value */ + + movq -16(%ebx,%eax,4), %mm3 + jz .Lstart_dst_aligned /* tests the andl result */ + + + + + + + + + + + + + + + + + + + + + movq %mm1, %mm0 /* destination end not 8-byte aligned: store one limb now, then lower edx by one limb and raise the shift count by 32 so that the 64-bit stores that follow are aligned */ + addl $32, %ecx + + psllq %mm6, %mm0 + + movd %ecx, %mm6 + psrlq $32, %mm0 + + + + movd %mm0, -4(%edx,%eax,4) + subl $4, %edx +.Lstart_dst_aligned: + + + psllq %mm6, %mm1 + negl %ecx + + addl $64, %ecx /* ecx = 64 minus the shift held in mm6 */ + movq %mm3, %mm2 + + 
movd %ecx, %mm7 + subl $8, %eax + + psrlq %mm7, %mm3 + + por %mm1, %mm3 + jc .Lfinish /* borrow from the subl: eax was below 8, skip the main loop */ + + + + + .align 8, 0x90 +.Lunroll_loop: /* four limbs per pass: two 64-bit loads and two 64-bit stores. mm6 = left shift, mm7 = complementary right shift, mm2 and mm3 carry partial results between passes. */ + + + + + + + + + + + + + + + + + movq 8(%ebx,%eax,4), %mm0 + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) + por %mm2, %mm0 + + movq (%ebx,%eax,4), %mm3 + psllq %mm6, %mm1 + + movq %mm0, 16(%edx,%eax,4) + movq %mm3, %mm2 + + psrlq %mm7, %mm3 + subl $4, %eax + + por %mm1, %mm3 + jnc .Lunroll_loop /* loop until the subl borrows */ + + + +.Lfinish: /* the low two bits of eax tell how many limbs are left over */ + + + testb $2, %al + + jz .Lfinish_no_two + + movq 8(%ebx,%eax,4), %mm0 /* two extra limbs: one more 64-bit step, same pattern as the loop body */ + psllq %mm6, %mm2 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + movq %mm3, 24(%edx,%eax,4) + por %mm2, %mm0 + + movq %mm1, %mm2 + movq %mm0, %mm3 + + subl $2, %eax +.Lfinish_no_two: + + + + + + + + testb $1, %al + movd %mm5, %eax /* return value; movd and popl leave the testb flags intact */ + + popl %edi + jz .Lfinish_zero + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movd (%ebx), %mm0 /* one extra limb: the lowest source limb is still to be merged in */ + psllq %mm6, %mm2 + + movq %mm3, 12(%edx) + psllq $32, %mm0 + + movq %mm0, %mm1 + psrlq %mm7, %mm0 + + por %mm2, %mm0 + psllq %mm6, %mm1 + + movq %mm0, 4(%edx) + psrlq $32, %mm1 + + andl $32, %ecx /* ecx is 64 minus the effective shift; with 1 <= cnt <= 31 (assumed) bit 5 is clear only when the destination fix-up added 32 to the shift */ + popl %ebx + + jz .Lfinish_one_unaligned /* fix-up path was taken: the lowest limb is not stored here */ + + movd %mm1, (%edx) +.Lfinish_one_unaligned: + + emms + + ret + + + + +.Lfinish_zero: /* no extra limb: flush the pending results in mm3 and mm2 */ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movq %mm3, 8(%edx) + andl $32, %ecx /* same alignment test as in the one-limb tail */ + + psllq %mm6, %mm2 + jz .Lfinish_zero_unaligned + + movq %mm2, (%edx) +.Lfinish_zero_unaligned: + + psrlq $32, %mm2 + popl %ebx + + movd %mm5, %eax /* return value */ + + movd %mm2, 4(%edx) + + emms + + ret diff --git a/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-rshift.S b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-rshift.S new file mode 100644 index 0000000000..e3374e3ba3 --- /dev/null +++ b/plugins/MirOTR/Libgcrypt/mpi/pentium4/mmx/mpih-rshift.S @@ -0,0 +1,453 @@ +/* Intel Pentium-4 mpn_rshift -- right shift. + * + * Copyright 2001, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. 
+ */ + + +#include "sysdep.h" +#include "asm-syntax.h" + + +/******************* + * mpi_limb_t + * _gcry_mpih_rshift( mpi_ptr_t wp, (sp + 4) + * mpi_ptr_t up, (sp + 8) + * mpi_size_t usize, (sp + 12) + * unsigned cnt) (sp + 16) + * + * P4 Willamette, Northwood: 1.75 cycles/limb + * P4 Prescott: 2.0 cycles/limb + */ /* Right shift of the usize-limb operand at up by cnt bits; the result is stored at wp and the bits shifted out of the least significant limb are returned in the high end of eax. Three paths: usize of 1 uses plain integer shifts, usize below 5 uses the one-limb-per-pass MMX loop at .Lsimple, usize of 5 or more uses the unrolled MMX loop at .Lunroll. Limbs are walked from the least significant end upwards. NOTE(review): the code appears to assume usize >= 1 and 1 <= cnt <= 31; confirm against the callers. */ + +.text + ALIGN (3) + .globl C_SYMBOL_NAME(_gcry_mpih_rshift) +C_SYMBOL_NAME(_gcry_mpih_rshift:) + pushl %ebx + pushl %edi /* two registers pushed, so every argument now sits 8 bytes further from esp than the offsets listed in the header comment */ + + + movl 20(%esp), %eax /* eax = usize */ + movl 12(%esp), %edx /* edx = wp (destination) */ + + movl 16(%esp), %ebx /* ebx = up (source) */ + movl 24(%esp), %ecx /* ecx = cnt (shift count) */ + + cmp $5, %eax + jae .Lunroll /* usize >= 5: unrolled path */ + + decl %eax + movl (%ebx), %edi /* edi = least significant source limb; movl keeps the decl flags */ + + jnz .Lsimple /* usize was not 1: MMX loop with eax = usize - 1 */ + + shrdl %cl, %edi, %eax /* single limb: eax is 0 here and receives the bits shifted out of edi, which is the return value */ + + shrl %cl, %edi + + movl %edi, (%edx) + popl %edi + + popl %ebx + + ret /* no emms needed, no MMX register was touched on this path */ + + + + + + .align 8, 0x90 +.Lsimple: /* entry state: eax = usize - 1, ebx = up, ecx = cnt, edx = wp. The setup below re-bases ebx to the top source limb and edx to one limb below the top destination limb, and turns eax into a negative index that counts up to zero. mm5 collects the return value, mm6 = cnt. */ + + + + + + + + + + movd (%ebx), %mm5 + leal (%ebx,%eax,4), %ebx + + movd %ecx, %mm6 + leal -4(%edx,%eax,4), %edx + + psllq $32, %mm5 + negl %eax + + + + + + + +.Lsimple_top: /* each pass loads two adjacent source limbs as one 64-bit value, shifts it right by cnt and stores the low 32 bits as one destination limb */ + + + + + + + + + + movq (%ebx,%eax,4), %mm0 + incl %eax + + psrlq %mm6, %mm0 + + movd %mm0, (%edx,%eax,4) + jnz .Lsimple_top /* flags are still those of the incl: MMX instructions do not modify EFLAGS */ + + + movd (%ebx), %mm0 /* top source limb has no upper neighbour, shift it on its own */ + psrlq %mm6, %mm5 /* low 32 bits of mm5 = bits shifted out of the lowest limb */ + + psrlq %mm6, %mm0 + popl %edi + + movd %mm5, %eax /* return value */ + popl %ebx + + movd %mm0, 4(%edx) + + emms /* leave MMX state clean for x87 code in the caller */ + + ret + + + + + + .align 8, 0x90 +.Lunroll: /* entry state: eax = usize, ebx = up, ecx = cnt, edx = wp. mm5 collects the return value, mm6 = shift count, edi = 4 is the mask used for both alignment tests. */ + + + + + + + + + + movd (%ebx), %mm5 + movl $4, %edi + + movd %ecx, %mm6 + testl %edi, %ebx /* zero when the source is 8-byte aligned */ + + psllq $32, %mm5 + jz .Lstart_src_aligned /* tests the testl result */ + + + + + + + + + + + + + + + + + movq (%ebx), %mm0 /* source not 8-byte aligned: produce the lowest destination limb separately, then advance both pointers by one limb and drop it from the count */ + + psrlq %mm6, %mm0 + addl $4, %ebx + + decl %eax + + movd %mm0, (%edx) + addl $4, %edx +.Lstart_src_aligned: + + + movq (%ebx), %mm1 + testl %edi, %edx /* zero when the destination is 8-byte aligned */ + + psrlq %mm6, %mm5 /* mm5 now holds the final return value */ + jz .Lstart_dst_aligned /* tests the testl result */ + + + + + + + + + + + + + + + + + + movq %mm1, %mm0 /* destination not 8-byte aligned: store one limb now, then advance edx by one limb and raise the shift count by 32 so that the 64-bit stores that follow are aligned */ + addl $32, %ecx + + psrlq %mm6, %mm0 + + movd %ecx, %mm6 + + movd %mm0, (%edx) + addl $4, %edx +.Lstart_dst_aligned: + + + movq 8(%ebx), %mm3 + negl %ecx + + movq %mm3, %mm2 + addl $64, %ecx /* ecx = 64 minus the shift held in mm6 */ + + movd %ecx, %mm7 + psrlq %mm6, %mm1 + + leal -12(%ebx,%eax,4), %ebx /* re-base both pointers near the operand ends so the loop can use a negative index that counts up */ + leal -20(%edx,%eax,4), %edx + + psllq %mm7, 
%mm3 + subl $7, %eax + + por %mm1, %mm3 + negl %eax /* eax = 7 minus the previous eax */ + + jns .Lfinish /* index already non-negative: skip the main loop */ + + + + + + + + + + + + + + + + .align 8, 0x90 +.Lunroll_loop: /* four limbs per pass: two 64-bit loads and two 64-bit stores. mm6 = right shift, mm7 = complementary left shift, mm2 and mm3 carry partial results between passes. */ + + + + + + + + + + + + + + + + + movq (%ebx,%eax,4), %mm0 + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, -8(%edx,%eax,4) + por %mm2, %mm0 + + movq 8(%ebx,%eax,4), %mm3 + psrlq %mm6, %mm1 + + movq %mm0, (%edx,%eax,4) + movq %mm3, %mm2 + + psllq %mm7, %mm3 + addl $4, %eax + + por %mm1, %mm3 + js .Lunroll_loop /* loop while the index is still negative */ + + +.Lfinish: /* the low two bits of eax select the tail; note the tests are inverted relative to the lshift routine because eax was negated */ + + + testb $2, %al + + jnz .Lfinish_no_two + + movq (%ebx,%eax,4), %mm0 /* two extra limbs: one more 64-bit step, same pattern as the loop body */ + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, -8(%edx,%eax,4) + por %mm2, %mm0 + + movq %mm1, %mm2 + movq %mm0, %mm3 + + addl $2, %eax +.Lfinish_no_two: + + + + + + + + testb $1, %al + popl %edi + + movd %mm5, %eax /* return value; popl and movd leave the testb flags intact */ + jnz .Lfinish_zero + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movd 8(%ebx), %mm0 /* one extra limb: the top source limb is still to be merged in */ + psrlq %mm6, %mm2 + + movq %mm0, %mm1 + psllq %mm7, %mm0 + + movq %mm3, (%edx) + por %mm2, %mm0 + + psrlq %mm6, %mm1 + andl $32, %ecx /* ecx is 64 minus the effective shift; with 1 <= cnt <= 31 (assumed) bit 5 is clear only when the destination fix-up added 32 to the shift */ + + popl %ebx + jz .Lfinish_one_unaligned /* fix-up path was taken: the extra top limb is not stored here */ + + + movd %mm1, 16(%edx) +.Lfinish_one_unaligned: + + movq %mm0, 8(%edx) + + emms + + ret + + + + +.Lfinish_zero: /* no extra limb: flush the pending results in mm3 and mm2 */ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + movq %mm3, 4(%edx) + psrlq %mm6, %mm2 + + movd %mm2, 12(%edx) + andl $32, %ecx /* same alignment test as in the one-limb tail */ + + popl %ebx + jz .Lfinish_zero_unaligned + + movq %mm2, 12(%edx) /* aligned case: overwrite with the full 64-bit value */ +.Lfinish_zero_unaligned: + + emms + + ret |