diff options
Diffstat (limited to 'plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2')
6 files changed, 0 insertions, 567 deletions
diff --git a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/distfiles b/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/distfiles deleted file mode 100644 index 7252cd7e3f..0000000000 --- a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/distfiles +++ /dev/null @@ -1,5 +0,0 @@ -mpih-add1.S -mpih-mul1.S -mpih-mul2.S -mpih-mul3.S -mpih-sub1.S diff --git a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-add1.S b/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-add1.S deleted file mode 100644 index 55ed663032..0000000000 --- a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-add1.S +++ /dev/null @@ -1,91 +0,0 @@ -/* Intel Pentium-4 mpn_add_n -- mpn addition. - * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - - /******************* - * mpi_limb_t - * _gcry_mpih_add_n( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_ptr_t s2_ptr, (sp + 12) - * mpi_size_t size) (sp + 16) - * - * Adds the size-limb vectors at s1_ptr and s2_ptr, stores the size-limb - * sum at res_ptr and returns the final carry (0 or 1) in %eax. The loop - * tests its counter only after processing a limb, so size must be >= 1. - * - * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 - * 6.0 cycles/limb if dst==src1 or dst==src2 - * P4 Prescott: >= 5 cycles/limb - * - * The 4 c/l achieved here isn't particularly good, but is better than 9 c/l - * for a basic adc loop. - */ - - TEXT - ALIGN (3) - GLOBL C_SYMBOL_NAME(_gcry_mpih_add_n) -C_SYMBOL_NAME(_gcry_mpih_add_n:) - - pxor %mm0, %mm0 /* carry = 0 */ - - movl 8(%esp), %eax /* s1_ptr */ - movl %ebx, 8(%esp) /* re-use parameter space */ - movl 12(%esp), %ebx /* s2_ptr */ - movl 4(%esp), %edx /* res_ptr */ - movl 16(%esp), %ecx /* size */ - - leal (%eax,%ecx,4), %eax /* src1 end */ - leal (%ebx,%ecx,4), %ebx /* src2 end */ - leal (%edx,%ecx,4), %edx /* dst end */ - negl %ecx /* -size */ - -.Ltop: -/* - C eax src1 end - C ebx src2 end - C ecx counter, limbs, negative - C edx dst end - C mm0 carry bit -*/ - - movd (%eax,%ecx,4), %mm1 - movd (%ebx,%ecx,4), %mm2 - paddq %mm2, %mm1 /* 64-bit add, so the carry out lands in bit 32 */ - - paddq %mm1, %mm0 /* add the incoming carry */ - movd %mm0, (%edx,%ecx,4) /* store the low 32 bits */ - - psrlq $32, %mm0 /* bit 32 and up become the next carry */ - - addl $1, %ecx - jnz .Ltop - - - movd %mm0, %eax /* return the carry */ - movl 8(%esp), %ebx /* restore saved EBX */ - emms - ret diff --git a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul1.S b/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul1.S deleted file mode 100644 index a0c98fb4dd..0000000000 --- a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul1.S +++ /dev/null @@ -1,96 +0,0 @@ -/* Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store - * the result in a second limb vector. - * - * Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. 
- * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_mul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - * - * src != dst src == dst - * P6 model 9 (Banias) ?.? - * P6 model 13 (Dothan) 4.75 4.75 - * P4 model 0 (Willamette) 4.0 6.0 - * P4 model 1 (?) 4.0 6.0 - * P4 model 2 (Northwood) 4.0 6.0 - * P4 model 3 (Prescott) ?.? ?.? - * P4 model 4 (Nocona) ?.? ?.? - * Unfortunately when src==dst the write-combining described in - * pentium4/README takes us up to 6 c/l. 
- * - */ - - TEXT - ALIGN (3) - GLOBL C_SYMBOL_NAME(_gcry_mpih_mul_1) -C_SYMBOL_NAME(_gcry_mpih_mul_1:); - - pxor %mm0, %mm0 /* carry limb = 0 */ - -.Lstart_1c: - movl 8(%esp), %eax /* s1_ptr */ - movd 16(%esp), %mm7 /* s2_limb */ - movl 4(%esp), %edx /* res_ptr */ - movl 12(%esp), %ecx /* s1_size; tested only after a limb is done, so must be >= 1 */ - -.Ltop: - -/* - C eax src, incrementing - C ebx - C ecx counter, size iterations - C edx dst, incrementing - C - C mm0 carry limb - C mm7 multiplier -*/ - - movd (%eax), %mm1 - addl $4, %eax - pmuludq %mm7, %mm1 /* 32x32 -> 64-bit product */ - - paddq %mm1, %mm0 /* product + carry limb */ - movd %mm0, (%edx) /* store the low 32 bits */ - addl $4, %edx - - psrlq $32, %mm0 /* high 32 bits become the next carry limb */ - - subl $1, %ecx - jnz .Ltop - - - movd %mm0, %eax /* return the final carry limb */ - emms - ret - diff --git a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul2.S b/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul2.S deleted file mode 100644 index f975adfca5..0000000000 --- a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul2.S +++ /dev/null @@ -1,136 +0,0 @@ -/* Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add - * the result to a second limb vector. - * - * Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. 
- * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - * - * P6 model 9 (Banias) ?.? - * P6 model 13 (Dothan) 5.8 - * P4 model 0 (Willamette) 5.5 - * P4 model 1 (?) 5.5 - * P4 model 2 (Northwood) 5.5 - * P4 model 3 (Prescott) 6.0 - * P4 model 4 (Nocona) - * - * Only the carry limb propagation is on the dependent chain, but some other - * Pentium4 pipeline magic brings down performance to 6 cycles/l from the - * ideal 4 cycles/l. - */ - - - TEXT - ALIGN (4) - GLOBL C_SYMBOL_NAME(_gcry_mpih_addmul_1) -C_SYMBOL_NAME(_gcry_mpih_addmul_1:) - - pxor %mm4, %mm4 /* carry = 0 */ -.Lstart_1c: - movl 8(%esp), %eax /* s1_ptr */ - movl 12(%esp), %ecx /* s1_size */ - movl 4(%esp), %edx /* res_ptr */ - movd 16(%esp), %mm7 /* s2_limb */ - -/* - C eax src, incrementing ; 5B - C ecx loop counter, decrementing - C edx dst, incrementing - C - C mm4 carry, low 32-bits - C mm7 multiplier -*/ - - movd (%eax), %mm2 /* first source limb is read unconditionally */ - pmuludq %mm7, %mm2 - - shrl $1, %ecx /* ecx = number of limb pairs, CF = 1 if the size is odd */ - jnc .Leven - - leal 4(%eax), %eax /* odd size: handle one limb up front */ - movd (%edx), %mm1 - paddq %mm2, %mm1 - paddq %mm1, %mm4 - movd %mm4, (%edx) - psrlq $32, %mm4 - - testl %ecx, %ecx - jz .Lrtn /* no pairs left: the size was 1 */ - leal 4(%edx), %edx - - movd (%eax), %mm2 - pmuludq %mm7, %mm2 -.Leven: /* here mm2 = src[0] * multiplier for the current pair */ - movd 4(%eax), %mm0 - movd (%edx), %mm1 - pmuludq %mm7, %mm0 - - subl $1, %ecx - jz .Lend /* only one pair: skip the pipelined loop */ -.Lloop: /* two limbs per pass; the next pair is loaded and multiplied while this one is summed */ - paddq %mm2, %mm1 - movd 8(%eax), %mm2 - paddq %mm1, %mm4 - movd 4(%edx), %mm3 - pmuludq %mm7, %mm2 - movd %mm4, (%edx) - psrlq $32, %mm4 - - paddq %mm0, %mm3 - movd 12(%eax), %mm0 - paddq %mm3, %mm4 - movd 8(%edx), %mm1 - pmuludq %mm7, %mm0 - movd %mm4, 4(%edx) - psrlq $32, %mm4 - - leal 8(%eax), %eax - leal 8(%edx), %edx - subl $1, %ecx - 
jnz .Lloop -.Lend: /* last pair: products already in mm2 and mm0, dst[0] already in mm1 */ - paddq %mm2, %mm1 - paddq %mm1, %mm4 - movd 4(%edx), %mm3 - movd %mm4, (%edx) - psrlq $32, %mm4 - paddq %mm0, %mm3 - paddq %mm3, %mm4 - movd %mm4, 4(%edx) - psrlq $32, %mm4 -.Lrtn: - movd %mm4, %eax /* return the final carry limb */ - emms - ret diff --git a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul3.S b/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul3.S deleted file mode 100644 index ebcd2a68ea..0000000000 --- a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-mul3.S +++ /dev/null @@ -1,127 +0,0 @@ -/* Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and - * subtract the result from a second limb vector. - * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. 
- */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_submul_1( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_size_t s1_size, (sp + 12) - * mpi_limb_t s2_limb) (sp + 16) - * - * P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon - * (stepping 10). - * - * This code is not particularly good at 7 c/l. The dependent chain is only - * 4 c/l and there's only 4 MMX unit instructions, so it's not clear why that - * speed isn't achieved. - * - * The arrangements made here to get a two instruction dependent chain are - * slightly subtle. In the loop the carry (or borrow rather) is a negative - * so that a paddq can be used to give a low limb ready to store, and a high - * limb ready to become the new carry after a psrlq. - * - * If the carry was a simple twos complement negative then the psrlq shift - * would need to bring in 0 bits or 1 bits according to whether the high was - * zero or non-zero, since a non-zero value would represent a negative - * needing sign extension. That wouldn't be particularly easy to arrange and - * certainly would add an instruction to the dependent chain, so instead an - * offset is applied so that the high limb will be 0xFFFFFFFF+c. With c in - * the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to - * 0xFFFFFFFF and is therefore always positive and can always have 0 bits - * shifted in, which is what psrlq does. - * - * The extra 0xFFFFFFFF must be subtracted before c is used, but that can be - * done off the dependent chain. The total adjustment then is to add - * 0xFFFFFFFF00000000 to offset the new carry, and subtract - * 0x00000000FFFFFFFF to remove the offset from the current carry, for a net - * add of 0xFFFFFFFE00000001. In the code this is applied to the destination - * limb when fetched. 
- * - * It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement - * negative, which is how it's undone for the return value, but that doesn't - * seem as clear. -*/ - - TEXT - ALIGN (4) - GLOBL C_SYMBOL_NAME(_gcry_mpih_submul_1) -C_SYMBOL_NAME(_gcry_mpih_submul_1:) - - pxor %mm1, %mm1 /* initial borrow = 0 */ - -.Lstart_1c: - movl 8(%esp), %eax /* s1_ptr */ - pcmpeqd %mm0, %mm0 /* mm0 = all ones */ - - movd 16(%esp), %mm7 /* s2_limb */ - pcmpeqd %mm6, %mm6 /* mm6 = all ones */ - - movl 4(%esp), %edx /* res_ptr */ - psrlq $32, %mm0 /* mm0 = 0x00000000FFFFFFFF */ - - movl 12(%esp), %ecx /* s1_size */ - psllq $32, %mm6 /* mm6 = 0xFFFFFFFF00000000 */ - - psubq %mm0, %mm6 /* mm6 = 0xFFFFFFFE00000001 */ - - psubq %mm1, %mm0 /* mm0 = 0xFFFFFFFF - borrow */ - -/* - C eax src, incrementing - C ebx - C ecx loop counter, decrementing - C edx dst, incrementing - C - C mm0 0xFFFFFFFF - borrow - C mm6 0xFFFFFFFE00000001 - C mm7 multiplier -*/ - -.Lloop: - movd (%eax), %mm1 - leal 4(%eax), %eax - movd (%edx), %mm2 - paddq %mm6, %mm2 /* apply the net offset to the destination limb */ - pmuludq %mm7, %mm1 - psubq %mm1, %mm2 /* dst limb + offset - product */ - paddq %mm2, %mm0 /* fold in the offset borrow */ - subl $1, %ecx /* sets ZF for the jnz below; the MMX ops and leal in between do not touch the flags */ - movd %mm0, (%edx) - psrlq $32, %mm0 /* high half = 0xFFFFFFFF - new borrow */ - leal 4(%edx), %edx - jnz .Lloop - - movd %mm0, %eax - notl %eax /* undo the ones-complement offset to return the borrow limb */ - emms - ret diff --git a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-sub1.S b/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-sub1.S deleted file mode 100644 index 33900c742e..0000000000 --- a/plugins/MirOTR/libgcrypt-1.4.6/mpi/pentium4/sse2/mpih-sub1.S +++ /dev/null @@ -1,112 +0,0 @@ -/* Intel Pentium-4 mpn_sub_n -- mpn subtraction. - * - * Copyright 2001, 2002 Free Software Foundation, Inc. - * - * This file is part of Libgcrypt. - * - * Libgcrypt is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as - * published by the Free Software Foundation; either version 2.1 of - * the License, or (at your option) any later version. - * - * Libgcrypt is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * Note: This code is heavily based on the GNU MP Library. - * Actually it's the same code with only minor changes in the - * way the data is stored; this is to support the abstraction - * of an optional secure memory allocation which may be used - * to avoid revealing of sensitive data due to paging etc. - */ - - -#include "sysdep.h" -#include "asm-syntax.h" - - -/******************* - * mpi_limb_t - * _gcry_mpih_sub_n( mpi_ptr_t res_ptr, (sp + 4) - * mpi_ptr_t s1_ptr, (sp + 8) - * mpi_ptr_t s2_ptr, (sp + 12) - * mpi_size_t size) (sp + 16) - * - * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2 - * 6.0 cycles/limb if dst==src1 or dst==src2 - * P4 Prescott: >= 5 cycles/limb - * - * The main loop code is 2x unrolled so that the carry bit can alternate - * between mm0 and mm1. - */ - - -.text - ALIGN (3) - .globl C_SYMBOL_NAME(_gcry_mpih_sub_n) -C_SYMBOL_NAME(_gcry_mpih_sub_n:) - - pxor %mm0, %mm0 /* borrow = 0 */ -.Lstart_nc: - movl 8(%esp), %eax /* s1_ptr */ - movl %ebx, 8(%esp) /* save %ebx in the s1_ptr argument slot */ - movl 12(%esp), %ebx /* s2_ptr */ - movl 4(%esp), %edx /* res_ptr */ - movl 16(%esp), %ecx /* size; tested only after a limb is done, so must be >= 1 */ - - leal (%eax,%ecx,4), %eax /* src1 end */ - leal (%ebx,%ecx,4), %ebx /* src2 end */ - leal (%edx,%ecx,4), %edx /* dst end */ - negl %ecx /* -size */ - -.Ltop: -/* - C eax src1 end - C ebx src2 end - C ecx counter, limbs, negative - C edx dst end - C mm0 carry bit -*/ - - movd (%eax,%ecx,4), %mm1 - movd (%ebx,%ecx,4), %mm2 - psubq %mm2, %mm1 - - psubq %mm0, %mm1 /* subtract the incoming borrow (mm0) */ - movd %mm1, (%edx,%ecx,4) - - psrlq $63, %mm1 /* sign bit of the 64-bit difference = borrow out, now in mm1 */ - - addl $1, %ecx - jz .Ldone_mm1 /* odd size: the final borrow is in mm1 */ - - movd (%eax,%ecx,4), %mm0 - movd (%ebx,%ecx,4), %mm2 - psubq %mm2, %mm0 - - psubq %mm1, %mm0 /* subtract the incoming borrow (mm1) */ - movd %mm0, (%edx,%ecx,4) - - psrlq $63, %mm0 /* borrow out, back in mm0 for the next pass */ - - addl $1, %ecx - jnz .Ltop - - - movd %mm0, %eax /* return the borrow */ - movl 8(%esp), %ebx /* restore saved %ebx */ - emms - ret - - - -.Ldone_mm1: - movd %mm1, %eax /* return the borrow */ - movl 8(%esp), %ebx /* restore saved %ebx */ - emms - ret |