From d5201cf94e47f06b35ada2451a348938af576f16 Mon Sep 17 00:00:00 2001 From: Guillaume Melquiond <guillaume.melquiond@inria.fr> Date: Thu, 6 Feb 2020 10:04:00 +0100 Subject: [PATCH] Add version 0.1. --- .gitignore | 10 + .gitmodules | 4 + README.md | 13 +- generated-src/AUTHORS | 2 + generated-src/Makefile.am | 17 + generated-src/NEWS | 4 + generated-src/README | 45 ++ generated-src/configure.ac | 9 + generated-src/lib/add.c | 225 +++++++++ generated-src/lib/add.h | 20 + generated-src/lib/array.h | 10 + generated-src/lib/c.h | 6 + generated-src/lib/compare.c | 39 ++ generated-src/lib/compare.h | 7 + generated-src/lib/computerdivision.h | 5 + generated-src/lib/div.c | 657 ++++++++++++++++++++++++++ generated-src/lib/div.h | 55 +++ generated-src/lib/euclideandivision.h | 5 + generated-src/lib/fxp.h | 9 + generated-src/lib/int.h | 11 + generated-src/lib/int32.h | 7 + generated-src/lib/logical.c | 194 ++++++++ generated-src/lib/logical.h | 24 + generated-src/lib/map.h | 7 + generated-src/lib/minmax.h | 7 + generated-src/lib/mul.c | 136 ++++++ generated-src/lib/mul.h | 14 + generated-src/lib/power.h | 5 + generated-src/lib/realinfix.h | 14 + generated-src/lib/sqrt.c | 306 ++++++++++++ generated-src/lib/sqrt.h | 12 + generated-src/lib/sqrt1.c | 60 +++ generated-src/lib/sqrt1.h | 7 + generated-src/lib/sqrtinit.h | 51 ++ generated-src/lib/sub.c | 219 +++++++++ generated-src/lib/sub.h | 20 + generated-src/lib/toom.c | 537 +++++++++++++++++++++ generated-src/lib/toom.h | 24 + generated-src/lib/types.h | 5 + generated-src/lib/uint64.h | 6 + generated-src/lib/uint64gmp.h | 91 ++++ generated-src/lib/util.c | 65 +++ generated-src/lib/util.h | 11 + generated-src/lib/valuation.h | 6 + generated-src/wmp.h | 57 +++ why3 | 1 + 46 files changed, 3033 insertions(+), 6 deletions(-) create mode 100644 .gitignore create mode 100644 .gitmodules create mode 100644 generated-src/AUTHORS create mode 100644 generated-src/Makefile.am create mode 100644 generated-src/NEWS create mode 100644 generated-src/README create mode 100644 generated-src/configure.ac create mode 100644 generated-src/lib/add.c create mode 100644 generated-src/lib/add.h create mode 100644 generated-src/lib/array.h create mode 100644 generated-src/lib/c.h create mode 100644 generated-src/lib/compare.c create mode 100644 generated-src/lib/compare.h create mode 100644 generated-src/lib/computerdivision.h create mode 100644 generated-src/lib/div.c create mode 100644 generated-src/lib/div.h create mode 100644 generated-src/lib/euclideandivision.h create mode 100644 generated-src/lib/fxp.h create mode 100644 generated-src/lib/int.h create mode 100644 generated-src/lib/int32.h create mode 100644 generated-src/lib/logical.c create mode 100644 generated-src/lib/logical.h create mode 100644 generated-src/lib/map.h create mode 100644 generated-src/lib/minmax.h create mode 100644 generated-src/lib/mul.c create mode 100644 generated-src/lib/mul.h create mode 100644 generated-src/lib/power.h create mode 100644 generated-src/lib/realinfix.h create mode 100644 generated-src/lib/sqrt.c create mode 100644 generated-src/lib/sqrt.h create mode 100644 generated-src/lib/sqrt1.c create mode 100644 generated-src/lib/sqrt1.h create mode 100644 generated-src/lib/sqrtinit.h create mode 100644 generated-src/lib/sub.c create mode 100644 generated-src/lib/sub.h create mode 100644 generated-src/lib/toom.c create mode 100644 generated-src/lib/toom.h create mode 100644 generated-src/lib/types.h create mode 100644 generated-src/lib/uint64.h create mode 100644 generated-src/lib/uint64gmp.h create mode 100644 generated-src/lib/util.c create mode 100644 generated-src/lib/util.h create mode 100644 generated-src/lib/valuation.h create mode 100644 generated-src/wmp.h create mode 160000 why3 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f0ad91c --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +generated-src/aclocal.m4 +generated-src/ChangeLog +generated-src/compile +generated-src/configure +generated-src/COPYING +generated-src/depcomp +generated-src/INSTALL +generated-src/install-sh +generated-src/Makefile.in +generated-src/missing diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..7a005c1 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,4 @@ +[submodule "why3"] + path = why3 + url = https://gitlab.inria.fr/why3/why3.git + branch = wmpz diff --git a/README.md b/README.md index ee7a39a..555e7cd 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,27 @@ # WhyMP -WhyMP is an arbitrary-precision integer library for C that is inspired by GMP, implemented in WhyML, and formally verified using Why3. +WhyMP is an arbitrary-precision integer library for C that is inspired by [GMP](https://gmplib.org/), implemented in WhyML, and formally verified using [Why3](http://why3.lri.fr/). -The algorithms are heavily inspired from [the mpn layer of the GMP library](https://gmplib.org/manual/Low_002dlevel-Functions.html). The implemented algorithms are addition, subtraction, multiplication (schoolbook and Toom-2), division, square root and modular exponentiation. +The algorithms are heavily inspired from [the mpn layer](https://gmplib.org/manual/Low_002dlevel-Functions.html) of the GMP library. The implemented algorithms are addition, subtraction, multiplication (schoolbook and Toom-2), division, square root, and modular exponentiation. The library is compatible with GMP, and its performance is comparable to its standalone version, mini-gmp. For numbers under about 1000 bits (100,000 for multiplication), it is about 10% slower than the generic, pure-C version of GMP, and about twice as slow as GMP with hand-coded assembly. ## Generating the sources -You may regenerate the C sources of the library using the most recent version of Why3. -To do so, clone [the master branch of Why3](https://gitlab.inria.fr/why3/why3), and then use the following commands: +To regenerate the C sources of the library using Why3, go into the `why3` submodule, and then use the following commands: ./autogen.sh ./configure --enable-local && make cd examples/multiprecision PATH=../../bin:$PATH make dist +This produces a `.tar.gz` file containing a standalone C library. +  ## Project overview -We have specified and reimplemented GMP's algorithms in WhyML. To do so, we have developed a simple memory model of the C language in WhyML. This memory model allows us to write WhyML programs in a fragment of the language that is practically a shallow embedding of C. +We have specified and reimplemented GMP's algorithms in WhyML. To do so, we have developed a simple [memory model](https://gitlab.inria.fr/why3/why3/raw/master/stdlib/mach/c.mlw) of the C language in WhyML. This memory model allows us to write WhyML programs in a fragment of the language that is practically a shallow embedding of C. We have added to Why3 an extraction mechanism from Why3 to C. It only supports WhyML programs that are in the C-like fragment of the language, but the compilation is transparent and does not add inefficiencies that the compiler cannot optimize away. Thus, the resulting C program is easily predictable to the WhyML programmer. @@ -56,4 +57,4 @@ int32_t wmpn_cmp(uint64_t * x, uint64_t * y, int32_t sz) { return 0; } ``` -The WhyML proofs are about 20,000 lines long in total (for about 5000 lines of extracted C). The split is about 1/3 program code to 2/3 specifications and assertions. \ No newline at end of file +The WhyML proofs are about 20,000 lines long in total (for about 5000 lines of extracted C). The split is about 1/3 program code to 2/3 specifications and assertions. diff --git a/generated-src/AUTHORS b/generated-src/AUTHORS new file mode 100644 index 0000000..2ba9097 --- /dev/null +++ b/generated-src/AUTHORS @@ -0,0 +1,2 @@ +Raphaël Rieu-Helft <raphael.rieu-helft@trust-in-soft.com> +Guillaume Melquiond <guillaume.melquiond@inria.fr> diff --git a/generated-src/Makefile.am b/generated-src/Makefile.am new file mode 100644 index 0000000..a7a4d30 --- /dev/null +++ b/generated-src/Makefile.am @@ -0,0 +1,17 @@ +AUTOMAKE_OPTIONS = subdir-objects + +lib_LIBRARIES = libwmp.a + +libwmp_a_SOURCES = \ + lib/int.h lib/int32.h lib/uint64.h lib/uint64gmp.h lib/fxp.h \ + lib/euclideandivision.h lib/computerdivision.h lib/valuation.h \ + lib/array.h lib/map.h lib/c.h lib/power.h lib/types.h lib/realinfix.h lib/minmax.h \ + lib/util.c lib/util.h \ + lib/logical.c lib/logical.h \ + lib/compare.c lib/compare.h \ + lib/add.c lib/add.h lib/sub.c lib/sub.h \ + lib/mul.c lib/mul.h lib/toom.c lib/toom.h \ + lib/div.c lib/div.h \ + lib/sqrt.c lib/sqrt.h lib/sqrt1.c lib/sqrt1.h lib/sqrtinit.h + +include_HEADERS = wmp.h diff --git a/generated-src/NEWS b/generated-src/NEWS new file mode 100644 index 0000000..2f4d450 --- /dev/null +++ b/generated-src/NEWS @@ -0,0 +1,4 @@ +Version 0.1 +----------- + +- initial release diff --git a/generated-src/README b/generated-src/README new file mode 100644 index 0000000..0a6568d --- /dev/null +++ b/generated-src/README @@ -0,0 +1,45 @@ +WMP +=== + +wmp is a formally verified arbitrary-precision library. It is +compatible with GMP's mpn layer, and implements a subset of the +functions found at +https://gmplib.org/manual/Low_002dlevel-Functions.html (which also +serves as documentation). The function names are prefixed by the +letter w, e.g. wmpn_add. wmp uses 64-bit limbs and 32-bit sizes. + +All declarations needed to use wmp are collected in the include file +wmp.h. It is designed to work with both C and C++ compilers. + + #include <wmp.h> + +All programs using wmp must link against the libwmp library. On a +typical Unix-like system this can be done with `-lwmp`, for example + + gcc myprogram.c -lwmp + +The proofs assume that the source operands (of type wmpn_srcptr) are +entirely separated from the destination operands. This is a departure +from GMP, which allows them to be either equal or separated. When they +are equal, you should use the *_in_place versions instead. In +practice, using the baseline versions should work even in place, but +this fact is not formally verified. + +The C files are generated using the Why3 platform. The algorithms were +reimplemented and verified in WhyML (see +https://gitlab.inria.fr/why3/why3/tree/master/examples/multiprecision), +and extracted to C. + +The implemented algorithms are addition, subtraction, multiplication +(basecase and Toom-2), division, logical shifts and square root. A +complete list of exported functions can be found in the header +wmp.h. + +For operands under about 1000 bits, the performances are comparable to +GMP's standalone version, mini-gmp, and about twice as slow as GMP +with hand-coded assembly. For multiplication, this comparison holds +until about 100000 bits. Above those thresholds, GMP uses +asymptotically faster algorithms that we have not implemented, so the +gap widens. + + diff --git a/generated-src/configure.ac b/generated-src/configure.ac new file mode 100644 index 0000000..57ac4da --- /dev/null +++ b/generated-src/configure.ac @@ -0,0 +1,9 @@ +AC_INIT([libwmp], [0.1], [Raphaël Rieu-Helft <raphael.rieu-helft@trust-in-soft.com>]) +AM_INIT_AUTOMAKE + +AC_PROG_CC +AC_PROG_INSTALL +AC_PROG_RANLIB + +AC_CONFIG_FILES([Makefile]) +AC_OUTPUT diff --git a/generated-src/lib/add.c b/generated-src/lib/add.c new file mode 100644 index 0000000..dedb08a --- /dev/null +++ b/generated-src/lib/add.c @@ -0,0 +1,225 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "power.h" + +#include "c.h" + +#include "array.h" + +#include "map.h" + +#include "types.h" + +#include "euclideandivision.h" + +uint64_t wmpn_add_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y) +{ + uint64_t lx; + uint64_t res; + int32_t i; + uint64_t c; + uint64_t res1; + lx = *x; + res = lx + y; + i = 1; + c = UINT64_C(0); + *r = res; + if (res < lx) { + c = UINT64_C(1); + while (i < sz) { + lx = x[i]; + res1 = lx + UINT64_C(1); + r[i] = res1; + i = i + 1; + if (!(res1 == UINT64_C(0))) { + c = UINT64_C(0); + break; + } + } + } + while (i < sz) { + lx = x[i]; + r[i] = lx; + i = i + 1; + } + return c; +} + +uint64_t wmpn_add_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz) +{ + uint64_t lx, ly, c; + int32_t i; + uint64_t res, carry; + struct __add64_with_carry_result struct_res; + lx = UINT64_C(0); + ly = UINT64_C(0); + c = UINT64_C(0); + i = 0; + while (i < sz) { + lx = x[i]; + ly = y[i]; + struct_res = add64_with_carry(lx, ly, c); + res = struct_res.__field_0; + carry = struct_res.__field_1; + r[i] = res; + c = carry; + i = i + 1; + } + return c; +} + +uint64_t wmpn_add(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy) +{ + uint64_t lx, ly, c; + int32_t i; + uint64_t res, carry, res1; + struct __add64_with_carry_result struct_res; + lx = UINT64_C(0); + ly = UINT64_C(0); + c = UINT64_C(0); + i = 0; + while (i < sy) { + lx = x[i]; + ly = y[i]; + struct_res = add64_with_carry(lx, ly, c); + res = struct_res.__field_0; + carry = struct_res.__field_1; + r[i] = res; + c = carry; + i = i + 1; + } + if (!(c == UINT64_C(0))) { + while (i < sx) { + lx = x[i]; + res1 = lx + UINT64_C(1); + r[i] = res1; + i = i + 1; + if (!(res1 == UINT64_C(0))) { + c = UINT64_C(0); + break; + } + } + } + while (i < sx) { + lx = x[i]; + r[i] = lx; + i = i + 1; + } + return c; +} + +uint64_t wmpn_add_in_place(uint64_t * x, int32_t sx, uint64_t * y, int32_t sy) +{ + uint64_t lx, ly, c; + int32_t i; + uint64_t res, carry, res1; + struct __add64_with_carry_result struct_res; + lx = UINT64_C(0); + ly = UINT64_C(0); + c = UINT64_C(0); + i = 0; + while (i < sy) { + lx = x[i]; + ly = y[i]; + struct_res = add64_with_carry(lx, ly, c); + res = struct_res.__field_0; + carry = struct_res.__field_1; + x[i] = res; + c = carry; + i = i + 1; + } + if (!(c == UINT64_C(0))) { + while (i < sx) { + lx = x[i]; + res1 = lx + UINT64_C(1); + x[i] = res1; + i = i + 1; + if (!(res1 == UINT64_C(0))) { + c = UINT64_C(0); + break; + } + } + } + return c; +} + +void wmpn_incr(uint64_t * x, uint64_t y) +{ + uint64_t c, lx; + uint64_t * xp; + uint64_t res, res1; + c = UINT64_C(0); + lx = *x; + xp = x + 1; + res = lx + y; + *x = res; + if (res < lx) { + c = UINT64_C(1); + while (!(c == UINT64_C(0))) { + lx = *xp; + res1 = lx + UINT64_C(1); + *xp = res1; + xp = xp + 1; + if (!(res1 == UINT64_C(0))) { + c = UINT64_C(0); + break; + } + } + } else { + return; + } +} + +void wmpn_incr_1(uint64_t * x) +{ + uint64_t r, lx; + uint64_t * xp; + uint64_t res; + r = UINT64_C(0); + lx = UINT64_C(0); + xp = x + 0; + while (r == UINT64_C(0)) { + lx = *xp; + res = lx + UINT64_C(1); + r = res; + *xp = res; + xp = xp + 1; + } +} + +uint64_t wmpn_add_1_in_place(uint64_t * x, int32_t sz, uint64_t y) +{ + uint64_t c, lx; + int32_t i; + uint64_t res, res1; + c = UINT64_C(0); + lx = *x; + i = 1; + res = lx + y; + *x = res; + if (res < lx) { + c = UINT64_C(1); + while (i < sz) { + lx = x[i]; + res1 = lx + UINT64_C(1); + x[i] = res1; + i = i + 1; + if (!(res1 == UINT64_C(0))) { + c = UINT64_C(0); + break; + } + } + } + return c; +} + diff --git a/generated-src/lib/add.h b/generated-src/lib/add.h new file mode 100644 index 0000000..1fde9b5 --- /dev/null +++ b/generated-src/lib/add.h @@ -0,0 +1,20 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t wmpn_add_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y); + +uint64_t wmpn_add_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz); + +uint64_t wmpn_add(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy); + +uint64_t wmpn_add_in_place(uint64_t * x, int32_t sx, uint64_t * y, int32_t sy); + +void wmpn_incr(uint64_t * x, uint64_t y); + +void wmpn_incr_1(uint64_t * x); + +uint64_t wmpn_add_1_in_place(uint64_t * x, int32_t sz, uint64_t y); + diff --git a/generated-src/lib/array.h b/generated-src/lib/array.h new file mode 100644 index 0000000..4d5fc34 --- /dev/null +++ b/generated-src/lib/array.h @@ -0,0 +1,10 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + + + + + diff --git a/generated-src/lib/c.h b/generated-src/lib/c.h new file mode 100644 index 0000000..6b55272 --- /dev/null +++ b/generated-src/lib/c.h @@ -0,0 +1,6 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#define IGNORE2(x,y) do { (void)(x); (void)(y); } while (0) diff --git a/generated-src/lib/compare.c b/generated-src/lib/compare.c new file mode 100644 index 0000000..cd0064d --- /dev/null +++ b/generated-src/lib/compare.c @@ -0,0 +1,39 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "power.h" + +#include "c.h" + +#include "map.h" + +#include "types.h" + +int32_t wmpn_cmp(uint64_t * x, uint64_t * y, int32_t sz) +{ + int32_t i; + uint64_t lx, ly; + i = sz; + while (i >= 1) { + i = i - 1; + lx = x[i]; + ly = y[i]; + if (!(lx == ly)) { + if (lx > ly) { + return 1; + } else { + return -1; + } + } + } + return 0; +} + diff --git a/generated-src/lib/compare.h b/generated-src/lib/compare.h new file mode 100644 index 0000000..bb451d6 --- /dev/null +++ b/generated-src/lib/compare.h @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +int32_t wmpn_cmp(uint64_t * x, uint64_t * y, int32_t sz); + diff --git a/generated-src/lib/computerdivision.h b/generated-src/lib/computerdivision.h new file mode 100644 index 0000000..d8a7321 --- /dev/null +++ b/generated-src/lib/computerdivision.h @@ -0,0 +1,5 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> diff --git a/generated-src/lib/div.c b/generated-src/lib/div.c new file mode 100644 index 0000000..0b8069c --- /dev/null +++ b/generated-src/lib/div.c @@ -0,0 +1,657 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "power.h" + +#include "c.h" + +#include "array.h" + +#include "map.h" + +#include "types.h" + +#include "compare.h" + +#include "util.h" + +#include "add.h" + +#include "sub.h" + +#include "logical.h" + +#include "euclideandivision.h" + +#include "minmax.h" + +uint64_t invert_limb(uint64_t d) +{ return div64_2by1((0xffffffffffffffffUL), (0xffffffffffffffffUL) - d, d); +} + +struct __div2by1_inv_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +struct __div2by1_inv_result div2by1_inv(uint64_t uh, uint64_t ul, uint64_t d, + uint64_t v) +{ + uint64_t l, h, sl, c; + struct __mul64_double_result struct_res; + struct __add64_with_carry_result struct_res1, struct_res2; + uint64_t sh, p; + uint64_t qh, ql, r; + struct __div2by1_inv_result result; + struct_res = mul64_double(v, uh); + l = struct_res.__field_0; + h = struct_res.__field_1; + struct_res1 = add64_with_carry(l, ul, UINT64_C(0)); + sl = struct_res1.__field_0; + c = struct_res1.__field_1; + struct_res2 = add64_with_carry(uh, h, c); + sh = struct_res2.__field_0; + qh = sh; + ql = sl; + qh = qh + UINT64_C(1); + p = qh * d; + r = ul - p; + if (r > ql) { + qh = qh - UINT64_C(1); + r = r + d; + } + if (r >= d) { + qh = qh + UINT64_C(1); + r = r - d; + } + result.__field_0 = qh; + result.__field_1 = r; + return result; +} + +uint64_t wmpn_divrem_1(uint64_t * q, uint64_t * x, int32_t sz, uint64_t y) +{ + int32_t msb; + uint64_t lx, r; + int32_t i; + int32_t clz; + uint64_t ry, v, l, h, qu, rem; + struct __lsld_ext_result struct_res; + struct __div2by1_inv_result struct_res1, struct_res2; + uint64_t v1, qu1, rem1; + msb = sz - 1; + lx = UINT64_C(0); + i = msb; + r = UINT64_C(0); + clz = __builtin_clzll(y); + if (clz > 0) { + ry = y << (uint64_t)clz; + v = invert_limb(ry); + while (i >= 0) { + lx = x[i]; + struct_res = lsld_ext(lx, (uint64_t)clz); + l = struct_res.__field_0; + h = struct_res.__field_1; + struct_res1 = div2by1_inv(r + h, l, ry, v); + qu = struct_res1.__field_0; + rem = struct_res1.__field_1; + r = rem; + q[i] = qu; + i = i - 1; + } + return r >> (uint64_t)clz; + } else { + v1 = invert_limb(y); + while (i >= 0) { + lx = x[i]; + struct_res2 = div2by1_inv(r, lx, y, v1); + qu1 = struct_res2.__field_0; + rem1 = struct_res2.__field_1; + r = rem1; + q[i] = qu1; + i = i - 1; + } + return r; + } +} + +struct __div3by2_inv_result +{ uint64_t __field_0; + uint64_t __field_1; + uint64_t __field_2; +}; + +struct __div3by2_inv_result div3by2_inv(uint64_t uh, uint64_t um, + uint64_t ul, uint64_t dh, + uint64_t dl, uint64_t v) +{ + uint64_t q1, r0, r1; + uint64_t l, h, sl, c; + struct __mul64_double_result struct_res; + struct __add64_with_carry_result struct_res1, struct_res2; + uint64_t sh, p, tl, th, il, b; + struct __mul64_double_result struct_res3; + struct __sub64_with_borrow_result struct_res4, struct_res5, struct_res6, + struct_res7; + uint64_t ih, bl, b2, bh, rl, c1, rh, bl1, b1; + struct __add64_with_carry_result struct_res8, struct_res9; + struct __sub64_with_borrow_result struct_res10, struct_res11; + uint64_t bh1; + struct __div3by2_inv_result result; + q1 = UINT64_C(0); + r0 = UINT64_C(0); + r1 = UINT64_C(0); + struct_res = mul64_double(v, uh); + l = struct_res.__field_0; + h = struct_res.__field_1; + struct_res1 = add64_with_carry(um, l, UINT64_C(0)); + sl = struct_res1.__field_0; + c = struct_res1.__field_1; + struct_res2 = add64_with_carry(uh, h, c); + sh = struct_res2.__field_0; + q1 = sh; + q1 = q1 + UINT64_C(1); + p = dh * sh; + r1 = um - p; + struct_res3 = mul64_double(sh, dl); + tl = struct_res3.__field_0; + th = struct_res3.__field_1; + struct_res4 = sub64_with_borrow(ul, tl, UINT64_C(0)); + il = struct_res4.__field_0; + b = struct_res4.__field_1; + struct_res5 = sub64_with_borrow(r1, th, b); + ih = struct_res5.__field_0; + struct_res6 = sub64_with_borrow(il, dl, UINT64_C(0)); + bl = struct_res6.__field_0; + b2 = struct_res6.__field_1; + struct_res7 = sub64_with_borrow(ih, dh, b2); + bh = struct_res7.__field_0; + r1 = bh; + r0 = bl; + if (r1 >= sl) { + q1 = q1 - UINT64_C(1); + struct_res8 = add64_with_carry(r0, dl, UINT64_C(0)); + rl = struct_res8.__field_0; + c1 = struct_res8.__field_1; + struct_res9 = add64_with_carry(r1, dh, c1); + rh = struct_res9.__field_0; + r1 = rh; + r0 = rl; + } + if (__builtin_expect(r1 > dh || (r1 == dh && r0 >= dl),0)) { + struct_res10 = sub64_with_borrow(r0, dl, UINT64_C(0)); + bl1 = struct_res10.__field_0; + b1 = struct_res10.__field_1; + struct_res11 = sub64_with_borrow(r1, dh, b1); + bh1 = struct_res11.__field_0; + q1 = q1 + UINT64_C(1); + r1 = bh1; + r0 = bl1; + } + result.__field_0 = q1; + result.__field_1 = r0; + result.__field_2 = r1; + return result; +} + +uint64_t reciprocal_word_3by2(uint64_t dh, uint64_t dl) +{ + uint64_t v, p; + uint64_t tl, th; + struct __mul64_double_result struct_res; + v = invert_limb(dh); + p = dh * v; + p = p + dl; + if (p < dl) { + if (p >= dh) { + v = v - UINT64_C(1); + p = p - dh; + } + v = v - UINT64_C(1); + p = p - dh; + } + struct_res = mul64_double(v, dl); + tl = struct_res.__field_0; + th = struct_res.__field_1; + p = p + th; + if (p < th) { + if (p > dh || (p == dh && tl >= dl)) { + v = v - UINT64_C(1); + } + v = v - UINT64_C(1); + } + return v; +} + +struct __sub3_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +struct __sub3_result sub3(uint64_t x, uint64_t y, uint64_t z) +{ + uint64_t u1, b1, u2, b2; + struct __sub64_with_borrow_result struct_res, struct_res1; + struct __sub3_result result; + struct_res = sub64_with_borrow(x, y, UINT64_C(0)); + u1 = struct_res.__field_0; + b1 = struct_res.__field_1; + struct_res1 = sub64_with_borrow(u1, z, UINT64_C(0)); + u2 = struct_res1.__field_0; + b2 = struct_res1.__field_1; + result.__field_0 = u2; + result.__field_1 = b1 + b2; + return result; +} + +uint64_t wmpn_submul_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y) +{ + uint64_t lx, lr, b; + int32_t i; + uint64_t rl, rh, res, borrow; + struct __mul64_double_result struct_res; + struct __sub3_result struct_res1; + lx = UINT64_C(0); + lr = UINT64_C(0); + b = UINT64_C(0); + i = 0; + while (i < sz) { + lx = x[i]; + lr = r[i]; + struct_res = mul64_double(lx, y); + rl = struct_res.__field_0; + rh = struct_res.__field_1; + struct_res1 = sub3(lr, rl, b); + res = struct_res1.__field_0; + borrow = struct_res1.__field_1; + r[i] = res; + b = rh + borrow; + i = i + 1; + } + return b; +} + +uint64_t div_sb_qr(uint64_t * q, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy) +{ + uint64_t * xp; + uint64_t * qp; + uint64_t dh, dl, v; + int32_t i; + int32_t mdn; + uint64_t ql, x1, x0; + uint64_t * xd; + int32_t r; + uint64_t qh, nx0, xp0, xp1, qu, rl, rh; + uint64_t * xd1; + struct __div3by2_inv_result struct_res; + uint64_t cy, cy1, cy2, c; + xp = x + (sx - 2); + qp = q + (sx - sy); + dh = y[sy - 1]; + dl = y[sy - 2]; + v = reciprocal_word_3by2(dh, dl); + i = sx - sy; + mdn = 2 - sy; + ql = UINT64_C(0); + xd = xp + mdn; + x1 = UINT64_C(0); + x0 = UINT64_C(0); + r = wmpn_cmp(xd, y, sy); + if (r >= 0) { + qh = UINT64_C(1); + } else { + qh = UINT64_C(0); + } + if (!(qh == UINT64_C(0))) { + wmpn_sub_in_place(xd, sy, y, sy); + } + x1 = xp[1]; + while (i > 0) { + i = i - 1; + xp = xp + -1; + xd1 = xp + mdn; + nx0 = xp[1]; + if (__builtin_expect(x1 == dh && nx0 == dl,0)) { + ql = (0xffffffffffffffffUL); + wmpn_submul_1(xd1, y, sy, ql); + x1 = xp[1]; + qp = qp + -1; + *qp = ql; + } else { + xp0 = *xp; + xp1 = xp[1]; + struct_res = div3by2_inv(x1, xp1, xp0, dh, dl, v); + qu = struct_res.__field_0; + rl = struct_res.__field_1; + rh = struct_res.__field_2; + ql = qu; + x1 = rh; + x0 = rl; + cy = wmpn_submul_1(xd1, y, sy - 2, ql); + if (x0 < cy) { + cy1 = UINT64_C(1); + } else { + cy1 = UINT64_C(0); + } + x0 = x0 - cy; + if (x1 < cy1) { + cy2 = UINT64_C(1); + } else { + cy2 = UINT64_C(0); + } + x1 = x1 - cy1; + *xp = x0; + if (__builtin_expect(!(cy2 == UINT64_C(0)),0)) { + c = wmpn_add_in_place(xd1, sy - 1, y, sy - 1); + x1 = x1 + (dh + c); + ql = ql - UINT64_C(1); + qp = qp + -1; + *qp = ql; + } else { + qp = qp + -1; + *qp = ql; + } + } + } + xp[1] = x1; + return qh; +} + +uint64_t wmpn_divrem_2(uint64_t * q, uint64_t * x, uint64_t * y, int32_t sx) +{ + uint64_t * xp; + uint64_t dh, dl; + uint64_t rh, rl, qh, lx; + int32_t i; + uint64_t dinv, r0, b, r1, qu, r01, r11; + struct __sub64_with_borrow_result struct_res, struct_res1; + struct __div3by2_inv_result struct_res2; + xp = x + (sx - 2); + dh = y[1]; + dl = *y; + rh = xp[1]; + rl = *xp; + qh = UINT64_C(0); + lx = UINT64_C(0); + i = sx - 2; + dinv = reciprocal_word_3by2(dh, dl); + if (rh >= dh && (rh > dh || rl >= dl)) { + struct_res = sub64_with_borrow(rl, dl, UINT64_C(0)); + r0 = struct_res.__field_0; + b = struct_res.__field_1; + struct_res1 = sub64_with_borrow(rh, dh, b); + r1 = struct_res1.__field_0; + rh = r1; + rl = r0; + qh = UINT64_C(1); + } + while (i > 0) { + xp = xp + -1; + lx = *xp; + struct_res2 = div3by2_inv(rh, rl, lx, dh, dl, dinv); + qu = struct_res2.__field_0; + r01 = struct_res2.__field_1; + r11 = struct_res2.__field_2; + rh = r11; + rl = r01; + i = i - 1; + q[i] = qu; + } + x[1] = rh; + *x = rl; + return qh; +} + +void div_qr(uint64_t * q, uint64_t * r, uint64_t * x, uint64_t * y, + uint64_t * nx, uint64_t * ny, int32_t sx, int32_t sy) +{ + uint64_t lr; + int32_t clz; + int32_t i; + uint64_t * xp; + uint64_t * rp; + int32_t i1; + uint64_t * xp1; + uint64_t * rp1; + uint64_t h; + int32_t adjust, clz1; + int32_t i2; + uint64_t * xp2; + uint64_t * rp2; + int32_t i3; + uint64_t * xp3; + uint64_t * rp3; + uint64_t h1; + if (sy == 1) { + lr = wmpn_divrem_1(q, x, sx, *y); + *r = lr; + return; + } else { + if (sy == 2) { + clz = __builtin_clzll(y[sy - 1]); + if (clz == 0) { + i = 0; + xp = x + 0; + rp = nx + 0; + while (i < sx) { + *rp = *xp; + rp = rp + 1; + xp = xp + 1; + i = i + 1; + } + nx[sx] = UINT64_C(0); + wmpn_divrem_2(q, nx, y, sx + 1); + i1 = 0; + xp1 = nx + 0; + rp1 = r + 0; + while (i1 < sy) { + *rp1 = *xp1; + rp1 = rp1 + 1; + xp1 = xp1 + 1; + i1 = i1 + 1; + } + } else { + wmpn_lshift(ny, y, sy, (uint64_t)clz); + h = wmpn_lshift(nx, x, sx, (uint64_t)clz); + nx[sx] = h; + wmpn_divrem_2(q, nx, ny, sx + 1); + wmpn_rshift(r, nx, sy, (uint64_t)clz); + return; + } + } else { + if (x[sx - 1] >= y[sy - 1]) { + adjust = 1; + } else { + adjust = 0; + } + clz1 = __builtin_clzll(y[sy - 1]); + if (clz1 == 0) { + i2 = 0; + xp2 = x + 0; + rp2 = nx + 0; + while (i2 < sx) { + *rp2 = *xp2; + rp2 = rp2 + 1; + xp2 = xp2 + 1; + i2 = i2 + 1; + } + nx[sx] = UINT64_C(0); + div_sb_qr(q, nx, sx + adjust, y, sy); + i3 = 0; + xp3 = nx + 0; + rp3 = r + 0; + while (i3 < sy) { + *rp3 = *xp3; + rp3 = rp3 + 1; + xp3 = xp3 + 1; + i3 = i3 + 1; + } + if (adjust == 0) { + q[sx - sy] = UINT64_C(0); + return; + } else { + return; + } + } else { + wmpn_lshift(ny, y, sy, (uint64_t)clz1); + h1 = wmpn_lshift(nx, x, sx, (uint64_t)clz1); + nx[sx] = h1; + div_sb_qr(q, nx, sx + adjust, ny, sy); + wmpn_rshift(r, nx, sy, (uint64_t)clz1); + if (adjust == 0) { + q[sx - sy] = UINT64_C(0); + return; + } else { + return; + } + } + } + } +} + +void wmpn_tdiv_qr(uint64_t * q, uint64_t * r, uint64_t * x, int32_t sx, + uint64_t * y, int32_t sy) +{ + uint64_t * nx; + uint64_t * ny; + nx = malloc(((uint32_t)sx + 1U) * sizeof(uint64_t)); + assert (nx); + ny = malloc((uint32_t)sy * sizeof(uint64_t)); + assert (ny); + div_qr(q, r, x, y, nx, ny, sx, sy); + free(nx); + free(ny); + return; +} + +void div_qr_in_place(uint64_t * q, uint64_t * x, uint64_t * y, uint64_t * nx, + uint64_t * ny, int32_t sx, int32_t sy) +{ + uint64_t lr; + int32_t clz; + int32_t i; + uint64_t * xp; + uint64_t * rp; + int32_t i1; + uint64_t * xp1; + uint64_t * rp1; + uint64_t h; + int32_t adjust, clz1; + int32_t i2; + uint64_t * xp2; + uint64_t * rp2; + int32_t i3; + uint64_t * xp3; + uint64_t * rp3; + uint64_t h1; + if (sy == 1) { + lr = wmpn_divrem_1(q, x, sx, *y); + *x = lr; + return; + } else { + if (sy == 2) { + clz = __builtin_clzll(y[sy - 1]); + if (clz == 0) { + i = 0; + xp = x + 0; + rp = nx + 0; + while (i < sx) { + *rp = *xp; + rp = rp + 1; + xp = xp + 1; + i = i + 1; + } + nx[sx] = UINT64_C(0); + wmpn_divrem_2(q, nx, y, sx + 1); + i1 = 0; + xp1 = nx + 0; + rp1 = x + 0; + while (i1 < sy) { + *rp1 = *xp1; + rp1 = rp1 + 1; + xp1 = xp1 + 1; + i1 = i1 + 1; + } + } else { + wmpn_lshift(ny, y, sy, (uint64_t)clz); + h = wmpn_lshift(nx, x, sx, (uint64_t)clz); + nx[sx] = h; + wmpn_divrem_2(q, nx, ny, sx + 1); + wmpn_rshift(x, nx, sy, (uint64_t)clz); + return; + } + } else { + if (x[sx - 1] >= y[sy - 1]) { + adjust = 1; + } else { + adjust = 0; + } + clz1 = __builtin_clzll(y[sy - 1]); + if (clz1 == 0) { + i2 = 0; + xp2 = x + 0; + rp2 = nx + 0; + while (i2 < sx) { + *rp2 = *xp2; + rp2 = rp2 + 1; + xp2 = xp2 + 1; + i2 = i2 + 1; + } + nx[sx] = UINT64_C(0); + div_sb_qr(q, nx, sx + adjust, y, sy); + i3 = 0; + xp3 = nx + 0; + rp3 = x + 0; + while (i3 < sy) { + *rp3 = *xp3; + rp3 = rp3 + 1; + xp3 = xp3 + 1; + i3 = i3 + 1; + } + if (adjust == 0) { + q[sx - sy] = UINT64_C(0); + return; + } else { + return; + } + } else { + wmpn_lshift(ny, y, sy, (uint64_t)clz1); + h1 = wmpn_lshift(nx, x, sx, (uint64_t)clz1); + nx[sx] = h1; + div_sb_qr(q, nx, sx + adjust, ny, sy); + wmpn_rshift(x, nx, sy, (uint64_t)clz1); + if (adjust == 0) { + q[sx - sy] = UINT64_C(0); + return; + } else { + return; + } + } + } + } +} + +void wmpn_tdiv_qr_in_place(uint64_t * q, uint64_t * x, int32_t sx, + uint64_t * y, int32_t sy) +{ + uint64_t * nx; + uint64_t * ny; + nx = malloc(((uint32_t)sx + 1U) * sizeof(uint64_t)); + assert (nx); + ny = malloc((uint32_t)sy * sizeof(uint64_t)); + assert (ny); + div_qr_in_place(q, x, y, nx, ny, sx, sy); + free(nx); + free(ny); + return; +} + diff --git a/generated-src/lib/div.h b/generated-src/lib/div.h new file mode 100644 index 0000000..714b33c --- /dev/null +++ b/generated-src/lib/div.h @@ -0,0 +1,55 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t invert_limb(uint64_t d); + +struct __div2by1_inv_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +struct __div2by1_inv_result div2by1_inv(uint64_t uh, uint64_t ul, uint64_t d, + uint64_t v); + +uint64_t wmpn_divrem_1(uint64_t * q, uint64_t * x, int32_t sz, uint64_t y); + +struct __div3by2_inv_result +{ uint64_t __field_0; + uint64_t __field_1; + uint64_t __field_2; +}; + +struct __div3by2_inv_result div3by2_inv(uint64_t uh, uint64_t um, + uint64_t ul, uint64_t dh, + uint64_t dl, uint64_t v); + +uint64_t reciprocal_word_3by2(uint64_t dh, uint64_t dl); + +struct __sub3_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +struct __sub3_result sub3(uint64_t x, uint64_t y, uint64_t z); + +uint64_t wmpn_submul_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y); + +uint64_t div_sb_qr(uint64_t * q, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy); + +uint64_t wmpn_divrem_2(uint64_t * q, uint64_t * x, uint64_t * y, int32_t sx); + +void div_qr(uint64_t * q, uint64_t * r, uint64_t * x, uint64_t * y, + uint64_t * nx, uint64_t * ny, int32_t sx, int32_t sy); + +void wmpn_tdiv_qr(uint64_t * q, uint64_t * r, uint64_t * x, int32_t sx, + uint64_t * y, int32_t sy); + +void div_qr_in_place(uint64_t * q, uint64_t * x, uint64_t * y, uint64_t * nx, + uint64_t * ny, int32_t sx, int32_t sy); + +void wmpn_tdiv_qr_in_place(uint64_t * q, uint64_t * x, int32_t sx, + uint64_t * y, int32_t sy); + diff --git a/generated-src/lib/euclideandivision.h b/generated-src/lib/euclideandivision.h new file mode 100644 index 0000000..d8a7321 --- /dev/null +++ b/generated-src/lib/euclideandivision.h @@ -0,0 +1,5 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> diff --git a/generated-src/lib/fxp.h b/generated-src/lib/fxp.h new file mode 100644 index 0000000..5d8a348 --- /dev/null +++ b/generated-src/lib/fxp.h @@ -0,0 +1,9 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t fxp_init(uint64_t x); + +uint64_t fxp_id(uint64_t x); + diff --git a/generated-src/lib/int.h b/generated-src/lib/int.h new file mode 100644 index 0000000..e592d99 --- /dev/null +++ b/generated-src/lib/int.h @@ -0,0 +1,11 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + + + + + + diff --git a/generated-src/lib/int32.h b/generated-src/lib/int32.h new file mode 100644 index 0000000..6935395 --- /dev/null +++ b/generated-src/lib/int32.h @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + + diff --git a/generated-src/lib/logical.c b/generated-src/lib/logical.c new file mode 100644 index 0000000..6c79034 --- /dev/null +++ b/generated-src/lib/logical.c @@ -0,0 +1,194 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "power.h" + +#include "c.h" + +#include "array.h" + +#include "map.h" + +#include "types.h" + +#include "euclideandivision.h" + +uint64_t lsl_mod_ext(uint64_t x, uint64_t cnt) +{ return x << cnt; +} + +struct __lsld_ext_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +struct __lsld_ext_result lsld_ext(uint64_t x, uint64_t cnt) +{ + struct __lsld_ext_result result; + uint64_t r, d; + struct __lsld64_result struct_res; + struct __lsld_ext_result result1; + if (cnt == UINT64_C(0)) { + result.__field_0 = x; + result.__field_1 = UINT64_C(0); + return result; + } else { + struct_res = lsld64(x, cnt); + r = struct_res.__field_0; + d = struct_res.__field_1; + result1.__field_0 = r; + result1.__field_1 = d; + return result1; + } +} + +int32_t clz_ext(uint64_t x) +{ return __builtin_clzll(x); +} + +uint64_t wmpn_lshift(uint64_t * r, uint64_t * x, int32_t sz, uint64_t cnt) +{ + int32_t msb; + uint64_t * xp; + uint64_t * rp; + uint64_t high, low; + int32_t i; + uint64_t l, retval, l1, h; + struct __lsld_ext_result struct_res, struct_res1; + msb = sz - 1; + xp = x + msb; + rp = r + msb; + high = UINT64_C(0); + low = *xp; + i = msb; + struct_res = lsld_ext(low, cnt); + l = struct_res.__field_0; + retval = struct_res.__field_1; + high = l; + while (i > 0) { + xp = xp + -1; + low = *xp; + struct_res1 = lsld_ext(low, cnt); + l1 = struct_res1.__field_0; + h = struct_res1.__field_1; + *rp = high + h; + rp = rp + -1; + high = l1; + i = i - 1; + } + *r = high; + return retval; +} + +uint64_t wmpn_rshift(uint64_t * r, uint64_t * x, int32_t sz, uint64_t cnt) +{ + uint64_t tnc; + int32_t msb; + uint64_t * xp; + uint64_t * rp; + uint64_t high; + uint64_t retval, h; + struct __lsld_ext_result struct_res; + uint64_t low; + int32_t i; + uint64_t l, h1; + struct __lsld_ext_result struct_res1; + tnc = UINT64_C(64) - cnt; + msb = sz - 1; + xp = x + 0; + rp = r + 0; + high = *xp; + struct_res = lsld_ext(high, tnc); + retval = struct_res.__field_0; + h = struct_res.__field_1; + low = h; + i = 0; + while (i < msb) { + xp = xp + 1; + high = *xp; + struct_res1 = lsld_ext(high, tnc); + l = struct_res1.__field_0; + h1 = struct_res1.__field_1; + *rp = low + l; + low = h1; + i = i + 1; + rp = rp + 1; + } + *rp = low; + return retval; +} + +uint64_t wmpn_lshift_in_place(uint64_t * x, int32_t sz, uint64_t cnt) +{ + int32_t msb; + uint64_t * xp; + uint64_t high, low; + int32_t i; + uint64_t l, retval, l1, h; + struct __lsld_ext_result struct_res, struct_res1; + msb = sz - 1; + xp = x + msb; + high = UINT64_C(0); + low = *xp; + i = msb; + struct_res = lsld_ext(low, cnt); + l = struct_res.__field_0; + retval = struct_res.__field_1; + high = l; + while (i > 0) { + xp = xp + -1; + low = *xp; + struct_res1 = lsld_ext(low, cnt); + l1 = struct_res1.__field_0; + h = struct_res1.__field_1; + xp[1] = high + h; + high = l1; + i = i - 1; + } + *x = high; + return retval; +} + +uint64_t wmpn_rshift_in_place(uint64_t * x, int32_t sz, uint64_t cnt) +{ + uint64_t tnc; + int32_t msb; + uint64_t * xp; + uint64_t high; + uint64_t retval, h; + struct __lsld_ext_result struct_res; + uint64_t low; + int32_t i; + uint64_t l, h1; + struct __lsld_ext_result struct_res1; + tnc = UINT64_C(64) - cnt; + msb = sz - 1; + xp = x + 0; + high = *xp; + struct_res = lsld_ext(high, tnc); + retval = struct_res.__field_0; + h = struct_res.__field_1; + low = h; + i = 0; + while (i < msb) { + xp = xp + 1; + high = *xp; + struct_res1 = lsld_ext(high, tnc); + l = struct_res1.__field_0; + h1 = struct_res1.__field_1; + xp[-1] = low + l; + low = h1; + i = i + 1; + } + x[msb] = low; + return retval; +} + diff --git a/generated-src/lib/logical.h b/generated-src/lib/logical.h new file mode 100644 index 0000000..cd12baa --- /dev/null +++ b/generated-src/lib/logical.h @@ -0,0 +1,24 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t lsl_mod_ext(uint64_t x, uint64_t cnt); + +struct __lsld_ext_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +struct __lsld_ext_result lsld_ext(uint64_t x, uint64_t cnt); + +int32_t clz_ext(uint64_t x); + +uint64_t wmpn_lshift(uint64_t * r, uint64_t * x, int32_t sz, uint64_t cnt); + +uint64_t wmpn_rshift(uint64_t * r, uint64_t * x, int32_t sz, uint64_t cnt); + +uint64_t wmpn_lshift_in_place(uint64_t * x, int32_t sz, uint64_t cnt); + +uint64_t wmpn_rshift_in_place(uint64_t * x, int32_t sz, uint64_t cnt); + diff --git a/generated-src/lib/map.h b/generated-src/lib/map.h new file mode 100644 index 0000000..6935395 --- /dev/null +++ b/generated-src/lib/map.h @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + + diff --git a/generated-src/lib/minmax.h b/generated-src/lib/minmax.h new file mode 100644 index 0000000..6935395 --- /dev/null +++ b/generated-src/lib/minmax.h @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + + diff --git a/generated-src/lib/mul.c b/generated-src/lib/mul.c new file mode 100644 index 0000000..1a4bc21 --- /dev/null +++ b/generated-src/lib/mul.c @@ -0,0 +1,136 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "power.h" + +#include "c.h" + +#include "array.h" + +#include "map.h" + +#include "types.h" + +#include "util.h" + +#include "add.h" + +uint64_t wmpn_mul_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y) +{ + uint64_t cl, ul; + int32_t n; + uint64_t * up; + uint64_t * rp; + uint64_t l, h, lpl; + struct __mul64_double_result struct_res; + cl = UINT64_C(0); + ul = UINT64_C(0); + n = sz; + up = x + 0; + rp = r + 0; + while (!(n == 0)) { + ul = *up; + up = up + 1; + struct_res = mul64_double(ul, y); + l = struct_res.__field_0; + h = struct_res.__field_1; + lpl = l + cl; + cl = (lpl < cl ? UINT64_C(1) : UINT64_C(0)) + h; + *rp = lpl; + rp = rp + 1; + n = n - 1; + } + return cl; +} + +uint64_t wmpn_addmul_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y) +{ + uint64_t ul, rl, cl; + int32_t n; + uint64_t * rp; + uint64_t * up; + uint64_t l, h; + struct __mul64_double_result struct_res; + uint64_t lpl; + ul = UINT64_C(0); + rl = UINT64_C(0); + cl = UINT64_C(0); + n = sz; + rp = r + 0; + up = x + 0; + while (!(n == 0)) { + ul = *up; + up = up + 1; + struct_res = mul64_double(ul, y); + l = struct_res.__field_0; + h = struct_res.__field_1; + lpl = l + cl; + cl = (lpl < cl ? UINT64_C(1) : UINT64_C(0)) + h; + rl = *rp; + lpl = rl + lpl; + cl = (lpl < rl ? UINT64_C(1) : UINT64_C(0)) + cl; + *rp = lpl; + rp = rp + 1; + n = n - 1; + } + return cl; +} + +uint64_t wmpn_addmul_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz) +{ + uint64_t * rp; + uint64_t * vp; + uint64_t lr, c; + int32_t vn; + uint64_t cqt, res, carry; + struct __add64_with_carry_result struct_res; + rp = r + 0; + vp = y + 0; + lr = UINT64_C(0); + c = UINT64_C(0); + vn = sz; + while (!(vn == 0)) { + cqt = wmpn_addmul_1(rp, x, sz, *vp); + lr = rp[sz]; + struct_res = add64_with_carry(cqt, lr, c); + res = struct_res.__field_0; + carry = struct_res.__field_1; + rp[sz] = res; + c = carry; + rp = rp + 1; + vp = vp + 1; + vn = vn - 1; + } + return c; +} + +void wmpn_mul_basecase(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy) +{ + uint64_t c; + uint64_t * rp; + uint64_t * vp; + int32_t vn; + uint64_t res; + c = wmpn_mul_1(r, x, sx, *y); + r[sx] = c; + rp = r + 1; + vp = y + 1; + vn = sy - 1; + while (vn >= 1) { + res = wmpn_addmul_1(rp, x, sx, *vp); + rp[sx] = res; + rp = rp + 1; + vp = vp + 1; + vn = vn - 1; + } +} + diff --git a/generated-src/lib/mul.h b/generated-src/lib/mul.h new file mode 100644 index 0000000..8a2fb2d --- /dev/null +++ b/generated-src/lib/mul.h @@ -0,0 +1,14 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t wmpn_mul_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y); + +uint64_t wmpn_addmul_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y); + +uint64_t wmpn_addmul_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz); + +void wmpn_mul_basecase(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy); + diff --git a/generated-src/lib/power.h b/generated-src/lib/power.h new file mode 100644 index 0000000..d8a7321 --- /dev/null +++ b/generated-src/lib/power.h @@ -0,0 +1,5 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> diff --git a/generated-src/lib/realinfix.h b/generated-src/lib/realinfix.h new file mode 100644 index 0000000..f74f737 --- /dev/null +++ b/generated-src/lib/realinfix.h @@ -0,0 +1,14 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + + + + + + + + + diff --git a/generated-src/lib/sqrt.c b/generated-src/lib/sqrt.c new file mode 100644 index 0000000..a2bbd07 --- /dev/null +++ b/generated-src/lib/sqrt.c @@ -0,0 +1,306 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "array.h" + +#include "map.h" + +#include "c.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "euclideandivision.h" + +#include "int.h" + +#include "power.h" + +#include "types.h" + +#include "compare.h" + +#include "util.h" + +#include "add.h" + +#include "sub.h" + +#include "mul.h" + +#include "logical.h" + +#include "div.h" + +#include "sqrt1.h" + +#include "toom.h" + +uint64_t wmpn_sqrtrem2(uint64_t * sp, uint64_t * rp, uint64_t * np) +{ + uint64_t np0, o, prec, nph, rq, u, uh; + uint64_t sp0, rp0, q; + int64_t cc; + uint64_t npl, ul, q2; + np0 = *np; + o = sqrt1(rp, np[1]); + sp0 = o; + rp0 = *rp; + prec = (64) / UINT64_C(2); + nph = np0 >> (prec + UINT64_C(1)); + rp0 = (rp0 << (prec - UINT64_C(1))) + nph; + q = rp0 / sp0; + rq = q >> prec; + q = q - rq; + u = rp0 - q * sp0; + sp0 = (sp0 << prec) + q; + uh = u >> (prec - UINT64_C(1)); + cc = (int64_t)uh; + npl = np0 % (UINT64_C(1) << (prec + UINT64_C(1))); + ul = u << (prec + UINT64_C(1)); + rp0 = ul + npl; + q2 = q * q; + if (rp0 < q2) { + cc = cc - INT64_C(1); + } + rp0 = rp0 - q2; + if (cc < INT64_C(0)) { + rp0 = rp0 + sp0; + if (rp0 < sp0) { + cc = cc + INT64_C(1); + } + sp0 = sp0 - UINT64_C(1); + rp0 = rp0 + sp0; + if (rp0 < sp0) { + cc = cc + INT64_C(1); + } + } + *rp = rp0; + *sp = sp0; + return (uint64_t)cc; +} + +uint64_t wmpn_dc_sqrtrem(uint64_t * sp, uint64_t * np, int32_t n, + uint64_t * scratch); + +uint64_t wmpn_dc_sqrtrem(uint64_t * sp, uint64_t * np, int32_t n, + uint64_t * scratch) +{ + uint64_t r; + int32_t l, h; + uint64_t * npqt; + uint64_t * spl; + uint64_t q; + uint64_t o, sl, sh; + uint64_t * npl; + int64_t c; + uint64_t st, ql, qh, cqt; + uint64_t * npn; + int32_t ll; + uint64_t bo, b, bo1, o1, cqt1; + uint64_t * nll; + int64_t o2, o3; + uint64_t o4, bo2; + if (n == 1) { + r = wmpn_sqrtrem2(sp, scratch, np); + *np = *scratch; + return r; + } else { + l = n / 2; + h = n - l; + npqt = np + (l + l); + spl = sp + l; + o = wmpn_dc_sqrtrem(spl, npqt, h, scratch); + q = o; + if (!(q == UINT64_C(0))) { + wmpn_sub_in_place(npqt, h, spl, h); + } + IGNORE2(np, npqt); + npl = np + l; + wmpn_tdiv_qr_in_place(scratch, npl, n, spl, h); + sl = scratch[l]; + q = q + sl; + sh = *scratch; + c = (int64_t)(sh % UINT64_C(2)); + wmpn_rshift(sp, scratch, l, UINT64_C(1)); + st = sp[l - 1]; + ql = q << ((64) - UINT64_C(1)); + qh = q >> UINT64_C(1); + sp[l - 1] = st + ql; + q = qh; + if (!(c == INT64_C(0))) { + cqt = wmpn_add_in_place(npl, h, spl, h); + c = (int64_t)cqt; + } + IGNORE2(np, npl); + npn = np + n; + wmpn_mul(npn, sp, l, sp, l); + ll = 2 * l; + bo = wmpn_sub_in_place(np, ll, npn, ll); + b = q + bo; + if (l == h) { + c = c - (int64_t)b; + } else { + nll = np + ll; + bo1 = wmpn_sub_1_in_place(nll, 1, b); + c = c - (int64_t)bo1; + } + if (c < INT64_C(0)) { + o1 = wmpn_add_1_in_place(spl, h, q); + q = o1; + IGNORE2(sp, spl); + cqt1 = wmpn_addmul_1(np, sp, n, UINT64_C(2)); + c = c + (int64_t)(UINT64_C(2) * q + cqt1); + o4 = wmpn_sub_1_in_place(np, n, UINT64_C(1)); + o3 = (int64_t)o4; + o2 = c - o3; + c = o2; + bo2 = wmpn_sub_1_in_place(sp, n, UINT64_C(1)); + q = q - bo2; + } else { + IGNORE2(sp, spl); + } + IGNORE2(np, npn); + return (uint64_t)c; + } +} + +int32_t wmpn_sqrtrem(uint64_t * sp, uint64_t * rp, uint64_t * np, int32_t n); + +int32_t wmpn_sqrtrem(uint64_t * sp, uint64_t * rp, uint64_t * np, int32_t n) +{ + uint64_t high, s, nh, ncc, cc; + uint64_t c; + int32_t res, adj; + int32_t tn, rn; + uint64_t * scratch; + uint64_t * tp; + uint64_t * o; + uint64_t * tpa; + int32_t i; + uint64_t * xp; + uint64_t * rp1; + uint64_t rl; + uint64_t o1, s00, rc; + uint64_t * s0; + uint64_t cc1; + uint64_t o2, o3; + uint64_t * tp1; + uint64_t c2; + uint64_t * tp11; + int32_t i1; + uint64_t * xp1; + uint64_t * rp2; + int32_t i2; + uint64_t * xp2; + uint64_t * rp3; + uint64_t h; + high = np[n - 1]; + c = (uint64_t)__builtin_clzll(high) / UINT64_C(2); + if (n == 1) { + if (c == UINT64_C(0)) { + s = sqrt1(rp, high); + *sp = s; + } else { + nh = high << (UINT64_C(2) * c); + ncc = sqrt1(rp, nh); + cc = ncc >> c; + *sp = cc; + *rp = high - cc * cc; + } + if (*rp == UINT64_C(0)) { + res = 0; + } else { + res = 1; + } + return res; + } + tn = (n + 1) / 2; + rn = 0; + adj = (int32_t)((uint64_t)n % UINT64_C(2)); + scratch = alloca(((uint32_t)tn / 2U + 1U) * sizeof(uint64_t)); + if (!(adj == 0) || !(c == UINT64_C(0))) { + o = alloca(2U * (uint32_t)tn * sizeof(uint64_t)); + tp = o; + *tp = UINT64_C(0); + tpa = tp + adj; + if (!(c == UINT64_C(0))) { + wmpn_lshift(tpa, np, n, UINT64_C(2) * c); + } else { + i = 0; + xp = np + 0; + rp1 = tpa + 0; + while (i < n) { + *rp1 = *xp; + rp1 = rp1 + 1; + xp = xp + 1; + i = i + 1; + } + } + IGNORE2(tp, tpa); + c = c + (!(adj == 0) ? UINT64_C(32) : UINT64_C(0)); + o1 = wmpn_dc_sqrtrem(sp, tp, tn, scratch); + rl = o1; + s0 = alloca(1U * sizeof(uint64_t)); + s00 = *sp % (UINT64_C(1) << c); + *s0 = s00; + rc = wmpn_addmul_1(tp, sp, tn, UINT64_C(2) * s00); + rl = rl + rc; + o2 = wmpn_submul_1(tp, s0, 1, s00); + cc1 = o2; + if (tn > 1) { + tp1 = tp + 1; + o3 = wmpn_sub_1_in_place(tp1, tn - 1, cc1); + cc1 = o3; + } + rl = rl - cc1; + wmpn_rshift_in_place(sp, tn, c); + tp[tn] = rl; + c2 = c << UINT64_C(1); + if (c2 < UINT64_C(64)) { + tn = tn + 1; + } else { + tp11 = tp + 1; + c2 = c2 - UINT64_C(64); + tp = tp11; + } + if (!(c2 == UINT64_C(0))) { + wmpn_rshift(rp, tp, tn, c2); + } else { + i1 = 0; + xp1 = tp + 0; + rp2 = rp + 0; + while (i1 < tn) { + *rp2 = *xp1; + rp2 = rp2 + 1; + xp1 = xp1 + 1; + i1 = i1 + 1; + } + } + rn = tn; + } else { + i2 = 0; + xp2 = np + 0; + rp3 = rp + 0; + while (i2 < n) { + *rp3 = *xp2; + rp3 = rp3 + 1; + xp2 = xp2 + 1; + i2 = i2 + 1; + } + h = wmpn_dc_sqrtrem(sp, rp, tn, scratch); + rp[tn] = h; + rn = tn + (int32_t)h; + } + while (rp[rn - 1] == UINT64_C(0)) { + rn = rn - 1; + if (rn == 0) { + break; + } + } + return rn; +} + diff --git a/generated-src/lib/sqrt.h b/generated-src/lib/sqrt.h new file mode 100644 index 0000000..e54594d --- /dev/null +++ b/generated-src/lib/sqrt.h @@ -0,0 +1,12 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t wmpn_sqrtrem2(uint64_t * sp, uint64_t * rp, uint64_t * np); + +uint64_t wmpn_dc_sqrtrem(uint64_t * sp, uint64_t * np, int32_t n, + uint64_t * scratch); + +int32_t wmpn_sqrtrem(uint64_t * sp, uint64_t * rp, uint64_t * np, int32_t n); + diff --git a/generated-src/lib/sqrt1.c b/generated-src/lib/sqrt1.c new file mode 100644 index 0000000..f28b77d --- /dev/null +++ b/generated-src/lib/sqrt1.c @@ -0,0 +1,60 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "euclideandivision.h" + +#include "realinfix.h" + +#include "uint64.h" + +#include "power.h" + +#include "map.h" + +#include "c.h" + +#include "fxp.h" + + +#include "sqrtinit.h" + +uint64_t rsa_estimate (uint64_t a) { + uint64_t abits, x0; + abits = a >> 55; + x0 = 0x100 | invsqrttab[abits - 0x80]; + return x0; +} + +uint64_t sqrt1(uint64_t * rp, uint64_t a0) +{ + uint64_t a, x0, a1, m1, t1qt, t1, x1, a2, u1, u2, m2, t2qt, t2, x2, x; + uint64_t c, s; + a = a0; + x0 = rsa_estimate(a); + a1 = a >> UINT64_C(31); + m1 = UINT64_C(0x2000000000000) - UINT64_C(0x30000); + t1qt = m1 - x0 * x0 * a1; + t1 = (uint64_t)((int64_t)t1qt >> UINT64_C(16)); + x1 = (x0 << UINT64_C(16)) + (uint64_t)((int64_t)(x0 * t1) >> UINT64_C(18)); + a2 = a >> UINT64_C(24); + u1 = x1 * a2; + u2 = u1 >> UINT64_C(25); + m2 = UINT64_C(0x24000000000); + t2qt = (a << UINT64_C(14)) - u2 * u2 - m2; + t2 = (uint64_t)((int64_t)t2qt >> UINT64_C(24)); + x2 = u1 + (uint64_t)((int64_t)(x1 * t2) >> UINT64_C(15)); + x = x2 >> UINT64_C(32); + c = x; + s = c * c; + if (s + UINT64_C(2) * c <= a0 - UINT64_C(1)) { + s = s + UINT64_C(2) * c + UINT64_C(1); + c = c + UINT64_C(1); + } + *rp = a0 - s; + return c; +} + diff --git a/generated-src/lib/sqrt1.h b/generated-src/lib/sqrt1.h new file mode 100644 index 0000000..708a51d --- /dev/null +++ b/generated-src/lib/sqrt1.h @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t sqrt1(uint64_t * rp, uint64_t a0); + diff --git a/generated-src/lib/sqrtinit.h b/generated-src/lib/sqrtinit.h new file mode 100644 index 0000000..a66d9d5 --- /dev/null +++ b/generated-src/lib/sqrtinit.h @@ -0,0 +1,51 @@ +/* The common 0x100 was removed */ +static const unsigned char invsqrttab[384] = { + 0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */ + 0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */ + 0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */ + 0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */ + 0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */ + 0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */ + 0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */ + 0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */ + 0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */ + 0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */ + 0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */ + 0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */ + 0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */ + 0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */ + 0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */ + 0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */ + 0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */ + 0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */ + 0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */ + 0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */ + 0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */ + 0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */ + 0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */ + 0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */ + 0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */ + 0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */ + 0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */ + 0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */ + 0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */ + 0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */ + 0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */ + 0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */ + 0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */ + 0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */ + 0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */ + 0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */ + 0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */ + 0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */ + 0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */ + 0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */ + 0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */ + 0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */ + 0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */ + 0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */ + 0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */ + 0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */ + 0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */ + 0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00, /* sqrt(1/1f8)..sqrt(1/1ff) */ +}; diff --git a/generated-src/lib/sub.c b/generated-src/lib/sub.c new file mode 100644 index 0000000..d645d67 --- /dev/null +++ b/generated-src/lib/sub.c @@ -0,0 +1,219 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "power.h" + +#include "c.h" + +#include "array.h" + +#include "map.h" + +#include "types.h" + +uint64_t wmpn_sub_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y) +{ + uint64_t b, lx; + int32_t i; + uint64_t res, res1; + b = UINT64_C(0); + lx = *x; + i = 1; + res = lx - y; + *r = res; + if (lx < y) { + b = UINT64_C(1); + while (i < sz) { + lx = x[i]; + res1 = lx - UINT64_C(1); + r[i] = res1; + i = i + 1; + if (!(lx == UINT64_C(0))) { + b = UINT64_C(0); + break; + } + } + } + while (i < sz) { + lx = x[i]; + r[i] = lx; + i = i + 1; + } + return b; +} + +uint64_t wmpn_sub_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz) +{ + uint64_t lx, ly, b; + int32_t i; + uint64_t res, borrow; + struct __sub64_with_borrow_result struct_res; + lx = UINT64_C(0); + ly = UINT64_C(0); + b = UINT64_C(0); + i = 0; + while (i < sz) { + lx = x[i]; + ly = y[i]; + struct_res = sub64_with_borrow(lx, ly, b); + res = struct_res.__field_0; + borrow = struct_res.__field_1; + r[i] = res; + b = borrow; + i = i + 1; + } + return b; +} + +uint64_t wmpn_sub(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy) +{ + uint64_t lx, ly, b; + int32_t i; + uint64_t res, borrow, res1; + struct __sub64_with_borrow_result struct_res; + lx = UINT64_C(0); + ly = UINT64_C(0); + b = UINT64_C(0); + i = 0; + while (i < sy) { + lx = x[i]; + ly = y[i]; + struct_res = sub64_with_borrow(lx, ly, b); + res = struct_res.__field_0; + borrow = struct_res.__field_1; + r[i] = res; + b = borrow; + i = i + 1; + } + if (!(b == UINT64_C(0))) { + while (i < sx) { + lx = x[i]; + res1 = lx - UINT64_C(1); + r[i] = res1; + i = i + 1; + if (!(lx == UINT64_C(0))) { + b = UINT64_C(0); + break; + } + } + } + while (i < sx) { + lx = x[i]; + r[i] = lx; + i = i + 1; + } + return b; +} + +uint64_t wmpn_sub_in_place(uint64_t * x, int32_t sx, uint64_t * y, int32_t sy) +{ + uint64_t lx, ly, b; + int32_t i; + uint64_t res, borrow, res1; + struct __sub64_with_borrow_result struct_res; + lx = UINT64_C(0); + ly = UINT64_C(0); + b = UINT64_C(0); + i = 0; + while (i < sy) { + lx = x[i]; + ly = y[i]; + struct_res = sub64_with_borrow(lx, ly, b); + res = struct_res.__field_0; + borrow = struct_res.__field_1; + x[i] = res; + b = borrow; + i = i + 1; + } + if (!(b == UINT64_C(0))) { + while (i < sx) { + lx = x[i]; + res1 = lx - UINT64_C(1); + x[i] = res1; + i = i + 1; + if (!(lx == UINT64_C(0))) { + b = UINT64_C(0); + break; + } + } + } + return b; +} + +void wmpn_decr(uint64_t * x, uint64_t y) +{ + uint64_t b, lx; + uint64_t * xp; + uint64_t res, res1; + b = UINT64_C(0); + lx = *x; + xp = x + 1; + res = lx - y; + *x = res; + if (res > lx) { + b = UINT64_C(1); + while (!(b == UINT64_C(0))) { + lx = *xp; + res1 = lx - UINT64_C(1); + *xp = res1; + xp = xp + 1; + if (!(lx == UINT64_C(0))) { + b = UINT64_C(0); + break; + } + } + } else { + return; + } +} + +void wmpn_decr_1(uint64_t * x) +{ + uint64_t lx; + uint64_t * xp; + uint64_t res; + lx = UINT64_C(0); + xp = x + 0; + while (lx == UINT64_C(0)) { + lx = *xp; + res = lx - UINT64_C(1); + *xp = res; + xp = xp + 1; + } +} + +uint64_t wmpn_sub_1_in_place(uint64_t * x, int32_t sz, uint64_t y) +{ + uint64_t b, lx; + int32_t i; + uint64_t res, res1; + b = UINT64_C(0); + lx = *x; + i = 1; + res = lx - y; + *x = res; + if (lx < y) { + b = UINT64_C(1); + while (i < sz) { + lx = x[i]; + res1 = lx - UINT64_C(1); + x[i] = res1; + i = i + 1; + if (!(lx == UINT64_C(0))) { + b = UINT64_C(0); + break; + } + } + } + return b; +} + diff --git a/generated-src/lib/sub.h b/generated-src/lib/sub.h new file mode 100644 index 0000000..0b8eaa3 --- /dev/null +++ b/generated-src/lib/sub.h @@ -0,0 +1,20 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +uint64_t wmpn_sub_1(uint64_t * r, uint64_t * x, int32_t sz, uint64_t y); + +uint64_t wmpn_sub_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz); + +uint64_t wmpn_sub(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy); + +uint64_t wmpn_sub_in_place(uint64_t * x, int32_t sx, uint64_t * y, int32_t sy); + +void wmpn_decr(uint64_t * x, uint64_t y); + +void wmpn_decr_1(uint64_t * x); + +uint64_t wmpn_sub_1_in_place(uint64_t * x, int32_t sz, uint64_t y); + diff --git a/generated-src/lib/toom.c b/generated-src/lib/toom.c new file mode 100644 index 0000000..5096c23 --- /dev/null +++ b/generated-src/lib/toom.c @@ -0,0 +1,537 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "array.h" + +#include "map.h" + +#include "c.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "int.h" + +#include "power.h" + +#include "valuation.h" + +#include "computerdivision.h" + +#include "types.h" + +#include "compare.h" + +#include "util.h" + +#include "add.h" + +#include "sub.h" + +#include "mul.h" + +#include "logical.h" + +int32_t toom22_threshold() +{ return 29; +} + +void wmpn_toom22_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy, uint64_t * scratch); + +void wmpn_toom22_mul_rec(uint64_t * r, uint64_t * x, int32_t sx, + uint64_t * y, int32_t sy, uint64_t * scratch); + +void wmpn_toom22_mul_n_rec(uint64_t * r, uint64_t * x, uint64_t * y, + uint64_t * scratch, int32_t sz); + +void wmpn_toom32_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy, uint64_t * scratch); + +void wmpn_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy); + +void wmpn_toom22_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy, uint64_t * scratch) +{ + int32_t s, n, t; + uint64_t * x0; + uint64_t * x1; + uint64_t * y0; + uint64_t * y1; + uint64_t * rqt; + uint64_t * ro; + uint64_t * scratchqt; + uint64_t * s_out; + uint64_t * vinf; + uint64_t * xsm1; + uint64_t * ysm1; + int vm1_neg; + uint64_t b, lx; + uint64_t * y0t; + int c0, c1; + uint64_t * ysm1t; + uint64_t * v0; + uint64_t * v0n; + uint64_t * vinfn; + uint64_t cy; + uint64_t o, c, cy2, cqt, cqtqt, b1; + uint64_t * vinf0; + uint64_t * vinfn1; + s = sx / 2; + n = sx - s; + t = sy - n; + x0 = x; + x1 = x + n; + y0 = y; + y1 = y + n; + rqt = r - 0; + ro = r + (sx + sy); + scratchqt = scratch - 0; + s_out = scratch + (n + n); + vinf = r + (n + n); + xsm1 = r; + ysm1 = r + n; + vm1_neg = 0; + if (s == n) { + if (wmpn_cmp(x0, x1, n) < 0) { + wmpn_sub_n(xsm1, x1, x0, n); + vm1_neg = 1; + } else { + wmpn_sub_n(xsm1, x0, x1, n); + } + } else { + if (x0[s] == UINT64_C(0) && wmpn_cmp(x0, x1, s) < 0) { + wmpn_sub_n(xsm1, x1, x0, s); + xsm1[s] = UINT64_C(0); + vm1_neg = 1; + } else { + b = wmpn_sub_n(xsm1, x0, x1, s); + lx = x0[s]; + xsm1[s] = lx - b; + } + } + if (t == n) { + if (wmpn_cmp(y0, y1, n) < 0) { + wmpn_sub_n(ysm1, y1, y0, n); + vm1_neg = !vm1_neg; + } else { + wmpn_sub_n(ysm1, y0, y1, n); + } + } else { + y0t = y0 + t; + c0 = wmpn_zero_p(y0t, n - t) == 1; + c1 = wmpn_cmp(y0, y1, t) < 0; + if (c0 && c1) { + wmpn_sub_n(ysm1, y1, y0, t); + ysm1t = ysm1 + t; + wmpn_zero(ysm1t, n - t); + vm1_neg = !vm1_neg; + } else { + wmpn_sub(ysm1, y0, n, y1, t); + } + } + wmpn_toom22_mul_n_rec(scratch, xsm1, ysm1, s_out, n); + IGNORE2(r, ysm1); + v0 = r; + if (s > t) { + wmpn_toom22_mul_rec(vinf, x1, s, y1, t, s_out); + } else { + wmpn_toom22_mul_n_rec(vinf, x1, y1, s_out, s); + } + wmpn_toom22_mul_n_rec(v0, x0, y0, s_out, n); + v0n = v0 + n; + vinfn = vinf + n; + o = wmpn_add_in_place(vinf, n, v0n, n); + cy = o; + c = wmpn_add_n(v0n, vinf, v0, n); + cy2 = c + cy; + cqt = wmpn_add_in_place(vinf, n, vinfn, s + t - n); + cy = cy + cqt; + IGNORE2(v0n, vinf); + if (vm1_neg) { + cqtqt = wmpn_add_in_place(v0n, n + n, scratch, n + n); + cy = cy + cqtqt; + } else { + b1 = wmpn_sub_in_place(v0n, n + n, scratch, n + n); + cy = cy - b1; + } + IGNORE2(r, v0n); + IGNORE2(r, vinfn); + IGNORE2(scratch, s_out); + vinf0 = r + (n + n); + wmpn_incr(vinf0, cy2); + vinfn1 = r + 3 * n; + if (cy <= UINT64_C(3)) { + wmpn_incr(vinfn1, cy); + } else { + wmpn_decr_1(vinfn1); + } + IGNORE2(r, ro); + IGNORE2(rqt, r); + IGNORE2(scratchqt, scratch); + return; +} + +void wmpn_toom22_mul_rec(uint64_t * r, uint64_t * x, int32_t sx, + uint64_t * y, int32_t sy, uint64_t * scratch) +{ + if (sy <= toom22_threshold()) { + wmpn_mul_basecase(r, x, sx, y, sy); + return; + } else { + if (4 * sx < 5 * sy) { + wmpn_toom22_mul(r, x, sx, y, sy, scratch); + return; + } else { + wmpn_toom32_mul(r, x, sx, y, sy, scratch); + return; + } + } +} + +void wmpn_toom22_mul_n_rec(uint64_t * r, uint64_t * x, uint64_t * y, + uint64_t * scratch, int32_t sz) +{ + if (sz <= toom22_threshold()) { + wmpn_mul_basecase(r, x, sz, y, sz); + return; + } else { + wmpn_toom22_mul(r, x, sz, y, sz, scratch); + return; + } +} + +void wmpn_toom32_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy, uint64_t * scratch) +{ + int32_t n, s, t; + uint64_t * x0; + uint64_t * x1; + uint64_t * x2; + uint64_t * y0; + uint64_t * y1; + uint64_t * rol; + uint64_t * ror; + uint64_t * sol; + uint64_t * sor; + uint64_t * xp1; + uint64_t * yp1; + uint64_t * xm1; + uint64_t * ym1; + uint64_t * v1; + uint64_t * vm1; + uint64_t xp1_hi, yp1_hi, hi; + int vm1_neg; + uint64_t o, b, c, o1, o2; + int32_t cmp, cmp1; + uint64_t * y0t; + int c0, c1; + int32_t cmp2; + uint64_t * ym1t; + uint64_t cy; + uint64_t * sn; + uint64_t c2, c3; + uint64_t * sn1; + uint64_t * sn2; + uint64_t c4, c5, b1, r1; + uint64_t * vm1n; + struct __sub64_with_borrow_result struct_res; + uint64_t c6, r2; + struct __add64_with_carry_result struct_res1; + int32_t s1; + uint64_t * vy0; + uint64_t * vy1; + uint64_t * vy2; + uint64_t t02, c7, c11, c21, c31, b11, b2, b3; + uint64_t * vm1n1; + uint64_t * r3n; + uint64_t * r1n; + uint64_t * r4n; + uint64_t * r2n; + uint64_t bo1, ly2; + uint64_t bo; + int64_t h; + uint64_t bo2, bo2qt, bo3, bo3qt, c8; + int32_t rs; + uint64_t * r2n1; + uint64_t b4; + n = 1 + (2 * sx >= 3 * sy ? (sx - 1) / 3 : (sy - 1) / 2); + s = sx - 2 * n; + t = sy - n; + x0 = x; + x1 = x + n; + x2 = x1 + n; + y0 = y; + y1 = y + n; + rol = r - 0; + ror = r + (sx + sy); + sol = scratch - 0; + sor = scratch + (n + n + 1); + xp1 = r; + yp1 = r + n; + xm1 = yp1 + n; + ym1 = xm1 + n; + v1 = scratch; + vm1 = r; + xp1_hi = UINT64_C(0); + yp1_hi = UINT64_C(0); + hi = UINT64_C(0); + vm1_neg = 0; + o = wmpn_add(xp1, x0, n, x2, s); + xp1_hi = o; + cmp = wmpn_cmp(xp1, x1, n); + if (xp1_hi == UINT64_C(0) && cmp < 0) { + wmpn_sub_n(xm1, x1, xp1, n); + hi = UINT64_C(0); + vm1_neg = 1; + } else { + b = wmpn_sub_n(xm1, xp1, x1, n); + hi = xp1_hi - b; + } + c = wmpn_add_in_place(xp1, n, x1, n); + xp1_hi = xp1_hi + c; + if (t == n) { + o1 = wmpn_add_n(yp1, y0, y1, n); + yp1_hi = o1; + cmp1 = wmpn_cmp(y0, y1, n); + if (cmp1 < 0) { + wmpn_sub_n(ym1, y1, y0, n); + vm1_neg = !vm1_neg; + } else { + wmpn_sub_n(ym1, y0, y1, n); + } + } else { + o2 = wmpn_add(yp1, y0, n, y1, t); + yp1_hi = o2; + y0t = y0 + t; + c0 = wmpn_zero_p(y0t, n - t) == 1; + cmp2 = wmpn_cmp(y0, y1, t); + c1 = cmp2 < 0; + if (c0 && c1) { + wmpn_sub_n(ym1, y1, y0, t); + ym1t = ym1 + t; + wmpn_zero(ym1t, n - t); + vm1_neg = !vm1_neg; + } else { + wmpn_sub(ym1, y0, n, y1, t); + } + } + wmpn_toom22_mul_n_rec(v1, xp1, yp1, sor, n); + cy = UINT64_C(0); + if (xp1_hi == UINT64_C(1)) { + sn = scratch + n; + c2 = wmpn_add_in_place(sn, n, yp1, n); + cy = c2; + } else { + if (xp1_hi == UINT64_C(2)) { + sn1 = scratch + n; + c3 = wmpn_addmul_1(sn1, yp1, n, UINT64_C(2)); + cy = c3; + } + } + if (!(yp1_hi == UINT64_C(0))) { + sn2 = scratch + n; + c4 = wmpn_add_in_place(sn2, n, xp1, n); + cy = xp1_hi * yp1_hi + c4 + cy; + } + IGNORE2(vm1, yp1); + wmpn_toom22_mul_n_rec(vm1, xm1, ym1, sor, n); + if (!(hi == UINT64_C(0))) { + vm1n = vm1 + n; + c5 = wmpn_add_in_place(vm1n, n, ym1, n); + IGNORE2(vm1, vm1n); + hi = c5; + } + if (vm1_neg) { + b1 = wmpn_sub_in_place(scratch, 2 * n, vm1, 2 * n); + struct_res = sub64_with_borrow(cy, hi, b1); + r1 = struct_res.__field_0; + scratch[2 * n] = r1; + } else { + c6 = wmpn_add_in_place(scratch, 2 * n, vm1, 2 * n); + struct_res1 = add64_with_carry(cy, hi, c6); + r2 = struct_res1.__field_0; + scratch[2 * n] = r2; + } + s1 = 2 * n + 1; + wmpn_rshift_in_place(scratch, s1, UINT64_C(1)); + IGNORE2(xm1, ym1); + vy0 = scratch; + vy1 = xm1; + vy2 = scratch + n; + t02 = vy2[n]; + c7 = wmpn_add_n(vy1, vy0, vy2, n); + wmpn_incr(vy2, c7 + t02); + vm1n1 = vm1 + n; + if (vm1_neg) { + c11 = wmpn_add_in_place(scratch, n, vm1, n); + c21 = wmpn_add_in_place(vy1, n, vm1n1, n); + hi = hi + c21; + c31 = wmpn_add_1_in_place(vy1, n, c11); + hi = hi + c31; + wmpn_incr(vy2, hi); + } else { + b11 = wmpn_sub_in_place(scratch, n, vm1, n); + b2 = wmpn_sub_in_place(vy1, n, vm1n1, n); + hi = hi + b2; + b3 = wmpn_sub_1_in_place(vy1, n, b11); + hi = hi + b3; + wmpn_decr(vy2, hi); + } + wmpn_toom22_mul_n_rec(r, x, y, sor, n); + r3n = xm1 + n; + if (s > t) { + wmpn_mul(r3n, x2, s, y1, t); + } else { + wmpn_mul(r3n, y1, t, x2, s); + } + r1n = r + n; + r4n = r3n + n; + r2n = xm1; + bo1 = wmpn_sub_in_place(r1n, n, r3n, n); + bo = bo1; + ly2 = vy2[n]; + h = (int64_t)(ly2 + bo1); + bo2 = wmpn_sub_in_place(r2n, n, r, n); + bo2qt = wmpn_sub_1_in_place(r2n, n, bo); + bo = bo2 + bo2qt; + bo3 = wmpn_sub_n(r3n, vy2, r1n, n); + bo3qt = wmpn_sub_1_in_place(r3n, n, bo); + bo = bo3 + bo3qt; + h = h - (int64_t)bo; + IGNORE2(r2n, r3n); + IGNORE2(r1n, r2n); + c8 = wmpn_add_in_place(r1n, 3 * n, scratch, n); + h = h + (int64_t)c8; + IGNORE2(r, r1n); + rs = s + t - n; + if (__builtin_expect(s + t > n,1)) { + r2n1 = r + 2 * n; + b4 = wmpn_sub_in_place(r2n1, 2 * n, r4n, rs); + h = h - (int64_t)b4; + if (h < INT64_C(0)) { + wmpn_decr(r4n, (uint64_t)-h); + } else { + wmpn_incr(r4n, (uint64_t)h); + } + } + IGNORE2(r, r4n); + IGNORE2(scratch, vy2); + IGNORE2(scratch, sor); + IGNORE2(sol, scratch); + IGNORE2(r, ror); + IGNORE2(rol, r); + return; +} + +void wmpn_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy) +{ + uint64_t * scratch; + uint64_t * rol; + uint64_t * ror; + int32_t un; + int32_t su; + uint64_t * ws; + uint64_t * up; + uint64_t * rp; + uint64_t cy; + uint64_t * rpn; + uint64_t * wsy; + int32_t i; + uint64_t * xp; + uint64_t * rp1; + uint64_t cy1; + uint64_t * rpn1; + uint64_t * wsy1; + int32_t i1; + uint64_t * xp1; + uint64_t * rp2; + if (sy <= toom22_threshold()) { + wmpn_mul_basecase(r, x, sx, y, sy); + return; + } else { + scratch = alloca((uint32_t)(5 * sy + 128) * sizeof(uint64_t)); + rol = r - 0; + ror = r + (sx + sy); + if (2 * sx >= 5 * sy) { + un = sx; + su = 3 * sy / 2; + ws = alloca((uint32_t)(4 * sy) * sizeof(uint64_t)); + wmpn_toom32_mul(r, x, su, y, sy, scratch); + un = un - su; + up = x + su; + rp = r + su; + while (un >= 2 * sy) { + wmpn_toom32_mul(ws, up, su, y, sy, scratch); + cy = wmpn_add_in_place(rp, sy, ws, sy); + rpn = rp + sy; + wsy = ws + sy; + i = 0; + xp = wsy + 0; + rp1 = rpn + 0; + while (i < su) { + *rp1 = *xp; + rp1 = rp1 + 1; + xp = xp + 1; + i = i + 1; + } + wmpn_incr(rpn, cy); + un = un - su; + up = up + su; + rp = rp + su; + } + if (sy <= un) { + if (4 * un < 5 * sy) { + wmpn_toom22_mul(ws, up, un, y, sy, scratch); + } else { + wmpn_toom32_mul(ws, up, un, y, sy, scratch); + } + } else { + wmpn_mul(ws, y, sy, up, un); + } + cy1 = wmpn_add_in_place(rp, sy, ws, sy); + rpn1 = rp + sy; + wsy1 = ws + sy; + i1 = 0; + xp1 = wsy1 + 0; + rp2 = rpn1 + 0; + while (i1 < un) { + *rp2 = *xp1; + rp2 = rp2 + 1; + xp1 = xp1 + 1; + i1 = i1 + 1; + } + wmpn_incr(rpn1, cy1); + (void)ws; + } else { + if (4 * sx < 5 * sy) { + wmpn_toom22_mul(r, x, sx, y, sy, scratch); + } else { + wmpn_toom32_mul(r, x, sx, y, sy, scratch); + } + } + (void)scratch; + IGNORE2(r, ror); + IGNORE2(rol, r); + return; + } +} + +void wmpn_mul_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz) +{ + uint64_t * ws; + if (sz <= toom22_threshold()) { + wmpn_mul_basecase(r, x, sz, y, sz); + return; + } else { + ws = alloca((uint32_t)(2 * (sz + 64)) * sizeof(uint64_t)); + wmpn_toom22_mul(r, x, sz, y, sz, ws); + return; + } +} + diff --git a/generated-src/lib/toom.h b/generated-src/lib/toom.h new file mode 100644 index 0000000..ccbc2f4 --- /dev/null +++ b/generated-src/lib/toom.h @@ -0,0 +1,24 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +int32_t toom22_threshold(); + +void wmpn_toom22_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy, uint64_t * scratch); + +void wmpn_toom22_mul_rec(uint64_t * r, uint64_t * x, int32_t sx, + uint64_t * y, int32_t sy, uint64_t * scratch); + +void wmpn_toom22_mul_n_rec(uint64_t * r, uint64_t * x, uint64_t * y, + uint64_t * scratch, int32_t sz); + +void wmpn_toom32_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy, uint64_t * scratch); + +void wmpn_mul(uint64_t * r, uint64_t * x, int32_t sx, uint64_t * y, + int32_t sy); + +void wmpn_mul_n(uint64_t * r, uint64_t * x, uint64_t * y, int32_t sz); + diff --git a/generated-src/lib/types.h b/generated-src/lib/types.h new file mode 100644 index 0000000..d8a7321 --- /dev/null +++ b/generated-src/lib/types.h @@ -0,0 +1,5 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> diff --git a/generated-src/lib/uint64.h b/generated-src/lib/uint64.h new file mode 100644 index 0000000..f347ea5 --- /dev/null +++ b/generated-src/lib/uint64.h @@ -0,0 +1,6 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + diff --git a/generated-src/lib/uint64gmp.h b/generated-src/lib/uint64gmp.h new file mode 100644 index 0000000..1e911a4 --- /dev/null +++ b/generated-src/lib/uint64gmp.h @@ -0,0 +1,91 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + +typedef unsigned __int128 uint128_t; + +struct __mul64_double_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +static inline struct __mul64_double_result mul64_double(uint64_t x, uint64_t y) +{ + uint128_t z = (uint128_t)x * (uint128_t)y; + struct __mul64_double_result result = { z, z >> 64 }; + return result; +} + +static inline uint64_t div64_2by1(uint64_t ul, uint64_t uh, uint64_t d) +{ + return (((uint128_t)uh << 64) | ul) / d; +} + + +struct __add64_with_carry_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +static inline struct __add64_with_carry_result +add64_with_carry(uint64_t x, uint64_t y, uint64_t c) +{ + struct __add64_with_carry_result result; + uint64_t r = x + y + c; + result.__field_0 = r; + if (r == x) result.__field_1 = c; + else result.__field_1 = (r < x); + return result; +} + +struct __sub64_with_borrow_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +static inline struct __sub64_with_borrow_result +sub64_with_borrow(uint64_t x, uint64_t y, uint64_t b) +{ + struct __sub64_with_borrow_result result; + uint64_t r = x - y - b; + result.__field_0 = r; + if (r > x) result.__field_1 = 1; + else if (r == x) result.__field_1 = b; + else result.__field_1 = 0; + return result; +} + +struct __add64_3_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +static inline struct __add64_3_result add64_3(uint64_t x, uint64_t y, uint64_t z) +{ + struct __add64_3_result result; + uint64_t r, c1, c2; + r = x + y; + c1 = r < y; + r += z; + c2 = r < z; + result.__field_1 = c1 + c2; + result.__field_0 = r; + return result; +} + +struct __lsld64_result +{ uint64_t __field_0; + uint64_t __field_1; +}; + +static inline struct __lsld64_result lsld64(uint64_t x, uint64_t cnt) +{ + struct __lsld64_result result; + result.__field_1 = x >> (64 - cnt); + result.__field_0 = x << cnt; + return result; +} + + diff --git a/generated-src/lib/util.c b/generated-src/lib/util.c new file mode 100644 index 0000000..f58e6bc --- /dev/null +++ b/generated-src/lib/util.c @@ -0,0 +1,65 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +#include "int.h" + +#include "int32.h" + +#include "uint64gmp.h" + +#include "power.h" + +#include "c.h" + +#include "map.h" + +#include "types.h" + +void wmpn_copyi(uint64_t * r, uint64_t * x, int32_t sz) +{ + int32_t i; + uint64_t * xp; + uint64_t * rp; + i = 0; + xp = x + 0; + rp = r + 0; + while (i < sz) { + *rp = *xp; + rp = rp + 1; + xp = xp + 1; + i = i + 1; + } +} + +int32_t wmpn_zero_p(uint64_t * x, int32_t sz) +{ + int32_t i; + uint64_t uzero; + uint64_t lx; + i = sz; + uzero = UINT64_C(0); + lx = uzero; + while (i >= 1) { + i = i - 1; + lx = x[i]; + if (!(lx == uzero)) { + return 0; + } + } + return 1; +} + +void wmpn_zero(uint64_t * r, int32_t sz) +{ + int32_t i; + uint64_t lzero; + i = 0; + lzero = UINT64_C(0); + while (i < sz) { + r[i] = lzero; + i = i + 1; + } +} + diff --git a/generated-src/lib/util.h b/generated-src/lib/util.h new file mode 100644 index 0000000..a864c95 --- /dev/null +++ b/generated-src/lib/util.h @@ -0,0 +1,11 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> +void wmpn_copyi(uint64_t * r, uint64_t * x, int32_t sz); + +int32_t wmpn_zero_p(uint64_t * x, int32_t sz); + +void wmpn_zero(uint64_t * r, int32_t sz); + diff --git a/generated-src/lib/valuation.h b/generated-src/lib/valuation.h new file mode 100644 index 0000000..f347ea5 --- /dev/null +++ b/generated-src/lib/valuation.h @@ -0,0 +1,6 @@ +#include <stdlib.h> +#include <stdint.h> +#include <stdio.h> +#include <assert.h> +#include <alloca.h> + diff --git a/generated-src/wmp.h b/generated-src/wmp.h new file mode 100644 index 0000000..13028c3 --- /dev/null +++ b/generated-src/wmp.h @@ -0,0 +1,57 @@ +#ifndef __WMP_H__ +#define __WMP_H__ + +#include <stdint.h> + +typedef int32_t wmp_size_t; +typedef uint64_t wmp_limb_t; +typedef wmp_limb_t *wmp_ptr; +typedef wmp_limb_t const *wmp_srcptr; + +#ifdef __cplusplus +extern "C" { +#endif + +int32_t wmpn_cmp (wmp_srcptr, wmp_srcptr, wmp_size_t); + +void wmpn_copyi (wmp_ptr, wmp_srcptr, wmp_size_t); + +int32_t wmpn_zero_p (wmp_srcptr, wmp_size_t); + +void wmpn_zero (wmp_ptr, wmp_size_t); + +wmp_limb_t wmpn_add (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_srcptr, wmp_size_t); +wmp_limb_t wmpn_add_1 (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_limb_t); +wmp_limb_t wmpn_add_n (wmp_ptr, wmp_srcptr, wmp_srcptr, wmp_size_t); +wmp_limb_t wmpn_add_in_place (wmp_ptr, wmp_size_t, wmp_srcptr, wmp_size_t); +wmp_limb_t wmpn_add_1_in_place (wmp_ptr, wmp_size_t, wmp_limb_t); + +wmp_limb_t wmpn_sub (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_srcptr, wmp_size_t); +wmp_limb_t wmpn_sub_1 (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_limb_t); +wmp_limb_t wmpn_sub_n (wmp_ptr, wmp_srcptr, wmp_srcptr, wmp_size_t); +wmp_limb_t wmpn_sub_in_place (wmp_ptr, wmp_size_t, wmp_srcptr, wmp_size_t); +wmp_limb_t wmpn_sub_1_in_place (wmp_ptr, wmp_size_t, wmp_limb_t); + +void wmpn_mul (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_srcptr, wmp_size_t); +wmp_limb_t wmpn_mul_1 (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_limb_t); +void wmpn_mul_n (wmp_ptr, wmp_srcptr, wmp_srcptr, wmp_size_t); + +wmp_limb_t wmpn_addmul_1 (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_limb_t); +wmp_limb_t wmpn_submul_1 (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_limb_t); + +wmp_limb_t wmpn_lshift (wmp_ptr, wmp_srcptr, wmp_size_t, uint64_t); +wmp_limb_t wmpn_rshift (wmp_ptr, wmp_srcptr, wmp_size_t, uint64_t); +wmp_limb_t wmpn_lshift_in_place (wmp_ptr, wmp_size_t, uint64_t); +wmp_limb_t wmpn_rshift_in_place (wmp_ptr, wmp_size_t, uint64_t); + +wmp_limb_t wmpn_divrem_1 (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_limb_t); +void wmpn_tdiv_qr (wmp_ptr, wmp_ptr, wmp_srcptr, wmp_size_t, wmp_srcptr, wmp_size_t); +void wmpn_tdiv_qr_in_place (wmp_ptr, wmp_srcptr, wmp_size_t, wmp_srcptr, wmp_size_t); + +wmp_size_t wmpn_sqrtrem (wmp_ptr, wmp_ptr, wmp_srcptr, wmp_size_t); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/why3 b/why3 new file mode 160000 index 0000000..83bee6f --- /dev/null +++ b/why3 @@ -0,0 +1 @@ +Subproject commit 83bee6fa62b20de592920e1b163df0e5e52d6955 -- GitLab