From 8e9b3e1909999dfff043dc1ee906c58c5dfab225 Mon Sep 17 00:00:00 2001 From: Vincent Laporte Date: Mon, 23 Jun 2025 11:26:07 +0200 Subject: [PATCH] Update syntax of casts --- .../keccak1600/amd64/avx2/keccakf1600_4x.jinc | 2 +- .../keccak1600/amd64/bmi1/keccakf1600.jinc | 2 +- .../keccak1600/amd64/ref/keccakf1600.jinc | 2 +- .../keccak1600/amd64/ref1/keccakf1600.jinc | 2 +- .../crypto_hash/sha256/amd64/ref/sha256.jinc | 2 +- .../crypto_hash/sha512/amd64/ref/sha512.jinc | 2 +- .../kyber/common/amd64/ref/poly.jinc | 4 +- .../kyber/kyber512/amd64/avx2/gen_matrix.jinc | 12 +- .../kyber/kyber768/amd64/avx2/gen_matrix.jinc | 12 +- .../kyber/kyber768/amd64/ref/indcpa.jinc | 4 +- .../mlkem/mlkem768/amd64/avx2/gen_matrix.jinc | 22 +-- .../mlkem/mlkem768/amd64/avx2/indcpa.jinc | 4 +- .../amd64/avx2/keccak/keccakf1600.jinc | 2 +- .../keccak/keccakf1600_4x_avx2_compact.jinc | 2 +- .../mlkem/mlkem768/amd64/ref/fips202.jinc | 2 +- .../mlkem/mlkem768/amd64/ref/indcpa.jinc | 4 +- .../dilithium/common/amd64/avx2/ntt.jinc | 32 ++-- .../dilithium/common/amd64/packing.jinc | 148 +++++++++--------- .../dilithium/common/amd64/verify_end.jinc | 4 +- .../falcon512/amd64/avx2/butterfly.jinc | 136 ++++++++-------- .../falcon512/common/amd64/encode_decode.jinc | 4 +- 21 files changed, 202 insertions(+), 202 deletions(-) diff --git a/oldsrc-should-delete/common/keccak/keccak1600/amd64/avx2/keccakf1600_4x.jinc b/oldsrc-should-delete/common/keccak/keccak1600/amd64/avx2/keccakf1600_4x.jinc index aa0ac4a1..b65b140c 100644 --- a/oldsrc-should-delete/common/keccak/keccak1600/amd64/avx2/keccakf1600_4x.jinc +++ b/oldsrc-should-delete/common/keccak/keccak1600/amd64/avx2/keccakf1600_4x.jinc @@ -191,7 +191,7 @@ inline fn __keccakf1600_4x_avx2(reg ptr u256[25] a) -> reg ptr u256[25] rc = RC.[c]; e = __round_4x_avx2(e, a, rc, r8, r56); - rc = RC.[(int) c + 32]; + rc = RC.[(uint) c + 32]; a = __round_4x_avx2(a, e, rc, r8, r56); c += 64; diff --git a/oldsrc-should-delete/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc b/oldsrc-should-delete/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc index 18847ded..d39c939f 100644 --- a/oldsrc-should-delete/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc +++ b/oldsrc-should-delete/common/keccak/keccak1600/amd64/bmi1/keccakf1600.jinc @@ -146,7 +146,7 @@ inline fn __keccakf1600_bmi1(reg ptr u64[25] a) -> reg ptr u64[25] e = __round_bmi1(e, a, rc); RC = s_RC; - rc = RC[(int) c + 1]; + rc = RC[(uint) c + 1]; a = __round_bmi1(a, e, rc); c += 2; diff --git a/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref/keccakf1600.jinc b/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref/keccakf1600.jinc index f9cf8daf..4aeac333 100644 --- a/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref/keccakf1600.jinc +++ b/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref/keccakf1600.jinc @@ -139,7 +139,7 @@ inline fn __keccakf1600_ref(stack u64[25] a) -> stack u64[25] rc = RC[c]; e = __round_ref(a, rc); - rc = RC[(int) c + 1]; + rc = RC[(uint) c + 1]; a = __round_ref(e, rc); c += 2; diff --git a/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc b/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc index 6c691d18..fdb575b4 100644 --- a/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc +++ b/oldsrc-should-delete/common/keccak/keccak1600/amd64/ref1/keccakf1600.jinc @@ -144,7 +144,7 @@ inline fn __keccakf1600_ref1(reg ptr u64[25] a) -> reg ptr u64[25] rc = RC[c]; e = __round_ref1(e, a, rc); - rc = RC[(int) c + 1]; + rc = RC[(uint) c + 1]; a = __round_ref1(a, e, rc); c += 2; diff --git a/oldsrc-should-delete/crypto_hash/sha256/amd64/ref/sha256.jinc b/oldsrc-should-delete/crypto_hash/sha256/amd64/ref/sha256.jinc index 6f731ddd..c45cde70 100644 --- a/oldsrc-should-delete/crypto_hash/sha256/amd64/ref/sha256.jinc +++ b/oldsrc-should-delete/crypto_hash/sha256/amd64/ref/sha256.jinc @@ -290,7 +290,7 @@ fn _blocks_1_ref(reg ptr u32[8] _H, reg ptr u32[32] sblocks, reg u64 nblocks) -> s_i = i; oblocks = i << 4; for t=0 to 16 - { v = sblocks[(int)oblocks + t]; + { v = sblocks[(uint)oblocks + t]; v = #BSWAP_32(v); W[t] = v; } diff --git a/oldsrc-should-delete/crypto_hash/sha512/amd64/ref/sha512.jinc b/oldsrc-should-delete/crypto_hash/sha512/amd64/ref/sha512.jinc index 0c413d90..72279bf7 100644 --- a/oldsrc-should-delete/crypto_hash/sha512/amd64/ref/sha512.jinc +++ b/oldsrc-should-delete/crypto_hash/sha512/amd64/ref/sha512.jinc @@ -290,7 +290,7 @@ fn _blocks_1_ref(reg ptr u64[8] _H, reg ptr u64[32] sblocks, reg u64 nblocks) -> s_i = i; oblocks = i << 4; for t=0 to 16 - { v = sblocks[(int)oblocks + t]; + { v = sblocks[(uint)oblocks + t]; v = #BSWAP_64(v); W[t] = v; } diff --git a/oldsrc-should-delete/crypto_kem/kyber/common/amd64/ref/poly.jinc b/oldsrc-should-delete/crypto_kem/kyber/common/amd64/ref/poly.jinc index ce7cc489..fe991d2b 100644 --- a/oldsrc-should-delete/crypto_kem/kyber/common/amd64/ref/poly.jinc +++ b/oldsrc-should-delete/crypto_kem/kyber/common/amd64/ref/poly.jinc @@ -464,11 +464,11 @@ fn __load24(reg ptr u8[KYBER_ETA1*KYBER_N/4] buf, reg u64 i) -> reg u32 r = (32u)buf[i]; - t = (32u)buf[(int)i+1]; + t = (32u)buf[(uint)i+1]; t <<= 8; r |= t; - t = (32u)buf[(int)i+2]; + t = (32u)buf[(uint)i+2]; t <<= 16; r |= t; diff --git a/oldsrc-should-delete/crypto_kem/kyber/kyber512/amd64/avx2/gen_matrix.jinc b/oldsrc-should-delete/crypto_kem/kyber/kyber512/amd64/avx2/gen_matrix.jinc index 972a6f6f..1fd33970 100644 --- a/oldsrc-should-delete/crypto_kem/kyber/kyber512/amd64/avx2/gen_matrix.jinc +++ b/oldsrc-should-delete/crypto_kem/kyber/kyber512/amd64/avx2/gen_matrix.jinc @@ -381,7 +381,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] while(!b) { f0 = #VPERMQ(buf.[:u256 pos], 0x94); - f1 = #VPERMQ(buf.[:u256 24 + (int)pos], 0x94); + f1 = #VPERMQ(buf.[:u256 24 + (uint)pos], 0x94); f0 = #VPSHUFB_256(f0, idx8); f1 = #VPSHUFB_256(f1, idx8); g0 = #VPSRL_16u16(f0, 4); @@ -438,10 +438,10 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] f0 = #VPSHUFB_256(f0, g0); f1 = #VPSHUFB_256(f1, g1); - rp.[:u128 2*(int)ctr] = (128u)f0; - rp.[:u128 2*(int)t64] = #VEXTRACTI128(f0, 1); - rp.[:u128 2*(int)t64_1] = (128u)f1; - rp.[:u128 2*(int)t64_2] = #VEXTRACTI128(f1, 1); + rp.[:u128 2*(uint)ctr] = (128u)f0; + rp.[:u128 2*(uint)t64] = #VEXTRACTI128(f0, 1); + rp.[:u128 2*(uint)t64_1] = (128u)f1; + rp.[:u128 2*(uint)t64_2] = #VEXTRACTI128(f1, 1); ctr = t64_3; @@ -483,7 +483,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] l = #VPUNPCKL_16u8(l, h); f = #VPSHUFB_128(f, l); - rp.[:u128 2*(int)ctr] = f; + rp.[:u128 2*(uint)ctr] = f; ctr += good; pos += 12; diff --git a/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc b/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc index 47a3a201..21767ad0 100644 --- a/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc +++ b/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/avx2/gen_matrix.jinc @@ -394,7 +394,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] while(!b) { f0 = #VPERMQ(buf.[:u256 pos], 0x94); - f1 = #VPERMQ(buf.[:u256 24 + (int)pos], 0x94); + f1 = #VPERMQ(buf.[:u256 24 + (uint)pos], 0x94); f0 = #VPSHUFB_256(f0, idx8); f1 = #VPSHUFB_256(f1, idx8); g0 = #VPSRL_16u16(f0, 4); @@ -451,10 +451,10 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] f0 = #VPSHUFB_256(f0, g0); f1 = #VPSHUFB_256(f1, g1); - rp.[:u128 2*(int)ctr] = (128u)f0; - rp.[:u128 2*(int)t64] = #VEXTRACTI128(f0, 1); - rp.[:u128 2*(int)t64_1] = (128u)f1; - rp.[:u128 2*(int)t64_2] = #VEXTRACTI128(f1, 1); + rp.[:u128 2*(uint)ctr] = (128u)f0; + rp.[:u128 2*(uint)t64] = #VEXTRACTI128(f0, 1); + rp.[:u128 2*(uint)t64_1] = (128u)f1; + rp.[:u128 2*(uint)t64_2] = #VEXTRACTI128(f1, 1); ctr = t64_3; @@ -496,7 +496,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN] l = #VPUNPCKL_16u8(l, h); f = #VPSHUFB_128(f, l); - rp.[:u128 2*(int)ctr] = f; + rp.[:u128 2*(uint)ctr] = f; ctr += good; pos += 12; diff --git a/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc b/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc index 89e9133a..e5bbec91 100644 --- a/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc +++ b/oldsrc-should-delete/crypto_kem/kyber/kyber768/amd64/ref/indcpa.jinc @@ -109,7 +109,7 @@ fn __indcpa_enc(stack u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[KYB while (i < KYBER_SYMBYTES/8) { t64 = [:u64 pkp]; - publicseed.[:u64 8 * (int)i] = t64; + publicseed.[:u64 8 * (uint)i] = t64; pkp += 8; i += 1; } @@ -193,7 +193,7 @@ fn __iindcpa_enc(reg ptr u8[KYBER_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp, while (i < KYBER_SYMBYTES/8) { t64 = [:u64 pkp]; - publicseed.[:u64 8*(int)i] = t64; + publicseed.[:u64 8*(uint)i] = t64; pkp += 8; i += 1; } diff --git a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/gen_matrix.jinc b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/gen_matrix.jinc index e8c0a571..4e7ab086 100644 --- a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/gen_matrix.jinc +++ b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/gen_matrix.jinc @@ -343,8 +343,8 @@ inline fn __gen_matrix_buf_rejection_filter48 // - so the last 8 bytes will be discarded: // - 0x01aaaaaaaaaaaa08, 0x01bbbbbbbbbbbb08, 0x01bbbbbbbbbbbb08, 0x01cccccccccccc08 - f0 = #VPERMQ(buf.[:u256 (int) buf_offset + 0 ], (4u2)[2,1,1,0]); - f1 = #VPERMQ(buf.[:u256 (int) buf_offset + 24], (4u2)[2,1,1,0]); + f0 = #VPERMQ(buf.[:u256 (uint) buf_offset + 0 ], (4u2)[2,1,1,0]); + f1 = #VPERMQ(buf.[:u256 (uint) buf_offset + 24], (4u2)[2,1,1,0]); // next, the data is shuffled at byte level. For a given state (in u64s): // - 0xa8a7a6a5a4a3a2a1, 0xb8b7b6b5b4b3b2b1, 0xc8c7c6c5c4c3c2c1, 0xd8d7d6d5d4d3d2d1 @@ -487,7 +487,7 @@ inline fn __write_u128_boundchk if ( condition_8 ) { ms = #update_msf(condition_8, ms); - pol.[:u128 2*(int)ctr] = data; + pol.[:u128 2*(uint)ctr] = data; ctr += 8; } else { @@ -499,7 +499,7 @@ inline fn __write_u128_boundchk if ( condition_4 ) { ms = #update_msf(condition_4, ms); - pol.[:u64 2*(int)ctr] = data_u64; + pol.[:u64 2*(uint)ctr] = data_u64; data_u64 = #VPEXTR_64(data, 1); ctr += 4; } else @@ -509,7 +509,7 @@ inline fn __write_u128_boundchk if ( condition_2 ) { ms = #update_msf(condition_2, ms); - pol.[:u32 2*(int)ctr] = (32u) data_u64; + pol.[:u32 2*(uint)ctr] = (32u) data_u64; data_u64 >>= 32; ctr += 2; } else @@ -519,7 +519,7 @@ inline fn __write_u128_boundchk if ( condition_1 ) { ms = #update_msf(condition_1, ms); - pol.[:u16 2*(int)ctr] = (16u) data_u64; + pol.[:u16 2*(uint)ctr] = (16u) data_u64; ctr += 1; } else { ms = #update_msf(!condition_1, ms); } @@ -545,7 +545,7 @@ inline fn __gen_matrix_buf_rejection_filter24 reg u128 shuffle_0_1 t128; reg u64 good t0_0 t0_1; - f0 = #VPERMQ(buf.[:u256 (int) buf_offset + 0 ], (4u2)[2,1,1,0]); + f0 = #VPERMQ(buf.[:u256 (uint) buf_offset + 0 ], (4u2)[2,1,1,0]); f0 = #VPSHUFB_256(f0, load_shuffle); g0 = #VPSRL_16u16(f0, 4); f0 = #VPBLEND_16u16(f0, g0, 0xAA); @@ -661,10 +661,10 @@ inline fn gen_matrix_get_indexes( t = _t; t <<= 3; // t * 8 b += t; - idx[0] = gmi[(int) b + 0]; - idx[1] = gmi[(int) b + 1]; - idx[2] = gmi[(int) b + 2]; - idx[3] = gmi[(int) b + 3]; + idx[0] = gmi[(uint) b + 0]; + idx[1] = gmi[(uint) b + 1]; + idx[2] = gmi[(uint) b + 2]; + idx[3] = gmi[(uint) b + 3]; return idx; } diff --git a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/indcpa.jinc b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/indcpa.jinc index 6bfb79eb..d1024568 100644 --- a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/indcpa.jinc +++ b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/indcpa.jinc @@ -92,7 +92,7 @@ fn __indcpa_enc_0(#mmx reg u64 sctp, reg ptr u8[MLKEM_INDCPA_MSGBYTES] msgp, reg while (i < MLKEM_SYMBYTES/8) { #declassify t64 = [:u64 pkp]; - publicseed.[:u64 8 * (int)i] = t64; + publicseed.[:u64 8 * (uint)i] = t64; pkp += 8; i += 1; } @@ -164,7 +164,7 @@ fn __indcpa_enc_1( while (i < MLKEM_SYMBYTES/8) { #declassify t64 = [:u64 pkp]; - publicseed.[:u64 8*(int)i] = t64; + publicseed.[:u64 8*(uint)i] = t64; pkp += 8; i += 1; } diff --git a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600.jinc b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600.jinc index e05886c4..ddf38bb9 100644 --- a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600.jinc +++ b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600.jinc @@ -145,7 +145,7 @@ inline fn __keccakf1600(reg ptr u64[25] a) -> reg ptr u64[25] rc = RC[c]; e = keccakf1600_round(e, a, rc); - rc = RC[(int) c + 1]; + rc = RC[(uint) c + 1]; a = keccakf1600_round(a, e, rc); c += 2; diff --git a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600_4x_avx2_compact.jinc b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600_4x_avx2_compact.jinc index 83742c1a..9652cf41 100644 --- a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600_4x_avx2_compact.jinc +++ b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/avx2/keccak/keccakf1600_4x_avx2_compact.jinc @@ -197,7 +197,7 @@ inline fn __keccakf1600_4x(reg ptr u256[25] a) -> reg ptr u256[25] a_s = a; s_e = e; a = a_s; e = s_e; - rc = RC.[(int) c + 32]; + rc = RC.[(uint) c + 32]; a = keccakf1600_4x_round(a, e, rc, r8, r56); // just an expensive pointer swap (#todo request feature) diff --git a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/fips202.jinc b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/fips202.jinc index 5ab544a5..5f780647 100644 --- a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/fips202.jinc +++ b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/fips202.jinc @@ -211,7 +211,7 @@ inline fn __keccakf1600(reg ptr u64[25] a) -> reg ptr u64[25] rc = RC[c]; e = keccakf1600_round(e, a, rc); - rc = RC[(int) c + 1]; + rc = RC[(uint) c + 1]; a = keccakf1600_round(a, e, rc); c += 2; diff --git a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/indcpa.jinc b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/indcpa.jinc index 8e7a489f..e3cd1cf5 100644 --- a/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/indcpa.jinc +++ b/oldsrc-should-delete/crypto_kem/mlkem/mlkem768/amd64/ref/indcpa.jinc @@ -106,7 +106,7 @@ fn __indcpa_enc(#mmx reg u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[ while (i < MLKEM_SYMBYTES/8) { #declassify t64 = [:u64 pkp]; - publicseed.[:u64 8 * (int)i] = t64; + publicseed.[:u64 8 * (uint)i] = t64; pkp += 8; i += 1; } @@ -182,7 +182,7 @@ fn __iindcpa_enc(reg ptr u8[MLKEM_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp, while (i < MLKEM_SYMBYTES/8) { #declassify t64 = [:u64 pkp]; - publicseed.[:u64 8*(int)i] = t64; + publicseed.[:u64 8*(uint)i] = t64; pkp += 8; i += 1; } diff --git a/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc b/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc index 66c2ea06..e690bcec 100644 --- a/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc +++ b/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/avx2/ntt.jinc @@ -670,14 +670,14 @@ fn invntt_tomont_avx2(reg ptr u32[256] poly) -> reg ptr u32[256] { fn ntt_transpose_inner_avx2(reg ptr u32[256] poly_ptr, reg u64 offset) -> reg ptr u32[256] { reg u256 ymm3 ymm4 ymm5 ymm6 ymm7 ymm8 ymm9 ymm10 ymm11; - ymm4 = #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (int) offset]); - ymm5 = #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (int) offset]); - ymm6 = #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (int) offset]); - ymm7 = #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (int) offset]); - ymm8 = #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (int) offset]); - ymm9 = #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (int) offset]); - ymm10 = #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (int) offset]); - ymm11 = #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (int) offset]); + ymm4 = #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (uint) offset]); + ymm5 = #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (uint) offset]); + ymm6 = #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (uint) offset]); + ymm7 = #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (uint) offset]); + ymm8 = #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (uint) offset]); + ymm9 = #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (uint) offset]); + ymm10 = #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (uint) offset]); + ymm11 = #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (uint) offset]); ymm3, ymm8 = shuffle8(ymm4, ymm8); ymm4, ymm9 = shuffle8(ymm5, ymm9); @@ -694,14 +694,14 @@ fn ntt_transpose_inner_avx2(reg ptr u32[256] poly_ptr, reg u64 offset) -> reg pt ymm5, ymm4 = shuffle2(ymm3, ymm4); ymm3, ymm11 = shuffle2(ymm10, ymm11); - #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (int) offset]) = ymm9; - #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (int) offset]) = ymm8; - #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (int) offset]) = ymm7; - #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (int) offset]) = ymm6; - #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (int) offset]) = ymm5; - #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (int) offset]) = ymm4; - #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (int) offset]) = ymm3; - #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (int) offset]) = ymm11; + #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (uint) offset]) = ymm9; + #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (uint) offset]) = ymm8; + #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (uint) offset]) = ymm7; + #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (uint) offset]) = ymm6; + #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (uint) offset]) = ymm5; + #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (uint) offset]) = ymm4; + #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (uint) offset]) = ymm3; + #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (uint) offset]) = ymm11; return poly_ptr; } diff --git a/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/packing.jinc b/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/packing.jinc index 8251a05f..511e7e14 100644 --- a/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/packing.jinc +++ b/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/packing.jinc @@ -32,44 +32,44 @@ fn polyeta_2_pack(reg ptr u32[Li2_polydeg] s1, reg ptr u8[Li2_pack_eta_2_len] de dest0 = _eta; // t0 dest0 -= s1[i]; t1 = _eta; - t1 -= s1[(int) i + 1]; + t1 -= s1[(uint) i + 1]; t1 <<= 3; dest0 |= t1; t2 = _eta; - t2 -= s1[(int) i + 2]; + t2 -= s1[(uint) i + 2]; dest1 = t2; t2 <<= 6; dest0 |= t2; - dest[(int) addr + 0] = (8u) dest0; + dest[(uint) addr + 0] = (8u) dest0; // r[3*i+1] = (t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7); dest1 >>= 2; t3 = _eta; - t3 -= s1[(int) i + 3]; + t3 -= s1[(uint) i + 3]; t3 += t3; dest1 |= t3; t4 = _eta; - t4 -= s1[(int) i + 4]; + t4 -= s1[(uint) i + 4]; t4 <<= 4; dest1 |= t4; t5 = _eta; - t5 -= s1[(int) i + 5]; + t5 -= s1[(uint) i + 5]; dest2 = t5; t5 <<= 7; dest1 |= t5; - dest[(int) addr + 1] = (8u) dest1; + dest[(uint) addr + 1] = (8u) dest1; // r[3*i+2] = (t[5] >> 1) | (t[6] << 2) | (t[7] << 5); dest2 >>= 1; t6 = _eta; - t6 -= s1[(int) i + 6]; + t6 -= s1[(uint) i + 6]; t6 <<= 2; dest2 |= t6; t7 = _eta; - t7 -= s1[(int) i + 7]; + t7 -= s1[(uint) i + 7]; t7 <<= 5; dest2 |= t7; - dest[(int) addr + 2] = (8u) dest2; + dest[(uint) addr + 2] = (8u) dest2; i += 8; } @@ -93,7 +93,7 @@ fn polyeta_4_pack(reg ptr u32[Li2_polydeg] s1, reg ptr u8[Li2_pack_eta_4_len] de lo = _eta; lo -= coeff; - coeff = s1[(int) i + 1]; + coeff = s1[(uint) i + 1]; hi = _eta; hi -= coeff; @@ -125,9 +125,9 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a ?{}, dest_addr = #set0_64(); while (i < Li2_polydeg / 8) { // r->coeffs[8*i+0] = (a[3*i+0] >> 0) & 7; - a0 = (32u) a[(int) src_addr + 0]; - a1 = (32u) a[(int) src_addr + 1]; - a2 = (32u) a[(int) src_addr + 2]; + a0 = (32u) a[(uint) src_addr + 0]; + a1 = (32u) a[(uint) src_addr + 1]; + a2 = (32u) a[(uint) src_addr + 2]; // r->coeffs[8*i+0] = ETA - r->coeffs[8*i+0]; c32_0 = a0; c32_0 &= 0x7; @@ -141,7 +141,7 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a // r->coeffs[8*i+1] = ETA - r->coeffs[8*i+1]; coeff = _eta; coeff -= c32_0; - r[(int) dest_addr + 1] = coeff; + r[(uint) dest_addr + 1] = coeff; // r->coeffs[8*i+2] = ((a[3*i+0] >> 6) | (a[3*i+1] << 2)) & 7; c32_0 = a0 >> 6; c32_0 = #LEA_32(4 * a1 + c32_0); @@ -149,7 +149,7 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a // r->coeffs[8*i+2] = ETA - r->coeffs[8*i+2]; coeff = _eta; coeff -= c32_0; - r[(int) dest_addr + 2] = coeff; + r[(uint) dest_addr + 2] = coeff; // r->coeffs[8*i+3] = (a[3*i+1] >> 1) & 7; c32_0 = a1; c32_0 >>= 1; @@ -157,7 +157,7 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a // r->coeffs[8*i+3] = ETA - r->coeffs[8*i+3]; coeff = _eta; coeff -= c32_0; - r[(int) dest_addr + 3] = coeff; + r[(uint) dest_addr + 3] = coeff; // r->coeffs[8*i+4] = (a[3*i+1] >> 4) & 7; c32_0 = a1; c32_0 >>= 4; @@ -165,7 +165,7 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a // r->coeffs[8*i+4] = ETA - r->coeffs[8*i+4]; coeff = _eta; coeff -= c32_0; - r[(int) dest_addr + 4] = coeff; + r[(uint) dest_addr + 4] = coeff; // r->coeffs[8*i+5] = ((a[3*i+1] >> 7) | (a[3*i+2] << 1)) & 7; c32_0 = a1 >> 7; c32_0 = #LEA_32(2 * a2 + c32_0); @@ -173,7 +173,7 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a // r->coeffs[8*i+5] = ETA - r->coeffs[8*i+5]; coeff = _eta; coeff -= c32_0; - r[(int) dest_addr + 5] = coeff; + r[(uint) dest_addr + 5] = coeff; // r->coeffs[8*i+6] = (a[3*i+2] >> 2) & 7; c32_0 = a2; c32_0 >>= 2; @@ -181,14 +181,14 @@ fn polyeta_2_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_2_len] a // r->coeffs[8*i+6] = ETA - r->coeffs[8*i+6]; coeff = _eta; coeff -= c32_0; - r[(int) dest_addr + 6] = coeff; + r[(uint) dest_addr + 6] = coeff; // r->coeffs[8*i+7] = (a[3*i+2] >> 5) & 7; c32_0 = a2 >> 5; c32_0 &= 0x7; // r->coeffs[8*i+7] = ETA - r->coeffs[8*i+7]; coeff = _eta; coeff -= c32_0; - r[(int) dest_addr + 7] = coeff; + r[(uint) dest_addr + 7] = coeff; i += 1; src_addr += 3; @@ -213,7 +213,7 @@ fn polyeta_4_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_4_len] a // r->coeffs[2*i+0] = ETA - r->coeffs[2*i+0]; x = -x; x += _eta; - r[2 * ((int) i)] = x; + r[2 * ((uint) i)] = x; // r->coeffs[2*i+1] = a[i] >> 4; x = (32u) a[i]; @@ -221,7 +221,7 @@ fn polyeta_4_unpack(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_eta_4_len] a // r->coeffs[2*i+1] = ETA - r->coeffs[2*i+1]; x = -x; x += _eta; - r[2 * ((int) i) + 1] = x; + r[2 * ((uint) i) + 1] = x; i += 1; } @@ -243,59 +243,59 @@ fn polyz_pack_gamma1_217(reg ptr u8[Li2_pack_zlen_gamma1_217] r, reg ptr u32[Li2 while (i < Li2_polydeg / 4) { // t[0] = GAMMA1 - a->coeffs[4*i+0]; t0 = gamma1; - t0 -= a[(int) src_addr + 0]; + t0 -= a[(uint) src_addr + 0]; // r[9*i+0] = t[0]; - r[(int) dest_addr + 0] = (8u) t0; + r[(uint) dest_addr + 0] = (8u) t0; // r[9*i+1] = t[0] >> 8; c32_0 = t0; c32_0 >>= 8; - r[(int) dest_addr + 1] = (8u) c32_0; + r[(uint) dest_addr + 1] = (8u) c32_0; // r[9*i+2] = t[0] >> 16; c32_0 = t0 >> 16; // t[1] = GAMMA1 - a->coeffs[4*i+1]; t1 = gamma1; - t1 -= a[(int) src_addr + 1]; + t1 -= a[(uint) src_addr + 1]; // r[9*i+2] |= t[1] << 2; c32_1 = t1; c32_1 <<= 2; // TODO: opt c32_0 |= c32_1; - r[(int) dest_addr + 2] = (8u) c32_0; + r[(uint) dest_addr + 2] = (8u) c32_0; // r[9*i+3] = t[1] >> 6; c32_0 = t1; c32_0 >>= 6; - r[(int) dest_addr + 3] = (8u) c32_0; + r[(uint) dest_addr + 3] = (8u) c32_0; // r[9*i+4] = t[1] >> 14; c32_0 = t1 >> 14; // t[2] = GAMMA1 - a->coeffs[4*i+2]; t2 = gamma1; - t2 -= a[(int) src_addr + 2]; + t2 -= a[(uint) src_addr + 2]; // r[9*i+4] |= t[2] << 4; c32_1 = t2; c32_1 <<= 4; c32_0 |= c32_1; - r[(int) dest_addr + 4] = (8u) c32_0; + r[(uint) dest_addr + 4] = (8u) c32_0; // r[9*i+5] = t[2] >> 4; c32_0 = t2; c32_0 >>= 4; - r[(int) dest_addr + 5] = (8u) c32_0; + r[(uint) dest_addr + 5] = (8u) c32_0; // r[9*i+6] = t[2] >> 12; c32_0 = t2; c32_0 >>= 12; // t[3] = GAMMA1 - a->coeffs[4*i+3]; t3 = gamma1; - t3 -= a[(int) src_addr + 3]; + t3 -= a[(uint) src_addr + 3]; // r[9*i+6] |= t[3] << 6; c32_1 = t3; c32_1 <<= 6; c32_0 |= c32_1; - r[(int) dest_addr + 6] = (8u) c32_0; + r[(uint) dest_addr + 6] = (8u) c32_0; // r[9*i+7] = t[3] >> 2; c32_0 = t3; c32_0 >>= 2; - r[(int) dest_addr + 7] = (8u) c32_0; + r[(uint) dest_addr + 7] = (8u) c32_0; // r[9*i+8] = t[3] >> 10; c32_0 = t3 >> 10; - r[(int) dest_addr + 8] = (8u) c32_0; + r[(uint) dest_addr + 8] = (8u) c32_0; i += 1; src_addr += 4; @@ -316,15 +316,15 @@ fn polyz_unpack_gamma1_217(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_zlen_ out_addr = #LEA_64(4*i); // r->coeffs[4*i+0] = a[9*i+0]; - x = (32u) a[(int) in_addr + 0]; + x = (32u) a[(uint) in_addr + 0]; // r->coeffs[4*i+0] |= (uint32_t)a[9*i+1] << 8; - c32 = (32u) a[(int) in_addr + 1]; + c32 = (32u) a[(uint) in_addr + 1]; c32 <<= 8; x |= c32; // r->coeffs[4*i+0] |= (uint32_t)a[9*i+2] << 16; - c32 = (32u) a[(int) in_addr + 2]; + c32 = (32u) a[(uint) in_addr + 2]; c32 <<= 16; x |= c32; @@ -337,16 +337,16 @@ fn polyz_unpack_gamma1_217(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_zlen_ r[out_addr + 0] = x; // r->coeffs[4*i+1] = a[9*i+2] >> 2; - x = (32u) a[(int) in_addr + 2]; + x = (32u) a[(uint) in_addr + 2]; x >>= 2; // r->coeffs[4*i+1] |= (uint32_t)a[9*i+3] << 6; - c32 = (32u) a[(int) in_addr + 3]; + c32 = (32u) a[(uint) in_addr + 3]; c32 <<= 6; x |= c32; // r->coeffs[4*i+1] |= (uint32_t)a[9*i+4] << 14; - c32 = (32u) a[(int) in_addr + 4]; + c32 = (32u) a[(uint) in_addr + 4]; c32 <<= 14; x |= c32; @@ -359,16 +359,16 @@ fn polyz_unpack_gamma1_217(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_zlen_ r[out_addr + 1] = x; // r->coeffs[4*i+2] = a[9*i+4] >> 4; - x = (32u) a[(int) in_addr + 4]; + x = (32u) a[(uint) in_addr + 4]; x >>= 4; // r->coeffs[4*i+2] |= (uint32_t)a[9*i+5] << 4; - c32 = (32u) a[(int) in_addr + 5]; + c32 = (32u) a[(uint) in_addr + 5]; c32 <<= 4; x |= c32; // r->coeffs[4*i+2] |= (uint32_t)a[9*i+6] << 12; - c32 = (32u) a[(int) in_addr + 6]; + c32 = (32u) a[(uint) in_addr + 6]; c32 <<= 12; x |= c32; @@ -381,16 +381,16 @@ fn polyz_unpack_gamma1_217(reg ptr u32[Li2_polydeg] r, reg ptr u8[Li2_pack_zlen_ r[out_addr + 2] = x; // r->coeffs[4*i+3] = a[9*i+6] >> 6; - x = (32u) a[(int) in_addr + 6]; + x = (32u) a[(uint) in_addr + 6]; x >>= 6; // r->coeffs[4*i+3] |= (uint32_t)a[9*i+7] << 2; - c32 = (32u) a[(int) in_addr + 7]; + c32 = (32u) a[(uint) in_addr + 7]; c32 <<= 2; x |= c32; // r->coeffs[4*i+3] |= (uint32_t)a[9*i+8] << 10; - c32 = (32u) a[(int) in_addr + 8]; + c32 = (32u) a[(uint) in_addr + 8]; c32 <<= 10; x |= c32; @@ -426,11 +426,11 @@ fn polyz_pack_gamma1_219(reg ptr u8[Li2_pack_zlen_gamma1_219] r, reg ptr u32[Li2 while (i < Li2_polydeg / 2) { // t[0] = GAMMA1 - a->coeffs[2*i+0]; t0 = gamma1; - t0 -= a[(int) src_addr + 0]; + t0 -= a[(uint) src_addr + 0]; // t[1] = GAMMA1 - a->coeffs[2*i+1]; t1 = gamma1; - t1 -= a[(int) src_addr + 1]; + t1 -= a[(uint) src_addr + 1]; // r[5*i+0] = t[0]; x = t0; @@ -439,7 +439,7 @@ fn polyz_pack_gamma1_219(reg ptr u8[Li2_pack_zlen_gamma1_219] r, reg ptr u32[Li2 // r[5*i+1] = t[0] >> 8; x = t0; x >>= 8; - r[(int) dest_addr + 1] = (8u) x; + r[(uint) dest_addr + 1] = (8u) x; // r[5*i+2] = t[0] >> 16; x = t0; @@ -450,17 +450,17 @@ fn polyz_pack_gamma1_219(reg ptr u8[Li2_pack_zlen_gamma1_219] r, reg ptr u32[Li2 x = t1; x <<= 4; c |= (8u) x; - r[(int) dest_addr + 2] = c; + r[(uint) dest_addr + 2] = c; // r[5*i+3] = t[1] >> 4; x = t1; x >>= 4; - r[(int) dest_addr + 3] = (8u) x; + r[(uint) dest_addr + 3] = (8u) x; // r[5*i+4] = t[1] >> 12; x = t1; x >>= 12; - r[(int) dest_addr + 4] = (8u) x; + r[(uint) dest_addr + 4] = (8u) x; i += 1; src_addr += 2; @@ -662,36 +662,36 @@ fn polyt1_unpack(reg ptr u32[Li2_polydeg] t1, reg ptr u8[Li2_pack_t1len] buf) i4 = #LEA_64(4 * i); // i4 = 4 * i i5 = #LEA_64(4 * i + i); // i5 = 5 * i - x0 = (32u)buf[(int) i5 + 0]; - xtmp1 = (32u)buf[(int) i5 + 1]; + x0 = (32u)buf[(uint) i5 + 0]; + xtmp1 = (32u)buf[(uint) i5 + 1]; xtmp2 = xtmp1; xtmp1 <<= 8; x0 |= xtmp1; x0 &= 0x3FF; - t1[(int) i4 + 0] = x0; + t1[(uint) i4 + 0] = x0; x1 = xtmp2 >> 2; - xtmp1 = (32u)buf[(int) i5 + 2]; + xtmp1 = (32u)buf[(uint) i5 + 2]; xtmp2 = xtmp1; xtmp1 <<= 6; x1 |= xtmp1; x1 &= 0x3FF; - t1[(int) i4 + 1] = x1; + t1[(uint) i4 + 1] = x1; x2 = xtmp2 >> 4; - xtmp1 = (32u)buf[(int) i5 + 3]; + xtmp1 = (32u)buf[(uint) i5 + 3]; xtmp2 = xtmp1; xtmp1 <<= 4; x2 |= xtmp1; x2 &= 0x3FF; - t1[(int) i4 + 2] = x2; + t1[(uint) i4 + 2] = x2; x3 = xtmp2 >> 6; - xtmp1 = (32u)buf[(int) i5 + 4]; + xtmp1 = (32u)buf[(uint) i5 + 4]; xtmp1 <<= 2; x3 |= xtmp1; x3 &= 0x3FF; // FIXME: opt: redundant operation - t1[(int) i4 + 3] = x3; + t1[(uint) i4 + 3] = x3; i += 1; } @@ -1112,31 +1112,31 @@ fn polyw1_pack_gamma2_88(reg ptr u32[Li2_polydeg] w1, reg ptr u8[Li2_pack_highbi ?{}, src_addr = #set0_64(); ?{}, dest_addr = #set0_64(); while (i < Li2_polydeg / 4) { - t2 = w1[(int) src_addr + 2]; - t3 = w1[(int) src_addr + 3]; + t2 = w1[(uint) src_addr + 2]; + t3 = w1[(uint) src_addr + 3]; // r[3*i+0] = a->coeffs[4*i+0]; // r[3*i+0] |= a->coeffs[4*i+1] << 6; - c32_0 = w1[(int) src_addr + 1]; + c32_0 = w1[(uint) src_addr + 1]; t1 = c32_0; c32_0 <<= 6; - c32_0 |= w1[(int) src_addr + 0]; - w1_buf[(int) dest_addr + 0] = c32_0; + c32_0 |= w1[(uint) src_addr + 0]; + w1_buf[(uint) dest_addr + 0] = c32_0; // r[3*i+1] = a->coeffs[4*i+1] >> 2; // r[3*i+1] |= a->coeffs[4*i+2] << 4; c32_0 = t1 >> 2; - c32_1 = w1[(int) src_addr + 2]; + c32_1 = w1[(uint) src_addr + 2]; t2 = c32_1; c32_1 <<= 4; c32_0 |= c32_1; - w1_buf[(int) dest_addr + 1] = c32_0; + w1_buf[(uint) dest_addr + 1] = c32_0; // r[3*i+2] = a->coeffs[4*i+2] >> 4; // r[3*i+2] |= a->coeffs[4*i+3] << 2; c32_0 = t2 >> 4; - c32_1 = w1[(int) src_addr + 3]; + c32_1 = w1[(uint) src_addr + 3]; c32_1 <<= 2; // TODO: opt: use LEA instruction c32_0 |= c32_1; - w1_buf[(int) dest_addr + 2] = c32_0; + w1_buf[(uint) dest_addr + 2] = c32_0; i += 1; src_addr += 4; @@ -1156,9 +1156,9 @@ fn polyw1_pack_gamma2_32(reg ptr u32[Li2_polydeg] w1, reg ptr u8[Li2_pack_highbi ?{}, src_addr = #set0_64(); while (i < Li2_polydeg / 2) { // r[i] = a->coeffs[2*i+0] | (a->coeffs[2*i+1] << 4); - c32 = w1[(int) src_addr + 1]; + c32 = w1[(uint) src_addr + 1]; c32 <<= 4; - c32 |= w1[(int) src_addr + 0]; + c32 |= w1[(uint) src_addr + 0]; w1_buf[i] = (8u) c32; i += 1; diff --git a/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/verify_end.jinc b/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/verify_end.jinc index 1f7423f0..4384e1e9 100644 --- a/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/verify_end.jinc +++ b/oldsrc-should-delete/crypto_sign/dilithium/common/amd64/verify_end.jinc @@ -64,7 +64,7 @@ fn unpack_hints(reg ptr u8[Li2_omega + Li2_k] hints_buf, reg ptr u32[Li2_k * Li2 // if(j > k && sig[j] <= sig[j-1]) return 1; idx1 = (64u)hints_buf[j]; if (j > k) { - idx2 = (64u)hints_buf[(int) j-1]; + idx2 = (64u)hints_buf[(uint) j-1]; if (idx1 <= idx2) { fail = 1; } @@ -170,7 +170,7 @@ fn verify_inner(stack ptr u8[Li2_SIGN_LEN] sig, reg u64 m, reg u64 m_len, stack z_to_unpack = polyz_unpack(z_to_unpack, z_buf); z[ii * Li2_polydeg : Li2_polydeg] = z_to_unpack; } - z_normcheck_fail = checknorm_vecl(z, (int) (Li2_gamma1 - Li2_beta)); + z_normcheck_fail = checknorm_vecl(z, (uint) (Li2_gamma1 - Li2_beta)); if (z_normcheck_fail == 0) { z = fft_vec(z); diff --git a/oldsrc-should-delete/crypto_sign/falcon/falcon512/amd64/avx2/butterfly.jinc b/oldsrc-should-delete/crypto_sign/falcon/falcon512/amd64/avx2/butterfly.jinc index 84896911..10e9030b 100644 --- a/oldsrc-should-delete/crypto_sign/falcon/falcon512/amd64/avx2/butterfly.jinc +++ b/oldsrc-should-delete/crypto_sign/falcon/falcon512/amd64/avx2/butterfly.jinc @@ -44,11 +44,11 @@ fn __2_layer_CT_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b } - t0 = twiddlelo.[:u256 (int) twiddle_indx + 0 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 0 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 0 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 0 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 3 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 3 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 3 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 3 * 32]; t4 = #VPMULL_16u16(buff2, t0); @@ -101,8 +101,8 @@ fn __2_layer_CT_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b } - t0 = twiddlelo.[:u256 (int) twiddle_indx + 1 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 1 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 1 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 1 * 32]; t4 = #VPMULL_16u16(buff1, t0); buff1 = #VPMULH_16u16(buff1, t1); @@ -111,8 +111,8 @@ fn __2_layer_CT_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b buff1 = #VPSUB_16u16(buff0, t5); buff0 = #VPADD_16u16(buff0, t5); - t2 = twiddlelo.[:u256 (int) twiddle_indx + 2 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 2 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 2 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 2 * 32]; t4 = #VPMULL_16u16(buff3, t2); buff3 = #VPMULH_16u16(buff3, t3); @@ -121,8 +121,8 @@ fn __2_layer_CT_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b buff3 = #VPSUB_16u16(buff2, t5); buff2 = #VPADD_16u16(buff2, t5); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 4 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 4 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 4 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 4 * 32]; t4 = #VPMULL_16u16(buff5, t0); buff5 = #VPMULH_16u16(buff5, t1); @@ -131,8 +131,8 @@ fn __2_layer_CT_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b buff5 = #VPSUB_16u16(buff4, t5); buff4 = #VPADD_16u16(buff4, t5); - t2 = twiddlelo.[:u256 (int) twiddle_indx + 5 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 5 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 5 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 5 * 32]; t4 = #VPMULL_16u16(buff7, t2); buff7 = #VPMULH_16u16(buff7, t3); @@ -167,10 +167,10 @@ fn __2_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 1 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 1 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 2 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 2 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 1 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 1 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 2 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 2 * 32]; t4 = #VPSUB_16u16(buff0, buff1); t5 = #VPSUB_16u16(buff2, buff3); @@ -185,10 +185,10 @@ fn __2_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b buff1 = #VPSUB_16u16(t4, buff1); buff3 = #VPSUB_16u16(t5, buff3); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 4 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 4 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 5 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 5 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 4 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 4 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 5 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 5 * 32]; t4 = #VPSUB_16u16(buff4, buff5); t5 = #VPSUB_16u16(buff6, buff7); @@ -224,8 +224,8 @@ fn __2_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 0 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 0 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 0 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 0 * 32]; t4 = #VPSUB_16u16(buff0, buff2); t5 = #VPSUB_16u16(buff1, buff3); @@ -240,8 +240,8 @@ fn __2_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b buff2 = #VPSUB_16u16(t4, buff2); buff3 = #VPSUB_16u16(t5, buff3); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 3 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 3 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 3 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 3 * 32]; t4 = #VPSUB_16u16(buff4, buff6); t5 = #VPSUB_16u16(buff5, buff7); @@ -319,8 +319,8 @@ fn __3_layer_CT_butterfly_8( } - t0 = twiddlelo.[:u256 (int) twiddle_indx + 0 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 0 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 0 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 0 * 32]; t2 = #VPMULL_16u16(buff4, t0); t4 = #VPMULL_16u16(buff5, t0); @@ -371,8 +371,8 @@ fn __3_layer_CT_butterfly_8( } - t0 = twiddlelo.[:u256 (int) twiddle_indx + 1 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 1 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 1 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 1 * 32]; t2 = #VPMULL_16u16(buff2, t0); t4 = #VPMULL_16u16(buff3, t0); @@ -387,8 +387,8 @@ fn __3_layer_CT_butterfly_8( buff0 = #VPADD_16u16(buff0, t3); buff1 = #VPADD_16u16(buff1, t5); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 2 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 2 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 2 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 2 * 32]; t2 = #VPMULL_16u16(buff6, t0); t4 = #VPMULL_16u16(buff7, t0); @@ -426,10 +426,10 @@ fn __3_layer_CT_butterfly_8( } - t0 = twiddlelo.[:u256 (int) twiddle_indx + 3 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 3 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 4 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 4 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 3 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 3 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 4 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 4 * 32]; t4 = #VPMULL_16u16(buff1, t0); t0 = #VPMULL_16u16(buff3, t2); @@ -444,10 +444,10 @@ fn __3_layer_CT_butterfly_8( buff0 = #VPADD_16u16(buff0, t5); buff2 = #VPADD_16u16(buff2, t1); - t4 = twiddlelo.[:u256 (int) twiddle_indx + 5 * 32]; - t5 = twiddlehi.[:u256 (int) twiddle_indx + 5 * 32]; - t0 = twiddlelo.[:u256 (int) twiddle_indx + 6 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 6 * 32]; + t4 = twiddlelo.[:u256 (uint) twiddle_indx + 5 * 32]; + t5 = twiddlehi.[:u256 (uint) twiddle_indx + 5 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 6 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 6 * 32]; t2 = #VPMULL_16u16(buff5, t4); t4 = #VPMULL_16u16(buff7, t0); @@ -484,10 +484,10 @@ fn __3_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 3 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 3 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 4 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 4 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 3 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 3 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 4 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 4 * 32]; t4 = #VPSUB_16u16(buff0, buff1); t5 = #VPSUB_16u16(buff2, buff3); @@ -502,10 +502,10 @@ fn __3_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b buff1 = #VPSUB_16u16(t4, buff1); buff3 = #VPSUB_16u16(t5, buff3); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 5 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 5 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 6 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 6 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 5 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 5 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 6 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 6 * 32]; t4 = #VPSUB_16u16(buff4, buff5); t5 = #VPSUB_16u16(buff6, buff7); @@ -543,8 +543,8 @@ fn __3_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 1 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 1 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 1 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 1 * 32]; t2 = #VPSUB_16u16(buff0, buff2); t4 = #VPSUB_16u16(buff1, buff3); @@ -559,8 +559,8 @@ fn __3_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b buff2 = #VPSUB_16u16(t2, t3); buff3 = #VPSUB_16u16(t4, t5); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 2 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 2 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 2 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 2 * 32]; t2 = #VPSUB_16u16(buff4, buff6); t4 = #VPSUB_16u16(buff5, buff7); @@ -598,8 +598,8 @@ fn __3_layer_GS_butterfly_8(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 buff6 b // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 0 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 0 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 0 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 0 * 32]; t2 = #VPSUB_16u16(buff0, buff4); t4 = #VPSUB_16u16(buff1, buff5); @@ -670,10 +670,10 @@ fn __3_layer_GS_butterfly_8_last(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 bu // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 4 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 4 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 5 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 5 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 4 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 4 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 5 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 5 * 32]; t4 = #VPSUB_16u16(buff0, buff1); t5 = #VPSUB_16u16(buff2, buff3); @@ -688,10 +688,10 @@ fn __3_layer_GS_butterfly_8_last(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 bu buff1 = #VPSUB_16u16(t4, buff1); buff3 = #VPSUB_16u16(t5, buff3); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 6 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 6 * 32]; - t2 = twiddlelo.[:u256 (int) twiddle_indx + 7 * 32]; - t3 = twiddlehi.[:u256 (int) twiddle_indx + 7 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 6 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 6 * 32]; + t2 = twiddlelo.[:u256 (uint) twiddle_indx + 7 * 32]; + t3 = twiddlehi.[:u256 (uint) twiddle_indx + 7 * 32]; t4 = #VPSUB_16u16(buff4, buff5); t5 = #VPSUB_16u16(buff6, buff7); @@ -729,8 +729,8 @@ fn __3_layer_GS_butterfly_8_last(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 bu // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 2 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 2 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 2 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 2 * 32]; t2 = #VPSUB_16u16(buff0, buff2); t4 = #VPSUB_16u16(buff1, buff3); @@ -745,8 +745,8 @@ fn __3_layer_GS_butterfly_8_last(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 bu buff2 = #VPSUB_16u16(t2, t3); buff3 = #VPSUB_16u16(t4, t5); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 3 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 3 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 3 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 3 * 32]; t2 = #VPSUB_16u16(buff4, buff6); t4 = #VPSUB_16u16(buff5, buff7); @@ -784,8 +784,8 @@ fn __3_layer_GS_butterfly_8_last(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 bu // ======== - t0 = twiddlelo.[:u256 (int) twiddle_indx + 1 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 1 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 1 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 1 * 32]; t2 = #VPSUB_16u16(buff0, buff4); t4 = #VPSUB_16u16(buff1, buff5); @@ -813,8 +813,8 @@ fn __3_layer_GS_butterfly_8_last(reg u256 buff0 buff1 buff2 buff3 buff4 buff5 bu buff6 = #VPSUB_16u16(t2, t3); buff7 = #VPSUB_16u16(t4, t5); - t0 = twiddlelo.[:u256 (int) twiddle_indx + 0 * 32]; - t1 = twiddlehi.[:u256 (int) twiddle_indx + 0 * 32]; + t0 = twiddlelo.[:u256 (uint) twiddle_indx + 0 * 32]; + t1 = twiddlehi.[:u256 (uint) twiddle_indx + 0 * 32]; t2 = #VPMULL_16u16(buff0, t0); t3 = #VPMULL_16u16(buff1, t0); diff --git a/oldsrc-should-delete/crypto_sign/falcon/falcon512/common/amd64/encode_decode.jinc b/oldsrc-should-delete/crypto_sign/falcon/falcon512/common/amd64/encode_decode.jinc index b216f738..65a66402 100644 --- a/oldsrc-should-delete/crypto_sign/falcon/falcon512/common/amd64/encode_decode.jinc +++ b/oldsrc-should-delete/crypto_sign/falcon/falcon512/common/amd64/encode_decode.jinc @@ -33,7 +33,7 @@ inline fn __modq_decode_8( if(w >= 12289) { is_zero = 1; } - out.[(int) out_i * 2] = (16u)w; + out.[(uint) out_i * 2] = (16u)w; out_i += 1; } @@ -108,7 +108,7 @@ fn __modq_decode_8_unrolled(stack u16[8] out, reg u64 in) is_zero = 1; } - out.[(int) out_i * 2] = (16u)w; + out.[(uint) out_i * 2] = (16u)w; out_i += 1; }