Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ inline fn __keccakf1600_4x_avx2(reg ptr u256[25] a) -> reg ptr u256[25]
rc = RC.[c];
e = __round_4x_avx2(e, a, rc, r8, r56);

- rc = RC.[(int) c + 32];
+ rc = RC.[(uint) c + 32];
a = __round_4x_avx2(a, e, rc, r8, r56);

c += 64;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ inline fn __keccakf1600_bmi1(reg ptr u64[25] a) -> reg ptr u64[25]
e = __round_bmi1(e, a, rc);

RC = s_RC;
- rc = RC[(int) c + 1];
+ rc = RC[(uint) c + 1];
a = __round_bmi1(a, e, rc);

c += 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ inline fn __keccakf1600_ref(stack u64[25] a) -> stack u64[25]
rc = RC[c];
e = __round_ref(a, rc);

- rc = RC[(int) c + 1];
+ rc = RC[(uint) c + 1];
a = __round_ref(e, rc);

c += 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ inline fn __keccakf1600_ref1(reg ptr u64[25] a) -> reg ptr u64[25]
rc = RC[c];
e = __round_ref1(e, a, rc);

- rc = RC[(int) c + 1];
+ rc = RC[(uint) c + 1];
a = __round_ref1(a, e, rc);

c += 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ fn _blocks_1_ref(reg ptr u32[8] _H, reg ptr u32[32] sblocks, reg u64 nblocks) ->
s_i = i;
oblocks = i << 4;
for t=0 to 16
- { v = sblocks[(int)oblocks + t];
+ { v = sblocks[(uint)oblocks + t];
v = #BSWAP_32(v);
W[t] = v;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ fn _blocks_1_ref(reg ptr u64[8] _H, reg ptr u64[32] sblocks, reg u64 nblocks) ->
s_i = i;
oblocks = i << 4;
for t=0 to 16
- { v = sblocks[(int)oblocks + t];
+ { v = sblocks[(uint)oblocks + t];
v = #BSWAP_64(v);
W[t] = v;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -464,11 +464,11 @@ fn __load24(reg ptr u8[KYBER_ETA1*KYBER_N/4] buf, reg u64 i) -> reg u32

r = (32u)buf[i];

- t = (32u)buf[(int)i+1];
+ t = (32u)buf[(uint)i+1];
t <<= 8;
r |= t;

- t = (32u)buf[(int)i+2];
+ t = (32u)buf[(uint)i+2];
t <<= 16;
r |= t;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
while(!b)
{
f0 = #VPERMQ(buf.[:u256 pos], 0x94);
- f1 = #VPERMQ(buf.[:u256 24 + (int)pos], 0x94);
+ f1 = #VPERMQ(buf.[:u256 24 + (uint)pos], 0x94);
f0 = #VPSHUFB_256(f0, idx8);
f1 = #VPSHUFB_256(f1, idx8);
g0 = #VPSRL_16u16(f0, 4);
Expand Down Expand Up @@ -438,10 +438,10 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
f0 = #VPSHUFB_256(f0, g0);
f1 = #VPSHUFB_256(f1, g1);

- rp.[:u128 2*(int)ctr] = (128u)f0;
- rp.[:u128 2*(int)t64] = #VEXTRACTI128(f0, 1);
- rp.[:u128 2*(int)t64_1] = (128u)f1;
- rp.[:u128 2*(int)t64_2] = #VEXTRACTI128(f1, 1);
+ rp.[:u128 2*(uint)ctr] = (128u)f0;
+ rp.[:u128 2*(uint)t64] = #VEXTRACTI128(f0, 1);
+ rp.[:u128 2*(uint)t64_1] = (128u)f1;
+ rp.[:u128 2*(uint)t64_2] = #VEXTRACTI128(f1, 1);

ctr = t64_3;

Expand Down Expand Up @@ -483,7 +483,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
l = #VPUNPCKL_16u8(l, h);
f = #VPSHUFB_128(f, l);

- rp.[:u128 2*(int)ctr] = f;
+ rp.[:u128 2*(uint)ctr] = f;
ctr += good;

pos += 12;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
while(!b)
{
f0 = #VPERMQ(buf.[:u256 pos], 0x94);
- f1 = #VPERMQ(buf.[:u256 24 + (int)pos], 0x94);
+ f1 = #VPERMQ(buf.[:u256 24 + (uint)pos], 0x94);
f0 = #VPSHUFB_256(f0, idx8);
f1 = #VPSHUFB_256(f1, idx8);
g0 = #VPSRL_16u16(f0, 4);
Expand Down Expand Up @@ -451,10 +451,10 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
f0 = #VPSHUFB_256(f0, g0);
f1 = #VPSHUFB_256(f1, g1);

- rp.[:u128 2*(int)ctr] = (128u)f0;
- rp.[:u128 2*(int)t64] = #VEXTRACTI128(f0, 1);
- rp.[:u128 2*(int)t64_1] = (128u)f1;
- rp.[:u128 2*(int)t64_2] = #VEXTRACTI128(f1, 1);
+ rp.[:u128 2*(uint)ctr] = (128u)f0;
+ rp.[:u128 2*(uint)t64] = #VEXTRACTI128(f0, 1);
+ rp.[:u128 2*(uint)t64_1] = (128u)f1;
+ rp.[:u128 2*(uint)t64_2] = #VEXTRACTI128(f1, 1);

ctr = t64_3;

Expand Down Expand Up @@ -496,7 +496,7 @@ fn _rej_uniform_avx(reg ptr u16[KYBER_N] rp, reg ptr u8[REJ_UNIFORM_AVX_BUFLEN]
l = #VPUNPCKL_16u8(l, h);
f = #VPSHUFB_128(f, l);

- rp.[:u128 2*(int)ctr] = f;
+ rp.[:u128 2*(uint)ctr] = f;
ctr += good;

pos += 12;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ fn __indcpa_enc(stack u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[KYB
while (i < KYBER_SYMBYTES/8)
{
t64 = [:u64 pkp];
- publicseed.[:u64 8 * (int)i] = t64;
+ publicseed.[:u64 8 * (uint)i] = t64;
pkp += 8;
i += 1;
}
Expand Down Expand Up @@ -193,7 +193,7 @@ fn __iindcpa_enc(reg ptr u8[KYBER_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp,
while (i < KYBER_SYMBYTES/8)
{
t64 = [:u64 pkp];
- publicseed.[:u64 8*(int)i] = t64;
+ publicseed.[:u64 8*(uint)i] = t64;
pkp += 8;
i += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,8 @@ inline fn __gen_matrix_buf_rejection_filter48
// - so the last 8 bytes will be discarded:
// - 0x01aaaaaaaaaaaa08, 0x01bbbbbbbbbbbb08, 0x01bbbbbbbbbbbb08, 0x01cccccccccccc08

- f0 = #VPERMQ(buf.[:u256 (int) buf_offset + 0 ], (4u2)[2,1,1,0]);
- f1 = #VPERMQ(buf.[:u256 (int) buf_offset + 24], (4u2)[2,1,1,0]);
+ f0 = #VPERMQ(buf.[:u256 (uint) buf_offset + 0 ], (4u2)[2,1,1,0]);
+ f1 = #VPERMQ(buf.[:u256 (uint) buf_offset + 24], (4u2)[2,1,1,0]);

// next, the data is shuffled at byte level. For a given state (in u64s):
// - 0xa8a7a6a5a4a3a2a1, 0xb8b7b6b5b4b3b2b1, 0xc8c7c6c5c4c3c2c1, 0xd8d7d6d5d4d3d2d1
Expand Down Expand Up @@ -487,7 +487,7 @@ inline fn __write_u128_boundchk
if ( condition_8 ) {
ms = #update_msf(condition_8, ms);

- pol.[:u128 2*(int)ctr] = data;
+ pol.[:u128 2*(uint)ctr] = data;
ctr += 8;
} else
{
Expand All @@ -499,7 +499,7 @@ inline fn __write_u128_boundchk
if ( condition_4 ) {
ms = #update_msf(condition_4, ms);

- pol.[:u64 2*(int)ctr] = data_u64;
+ pol.[:u64 2*(uint)ctr] = data_u64;
data_u64 = #VPEXTR_64(data, 1);
ctr += 4;
} else
Expand All @@ -509,7 +509,7 @@ inline fn __write_u128_boundchk
if ( condition_2 ) {
ms = #update_msf(condition_2, ms);

- pol.[:u32 2*(int)ctr] = (32u) data_u64;
+ pol.[:u32 2*(uint)ctr] = (32u) data_u64;
data_u64 >>= 32;
ctr += 2;
} else
Expand All @@ -519,7 +519,7 @@ inline fn __write_u128_boundchk
if ( condition_1 ) {
ms = #update_msf(condition_1, ms);

- pol.[:u16 2*(int)ctr] = (16u) data_u64;
+ pol.[:u16 2*(uint)ctr] = (16u) data_u64;
ctr += 1;
} else
{ ms = #update_msf(!condition_1, ms); }
Expand All @@ -545,7 +545,7 @@ inline fn __gen_matrix_buf_rejection_filter24
reg u128 shuffle_0_1 t128;
reg u64 good t0_0 t0_1;

- f0 = #VPERMQ(buf.[:u256 (int) buf_offset + 0 ], (4u2)[2,1,1,0]);
+ f0 = #VPERMQ(buf.[:u256 (uint) buf_offset + 0 ], (4u2)[2,1,1,0]);
f0 = #VPSHUFB_256(f0, load_shuffle);
g0 = #VPSRL_16u16(f0, 4);
f0 = #VPBLEND_16u16(f0, g0, 0xAA);
Expand Down Expand Up @@ -661,10 +661,10 @@ inline fn gen_matrix_get_indexes(
t = _t; t <<= 3; // t * 8
b += t;

- idx[0] = gmi[(int) b + 0];
- idx[1] = gmi[(int) b + 1];
- idx[2] = gmi[(int) b + 2];
- idx[3] = gmi[(int) b + 3];
+ idx[0] = gmi[(uint) b + 0];
+ idx[1] = gmi[(uint) b + 1];
+ idx[2] = gmi[(uint) b + 2];
+ idx[3] = gmi[(uint) b + 3];

return idx;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ fn __indcpa_enc_0(#mmx reg u64 sctp, reg ptr u8[MLKEM_INDCPA_MSGBYTES] msgp, reg
while (i < MLKEM_SYMBYTES/8)
{
#declassify t64 = [:u64 pkp];
- publicseed.[:u64 8 * (int)i] = t64;
+ publicseed.[:u64 8 * (uint)i] = t64;
pkp += 8;
i += 1;
}
Expand Down Expand Up @@ -164,7 +164,7 @@ fn __indcpa_enc_1(
while (i < MLKEM_SYMBYTES/8)
{
#declassify t64 = [:u64 pkp];
- publicseed.[:u64 8*(int)i] = t64;
+ publicseed.[:u64 8*(uint)i] = t64;
pkp += 8;
i += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ inline fn __keccakf1600(reg ptr u64[25] a) -> reg ptr u64[25]
rc = RC[c];
e = keccakf1600_round(e, a, rc);

- rc = RC[(int) c + 1];
+ rc = RC[(uint) c + 1];
a = keccakf1600_round(a, e, rc);

c += 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ inline fn __keccakf1600_4x(reg ptr u256[25] a) -> reg ptr u256[25]
a_s = a; s_e = e;
a = a_s; e = s_e;

- rc = RC.[(int) c + 32];
+ rc = RC.[(uint) c + 32];
a = keccakf1600_4x_round(a, e, rc, r8, r56);

// just an expensive pointer swap (#todo request feature)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ inline fn __keccakf1600(reg ptr u64[25] a) -> reg ptr u64[25]
rc = RC[c];
e = keccakf1600_round(e, a, rc);

- rc = RC[(int) c + 1];
+ rc = RC[(uint) c + 1];
a = keccakf1600_round(a, e, rc);

c += 2;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ fn __indcpa_enc(#mmx reg u64 sctp, reg ptr u8[32] msgp, reg u64 pkp, reg ptr u8[
while (i < MLKEM_SYMBYTES/8)
{
#declassify t64 = [:u64 pkp];
- publicseed.[:u64 8 * (int)i] = t64;
+ publicseed.[:u64 8 * (uint)i] = t64;
pkp += 8;
i += 1;
}
Expand Down Expand Up @@ -182,7 +182,7 @@ fn __iindcpa_enc(reg ptr u8[MLKEM_CT_LEN] ctp, reg ptr u8[32] msgp, reg u64 pkp,
while (i < MLKEM_SYMBYTES/8)
{
#declassify t64 = [:u64 pkp];
- publicseed.[:u64 8*(int)i] = t64;
+ publicseed.[:u64 8*(uint)i] = t64;
pkp += 8;
i += 1;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -670,14 +670,14 @@ fn invntt_tomont_avx2(reg ptr u32[256] poly) -> reg ptr u32[256] {
fn ntt_transpose_inner_avx2(reg ptr u32[256] poly_ptr, reg u64 offset) -> reg ptr u32[256] {
reg u256 ymm3 ymm4 ymm5 ymm6 ymm7 ymm8 ymm9 ymm10 ymm11;

- ymm4 = #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (int) offset]);
- ymm5 = #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (int) offset]);
- ymm6 = #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (int) offset]);
- ymm7 = #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (int) offset]);
- ymm8 = #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (int) offset]);
- ymm9 = #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (int) offset]);
- ymm10 = #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (int) offset]);
- ymm11 = #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (int) offset]);
+ ymm4 = #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (uint) offset]);
+ ymm5 = #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (uint) offset]);
+ ymm6 = #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (uint) offset]);
+ ymm7 = #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (uint) offset]);
+ ymm8 = #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (uint) offset]);
+ ymm9 = #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (uint) offset]);
+ ymm10 = #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (uint) offset]);
+ ymm11 = #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (uint) offset]);

ymm3, ymm8 = shuffle8(ymm4, ymm8);
ymm4, ymm9 = shuffle8(ymm5, ymm9);
Expand All @@ -694,14 +694,14 @@ fn ntt_transpose_inner_avx2(reg ptr u32[256] poly_ptr, reg u64 offset) -> reg pt
ymm5, ymm4 = shuffle2(ymm3, ymm4);
ymm3, ymm11 = shuffle2(ymm10, ymm11);

- #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (int) offset]) = ymm9;
- #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (int) offset]) = ymm8;
- #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (int) offset]) = ymm7;
- #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (int) offset]) = ymm6;
- #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (int) offset]) = ymm5;
- #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (int) offset]) = ymm4;
- #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (int) offset]) = ymm3;
- #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (int) offset]) = ymm11;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 0 + (uint) offset]) = ymm9;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 1 + (uint) offset]) = ymm8;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 2 + (uint) offset]) = ymm7;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 3 + (uint) offset]) = ymm6;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 4 + (uint) offset]) = ymm5;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 5 + (uint) offset]) = ymm4;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 6 + (uint) offset]) = ymm3;
+ #VMOVDQU_256(poly_ptr.[:u256 32 * 7 + (uint) offset]) = ymm11;

return poly_ptr;
}
Expand Down
Loading
Loading