Skip to content

Commit

Permalink
Use the precomputed iota constants instead of lfsr
Browse files Browse the repository at this point in the history
  • Loading branch information
pdimov committed Feb 1, 2025
1 parent cb887e3 commit af1b095
Showing 1 changed file with 73 additions and 79 deletions.
152 changes: 73 additions & 79 deletions include/boost/hash2/detail/keccak.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define BOOST_HASH2_DETAIL_KECCAK_HPP_INCLUDED

// Copyright 2025 Christian Mazakas
// Copyright 2025 Peter Dimov
// Distributed under the Boost Software License, Version 1.0.
// https://www.boost.org/LICENSE_1_0.txt

Expand Down Expand Up @@ -36,112 +37,105 @@ BOOST_FORCEINLINE BOOST_HASH2_SHA3_CONSTEXPR void write_lane( std::uint64_t stat
state[ 5 * y + x ] = lane;
}

// Advances the 8-bit linear feedback shift register `lfsr` by one step, in place.
//
// The register is shifted left by one bit; if the bit shifted out (0x80) was set,
// the feedback value 0x71 is XORed into the result.
//
// Returns 1 if the low bit of the register was set *before* the step, 0 otherwise.
BOOST_FORCEINLINE BOOST_HASH2_SHA3_CONSTEXPR int lfsr(std::uint8_t& lfsr)
{
    bool const low_bit_set = ( lfsr & 0x01 ) != 0;
    bool const high_bit_set = ( lfsr & 0x80 ) != 0;

    // the shift is performed in int; the cast drops the bit that moved past bit 7
    int const feedback = high_bit_set ? 0x71 : 0x00;
    lfsr = static_cast<std::uint8_t>( ( lfsr << 1 ) ^ feedback );

    return low_bit_set ? 1 : 0;
}

// XORs `v` into the lane at column `x`, row `y` of `state`.
// Lanes are addressed as state[ 5 * y + x ].
BOOST_FORCEINLINE BOOST_HASH2_SHA3_CONSTEXPR void xor_lane( std::uint64_t state[ 25 ], int x, int y, std::uint64_t v )
{
    std::uint64_t& lane = state[ 5 * y + x ];
    lane = lane ^ v;
}

BOOST_FORCEINLINE BOOST_HASH2_SHA3_CONSTEXPR void keccak_permute( std::uint64_t state[ 25 ] )
inline BOOST_HASH2_SHA3_CONSTEXPR void keccak_round( std::uint64_t (&state)[ 25 ] )
{
std::uint8_t lfsr_reg = 0x01;
auto const num_rounds = 24;
for( int i = 0; i < num_rounds; ++i )
{
{
// theta
// theta

std::uint64_t C1[ 5 ] = {};
std::uint64_t C2[ 5 ] = {};

for( int x = 0; x < 5; ++x )
{
C1[ x ] = state[ x ] ^ state[ x + 5 ] ^ state[ x + 10 ] ^ state[ x + 15 ] ^ state[ x + 20 ];
C2[ x ] = detail::rotl( C1[ x ], 1 );
}
std::uint64_t C1[ 5 ] = {};
std::uint64_t C2[ 5 ] = {};

for( int x = 0; x < 5; ++x )
{
// in proper modulo math, (x - 1) % 5 is isomorphic to (x + 4 ) % 5
for( int y = 0; y < 5; ++y )
{
state[ 5 * y + x ] ^= C1[ ( x + 4 ) % 5] ^ C2[ ( x + 1 ) % 5 ];
}
}
for( int x = 0; x < 5; ++x )
{
C1[ x ] = state[ x ] ^ state[ x + 5 ] ^ state[ x + 10 ] ^ state[ x + 15 ] ^ state[ x + 20 ];
C2[ x ] = detail::rotl( C1[ x ], 1 );
}

for( int x = 0; x < 5; ++x )
{
// rho and pi fused

// calculate these using Figure 2.4 in the Keccak reference with % 64 applied
int const rho_offsets[ 25 ] =
{
0, 1, 62, 28, 27,
36, 44, 6, 55, 20,
3, 10, 43, 25, 39,
41, 45, 15, 21, 8,
18, 2, 61, 56, 14,
};

// the actual ordering is the reverse of this list
// but to keep the code simple, we use the fact that this operation is linear to apply it
// in a different order so the indexing math is easier/more simple
// otherwise, we run into a case where it's essentially touching the `0 - 1` index of the
// array
int const pi_step[ 24 ] =
{ 1, 6, 9, 22, 14, 20, 2, 12, 13, 19, 23, 15, 4, 24, 21, 8, 16, 5, 3, 18, 17, 11, 7, 10 };

auto lane = detail::rotl( state[ 1 ], rho_offsets[ 1 ] );
for( int t = 0; t < 23; ++t )
// in proper modulo math, (x - 1) % 5 is isomorphic to (x + 4 ) % 5
for( int y = 0; y < 5; ++y )
{
state[ pi_step[ t ] ] = detail::rotl( state[ pi_step[ t + 1 ] ], rho_offsets[ pi_step[ t + 1 ] ] );
state[ 5 * y + x ] ^= C1[ ( x + 4 ) % 5] ^ C2[ ( x + 1 ) % 5 ];
}
state[ pi_step[ 23 ] ] = lane;
}
}

{
// rho and pi fused

// calculate these using Figure 2.4 in the Keccak reference with % 64 applied
int const rho_offsets[ 25 ] =
{
0, 1, 62, 28, 27,
36, 44, 6, 55, 20,
3, 10, 43, 25, 39,
41, 45, 15, 21, 8,
18, 2, 61, 56, 14,
};

// the actual ordering is the reverse of this list
// but to keep the code simple, we use the fact that this operation is linear to apply it
// in a different order so the indexing math is easier/more simple
// otherwise, we run into a case where it's essentially touching the `0 - 1` index of the
// array
int const pi_step[ 24 ] =
{ 1, 6, 9, 22, 14, 20, 2, 12, 13, 19, 23, 15, 4, 24, 21, 8, 16, 5, 3, 18, 17, 11, 7, 10 };

auto lane = detail::rotl( state[ 1 ], rho_offsets[ 1 ] );
for( int t = 0; t < 23; ++t )
{
// chi
state[ pi_step[ t ] ] = detail::rotl( state[ pi_step[ t + 1 ] ], rho_offsets[ pi_step[ t + 1 ] ] );
}
state[ pi_step[ 23 ] ] = lane;
}

std::uint64_t plane[ 5 ] = {};
for( int y = 0; y < 5; ++y )
{
// chi

std::uint64_t plane[ 5 ] = {};
for( int y = 0; y < 5; ++y )
{
for( int x = 0; x < 5; ++x )
{
for( int x = 0; x < 5; ++x )
{
plane[ x ] = read_lane( state, x, y );
}

for( int x = 0; x < 5; ++x )
{
auto v = plane[ x ] ^ ( ( ~plane[ ( x + 1 ) % 5 ] ) & plane[ ( x + 2 ) % 5 ] );
write_lane( state, x, y, v );
}
plane[ x ] = read_lane( state, x, y );
}
}

{
// iota
for( int j = 0; j < 7; ++j )
for( int x = 0; x < 5; ++x )
{
unsigned pos = ( 1 << j ) - 1;
if( lfsr( lfsr_reg ) )
{
xor_lane( state, 0 ,0, ( std::uint64_t{1} << pos ));
}
auto v = plane[ x ] ^ ( ( ~plane[ ( x + 1 ) % 5 ] ) & plane[ ( x + 2 ) % 5 ] );
write_lane( state, x, y, v );
}
}
}
}

// Runs the 24-round Keccak permutation over `state`, in place.
//
// Each round calls keccak_round and then performs the iota step by XORing that
// round's precomputed constant into lane 0.
inline /*BOOST_HASH2_SHA3_CONSTEXPR*/ void keccak_permute( std::uint64_t (&state)[ 25 ] )
{
    // iota round constants, one per round, in round order
    static constexpr std::uint64_t round_constants[ 24 ] =
    {
        0x0000000000000001ull, 0x0000000000008082ull, 0x800000000000808aull,
        0x8000000080008000ull, 0x000000000000808bull, 0x0000000080000001ull,
        0x8000000080008081ull, 0x8000000000008009ull, 0x000000000000008aull,
        0x0000000000000088ull, 0x0000000080008009ull, 0x000000008000000aull,
        0x000000008000808bull, 0x800000000000008bull, 0x8000000000008089ull,
        0x8000000000008003ull, 0x8000000000008002ull, 0x8000000000000080ull,
        0x000000000000800aull, 0x800000008000000aull, 0x8000000080008081ull,
        0x8000000000008080ull, 0x0000000080000001ull, 0x8000000080008008ull,
    };

    for( std::uint64_t const rc: round_constants )
    {
        keccak_round( state );

        // iota
        state[ 0 ] ^= rc;
    }
}

} // namespace detail
} // namespace hash2
} // namespace boost
Expand Down

0 comments on commit af1b095

Please sign in to comment.