diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 498a5683a..837032e65 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -364,21 +364,6 @@ jobs: acvp: false nix-shell: ${{ matrix.compiler.shell }} cflags: "-std=c17" - # The purpose of this job is to test non-default yet valid configurations - hol_light_proofs: - name: hol-light proofs - runs-on: pqcp-arm64 - if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork - needs: [quickcheck, quickcheck-windows, quickcheck-c90, quickcheck-lib, examples, lint, lint-markdown-link] - steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - uses: ./.github/actions/setup-shell - with: - gh_token: ${{ secrets.GITHUB_TOKEN }} - nix-shell: 'hol_light' - script: | - make -C proofs/hol_light/arm p256/bignum_montmul_p256.correct - config_variations: name: Non-standard configurations needs: [quickcheck, quickcheck-windows, quickcheck-c90, quickcheck-lib, examples, lint, lint-markdown-link] diff --git a/.github/workflows/hol_light.yml b/.github/workflows/hol_light.yml new file mode 100644 index 000000000..d2565bfd5 --- /dev/null +++ b/.github/workflows/hol_light.yml @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: Apache-2.0 + +name: HOL-Light +permissions: + contents: read +on: + push: + branches: ["main"] + # Only run upon changes to AArch64 NTT or invNTT + paths: + - 'proofs/hol_light/arm/**/*.S' + pull_request: + branches: ["main"] + paths: + # Only run upon changes to AArch64 NTT or invNTT + - 'proofs/hol_light/arm/**/*.S' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + hol_light_proofs: + strategy: + matrix: + proof: [mlkem_ntt,mlkem_intt] + name: HOL Light proof for ${{ matrix.proof }}.S + runs-on: pqcp-arm64 + if: github.repository_owner == 'pq-code-package' && !github.event.pull_request.head.repo.fork + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - uses: ./.github/actions/setup-shell + with: + gh_token: ${{ secrets.GITHUB_TOKEN }} + nix-shell: 'hol_light' + script: | + make -C proofs/hol_light/arm mlkem/${{ matrix.proof }}.correct diff --git a/proofs/hol_light/arm/Makefile b/proofs/hol_light/arm/Makefile index c0b209704..efc58b83f 100644 --- a/proofs/hol_light/arm/Makefile +++ b/proofs/hol_light/arm/Makefile @@ -3,25 +3,20 @@ # SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 ############################################################################# -OSTYPE_RESULT=$(shell uname -s) -ARCHTYPE_RESULT=$(shell uname -m) - -# Assembler directives that mark symbols as .hidden -# or .private_extern can be enabled by passing -# in the S2N_BN_HIDE_SYMBOLS parameter as: # -# make S2N_BN_HIDE_SYMBOLS=1 +# This Makefile is derived from the Makefile arm/Makefile in s2n-bignum. +# - Remove all s2n-bignum proofs and tutorial, add mlkem-native proofs +# - Minor path modifications to support base theories from s2n-bignum +# to reside in a separate read-only directory # -SRC ?= $(S2N_BIGNUM_DIR) -SRC_ARM ?= $(SRC)/arm +.DEFAULT_GOAL := run_proofs -ifeq ($(S2N_BN_HIDE_SYMBOLS),1) -SYMBOL_HIDING=-DS2N_BN_HIDE_SYMBOLS=1 -else -SYMBOL_HIDING= -endif +OSTYPE_RESULT=$(shell uname -s) +ARCHTYPE_RESULT=$(shell uname -m) +SRC ?= $(S2N_BIGNUM_DIR) +SRC_ARM ?= $(SRC)/arm # Add explicit language input parameter to cpp, otherwise the use of #n for # numeric literals in ARM code is a problem when used inside #define macros @@ -69,344 +64,18 @@ endif endif endif -# List of object files for point operations and bignum operations - -POINT_OBJ = curve25519/curve25519_ladderstep.o \ - curve25519/curve25519_ladderstep_alt.o \ - curve25519/curve25519_pxscalarmul.o \ - curve25519/curve25519_pxscalarmul_alt.o \ - curve25519/curve25519_x25519.o \ - curve25519/curve25519_x25519_alt.o \ - curve25519/curve25519_x25519_byte.o \ - curve25519/curve25519_x25519_byte_alt.o \ - curve25519/curve25519_x25519base.o \ - curve25519/curve25519_x25519base_alt.o \ - curve25519/curve25519_x25519base_byte.o \ - curve25519/curve25519_x25519base_byte_alt.o \ - curve25519/edwards25519_decode.o \ - curve25519/edwards25519_decode_alt.o \ - curve25519/edwards25519_encode.o \ - curve25519/edwards25519_epadd.o \ - curve25519/edwards25519_epadd_alt.o \ - curve25519/edwards25519_epdouble.o \ - curve25519/edwards25519_epdouble_alt.o \ - curve25519/edwards25519_pdouble.o \ - curve25519/edwards25519_pdouble_alt.o \ - curve25519/edwards25519_pepadd.o \ - curve25519/edwards25519_pepadd_alt.o \ - curve25519/edwards25519_scalarmulbase.o \ - curve25519/edwards25519_scalarmulbase_alt.o \ - curve25519/edwards25519_scalarmuldouble.o \ - curve25519/edwards25519_scalarmuldouble_alt.o \ - p256/p256_montjadd.o \ - p256/p256_montjadd_alt.o \ - p256/p256_montjdouble.o \ - p256/p256_montjdouble_alt.o \ - p256/p256_montjmixadd.o \ - p256/p256_montjmixadd_alt.o \ - p256/p256_montjscalarmul.o \ - p256/p256_montjscalarmul_alt.o \ - p256/p256_scalarmul.o \ - p256/p256_scalarmul_alt.o \ - p256/p256_scalarmulbase.o \ - p256/p256_scalarmulbase_alt.o \ - p384/p384_montjadd.o \ - p384/p384_montjadd_alt.o \ - p384/p384_montjdouble.o \ - p384/p384_montjdouble_alt.o \ - p384/p384_montjmixadd.o \ - p384/p384_montjmixadd_alt.o \ - p384/p384_montjscalarmul.o \ - p384/p384_montjscalarmul_alt.o \ - p521/p521_jadd.o \ - p521/p521_jadd_alt.o \ - p521/p521_jdouble.o \ - p521/p521_jdouble_alt.o \ - p521/p521_jmixadd.o \ - p521/p521_jmixadd_alt.o \ - p521/p521_jscalarmul.o \ - p521/p521_jscalarmul_alt.o \ - secp256k1/secp256k1_jadd.o \ - secp256k1/secp256k1_jadd_alt.o \ - secp256k1/secp256k1_jdouble.o \ - secp256k1/secp256k1_jdouble_alt.o \ - secp256k1/secp256k1_jmixadd.o \ - secp256k1/secp256k1_jmixadd_alt.o \ - sm2/sm2_montjadd.o \ - sm2/sm2_montjadd_alt.o \ - sm2/sm2_montjdouble.o \ - sm2/sm2_montjdouble_alt.o \ - sm2/sm2_montjmixadd.o \ - sm2/sm2_montjmixadd_alt.o \ - sm2/sm2_montjscalarmul.o \ - sm2/sm2_montjscalarmul_alt.o - -BIGNUM_OBJ = curve25519/bignum_add_p25519.o \ - curve25519/bignum_cmul_p25519.o \ - curve25519/bignum_double_p25519.o \ - curve25519/bignum_inv_p25519.o \ - curve25519/bignum_invsqrt_p25519.o \ - curve25519/bignum_invsqrt_p25519_alt.o \ - curve25519/bignum_madd_n25519.o \ - curve25519/bignum_madd_n25519_alt.o \ - curve25519/bignum_mod_m25519_4.o \ - curve25519/bignum_mod_n25519.o \ - curve25519/bignum_mod_n25519_4.o \ - curve25519/bignum_mod_p25519_4.o \ - curve25519/bignum_mul_p25519.o \ - curve25519/bignum_mul_p25519_alt.o \ - curve25519/bignum_neg_p25519.o \ - curve25519/bignum_optneg_p25519.o \ - curve25519/bignum_sqr_p25519.o \ - curve25519/bignum_sqr_p25519_alt.o \ - curve25519/bignum_sqrt_p25519.o \ - curve25519/bignum_sqrt_p25519_alt.o \ - curve25519/bignum_sub_p25519.o \ - fastmul/bignum_emontredc_8n.o \ - fastmul/bignum_emontredc_8n_neon.o \ - fastmul/bignum_emontredc_8n_cdiff.o \ - fastmul/bignum_kmul_16_32.o \ - fastmul/bignum_kmul_16_32_neon.o \ - fastmul/bignum_kmul_32_64.o \ - fastmul/bignum_kmul_32_64_neon.o \ - fastmul/bignum_ksqr_16_32.o \ - fastmul/bignum_ksqr_16_32_neon.o \ - fastmul/bignum_ksqr_32_64.o \ - fastmul/bignum_ksqr_32_64_neon.o \ - fastmul/bignum_mul_4_8.o \ - fastmul/bignum_mul_4_8_alt.o \ - fastmul/bignum_mul_6_12.o \ - fastmul/bignum_mul_6_12_alt.o \ - fastmul/bignum_mul_8_16.o \ - fastmul/bignum_mul_8_16_alt.o \ - fastmul/bignum_mul_8_16_neon.o \ - fastmul/bignum_sqr_4_8.o \ - fastmul/bignum_sqr_4_8_alt.o \ - fastmul/bignum_sqr_6_12.o \ - fastmul/bignum_sqr_6_12_alt.o \ - fastmul/bignum_sqr_8_16.o \ - fastmul/bignum_sqr_8_16_alt.o \ - fastmul/bignum_sqr_8_16_neon.o \ - generic/bignum_add.o \ - generic/bignum_amontifier.o \ - generic/bignum_amontmul.o \ - generic/bignum_amontredc.o \ - generic/bignum_amontsqr.o \ - generic/bignum_bitfield.o \ - generic/bignum_bitsize.o \ - generic/bignum_cdiv.o \ - generic/bignum_cdiv_exact.o \ - generic/bignum_cld.o \ - generic/bignum_clz.o \ - generic/bignum_cmadd.o \ - generic/bignum_cmnegadd.o \ - generic/bignum_cmod.o \ - generic/bignum_cmul.o \ - generic/bignum_coprime.o \ - generic/bignum_copy.o \ - generic/bignum_copy_row_from_table.o \ - generic/bignum_copy_row_from_table_8n_neon.o \ - generic/bignum_copy_row_from_table_16_neon.o \ - generic/bignum_copy_row_from_table_32_neon.o \ - generic/bignum_ctd.o \ - generic/bignum_ctz.o \ - generic/bignum_demont.o \ - generic/bignum_digit.o \ - generic/bignum_digitsize.o \ - generic/bignum_divmod10.o \ - generic/bignum_emontredc.o \ - generic/bignum_eq.o \ - generic/bignum_even.o \ - generic/bignum_ge.o \ - generic/bignum_gt.o \ - generic/bignum_iszero.o \ - generic/bignum_le.o \ - generic/bignum_lt.o \ - generic/bignum_madd.o \ - generic/bignum_modadd.o \ - generic/bignum_moddouble.o \ - generic/bignum_modexp.o \ - generic/bignum_modifier.o \ - generic/bignum_modinv.o \ - generic/bignum_modoptneg.o \ - generic/bignum_modsub.o \ - generic/bignum_montifier.o \ - generic/bignum_montmul.o \ - generic/bignum_montredc.o \ - generic/bignum_montsqr.o \ - generic/bignum_mul.o \ - generic/bignum_muladd10.o \ - generic/bignum_mux.o \ - generic/bignum_mux16.o \ - generic/bignum_negmodinv.o \ - generic/bignum_nonzero.o \ - generic/bignum_normalize.o \ - generic/bignum_odd.o \ - generic/bignum_of_word.o \ - generic/bignum_optadd.o \ - generic/bignum_optneg.o \ - generic/bignum_optsub.o \ - generic/bignum_optsubadd.o \ - generic/bignum_pow2.o \ - generic/bignum_shl_small.o \ - generic/bignum_shr_small.o \ - generic/bignum_sqr.o \ - generic/bignum_sub.o \ - generic/word_bytereverse.o \ - generic/word_clz.o \ - generic/word_ctz.o \ - generic/word_divstep59.o \ - generic/word_max.o \ - generic/word_min.o \ - generic/word_negmodinv.o \ - generic/word_popcount.o \ - generic/word_recip.o \ - p256/bignum_add_p256.o \ - p256/bignum_bigendian_4.o \ - p256/bignum_cmul_p256.o \ - p256/bignum_deamont_p256.o \ - p256/bignum_demont_p256.o \ - p256/bignum_double_p256.o \ - p256/bignum_half_p256.o \ - p256/bignum_inv_p256.o \ - p256/bignum_littleendian_4.o \ - p256/bignum_mod_n256.o \ - p256/bignum_mod_n256_4.o \ - p256/bignum_mod_p256.o \ - p256/bignum_mod_p256_4.o \ - p256/bignum_montinv_p256.o \ - p256/bignum_montmul_p256.o \ - p256/bignum_montmul_p256_alt.o \ - p256/bignum_montmul_p256_neon.o \ - p256/bignum_montsqr_p256.o \ - p256/bignum_montsqr_p256_alt.o \ - p256/bignum_montsqr_p256_neon.o \ - p256/bignum_mux_4.o \ - p256/bignum_neg_p256.o \ - p256/bignum_nonzero_4.o \ - p256/bignum_optneg_p256.o \ - p256/bignum_sub_p256.o \ - p256/bignum_tomont_p256.o \ - p256/bignum_triple_p256.o \ - p384/bignum_add_p384.o \ - p384/bignum_bigendian_6.o \ - p384/bignum_cmul_p384.o \ - p384/bignum_deamont_p384.o \ - p384/bignum_demont_p384.o \ - p384/bignum_double_p384.o \ - p384/bignum_half_p384.o \ - p384/bignum_inv_p384.o \ - p384/bignum_littleendian_6.o \ - p384/bignum_mod_n384.o \ - p384/bignum_mod_n384_6.o \ - p384/bignum_mod_p384.o \ - p384/bignum_mod_p384_6.o \ - p384/bignum_montinv_p384.o \ - p384/bignum_montmul_p384.o \ - p384/bignum_montmul_p384_alt.o \ - p384/bignum_montmul_p384_neon.o \ - p384/bignum_montsqr_p384.o \ - p384/bignum_montsqr_p384_alt.o \ - p384/bignum_montsqr_p384_neon.o \ - p384/bignum_mux_6.o \ - p384/bignum_neg_p384.o \ - p384/bignum_nonzero_6.o \ - p384/bignum_optneg_p384.o \ - p384/bignum_sub_p384.o \ - p384/bignum_tomont_p384.o \ - p384/bignum_triple_p384.o \ - p521/bignum_add_p521.o \ - p521/bignum_cmul_p521.o \ - p521/bignum_deamont_p521.o \ - p521/bignum_demont_p521.o \ - p521/bignum_double_p521.o \ - p521/bignum_fromlebytes_p521.o \ - p521/bignum_half_p521.o \ - p521/bignum_inv_p521.o \ - p521/bignum_mod_n521_9.o \ - p521/bignum_mod_p521_9.o \ - p521/bignum_montmul_p521.o \ - p521/bignum_montmul_p521_alt.o \ - p521/bignum_montmul_p521_neon.o \ - p521/bignum_montsqr_p521.o \ - p521/bignum_montsqr_p521_alt.o \ - p521/bignum_montsqr_p521_neon.o \ - p521/bignum_mul_p521.o \ - p521/bignum_mul_p521_alt.o \ - p521/bignum_mul_p521_neon.o \ - p521/bignum_neg_p521.o \ - p521/bignum_optneg_p521.o \ - p521/bignum_sqr_p521.o \ - p521/bignum_sqr_p521_alt.o \ - p521/bignum_sqr_p521_neon.o \ - p521/bignum_sub_p521.o \ - p521/bignum_tolebytes_p521.o \ - p521/bignum_tomont_p521.o \ - p521/bignum_triple_p521.o \ - secp256k1/bignum_add_p256k1.o \ - secp256k1/bignum_cmul_p256k1.o \ - secp256k1/bignum_deamont_p256k1.o \ - secp256k1/bignum_demont_p256k1.o \ - secp256k1/bignum_double_p256k1.o \ - secp256k1/bignum_half_p256k1.o \ - secp256k1/bignum_mod_n256k1_4.o \ - secp256k1/bignum_mod_p256k1_4.o \ - secp256k1/bignum_montmul_p256k1.o \ - secp256k1/bignum_montmul_p256k1_alt.o \ - secp256k1/bignum_montsqr_p256k1.o \ - secp256k1/bignum_montsqr_p256k1_alt.o \ - secp256k1/bignum_mul_p256k1.o \ - secp256k1/bignum_mul_p256k1_alt.o \ - secp256k1/bignum_neg_p256k1.o \ - secp256k1/bignum_optneg_p256k1.o \ - secp256k1/bignum_sqr_p256k1.o \ - secp256k1/bignum_sqr_p256k1_alt.o \ - secp256k1/bignum_sub_p256k1.o \ - secp256k1/bignum_tomont_p256k1.o \ - secp256k1/bignum_triple_p256k1.o \ - sm2/bignum_add_sm2.o \ - sm2/bignum_cmul_sm2.o \ - sm2/bignum_deamont_sm2.o \ - sm2/bignum_demont_sm2.o \ - sm2/bignum_double_sm2.o \ - sm2/bignum_half_sm2.o \ - sm2/bignum_inv_sm2.o \ - sm2/bignum_mod_nsm2.o \ - sm2/bignum_mod_nsm2_4.o \ - sm2/bignum_mod_sm2.o \ - sm2/bignum_mod_sm2_4.o \ - sm2/bignum_montinv_sm2.o \ - sm2/bignum_montmul_sm2.o \ - sm2/bignum_montmul_sm2_alt.o \ - sm2/bignum_montsqr_sm2.o \ - sm2/bignum_montsqr_sm2_alt.o \ - sm2/bignum_neg_sm2.o \ - sm2/bignum_optneg_sm2.o \ - sm2/bignum_sub_sm2.o \ - sm2/bignum_tomont_sm2.o \ - sm2/bignum_triple_sm2.o - -UNOPT_OBJ = p256/unopt/p256_montjadd.o \ - p256/unopt/p256_montjdouble.o \ - p384/unopt/p384_montjadd.o \ - p384/unopt/p384_montjdouble.o \ - fastmul/unopt/bignum_emontredc_8n_cdiff_base.o - -OBJ = $(POINT_OBJ) $(BIGNUM_OBJ) +OBJ = mlkem/mlkem_ntt.o mlkem/mlkem_intt.o # According to # https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms, # x18 should not be used for Apple platforms. Check this using grep. - -$(OBJ): %.o : $(SRC_ARM)/%.S +$(OBJ): %.o : %.S $(Q)[ -d $(@D) ] || mkdir -p $(@D) cat $< | $(PREPROCESS) | $(SPLIT) | grep -v -E '^\s+.quad\s+0x[0-9a-f]+$$' | $(ASSEMBLE) -o $@ - $(OBJDUMP) $@ | ( ( ! grep --ignore-case -E 'w18|[^0]x18' ) || ( rm $@ ; exit 1 ) ) cat $< | $(PREPROCESS) | $(SPLIT) | $(ASSEMBLE) -o $@ - -libs2nbignum.a: $(OBJ) ; ar -rc libs2nbignum.a $(OBJ) - -clean:; rm -f libs2nbignum.a */*.o */*/*.o */*.correct */*.native +clean:; rm -f */*.o */*/*.o */*.correct */*.native # Proof-related parts # @@ -420,9 +89,11 @@ clean:; rm -f libs2nbignum.a */*.o */*/*.o */*.correct */*.native # in your home directory, and do "make" inside the subdirectory hol-light, # then the following HOLDIR setting should be right: -HOLDIR?=$(HOME)/hol-light +HOLDIR?=$(HOLLIGHTDIR) HOLLIGHT:=$(HOLDIR)/hol.sh +BASE?=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) + PROOF_BINS = $(OBJ:.o=.native) PROOF_LOGS = $(OBJ:.o=.correct) @@ -433,65 +104,25 @@ proofs/simulator.native: $(SRC_ARM)/proofs/simulator.ml $(SRC)/tools/build-proof.sh $(SRC_ARM)/proofs/simulator.ml "$(HOLLIGHT)" "$@" .SECONDEXPANSION: -%.native: $(SRC_ARM)/proofs/$$(*F).ml %.o ; $(SRC)/tools/build-proof.sh $< "$(HOLLIGHT)" "$@" +%.native: proofs/$$(*F).ml %.o ; $(SRC)/tools/build-proof.sh $(BASE)/$< "$(HOLLIGHT)" "$@" # Run them and print the standard output+error at *.correct - -%.correct: %.native ; $(SRC)/tools/run-proof.sh "$<" "$@" - -# Cases where a proof uses other proofs for lemmas and/or subroutines - -p256/bignum_montmul_p256_neon.native: p256/bignum_montmul_p256.native -p384/bignum_montmul_p384_neon.native: p384/bignum_montmul_p384.native -p521/bignum_montmul_p521_neon.native: p521/bignum_montmul_p521.native -p256/bignum_montsqr_p256_neon.native: p256/bignum_montsqr_p256.native -p384/bignum_montsqr_p384_neon.native: p384/bignum_montsqr_p384.native -p521/bignum_montsqr_p521_neon.native: p521/bignum_montsqr_p521.native -p521/bignum_mul_p521_neon.native: p521/bignum_mul_p521.native -p521/bignum_sqr_p521_neon.native: p521/bignum_sqr_p521.native -fastmul/bignum_mul_8_16_neon.native: fastmul/bignum_mul_8_16.native -fastmul/bignum_sqr_8_16_neon.native: fastmul/bignum_sqr_8_16.native -curve25519/curve25519_x25519.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519_alt.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519_byte.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519_byte_alt.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base_alt.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base_byte.native: curve25519/bignum_inv_p25519.native -curve25519/curve25519_x25519base_byte_alt.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmulbase.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmulbase_alt.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmuldouble.native: curve25519/bignum_inv_p25519.native -curve25519/edwards25519_scalarmuldouble_alt.native: curve25519/bignum_inv_p25519.native -generic/bignum_modexp.native: generic/bignum_amontifier.native generic/bignum_amontmul.native generic/bignum_demont.native generic/bignum_mux.native -p256/p256_montjadd.native: p256/unopt/p256_montjadd.o p256/bignum_montsqr_p256_neon.native p256/bignum_montmul_p256_neon.native p256/bignum_sub_p256.native -p256/p256_montjdouble.native: p256/unopt/p256_montjdouble.o p256/bignum_montsqr_p256_neon.native p256/bignum_montmul_p256_neon.native p256/bignum_sub_p256.native p256/bignum_add_p256.native -p256/p256_montjscalarmul.native: p256/p256_montjadd.native p256/p256_montjdouble.native -p256/p256_montjscalarmul_alt.native: p256/p256_montjadd_alt.native p256/p256_montjdouble_alt.native -p256/p256_scalarmul.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/bignum_tomont_p256.native p256/p256_montjadd.native p256/p256_montjdouble.native p256/p256_montjmixadd.native -p256/p256_scalarmul_alt.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjadd_alt.native p256/p256_montjdouble_alt.native p256/p256_montjmixadd_alt.native -p256/p256_scalarmulbase.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjmixadd.native -p256/p256_scalarmulbase_alt.native: p256/bignum_demont_p256.native p256/bignum_inv_p256.native p256/p256_montjmixadd_alt.native -p384/p384_montjadd.native: p384/unopt/p384_montjadd.o p384/bignum_montsqr_p384_neon.native p384/bignum_montmul_p384_neon.native p384/bignum_sub_p384.native -p384/p384_montjdouble.native: p384/unopt/p384_montjdouble.o p384/bignum_montsqr_p384_neon.native p384/bignum_montmul_p384_neon.native p384/bignum_sub_p384.native p384/bignum_add_p384.native -p384/p384_montjscalarmul.native: \ - p384/p384_montjadd.native p384/p384_montjdouble.native \ - p384/bignum_sub_p384.native p384/bignum_add_p384.native -p384/p384_montjscalarmul_alt.native: p384/p384_montjadd_alt.native p384/p384_montjdouble_alt.native -p521/p521_jadd.native: p521/bignum_mul_p521_neon.native p521/bignum_sqr_p521_neon.native -p521/p521_jdouble.native: p521/bignum_mul_p521_neon.native p521/bignum_sqr_p521_neon.native -p521/p521_jscalarmul.native: p521/bignum_mod_n521_9.native p521/p521_jadd.native p521/p521_jdouble.native -p521/p521_jscalarmul_alt.native: p521/bignum_mod_n521_9.native -sm2/sm2_montjscalarmul.native: sm2/sm2_montjadd.native sm2/sm2_montjdouble.native -sm2/sm2_montjscalarmul_alt.native: sm2/sm2_montjadd_alt.native sm2/sm2_montjdouble_alt.native - -unopt: $(UNOPT_OBJ) - -build_proofs: $(UNOPT_OBJ) $(PROOF_BINS); +%.correct: %.native + $< 2>&1 | tee $@ + @if (grep -i "error:\|exception:" "$@" >/dev/null); then \ + echo "$< had errors!"; \ + exit 1; \ + else \ + echo "$< OK"; \ + fi + +build_proofs: $(PROOF_BINS); run_proofs: build_proofs $(PROOF_LOGS); proofs: run_proofs ; $(SRC)/tools/count-proofs.sh . +.PHONY: proofs build_proofs run_proofs sematest clean + # Always run sematest regardless of dependency check FORCE: ; # Always use max. # of cores because in Makefile one cannot get the passed number of -j. diff --git a/proofs/hol_light/arm/README.md b/proofs/hol_light/arm/README.md new file mode 100644 index 000000000..797988593 --- /dev/null +++ b/proofs/hol_light/arm/README.md @@ -0,0 +1,39 @@ +[//]: # (SPDX-License-Identifier: CC-BY-4.0) + +# HOL Light functional correctness proofs + +This directory contains functional correctness proofs for some optimized +ML-KEM AArch64 assembly routines. The proofs were developed by John Harrison +and are written in the [HOL Light](https://hol-light.github.io/) theorem +prover, utilizing the assembly verification infrastructure from [s2n-bignum](https://github.com/awslabs/s2n-bignum). + +Each function is proved in a separate `.ml` file in [proofs/](proofs). Each file +contains the byte code being verified, as well as the specification that is being +proved. Specifications are essentially Hoare triples, with the noteworthy difference +that the program is implicit as the content of memory at the PC; which is asserted to +be the code under verification as part of the precondition. + +## Running the proofs + +To reproduce the proofs, enter the nix shell via + +```bash +nix develop --experimental-features 'nix-command flakes' +``` + +from mlkem-native's base directory. Then + +```bash +make -C proofs/hol_light/arm +``` + +will build and run the proofs. Note that this make take hours even on powerful machines. + +## What is covered? + +At present, this directory contains functional correctness proofs for the following functions: + +- Optimized AArch64 forward NTT: [mlkem_ntt.S](mlkem/mlkem_ntt.S) +- Optimized AArch64 inverse NTT: [mlkem_intt.S](mlkem/mlkem_intt.S) + +Both functions are super-optimized using [SLOTHY](https://github.com/slothy-optimizer/slothy/). diff --git a/proofs/hol_light/arm/mlkem/mlkem_intt.S b/proofs/hol_light/arm/mlkem/mlkem_intt.S new file mode 100644 index 000000000..ee0322c5f --- /dev/null +++ b/proofs/hol_light/arm/mlkem/mlkem_intt.S @@ -0,0 +1,416 @@ +// Copyright (c) 2022 Arm Limited +// Copyright (c) 2022 Hanno Becker +// Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer +// Copyright (c) 2024 The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +// ---------------------------------------------------------------------------- +// Inverse number-theoretic transform from ML-KEM +// Input a[256], z_01234[80], z_56[384] (all signed 16-bit words); output a[256] (signed 16-bit words). +// +// The transform is in-place with input and output a[256], with the input in +// bitreversed order and the output mapped into the Montgomery domain via +// x |-> (2^16 * x) mod 3329. The two other parameters are expected to point to +// tables of constants whose definitions can be found in the mlkem-native +// repo (mlkem/native/aarch64/src/aarch64_zetas.c) or our "tests/test.c". +// +// extern void mlkem_intt(int16_t a[256],int16_t z_01234[80],int16_t z_56[384]); +// +// Standard ARM ABI: X0 = a, X1 = z_01234, X2 = z_56 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(mlkem_intt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(mlkem_intt) + .text + .balign 4 + +S2N_BN_SYMBOL(mlkem_intt): + +// This implementation is generated by SLOTHY, set up to optimize for +// the Neoverse N1 microarchitecture, starting from the clean version +// +// https://github.com/pq-code-package/mlkem-native/blob/main/mlkem/native/aarch64/intt_clean.S +// +// in the mlkem-native repository. + +PQCP_MLKEM_NATIVE_MLKEM512_intt_asm_opt: + sub sp, sp, #0x40 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov w5, #0xd01 + mov v7.h[0], w5 + mov w5, #0x4ebf + mov v7.h[1], w5 + mov w5, #0x200 + dup v29.8h, w5 + mov w5, #0x13b0 + dup v30.8h, w5 + mov x3, x0 + mov x4, #0x8 + +scale_start: + ldr q8, [x3] + ldr q9, [x3, #16] + ldr q10, [x3, #32] + ldr q11, [x3, #48] + sqrdmulh v27.8h, v8.8h, v30.8h + mul v8.8h, v8.8h, v29.8h + mls v8.8h, v27.8h, v7.h[0] + sqrdmulh v27.8h, v9.8h, v30.8h + mul v9.8h, v9.8h, v29.8h + mls v9.8h, v27.8h, v7.h[0] + sqrdmulh v27.8h, v10.8h, v30.8h + mul v10.8h, v10.8h, v29.8h + mls v10.8h, v27.8h, v7.h[0] + sqrdmulh v27.8h, v11.8h, v30.8h + mul v11.8h, v11.8h, v29.8h + mls v11.8h, v27.8h, v7.h[0] + str q8, [x3], #64 + stur q9, [x3, #-48] + stur q10, [x3, #-32] + stur q11, [x3, #-16] + subs x4, x4, #0x1 + cbnz x4, scale_start + mov x3, x0 + mov x4, #0x8 + ldr q26, [x3] + ldr q8, [x3, #16] + ldr q24, [x3, #32] + ldr q16, [x3, #48] + ldr q9, [x2], #96 + trn1 v0.4s, v24.4s, v16.4s + ldur q6, [x2, #-80] + ldur q3, [x2, #-64] + ldur q15, [x2, #-48] + ldur q4, [x2, #-32] + ldur q28, [x2, #-16] + sub x4, x4, #0x1 + +layer3456_start: + trn1 v12.4s, v26.4s, v8.4s + trn2 v26.4s, v26.4s, v8.4s + trn2 v8.4s, v24.4s, v16.4s + trn2 v11.2d, v12.2d, v0.2d + trn1 v12.2d, v12.2d, v0.2d + trn2 v16.2d, v26.2d, v8.2d + trn1 v26.2d, v26.2d, v8.2d + sub v8.8h, v11.8h, v16.8h + add v11.8h, v11.8h, v16.8h + sub v16.8h, v12.8h, v26.8h + add v12.8h, v12.8h, v26.8h + sqrdmulh v26.8h, v8.8h, v28.8h + sqrdmulh v15.8h, v16.8h, v15.8h + mul v16.8h, v16.8h, v3.8h + mul v8.8h, v8.8h, v4.8h + sub v0.8h, v12.8h, v11.8h + add v12.8h, v12.8h, v11.8h + mls v16.8h, v15.8h, v7.h[0] + mls v8.8h, v26.8h, v7.h[0] + sqrdmulh v26.8h, v0.8h, v6.8h + mul v11.8h, v0.8h, v9.8h + ldr q15, [x1], #16 + sub v0.8h, v16.8h, v8.8h + mls v11.8h, v26.8h, v7.h[0] + add v26.8h, v16.8h, v8.8h + sqrdmulh v8.8h, v0.8h, v6.8h + mul v16.8h, v0.8h, v9.8h + trn1 v0.4s, v12.4s, v26.4s + trn2 v12.4s, v12.4s, v26.4s + ldr q26, [x3, #64] + mls v16.8h, v8.8h, v7.h[0] + ldr q8, [x3, #80] + ldr q24, [x3, #96] + trn1 v9.4s, v11.4s, v16.4s + trn2 v11.4s, v11.4s, v16.4s + ldr q16, [x3, #112] + trn2 v6.2d, v0.2d, v9.2d + trn2 v3.2d, v12.2d, v11.2d + trn1 v0.2d, v0.2d, v9.2d + trn1 v12.2d, v12.2d, v11.2d + sub v11.8h, v6.8h, v3.8h + sub v9.8h, v0.8h, v12.8h + add v12.8h, v0.8h, v12.8h + sqrdmulh v0.8h, v11.8h, v15.h[5] + sqrdmulh v4.8h, v9.8h, v15.h[3] + mul v9.8h, v9.8h, v15.h[2] + mul v11.8h, v11.8h, v15.h[4] + add v6.8h, v6.8h, v3.8h + sqdmulh v3.8h, v12.8h, v7.h[1] + mls v9.8h, v4.8h, v7.h[0] + mls v11.8h, v0.8h, v7.h[0] + sqdmulh v0.8h, v6.8h, v7.h[1] + srshr v3.8h, v3.8h, #11 + sqdmulh v4.8h, v9.8h, v7.h[1] + sqdmulh v28.8h, v11.8h, v7.h[1] + mls v12.8h, v3.8h, v7.h[0] + srshr v0.8h, v0.8h, #11 + srshr v3.8h, v4.8h, #11 + srshr v4.8h, v28.8h, #11 + mls v6.8h, v0.8h, v7.h[0] + mls v9.8h, v3.8h, v7.h[0] + mls v11.8h, v4.8h, v7.h[0] + trn1 v0.4s, v24.4s, v16.4s + sub v3.8h, v12.8h, v6.8h + add v12.8h, v12.8h, v6.8h + sub v6.8h, v9.8h, v11.8h + sqrdmulh v4.8h, v3.8h, v15.h[1] + mul v3.8h, v3.8h, v15.h[0] + sqrdmulh v28.8h, v6.8h, v15.h[1] + mul v15.8h, v6.8h, v15.h[0] + add v11.8h, v9.8h, v11.8h + mls v3.8h, v4.8h, v7.h[0] + str q12, [x3], #64 + mls v15.8h, v28.8h, v7.h[0] + stur q11, [x3, #-48] + ldr q9, [x2], #96 + stur q3, [x3, #-32] + ldur q6, [x2, #-80] + stur q15, [x3, #-16] + ldur q3, [x2, #-64] + ldur q15, [x2, #-48] + ldur q4, [x2, #-32] + ldur q28, [x2, #-16] + sub x4, x4, #0x1 + cbnz x4, layer3456_start + trn1 v11.4s, v26.4s, v8.4s + trn2 v24.4s, v24.4s, v16.4s + trn2 v26.4s, v26.4s, v8.4s + trn1 v18.2d, v11.2d, v0.2d + trn2 v11.2d, v11.2d, v0.2d + trn2 v12.2d, v26.2d, v24.2d + trn1 v8.2d, v26.2d, v24.2d + sub v26.8h, v11.8h, v12.8h + sub v13.8h, v18.8h, v8.8h + add v24.8h, v18.8h, v8.8h + mul v16.8h, v26.8h, v4.8h + sqrdmulh v17.8h, v13.8h, v15.8h + mul v3.8h, v13.8h, v3.8h + sqrdmulh v26.8h, v26.8h, v28.8h + add v10.8h, v11.8h, v12.8h + mls v3.8h, v17.8h, v7.h[0] + mls v16.8h, v26.8h, v7.h[0] + sub v26.8h, v24.8h, v10.8h + ldr q4, [x1], #16 + sub v12.8h, v3.8h, v16.8h + sqrdmulh v15.8h, v26.8h, v6.8h + mul v11.8h, v26.8h, v9.8h + mul v8.8h, v12.8h, v9.8h + sqrdmulh v12.8h, v12.8h, v6.8h + add v0.8h, v24.8h, v10.8h + mls v11.8h, v15.8h, v7.h[0] + add v6.8h, v3.8h, v16.8h + mls v8.8h, v12.8h, v7.h[0] + trn2 v26.4s, v0.4s, v6.4s + trn2 v12.4s, v11.4s, v8.4s + trn1 v3.4s, v11.4s, v8.4s + trn1 v17.4s, v0.4s, v6.4s + trn1 v8.2d, v26.2d, v12.2d + trn2 v13.2d, v26.2d, v12.2d + trn1 v11.2d, v17.2d, v3.2d + trn2 v15.2d, v17.2d, v3.2d + sub v12.8h, v11.8h, v8.8h + add v16.8h, v15.8h, v13.8h + sub v26.8h, v15.8h, v13.8h + mul v0.8h, v12.8h, v4.h[2] + sqrdmulh v9.8h, v12.8h, v4.h[3] + mul v13.8h, v26.8h, v4.h[4] + sqrdmulh v26.8h, v26.8h, v4.h[5] + add v24.8h, v11.8h, v8.8h + mls v0.8h, v9.8h, v7.h[0] + sqdmulh v12.8h, v16.8h, v7.h[1] + mls v13.8h, v26.8h, v7.h[0] + sqdmulh v11.8h, v24.8h, v7.h[1] + sqdmulh v8.8h, v0.8h, v7.h[1] + srshr v12.8h, v12.8h, #11 + sqdmulh v26.8h, v13.8h, v7.h[1] + srshr v11.8h, v11.8h, #11 + mls v16.8h, v12.8h, v7.h[0] + srshr v8.8h, v8.8h, #11 + srshr v26.8h, v26.8h, #11 + mls v24.8h, v11.8h, v7.h[0] + mls v0.8h, v8.8h, v7.h[0] + mls v13.8h, v26.8h, v7.h[0] + sub v26.8h, v24.8h, v16.8h + add v15.8h, v24.8h, v16.8h + sub v12.8h, v0.8h, v13.8h + mul v11.8h, v26.8h, v4.h[0] + sqrdmulh v16.8h, v26.8h, v4.h[1] + mul v26.8h, v12.8h, v4.h[0] + sqrdmulh v8.8h, v12.8h, v4.h[1] + add v12.8h, v0.8h, v13.8h + mls v11.8h, v16.8h, v7.h[0] + str q15, [x3], #64 + mls v26.8h, v8.8h, v7.h[0] + stur q12, [x3, #-48] + stur q11, [x3, #-32] + stur q26, [x3, #-16] + mov x4, #0x4 + ldr q0, [x1], #32 + ldur q1, [x1, #-16] + ldr q24, [x0, #128] + ldr q16, [x0, #192] + ldr q9, [x0, #256] + ldr q6, [x0, #320] + ldr q3, [x0, #384] + ldr q4, [x0, #448] + add v28.8h, v9.8h, v6.8h + add v19.8h, v24.8h, v16.8h + add v13.8h, v3.8h, v4.8h + ldr q11, [x0] + add v23.8h, v28.8h, v13.8h + ldr q15, [x0, #64] + sub x4, x4, #0x1 + +layer012_start: + sub v12.8h, v11.8h, v15.8h + add v26.8h, v11.8h, v15.8h + sub v8.8h, v24.8h, v16.8h + sqrdmulh v11.8h, v12.8h, v0.h[7] + mul v12.8h, v12.8h, v0.h[6] + sub v16.8h, v26.8h, v19.8h + add v26.8h, v26.8h, v19.8h + sqrdmulh v15.8h, v8.8h, v1.h[1] + mul v8.8h, v8.8h, v1.h[0] + mls v12.8h, v11.8h, v7.h[0] + sub v11.8h, v9.8h, v6.8h + sqrdmulh v24.8h, v16.8h, v0.h[3] + mul v16.8h, v16.8h, v0.h[2] + sub v9.8h, v26.8h, v23.8h + add v26.8h, v26.8h, v23.8h + mls v8.8h, v15.8h, v7.h[0] + sqrdmulh v15.8h, v11.8h, v1.h[3] + mul v11.8h, v11.8h, v1.h[2] + sub v6.8h, v3.8h, v4.8h + sub v3.8h, v12.8h, v8.8h + add v12.8h, v12.8h, v8.8h + mls v11.8h, v15.8h, v7.h[0] + sqrdmulh v8.8h, v6.8h, v1.h[5] + mls v16.8h, v24.8h, v7.h[0] + mul v15.8h, v6.8h, v1.h[4] + sqrdmulh v24.8h, v3.8h, v0.h[3] + mul v6.8h, v3.8h, v0.h[2] + sqrdmulh v3.8h, v9.8h, v0.h[1] + mul v9.8h, v9.8h, v0.h[0] + str q26, [x0], #16 + mls v15.8h, v8.8h, v7.h[0] + mls v6.8h, v24.8h, v7.h[0] + sub v26.8h, v28.8h, v13.8h + mls v9.8h, v3.8h, v7.h[0] + sub v8.8h, v11.8h, v15.8h + sqrdmulh v24.8h, v26.8h, v0.h[5] + mul v26.8h, v26.8h, v0.h[4] + add v11.8h, v11.8h, v15.8h + sqrdmulh v15.8h, v8.8h, v0.h[5] + mul v8.8h, v8.8h, v0.h[4] + mls v26.8h, v24.8h, v7.h[0] + sub v24.8h, v12.8h, v11.8h + add v12.8h, v12.8h, v11.8h + mls v8.8h, v15.8h, v7.h[0] + sqrdmulh v11.8h, v24.8h, v0.h[1] + mul v15.8h, v24.8h, v0.h[0] + sub v24.8h, v16.8h, v26.8h + add v26.8h, v16.8h, v26.8h + sub v16.8h, v6.8h, v8.8h + mls v15.8h, v11.8h, v7.h[0] + sqrdmulh v11.8h, v24.8h, v0.h[1] + mul v24.8h, v24.8h, v0.h[0] + add v8.8h, v6.8h, v8.8h + sqrdmulh v6.8h, v16.8h, v0.h[1] + mul v16.8h, v16.8h, v0.h[0] + mls v24.8h, v11.8h, v7.h[0] + str q9, [x0, #240] + ldr q11, [x0] + mls v16.8h, v6.8h, v7.h[0] + str q15, [x0, #304] + ldr q15, [x0, #64] + str q24, [x0, #368] + ldr q24, [x0, #128] + str q16, [x0, #432] + ldr q16, [x0, #192] + str q12, [x0, #48] + ldr q9, [x0, #256] + ldr q6, [x0, #320] + ldr q3, [x0, #384] + ldr q4, [x0, #448] + str q26, [x0, #112] + add v28.8h, v9.8h, v6.8h + add v13.8h, v3.8h, v4.8h + str q8, [x0, #176] + add v19.8h, v24.8h, v16.8h + add v23.8h, v28.8h, v13.8h + sub x4, x4, #0x1 + cbnz x4, layer012_start + add v10.8h, v11.8h, v15.8h + sub v12.8h, v28.8h, v13.8h + sub v11.8h, v11.8h, v15.8h + sub v22.8h, v10.8h, v19.8h + mul v18.8h, v12.8h, v0.h[4] + sqrdmulh v26.8h, v12.8h, v0.h[5] + sqrdmulh v12.8h, v22.8h, v0.h[3] + mul v13.8h, v22.8h, v0.h[2] + sub v31.8h, v24.8h, v16.8h + sqrdmulh v22.8h, v11.8h, v0.h[7] + mls v18.8h, v26.8h, v7.h[0] + mls v13.8h, v12.8h, v7.h[0] + sqrdmulh v2.8h, v31.8h, v1.h[1] + mul v5.8h, v31.8h, v1.h[0] + mul v15.8h, v11.8h, v0.h[6] + sub v12.8h, v13.8h, v18.8h + sub v4.8h, v3.8h, v4.8h + mls v5.8h, v2.8h, v7.h[0] + sqrdmulh v26.8h, v12.8h, v0.h[1] + mul v12.8h, v12.8h, v0.h[0] + mls v15.8h, v22.8h, v7.h[0] + sqrdmulh v8.8h, v4.8h, v1.h[5] + mul v4.8h, v4.8h, v1.h[4] + mls v12.8h, v26.8h, v7.h[0] + sub v21.8h, v15.8h, v5.8h + sub v28.8h, v9.8h, v6.8h + mls v4.8h, v8.8h, v7.h[0] + mul v24.8h, v21.8h, v0.h[2] + sqrdmulh v8.8h, v21.8h, v0.h[3] + sqrdmulh v6.8h, v28.8h, v1.h[3] + add v19.8h, v10.8h, v19.8h + mul v28.8h, v28.8h, v1.h[2] + mls v24.8h, v8.8h, v7.h[0] + sub v11.8h, v19.8h, v23.8h + str q12, [x0, #384] + mls v28.8h, v6.8h, v7.h[0] + sqrdmulh v16.8h, v11.8h, v0.h[1] + mul v9.8h, v11.8h, v0.h[0] + add v6.8h, v15.8h, v5.8h + add v26.8h, v28.8h, v4.8h + sub v15.8h, v28.8h, v4.8h + mls v9.8h, v16.8h, v7.h[0] + add v3.8h, v6.8h, v26.8h + mul v8.8h, v15.8h, v0.h[4] + sqrdmulh v15.8h, v15.8h, v0.h[5] + str q9, [x0, #256] + sub v2.8h, v6.8h, v26.8h + str q3, [x0, #64] + mls v8.8h, v15.8h, v7.h[0] + sqrdmulh v15.8h, v2.8h, v0.h[1] + mul v11.8h, v2.8h, v0.h[0] + add v16.8h, v13.8h, v18.8h + sub v12.8h, v24.8h, v8.8h + add v8.8h, v24.8h, v8.8h + mls v11.8h, v15.8h, v7.h[0] + sqrdmulh v26.8h, v12.8h, v0.h[1] + mul v12.8h, v12.8h, v0.h[0] + str q8, [x0, #192] + add v15.8h, v19.8h, v23.8h + str q11, [x0, #320] + mls v12.8h, v26.8h, v7.h[0] + str q15, [x0], #16 + str q16, [x0, #112] + str q12, [x0, #432] + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #0x40 + ret diff --git a/proofs/hol_light/arm/mlkem/mlkem_ntt.S b/proofs/hol_light/arm/mlkem/mlkem_ntt.S new file mode 100644 index 000000000..e377f81b4 --- /dev/null +++ b/proofs/hol_light/arm/mlkem/mlkem_ntt.S @@ -0,0 +1,358 @@ +// Copyright (c) 2022 Arm Limited +// Copyright (c) 2022 Hanno Becker +// Copyright (c) 2023 Amin Abdulrahman, Matthias Kannwischer +// Copyright (c) 2024 The mlkem-native project authors +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: MIT + +// ---------------------------------------------------------------------------- +// Forward number-theoretic transform from ML-KEM +// Input a[256], z_01234[80], z_56[384] (all signed 16-bit words); output a[256] (signed 16-bit words). +// +// The transform is in-place with input and output a[256], with the output +// in bitreversed order. The two other parameters are expected to point to +// tables of constants whose definitions can be found in the mlkem-native +// repo (mlkem/native/aarch64/src/aarch64_zetas.c) or our "tests/test.c". +// +// extern void mlkem_ntt(int16_t a[256],int16_t z_01234[80],int16_t z_56[384]); +// +// Standard ARM ABI: X0 = a, X1 = z_01234, X2 = z_56 +// ---------------------------------------------------------------------------- +#include "_internal_s2n_bignum.h" + + S2N_BN_SYM_VISIBILITY_DIRECTIVE(mlkem_ntt) + S2N_BN_SYM_PRIVACY_DIRECTIVE(mlkem_ntt) + .text + .balign 4 + +S2N_BN_SYMBOL(mlkem_ntt): + +// This implementation is generated by SLOTHY, set up to optimize for +// the Neoverse N1 microarchitecture, starting from the clean version +// +// https://github.com/pq-code-package/mlkem-native/blob/main/mlkem/native/aarch64/src/ntt_clean.S +// +// in the mlkem-native repository. + +PQCP_MLKEM_NATIVE_MLKEM512_ntt_asm_opt: + sub sp, sp, #0x40 + stp d8, d9, [sp] + stp d10, d11, [sp, #16] + stp d12, d13, [sp, #32] + stp d14, d15, [sp, #48] + mov w5, #0xd01 + mov v7.h[0], w5 + mov w5, #0x4ebf + mov v7.h[1], w5 + mov x3, x0 + mov x4, #0x4 + ldr q0, [x1], #32 + ldur q1, [x1, #-16] + ldr q21, [x0] + ldr q26, [x0, #64] + ldr q29, [x0, #128] + ldr q20, [x0, #192] + ldr q23, [x0, #256] + ldr q11, [x0, #448] + mul v2.8h, v23.8h, v0.h[0] + ldr q17, [x0, #320] + mul v15.8h, v11.8h, v0.h[0] + ldr q13, [x0, #384] + sub x4, x4, #0x1 + sqrdmulh v14.8h, v23.8h, v0.h[1] + sqrdmulh v23.8h, v17.8h, v0.h[1] + mul v17.8h, v17.8h, v0.h[0] + sqrdmulh v28.8h, v13.8h, v0.h[1] + mls v2.8h, v14.8h, v7.h[0] + mul v14.8h, v13.8h, v0.h[0] + mls v17.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v11.8h, v0.h[1] + sub v11.8h, v21.8h, v2.8h + mls v14.8h, v28.8h, v7.h[0] + sub v28.8h, v26.8h, v17.8h + add v17.8h, v26.8h, v17.8h + add v2.8h, v21.8h, v2.8h + sub v13.8h, v29.8h, v14.8h + add v14.8h, v29.8h, v14.8h + mls v15.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v13.8h, v0.h[5] + mul v13.8h, v13.8h, v0.h[4] + sqrdmulh v21.8h, v14.8h, v0.h[3] + sub v26.8h, v20.8h, v15.8h + add v15.8h, v20.8h, v15.8h + mls v13.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v26.8h, v0.h[5] + mul v26.8h, v26.8h, v0.h[4] + mul v14.8h, v14.8h, v0.h[2] + sub v29.8h, v11.8h, v13.8h + add v11.8h, v11.8h, v13.8h + mls v26.8h, v23.8h, v7.h[0] + sqrdmulh v23.8h, v15.8h, v0.h[3] + mul v13.8h, v15.8h, v0.h[2] + mls v14.8h, v21.8h, v7.h[0] + sub v15.8h, v28.8h, v26.8h + add v28.8h, v28.8h, v26.8h + mls v13.8h, v23.8h, v7.h[0] + sub v23.8h, v2.8h, v14.8h + add v14.8h, v2.8h, v14.8h + sqrdmulh v2.8h, v28.8h, v1.h[3] + sub v21.8h, v17.8h, v13.8h + add v17.8h, v17.8h, v13.8h + mul v28.8h, v28.8h, v1.h[2] + sqrdmulh v13.8h, v21.8h, v1.h[1] + sqrdmulh v26.8h, v17.8h, v0.h[7] + mul v17.8h, v17.8h, v0.h[6] + mul v21.8h, v21.8h, v1.h[0] + mls v28.8h, v2.8h, v7.h[0] + sqrdmulh v2.8h, v15.8h, v1.h[5] + mls v17.8h, v26.8h, v7.h[0] + mls v21.8h, v13.8h, v7.h[0] + sub v13.8h, v11.8h, v28.8h + add v28.8h, v11.8h, v28.8h + sub v11.8h, v14.8h, v17.8h + mul v15.8h, v15.8h, v1.h[4] + add v14.8h, v14.8h, v17.8h + sub v17.8h, v23.8h, v21.8h + add v23.8h, v23.8h, v21.8h + mls v15.8h, v2.8h, v7.h[0] + str q14, [x0], #16 + ldr q21, [x0] + sub v14.8h, v29.8h, v15.8h + add v2.8h, v29.8h, v15.8h + str q11, [x0, #48] + ldr q26, [x0, #64] + str q23, [x0, #112] + ldr q29, [x0, #128] + str q17, [x0, #176] + ldr q20, [x0, #192] + str q28, [x0, #240] + ldr q23, [x0, #256] + str q13, [x0, #304] + ldr q17, [x0, #320] + str q2, [x0, #368] + mul v2.8h, v23.8h, v0.h[0] + str q14, [x0, #432] + ldr q11, [x0, #448] + ldr q13, [x0, #384] + mul v15.8h, v11.8h, v0.h[0] + sub x4, x4, #0x1 + cbnz x4, PQCP_MLKEM_NATIVE_MLKEM512_ntt_asm_opt+0x60 + sqrdmulh v27.8h, v11.8h, v0.h[1] + mul v8.8h, v13.8h, v0.h[0] + sqrdmulh v22.8h, v13.8h, v0.h[1] + mul v11.8h, v17.8h, v0.h[0] + mls v15.8h, v27.8h, v7.h[0] + sqrdmulh v28.8h, v17.8h, v0.h[1] + mls v8.8h, v22.8h, v7.h[0] + sqrdmulh v5.8h, v23.8h, v0.h[1] + add v16.8h, v20.8h, v15.8h + mls v11.8h, v28.8h, v7.h[0] + sub v6.8h, v29.8h, v8.8h + sqrdmulh v17.8h, v16.8h, v0.h[3] + mul v23.8h, v16.8h, v0.h[2] + mul v13.8h, v6.8h, v0.h[4] + sqrdmulh v28.8h, v6.8h, v0.h[5] + mls v2.8h, v5.8h, v7.h[0] + mls v23.8h, v17.8h, v7.h[0] + add v27.8h, v26.8h, v11.8h + mls v13.8h, v28.8h, v7.h[0] + sub v9.8h, v21.8h, v2.8h + add v18.8h, v29.8h, v8.8h + sub v14.8h, v27.8h, v23.8h + add v29.8h, v9.8h, v13.8h + sub v30.8h, v9.8h, v13.8h + mul v28.8h, v14.8h, v1.h[0] + sqrdmulh v9.8h, v18.8h, v0.h[3] + mul v22.8h, v18.8h, v0.h[2] + sqrdmulh v17.8h, v14.8h, v1.h[1] + sub v14.8h, v20.8h, v15.8h + add v24.8h, v21.8h, v2.8h + mls v22.8h, v9.8h, v7.h[0] + sqrdmulh v9.8h, v14.8h, v0.h[5] + mul v13.8h, v14.8h, v0.h[4] + mls v28.8h, v17.8h, v7.h[0] + sub v5.8h, v24.8h, v22.8h + sub v2.8h, v26.8h, v11.8h + mls v13.8h, v9.8h, v7.h[0] + sub v17.8h, v5.8h, v28.8h + add v14.8h, v5.8h, v28.8h + add v28.8h, v27.8h, v23.8h + str q17, [x0, #192] + add v17.8h, v2.8h, v13.8h + str q14, [x0, #128] + sub v13.8h, v2.8h, v13.8h + sqrdmulh v26.8h, v17.8h, v1.h[3] + mul v15.8h, v17.8h, v1.h[2] + add v5.8h, v24.8h, v22.8h + sqrdmulh v23.8h, v13.8h, v1.h[5] + mul v13.8h, v13.8h, v1.h[4] + mls v15.8h, v26.8h, v7.h[0] + sqrdmulh v14.8h, v28.8h, v0.h[7] + mul v17.8h, v28.8h, v0.h[6] + mls v13.8h, v23.8h, v7.h[0] + add v6.8h, v29.8h, v15.8h + sub v28.8h, v29.8h, v15.8h + mls v17.8h, v14.8h, v7.h[0] + str q6, [x0, #256] + add v14.8h, v30.8h, v13.8h + str q28, [x0, #320] + sub v23.8h, v30.8h, v13.8h + str q14, [x0, #384] + add v3.8h, v5.8h, v17.8h + str q23, [x0, #448] + sub v28.8h, v5.8h, v17.8h + str q3, [x0], #16 + str q28, [x0, #48] + mov x0, x3 + mov x4, #0x8 + ldr q2, [x1], #16 + ldr q14, [x0, #48] + ldr q1, [x0, #32] + mul v17.8h, v14.8h, v2.h[0] + sqrdmulh v14.8h, v14.8h, v2.h[1] + mul v8.8h, v1.8h, v2.h[0] + ldr q23, [x0, #16] + mls v17.8h, v14.8h, v7.h[0] + sqrdmulh v1.8h, v1.8h, v2.h[1] + ldr q30, [x2], #96 + sub v14.8h, v23.8h, v17.8h + add v10.8h, v23.8h, v17.8h + mls v8.8h, v1.8h, v7.h[0] + sqrdmulh v1.8h, v14.8h, v2.h[5] + mul v14.8h, v14.8h, v2.h[4] + ldr q27, [x0] + mul v23.8h, v10.8h, v2.h[2] + mls v14.8h, v1.8h, v7.h[0] + sub v1.8h, v27.8h, v8.8h + ldur q28, [x2, #-64] + add v12.8h, v1.8h, v14.8h + sqrdmulh v21.8h, v10.8h, v2.h[3] + sub v5.8h, v1.8h, v14.8h + ldur q13, [x2, #-16] + sub x4, x4, #0x1 + ldr q19, [x0, #112] + ldr q1, [x1], #16 + mls v23.8h, v21.8h, v7.h[0] + add v6.8h, v27.8h, v8.8h + mul v4.8h, v19.8h, v1.h[0] + sqrdmulh v19.8h, v19.8h, v1.h[1] + ldr q25, [x0, #80] + trn1 v11.4s, v12.4s, v5.4s + mls v4.8h, v19.8h, v7.h[0] + sub v0.8h, v6.8h, v23.8h + ldur q16, [x2, #-80] + sub v24.8h, v25.8h, v4.8h + add v26.8h, v6.8h, v23.8h + add v4.8h, v25.8h, v4.8h + sqrdmulh v23.8h, v24.8h, v1.h[5] + mul v20.8h, v24.8h, v1.h[4] + sqrdmulh v21.8h, v4.8h, v1.h[3] + trn1 v27.4s, v26.4s, v0.4s + trn2 v25.4s, v12.4s, v5.4s + mls v20.8h, v23.8h, v7.h[0] + mul v23.8h, v4.8h, v1.h[2] + ldr q31, [x0, #96] + trn2 v12.4s, v26.4s, v0.4s + trn2 v19.2d, v27.2d, v11.2d + mul v8.8h, v31.8h, v1.h[0] + sqrdmulh v1.8h, v31.8h, v1.h[1] + trn2 v10.2d, v12.2d, v25.2d + sqrdmulh v0.8h, v19.8h, v16.8h + sqrdmulh v18.8h, v10.8h, v16.8h + trn1 v16.2d, v27.2d, v11.2d + trn1 v2.2d, v12.2d, v25.2d + mul v12.8h, v10.8h, v30.8h + mul v10.8h, v19.8h, v30.8h + mls v8.8h, v1.8h, v7.h[0] + ldur q14, [x2, #-48] + mls v10.8h, v0.8h, v7.h[0] + mls v12.8h, v18.8h, v7.h[0] + ldr q27, [x0, #64] + add v9.8h, v16.8h, v10.8h + sub v16.8h, v16.8h, v10.8h + sub v25.8h, v2.8h, v12.8h + add v30.8h, v2.8h, v12.8h + sub v10.8h, v27.8h, v8.8h + sqrdmulh v22.8h, v25.8h, v13.8h + sqrdmulh v13.8h, v30.8h, v14.8h + ldur q14, [x2, #-32] + add v12.8h, v10.8h, v20.8h + mul v5.8h, v30.8h, v28.8h + mul v26.8h, v25.8h, v14.8h + ldr q30, [x2], #96 + mls v5.8h, v13.8h, v7.h[0] + mls v26.8h, v22.8h, v7.h[0] + ldur q28, [x2, #-64] + add v13.8h, v9.8h, v5.8h + sub v9.8h, v9.8h, v5.8h + sub v5.8h, v16.8h, v26.8h + add v25.8h, v16.8h, v26.8h + trn1 v15.4s, v13.4s, v9.4s + trn2 v3.4s, v13.4s, v9.4s + trn1 v13.4s, v25.4s, v5.4s + trn2 v31.4s, v25.4s, v5.4s + sub v5.8h, v10.8h, v20.8h + trn1 v2.2d, v15.2d, v13.2d + trn2 v9.2d, v15.2d, v13.2d + str q2, [x0], #64 + trn1 v29.2d, v3.2d, v31.2d + stur q9, [x0, #-32] + trn2 v9.2d, v3.2d, v31.2d + stur q29, [x0, #-48] + ldur q13, [x2, #-16] + stur q9, [x0, #-16] + sub x4, x4, #0x1 + cbnz x4, PQCP_MLKEM_NATIVE_MLKEM512_ntt_asm_opt+0x30c + mls v23.8h, v21.8h, v7.h[0] + add v14.8h, v27.8h, v8.8h + ldur q1, [x2, #-32] + add v17.8h, v14.8h, v23.8h + sub v23.8h, v14.8h, v23.8h + trn2 v11.4s, v12.4s, v5.4s + trn1 v27.4s, v12.4s, v5.4s + trn2 v2.4s, v17.4s, v23.4s + ldur q26, [x2, #-80] + trn2 v14.2d, v2.2d, v11.2d + trn1 v15.4s, v17.4s, v23.4s + mul v5.8h, v14.8h, v30.8h + sqrdmulh v23.8h, v14.8h, v26.8h + trn2 v17.2d, v15.2d, v27.2d + trn1 v14.2d, v2.2d, v11.2d + mul v21.8h, v17.8h, v30.8h + mls v5.8h, v23.8h, v7.h[0] + sqrdmulh v17.8h, v17.8h, v26.8h + ldur q2, [x2, #-48] + sub v23.8h, v14.8h, v5.8h + add v14.8h, v14.8h, v5.8h + mls v21.8h, v17.8h, v7.h[0] + mul v1.8h, v23.8h, v1.8h + sqrdmulh v17.8h, v23.8h, v13.8h + mul v23.8h, v14.8h, v28.8h + sqrdmulh v14.8h, v14.8h, v2.8h + trn1 v28.2d, v15.2d, v27.2d + mls v1.8h, v17.8h, v7.h[0] + sub v11.8h, v28.8h, v21.8h + mls v23.8h, v14.8h, v7.h[0] + add v17.8h, v28.8h, v21.8h + sub v14.8h, v11.8h, v1.8h + add v1.8h, v11.8h, v1.8h + sub v28.8h, v17.8h, v23.8h + add v2.8h, v17.8h, v23.8h + trn1 v23.4s, v1.4s, v14.4s + trn2 v14.4s, v1.4s, v14.4s + trn2 v17.4s, v2.4s, v28.4s + trn1 v28.4s, v2.4s, v28.4s + trn2 v1.2d, v17.2d, v14.2d + trn1 v14.2d, v17.2d, v14.2d + str q1, [x0, #48] + trn2 v1.2d, v28.2d, v23.2d + str q14, [x0, #16] + trn1 v14.2d, v28.2d, v23.2d + str q1, [x0, #32] + str q14, [x0], #64 + ldp d8, d9, [sp] + ldp d10, d11, [sp, #16] + ldp d12, d13, [sp, #32] + ldp d14, d15, [sp, #48] + add sp, sp, #0x40 + ret diff --git a/proofs/hol_light/arm/proofs/mlkem_intt.ml b/proofs/hol_light/arm/proofs/mlkem_intt.ml new file mode 100644 index 000000000..8b9cbc325 --- /dev/null +++ b/proofs/hol_light/arm/proofs/mlkem_intt.ml @@ -0,0 +1,651 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Inverse number theoretic transform. *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "arm/proofs/utils/mlkem.ml";; + +(**** print_literal_from_elf "mlkem/mlkem_intt.o";; + ****) + +let mlkem_intt_mc = define_assert_from_elf + "mlkem_intt_mc" "mlkem/mlkem_intt.o" +[ + 0xd10103ff; (* arm_SUB SP SP (rvalue (word 64)) *) + 0x6d0027e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d0237ec; (* arm_STP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d033fee; (* arm_STP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x5281a025; (* arm_MOV W5 (rvalue (word 3329)) *) + 0x4e021ca7; (* arm_INS_GEN Q7 W5 0 16 *) + 0x5289d7e5; (* arm_MOV W5 (rvalue (word 20159)) *) + 0x4e061ca7; (* arm_INS_GEN Q7 W5 16 16 *) + 0x52804005; (* arm_MOV W5 (rvalue (word 512)) *) + 0x4e020cbd; (* arm_DUP_GEN Q29 X5 16 128 *) + 0x52827605; (* arm_MOV W5 (rvalue (word 5040)) *) + 0x4e020cbe; (* arm_DUP_GEN Q30 X5 16 128 *) + 0xaa0003e3; (* arm_MOV X3 X0 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3dc00068; (* arm_LDR Q8 X3 (Immediate_Offset (word 0)) *) + 0x3dc00469; (* arm_LDR Q9 X3 (Immediate_Offset (word 16)) *) + 0x3dc0086a; (* arm_LDR Q10 X3 (Immediate_Offset (word 32)) *) + 0x3dc00c6b; (* arm_LDR Q11 X3 (Immediate_Offset (word 48)) *) + 0x6e7eb51b; (* arm_SQRDMULH_VEC Q27 Q8 Q30 16 128 *) + 0x4e7d9d08; (* arm_MUL_VEC Q8 Q8 Q29 16 128 *) + 0x6f474368; (* arm_MLS_VEC Q8 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7eb53b; (* arm_SQRDMULH_VEC Q27 Q9 Q30 16 128 *) + 0x4e7d9d29; (* arm_MUL_VEC Q9 Q9 Q29 16 128 *) + 0x6f474369; (* arm_MLS_VEC Q9 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7eb55b; (* arm_SQRDMULH_VEC Q27 Q10 Q30 16 128 *) + 0x4e7d9d4a; (* arm_MUL_VEC Q10 Q10 Q29 16 128 *) + 0x6f47436a; (* arm_MLS_VEC Q10 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7eb57b; (* arm_SQRDMULH_VEC Q27 Q11 Q30 16 128 *) + 0x4e7d9d6b; (* arm_MUL_VEC Q11 Q11 Q29 16 128 *) + 0x6f47436b; (* arm_MLS_VEC Q11 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x3c840468; (* arm_STR Q8 X3 (Postimmediate_Offset (word 64)) *) + 0x3c9d0069; (* arm_STR Q9 X3 (Immediate_Offset (word 18446744073709551568)) *) + 0x3c9e006a; (* arm_STR Q10 X3 (Immediate_Offset (word 18446744073709551584)) *) + 0x3c9f006b; (* arm_STR Q11 X3 (Immediate_Offset (word 18446744073709551600)) *) + 0xf1000484; (* arm_SUBS X4 X4 (rvalue (word 1)) *) + 0xb5fffd64; (* arm_CBNZ X4 (word 2097068) *) + 0xaa0003e3; (* arm_MOV X3 X0 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3dc0007a; (* arm_LDR Q26 X3 (Immediate_Offset (word 0)) *) + 0x3dc00468; (* arm_LDR Q8 X3 (Immediate_Offset (word 16)) *) + 0x3dc00878; (* arm_LDR Q24 X3 (Immediate_Offset (word 32)) *) + 0x3dc00c70; (* arm_LDR Q16 X3 (Immediate_Offset (word 48)) *) + 0x3cc60449; (* arm_LDR Q9 X2 (Postimmediate_Offset (word 96)) *) + 0x4e902b00; (* arm_TRN1 Q0 Q24 Q16 32 128 *) + 0x3cdb0046; (* arm_LDR Q6 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x3cdc0043; (* arm_LDR Q3 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x3cdd004f; (* arm_LDR Q15 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cde0044; (* arm_LDR Q4 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdf005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x4e882b4c; (* arm_TRN1 Q12 Q26 Q8 32 128 *) + 0x4e886b5a; (* arm_TRN2 Q26 Q26 Q8 32 128 *) + 0x4e906b08; (* arm_TRN2 Q8 Q24 Q16 32 128 *) + 0x4ec0698b; (* arm_TRN2 Q11 Q12 Q0 64 128 *) + 0x4ec0298c; (* arm_TRN1 Q12 Q12 Q0 64 128 *) + 0x4ec86b50; (* arm_TRN2 Q16 Q26 Q8 64 128 *) + 0x4ec82b5a; (* arm_TRN1 Q26 Q26 Q8 64 128 *) + 0x6e708568; (* arm_SUB_VEC Q8 Q11 Q16 16 128 *) + 0x4e70856b; (* arm_ADD_VEC Q11 Q11 Q16 16 128 *) + 0x6e7a8590; (* arm_SUB_VEC Q16 Q12 Q26 16 128 *) + 0x4e7a858c; (* arm_ADD_VEC Q12 Q12 Q26 16 128 *) + 0x6e7cb51a; (* arm_SQRDMULH_VEC Q26 Q8 Q28 16 128 *) + 0x6e6fb60f; (* arm_SQRDMULH_VEC Q15 Q16 Q15 16 128 *) + 0x4e639e10; (* arm_MUL_VEC Q16 Q16 Q3 16 128 *) + 0x4e649d08; (* arm_MUL_VEC Q8 Q8 Q4 16 128 *) + 0x6e6b8580; (* arm_SUB_VEC Q0 Q12 Q11 16 128 *) + 0x4e6b858c; (* arm_ADD_VEC Q12 Q12 Q11 16 128 *) + 0x6f4741f0; (* arm_MLS_VEC Q16 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474348; (* arm_MLS_VEC Q8 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e66b41a; (* arm_SQRDMULH_VEC Q26 Q0 Q6 16 128 *) + 0x4e699c0b; (* arm_MUL_VEC Q11 Q0 Q9 16 128 *) + 0x3cc1042f; (* arm_LDR Q15 X1 (Postimmediate_Offset (word 16)) *) + 0x6e688600; (* arm_SUB_VEC Q0 Q16 Q8 16 128 *) + 0x6f47434b; (* arm_MLS_VEC Q11 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x4e68861a; (* arm_ADD_VEC Q26 Q16 Q8 16 128 *) + 0x6e66b408; (* arm_SQRDMULH_VEC Q8 Q0 Q6 16 128 *) + 0x4e699c10; (* arm_MUL_VEC Q16 Q0 Q9 16 128 *) + 0x4e9a2980; (* arm_TRN1 Q0 Q12 Q26 32 128 *) + 0x4e9a698c; (* arm_TRN2 Q12 Q12 Q26 32 128 *) + 0x3dc0107a; (* arm_LDR Q26 X3 (Immediate_Offset (word 64)) *) + 0x6f474110; (* arm_MLS_VEC Q16 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x3dc01468; (* arm_LDR Q8 X3 (Immediate_Offset (word 80)) *) + 0x3dc01878; (* arm_LDR Q24 X3 (Immediate_Offset (word 96)) *) + 0x4e902969; (* arm_TRN1 Q9 Q11 Q16 32 128 *) + 0x4e90696b; (* arm_TRN2 Q11 Q11 Q16 32 128 *) + 0x3dc01c70; (* arm_LDR Q16 X3 (Immediate_Offset (word 112)) *) + 0x4ec96806; (* arm_TRN2 Q6 Q0 Q9 64 128 *) + 0x4ecb6983; (* arm_TRN2 Q3 Q12 Q11 64 128 *) + 0x4ec92800; (* arm_TRN1 Q0 Q0 Q9 64 128 *) + 0x4ecb298c; (* arm_TRN1 Q12 Q12 Q11 64 128 *) + 0x6e6384cb; (* arm_SUB_VEC Q11 Q6 Q3 16 128 *) + 0x6e6c8409; (* arm_SUB_VEC Q9 Q0 Q12 16 128 *) + 0x4e6c840c; (* arm_ADD_VEC Q12 Q0 Q12 16 128 *) + 0x4f5fd960; (* arm_SQRDMULH_VEC Q0 Q11 (Q15 :> LANE_H 5) 16 128 *) + 0x4f7fd124; (* arm_SQRDMULH_VEC Q4 Q9 (Q15 :> LANE_H 3) 16 128 *) + 0x4f6f8129; (* arm_MUL_VEC Q9 Q9 (Q15 :> LANE_H 2) 16 128 *) + 0x4f4f896b; (* arm_MUL_VEC Q11 Q11 (Q15 :> LANE_H 4) 16 128 *) + 0x4e6384c6; (* arm_ADD_VEC Q6 Q6 Q3 16 128 *) + 0x4f57c183; (* arm_SQDMULH_VEC Q3 Q12 (Q7 :> LANE_H 1) 16 128 *) + 0x6f474089; (* arm_MLS_VEC Q9 Q4 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47400b; (* arm_MLS_VEC Q11 Q0 (Q7 :> LANE_H 0) 16 128 *) + 0x4f57c0c0; (* arm_SQDMULH_VEC Q0 Q6 (Q7 :> LANE_H 1) 16 128 *) + 0x4f152463; (* arm_SRSHR_VEC Q3 Q3 11 16 128 *) + 0x4f57c124; (* arm_SQDMULH_VEC Q4 Q9 (Q7 :> LANE_H 1) 16 128 *) + 0x4f57c17c; (* arm_SQDMULH_VEC Q28 Q11 (Q7 :> LANE_H 1) 16 128 *) + 0x6f47406c; (* arm_MLS_VEC Q12 Q3 (Q7 :> LANE_H 0) 16 128 *) + 0x4f152400; (* arm_SRSHR_VEC Q0 Q0 11 16 128 *) + 0x4f152483; (* arm_SRSHR_VEC Q3 Q4 11 16 128 *) + 0x4f152784; (* arm_SRSHR_VEC Q4 Q28 11 16 128 *) + 0x6f474006; (* arm_MLS_VEC Q6 Q0 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474069; (* arm_MLS_VEC Q9 Q3 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47408b; (* arm_MLS_VEC Q11 Q4 (Q7 :> LANE_H 0) 16 128 *) + 0x4e902b00; (* arm_TRN1 Q0 Q24 Q16 32 128 *) + 0x6e668583; (* arm_SUB_VEC Q3 Q12 Q6 16 128 *) + 0x4e66858c; (* arm_ADD_VEC Q12 Q12 Q6 16 128 *) + 0x6e6b8526; (* arm_SUB_VEC Q6 Q9 Q11 16 128 *) + 0x4f5fd064; (* arm_SQRDMULH_VEC Q4 Q3 (Q15 :> LANE_H 1) 16 128 *) + 0x4f4f8063; (* arm_MUL_VEC Q3 Q3 (Q15 :> LANE_H 0) 16 128 *) + 0x4f5fd0dc; (* arm_SQRDMULH_VEC Q28 Q6 (Q15 :> LANE_H 1) 16 128 *) + 0x4f4f80cf; (* arm_MUL_VEC Q15 Q6 (Q15 :> LANE_H 0) 16 128 *) + 0x4e6b852b; (* arm_ADD_VEC Q11 Q9 Q11 16 128 *) + 0x6f474083; (* arm_MLS_VEC Q3 Q4 (Q7 :> LANE_H 0) 16 128 *) + 0x3c84046c; (* arm_STR Q12 X3 (Postimmediate_Offset (word 64)) *) + 0x6f47438f; (* arm_MLS_VEC Q15 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x3c9d006b; (* arm_STR Q11 X3 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cc60449; (* arm_LDR Q9 X2 (Postimmediate_Offset (word 96)) *) + 0x3c9e0063; (* arm_STR Q3 X3 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdb0046; (* arm_LDR Q6 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x3c9f006f; (* arm_STR Q15 X3 (Immediate_Offset (word 18446744073709551600)) *) + 0x3cdc0043; (* arm_LDR Q3 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x3cdd004f; (* arm_LDR Q15 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cde0044; (* arm_LDR Q4 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x3cdf005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff584; (* arm_CBNZ X4 (word 2096816) *) + 0x4e882b4b; (* arm_TRN1 Q11 Q26 Q8 32 128 *) + 0x4e906b18; (* arm_TRN2 Q24 Q24 Q16 32 128 *) + 0x4e886b5a; (* arm_TRN2 Q26 Q26 Q8 32 128 *) + 0x4ec02972; (* arm_TRN1 Q18 Q11 Q0 64 128 *) + 0x4ec0696b; (* arm_TRN2 Q11 Q11 Q0 64 128 *) + 0x4ed86b4c; (* arm_TRN2 Q12 Q26 Q24 64 128 *) + 0x4ed82b48; (* arm_TRN1 Q8 Q26 Q24 64 128 *) + 0x6e6c857a; (* arm_SUB_VEC Q26 Q11 Q12 16 128 *) + 0x6e68864d; (* arm_SUB_VEC Q13 Q18 Q8 16 128 *) + 0x4e688658; (* arm_ADD_VEC Q24 Q18 Q8 16 128 *) + 0x4e649f50; (* arm_MUL_VEC Q16 Q26 Q4 16 128 *) + 0x6e6fb5b1; (* arm_SQRDMULH_VEC Q17 Q13 Q15 16 128 *) + 0x4e639da3; (* arm_MUL_VEC Q3 Q13 Q3 16 128 *) + 0x6e7cb75a; (* arm_SQRDMULH_VEC Q26 Q26 Q28 16 128 *) + 0x4e6c856a; (* arm_ADD_VEC Q10 Q11 Q12 16 128 *) + 0x6f474223; (* arm_MLS_VEC Q3 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474350; (* arm_MLS_VEC Q16 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6a871a; (* arm_SUB_VEC Q26 Q24 Q10 16 128 *) + 0x3cc10424; (* arm_LDR Q4 X1 (Postimmediate_Offset (word 16)) *) + 0x6e70846c; (* arm_SUB_VEC Q12 Q3 Q16 16 128 *) + 0x6e66b74f; (* arm_SQRDMULH_VEC Q15 Q26 Q6 16 128 *) + 0x4e699f4b; (* arm_MUL_VEC Q11 Q26 Q9 16 128 *) + 0x4e699d88; (* arm_MUL_VEC Q8 Q12 Q9 16 128 *) + 0x6e66b58c; (* arm_SQRDMULH_VEC Q12 Q12 Q6 16 128 *) + 0x4e6a8700; (* arm_ADD_VEC Q0 Q24 Q10 16 128 *) + 0x6f4741eb; (* arm_MLS_VEC Q11 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4e708466; (* arm_ADD_VEC Q6 Q3 Q16 16 128 *) + 0x6f474188; (* arm_MLS_VEC Q8 Q12 (Q7 :> LANE_H 0) 16 128 *) + 0x4e86681a; (* arm_TRN2 Q26 Q0 Q6 32 128 *) + 0x4e88696c; (* arm_TRN2 Q12 Q11 Q8 32 128 *) + 0x4e882963; (* arm_TRN1 Q3 Q11 Q8 32 128 *) + 0x4e862811; (* arm_TRN1 Q17 Q0 Q6 32 128 *) + 0x4ecc2b48; (* arm_TRN1 Q8 Q26 Q12 64 128 *) + 0x4ecc6b4d; (* arm_TRN2 Q13 Q26 Q12 64 128 *) + 0x4ec32a2b; (* arm_TRN1 Q11 Q17 Q3 64 128 *) + 0x4ec36a2f; (* arm_TRN2 Q15 Q17 Q3 64 128 *) + 0x6e68856c; (* arm_SUB_VEC Q12 Q11 Q8 16 128 *) + 0x4e6d85f0; (* arm_ADD_VEC Q16 Q15 Q13 16 128 *) + 0x6e6d85fa; (* arm_SUB_VEC Q26 Q15 Q13 16 128 *) + 0x4f648180; (* arm_MUL_VEC Q0 Q12 (Q4 :> LANE_H 2) 16 128 *) + 0x4f74d189; (* arm_SQRDMULH_VEC Q9 Q12 (Q4 :> LANE_H 3) 16 128 *) + 0x4f448b4d; (* arm_MUL_VEC Q13 Q26 (Q4 :> LANE_H 4) 16 128 *) + 0x4f54db5a; (* arm_SQRDMULH_VEC Q26 Q26 (Q4 :> LANE_H 5) 16 128 *) + 0x4e688578; (* arm_ADD_VEC Q24 Q11 Q8 16 128 *) + 0x6f474120; (* arm_MLS_VEC Q0 Q9 (Q7 :> LANE_H 0) 16 128 *) + 0x4f57c20c; (* arm_SQDMULH_VEC Q12 Q16 (Q7 :> LANE_H 1) 16 128 *) + 0x6f47434d; (* arm_MLS_VEC Q13 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x4f57c30b; (* arm_SQDMULH_VEC Q11 Q24 (Q7 :> LANE_H 1) 16 128 *) + 0x4f57c008; (* arm_SQDMULH_VEC Q8 Q0 (Q7 :> LANE_H 1) 16 128 *) + 0x4f15258c; (* arm_SRSHR_VEC Q12 Q12 11 16 128 *) + 0x4f57c1ba; (* arm_SQDMULH_VEC Q26 Q13 (Q7 :> LANE_H 1) 16 128 *) + 0x4f15256b; (* arm_SRSHR_VEC Q11 Q11 11 16 128 *) + 0x6f474190; (* arm_MLS_VEC Q16 Q12 (Q7 :> LANE_H 0) 16 128 *) + 0x4f152508; (* arm_SRSHR_VEC Q8 Q8 11 16 128 *) + 0x4f15275a; (* arm_SRSHR_VEC Q26 Q26 11 16 128 *) + 0x6f474178; (* arm_MLS_VEC Q24 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474100; (* arm_MLS_VEC Q0 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47434d; (* arm_MLS_VEC Q13 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e70871a; (* arm_SUB_VEC Q26 Q24 Q16 16 128 *) + 0x4e70870f; (* arm_ADD_VEC Q15 Q24 Q16 16 128 *) + 0x6e6d840c; (* arm_SUB_VEC Q12 Q0 Q13 16 128 *) + 0x4f44834b; (* arm_MUL_VEC Q11 Q26 (Q4 :> LANE_H 0) 16 128 *) + 0x4f54d350; (* arm_SQRDMULH_VEC Q16 Q26 (Q4 :> LANE_H 1) 16 128 *) + 0x4f44819a; (* arm_MUL_VEC Q26 Q12 (Q4 :> LANE_H 0) 16 128 *) + 0x4f54d188; (* arm_SQRDMULH_VEC Q8 Q12 (Q4 :> LANE_H 1) 16 128 *) + 0x4e6d840c; (* arm_ADD_VEC Q12 Q0 Q13 16 128 *) + 0x6f47420b; (* arm_MLS_VEC Q11 Q16 (Q7 :> LANE_H 0) 16 128 *) + 0x3c84046f; (* arm_STR Q15 X3 (Postimmediate_Offset (word 64)) *) + 0x6f47411a; (* arm_MLS_VEC Q26 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x3c9d006c; (* arm_STR Q12 X3 (Immediate_Offset (word 18446744073709551568)) *) + 0x3c9e006b; (* arm_STR Q11 X3 (Immediate_Offset (word 18446744073709551584)) *) + 0x3c9f007a; (* arm_STR Q26 X3 (Immediate_Offset (word 18446744073709551600)) *) + 0xd2800084; (* arm_MOV X4 (rvalue (word 4)) *) + 0x3cc20420; (* arm_LDR Q0 X1 (Postimmediate_Offset (word 32)) *) + 0x3cdf0021; (* arm_LDR Q1 X1 (Immediate_Offset (word 18446744073709551600)) *) + 0x3dc02018; (* arm_LDR Q24 X0 (Immediate_Offset (word 128)) *) + 0x3dc03010; (* arm_LDR Q16 X0 (Immediate_Offset (word 192)) *) + 0x3dc04009; (* arm_LDR Q9 X0 (Immediate_Offset (word 256)) *) + 0x3dc05006; (* arm_LDR Q6 X0 (Immediate_Offset (word 320)) *) + 0x3dc06003; (* arm_LDR Q3 X0 (Immediate_Offset (word 384)) *) + 0x3dc07004; (* arm_LDR Q4 X0 (Immediate_Offset (word 448)) *) + 0x4e66853c; (* arm_ADD_VEC Q28 Q9 Q6 16 128 *) + 0x4e708713; (* arm_ADD_VEC Q19 Q24 Q16 16 128 *) + 0x4e64846d; (* arm_ADD_VEC Q13 Q3 Q4 16 128 *) + 0x3dc0000b; (* arm_LDR Q11 X0 (Immediate_Offset (word 0)) *) + 0x4e6d8797; (* arm_ADD_VEC Q23 Q28 Q13 16 128 *) + 0x3dc0100f; (* arm_LDR Q15 X0 (Immediate_Offset (word 64)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x6e6f856c; (* arm_SUB_VEC Q12 Q11 Q15 16 128 *) + 0x4e6f857a; (* arm_ADD_VEC Q26 Q11 Q15 16 128 *) + 0x6e708708; (* arm_SUB_VEC Q8 Q24 Q16 16 128 *) + 0x4f70d98b; (* arm_SQRDMULH_VEC Q11 Q12 (Q0 :> LANE_H 7) 16 128 *) + 0x4f60898c; (* arm_MUL_VEC Q12 Q12 (Q0 :> LANE_H 6) 16 128 *) + 0x6e738750; (* arm_SUB_VEC Q16 Q26 Q19 16 128 *) + 0x4e73875a; (* arm_ADD_VEC Q26 Q26 Q19 16 128 *) + 0x4f51d10f; (* arm_SQRDMULH_VEC Q15 Q8 (Q1 :> LANE_H 1) 16 128 *) + 0x4f418108; (* arm_MUL_VEC Q8 Q8 (Q1 :> LANE_H 0) 16 128 *) + 0x6f47416c; (* arm_MLS_VEC Q12 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x6e66852b; (* arm_SUB_VEC Q11 Q9 Q6 16 128 *) + 0x4f70d218; (* arm_SQRDMULH_VEC Q24 Q16 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608210; (* arm_MUL_VEC Q16 Q16 (Q0 :> LANE_H 2) 16 128 *) + 0x6e778749; (* arm_SUB_VEC Q9 Q26 Q23 16 128 *) + 0x4e77875a; (* arm_ADD_VEC Q26 Q26 Q23 16 128 *) + 0x6f4741e8; (* arm_MLS_VEC Q8 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f71d16f; (* arm_SQRDMULH_VEC Q15 Q11 (Q1 :> LANE_H 3) 16 128 *) + 0x4f61816b; (* arm_MUL_VEC Q11 Q11 (Q1 :> LANE_H 2) 16 128 *) + 0x6e648466; (* arm_SUB_VEC Q6 Q3 Q4 16 128 *) + 0x6e688583; (* arm_SUB_VEC Q3 Q12 Q8 16 128 *) + 0x4e68858c; (* arm_ADD_VEC Q12 Q12 Q8 16 128 *) + 0x6f4741eb; (* arm_MLS_VEC Q11 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d8c8; (* arm_SQRDMULH_VEC Q8 Q6 (Q1 :> LANE_H 5) 16 128 *) + 0x6f474310; (* arm_MLS_VEC Q16 Q24 (Q7 :> LANE_H 0) 16 128 *) + 0x4f4188cf; (* arm_MUL_VEC Q15 Q6 (Q1 :> LANE_H 4) 16 128 *) + 0x4f70d078; (* arm_SQRDMULH_VEC Q24 Q3 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608066; (* arm_MUL_VEC Q6 Q3 (Q0 :> LANE_H 2) 16 128 *) + 0x4f50d123; (* arm_SQRDMULH_VEC Q3 Q9 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408129; (* arm_MUL_VEC Q9 Q9 (Q0 :> LANE_H 0) 16 128 *) + 0x3c81041a; (* arm_STR Q26 X0 (Postimmediate_Offset (word 16)) *) + 0x6f47410f; (* arm_MLS_VEC Q15 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474306; (* arm_MLS_VEC Q6 Q24 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6d879a; (* arm_SUB_VEC Q26 Q28 Q13 16 128 *) + 0x6f474069; (* arm_MLS_VEC Q9 Q3 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6f8568; (* arm_SUB_VEC Q8 Q11 Q15 16 128 *) + 0x4f50db58; (* arm_SQRDMULH_VEC Q24 Q26 (Q0 :> LANE_H 5) 16 128 *) + 0x4f408b5a; (* arm_MUL_VEC Q26 Q26 (Q0 :> LANE_H 4) 16 128 *) + 0x4e6f856b; (* arm_ADD_VEC Q11 Q11 Q15 16 128 *) + 0x4f50d90f; (* arm_SQRDMULH_VEC Q15 Q8 (Q0 :> LANE_H 5) 16 128 *) + 0x4f408908; (* arm_MUL_VEC Q8 Q8 (Q0 :> LANE_H 4) 16 128 *) + 0x6f47431a; (* arm_MLS_VEC Q26 Q24 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6b8598; (* arm_SUB_VEC Q24 Q12 Q11 16 128 *) + 0x4e6b858c; (* arm_ADD_VEC Q12 Q12 Q11 16 128 *) + 0x6f4741e8; (* arm_MLS_VEC Q8 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d30b; (* arm_SQRDMULH_VEC Q11 Q24 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40830f; (* arm_MUL_VEC Q15 Q24 (Q0 :> LANE_H 0) 16 128 *) + 0x6e7a8618; (* arm_SUB_VEC Q24 Q16 Q26 16 128 *) + 0x4e7a861a; (* arm_ADD_VEC Q26 Q16 Q26 16 128 *) + 0x6e6884d0; (* arm_SUB_VEC Q16 Q6 Q8 16 128 *) + 0x6f47416f; (* arm_MLS_VEC Q15 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d30b; (* arm_SQRDMULH_VEC Q11 Q24 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408318; (* arm_MUL_VEC Q24 Q24 (Q0 :> LANE_H 0) 16 128 *) + 0x4e6884c8; (* arm_ADD_VEC Q8 Q6 Q8 16 128 *) + 0x4f50d206; (* arm_SQRDMULH_VEC Q6 Q16 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408210; (* arm_MUL_VEC Q16 Q16 (Q0 :> LANE_H 0) 16 128 *) + 0x6f474178; (* arm_MLS_VEC Q24 Q11 (Q7 :> LANE_H 0) 16 128 *) + 0x3d803c09; (* arm_STR Q9 X0 (Immediate_Offset (word 240)) *) + 0x3dc0000b; (* arm_LDR Q11 X0 (Immediate_Offset (word 0)) *) + 0x6f4740d0; (* arm_MLS_VEC Q16 Q6 (Q7 :> LANE_H 0) 16 128 *) + 0x3d804c0f; (* arm_STR Q15 X0 (Immediate_Offset (word 304)) *) + 0x3dc0100f; (* arm_LDR Q15 X0 (Immediate_Offset (word 64)) *) + 0x3d805c18; (* arm_STR Q24 X0 (Immediate_Offset (word 368)) *) + 0x3dc02018; (* arm_LDR Q24 X0 (Immediate_Offset (word 128)) *) + 0x3d806c10; (* arm_STR Q16 X0 (Immediate_Offset (word 432)) *) + 0x3dc03010; (* arm_LDR Q16 X0 (Immediate_Offset (word 192)) *) + 0x3d800c0c; (* arm_STR Q12 X0 (Immediate_Offset (word 48)) *) + 0x3dc04009; (* arm_LDR Q9 X0 (Immediate_Offset (word 256)) *) + 0x3dc05006; (* arm_LDR Q6 X0 (Immediate_Offset (word 320)) *) + 0x3dc06003; (* arm_LDR Q3 X0 (Immediate_Offset (word 384)) *) + 0x3dc07004; (* arm_LDR Q4 X0 (Immediate_Offset (word 448)) *) + 0x3d801c1a; (* arm_STR Q26 X0 (Immediate_Offset (word 112)) *) + 0x4e66853c; (* arm_ADD_VEC Q28 Q9 Q6 16 128 *) + 0x4e64846d; (* arm_ADD_VEC Q13 Q3 Q4 16 128 *) + 0x3d802c08; (* arm_STR Q8 X0 (Immediate_Offset (word 176)) *) + 0x4e708713; (* arm_ADD_VEC Q19 Q24 Q16 16 128 *) + 0x4e6d8797; (* arm_ADD_VEC Q23 Q28 Q13 16 128 *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff664; (* arm_CBNZ X4 (word 2096844) *) + 0x4e6f856a; (* arm_ADD_VEC Q10 Q11 Q15 16 128 *) + 0x6e6d878c; (* arm_SUB_VEC Q12 Q28 Q13 16 128 *) + 0x6e6f856b; (* arm_SUB_VEC Q11 Q11 Q15 16 128 *) + 0x6e738556; (* arm_SUB_VEC Q22 Q10 Q19 16 128 *) + 0x4f408992; (* arm_MUL_VEC Q18 Q12 (Q0 :> LANE_H 4) 16 128 *) + 0x4f50d99a; (* arm_SQRDMULH_VEC Q26 Q12 (Q0 :> LANE_H 5) 16 128 *) + 0x4f70d2cc; (* arm_SQRDMULH_VEC Q12 Q22 (Q0 :> LANE_H 3) 16 128 *) + 0x4f6082cd; (* arm_MUL_VEC Q13 Q22 (Q0 :> LANE_H 2) 16 128 *) + 0x6e70871f; (* arm_SUB_VEC Q31 Q24 Q16 16 128 *) + 0x4f70d976; (* arm_SQRDMULH_VEC Q22 Q11 (Q0 :> LANE_H 7) 16 128 *) + 0x6f474352; (* arm_MLS_VEC Q18 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47418d; (* arm_MLS_VEC Q13 Q12 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d3e2; (* arm_SQRDMULH_VEC Q2 Q31 (Q1 :> LANE_H 1) 16 128 *) + 0x4f4183e5; (* arm_MUL_VEC Q5 Q31 (Q1 :> LANE_H 0) 16 128 *) + 0x4f60896f; (* arm_MUL_VEC Q15 Q11 (Q0 :> LANE_H 6) 16 128 *) + 0x6e7285ac; (* arm_SUB_VEC Q12 Q13 Q18 16 128 *) + 0x6e648464; (* arm_SUB_VEC Q4 Q3 Q4 16 128 *) + 0x6f474045; (* arm_MLS_VEC Q5 Q2 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d19a; (* arm_SQRDMULH_VEC Q26 Q12 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40818c; (* arm_MUL_VEC Q12 Q12 (Q0 :> LANE_H 0) 16 128 *) + 0x6f4742cf; (* arm_MLS_VEC Q15 Q22 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d888; (* arm_SQRDMULH_VEC Q8 Q4 (Q1 :> LANE_H 5) 16 128 *) + 0x4f418884; (* arm_MUL_VEC Q4 Q4 (Q1 :> LANE_H 4) 16 128 *) + 0x6f47434c; (* arm_MLS_VEC Q12 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6585f5; (* arm_SUB_VEC Q21 Q15 Q5 16 128 *) + 0x6e66853c; (* arm_SUB_VEC Q28 Q9 Q6 16 128 *) + 0x6f474104; (* arm_MLS_VEC Q4 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x4f6082b8; (* arm_MUL_VEC Q24 Q21 (Q0 :> LANE_H 2) 16 128 *) + 0x4f70d2a8; (* arm_SQRDMULH_VEC Q8 Q21 (Q0 :> LANE_H 3) 16 128 *) + 0x4f71d386; (* arm_SQRDMULH_VEC Q6 Q28 (Q1 :> LANE_H 3) 16 128 *) + 0x4e738553; (* arm_ADD_VEC Q19 Q10 Q19 16 128 *) + 0x4f61839c; (* arm_MUL_VEC Q28 Q28 (Q1 :> LANE_H 2) 16 128 *) + 0x6f474118; (* arm_MLS_VEC Q24 Q8 (Q7 :> LANE_H 0) 16 128 *) + 0x6e77866b; (* arm_SUB_VEC Q11 Q19 Q23 16 128 *) + 0x3d80600c; (* arm_STR Q12 X0 (Immediate_Offset (word 384)) *) + 0x6f4740dc; (* arm_MLS_VEC Q28 Q6 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d170; (* arm_SQRDMULH_VEC Q16 Q11 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408169; (* arm_MUL_VEC Q9 Q11 (Q0 :> LANE_H 0) 16 128 *) + 0x4e6585e6; (* arm_ADD_VEC Q6 Q15 Q5 16 128 *) + 0x4e64879a; (* arm_ADD_VEC Q26 Q28 Q4 16 128 *) + 0x6e64878f; (* arm_SUB_VEC Q15 Q28 Q4 16 128 *) + 0x6f474209; (* arm_MLS_VEC Q9 Q16 (Q7 :> LANE_H 0) 16 128 *) + 0x4e7a84c3; (* arm_ADD_VEC Q3 Q6 Q26 16 128 *) + 0x4f4089e8; (* arm_MUL_VEC Q8 Q15 (Q0 :> LANE_H 4) 16 128 *) + 0x4f50d9ef; (* arm_SQRDMULH_VEC Q15 Q15 (Q0 :> LANE_H 5) 16 128 *) + 0x3d804009; (* arm_STR Q9 X0 (Immediate_Offset (word 256)) *) + 0x6e7a84c2; (* arm_SUB_VEC Q2 Q6 Q26 16 128 *) + 0x3d801003; (* arm_STR Q3 X0 (Immediate_Offset (word 64)) *) + 0x6f4741e8; (* arm_MLS_VEC Q8 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d04f; (* arm_SQRDMULH_VEC Q15 Q2 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40804b; (* arm_MUL_VEC Q11 Q2 (Q0 :> LANE_H 0) 16 128 *) + 0x4e7285b0; (* arm_ADD_VEC Q16 Q13 Q18 16 128 *) + 0x6e68870c; (* arm_SUB_VEC Q12 Q24 Q8 16 128 *) + 0x4e688708; (* arm_ADD_VEC Q8 Q24 Q8 16 128 *) + 0x6f4741eb; (* arm_MLS_VEC Q11 Q15 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d19a; (* arm_SQRDMULH_VEC Q26 Q12 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40818c; (* arm_MUL_VEC Q12 Q12 (Q0 :> LANE_H 0) 16 128 *) + 0x3d803008; (* arm_STR Q8 X0 (Immediate_Offset (word 192)) *) + 0x4e77866f; (* arm_ADD_VEC Q15 Q19 Q23 16 128 *) + 0x3d80500b; (* arm_STR Q11 X0 (Immediate_Offset (word 320)) *) + 0x6f47434c; (* arm_MLS_VEC Q12 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x3c81040f; (* arm_STR Q15 X0 (Postimmediate_Offset (word 16)) *) + 0x3d801c10; (* arm_STR Q16 X0 (Immediate_Offset (word 112)) *) + 0x3d806c0c; (* arm_STR Q12 X0 (Immediate_Offset (word 432)) *) + 0x6d4027e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d4237ec; (* arm_LDP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d433fee; (* arm_LDP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x910103ff; (* arm_ADD SP SP (rvalue (word 64)) *) + 0xd65f03c0 (* arm_RET X30 *) +];; + +let MLKEM_INTT_EXEC = ARM_MK_EXEC_RULE mlkem_intt_mc;; + +(* ------------------------------------------------------------------------- *) +(* Data tables that are assumed in the precondition. *) +(* ------------------------------------------------------------------------- *) + +let intt_zetas_layer01234 = define + `intt_zetas_layer01234:int list = + [&1583; &15582; -- &821; -- &8081; &1355; &13338; &0; &0; -- &569; -- &5601; + &450; &4429; &936; &9213; &0; &0; &69; &679; &447; &4400; -- &535; + -- &5266; &0; &0; &543; &5345; &1235; &12156; -- &1426; -- &14036; &0; + &0; -- &797; -- &7845; -- &1333; -- &13121; &1089; &10719; &0; &0; + -- &193; -- &1900; -- &56; -- &551; &283; &2786; &0; &0; &1410; &13879; + -- &1476; -- &14529; -- &1339; -- &13180; &0; &0; -- &1062; -- &10453; + &882; &8682; -- &296; -- &2914; &0; &0; &1600; &15749; &40; &394; + &749; &7373; -- &848; -- &8347; &1432; &14095; -- &630; -- &6201; + &687; &6762; &0; &0]`;; + +let intt_zetas_layer56 = define + `intt_zetas_layer56:int list = + [-- &910; -- &910; -- &1227; -- &1227; &219; &219; &855; &855; -- &8957; + -- &8957; -- &12078; -- &12078; &2156; &2156; &8416; &8416; &1175; + &1175; &394; &394; -- &1029; -- &1029; -- &1212; -- &1212; &11566; + &11566; &3878; &3878; -- &10129; -- &10129; -- &11930; -- &11930; + -- &885; -- &885; &1219; &1219; &1455; &1455; &1607; &1607; -- &8711; + -- &8711; &11999; &11999; &14322; &14322; &15818; &15818; -- &648; + -- &648; -- &1481; -- &1481; &712; &712; &682; &682; -- &6378; -- &6378; + -- &14578; -- &14578; &7008; &7008; &6713; &6713; -- &886; -- &886; + &1179; &1179; -- &1026; -- &1026; -- &1092; -- &1092; -- &8721; + -- &8721; &11605; &11605; -- &10099; -- &10099; -- &10749; -- &10749; + &554; &554; -- &1143; -- &1143; -- &403; -- &403; &525; &525; &5453; + &5453; -- &11251; -- &11251; -- &3967; -- &3967; &5168; &5168; &927; + &927; -- &1534; -- &1534; &461; &461; -- &1438; -- &1438; &9125; + &9125; -- &15099; -- &15099; &4538; &4538; -- &14155; -- &14155; &735; + &735; -- &561; -- &561; -- &757; -- &757; -- &319; -- &319; &7235; + &7235; -- &5522; -- &5522; -- &7451; -- &7451; -- &3140; -- &3140; + &863; &863; &1230; &1230; &556; &556; -- &1063; -- &1063; &8495; + &8495; &12107; &12107; &5473; &5473; -- &10463; -- &10463; -- &452; + -- &452; -- &807; -- &807; -- &1435; -- &1435; &1010; &1010; -- &4449; + -- &4449; -- &7943; -- &7943; -- &14125; -- &14125; &9942; &9942; + -- &1645; -- &1645; &780; &780; &109; &109; &1031; &1031; -- &16192; + -- &16192; &7678; &7678; &1073; &1073; &10148; &10148; &1239; &1239; + -- &375; -- &375; &1292; &1292; -- &1584; -- &1584; &12196; &12196; + -- &3691; -- &3691; &12717; &12717; -- &15592; -- &15592; &1414; &1414; + -- &1320; -- &1320; -- &33; -- &33; &464; &464; &13918; &13918; + -- &12993; -- &12993; -- &325; -- &325; &4567; &4567; -- &641; -- &641; + &992; &992; &941; &941; &1021; &1021; -- &6309; -- &6309; &9764; + &9764; &9262; &9262; &10050; &10050; -- &268; -- &268; -- &733; + -- &733; &892; &892; -- &939; -- &939; -- &2638; -- &2638; -- &7215; + -- &7215; &8780; &8780; -- &9243; -- &9243; -- &632; -- &632; &816; &816; + &1352; &1352; -- &650; -- &650; -- &6221; -- &6221; &8032; &8032; + &13308; &13308; -- &6398; -- &6398; &642; &642; -- &952; -- &952; + &1540; &1540; -- &1651; -- &1651; &6319; &6319; -- &9371; -- &9371; + &15159; &15159; -- &16251; -- &16251; -- &1461; -- &1461; &1482; + &1482; &540; &540; &1626; &1626; -- &14381; -- &14381; &14588; &14588; + &5315; &5315; &16005; &16005; &1274; &1274; &1052; &1052; &1025; + &1025; -- &1197; -- &1197; &12540; &12540; &10355; &10355; &10089; + &10089; -- &11782; -- &11782; &279; &279; &1173; &1173; -- &233; + -- &233; &667; &667; &2746; &2746; &11546; &11546; -- &2293; -- &2293; + &6565; &6565; &314; &314; -- &756; -- &756; &48; &48; -- &1409; + -- &1409; &3091; &3091; -- &7441; -- &7441; &472; &472; -- &13869; + -- &13869; &1573; &1573; &76; &76; -- &331; -- &331; -- &289; -- &289; + &15483; &15483; &748; &748; -- &3258; -- &3258; -- &2845; -- &2845; + -- &1100; -- &1100; -- &723; -- &723; &680; &680; &568; &568; -- &10828; + -- &10828; -- &7117; -- &7117; &6693; &6693; &5591; &5591; &1041; + &1041; -- &1637; -- &1637; -- &583; -- &583; -- &17; -- &17; &10247; + &10247; -- &16113; -- &16113; -- &5739; -- &5739; -- &167; -- &167]`;; + +let intt_constants = define + `intt_constants z_01234 z_56 s <=> + (!i. i < 80 + ==> read(memory :> bytes16(word_add z_01234 (word(2 * i)))) s = + iword(EL i intt_zetas_layer01234)) /\ + (!i. i < 384 + ==> read(memory :> bytes16(word_add z_56 (word(2 * i)))) s = + iword(EL i intt_zetas_layer56))`;; + +(* ------------------------------------------------------------------------- *) +(* Some convenient proof tools. *) +(* ------------------------------------------------------------------------- *) + +let READ_MEMORY_MERGE_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_SPLIT] THENC + LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let READ_MEMORY_SPLIT_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_UNSPLIT] THENC + BINOP_CONV(LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV)))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let SIMD_SIMPLIFY_CONV = + TOP_DEPTH_CONV + (REWR_CONV WORD_SUBWORD_AND ORELSEC WORD_SIMPLE_SUBWORD_CONV) THENC + DEPTH_CONV WORD_NUM_RED_CONV THENC + REWRITE_CONV[GSYM barred; GSYM barmul];; + +let SIMD_SIMPLIFY_TAC = + let simdable = can (term_match [] `read X (s:armstate):int128 = whatever`) in + TRY(FIRST_X_ASSUM + (ASSUME_TAC o + CONV_RULE(RAND_CONV SIMD_SIMPLIFY_CONV) o + check (simdable o concl)));; + +let MEMORY_128_FROM_16_TAC = + let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64` + and pat = `read (memory :> bytes128(word_add a (word n))) s0` in + fun v n -> + let pat' = subst[mk_var(v,i64_ty),a_tm] pat in + let f i = + let itm = mk_small_numeral(16*i) in + READ_MEMORY_MERGE_CONV 3 (subst[itm,n_tm] pat') in + MP_TAC(end_itlist CONJ (map f (0--(n-1))));; + +(* ------------------------------------------------------------------------- *) +(* Correctness proof. *) +(* ------------------------------------------------------------------------- *) + +let MLKEM_INTT_CORRECT = prove + (`!a z_01234 z_56 x pc. + ALL (nonoverlapping (a,512)) + [(word pc,0x5d0); (z_01234,160); (z_56,768)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_intt_mc /\ + read PC s = word (pc + 0x14) /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + intt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = word(pc + 0x5b8) /\ + !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == inverse_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &26624) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [Q8; Q9; Q10; Q11; Q12; Q13; Q14; Q15] ,, + MAYCHANGE [memory :> bytes(a,512)])`, + MAP_EVERY X_GEN_TAC + [`a:int64`; `z_01234:int64`; `z_56:int64`; `x:num->int16`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + (*** Manually expand the cases in the hypotheses ***) + + REWRITE_TAC[intt_constants] THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV + (EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV)))) THEN + REWRITE_TAC[intt_zetas_layer01234; intt_zetas_layer56] THEN + CONV_TAC(ONCE_DEPTH_CONV EL_CONV) THEN + CONV_TAC(ONCE_DEPTH_CONV WORD_IWORD_CONV) THEN REWRITE_TAC[WORD_ADD_0] THEN + ENSURES_INIT_TAC "s0" THEN + + (*** Manually restructure to match the 128-bit loads. It would be nicer + *** if the simulation machinery did this automatically. + ***) + + MEMORY_128_FROM_16_TAC "a" 32 THEN + MEMORY_128_FROM_16_TAC "z_01234" 10 THEN + MEMORY_128_FROM_16_TAC "z_56" 48 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes16 a) s = x`] THEN + REPEAT STRIP_TAC THEN + + (*** Ghost up initial contents of Q7 since it isn't fully initialized ***) + + ABBREV_TAC `v7_init:int128 = read Q7 s0` THEN + + (*** Simulate all the way to the end, in effect unrolling loops ***) + + MAP_EVERY (fun n -> ARM_STEPS_TAC MLKEM_INTT_EXEC [n] THEN SIMD_SIMPLIFY_TAC) + (1--1181) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Reverse the restructuring by splitting the 128-bit words up ***) + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE SIMD_SIMPLIFY_CONV o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (*** Turn the conclusion into an explicit conjunction and split it up ***) + + CONV_TAC(ONCE_DEPTH_CONV let_CONV) THEN REWRITE_TAC[INT_ABS_BOUNDS] THEN + GEN_REWRITE_TAC (BINDER_CONV o RAND_CONV) [GSYM I_THM] THEN + CONV_TAC(EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV) THEN + ASM_REWRITE_TAC[I_THM; WORD_ADD_0] THEN DISCARD_STATE_TAC "s1181" THEN + REPEAT(W(fun (asl,w) -> + if length(conjuncts w) > 3 then CONJ_TAC else NO_TAC)) THEN + + (*** Get congruences and bounds for the result digits and finish ***) + + (W(MP_TAC o CONGBOUND_RULE o rand o lhand o rator o lhand o snd) THEN + MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + CONV_TAC(ONCE_DEPTH_CONV INVERSE_NTT_CONV) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM] THEN CONV_TAC INT_REM_DOWN_CONV THEN + REWRITE_TAC[INT_REM_EQ] THEN + REWRITE_TAC[REAL_INT_CONGRUENCE; INT_OF_NUM_EQ; ARITH_EQ] THEN + REWRITE_TAC[GSYM REAL_OF_INT_CLAUSES] THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN REAL_INTEGER_TAC; + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]));; + +(*** Subroutine form, somewhat messy elaboration of the usual wrapper ***) + +let MLKEM_INTT_SUBROUTINE_CORRECT = prove + (`!a z_01234 z_56 x pc stackpointer returnaddress. + aligned 16 stackpointer /\ + ALLPAIRS nonoverlapping + [(a,512); (word_sub stackpointer (word 64),64)] + [(word pc,0x5d0); (z_01234,160); (z_56,768)] /\ + nonoverlapping (a,512) (word_sub stackpointer (word 64),64) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_intt_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + intt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = returnaddress /\ + !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == inverse_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &26624) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(a,512); + memory :> bytes(word_sub stackpointer (word 64),64)])`, + let TWEAK_CONV = + REWRITE_CONV[intt_constants] THENC + ONCE_DEPTH_CONV let_CONV THENC + ONCE_DEPTH_CONV EXPAND_CASES_CONV THENC + ONCE_DEPTH_CONV NUM_MULT_CONV THENC + PURE_REWRITE_CONV [WORD_ADD_0] in + REWRITE_TAC[fst MLKEM_INTT_EXEC] THEN + CONV_TAC TWEAK_CONV THEN + ARM_ADD_RETURN_STACK_TAC ~pre_post_nsteps:(5,5) MLKEM_INTT_EXEC + (REWRITE_RULE[fst MLKEM_INTT_EXEC] (CONV_RULE TWEAK_CONV MLKEM_INTT_CORRECT)) + `[D8; D9; D10; D11; D12; D13; D14; D15]` 64);; diff --git a/proofs/hol_light/arm/proofs/mlkem_ntt.ml b/proofs/hol_light/arm/proofs/mlkem_ntt.ml new file mode 100644 index 000000000..7c0528046 --- /dev/null +++ b/proofs/hol_light/arm/proofs/mlkem_ntt.ml @@ -0,0 +1,608 @@ +(* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0 OR ISC OR MIT-0 + *) + +(* ========================================================================= *) +(* Forward number theoretic transform. *) +(* ========================================================================= *) + +needs "arm/proofs/base.ml";; +needs "arm/proofs/utils/mlkem.ml";; + +(**** print_literal_from_elf "mlkem/mlkem_ntt.o";; + ****) + +let mlkem_ntt_mc = define_assert_from_elf + "mlkem_ntt_mc" "mlkem/mlkem_ntt.o" +[ + 0xd10103ff; (* arm_SUB SP SP (rvalue (word 64)) *) + 0x6d0027e8; (* arm_STP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d012fea; (* arm_STP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d0237ec; (* arm_STP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d033fee; (* arm_STP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x5281a025; (* arm_MOV W5 (rvalue (word 3329)) *) + 0x4e021ca7; (* arm_INS_GEN Q7 W5 0 16 *) + 0x5289d7e5; (* arm_MOV W5 (rvalue (word 20159)) *) + 0x4e061ca7; (* arm_INS_GEN Q7 W5 16 16 *) + 0xaa0003e3; (* arm_MOV X3 X0 *) + 0xd2800084; (* arm_MOV X4 (rvalue (word 4)) *) + 0x3cc20420; (* arm_LDR Q0 X1 (Postimmediate_Offset (word 32)) *) + 0x3cdf0021; (* arm_LDR Q1 X1 (Immediate_Offset (word 18446744073709551600)) *) + 0x3dc00015; (* arm_LDR Q21 X0 (Immediate_Offset (word 0)) *) + 0x3dc0101a; (* arm_LDR Q26 X0 (Immediate_Offset (word 64)) *) + 0x3dc0201d; (* arm_LDR Q29 X0 (Immediate_Offset (word 128)) *) + 0x3dc03014; (* arm_LDR Q20 X0 (Immediate_Offset (word 192)) *) + 0x3dc04017; (* arm_LDR Q23 X0 (Immediate_Offset (word 256)) *) + 0x3dc0700b; (* arm_LDR Q11 X0 (Immediate_Offset (word 448)) *) + 0x4f4082e2; (* arm_MUL_VEC Q2 Q23 (Q0 :> LANE_H 0) 16 128 *) + 0x3dc05011; (* arm_LDR Q17 X0 (Immediate_Offset (word 320)) *) + 0x4f40816f; (* arm_MUL_VEC Q15 Q11 (Q0 :> LANE_H 0) 16 128 *) + 0x3dc0600d; (* arm_LDR Q13 X0 (Immediate_Offset (word 384)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x4f50d2ee; (* arm_SQRDMULH_VEC Q14 Q23 (Q0 :> LANE_H 1) 16 128 *) + 0x4f50d237; (* arm_SQRDMULH_VEC Q23 Q17 (Q0 :> LANE_H 1) 16 128 *) + 0x4f408231; (* arm_MUL_VEC Q17 Q17 (Q0 :> LANE_H 0) 16 128 *) + 0x4f50d1bc; (* arm_SQRDMULH_VEC Q28 Q13 (Q0 :> LANE_H 1) 16 128 *) + 0x6f4741c2; (* arm_MLS_VEC Q2 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x4f4081ae; (* arm_MUL_VEC Q14 Q13 (Q0 :> LANE_H 0) 16 128 *) + 0x6f4742f1; (* arm_MLS_VEC Q17 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d177; (* arm_SQRDMULH_VEC Q23 Q11 (Q0 :> LANE_H 1) 16 128 *) + 0x6e6286ab; (* arm_SUB_VEC Q11 Q21 Q2 16 128 *) + 0x6f47438e; (* arm_MLS_VEC Q14 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x6e71875c; (* arm_SUB_VEC Q28 Q26 Q17 16 128 *) + 0x4e718751; (* arm_ADD_VEC Q17 Q26 Q17 16 128 *) + 0x4e6286a2; (* arm_ADD_VEC Q2 Q21 Q2 16 128 *) + 0x6e6e87ad; (* arm_SUB_VEC Q13 Q29 Q14 16 128 *) + 0x4e6e87ae; (* arm_ADD_VEC Q14 Q29 Q14 16 128 *) + 0x6f4742ef; (* arm_MLS_VEC Q15 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d9b7; (* arm_SQRDMULH_VEC Q23 Q13 (Q0 :> LANE_H 5) 16 128 *) + 0x4f4089ad; (* arm_MUL_VEC Q13 Q13 (Q0 :> LANE_H 4) 16 128 *) + 0x4f70d1d5; (* arm_SQRDMULH_VEC Q21 Q14 (Q0 :> LANE_H 3) 16 128 *) + 0x6e6f869a; (* arm_SUB_VEC Q26 Q20 Q15 16 128 *) + 0x4e6f868f; (* arm_ADD_VEC Q15 Q20 Q15 16 128 *) + 0x6f4742ed; (* arm_MLS_VEC Q13 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50db57; (* arm_SQRDMULH_VEC Q23 Q26 (Q0 :> LANE_H 5) 16 128 *) + 0x4f408b5a; (* arm_MUL_VEC Q26 Q26 (Q0 :> LANE_H 4) 16 128 *) + 0x4f6081ce; (* arm_MUL_VEC Q14 Q14 (Q0 :> LANE_H 2) 16 128 *) + 0x6e6d857d; (* arm_SUB_VEC Q29 Q11 Q13 16 128 *) + 0x4e6d856b; (* arm_ADD_VEC Q11 Q11 Q13 16 128 *) + 0x6f4742fa; (* arm_MLS_VEC Q26 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f70d1f7; (* arm_SQRDMULH_VEC Q23 Q15 (Q0 :> LANE_H 3) 16 128 *) + 0x4f6081ed; (* arm_MUL_VEC Q13 Q15 (Q0 :> LANE_H 2) 16 128 *) + 0x6f4742ae; (* arm_MLS_VEC Q14 Q21 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7a878f; (* arm_SUB_VEC Q15 Q28 Q26 16 128 *) + 0x4e7a879c; (* arm_ADD_VEC Q28 Q28 Q26 16 128 *) + 0x6f4742ed; (* arm_MLS_VEC Q13 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6e8457; (* arm_SUB_VEC Q23 Q2 Q14 16 128 *) + 0x4e6e844e; (* arm_ADD_VEC Q14 Q2 Q14 16 128 *) + 0x4f71d382; (* arm_SQRDMULH_VEC Q2 Q28 (Q1 :> LANE_H 3) 16 128 *) + 0x6e6d8635; (* arm_SUB_VEC Q21 Q17 Q13 16 128 *) + 0x4e6d8631; (* arm_ADD_VEC Q17 Q17 Q13 16 128 *) + 0x4f61839c; (* arm_MUL_VEC Q28 Q28 (Q1 :> LANE_H 2) 16 128 *) + 0x4f51d2ad; (* arm_SQRDMULH_VEC Q13 Q21 (Q1 :> LANE_H 1) 16 128 *) + 0x4f70da3a; (* arm_SQRDMULH_VEC Q26 Q17 (Q0 :> LANE_H 7) 16 128 *) + 0x4f608a31; (* arm_MUL_VEC Q17 Q17 (Q0 :> LANE_H 6) 16 128 *) + 0x4f4182b5; (* arm_MUL_VEC Q21 Q21 (Q1 :> LANE_H 0) 16 128 *) + 0x6f47405c; (* arm_MLS_VEC Q28 Q2 (Q7 :> LANE_H 0) 16 128 *) + 0x4f51d9e2; (* arm_SQRDMULH_VEC Q2 Q15 (Q1 :> LANE_H 5) 16 128 *) + 0x6f474351; (* arm_MLS_VEC Q17 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x6f4741b5; (* arm_MLS_VEC Q21 Q13 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7c856d; (* arm_SUB_VEC Q13 Q11 Q28 16 128 *) + 0x4e7c857c; (* arm_ADD_VEC Q28 Q11 Q28 16 128 *) + 0x6e7185cb; (* arm_SUB_VEC Q11 Q14 Q17 16 128 *) + 0x4f4189ef; (* arm_MUL_VEC Q15 Q15 (Q1 :> LANE_H 4) 16 128 *) + 0x4e7185ce; (* arm_ADD_VEC Q14 Q14 Q17 16 128 *) + 0x6e7586f1; (* arm_SUB_VEC Q17 Q23 Q21 16 128 *) + 0x4e7586f7; (* arm_ADD_VEC Q23 Q23 Q21 16 128 *) + 0x6f47404f; (* arm_MLS_VEC Q15 Q2 (Q7 :> LANE_H 0) 16 128 *) + 0x3c81040e; (* arm_STR Q14 X0 (Postimmediate_Offset (word 16)) *) + 0x3dc00015; (* arm_LDR Q21 X0 (Immediate_Offset (word 0)) *) + 0x6e6f87ae; (* arm_SUB_VEC Q14 Q29 Q15 16 128 *) + 0x4e6f87a2; (* arm_ADD_VEC Q2 Q29 Q15 16 128 *) + 0x3d800c0b; (* arm_STR Q11 X0 (Immediate_Offset (word 48)) *) + 0x3dc0101a; (* arm_LDR Q26 X0 (Immediate_Offset (word 64)) *) + 0x3d801c17; (* arm_STR Q23 X0 (Immediate_Offset (word 112)) *) + 0x3dc0201d; (* arm_LDR Q29 X0 (Immediate_Offset (word 128)) *) + 0x3d802c11; (* arm_STR Q17 X0 (Immediate_Offset (word 176)) *) + 0x3dc03014; (* arm_LDR Q20 X0 (Immediate_Offset (word 192)) *) + 0x3d803c1c; (* arm_STR Q28 X0 (Immediate_Offset (word 240)) *) + 0x3dc04017; (* arm_LDR Q23 X0 (Immediate_Offset (word 256)) *) + 0x3d804c0d; (* arm_STR Q13 X0 (Immediate_Offset (word 304)) *) + 0x3dc05011; (* arm_LDR Q17 X0 (Immediate_Offset (word 320)) *) + 0x3d805c02; (* arm_STR Q2 X0 (Immediate_Offset (word 368)) *) + 0x4f4082e2; (* arm_MUL_VEC Q2 Q23 (Q0 :> LANE_H 0) 16 128 *) + 0x3d806c0e; (* arm_STR Q14 X0 (Immediate_Offset (word 432)) *) + 0x3dc0700b; (* arm_LDR Q11 X0 (Immediate_Offset (word 448)) *) + 0x3dc0600d; (* arm_LDR Q13 X0 (Immediate_Offset (word 384)) *) + 0x4f40816f; (* arm_MUL_VEC Q15 Q11 (Q0 :> LANE_H 0) 16 128 *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff664; (* arm_CBNZ X4 (word 2096844) *) + 0x4f50d17b; (* arm_SQRDMULH_VEC Q27 Q11 (Q0 :> LANE_H 1) 16 128 *) + 0x4f4081a8; (* arm_MUL_VEC Q8 Q13 (Q0 :> LANE_H 0) 16 128 *) + 0x4f50d1b6; (* arm_SQRDMULH_VEC Q22 Q13 (Q0 :> LANE_H 1) 16 128 *) + 0x4f40822b; (* arm_MUL_VEC Q11 Q17 (Q0 :> LANE_H 0) 16 128 *) + 0x6f47436f; (* arm_MLS_VEC Q15 Q27 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d23c; (* arm_SQRDMULH_VEC Q28 Q17 (Q0 :> LANE_H 1) 16 128 *) + 0x6f4742c8; (* arm_MLS_VEC Q8 Q22 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d2e5; (* arm_SQRDMULH_VEC Q5 Q23 (Q0 :> LANE_H 1) 16 128 *) + 0x4e6f8690; (* arm_ADD_VEC Q16 Q20 Q15 16 128 *) + 0x6f47438b; (* arm_MLS_VEC Q11 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6887a6; (* arm_SUB_VEC Q6 Q29 Q8 16 128 *) + 0x4f70d211; (* arm_SQRDMULH_VEC Q17 Q16 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608217; (* arm_MUL_VEC Q23 Q16 (Q0 :> LANE_H 2) 16 128 *) + 0x4f4088cd; (* arm_MUL_VEC Q13 Q6 (Q0 :> LANE_H 4) 16 128 *) + 0x4f50d8dc; (* arm_SQRDMULH_VEC Q28 Q6 (Q0 :> LANE_H 5) 16 128 *) + 0x6f4740a2; (* arm_MLS_VEC Q2 Q5 (Q7 :> LANE_H 0) 16 128 *) + 0x6f474237; (* arm_MLS_VEC Q23 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x4e6b875b; (* arm_ADD_VEC Q27 Q26 Q11 16 128 *) + 0x6f47438d; (* arm_MLS_VEC Q13 Q28 (Q7 :> LANE_H 0) 16 128 *) + 0x6e6286a9; (* arm_SUB_VEC Q9 Q21 Q2 16 128 *) + 0x4e6887b2; (* arm_ADD_VEC Q18 Q29 Q8 16 128 *) + 0x6e77876e; (* arm_SUB_VEC Q14 Q27 Q23 16 128 *) + 0x4e6d853d; (* arm_ADD_VEC Q29 Q9 Q13 16 128 *) + 0x6e6d853e; (* arm_SUB_VEC Q30 Q9 Q13 16 128 *) + 0x4f4181dc; (* arm_MUL_VEC Q28 Q14 (Q1 :> LANE_H 0) 16 128 *) + 0x4f70d249; (* arm_SQRDMULH_VEC Q9 Q18 (Q0 :> LANE_H 3) 16 128 *) + 0x4f608256; (* arm_MUL_VEC Q22 Q18 (Q0 :> LANE_H 2) 16 128 *) + 0x4f51d1d1; (* arm_SQRDMULH_VEC Q17 Q14 (Q1 :> LANE_H 1) 16 128 *) + 0x6e6f868e; (* arm_SUB_VEC Q14 Q20 Q15 16 128 *) + 0x4e6286b8; (* arm_ADD_VEC Q24 Q21 Q2 16 128 *) + 0x6f474136; (* arm_MLS_VEC Q22 Q9 (Q7 :> LANE_H 0) 16 128 *) + 0x4f50d9c9; (* arm_SQRDMULH_VEC Q9 Q14 (Q0 :> LANE_H 5) 16 128 *) + 0x4f4089cd; (* arm_MUL_VEC Q13 Q14 (Q0 :> LANE_H 4) 16 128 *) + 0x6f47423c; (* arm_MLS_VEC Q28 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x6e768705; (* arm_SUB_VEC Q5 Q24 Q22 16 128 *) + 0x6e6b8742; (* arm_SUB_VEC Q2 Q26 Q11 16 128 *) + 0x6f47412d; (* arm_MLS_VEC Q13 Q9 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7c84b1; (* arm_SUB_VEC Q17 Q5 Q28 16 128 *) + 0x4e7c84ae; (* arm_ADD_VEC Q14 Q5 Q28 16 128 *) + 0x4e77877c; (* arm_ADD_VEC Q28 Q27 Q23 16 128 *) + 0x3d803011; (* arm_STR Q17 X0 (Immediate_Offset (word 192)) *) + 0x4e6d8451; (* arm_ADD_VEC Q17 Q2 Q13 16 128 *) + 0x3d80200e; (* arm_STR Q14 X0 (Immediate_Offset (word 128)) *) + 0x6e6d844d; (* arm_SUB_VEC Q13 Q2 Q13 16 128 *) + 0x4f71d23a; (* arm_SQRDMULH_VEC Q26 Q17 (Q1 :> LANE_H 3) 16 128 *) + 0x4f61822f; (* arm_MUL_VEC Q15 Q17 (Q1 :> LANE_H 2) 16 128 *) + 0x4e768705; (* arm_ADD_VEC Q5 Q24 Q22 16 128 *) + 0x4f51d9b7; (* arm_SQRDMULH_VEC Q23 Q13 (Q1 :> LANE_H 5) 16 128 *) + 0x4f4189ad; (* arm_MUL_VEC Q13 Q13 (Q1 :> LANE_H 4) 16 128 *) + 0x6f47434f; (* arm_MLS_VEC Q15 Q26 (Q7 :> LANE_H 0) 16 128 *) + 0x4f70db8e; (* arm_SQRDMULH_VEC Q14 Q28 (Q0 :> LANE_H 7) 16 128 *) + 0x4f608b91; (* arm_MUL_VEC Q17 Q28 (Q0 :> LANE_H 6) 16 128 *) + 0x6f4742ed; (* arm_MLS_VEC Q13 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4e6f87a6; (* arm_ADD_VEC Q6 Q29 Q15 16 128 *) + 0x6e6f87bc; (* arm_SUB_VEC Q28 Q29 Q15 16 128 *) + 0x6f4741d1; (* arm_MLS_VEC Q17 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x3d804006; (* arm_STR Q6 X0 (Immediate_Offset (word 256)) *) + 0x4e6d87ce; (* arm_ADD_VEC Q14 Q30 Q13 16 128 *) + 0x3d80501c; (* arm_STR Q28 X0 (Immediate_Offset (word 320)) *) + 0x6e6d87d7; (* arm_SUB_VEC Q23 Q30 Q13 16 128 *) + 0x3d80600e; (* arm_STR Q14 X0 (Immediate_Offset (word 384)) *) + 0x4e7184a3; (* arm_ADD_VEC Q3 Q5 Q17 16 128 *) + 0x3d807017; (* arm_STR Q23 X0 (Immediate_Offset (word 448)) *) + 0x6e7184bc; (* arm_SUB_VEC Q28 Q5 Q17 16 128 *) + 0x3c810403; (* arm_STR Q3 X0 (Postimmediate_Offset (word 16)) *) + 0x3d800c1c; (* arm_STR Q28 X0 (Immediate_Offset (word 48)) *) + 0xaa0303e0; (* arm_MOV X0 X3 *) + 0xd2800104; (* arm_MOV X4 (rvalue (word 8)) *) + 0x3cc10422; (* arm_LDR Q2 X1 (Postimmediate_Offset (word 16)) *) + 0x3dc00c0e; (* arm_LDR Q14 X0 (Immediate_Offset (word 48)) *) + 0x3dc00801; (* arm_LDR Q1 X0 (Immediate_Offset (word 32)) *) + 0x4f4281d1; (* arm_MUL_VEC Q17 Q14 (Q2 :> LANE_H 0) 16 128 *) + 0x4f52d1ce; (* arm_SQRDMULH_VEC Q14 Q14 (Q2 :> LANE_H 1) 16 128 *) + 0x4f428028; (* arm_MUL_VEC Q8 Q1 (Q2 :> LANE_H 0) 16 128 *) + 0x3dc00417; (* arm_LDR Q23 X0 (Immediate_Offset (word 16)) *) + 0x6f4741d1; (* arm_MLS_VEC Q17 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x4f52d021; (* arm_SQRDMULH_VEC Q1 Q1 (Q2 :> LANE_H 1) 16 128 *) + 0x3cc6045e; (* arm_LDR Q30 X2 (Postimmediate_Offset (word 96)) *) + 0x6e7186ee; (* arm_SUB_VEC Q14 Q23 Q17 16 128 *) + 0x4e7186ea; (* arm_ADD_VEC Q10 Q23 Q17 16 128 *) + 0x6f474028; (* arm_MLS_VEC Q8 Q1 (Q7 :> LANE_H 0) 16 128 *) + 0x4f52d9c1; (* arm_SQRDMULH_VEC Q1 Q14 (Q2 :> LANE_H 5) 16 128 *) + 0x4f4289ce; (* arm_MUL_VEC Q14 Q14 (Q2 :> LANE_H 4) 16 128 *) + 0x3dc0001b; (* arm_LDR Q27 X0 (Immediate_Offset (word 0)) *) + 0x4f628157; (* arm_MUL_VEC Q23 Q10 (Q2 :> LANE_H 2) 16 128 *) + 0x6f47402e; (* arm_MLS_VEC Q14 Q1 (Q7 :> LANE_H 0) 16 128 *) + 0x6e688761; (* arm_SUB_VEC Q1 Q27 Q8 16 128 *) + 0x3cdc005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x4e6e842c; (* arm_ADD_VEC Q12 Q1 Q14 16 128 *) + 0x4f72d155; (* arm_SQRDMULH_VEC Q21 Q10 (Q2 :> LANE_H 3) 16 128 *) + 0x6e6e8425; (* arm_SUB_VEC Q5 Q1 Q14 16 128 *) + 0x3cdf004d; (* arm_LDR Q13 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0x3dc01c13; (* arm_LDR Q19 X0 (Immediate_Offset (word 112)) *) + 0x3cc10421; (* arm_LDR Q1 X1 (Postimmediate_Offset (word 16)) *) + 0x6f4742b7; (* arm_MLS_VEC Q23 Q21 (Q7 :> LANE_H 0) 16 128 *) + 0x4e688766; (* arm_ADD_VEC Q6 Q27 Q8 16 128 *) + 0x4f418264; (* arm_MUL_VEC Q4 Q19 (Q1 :> LANE_H 0) 16 128 *) + 0x4f51d273; (* arm_SQRDMULH_VEC Q19 Q19 (Q1 :> LANE_H 1) 16 128 *) + 0x3dc01419; (* arm_LDR Q25 X0 (Immediate_Offset (word 80)) *) + 0x4e85298b; (* arm_TRN1 Q11 Q12 Q5 32 128 *) + 0x6f474264; (* arm_MLS_VEC Q4 Q19 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7784c0; (* arm_SUB_VEC Q0 Q6 Q23 16 128 *) + 0x3cdb0050; (* arm_LDR Q16 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x6e648738; (* arm_SUB_VEC Q24 Q25 Q4 16 128 *) + 0x4e7784da; (* arm_ADD_VEC Q26 Q6 Q23 16 128 *) + 0x4e648724; (* arm_ADD_VEC Q4 Q25 Q4 16 128 *) + 0x4f51db17; (* arm_SQRDMULH_VEC Q23 Q24 (Q1 :> LANE_H 5) 16 128 *) + 0x4f418b14; (* arm_MUL_VEC Q20 Q24 (Q1 :> LANE_H 4) 16 128 *) + 0x4f71d095; (* arm_SQRDMULH_VEC Q21 Q4 (Q1 :> LANE_H 3) 16 128 *) + 0x4e802b5b; (* arm_TRN1 Q27 Q26 Q0 32 128 *) + 0x4e856999; (* arm_TRN2 Q25 Q12 Q5 32 128 *) + 0x6f4742f4; (* arm_MLS_VEC Q20 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x4f618097; (* arm_MUL_VEC Q23 Q4 (Q1 :> LANE_H 2) 16 128 *) + 0x3dc0181f; (* arm_LDR Q31 X0 (Immediate_Offset (word 96)) *) + 0x4e806b4c; (* arm_TRN2 Q12 Q26 Q0 32 128 *) + 0x4ecb6b73; (* arm_TRN2 Q19 Q27 Q11 64 128 *) + 0x4f4183e8; (* arm_MUL_VEC Q8 Q31 (Q1 :> LANE_H 0) 16 128 *) + 0x4f51d3e1; (* arm_SQRDMULH_VEC Q1 Q31 (Q1 :> LANE_H 1) 16 128 *) + 0x4ed9698a; (* arm_TRN2 Q10 Q12 Q25 64 128 *) + 0x6e70b660; (* arm_SQRDMULH_VEC Q0 Q19 Q16 16 128 *) + 0x6e70b552; (* arm_SQRDMULH_VEC Q18 Q10 Q16 16 128 *) + 0x4ecb2b70; (* arm_TRN1 Q16 Q27 Q11 64 128 *) + 0x4ed92982; (* arm_TRN1 Q2 Q12 Q25 64 128 *) + 0x4e7e9d4c; (* arm_MUL_VEC Q12 Q10 Q30 16 128 *) + 0x4e7e9e6a; (* arm_MUL_VEC Q10 Q19 Q30 16 128 *) + 0x6f474028; (* arm_MLS_VEC Q8 Q1 (Q7 :> LANE_H 0) 16 128 *) + 0x3cdd004e; (* arm_LDR Q14 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x6f47400a; (* arm_MLS_VEC Q10 Q0 (Q7 :> LANE_H 0) 16 128 *) + 0x6f47424c; (* arm_MLS_VEC Q12 Q18 (Q7 :> LANE_H 0) 16 128 *) + 0x3dc0101b; (* arm_LDR Q27 X0 (Immediate_Offset (word 64)) *) + 0x4e6a8609; (* arm_ADD_VEC Q9 Q16 Q10 16 128 *) + 0x6e6a8610; (* arm_SUB_VEC Q16 Q16 Q10 16 128 *) + 0x6e6c8459; (* arm_SUB_VEC Q25 Q2 Q12 16 128 *) + 0x4e6c845e; (* arm_ADD_VEC Q30 Q2 Q12 16 128 *) + 0x6e68876a; (* arm_SUB_VEC Q10 Q27 Q8 16 128 *) + 0x6e6db736; (* arm_SQRDMULH_VEC Q22 Q25 Q13 16 128 *) + 0x6e6eb7cd; (* arm_SQRDMULH_VEC Q13 Q30 Q14 16 128 *) + 0x3cde004e; (* arm_LDR Q14 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x4e74854c; (* arm_ADD_VEC Q12 Q10 Q20 16 128 *) + 0x4e7c9fc5; (* arm_MUL_VEC Q5 Q30 Q28 16 128 *) + 0x4e6e9f3a; (* arm_MUL_VEC Q26 Q25 Q14 16 128 *) + 0x3cc6045e; (* arm_LDR Q30 X2 (Postimmediate_Offset (word 96)) *) + 0x6f4741a5; (* arm_MLS_VEC Q5 Q13 (Q7 :> LANE_H 0) 16 128 *) + 0x6f4742da; (* arm_MLS_VEC Q26 Q22 (Q7 :> LANE_H 0) 16 128 *) + 0x3cdc005c; (* arm_LDR Q28 X2 (Immediate_Offset (word 18446744073709551552)) *) + 0x4e65852d; (* arm_ADD_VEC Q13 Q9 Q5 16 128 *) + 0x6e658529; (* arm_SUB_VEC Q9 Q9 Q5 16 128 *) + 0x6e7a8605; (* arm_SUB_VEC Q5 Q16 Q26 16 128 *) + 0x4e7a8619; (* arm_ADD_VEC Q25 Q16 Q26 16 128 *) + 0x4e8929af; (* arm_TRN1 Q15 Q13 Q9 32 128 *) + 0x4e8969a3; (* arm_TRN2 Q3 Q13 Q9 32 128 *) + 0x4e852b2d; (* arm_TRN1 Q13 Q25 Q5 32 128 *) + 0x4e856b3f; (* arm_TRN2 Q31 Q25 Q5 32 128 *) + 0x6e748545; (* arm_SUB_VEC Q5 Q10 Q20 16 128 *) + 0x4ecd29e2; (* arm_TRN1 Q2 Q15 Q13 64 128 *) + 0x4ecd69e9; (* arm_TRN2 Q9 Q15 Q13 64 128 *) + 0x3c840402; (* arm_STR Q2 X0 (Postimmediate_Offset (word 64)) *) + 0x4edf287d; (* arm_TRN1 Q29 Q3 Q31 64 128 *) + 0x3c9e0009; (* arm_STR Q9 X0 (Immediate_Offset (word 18446744073709551584)) *) + 0x4edf6869; (* arm_TRN2 Q9 Q3 Q31 64 128 *) + 0x3c9d001d; (* arm_STR Q29 X0 (Immediate_Offset (word 18446744073709551568)) *) + 0x3cdf004d; (* arm_LDR Q13 X2 (Immediate_Offset (word 18446744073709551600)) *) + 0x3c9f0009; (* arm_STR Q9 X0 (Immediate_Offset (word 18446744073709551600)) *) + 0xd1000484; (* arm_SUB X4 X4 (rvalue (word 1)) *) + 0xb5fff704; (* arm_CBNZ X4 (word 2096864) *) + 0x6f4742b7; (* arm_MLS_VEC Q23 Q21 (Q7 :> LANE_H 0) 16 128 *) + 0x4e68876e; (* arm_ADD_VEC Q14 Q27 Q8 16 128 *) + 0x3cde0041; (* arm_LDR Q1 X2 (Immediate_Offset (word 18446744073709551584)) *) + 0x4e7785d1; (* arm_ADD_VEC Q17 Q14 Q23 16 128 *) + 0x6e7785d7; (* arm_SUB_VEC Q23 Q14 Q23 16 128 *) + 0x4e85698b; (* arm_TRN2 Q11 Q12 Q5 32 128 *) + 0x4e85299b; (* arm_TRN1 Q27 Q12 Q5 32 128 *) + 0x4e976a22; (* arm_TRN2 Q2 Q17 Q23 32 128 *) + 0x3cdb005a; (* arm_LDR Q26 X2 (Immediate_Offset (word 18446744073709551536)) *) + 0x4ecb684e; (* arm_TRN2 Q14 Q2 Q11 64 128 *) + 0x4e972a2f; (* arm_TRN1 Q15 Q17 Q23 32 128 *) + 0x4e7e9dc5; (* arm_MUL_VEC Q5 Q14 Q30 16 128 *) + 0x6e7ab5d7; (* arm_SQRDMULH_VEC Q23 Q14 Q26 16 128 *) + 0x4edb69f1; (* arm_TRN2 Q17 Q15 Q27 64 128 *) + 0x4ecb284e; (* arm_TRN1 Q14 Q2 Q11 64 128 *) + 0x4e7e9e35; (* arm_MUL_VEC Q21 Q17 Q30 16 128 *) + 0x6f4742e5; (* arm_MLS_VEC Q5 Q23 (Q7 :> LANE_H 0) 16 128 *) + 0x6e7ab631; (* arm_SQRDMULH_VEC Q17 Q17 Q26 16 128 *) + 0x3cdd0042; (* arm_LDR Q2 X2 (Immediate_Offset (word 18446744073709551568)) *) + 0x6e6585d7; (* arm_SUB_VEC Q23 Q14 Q5 16 128 *) + 0x4e6585ce; (* arm_ADD_VEC Q14 Q14 Q5 16 128 *) + 0x6f474235; (* arm_MLS_VEC Q21 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x4e619ee1; (* arm_MUL_VEC Q1 Q23 Q1 16 128 *) + 0x6e6db6f1; (* arm_SQRDMULH_VEC Q17 Q23 Q13 16 128 *) + 0x4e7c9dd7; (* arm_MUL_VEC Q23 Q14 Q28 16 128 *) + 0x6e62b5ce; (* arm_SQRDMULH_VEC Q14 Q14 Q2 16 128 *) + 0x4edb29fc; (* arm_TRN1 Q28 Q15 Q27 64 128 *) + 0x6f474221; (* arm_MLS_VEC Q1 Q17 (Q7 :> LANE_H 0) 16 128 *) + 0x6e75878b; (* arm_SUB_VEC Q11 Q28 Q21 16 128 *) + 0x6f4741d7; (* arm_MLS_VEC Q23 Q14 (Q7 :> LANE_H 0) 16 128 *) + 0x4e758791; (* arm_ADD_VEC Q17 Q28 Q21 16 128 *) + 0x6e61856e; (* arm_SUB_VEC Q14 Q11 Q1 16 128 *) + 0x4e618561; (* arm_ADD_VEC Q1 Q11 Q1 16 128 *) + 0x6e77863c; (* arm_SUB_VEC Q28 Q17 Q23 16 128 *) + 0x4e778622; (* arm_ADD_VEC Q2 Q17 Q23 16 128 *) + 0x4e8e2837; (* arm_TRN1 Q23 Q1 Q14 32 128 *) + 0x4e8e682e; (* arm_TRN2 Q14 Q1 Q14 32 128 *) + 0x4e9c6851; (* arm_TRN2 Q17 Q2 Q28 32 128 *) + 0x4e9c285c; (* arm_TRN1 Q28 Q2 Q28 32 128 *) + 0x4ece6a21; (* arm_TRN2 Q1 Q17 Q14 64 128 *) + 0x4ece2a2e; (* arm_TRN1 Q14 Q17 Q14 64 128 *) + 0x3d800c01; (* arm_STR Q1 X0 (Immediate_Offset (word 48)) *) + 0x4ed76b81; (* arm_TRN2 Q1 Q28 Q23 64 128 *) + 0x3d80040e; (* arm_STR Q14 X0 (Immediate_Offset (word 16)) *) + 0x4ed72b8e; (* arm_TRN1 Q14 Q28 Q23 64 128 *) + 0x3d800801; (* arm_STR Q1 X0 (Immediate_Offset (word 32)) *) + 0x3c84040e; (* arm_STR Q14 X0 (Postimmediate_Offset (word 64)) *) + 0x6d4027e8; (* arm_LDP D8 D9 SP (Immediate_Offset (iword (&0))) *) + 0x6d412fea; (* arm_LDP D10 D11 SP (Immediate_Offset (iword (&16))) *) + 0x6d4237ec; (* arm_LDP D12 D13 SP (Immediate_Offset (iword (&32))) *) + 0x6d433fee; (* arm_LDP D14 D15 SP (Immediate_Offset (iword (&48))) *) + 0x910103ff; (* arm_ADD SP SP (rvalue (word 64)) *) + 0xd65f03c0 (* arm_RET X30 *) +];; + +let MLKEM_NTT_EXEC = ARM_MK_EXEC_RULE mlkem_ntt_mc;; + +(* ------------------------------------------------------------------------- *) +(* Data tables that are assumed in the precondition. *) +(* ------------------------------------------------------------------------- *) + +let ntt_zetas_layer01234 = define + `ntt_zetas_layer01234:int list = + [-- &1600; -- &15749; -- &749; -- &7373; -- &40; -- &394; -- &687; -- &6762; + &630; &6201; -- &1432; -- &14095; &848; &8347; &0; &0; &1062; &10453; &296; + &2914; -- &882; -- &8682; &0; &0; -- &1410; -- &13879; &1339; &13180; &1476; + &14529; &0; &0; &193; &1900; -- &283; -- &2786; &56; &551; &0; &0; &797; + &7845; -- &1089; -- &10719; &1333; &13121; &0; &0; -- &543; -- &5345; + &1426; &14036; -- &1235; -- &12156; &0; &0; -- &69; -- &679; &535; &5266; + -- &447; -- &4400; &0; &0; &569; &5601; -- &936; -- &9213; -- &450; + -- &4429; &0; &0; -- &1583; -- &15582; -- &1355; -- &13338; &821; + &8081; &0; &0]`;; + +let ntt_zetas_layer56 = define +`ntt_zetas_layer56:int list = + [&289; &289; &331; &331; -- &76; -- &76; -- &1573; -- &1573; &2845; + &2845; &3258; &3258; -- &748; -- &748; -- &15483; -- &15483; &17; &17; + &583; &583; &1637; &1637; -- &1041; -- &1041; &167; &167; &5739; + &5739; &16113; &16113; -- &10247; -- &10247; -- &568; -- &568; + -- &680; -- &680; &723; &723; &1100; &1100; -- &5591; -- &5591; -- &6693; + -- &6693; &7117; &7117; &10828; &10828; &1197; &1197; -- &1025; + -- &1025; -- &1052; -- &1052; -- &1274; -- &1274; &11782; &11782; + -- &10089; -- &10089; -- &10355; -- &10355; -- &12540; -- &12540; &1409; + &1409; -- &48; -- &48; &756; &756; -- &314; -- &314; &13869; &13869; + -- &472; -- &472; &7441; &7441; -- &3091; -- &3091; -- &667; -- &667; + &233; &233; -- &1173; -- &1173; -- &279; -- &279; -- &6565; -- &6565; + &2293; &2293; -- &11546; -- &11546; -- &2746; -- &2746; &650; &650; + -- &1352; -- &1352; -- &816; -- &816; &632; &632; &6398; &6398; + -- &13308; -- &13308; -- &8032; -- &8032; &6221; &6221; -- &1626; + -- &1626; -- &540; -- &540; -- &1482; -- &1482; &1461; &1461; -- &16005; + -- &16005; -- &5315; -- &5315; -- &14588; -- &14588; &14381; &14381; + &1651; &1651; -- &1540; -- &1540; &952; &952; -- &642; -- &642; + &16251; &16251; -- &15159; -- &15159; &9371; &9371; -- &6319; + -- &6319; -- &464; -- &464; &33; &33; &1320; &1320; -- &1414; -- &1414; + -- &4567; -- &4567; &325; &325; &12993; &12993; -- &13918; -- &13918; + &939; &939; -- &892; -- &892; &733; &733; &268; &268; &9243; &9243; + -- &8780; -- &8780; &7215; &7215; &2638; &2638; -- &1021; -- &1021; + -- &941; -- &941; -- &992; -- &992; &641; &641; -- &10050; -- &10050; + -- &9262; -- &9262; -- &9764; -- &9764; &6309; &6309; -- &1010; -- &1010; + &1435; &1435; &807; &807; &452; &452; -- &9942; -- &9942; &14125; + &14125; &7943; &7943; &4449; &4449; &1584; &1584; -- &1292; -- &1292; + &375; &375; -- &1239; -- &1239; &15592; &15592; -- &12717; -- &12717; + &3691; &3691; -- &12196; -- &12196; -- &1031; -- &1031; -- &109; + -- &109; -- &780; -- &780; &1645; &1645; -- &10148; -- &10148; -- &1073; + -- &1073; -- &7678; -- &7678; &16192; &16192; &1438; &1438; -- &461; + -- &461; &1534; &1534; -- &927; -- &927; &14155; &14155; -- &4538; + -- &4538; &15099; &15099; -- &9125; -- &9125; &1063; &1063; -- &556; + -- &556; -- &1230; -- &1230; -- &863; -- &863; &10463; &10463; -- &5473; + -- &5473; -- &12107; -- &12107; -- &8495; -- &8495; &319; &319; &757; + &757; &561; &561; -- &735; -- &735; &3140; &3140; &7451; &7451; &5522; + &5522; -- &7235; -- &7235; -- &682; -- &682; -- &712; -- &712; &1481; + &1481; &648; &648; -- &6713; -- &6713; -- &7008; -- &7008; &14578; + &14578; &6378; &6378; -- &525; -- &525; &403; &403; &1143; &1143; + -- &554; -- &554; -- &5168; -- &5168; &3967; &3967; &11251; &11251; + -- &5453; -- &5453; &1092; &1092; &1026; &1026; -- &1179; -- &1179; &886; + &886; &10749; &10749; &10099; &10099; -- &11605; -- &11605; &8721; + &8721; -- &855; -- &855; -- &219; -- &219; &1227; &1227; &910; &910; + -- &8416; -- &8416; -- &2156; -- &2156; &12078; &12078; &8957; &8957; + -- &1607; -- &1607; -- &1455; -- &1455; -- &1219; -- &1219; &885; + &885; -- &15818; -- &15818; -- &14322; -- &14322; -- &11999; + -- &11999; &8711; &8711; &1212; &1212; &1029; &1029; -- &394; -- &394; + -- &1175; -- &1175; &11930; &11930; &10129; &10129; -- &3878; -- &3878; + -- &11566; -- &11566]`;; + +let ntt_constants = define + `ntt_constants z_01234 z_56 s <=> + (!i. i < 80 + ==> read(memory :> bytes16(word_add z_01234 (word(2 * i)))) s = + iword(EL i ntt_zetas_layer01234)) /\ + (!i. i < 384 + ==> read(memory :> bytes16(word_add z_56 (word(2 * i)))) s = + iword(EL i ntt_zetas_layer56))`;; + +(* ------------------------------------------------------------------------- *) +(* Some convenient proof tools. *) +(* ------------------------------------------------------------------------- *) + +let READ_MEMORY_MERGE_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_SPLIT] THENC + LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let READ_MEMORY_SPLIT_CONV = + let baseconv = + GEN_REWRITE_CONV I [READ_MEMORY_BYTESIZED_UNSPLIT] THENC + BINOP_CONV(LAND_CONV(LAND_CONV(RAND_CONV(RAND_CONV + (TRY_CONV(GEN_REWRITE_CONV I [GSYM WORD_ADD_ASSOC] THENC + RAND_CONV WORD_ADD_CONV)))))) in + let rec conv n tm = + if n = 0 then REFL tm else + (baseconv THENC BINOP_CONV (conv(n - 1))) tm in + conv;; + +let SIMD_SIMPLIFY_CONV = + TOP_DEPTH_CONV + (REWR_CONV WORD_SUBWORD_AND ORELSEC WORD_SIMPLE_SUBWORD_CONV) THENC + DEPTH_CONV WORD_NUM_RED_CONV THENC + REWRITE_CONV[GSYM barred; GSYM barmul];; + +let SIMD_SIMPLIFY_TAC = + let simdable = can (term_match [] `read X (s:armstate):int128 = whatever`) in + TRY(FIRST_X_ASSUM + (ASSUME_TAC o + CONV_RULE(RAND_CONV SIMD_SIMPLIFY_CONV) o + check (simdable o concl)));; + +let MEMORY_128_FROM_16_TAC = + let a_tm = `a:int64` and n_tm = `n:num` and i64_ty = `:int64` + and pat = `read (memory :> bytes128(word_add a (word n))) s0` in + fun v n -> + let pat' = subst[mk_var(v,i64_ty),a_tm] pat in + let f i = + let itm = mk_small_numeral(16*i) in + READ_MEMORY_MERGE_CONV 3 (subst[itm,n_tm] pat') in + MP_TAC(end_itlist CONJ (map f (0--(n-1))));; + +(* ------------------------------------------------------------------------- *) +(* Correctness proof. *) +(* ------------------------------------------------------------------------- *) + +let MLKEM_NTT_CORRECT = prove + (`!a z_01234 z_56 x pc. + ALL (nonoverlapping (a,512)) + [(word pc,0x504); (z_01234,160); (z_56,768)] + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_ntt_mc /\ + read PC s = word (pc + 0x14) /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + ntt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = word(pc + 0x4ec) /\ + ((!i. i < 256 ==> abs(ival(x i)) <= &8191) + ==> !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == forward_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &23594)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [Q8; Q9; Q10; Q11; Q12; Q13; Q14; Q15] ,, + MAYCHANGE [memory :> bytes(a,512)])`, + MAP_EVERY X_GEN_TAC + [`a:int64`; `z_01234:int64`; `z_56:int64`; `x:num->int16`; `pc:num`] THEN + REWRITE_TAC[MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI; C_ARGUMENTS; + NONOVERLAPPING_CLAUSES; ALL] THEN + DISCH_THEN(REPEAT_TCL CONJUNCTS_THEN ASSUME_TAC) THEN + + (*** Manually expand the cases in the hypotheses ***) + + REWRITE_TAC[ntt_constants] THEN + CONV_TAC(RATOR_CONV(LAND_CONV(ONCE_DEPTH_CONV + (EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV)))) THEN + REWRITE_TAC[ntt_zetas_layer01234; ntt_zetas_layer56] THEN + CONV_TAC(ONCE_DEPTH_CONV EL_CONV) THEN + CONV_TAC(ONCE_DEPTH_CONV WORD_IWORD_CONV) THEN REWRITE_TAC[WORD_ADD_0] THEN + ENSURES_INIT_TAC "s0" THEN + + (*** Manually restructure to match the 128-bit loads. It would be nicer + *** if the simulation machinery did this automatically. + ***) + + MEMORY_128_FROM_16_TAC "a" 32 THEN + MEMORY_128_FROM_16_TAC "z_01234" 10 THEN + MEMORY_128_FROM_16_TAC "z_56" 48 THEN + ASM_REWRITE_TAC[WORD_ADD_0] THEN CONV_TAC WORD_REDUCE_CONV THEN + DISCARD_MATCHING_ASSUMPTIONS [`read (memory :> bytes16 a) s = x`] THEN + REPEAT STRIP_TAC THEN + + (*** Ghost up initial contents of Q7 since it isn't fully initialized ***) + + ABBREV_TAC `v7_init:int128 = read Q7 s0` THEN + + (*** Simulate all the way to the end, in effect unrolling loops ***) + + MAP_EVERY (fun n -> ARM_STEPS_TAC MLKEM_NTT_EXEC [n] THEN SIMD_SIMPLIFY_TAC) + (1--904) THEN + ENSURES_FINAL_STATE_TAC THEN ASM_REWRITE_TAC[] THEN + + (*** Reverse the restructuring by splitting the 128-bit words up ***) + + REPEAT(FIRST_X_ASSUM(STRIP_ASSUME_TAC o + CONV_RULE SIMD_SIMPLIFY_CONV o + CONV_RULE(READ_MEMORY_SPLIT_CONV 3) o + check (can (term_match [] `read qqq s:int128 = xxx`) o concl))) THEN + + (*** Turn the conclusion into an explicit conjunction and split it up ***) + + DISCH_TAC THEN + CONV_TAC(ONCE_DEPTH_CONV let_CONV) THEN REWRITE_TAC[INT_ABS_BOUNDS] THEN + GEN_REWRITE_TAC (BINDER_CONV o RAND_CONV) [GSYM I_THM] THEN + CONV_TAC(EXPAND_CASES_CONV THENC ONCE_DEPTH_CONV NUM_MULT_CONV) THEN + ASM_REWRITE_TAC[I_THM; WORD_ADD_0] THEN DISCARD_STATE_TAC "s904" THEN + REPEAT(W(fun (asl,w) -> + if length(conjuncts w) > 3 then CONJ_TAC else NO_TAC)) THEN + + (*** Get congruences and bounds for the result digits and finish ***) + + FIRST_X_ASSUM(MP_TAC o CONV_RULE EXPAND_CASES_CONV) THEN + POP_ASSUM_LIST(K ALL_TAC) THEN + DISCH_THEN(fun aboth -> + W(MP_TAC o GEN_CONGBOUND_RULE (CONJUNCTS aboth) o + rand o lhand o rator o lhand o snd)) THEN + (MATCH_MP_TAC MONO_AND THEN CONJ_TAC THENL + [MATCH_MP_TAC(REWRITE_RULE[IMP_CONJ_ALT] INT_CONG_TRANS) THEN + CONV_TAC(ONCE_DEPTH_CONV FORWARD_NTT_CONV) THEN + REWRITE_TAC[GSYM INT_REM_EQ; o_THM] THEN CONV_TAC INT_REM_DOWN_CONV THEN + REWRITE_TAC[INT_REM_EQ] THEN + REWRITE_TAC[REAL_INT_CONGRUENCE; INT_OF_NUM_EQ; ARITH_EQ] THEN + REWRITE_TAC[GSYM REAL_OF_INT_CLAUSES] THEN + CONV_TAC(RAND_CONV REAL_POLY_CONV) THEN REAL_INTEGER_TAC; + MATCH_MP_TAC(INT_ARITH + `l':int <= l /\ u <= u' + ==> l <= x /\ x <= u ==> l' <= x /\ x <= u'`) THEN + CONV_TAC INT_REDUCE_CONV]));; + +(*** Subroutine form, somewhat messy elaboration of the usual wrapper ***) + +let MLKEM_NTT_SUBROUTINE_CORRECT = prove + (`!a z_01234 z_56 x pc stackpointer returnaddress. + aligned 16 stackpointer /\ + ALLPAIRS nonoverlapping + [(a,512); (word_sub stackpointer (word 64),64)] + [(word pc,0x504); (z_01234,160); (z_56,768)] /\ + nonoverlapping (a,512) (word_sub stackpointer (word 64),64) + ==> ensures arm + (\s. aligned_bytes_loaded s (word pc) mlkem_ntt_mc /\ + read PC s = word pc /\ + read SP s = stackpointer /\ + read X30 s = returnaddress /\ + C_ARGUMENTS [a; z_01234; z_56] s /\ + ntt_constants z_01234 z_56 s /\ + !i. i < 256 + ==> read(memory :> bytes16(word_add a (word(2 * i)))) s = + x i) + (\s. read PC s = returnaddress /\ + ((!i. i < 256 ==> abs(ival(x i)) <= &8191) + ==> !i. i < 256 + ==> let zi = + read(memory :> bytes16(word_add a (word(2 * i)))) s in + (ival zi == forward_ntt (ival o x) i) (mod &3329) /\ + abs(ival zi) <= &23594)) + (MAYCHANGE_REGS_AND_FLAGS_PERMITTED_BY_ABI ,, + MAYCHANGE [memory :> bytes(a,512); + memory :> bytes(word_sub stackpointer (word 64),64)])`, + let TWEAK_CONV = + REWRITE_CONV[ntt_constants] THENC + ONCE_DEPTH_CONV let_CONV THENC + ONCE_DEPTH_CONV EXPAND_CASES_CONV THENC + ONCE_DEPTH_CONV NUM_MULT_CONV THENC + PURE_REWRITE_CONV [WORD_ADD_0] in + REWRITE_TAC[fst MLKEM_NTT_EXEC] THEN + CONV_TAC TWEAK_CONV THEN + ARM_ADD_RETURN_STACK_TAC ~pre_post_nsteps:(5,5) MLKEM_NTT_EXEC + (REWRITE_RULE[fst MLKEM_NTT_EXEC] (CONV_RULE TWEAK_CONV MLKEM_NTT_CORRECT)) + `[D8; D9; D10; D11; D12; D13; D14; D15]` 64);;