From 3d511ff63e59f542ae20c722bfef1c867cd1da0e Mon Sep 17 00:00:00 2001
From: Tobias Holenstein <tholenstein@openjdk.org>
Date: Wed, 22 May 2024 08:50:15 +0000
Subject: [PATCH 1/9] 8329748: Change default value of AssertWXAtThreadSync to
 true

Reviewed-by: kvn, rrich
---
 src/hotspot/os/bsd/globals_bsd.hpp              | 2 +-
 src/hotspot/share/jfr/support/jfrIntrinsics.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/hotspot/os/bsd/globals_bsd.hpp b/src/hotspot/os/bsd/globals_bsd.hpp
index 66fae6a49d239..850d491a11fa4 100644
--- a/src/hotspot/os/bsd/globals_bsd.hpp
+++ b/src/hotspot/os/bsd/globals_bsd.hpp
@@ -35,7 +35,7 @@
                          range,                                         \
                          constraint)                                    \
                                                                         \
-  AARCH64_ONLY(develop(bool, AssertWXAtThreadSync, false,                \
+  AARCH64_ONLY(develop(bool, AssertWXAtThreadSync, true,                \
           "Conservatively check W^X thread state at possible safepoint" \
           "or handshake"))
 
diff --git a/src/hotspot/share/jfr/support/jfrIntrinsics.cpp b/src/hotspot/share/jfr/support/jfrIntrinsics.cpp
index 4b7c6c8aee9ce..63d0e686021f2 100644
--- a/src/hotspot/share/jfr/support/jfrIntrinsics.cpp
+++ b/src/hotspot/share/jfr/support/jfrIntrinsics.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -74,6 +74,7 @@ void* JfrIntrinsicSupport::write_checkpoint(JavaThread* jt) {
 
 void* JfrIntrinsicSupport::return_lease(JavaThread* jt) {
   DEBUG_ONLY(assert_precondition(jt);)
+  MACOS_AARCH64_ONLY(ThreadWXEnable __wx(WXWrite, jt));
   ThreadStateTransition::transition_from_java(jt, _thread_in_native);
   assert(jt->jfr_thread_local()->has_java_event_writer(), "invariant");
   assert(jt->jfr_thread_local()->shelved_buffer() != nullptr, "invariant");

From 8a9d77d58de259b6b2bdc2cc9e7bfdc28dcf7165 Mon Sep 17 00:00:00 2001
From: Fei Gao <fgao@openjdk.org>
Date: Wed, 22 May 2024 11:33:35 +0000
Subject: [PATCH 2/9] 8320622: [TEST] Improve coverage of
 compiler/loopopts/superword/TestMulAddS2I.java on different platforms

Reviewed-by: epeter, kvn
---
 .../compiler/loopopts/superword/TestMulAddS2I.java     | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java
index c65da58b28597..4521d43804b86 100644
--- a/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java
+++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestMulAddS2I.java
@@ -23,7 +23,7 @@
 
 /**
  * @test
- * @bug 8310886 8325252
+ * @bug 8310886 8325252 8320622
  * @summary Test MulAddS2I vectorization.
  * @library /test/lib /
  * @run driver compiler.loopopts.superword.TestMulAddS2I
@@ -68,12 +68,8 @@ public class TestMulAddS2I {
 
 
     public static void main(String[] args) {
-        if (Platform.isX64() || Platform.isX86()) {
-            TestFramework.runWithFlags("-XX:+UseUnalignedLoadStores");
-            TestFramework.runWithFlags("-XX:-UseUnalignedLoadStores");
-        } else {
-            TestFramework.run();
-        }
+        TestFramework.runWithFlags("-XX:+AlignVector");
+        TestFramework.runWithFlags("-XX:-AlignVector");
     }
 
     @Run(test = {"testa", "testb", "testc", "testd", "teste", "testf", "testg", "testh"})

From c3bc23fe48ca1603afe68a6ac4aaa523a1edbb41 Mon Sep 17 00:00:00 2001
From: Robbin Ehn <rehn@openjdk.org>
Date: Wed, 22 May 2024 11:47:54 +0000
Subject: [PATCH 3/9] 8326306: RISC-V: Re-structure MASM calls and jumps

Reviewed-by: fyang, luhenry
---
 src/hotspot/cpu/riscv/assembler_riscv.hpp     | 188 +++++++-------
 .../cpu/riscv/c1_LIRAssembler_riscv.cpp       |  12 +-
 .../shenandoahBarrierSetAssembler_riscv.cpp   |   4 +-
 .../cpu/riscv/jniFastGetField_riscv.cpp       |   2 +-
 .../cpu/riscv/macroAssembler_riscv.cpp        | 230 +++++++++---------
 .../cpu/riscv/macroAssembler_riscv.hpp        |  64 +++--
 src/hotspot/cpu/riscv/nativeInst_riscv.cpp    |   3 +-
 src/hotspot/cpu/riscv/stubGenerator_riscv.cpp |   4 +-
 .../templateInterpreterGenerator_riscv.cpp    |   6 +-
 9 files changed, 257 insertions(+), 256 deletions(-)

diff --git a/src/hotspot/cpu/riscv/assembler_riscv.hpp b/src/hotspot/cpu/riscv/assembler_riscv.hpp
index ff55951bd7d7b..b0249ac3344b7 100644
--- a/src/hotspot/cpu/riscv/assembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/assembler_riscv.hpp
@@ -656,39 +656,35 @@ class Assembler : public AbstractAssembler {
 
 #undef INSN
 
-#define INSN(NAME, op)                                                                \
-  void NAME(Register Rd, const int32_t offset) {                                      \
-    guarantee(is_simm21(offset) && ((offset % 2) == 0), "offset is invalid.");        \
-    unsigned insn = 0;                                                                \
-    patch((address)&insn, 6, 0, op);                                                  \
-    patch_reg((address)&insn, 7, Rd);                                                 \
-    patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff));                 \
-    patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1));                      \
-    patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff));                 \
-    patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1));                      \
-    emit(insn);                                                                       \
+ private:
+  // All calls and jumps must go via MASM.
+  // Format J-type
+  void _jal(Register Rd, const int32_t offset) {
+    guarantee(is_simm21(offset) && ((offset % 2) == 0), "offset is invalid.");
+    unsigned insn = 0;
+    patch((address)&insn, 6, 0, 0b1101111);
+    patch_reg((address)&insn, 7, Rd);
+    patch((address)&insn, 19, 12, (uint32_t)((offset >> 12) & 0xff));
+    patch((address)&insn, 20, (uint32_t)((offset >> 11) & 0x1));
+    patch((address)&insn, 30, 21, (uint32_t)((offset >> 1) & 0x3ff));
+    patch((address)&insn, 31, (uint32_t)((offset >> 20) & 0x1));
+    emit(insn);
   }
 
-  INSN(jal, 0b1101111);
-
-#undef INSN
-
-#define INSN(NAME, op, funct)                                                         \
-  void NAME(Register Rd, Register Rs, const int32_t offset) {                         \
-    guarantee(is_simm12(offset), "offset is invalid.");                               \
-    unsigned insn = 0;                                                                \
-    patch((address)&insn, 6, 0, op);                                                  \
-    patch_reg((address)&insn, 7, Rd);                                                 \
-    patch((address)&insn, 14, 12, funct);                                             \
-    patch_reg((address)&insn, 15, Rs);                                                \
-    int32_t val = offset & 0xfff;                                                     \
-    patch((address)&insn, 31, 20, val);                                               \
-    emit(insn);                                                                       \
+  // Format I-type
+  void _jalr(Register Rd, Register Rs, const int32_t offset) {
+    guarantee(is_simm12(offset), "offset is invalid.");
+    unsigned insn = 0;
+    patch((address)&insn, 6, 0, 0b1100111);
+    patch_reg((address)&insn, 7, Rd);
+    patch((address)&insn, 14, 12, 0b000);
+    patch_reg((address)&insn, 15, Rs);
+    int32_t val = offset & 0xfff;
+    patch((address)&insn, 31, 20, val);
+    emit(insn);
   }
 
-  INSN(_jalr, 0b1100111, 0b000);
-
-#undef INSN
+ public:
 
   enum barrier {
     i = 0b1000, o = 0b0100, r = 0b0010, w = 0b0001,
@@ -2294,21 +2290,23 @@ enum Nf {
 
 #undef INSN
 
-#define INSN(NAME, funct4, op)                                                               \
-  void NAME(Register Rs1) {                                                                  \
-    assert_cond(Rs1 != x0);                                                                  \
-    uint16_t insn = 0;                                                                       \
-    c_patch((address)&insn, 1, 0, op);                                                       \
-    c_patch_reg((address)&insn, 2, x0);                                                      \
-    c_patch_reg((address)&insn, 7, Rs1);                                                     \
-    c_patch((address)&insn, 15, 12, funct4);                                                 \
-    emit_int16(insn);                                                                        \
+ private:
+  // All calls and jumps must go via MASM.
+  // Format CR, c.jr/c.jalr
+  // Note: compressed (C) instructions can't be changed later, i.e. no relocation patching.
+  template <uint8_t InstructionType, uint8_t FunctionType>
+  void c_cr_if(Register Rs1) {
+    assert_cond(Rs1 != x0);
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, FunctionType);
+    c_patch_reg((address)&insn, 2, x0);
+    c_patch_reg((address)&insn, 7, Rs1);
+    c_patch((address)&insn, 15, 12, InstructionType);
+    emit_int16(insn);
   }
 
-  INSN(c_jr,   0b1000, 0b10);
-  INSN(c_jalr, 0b1001, 0b10);
-
-#undef INSN
+  void c_jr(Register Rs1)   { c_cr_if<0b1000, 0b10>(Rs1); }
+  void c_jalr(Register Rs1) { c_cr_if<0b1001, 0b10>(Rs1); }
 
   typedef void (Assembler::* j_c_insn)(address dest);
   typedef void (Assembler::* compare_and_branch_c_insn)(Register Rs1, address dest);
@@ -2331,35 +2329,36 @@ enum Nf {
     }
   }
 
-#define INSN(NAME, funct3, op)                                                               \
-  void NAME(int32_t offset) {                                                                \
-    assert(is_simm12(offset) && ((offset % 2) == 0), "invalid encoding");                    \
-    uint16_t insn = 0;                                                                       \
-    c_patch((address)&insn, 1, 0, op);                                                       \
-    c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5);                               \
-    c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1);                          \
-    c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7);                               \
-    c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6);                               \
-    c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10);                             \
-    c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8);                        \
-    c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4);                             \
-    c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11);                           \
-    c_patch((address)&insn, 15, 13, funct3);                                                 \
-    emit_int16(insn);                                                                        \
-  }                                                                                          \
-  void NAME(address dest) {                                                                  \
-    assert_cond(dest != nullptr);                                                            \
-    int64_t distance = dest - pc();                                                          \
-    assert(is_simm12(distance) && ((distance % 2) == 0), "invalid encoding");                \
-    c_j(distance);                                                                           \
-  }                                                                                          \
-  void NAME(Label &L) {                                                                      \
-    wrap_label(L, &Assembler::NAME);                                                         \
+  // Format CJ, c.j (c.jal)
+  // Note: compressed (C) instructions can't be changed later, i.e. no relocation patching.
+  void c_j(int32_t offset) {
+    assert(is_simm12(offset) && ((offset % 2) == 0), "invalid encoding");
+    uint16_t insn = 0;
+    c_patch((address)&insn, 1, 0, 0b01);
+    c_patch((address)&insn, 2, 2, (offset & nth_bit(5)) >> 5);
+    c_patch((address)&insn, 5, 3, (offset & right_n_bits(4)) >> 1);
+    c_patch((address)&insn, 6, 6, (offset & nth_bit(7)) >> 7);
+    c_patch((address)&insn, 7, 7, (offset & nth_bit(6)) >> 6);
+    c_patch((address)&insn, 8, 8, (offset & nth_bit(10)) >> 10);
+    c_patch((address)&insn, 10, 9, (offset & right_n_bits(10)) >> 8);
+    c_patch((address)&insn, 11, 11, (offset & nth_bit(4)) >> 4);
+    c_patch((address)&insn, 12, 12, (offset & nth_bit(11)) >> 11);
+    c_patch((address)&insn, 15, 13, 0b101);
+    emit_int16(insn);
   }
 
-  INSN(c_j, 0b101, 0b01);
+  void c_j(address dest) {
+    assert_cond(dest != nullptr);
+    int64_t distance = dest - pc();
+    assert(is_simm12(distance) && ((distance % 2) == 0), "invalid encoding");
+    c_j(distance);
+  }
 
-#undef INSN
+  void c_j(Label &L) {
+    wrap_label(L, &Assembler::c_j);
+  }
+
+  public:
 
 #define INSN(NAME, funct3, op)                                                               \
   void NAME(Register Rs1, int32_t imm) {                                                     \
@@ -2812,24 +2811,35 @@ enum Nf {
 // --------------------------
 // Unconditional branch instructions
 // --------------------------
-#define INSN(NAME)                                                                           \
-  void NAME(Register Rd, Register Rs, const int32_t offset) {                                \
-    /* jalr -> c.jr/c.jalr */                                                                \
-    if (do_compress() && (offset == 0 && Rs != x0)) {                                        \
-      if (Rd == x1) {                                                                        \
-        c_jalr(Rs);                                                                          \
-        return;                                                                              \
-      } else if (Rd == x0) {                                                                 \
-        c_jr(Rs);                                                                            \
-        return;                                                                              \
-      }                                                                                      \
-    }                                                                                        \
-    _jalr(Rd, Rs, offset);                                                                   \
+ protected:
+  // All calls and jumps must go via MASM.
+  void jalr(Register Rd, Register Rs, const int32_t offset) {
+    /* jalr -> c.jr/c.jalr */
+    if (do_compress() && (offset == 0 && Rs != x0)) {
+      if (Rd == x1) {
+        c_jalr(Rs);
+        return;
+      } else if (Rd == x0) {
+        c_jr(Rs);
+        return;
+      }
+    }
+    _jalr(Rd, Rs, offset);
   }
 
-  INSN(jalr);
+  void jal(Register Rd, const int32_t offset) {
+    /* jal -> c.j, note c.jal is RV32C only */
+    if (do_compress() &&
+        Rd == x0 &&
+        is_simm12(offset) && ((offset % 2) == 0)) {
+      c_j(offset);
+      return;
+    }
 
-#undef INSN
+    _jal(Rd, offset);
+  }
+
+  public:
 
 // --------------------------
 // Miscellaneous Instructions
@@ -3009,18 +3019,6 @@ enum Nf {
 
 #undef INSN
 
-// ---------------------------------------------------------------------------------------
-
-#define INSN(NAME, REGISTER)                       \
-  void NAME(Register Rs) {                         \
-    jalr(REGISTER, Rs, 0);                         \
-  }
-
-  INSN(jr,   x0);
-  INSN(jalr, x1);
-
-#undef INSN
-
 // --------------  ZCB Instruction Definitions  --------------
 // Zcb additional C instructions
  private:
diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
index 5d0fa3fad3cec..a15405f532302 100644
--- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp
@@ -1841,17 +1841,7 @@ void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, C
 void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
   assert(!tmp->is_valid(), "don't need temporary");
 
-  CodeBlob *cb = CodeCache::find_blob(dest);
-  if (cb != nullptr) {
-    __ far_call(RuntimeAddress(dest));
-  } else {
-    RuntimeAddress target(dest);
-    __ relocate(target.rspec(), [&] {
-      int32_t offset;
-      __ movptr(t0, target.target(), offset);
-      __ jalr(x1, t0, offset);
-    });
-  }
+  __ rt_call(dest);
 
   if (info != nullptr) {
     add_call_info_here(info);
diff --git a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
index 26d60441c2d2c..a93bf5394ce94 100644
--- a/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/gc/shenandoah/shenandoahBarrierSetAssembler_riscv.cpp
@@ -300,7 +300,7 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm,
     assert(!is_narrow, "phantom access cannot be narrow");
     target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
   }
-  __ call(target);
+  __ rt_call(target);
   __ mv(t0, x10);
   __ pop_call_clobbered_registers();
   __ mv(x10, t0);
@@ -703,7 +703,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
     assert(is_native, "phantom must only be called off-heap");
     target = CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom);
   }
-  __ call(target);
+  __ rt_call(target);
   __ mv(t0, x10);
   __ pop_call_clobbered_registers();
   __ mv(x10, t0);
diff --git a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
index fd1701c8188b8..8423ecad8a3da 100644
--- a/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
+++ b/src/hotspot/cpu/riscv/jniFastGetField_riscv.cpp
@@ -177,7 +177,7 @@ address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
     __ relocate(target.rspec(), [&] {
       int32_t offset;
       __ la(t0, target.target(), offset);
-      __ jalr(x1, t0, offset);
+      __ jalr(t0, offset);
     });
     __ leave();
     __ ret();
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index b450d93680f3b..bfa68a88f5735 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -339,7 +339,7 @@ void MacroAssembler::call_VM_base(Register oop_result,
     relocate(target.rspec(), [&] {
       int32_t offset;
       la(t0, target.target(), offset);
-      jalr(x0, t0, offset);
+      jr(t0, offset);
     });
     bind(ok);
   }
@@ -641,14 +641,16 @@ void MacroAssembler::emit_static_call_stub() {
   // Jump to the entry point of the c2i stub.
   int32_t offset = 0;
   movptr(t0, 0, offset);
-  jalr(x0, t0, offset);
+  jr(t0, offset);
 }
 
 void MacroAssembler::call_VM_leaf_base(address entry_point,
                                        int number_of_arguments,
                                        Label *retaddr) {
+  int32_t offset = 0;
   push_reg(RegSet::of(t0, xmethod), sp);   // push << t0 & xmethod >> to sp
-  call(entry_point);
+  mv(t0, entry_point, offset);
+  jalr(t0, offset);
   if (retaddr != nullptr) {
     bind(*retaddr);
   }
@@ -716,33 +718,19 @@ void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Reg
 }
 
 void MacroAssembler::la(Register Rd, const address addr) {
-  int64_t offset = addr - pc();
-  if (is_valid_32bit_offset(offset)) {
-    auipc(Rd, (int32_t)offset + 0x800);  //0x800, Note:the 11th sign bit
-    addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
-  } else {
-    movptr(Rd, addr);
-  }
+  int32_t offset;
+  la(Rd, addr, offset);
+  addi(Rd, Rd, offset);
 }
 
 void MacroAssembler::la(Register Rd, const address addr, int32_t &offset) {
-  assert((uintptr_t)addr < (1ull << 48), "bad address");
-
-  unsigned long target_address = (uintptr_t)addr;
-  unsigned long low_address = (uintptr_t)CodeCache::low_bound();
-  unsigned long high_address = (uintptr_t)CodeCache::high_bound();
-  long offset_low = target_address - low_address;
-  long offset_high = target_address - high_address;
-
-  // RISC-V doesn't compute a page-aligned address, in order to partially
-  // compensate for the use of *signed* offsets in its base+disp12
-  // addressing mode (RISC-V's PC-relative reach remains asymmetric
-  // [-(2G + 2K), 2G - 2K).
-  if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
+  if (is_32bit_offset_from_codecache((int64_t)addr)) {
     int64_t distance = addr - pc();
+    assert(is_valid_32bit_offset(distance), "Must be");
     auipc(Rd, (int32_t)distance + 0x800);
     offset = ((int32_t)distance << 20) >> 20;
   } else {
+    assert(!CodeCache::contains(addr), "Must be");
     movptr(Rd, addr, offset);
   }
 }
@@ -859,88 +847,113 @@ void MacroAssembler::li(Register Rd, int64_t imm) {
   }
 }
 
-#define INSN(NAME, REGISTER)                                       \
-  void MacroAssembler::NAME(const address dest, Register temp) {   \
-    assert_cond(dest != nullptr);                                  \
-    int64_t distance = dest - pc();                                \
-    if (is_simm21(distance) && ((distance % 2) == 0)) {            \
-      Assembler::jal(REGISTER, distance);                          \
-    } else {                                                       \
-      assert(temp != noreg, "expecting a register");               \
-      int32_t offset = 0;                                          \
-      movptr(temp, dest, offset);                                  \
-      Assembler::jalr(REGISTER, temp, offset);                     \
-    }                                                              \
-  }                                                                \
-
-  INSN(j,   x0);
-  INSN(jal, x1);
-
-#undef INSN
+void MacroAssembler::jump_link(const address dest, Register temp) {
+  assert_cond(dest != nullptr);
+  int64_t distance = dest - pc();
+  if (is_simm21(distance) && ((distance % 2) == 0)) {
+    Assembler::jal(x1, distance);
+  } else {
+    assert(temp != noreg && temp != x0, "expecting a register");
+    int32_t offset = 0;
+    la(temp, dest, offset);
+    jalr(temp, offset);
+  }
+}
 
-#define INSN(NAME, REGISTER)                                       \
-  void MacroAssembler::NAME(const Address &adr, Register temp) {   \
-    switch (adr.getMode()) {                                       \
-      case Address::literal: {                                     \
-        relocate(adr.rspec(), [&] {                                \
-          NAME(adr.target(), temp);                                \
-        });                                                        \
-        break;                                                     \
-      }                                                            \
-      case Address::base_plus_offset: {                            \
-        int32_t offset = ((int32_t)adr.offset() << 20) >> 20;      \
-        la(temp, Address(adr.base(), adr.offset() - offset));      \
-        Assembler::jalr(REGISTER, temp, offset);                   \
-        break;                                                     \
-      }                                                            \
-      default:                                                     \
-        ShouldNotReachHere();                                      \
-    }                                                              \
-  }
-
-  INSN(j,   x0);
-  INSN(jal, x1);
+void MacroAssembler::jump_link(const Address &adr, Register temp) {
+  switch (adr.getMode()) {
+    case Address::literal: {
+      relocate(adr.rspec(), [&] {
+        jump_link(adr.target(), temp);
+      });
+      break;
+    }
+    case Address::base_plus_offset: {
+      int32_t offset = ((int32_t)adr.offset() << 20) >> 20;
+      la(temp, Address(adr.base(), adr.offset() - offset));
+      jalr(temp, offset);
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+  }
+}
 
-#undef INSN
+void MacroAssembler::j(const address dest, Register temp) {
+  assert(CodeCache::contains(dest), "Must be");
+  assert_cond(dest != nullptr);
+  int64_t distance = dest - pc();
 
-#define INSN(NAME)                                                                    \
-  void MacroAssembler::NAME(Register Rd, const address dest, Register temp) {         \
-    assert_cond(dest != nullptr);                                                     \
-    int64_t distance = dest - pc();                                                   \
-    if (is_simm21(distance) && ((distance % 2) == 0)) {                               \
-      Assembler::NAME(Rd, distance);                                                  \
-    } else {                                                                          \
-      assert_different_registers(Rd, temp);                                           \
-      int32_t offset = 0;                                                             \
-      movptr(temp, dest, offset);                                                     \
-      jalr(Rd, temp, offset);                                                         \
-    }                                                                                 \
-  }                                                                                   \
-  void MacroAssembler::NAME(Register Rd, Label &L, Register temp) {                   \
-    assert_different_registers(Rd, temp);                                             \
-    wrap_label(Rd, L, temp, &MacroAssembler::NAME);                                   \
+  // We can't patch C instructions, i.e. if the Label wasn't bound yet we need to patch this jump later.
+  IncompressibleRegion ir(this);
+  if (is_simm21(distance) && ((distance % 2) == 0)) {
+    Assembler::jal(x0, distance);
+  } else {
+    assert(temp != noreg && temp != x0, "expecting a register");
+    int32_t offset = 0;
+    la(temp, dest, offset);
+    jr(temp, offset);
   }
+}
 
-  INSN(jal);
+void MacroAssembler::j(const Address &adr, Register temp) {
+  switch (adr.getMode()) {
+    case Address::literal: {
+      relocate(adr.rspec(), [&] {
+        j(adr.target(), temp);
+      });
+      break;
+    }
+    case Address::base_plus_offset: {
+      int32_t offset = ((int32_t)adr.offset() << 20) >> 20;
+      la(temp, Address(adr.base(), adr.offset() - offset));
+      jr(temp, offset);
+      break;
+    }
+    default:
+      ShouldNotReachHere();
+  }
+}
 
-#undef INSN
+void MacroAssembler::j(Label &lab, Register temp) {
+  assert_different_registers(x0, temp);
+  if (lab.is_bound()) {
+    MacroAssembler::j(target(lab), temp);
+  } else {
+    lab.add_patch_at(code(), locator());
+    MacroAssembler::j(pc(), temp);
+  }
+}
 
-#define INSN(NAME, REGISTER)                                       \
-  void MacroAssembler::NAME(Label &l, Register temp) {             \
-    jal(REGISTER, l, temp);                                        \
-  }                                                                \
+void MacroAssembler::jr(Register Rd, int32_t offset) {
+  assert(Rd != noreg, "expecting a register");
+  Assembler::jalr(x0, Rd, offset);
+}
 
-  INSN(j,   x0);
-  INSN(jal, x1);
+void MacroAssembler::call(const address dest, Register temp) {
+  assert_cond(dest != nullptr);
+  assert(temp != noreg, "expecting a register");
+  int32_t offset = 0;
+  la(temp, dest, offset);
+  jalr(temp, offset);
+}
 
-#undef INSN
+void MacroAssembler::jalr(Register Rs, int32_t offset) {
+  assert(Rs != noreg, "expecting a register");
+  Assembler::jalr(x1, Rs, offset);
+}
 
-void MacroAssembler::wrap_label(Register Rt, Label &L, Register tmp, load_insn_by_temp insn) {
-  if (L.is_bound()) {
-    (this->*insn)(Rt, target(L), tmp);
+void MacroAssembler::rt_call(address dest, Register tmp) {
+  CodeBlob *cb = CodeCache::find_blob(dest);
+  RuntimeAddress target(dest);
+  if (cb) {
+    far_call(target, tmp);
   } else {
-    L.add_patch_at(code(), locator());
-    (this->*insn)(Rt, pc(), tmp);
+    relocate(target.rspec(), [&] {
+      int32_t offset;
+      la(tmp, target.target(), offset);
+      jalr(tmp, offset);
+    });
   }
 }
 
@@ -3169,7 +3182,6 @@ void MacroAssembler::atomic_cas(
 }
 
 void MacroAssembler::far_jump(const Address &entry, Register tmp) {
-  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
   assert(CodeCache::find_blob(entry.target()) != nullptr,
          "destination of far call not found in code cache");
   assert(entry.rspec().type() == relocInfo::external_word_type
@@ -3179,12 +3191,11 @@ void MacroAssembler::far_jump(const Address &entry, Register tmp) {
   relocate(entry.rspec(), [&] {
     int32_t offset;
     la(tmp, entry.target(), offset);
-    jalr(x0, tmp, offset);
+    jr(tmp, offset);
   });
 }
 
 void MacroAssembler::far_call(const Address &entry, Register tmp) {
-  assert(ReservedCodeCacheSize < 4*G, "branch out of range");
   assert(CodeCache::find_blob(entry.target()) != nullptr,
          "destination of far call not found in code cache");
   assert(entry.rspec().type() == relocInfo::external_word_type
@@ -3194,9 +3205,8 @@ void MacroAssembler::far_call(const Address &entry, Register tmp) {
   // We can use auipc + jalr here because we know that the total size of
   // the code cache cannot exceed 2Gb.
   relocate(entry.rspec(), [&] {
-    int32_t offset;
-    la(tmp, entry.target(), offset);
-    jalr(x1, tmp, offset); // link
+    assert(is_valid_32bit_offset(entry.target() - pc()), "Far call using wrong instructions.");
+    call(entry.target(), tmp);
   });
 }
 
@@ -3452,7 +3462,7 @@ void MacroAssembler::reserved_stack_check() {
     relocate(target.rspec(), [&] {
       int32_t offset;
       movptr(t0, target.target(), offset);
-      jalr(x0, t0, offset);
+      jr(t0, offset);
     });
     should_not_reach_here();
 
@@ -3534,7 +3544,7 @@ address MacroAssembler::trampoline_call(Address entry) {
   }
 #endif
   relocate(entry.rspec(), [&] {
-    jal(target);
+    jump_link(target, t0);
   });
 
   postcond(pc() != badAddress);
@@ -4373,7 +4383,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) {
         return nullptr;
       }
     } else {
-      jal(zero_blocks);
+      jump_link(zero_blocks, t0);
     }
   }
   bind(around);
@@ -5018,20 +5028,6 @@ void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
   }
 }
 
-void MacroAssembler::rt_call(address dest, Register tmp) {
-  CodeBlob *cb = CodeCache::find_blob(dest);
-  RuntimeAddress target(dest);
-  if (cb) {
-    far_call(target, tmp);
-  } else {
-    relocate(target.rspec(), [&] {
-      int32_t offset;
-      movptr(tmp, target.target(), offset);
-      jalr(x1, tmp, offset);
-    });
-  }
-}
-
 void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
   assert(bit_pos < 64, "invalid bit range");
   if (UseZbs) {
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 7c77edd8711ce..35a242e47fd06 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -592,14 +592,40 @@ class MacroAssembler: public Assembler {
   void bltz(Register Rs, const address dest);
   void bgtz(Register Rs, const address dest);
 
-  void j(Label &l, Register temp = t0);
+ private:
+  void jump_link(const address dest, Register temp);
+  void jump_link(const Address &adr, Register temp);
+ public:
+  // We try to follow RISC-V asm mnemonics.
+  // But as we don't lay out a reachable GOT,
+  // we often need to resort to movptr, li <48imm>.
+  // https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
+
+  // jump: jal x0, offset
+  // For long reach uses temp register for:
+  // la + jr
   void j(const address dest, Register temp = t0);
   void j(const Address &adr, Register temp = t0);
-  void jal(Label &l, Register temp = t0);
-  void jal(const address dest, Register temp = t0);
-  void jal(const Address &adr, Register temp = t0);
-  void jal(Register Rd, Label &L, Register temp = t0);
-  void jal(Register Rd, const address dest, Register temp = t0);
+  void j(Label &l, Register temp = t0);
+
+  // jump register: jalr x0, offset(rs)
+  void jr(Register Rd, int32_t offset = 0);
+
+  // call: la + jalr x1
+  void call(const address dest, Register temp = t0);
+
+  // jalr: jalr x1, offset(rs)
+  void jalr(Register Rs, int32_t offset = 0);
+
+  // Emit a runtime call. Only invalidates the tmp register which
+  // is used to keep the entry address for jalr/movptr.
+  // Uses call() for intra code cache, else movptr + jalr.
+  void rt_call(address dest, Register tmp = t0);
+
+  // ret: jalr x0, 0(x1)
+  inline void ret() {
+    Assembler::jalr(x0, x1, 0);
+  }
 
   //label
   void beqz(Register Rs, Label &l, bool is_far = false);
@@ -689,6 +715,14 @@ class MacroAssembler: public Assembler {
     return x < (twoG - twoK) && x >= (-twoG - twoK);
   }
 
+  // Ensure that the auipc can reach the destination at x from anywhere within
+  // the code cache so that if it is relocated we know it will still reach.
+  bool is_32bit_offset_from_codecache(int64_t x) {
+    int64_t low  = (int64_t)CodeCache::low_bound();
+    int64_t high = (int64_t)CodeCache::high_bound();
+    return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high);
+  }
+
 public:
   void push_reg(Register Rs);
   void pop_reg(Register Rd);
@@ -733,14 +767,13 @@ class MacroAssembler: public Assembler {
   typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
   typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
   typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
-  typedef void (MacroAssembler::* load_insn_by_temp)(Register Rt, address dest, Register temp);
 
-  void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn);
   void wrap_label(Register r, Label &L, jal_jalr_insn insn);
   void wrap_label(Register r1, Register r2, Label &L,
                   compare_and_branch_insn insn,
                   compare_and_branch_label_insn neg_insn, bool is_far = false);
 
+  // la will use movptr instead of GOT when not in reach for auipc.
   void la(Register Rd, Label &label);
   void la(Register Rd, const address addr);
   void la(Register Rd, const address addr, int32_t &offset);
@@ -1469,21 +1502,6 @@ class MacroAssembler: public Assembler {
                    VMRegPair dst,
                    bool is_receiver,
                    int* receiver_offset);
-  // Emit a runtime call. Only invalidates the tmp register which
-  // is used to keep the entry address for jalr/movptr.
-  void rt_call(address dest, Register tmp = t0);
-
-  void call(const address dest, Register temp = t0) {
-    assert_cond(dest != nullptr);
-    assert(temp != noreg, "expecting a register");
-    int32_t offset = 0;
-    mv(temp, dest, offset);
-    jalr(x1, temp, offset);
-  }
-
-  inline void ret() {
-    jalr(x0, x1, 0);
-  }
 
 #ifdef ASSERT
   // Template short-hand support to clean-up after a failed call to trampoline
diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
index c4048f66e0d3e..399de3a2805d2 100644
--- a/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
+++ b/src/hotspot/cpu/riscv/nativeInst_riscv.cpp
@@ -400,7 +400,7 @@ void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
 
   int32_t offset = 0;
   a.movptr(t0, entry, offset); // lui, addi, slli, addi, slli
-  a.jalr(x0, t0, offset); // jalr
+  a.jr(t0, offset); // jalr
 
   ICache::invalidate_range(code_pos, instruction_size);
 }
@@ -410,7 +410,6 @@ void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer)
   ShouldNotCallThis();
 }
 
-
 address NativeCallTrampolineStub::destination(nmethod *nm) const {
   return ptr_at(data_offset);
 }
diff --git a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
index 582538a18b496..52fc126a459b7 100644
--- a/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/stubGenerator_riscv.cpp
@@ -652,7 +652,7 @@ class StubGenerator: public StubCodeGenerator {
     assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
 #endif
     BLOCK_COMMENT("call MacroAssembler::debug");
-    __ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
+    __ rt_call(CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
     __ ebreak();
 
     return start;
@@ -5450,7 +5450,7 @@ static const int64_t right_3_bits = right_n_bits(3);
     }
     __ mv(c_rarg0, xthread);
     BLOCK_COMMENT("call runtime_entry");
-    __ call(runtime_entry);
+    __ rt_call(runtime_entry);
 
     // Generate oop map
     OopMap* map = new OopMap(framesize, 0);
diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
index 07aad47173c29..769e4dc5ccc78 100644
--- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
+++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp
@@ -1206,7 +1206,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
     // hand.
     //
     __ mv(c_rarg0, xthread);
-    __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
+    __ rt_call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
     __ get_method(xmethod);
     __ reinit_heapbase();
     __ bind(Continue);
@@ -1255,7 +1255,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
 
     __ push_call_clobbered_registers();
     __ mv(c_rarg0, xthread);
-    __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
+    __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
     __ pop_call_clobbered_registers();
     __ bind(no_reguard);
   }
@@ -1815,7 +1815,7 @@ void TemplateInterpreterGenerator::trace_bytecode(Template* t) {
   // the tosca in-state for the given template.
 
   assert(Interpreter::trace_code(t->tos_in()) != nullptr, "entry must have been generated");
-  __ jal(Interpreter::trace_code(t->tos_in()));
+  __ call(Interpreter::trace_code(t->tos_in()));
   __ reinit_heapbase();
 }
 

From 4f1a10f84bcfadef263a0890b6834ccd3d5bb52f Mon Sep 17 00:00:00 2001
From: Coleen Phillimore <coleenp@openjdk.org>
Date: Wed, 22 May 2024 12:08:33 +0000
Subject: [PATCH 4/9] 8332360: JVM hangs at exit when running on a uniprocessor

Reviewed-by: dholmes, dcubed, shade
---
 src/hotspot/share/runtime/objectMonitor.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp
index d281acdf00699..c80c52e4ba3cd 100644
--- a/src/hotspot/share/runtime/objectMonitor.cpp
+++ b/src/hotspot/share/runtime/objectMonitor.cpp
@@ -987,6 +987,13 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) {
     guarantee(v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant");
     assert(owner_raw() != current, "invariant");
 
+    // This thread has been notified so try to reacquire the lock.
+    if (TryLock(current) == TryLockResult::Success) {
+      break;
+    }
+
+    // If that fails, spin again.  Note that spin count may be zero so the above TryLock
+    // is necessary.
     if (TrySpin(current)) {
         break;
     }

From 92d33501e091bdfaab52886078053b849a5a8f68 Mon Sep 17 00:00:00 2001
From: Ivan Walulya <iwalulya@openjdk.org>
Date: Wed, 22 May 2024 13:45:37 +0000
Subject: [PATCH 5/9] 8331920: ubsan: g1CardSetContainers.inline.hpp:266:5:
 runtime error: index 2 out of bounds for type 'G1CardSetHowl::ContainerPtr
 [2]' reported

Reviewed-by: tschatzl, aboldtch
---
 src/hotspot/share/gc/g1/g1CardSet.cpp         |  2 +-
 .../share/gc/g1/g1CardSetContainers.hpp       | 16 +++++++----
 .../gc/g1/g1CardSetContainers.inline.hpp      | 28 +++++++++++++++----
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/hotspot/share/gc/g1/g1CardSet.cpp b/src/hotspot/share/gc/g1/g1CardSet.cpp
index 101822dbc4428..3202859907cb8 100644
--- a/src/hotspot/share/gc/g1/g1CardSet.cpp
+++ b/src/hotspot/share/gc/g1/g1CardSet.cpp
@@ -533,7 +533,7 @@ G1AddCardResult G1CardSet::add_to_howl(ContainerPtr parent_container,
   ContainerPtr container;
 
   uint bucket = _config->howl_bucket_index(card_in_region);
-  ContainerPtr volatile* bucket_entry = howl->get_container_addr(bucket);
+  ContainerPtr volatile* bucket_entry = howl->container_addr(bucket);
 
   while (true) {
     if (Atomic::load(&howl->_num_entries) >= _config->cards_in_howl_threshold()) {
diff --git a/src/hotspot/share/gc/g1/g1CardSetContainers.hpp b/src/hotspot/share/gc/g1/g1CardSetContainers.hpp
index 16ec6b59a6d0d..261b7e5b20a22 100644
--- a/src/hotspot/share/gc/g1/g1CardSetContainers.hpp
+++ b/src/hotspot/share/gc/g1/g1CardSetContainers.hpp
@@ -238,23 +238,27 @@ class G1CardSetHowl : public G1CardSetContainer {
   using ContainerPtr = G1CardSet::ContainerPtr;
   EntryCountType volatile _num_entries;
 private:
-  ContainerPtr _buckets[2];
-  // Do not add class member variables beyond this point
+  // Variable-length array (VLA) idiom: index via container_addr()/at(), not _buckets[i].
+  ContainerPtr _buckets[1];
+  // Do not add class member variables beyond this point.
 
   // Iterates over the given ContainerPtr with at index in this Howl card set,
   // applying a CardOrRangeVisitor on it.
   template <class CardOrRangeVisitor>
   void iterate_cardset(ContainerPtr const container, uint index, CardOrRangeVisitor& found, G1CardSetConfiguration* config);
 
+  ContainerPtr at(EntryCountType index) const;
+
+  ContainerPtr const* buckets() const;
+
 public:
   G1CardSetHowl(EntryCountType card_in_region, G1CardSetConfiguration* config);
 
-  ContainerPtr* get_container_addr(EntryCountType index) {
-    return &_buckets[index];
-  }
+  ContainerPtr const* container_addr(EntryCountType index) const;
 
-  bool contains(uint card_idx, G1CardSetConfiguration* config);
+  ContainerPtr* container_addr(EntryCountType index);
 
+  bool contains(uint card_idx, G1CardSetConfiguration* config);
   // Iterates over all ContainerPtrs in this Howl card set, applying a CardOrRangeVisitor
   // on it.
   template <class CardOrRangeVisitor>
diff --git a/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp b/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp
index 35abc09230622..330e9d6360048 100644
--- a/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp
+++ b/src/hotspot/share/gc/g1/g1CardSetContainers.inline.hpp
@@ -257,15 +257,33 @@ inline size_t G1CardSetBitMap::header_size_in_bytes() {
     return offset_of(G1CardSetBitMap, _bits);
 }
 
+inline G1CardSetHowl::ContainerPtr const* G1CardSetHowl::container_addr(EntryCountType index) const {
+  assert(index < _num_entries, "precondition");
+  return buckets() + index;
+}
+
+inline G1CardSetHowl::ContainerPtr* G1CardSetHowl::container_addr(EntryCountType index) {
+  return const_cast<ContainerPtr*>(const_cast<const G1CardSetHowl*>(this)->container_addr(index));
+}
+
+inline G1CardSetHowl::ContainerPtr G1CardSetHowl::at(EntryCountType index) const {
+  return *container_addr(index);
+}
+
+inline G1CardSetHowl::ContainerPtr const* G1CardSetHowl::buckets() const {
+  const void* ptr = reinterpret_cast<const char*>(this) + header_size_in_bytes();
+  return reinterpret_cast<ContainerPtr const*>(ptr);
+}
+
 inline G1CardSetHowl::G1CardSetHowl(EntryCountType card_in_region, G1CardSetConfiguration* config) :
   G1CardSetContainer(),
   _num_entries((config->max_cards_in_array() + 1)) /* Card Transfer will not increment _num_entries */ {
   EntryCountType num_buckets = config->num_buckets_in_howl();
   EntryCountType bucket = config->howl_bucket_index(card_in_region);
   for (uint i = 0; i < num_buckets; ++i) {
-    _buckets[i] = G1CardSetInlinePtr();
+    *container_addr(i) = G1CardSetInlinePtr();
     if (i == bucket) {
-      G1CardSetInlinePtr value(&_buckets[i], _buckets[i]);
+      G1CardSetInlinePtr value(container_addr(i), at(i));
       value.add(card_in_region, config->inline_ptr_bits_per_card(), config->max_cards_in_inline_ptr());
     }
   }
@@ -273,7 +291,7 @@ inline G1CardSetHowl::G1CardSetHowl(EntryCountType card_in_region, G1CardSetConf
 
 inline bool G1CardSetHowl::contains(uint card_idx, G1CardSetConfiguration* config) {
   EntryCountType bucket = config->howl_bucket_index(card_idx);
-  ContainerPtr* array_entry = get_container_addr(bucket);
+  ContainerPtr* array_entry = container_addr(bucket);
   ContainerPtr container = Atomic::load_acquire(array_entry);
 
   switch (G1CardSet::container_type(container)) {
@@ -299,14 +317,14 @@ inline bool G1CardSetHowl::contains(uint card_idx, G1CardSetConfiguration* confi
 template <class CardOrRangeVisitor>
 inline void G1CardSetHowl::iterate(CardOrRangeVisitor& found, G1CardSetConfiguration* config) {
   for (uint i = 0; i < config->num_buckets_in_howl(); ++i) {
-    iterate_cardset(_buckets[i], i, found, config);
+    iterate_cardset(at(i), i, found, config);
   }
 }
 
 template <class ContainerPtrVisitor>
 inline void G1CardSetHowl::iterate(ContainerPtrVisitor& found, uint num_card_sets) {
   for (uint i = 0; i < num_card_sets; ++i) {
-    found(&_buckets[i]);
+    found(container_addr(i));
   }
 }
 

From 9ca90ccd6bfec76e54e2e870bd706fad5abf233c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johan=20Sj=C3=B6len?= <jsjolen@openjdk.org>
Date: Wed, 22 May 2024 13:52:51 +0000
Subject: [PATCH 6/9] 8332610: Remove unused nWakeups in ObjectMonitor

Reviewed-by: coleenp, dcubed
---
 src/hotspot/share/runtime/objectMonitor.cpp | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp
index c80c52e4ba3cd..178f3e97d7108 100644
--- a/src/hotspot/share/runtime/objectMonitor.cpp
+++ b/src/hotspot/share/runtime/objectMonitor.cpp
@@ -829,7 +829,6 @@ void ObjectMonitor::EnterI(JavaThread* current) {
   // to defer the state transitions until absolutely necessary,
   // and in doing so avoid some transitions ...
 
-  int nWakeups = 0;
   int recheckInterval = 1;
 
   for (;;) {
@@ -872,15 +871,14 @@ void ObjectMonitor::EnterI(JavaThread* current) {
     }
 
     // The lock is still contested.
+
     // Keep a tally of the # of futile wakeups.
     // Note that the counter is not protected by a lock or updated by atomics.
     // That is by design - we trade "lossy" counters which are exposed to
     // races during updates for a lower probe effect.
-
     // This PerfData object can be used in parallel with a safepoint.
     // See the work around in PerfDataManager::destroy().
     OM_PERFDATA_OP(FutileWakeups, inc());
-    ++nWakeups;
 
     // Assuming this is not a spurious wakeup we'll normally find _succ == current.
     // We can defer clearing _succ until after the spin completes
@@ -981,7 +979,6 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) {
 
   assert(current->thread_state() != _thread_blocked, "invariant");
 
-  int nWakeups = 0;
   for (;;) {
     ObjectWaiter::TStates v = currentNode->TState;
     guarantee(v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant");
@@ -1018,11 +1015,6 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) {
     }
 
     // The lock is still contested.
-    // Keep a tally of the # of futile wakeups.
-    // Note that the counter is not protected by a lock or updated by atomics.
-    // That is by design - we trade "lossy" counters which are exposed to
-    // races during updates for a lower probe effect.
-    ++nWakeups;
 
     // Assuming this is not a spurious wakeup we'll normally
     // find that _succ == current.
@@ -1032,6 +1024,10 @@ void ObjectMonitor::ReenterI(JavaThread* current, ObjectWaiter* currentNode) {
     // *must* retry  _owner before parking.
     OrderAccess::fence();
 
+    // Keep a tally of the # of futile wakeups.
+    // Note that the counter is not protected by a lock or updated by atomics.
+    // That is by design - we trade "lossy" counters which are exposed to
+    // races during updates for a lower probe effect.
     // This PerfData object can be used in parallel with a safepoint.
     // See the work around in PerfDataManager::destroy().
     OM_PERFDATA_OP(FutileWakeups, inc());

From afed7d0b0593864e5595840a6b645c210ff28c7c Mon Sep 17 00:00:00 2001
From: Volodymyr Paprotski <volodymyr.paprotski@intel.com>
Date: Wed, 22 May 2024 16:27:27 +0000
Subject: [PATCH 7/9] 8329538: Accelerate P256 on x86_64 using Montgomery
 intrinsic

Reviewed-by: ihse, ascarpino, sviswanathan
---
 .../classes/build/tools/intpoly/FieldGen.java |  12 +-
 make/test/BuildMicrobenchmark.gmk             |   2 +
 src/hotspot/cpu/x86/macroAssembler_x86.hpp    |   3 +
 src/hotspot/cpu/x86/stubGenerator_x86_64.cpp  |   5 +
 src/hotspot/cpu/x86/stubGenerator_x86_64.hpp  |   3 +
 ....cpp => stubGenerator_x86_64_poly1305.cpp} |   0
 .../x86/stubGenerator_x86_64_poly_mont.cpp    | 376 ++++++++++++
 src/hotspot/cpu/x86/vm_version_x86.cpp        |  12 +
 src/hotspot/share/classfile/vmIntrinsics.cpp  |   4 +
 src/hotspot/share/classfile/vmIntrinsics.hpp  |  13 +-
 .../gc/shenandoah/c2/shenandoahSupport.cpp    |   6 +
 src/hotspot/share/jvmci/vmStructs_jvmci.cpp   |   2 +
 src/hotspot/share/opto/c2compiler.cpp         |   2 +
 src/hotspot/share/opto/escape.cpp             |   2 +
 src/hotspot/share/opto/library_call.cpp       |  68 ++-
 src/hotspot/share/opto/library_call.hpp       |   2 +
 src/hotspot/share/opto/runtime.cpp            |  39 ++
 src/hotspot/share/opto/runtime.hpp            |   2 +
 src/hotspot/share/runtime/globals.hpp         |   2 +
 src/hotspot/share/runtime/stubRoutines.cpp    |   2 +
 src/hotspot/share/runtime/stubRoutines.hpp    |   4 +
 .../sun/security/ec/ECDHKeyAgreement.java     |   6 +
 .../sun/security/ec/ECDSAOperations.java      |   4 +-
 .../classes/sun/security/ec/ECOperations.java | 507 ++++++++--------
 .../sun/security/ec/point/AffinePoint.java    |  45 +-
 .../security/ec/point/ProjectivePoint.java    |  10 +-
 .../math/IntegerMontgomeryFieldModuloP.java   |  40 ++
 .../util/math/intpoly/IntegerPolynomial.java  |  95 +--
 .../math/intpoly/IntegerPolynomial1305.java   |   8 +-
 .../intpoly/IntegerPolynomialModBinP.java     |   9 +-
 .../MontgomeryIntegerPolynomialP256.java      | 560 ++++++++++++++++++
 .../sun/security/ec/ECOperationsFuzzTest.java | 171 ++++++
 .../sun/security/ec/ECOperationsKATTest.java  | 253 ++++++++
 .../math/intpoly/IntegerPolynomialTest.java   |  95 +++
 .../intpoly/MontgomeryPolynomialFuzzTest.java | 100 ++++
 .../crypto/full/PolynomialP256Bench.java      | 105 ++++
 36 files changed, 2253 insertions(+), 316 deletions(-)
 rename src/hotspot/cpu/x86/{stubGenerator_x86_64_poly.cpp => stubGenerator_x86_64_poly1305.cpp} (100%)
 create mode 100644 src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
 create mode 100644 src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java
 create mode 100644 src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java
 create mode 100644 test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java
 create mode 100644 test/jdk/com/sun/security/ec/ECOperationsKATTest.java
 create mode 100644 test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java
 create mode 100644 test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java
 create mode 100644 test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java

diff --git a/make/jdk/src/classes/build/tools/intpoly/FieldGen.java b/make/jdk/src/classes/build/tools/intpoly/FieldGen.java
index a65150ac421b5..234f5cfce0d26 100644
--- a/make/jdk/src/classes/build/tools/intpoly/FieldGen.java
+++ b/make/jdk/src/classes/build/tools/intpoly/FieldGen.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -778,7 +778,7 @@ private String generate(FieldParams params) throws IOException {
         result.appendLine("}");
 
         result.appendLine("@Override");
-        result.appendLine("protected void mult(long[] a, long[] b, long[] r) {");
+        result.appendLine("protected int mult(long[] a, long[] b, long[] r) {");
         result.incrIndent();
         for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) {
             result.appendIndent();
@@ -804,6 +804,9 @@ private String generate(FieldParams params) throws IOException {
             }
         }
         result.append(");\n");
+        result.appendIndent();
+        result.append("return 0;");
+        result.appendLine();
         result.decrIndent();
         result.appendLine("}");
 
@@ -833,7 +836,7 @@ private String generate(FieldParams params) throws IOException {
         //      }
         //  }
         result.appendLine("@Override");
-        result.appendLine("protected void square(long[] a, long[] r) {");
+        result.appendLine("protected int square(long[] a, long[] r) {");
         result.incrIndent();
         for (int i = 0; i < 2 * params.getNumLimbs() - 1; i++) {
             result.appendIndent();
@@ -874,6 +877,9 @@ private String generate(FieldParams params) throws IOException {
             }
         }
         result.append(");\n");
+        result.appendIndent();
+        result.append("return 0;");
+        result.appendLine();
         result.decrIndent();
         result.appendLine("}");
 
diff --git a/make/test/BuildMicrobenchmark.gmk b/make/test/BuildMicrobenchmark.gmk
index 4a6232afbe0b9..7b65e89610e04 100644
--- a/make/test/BuildMicrobenchmark.gmk
+++ b/make/test/BuildMicrobenchmark.gmk
@@ -109,6 +109,8 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
         --add-exports java.base/jdk.internal.vm=ALL-UNNAMED \
         --add-exports java.base/sun.invoke.util=ALL-UNNAMED \
         --add-exports java.base/sun.security.util=ALL-UNNAMED \
+        --add-exports java.base/sun.security.util.math=ALL-UNNAMED \
+        --add-exports java.base/sun.security.util.math.intpoly=ALL-UNNAMED \
         --enable-preview \
         -XDsuppressNotes \
         -processor org.openjdk.jmh.generators.BenchmarkProcessor, \
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index e045572a5cdbb..c69c8c0d447b5 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -1549,6 +1549,8 @@ class MacroAssembler: public Assembler {
       Assembler::evpsrlvd(dst, mask, nds, src, merge, vector_len);
     }
   }
+
+  using Assembler::evpsrlq;
   void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
     if (!is_varshift) {
       Assembler::evpsrlq(dst, mask, nds, src, merge, vector_len);
@@ -1570,6 +1572,7 @@ class MacroAssembler: public Assembler {
       Assembler::evpsravd(dst, mask, nds, src, merge, vector_len);
     }
   }
+  using Assembler::evpsraq;
   void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len, bool is_varshift) {
     if (!is_varshift) {
       Assembler::evpsraq(dst, mask, nds, src, merge, vector_len);
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index 63226a560f4df..3f2865e7465e4 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -4255,6 +4255,11 @@ void StubGenerator::generate_compiler_stubs() {
     StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
   }
 
+  if (UseIntPolyIntrinsics) {
+    StubRoutines::_intpoly_montgomeryMult_P256 = generate_intpoly_montgomeryMult_P256();
+    StubRoutines::_intpoly_assign = generate_intpoly_assign();
+  }
+
   if (UseMD5Intrinsics) {
     StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress");
     StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB");
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
index 725932b9e0310..02435bd172c47 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.hpp
@@ -483,6 +483,9 @@ class StubGenerator: public StubCodeGenerator {
                                const XMMRegister P2L, const XMMRegister P2H,
                                const XMMRegister YTMP1, const Register rscratch);
 
+  address generate_intpoly_montgomeryMult_P256();
+  address generate_intpoly_assign();
+
   // BASE64 stubs
 
   address base64_shuffle_addr();
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp
similarity index 100%
rename from src/hotspot/cpu/x86/stubGenerator_x86_64_poly.cpp
rename to src/hotspot/cpu/x86/stubGenerator_x86_64_poly1305.cpp
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
new file mode 100644
index 0000000000000..25ee68072492c
--- /dev/null
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64_poly_mont.cpp
@@ -0,0 +1,376 @@
+/*
+ * Copyright (c) 2024, Intel Corporation. All rights reserved.
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "macroAssembler_x86.hpp"
+#include "stubGenerator_x86_64.hpp"
+
+#define __ _masm->
+
+ATTRIBUTE_ALIGNED(64) uint64_t MODULUS_P256[] = { // modulus split into 52-bit limbs, least-significant limb first
+  0x000fffffffffffffULL, 0x00000fffffffffffULL, // limbs 0-1
+  0x0000000000000000ULL, 0x0000001000000000ULL, // limbs 2-3
+  0x0000ffffffff0000ULL, 0x0000000000000000ULL, // limb 4, then zero padding up to eight quadwords
+  0x0000000000000000ULL, 0x0000000000000000ULL
+};
+static address modulus_p256() { // table address, wrapped in ExternalAddress by montgomeryMultiply()
+  return (address)MODULUS_P256;
+}
+
+ATTRIBUTE_ALIGNED(64) uint64_t P256_MASK52[] = { // AND-mask applied to the accumulator in the final carry round
+  0x000fffffffffffffULL, 0x000fffffffffffffULL, // lanes 0-3: keep only the low 52 bits
+  0x000fffffffffffffULL, 0x000fffffffffffffULL,
+  0xffffffffffffffffULL, 0xffffffffffffffffULL, // lanes 4-7: all ones, i.e. left untouched by the AND
+  0xffffffffffffffffULL, 0xffffffffffffffffULL,
+};
+static address p256_mask52() { // table address, wrapped in ExternalAddress by montgomeryMultiply()
+  return (address)P256_MASK52;
+}
+
+ATTRIBUTE_ALIGNED(64) uint64_t SHIFT1R[] = { // evpermq index vector: destination lane i takes source lane i+1
+  0x0000000000000001ULL, 0x0000000000000002ULL,
+  0x0000000000000003ULL, 0x0000000000000004ULL,
+  0x0000000000000005ULL, 0x0000000000000006ULL,
+  0x0000000000000007ULL, 0x0000000000000000ULL, // last lane wraps around to source lane 0
+};
+static address shift_1R() { // table address, wrapped in ExternalAddress by montgomeryMultiply()
+  return (address)SHIFT1R;
+}
+
+ATTRIBUTE_ALIGNED(64) uint64_t SHIFT1L[] = { // evpermq index vector: destination lane i takes source lane i-1
+  0x0000000000000007ULL, 0x0000000000000000ULL, // lane 0 wraps around to source lane 7
+  0x0000000000000001ULL, 0x0000000000000002ULL,
+  0x0000000000000003ULL, 0x0000000000000004ULL,
+  0x0000000000000005ULL, 0x0000000000000006ULL,
+};
+static address shift_1L() { // table address, wrapped in ExternalAddress by montgomeryMultiply()
+  return (address)SHIFT1L;
+}
+
+/**
+ * Unrolled Word-by-Word Montgomery Multiplication
+ * r = a * b * 2^-260 (mod P)
+ *
+ * Reference [1]: Shay Gueron and Vlad Krasnov
+ *    "Fast Prime Field Elliptic Curve Cryptography with 256 Bit Primes"
+ *    See Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication for a Montgomery
+ *    Friendly modulus p". Note: Step 6. Skipped; Instead use numAdds to reuse existing overflow
+ *    logic.
+ *
+ * Pseudocode:
+ *
+ *                                                     +--+--+--+--+--+--+--+--+
+ *   M = load(*modulus_p256)                           | 0| 0| 0|m5|m4|m3|m2|m1|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *   A = load(*aLimbs)                                 | 0| 0| 0|a5|a4|a3|a2|a1|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *   Acc1 = 0                                          | 0| 0| 0| 0| 0| 0| 0| 0|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *      ---- for i = 0 to 4
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          Acc2 = 0                                   | 0| 0| 0| 0| 0| 0| 0| 0|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          B = replicate(bLimbs[i])                   |bi|bi|bi|bi|bi|bi|bi|bi|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *                                                     +--+--+--+--+--+--+--+--+
+ *                                               Acc1+=| 0| 0| 0|c5|c4|c3|c2|c1|
+ *                                                    *| 0| 0| 0|a5|a4|a3|a2|a1|
+ *          Acc1 += A *  B                             |bi|bi|bi|bi|bi|bi|bi|bi|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *                                               Acc2+=| 0| 0| 0| 0| 0| 0| 0| 0|
+ *                                                   *h| 0| 0| 0|a5|a4|a3|a2|a1|
+ *          Acc2 += A *h B                             |bi|bi|bi|bi|bi|bi|bi|bi|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          N = replicate(Acc1[0])                     |n0|n0|n0|n0|n0|n0|n0|n0|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *                                                     +--+--+--+--+--+--+--+--+
+ *                                               Acc1+=| 0| 0| 0|c5|c4|c3|c2|c1|
+ *                                                    *| 0| 0| 0|m5|m4|m3|m2|m1|
+ *          Acc1 += M *  N                             |n0|n0|n0|n0|n0|n0|n0|n0| Note: 52 low bits of Acc1[0] == 0 due to Montgomery!
+ *                                                     +--+--+--+--+--+--+--+--+
+ *                                               Acc2+=| 0| 0| 0|d5|d4|d3|d2|d1|
+ *                                                   *h| 0| 0| 0|m5|m4|m3|m2|m1|
+ *          Acc2 += M *h N                             |n0|n0|n0|n0|n0|n0|n0|n0|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          if (i == 4) break;
+ *          // Combine high/low partial sums Acc1 + Acc2
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          carry = Acc1[0] >> 52                      | 0| 0| 0| 0| 0| 0| 0|c1|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          Acc2[0] += carry
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          Acc1 = Acc1 shift one q element>>          | 0| 0| 0| 0|c5|c4|c3|c2|
+ *                                                     +--+--+--+--+--+--+--+--+
+ *          Acc1 = Acc1 + Acc2
+ *      ---- done
+ *   // Last Carry round: Combine high/low partial sums Acc1<high_bits> + Acc1 + Acc2
+ *   carry = Acc1 >> 52
+ *   Acc1 = Acc1 shift one q element >>
+ *   Acc1  = mask52(Acc1)
+ *   Acc2  += carry
+ *   Acc1 = Acc1 + Acc2
+ *   output to rLimbs
+ */
+void montgomeryMultiply(const Register aLimbs, const Register bLimbs, const Register rLimbs, const Register tmp, MacroAssembler* _masm) {
+  Register t0 = tmp; // general-purpose register used to build the two k-mask constants
+  Register rscratch = tmp; // same register again, passed as scratch for the ExternalAddress loads
+
+  // Inputs
+  XMMRegister A = xmm0;
+  XMMRegister B = xmm1;
+  XMMRegister T = xmm2;
+
+  // Intermediates
+  XMMRegister Acc1 = xmm10;
+  XMMRegister Acc2 = xmm11;
+  XMMRegister N    = xmm12;
+  XMMRegister carry = xmm13;
+
+  // Constants
+  XMMRegister modulus = xmm20;
+  XMMRegister shift1L = xmm21;
+  XMMRegister shift1R = xmm22;
+  XMMRegister mask52  = xmm23;
+  KRegister limb0    = k1;
+  KRegister allLimbs = k2;
+
+  __ mov64(t0, 0x1);
+  __ kmovql(limb0, t0); // limb0 selects lane 0 only
+  __ mov64(t0, 0x1f);
+  __ kmovql(allLimbs, t0); // allLimbs selects lanes 0-4, i.e. the five limbs
+  __ evmovdquq(shift1L, allLimbs, ExternalAddress(shift_1L()), false, Assembler::AVX_512bit, rscratch);
+  __ evmovdquq(shift1R, allLimbs, ExternalAddress(shift_1R()), false, Assembler::AVX_512bit, rscratch);
+  __ evmovdquq(mask52, allLimbs, ExternalAddress(p256_mask52()), false, Assembler::AVX_512bit, rscratch);
+
+  // M = load(*modulus_p256)
+  __ evmovdquq(modulus, allLimbs, ExternalAddress(modulus_p256()), false, Assembler::AVX_512bit, rscratch);
+
+  // A = load(*aLimbs);  masked evmovdquq() can be slow. Instead load a full 256 bits (limbs 1-4) and combine with the 64-bit limb 0
+  __ evmovdquq(A, Address(aLimbs, 8), Assembler::AVX_256bit);
+  __ evpermq(A, allLimbs, shift1L, A, false, Assembler::AVX_512bit); // move limbs 1-4 up one lane to make room for limb 0
+  __ movq(T, Address(aLimbs, 0));
+  __ evporq(A, A, T, Assembler::AVX_512bit);
+
+  // Acc1 = 0
+  __ vpxorq(Acc1, Acc1, Acc1, Assembler::AVX_512bit);
+  for (int i = 0; i< 5; i++) { // unrolled at stub-generation time: one round per limb of b
+      // Acc2 = 0
+      __ vpxorq(Acc2, Acc2, Acc2, Assembler::AVX_512bit);
+
+      // B = replicate(bLimbs[i])
+      __ vpbroadcastq(B, Address(bLimbs, i*8), Assembler::AVX_512bit);
+
+      // Acc1 += A * B
+      __ evpmadd52luq(Acc1, A, B, Assembler::AVX_512bit);
+
+      // Acc2 += A *h B
+      __ evpmadd52huq(Acc2, A, B, Assembler::AVX_512bit);
+
+      // N = replicate(Acc1[0])
+      __ vpbroadcastq(N, Acc1, Assembler::AVX_512bit);
+
+      // Acc1 += M *  N
+      __ evpmadd52luq(Acc1, modulus, N, Assembler::AVX_512bit);
+
+      // Acc2 += M *h N
+      __ evpmadd52huq(Acc2, modulus, N, Assembler::AVX_512bit);
+
+      if (i == 4) break; // last round: the carry handling after the loop replaces the per-round combine below
+
+      // Combine high/low partial sums Acc1 + Acc2
+
+      // carry = Acc1[0] >> 52
+      __ evpsrlq(carry, limb0, Acc1, 52, true, Assembler::AVX_512bit);
+
+      // Acc2[0] += carry
+      __ evpaddq(Acc2, limb0, carry, Acc2, true, Assembler::AVX_512bit);
+
+      // Acc1 = Acc1 shift one q element >>
+      __ evpermq(Acc1, allLimbs, shift1R, Acc1, false, Assembler::AVX_512bit);
+
+      // Acc1 = Acc1 + Acc2
+      __ vpaddq(Acc1, Acc1, Acc2, Assembler::AVX_512bit);
+  }
+
+  // Last Carry round: Combine high/low partial sums Acc1<high_bits> + Acc1 + Acc2
+  // carry = Acc1 >> 52
+  __ evpsrlq(carry, allLimbs, Acc1, 52, true, Assembler::AVX_512bit);
+
+  // Acc1 = Acc1 shift one q element >>
+  __ evpermq(Acc1, allLimbs, shift1R, Acc1, false, Assembler::AVX_512bit);
+
+  // Acc1  = mask52(Acc1)
+  __ evpandq(Acc1, Acc1, mask52, Assembler::AVX_512bit); // Clear top 12 bits
+
+  // Acc2 += carry
+  __ evpaddq(Acc2, allLimbs, carry, Acc2, true, Assembler::AVX_512bit);
+
+  // Acc1 = Acc1 + Acc2
+  __ vpaddq(Acc1, Acc1, Acc2, Assembler::AVX_512bit);
+
+  // output to rLimbs (1 + 4 limbs): limb 0 as a scalar store, limbs 1-4 as one 256-bit store
+  __ movq(Address(rLimbs, 0), Acc1);
+  __ evpermq(Acc1, k0, shift1R, Acc1, true, Assembler::AVX_512bit); // bring limbs 1-4 down into lanes 0-3
+  __ evmovdquq(Address(rLimbs, 8), k0, Acc1, true, Assembler::AVX_256bit);
+}
+
+address StubGenerator::generate_intpoly_montgomeryMult_P256() { // emits the stub and returns its entry address
+  __ align(CodeEntryAlignment);
+  StubCodeMark mark(this, "StubRoutines", "intpoly_montgomeryMult_P256");
+  address start = __ pc();
+  __ enter();
+
+  // Register Map (the two registers listed per argument are the two ABI variants of c_rargN)
+  const Register aLimbs  = c_rarg0; // rdi | rcx
+  const Register bLimbs  = c_rarg1; // rsi | rdx
+  const Register rLimbs  = c_rarg2; // rdx | r8
+  const Register tmp     = r9; // scratch; not one of the three argument registers listed above
+
+  montgomeryMultiply(aLimbs, bLimbs, rLimbs, tmp, _masm);
+  __ mov64(rax, 0x1); // Return 1 (Fig. 5, Step 6 [1] skipped in montgomeryMultiply)
+
+  __ leave();
+  __ ret(0);
+  return start;
+}
+
+// Conditional vector assignment: memory at aAddr takes the value at bAddr in the lanes enabled by select.
+// Must be:
+//  - constant time (i.e. no branches)
+//  - free of side channels (i.e. all memory is always accessed, and in the same order)
+void assign_avx(XMMRegister A, Address aAddr, XMMRegister B, Address bAddr, KRegister select, int vector_len, MacroAssembler* _masm) {
+  __ evmovdquq(A, aAddr, vector_len); // A = current destination contents
+  __ evmovdquq(B, bAddr, vector_len); // B = source contents (loaded regardless of select)
+  __ evmovdquq(A, select, B, true, vector_len); // masked move with merge: unselected lanes keep A
+  __ evmovdquq(aAddr, A, vector_len); // destination is written back regardless of select
+}
+
+void assign_scalar(Address aAddr, Address bAddr, Register select, Register tmp, MacroAssembler* _masm) {
+  // Branch-free 64-bit conditional assignment, mirroring the original Java:
+  // long dummyLimbs = maskValue & (a[i] ^ b[i]);
+  // a[i] = dummyLimbs ^ a[i];
+
+  __ movq(tmp, aAddr); // tmp = a
+  __ xorq(tmp, bAddr); // tmp = a ^ b
+  __ andq(tmp, select); // tmp = select & (a ^ b); select acts as the bit mask (maskValue above)
+  __ xorq(aAddr, tmp); // a ^= tmp: a becomes b in the bit positions set in select, otherwise unchanged
+}
+
+address StubGenerator::generate_intpoly_assign() { // emits the conditional-assign stub and returns its entry address
+  // KNOWN Lengths:
+  //   MontgomeryIntPolynP256:  5 = 4 + 1
+  //   IntegerPolynomial1305:   5 = 4 + 1
+  //   IntegerPolynomial25519: 10 = 8 + 2
+  //   IntegerPolynomialP256:  10 = 8 + 2
+  //   Curve25519OrderField:   10 = 8 + 2
+  //   Curve25519OrderField:   10 = 8 + 2
+  //   P256OrderField:         10 = 8 + 2
+  //   IntegerPolynomialP384:  14 = 8 + 4 + 2
+  //   P384OrderField:         14 = 8 + 4 + 2
+  //   IntegerPolynomial448:   16 = 8 + 8
+  //   Curve448OrderField:     16 = 8 + 8
+  //   Curve448OrderField:     16 = 8 + 8
+  //   IntegerPolynomialP521:  19 = 8 + 8 + 2 + 1
+  //   P521OrderField:         19 = 8 + 8 + 2 + 1
+  // Special Cases 5, 10, 14, 16, 19
+
+  __ align(CodeEntryAlignment);
+  StubCodeMark mark(this, "StubRoutines", "intpoly_assign");
+  address start = __ pc();
+  __ enter();
+
+  // Inputs
+  const Register set     = c_rarg0; // selector; negated below to form the mask
+  const Register aLimbs  = c_rarg1; // destination limbs
+  const Register bLimbs  = c_rarg2; // source limbs
+  const Register length  = c_rarg3; // number of 64-bit limbs
+  XMMRegister A = xmm0;
+  XMMRegister B = xmm1;
+
+  Register tmp = r11; // must not alias an input: r9 is c_rarg3 (length) on Windows x64 and the default loop reads length after using tmp
+  KRegister select = k1;
+  Label L_Length5, L_Length10, L_Length14, L_Length16, L_Length19, L_DefaultLoop, L_Done;
+
+  __ negq(set); // presumably set is 0 or 1 (TODO confirm against the Java caller): 1 becomes an all-ones mask, 0 stays 0
+  __ kmovql(select, set); // same mask as a lane-select for the vector path
+
+  // NOTE! Crypto code cannot branch on user input. However, it is allowed to branch on the number of limbs:
+  // Number of limbs is a constant in each IntegerPolynomial (i.e. this side-channel branch leaks
+  //   number of limbs which is not a secret)
+  __ cmpl(length, 5);
+  __ jcc(Assembler::equal, L_Length5);
+  __ cmpl(length, 10);
+  __ jcc(Assembler::equal, L_Length10);
+  __ cmpl(length, 14);
+  __ jcc(Assembler::equal, L_Length14);
+  __ cmpl(length, 16);
+  __ jcc(Assembler::equal, L_Length16);
+  __ cmpl(length, 19);
+  __ jcc(Assembler::equal, L_Length19);
+
+  // Default copy loop (UNLIKELY): one limb per iteration, for any length not special-cased above
+  __ cmpl(length, 0);
+  __ jcc(Assembler::lessEqual, L_Done);
+  __ bind(L_DefaultLoop);
+  assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm);
+  __ subl(length, 1);
+  __ lea(aLimbs, Address(aLimbs,8));
+  __ lea(bLimbs, Address(bLimbs,8));
+  __ cmpl(length, 0);
+  __ jcc(Assembler::greater, L_DefaultLoop);
+  __ jmp(L_Done);
+
+  __ bind(L_Length5); // 1 + 4
+  assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm);
+  assign_avx(A, Address(aLimbs, 8), B, Address(bLimbs, 8), select, Assembler::AVX_256bit, _masm);
+  __ jmp(L_Done);
+
+  __ bind(L_Length10); // 2 + 8
+  assign_avx(A, Address(aLimbs, 0),  B, Address(bLimbs, 0),  select, Assembler::AVX_128bit, _masm);
+  assign_avx(A, Address(aLimbs, 16), B, Address(bLimbs, 16), select, Assembler::AVX_512bit, _masm);
+  __ jmp(L_Done);
+
+  __ bind(L_Length14); // 2 + 4 + 8
+  assign_avx(A, Address(aLimbs, 0),  B, Address(bLimbs, 0),  select, Assembler::AVX_128bit, _masm);
+  assign_avx(A, Address(aLimbs, 16), B, Address(bLimbs, 16), select, Assembler::AVX_256bit, _masm);
+  assign_avx(A, Address(aLimbs, 48), B, Address(bLimbs, 48), select, Assembler::AVX_512bit, _masm);
+  __ jmp(L_Done);
+
+  __ bind(L_Length16); // 8 + 8
+  assign_avx(A, Address(aLimbs, 0),  B, Address(bLimbs, 0),  select, Assembler::AVX_512bit, _masm);
+  assign_avx(A, Address(aLimbs, 64), B, Address(bLimbs, 64), select, Assembler::AVX_512bit, _masm);
+  __ jmp(L_Done);
+
+  __ bind(L_Length19); // 1 + 2 + 8 + 8
+  assign_scalar(Address(aLimbs, 0), Address(bLimbs, 0), set, tmp, _masm);
+  assign_avx(A, Address(aLimbs, 8),  B, Address(bLimbs, 8),  select, Assembler::AVX_128bit, _masm);
+  assign_avx(A, Address(aLimbs, 24), B, Address(bLimbs, 24), select, Assembler::AVX_512bit, _masm);
+  assign_avx(A, Address(aLimbs, 88), B, Address(bLimbs, 88), select, Assembler::AVX_512bit, _masm);
+
+  __ bind(L_Done);
+  __ leave();
+  __ ret(0);
+  return start;
+}
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 0bffb1aee13f8..fbc952fc8d17a 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -1366,6 +1366,18 @@ void VM_Version::get_processor_features() {
     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
   }
 
+#ifdef _LP64
+  if (supports_avx512ifma() && supports_avx512vlbw()) {
+    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
+      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
+    }
+  } else
+#endif
+  if (UseIntPolyIntrinsics) {
+    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
+  }
+
 #ifdef _LP64
   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
     UseMultiplyToLenIntrinsic = true;
diff --git a/src/hotspot/share/classfile/vmIntrinsics.cpp b/src/hotspot/share/classfile/vmIntrinsics.cpp
index 8d4f57165e151..e60495d1f47fd 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.cpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.cpp
@@ -492,6 +492,10 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
   case vmIntrinsics::_poly1305_processBlocks:
     if (!UsePoly1305Intrinsics) return true;
     break;
+  case vmIntrinsics::_intpoly_montgomeryMult_P256:
+  case vmIntrinsics::_intpoly_assign:
+    if (!UseIntPolyIntrinsics) return true;
+    break;
   case vmIntrinsics::_updateBytesCRC32C:
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
     if (!UseCRC32CIntrinsics) return true;
diff --git a/src/hotspot/share/classfile/vmIntrinsics.hpp b/src/hotspot/share/classfile/vmIntrinsics.hpp
index a0db1a65d3a46..b8d8c40cc47a4 100644
--- a/src/hotspot/share/classfile/vmIntrinsics.hpp
+++ b/src/hotspot/share/classfile/vmIntrinsics.hpp
@@ -526,7 +526,18 @@ class methodHandle;
   do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, countPositives_signature, F_R)   \
    do_name(     implCompressMB_name,                               "implCompressMultiBlock0")                           \
                                                                                                                         \
-   /* support for java.util.Base64.Encoder*/                                                                            \
+  /* support for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256 */                                      \
+  do_class(sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, "sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256")  \
+  do_intrinsic(_intpoly_montgomeryMult_P256, sun_security_util_math_intpoly_MontgomeryIntegerPolynomialP256, intPolyMult_name, intPolyMult_signature, F_R) \
+  do_name(intPolyMult_name, "mult")                                                                                     \
+  do_signature(intPolyMult_signature, "([J[J[J)I")                                                                      \
+                                                                                                                        \
+  do_class(sun_security_util_math_intpoly_IntegerPolynomial, "sun/security/util/math/intpoly/IntegerPolynomial")        \
+  do_intrinsic(_intpoly_assign, sun_security_util_math_intpoly_IntegerPolynomial, intPolyAssign_name, intPolyAssign_signature, F_S) \
+   do_name(intPolyAssign_name, "conditionalAssign")                                                                     \
+   do_signature(intPolyAssign_signature, "(I[J[J)V")                                                                    \
+                                                                                                                        \
+  /* support for java.util.Base64.Encoder*/                                                                             \
   do_class(java_util_Base64_Encoder, "java/util/Base64$Encoder")                                                        \
   do_intrinsic(_base64_encodeBlock, java_util_Base64_Encoder, encodeBlock_name, encodeBlock_signature, F_R)             \
   do_name(encodeBlock_name, "encodeBlock")                                                                              \
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
index 14fb038a6c6a8..dbb45995698a6 100644
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
@@ -463,6 +463,12 @@ void ShenandoahBarrierC2Support::verify(RootNode* root) {
         "decodeBlock",
         { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+3, ShenandoahStore },   { -1, ShenandoahNone },
           { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
+        "intpoly_montgomeryMult_P256",
+        { { TypeFunc::Parms, ShenandoahLoad },  { TypeFunc::Parms+1, ShenandoahLoad  },   { TypeFunc::Parms+2, ShenandoahStore },
+          { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
+        "intpoly_assign",
+        { { TypeFunc::Parms+1, ShenandoahStore }, { TypeFunc::Parms+2, ShenandoahLoad },  { -1, ShenandoahNone },
+          { -1,  ShenandoahNone},                 { -1,  ShenandoahNone},                 { -1,  ShenandoahNone} },
       };
 
       if (call->is_call_to_arraycopystub()) {
diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
index 035c1dd1ce9ed..c72ca3870b5ab 100644
--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
+++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp
@@ -361,6 +361,8 @@
   static_field(StubRoutines,                _md5_implCompressMB,                              address)                               \
   static_field(StubRoutines,                _chacha20Block,                                   address)                               \
   static_field(StubRoutines,                _poly1305_processBlocks,                          address)                               \
+  static_field(StubRoutines,                _intpoly_montgomeryMult_P256,                     address)                               \
+  static_field(StubRoutines,                _intpoly_assign,                                  address)                               \
   static_field(StubRoutines,                _sha1_implCompress,                               address)                               \
   static_field(StubRoutines,                _sha1_implCompressMB,                             address)                               \
   static_field(StubRoutines,                _sha256_implCompress,                             address)                               \
diff --git a/src/hotspot/share/opto/c2compiler.cpp b/src/hotspot/share/opto/c2compiler.cpp
index dc15e82dff8f1..c5e174784773f 100644
--- a/src/hotspot/share/opto/c2compiler.cpp
+++ b/src/hotspot/share/opto/c2compiler.cpp
@@ -786,6 +786,8 @@ bool C2Compiler::is_intrinsic_supported(vmIntrinsics::ID id) {
   case vmIntrinsics::_base64_encodeBlock:
   case vmIntrinsics::_base64_decodeBlock:
   case vmIntrinsics::_poly1305_processBlocks:
+  case vmIntrinsics::_intpoly_montgomeryMult_P256:
+  case vmIntrinsics::_intpoly_assign:
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
diff --git a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp
index 1a9e74dab3df2..b011c9928b6f1 100644
--- a/src/hotspot/share/opto/escape.cpp
+++ b/src/hotspot/share/opto/escape.cpp
@@ -2173,6 +2173,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
                   strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "poly1305_processBlocks") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "intpoly_montgomeryMult_P256") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "intpoly_assign") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "chacha20Block") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp
index b018fcf509713..96e88c1a96bad 100644
--- a/src/hotspot/share/opto/library_call.cpp
+++ b/src/hotspot/share/opto/library_call.cpp
@@ -638,7 +638,10 @@ bool LibraryCallKit::try_to_inline(int predicate) {
     return inline_base64_decodeBlock();
   case vmIntrinsics::_poly1305_processBlocks:
     return inline_poly1305_processBlocks();
-
+  case vmIntrinsics::_intpoly_montgomeryMult_P256:
+    return inline_intpoly_montgomeryMult_P256();
+  case vmIntrinsics::_intpoly_assign:
+    return inline_intpoly_assign();
   case vmIntrinsics::_encodeISOArray:
   case vmIntrinsics::_encodeByteISOArray:
     return inline_encodeISOArray(false);
@@ -7568,6 +7571,69 @@ bool LibraryCallKit::inline_poly1305_processBlocks() {
   return true;
 }
 
+bool LibraryCallKit::inline_intpoly_montgomeryMult_P256() { // builds a leaf call to the intpoly_montgomeryMult_P256 stub
+  address stubAddr;
+  const char *stubName;
+  assert(UseIntPolyIntrinsics, "need intpoly intrinsics support");
+  assert(callee()->signature()->size() == 3, "intpoly_montgomeryMult_P256 has %d parameters", callee()->signature()->size());
+  stubAddr = StubRoutines::intpoly_montgomeryMult_P256();
+  stubName = "intpoly_montgomeryMult_P256";
+
+  if (!stubAddr) return false; // no stub address available: report that nothing was inlined
+  null_check_receiver();  // the intrinsified method is an instance method, so argument(0) is the receiver
+  if (stopped())  return true;
+
+  Node* a = argument(1);
+  Node* b = argument(2);
+  Node* r = argument(3);
+
+  a = must_be_not_null(a, true);
+  b = must_be_not_null(b, true);
+  r = must_be_not_null(r, true);
+
+  Node* a_start = array_element_address(a, intcon(0), T_LONG); // address of element 0 of each long[]
+  assert(a_start, "a array is NULL");
+  Node* b_start = array_element_address(b, intcon(0), T_LONG);
+  assert(b_start, "b array is NULL");
+  Node* r_start = array_element_address(r, intcon(0), T_LONG);
+  assert(r_start, "r array is NULL");
+
+  Node* call = make_runtime_call(RC_LEAF | RC_NO_FP,
+                                 OptoRuntime::intpoly_montgomeryMult_P256_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 a_start, b_start, r_start);
+  Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); // the stub's int return value
+  set_result(result);
+  return true;
+}
+
+bool LibraryCallKit::inline_intpoly_assign() { // builds a leaf call to the intpoly_assign stub
+  assert(UseIntPolyIntrinsics, "need intpoly intrinsics support");
+  assert(callee()->signature()->size() == 3, "intpoly_assign has %d parameters", callee()->signature()->size());
+  const char *stubName = "intpoly_assign";
+  address stubAddr = StubRoutines::intpoly_assign();
+  if (!stubAddr) return false; // no stub address available: report that nothing was inlined
+
+  Node* set = argument(0); // first argument is used directly here; no receiver check is done in this function
+  Node* a = argument(1);
+  Node* b = argument(2);
+  Node* arr_length = load_array_length(a); // NOTE(review): length is loaded before the not-null cast below; b's length is never checked here - confirm callers guarantee equal lengths
+
+  a = must_be_not_null(a, true);
+  b = must_be_not_null(b, true);
+
+  Node* a_start = array_element_address(a, intcon(0), T_LONG); // address of element 0 of each long[]
+  assert(a_start, "a array is NULL");
+  Node* b_start = array_element_address(b, intcon(0), T_LONG);
+  assert(b_start, "b array is NULL");
+
+  Node* call = make_runtime_call(RC_LEAF | RC_NO_FP,
+                                 OptoRuntime::intpoly_assign_Type(),
+                                 stubAddr, stubName, TypePtr::BOTTOM,
+                                 set, a_start, b_start, arr_length); // argument order matches intpoly_assign_Type(): set, a, b, length
+  return true; // void stub: no result node to set
+}
+
 //------------------------------inline_digestBase_implCompress-----------------------
 //
 // Calculate MD5 for single-block byte[] array.
diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp
index cb4f34a0db6b9..1111c795114c0 100644
--- a/src/hotspot/share/opto/library_call.hpp
+++ b/src/hotspot/share/opto/library_call.hpp
@@ -307,6 +307,8 @@ class LibraryCallKit : public GraphKit {
   bool inline_base64_encodeBlock();
   bool inline_base64_decodeBlock();
   bool inline_poly1305_processBlocks();
+  bool inline_intpoly_montgomeryMult_P256();
+  bool inline_intpoly_assign();
   bool inline_digestBase_implCompress(vmIntrinsics::ID id);
   bool inline_digestBase_implCompressMB(int predicate);
   bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
diff --git a/src/hotspot/share/opto/runtime.cpp b/src/hotspot/share/opto/runtime.cpp
index 2c0215047857c..3b4519623ad74 100644
--- a/src/hotspot/share/opto/runtime.cpp
+++ b/src/hotspot/share/opto/runtime.cpp
@@ -1401,6 +1401,45 @@ const TypeFunc* OptoRuntime::poly1305_processBlocks_Type() {
   return TypeFunc::make(domain, range);
 }
 
+// Call signature of the MontgomeryIntegerPolynomialP256 multiply stub: (a, b, r) -> int
+const TypeFunc* OptoRuntime::intpoly_montgomeryMult_P256_Type() {
+  const int argcnt = 3;
+  // Domain: three non-null pointers, filled in call order.
+  const Type** in_types = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  in_types[argp++] = TypePtr::NOTNULL;  // a array
+  in_types[argp++] = TypePtr::NOTNULL;  // b array
+  in_types[argp++] = TypePtr::NOTNULL;  // r(esult) array
+  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, in_types);
+
+  // Range: a single int result (carry bits in output).
+  const Type** out_types = TypeTuple::fields(1);
+  out_types[TypeFunc::Parms] = TypeInt::INT;
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, out_types);
+  return TypeFunc::make(domain, range);
+}
+
+// Call signature of the IntegerPolynomial constant-time assignment stub: (set, a, b, length) -> void
+const TypeFunc* OptoRuntime::intpoly_assign_Type() {
+  int argcnt = 4;
+
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypeInt::INT;        // set flag
+  fields[argp++] = TypePtr::NOTNULL;    // a array (result)
+  fields[argp++] = TypePtr::NOTNULL;    // b array (if set is set)
+  fields[argp++] = TypeInt::INT;        // array length
+  assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result: the range tuple carries no value slot beyond TypeFunc::Parms
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms + 0] = nullptr; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
 //------------- Interpreter state access for on stack replacement
 const TypeFunc* OptoRuntime::osr_end_Type() {
   // create input type (domain)
diff --git a/src/hotspot/share/opto/runtime.hpp b/src/hotspot/share/opto/runtime.hpp
index 30656044cbb20..e4cbdf2f0d0b4 100644
--- a/src/hotspot/share/opto/runtime.hpp
+++ b/src/hotspot/share/opto/runtime.hpp
@@ -298,6 +298,8 @@ class OptoRuntime : public AllStatic {
   static const TypeFunc* base64_encodeBlock_Type();
   static const TypeFunc* base64_decodeBlock_Type();
   static const TypeFunc* poly1305_processBlocks_Type();
+  static const TypeFunc* intpoly_montgomeryMult_P256_Type();
+  static const TypeFunc* intpoly_assign_Type();
 
   static const TypeFunc* updateBytesCRC32_Type();
   static const TypeFunc* updateBytesCRC32C_Type();
diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp
index 3c05ea985b656..6bfb260606bcb 100644
--- a/src/hotspot/share/runtime/globals.hpp
+++ b/src/hotspot/share/runtime/globals.hpp
@@ -233,6 +233,8 @@ const int ObjectAlignmentInBytes = 8;
                                                                             \
   product(bool, UsePoly1305Intrinsics, false, DIAGNOSTIC,                   \
           "Use intrinsics for sun.security.util.math.intpoly")              \
+  product(bool, UseIntPolyIntrinsics, false, DIAGNOSTIC,                   \
+          "Use intrinsics for sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256") \
                                                                             \
   product(size_t, LargePageSizeInBytes, 0,                                  \
           "Maximum large page size used (0 will use the default large "     \
diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp
index f52255f504d90..74286a4ac98fb 100644
--- a/src/hotspot/share/runtime/stubRoutines.cpp
+++ b/src/hotspot/share/runtime/stubRoutines.cpp
@@ -132,6 +132,8 @@ address StubRoutines::_chacha20Block                       = nullptr;
 address StubRoutines::_base64_encodeBlock                  = nullptr;
 address StubRoutines::_base64_decodeBlock                  = nullptr;
 address StubRoutines::_poly1305_processBlocks              = nullptr;
+address StubRoutines::_intpoly_montgomeryMult_P256         = nullptr;
+address StubRoutines::_intpoly_assign                      = nullptr;
 
 address StubRoutines::_md5_implCompress      = nullptr;
 address StubRoutines::_md5_implCompressMB    = nullptr;
diff --git a/src/hotspot/share/runtime/stubRoutines.hpp b/src/hotspot/share/runtime/stubRoutines.hpp
index fe32c9613c814..65b0c0d2f26f9 100644
--- a/src/hotspot/share/runtime/stubRoutines.hpp
+++ b/src/hotspot/share/runtime/stubRoutines.hpp
@@ -215,6 +215,8 @@ class StubRoutines: AllStatic {
   static address _base64_encodeBlock;
   static address _base64_decodeBlock;
   static address _poly1305_processBlocks;
+  static address _intpoly_montgomeryMult_P256;
+  static address _intpoly_assign;
 
   static address _md5_implCompress;
   static address _md5_implCompressMB;
@@ -409,6 +411,8 @@ class StubRoutines: AllStatic {
   static address electronicCodeBook_encryptAESCrypt()   { return _electronicCodeBook_encryptAESCrypt; }
   static address electronicCodeBook_decryptAESCrypt()   { return _electronicCodeBook_decryptAESCrypt; }
   static address poly1305_processBlocks()               { return _poly1305_processBlocks; }
+  static address intpoly_montgomeryMult_P256()          { return _intpoly_montgomeryMult_P256; }
+  static address intpoly_assign()        { return _intpoly_assign; }
   static address counterMode_AESCrypt()  { return _counterMode_AESCrypt; }
   static address ghash_processBlocks()   { return _ghash_processBlocks; }
   static address chacha20Block()         { return _chacha20Block; }
diff --git a/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java b/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java
index 1fdbd94786ab3..be3bdfdd63990 100644
--- a/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java
+++ b/src/java.base/share/classes/sun/security/ec/ECDHKeyAgreement.java
@@ -31,6 +31,7 @@
 import sun.security.util.ECUtil;
 import sun.security.util.NamedCurve;
 import sun.security.util.math.IntegerFieldModuloP;
+import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 import sun.security.util.math.MutableIntegerModuloP;
 import sun.security.util.math.SmallValue;
 
@@ -265,6 +266,11 @@ byte[] deriveKeyImpl(ECPrivateKey priv, ECOperations ops,
         ECPublicKey pubKey) throws InvalidKeyException {
 
         IntegerFieldModuloP field = ops.getField();
+        if (field instanceof IntegerMontgomeryFieldModuloP) {
+            // No point in doing a single SmallValue operation in the Montgomery domain
+            field = ((IntegerMontgomeryFieldModuloP)field).residueField();
+        }
+
         // convert s array into field element and multiply by the cofactor
         MutableIntegerModuloP scalar = field.getElement(priv.getS()).mutable();
         SmallValue cofactor =
diff --git a/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java b/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java
index 7bbcbd032f125..f58d7d8f2d783 100644
--- a/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java
+++ b/src/java.base/share/classes/sun/security/ec/ECDSAOperations.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -252,7 +252,7 @@ public boolean verifySignedDigest(byte[] digest, byte[] sig, ECPoint pp) {
         MutablePoint p1 = ecOps.multiply(basePoint, temp1);
         MutablePoint p2 = ecOps.multiply(pp, temp2);
 
-        ecOps.setSum(p1, p2.asAffine());
+        ecOps.setSum(p1, p2);
         IntegerModuloP result = p1.asAffine().getX();
         b2a(result, orderField, temp1);
         return MessageDigest.isEqual(temp1, r);
diff --git a/src/java.base/share/classes/sun/security/ec/ECOperations.java b/src/java.base/share/classes/sun/security/ec/ECOperations.java
index d4959aed46315..2f94bb8534242 100644
--- a/src/java.base/share/classes/sun/security/ec/ECOperations.java
+++ b/src/java.base/share/classes/sun/security/ec/ECOperations.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,12 +46,7 @@
  * Formulas are derived from "Complete addition formulas for prime order
  * elliptic curves" by Renes, Costello, and Batina.
  */
-
 public class ECOperations {
-    private static final ECOperations secp256r1Ops =
-        new ECOperations(IntegerPolynomialP256.ONE.getElement(
-                CurveDB.lookup(KnownOIDs.secp256r1.value()).getCurve().getB()),
-                P256OrderField.ONE);
 
     /*
      * An exception indicating a problem with an intermediate value produced
@@ -64,7 +59,7 @@ static class IntermediateValueException extends Exception {
     }
 
     static final Map<BigInteger, IntegerFieldModuloP> fields = Map.of(
-        IntegerPolynomialP256.MODULUS, IntegerPolynomialP256.ONE,
+        IntegerPolynomialP256.MODULUS, MontgomeryIntegerPolynomialP256.ONE,
         IntegerPolynomialP384.MODULUS, IntegerPolynomialP384.ONE,
         IntegerPolynomialP521.MODULUS, IntegerPolynomialP521.ONE
     );
@@ -207,11 +202,28 @@ public static boolean allZero(byte[] arr) {
      * @return the product
      */
     public MutablePoint multiply(AffinePoint affineP, byte[] s) {
-        return PointMultiplier.of(this, affineP).pointMultiply(s);
+        PointMultiplier multiplier = null;
+        if (getField() instanceof IntegerMontgomeryFieldModuloP
+                && affineP.equals(Secp256R1GeneratorMontgomeryMultiplier.generator)) {
+            // Lazy class loading here
+            multiplier = Secp256R1GeneratorMontgomeryMultiplier.multiplier;
+        } else {
+            multiplier = new DefaultMultiplier(this, affineP);
+        }
+
+        return multiplier.pointMultiply(s);
     }
 
+    /**
+     * Multiply an affine point, given as an ECPoint, by a scalar and return
+     * the result as a mutable point.
+     *
+     * @param ecPoint the point
+     * @param s the scalar as a little-endian array
+     * @return the product
+     */
     public MutablePoint multiply(ECPoint ecPoint, byte[] s) {
-        return PointMultiplier.of(this, ecPoint).pointMultiply(s);
+        return multiply(AffinePoint.fromECPoint(ecPoint, getField()), s);
     }
 
     /*
@@ -264,21 +276,26 @@ private void setDouble(ProjectivePoint.Mutable p, MutableIntegerModuloP t0,
 
     }
 
-    /*
-     * Mixed point addition. This method constructs new temporaries each time
-     * it is called. For better efficiency, the method that reuses temporaries
-     * should be used if more than one sum will be computed.
+    /**
+     * Adds second Mutable (Projective) point to first.
+     *
+     * Used by ECDSAOperations. This method constructs new temporaries each time
+     * it is called. For better efficiency, the (private) method that reuses
+     * temporaries should be used if more than one sum will be computed.
+     *
+     * @param p first point and result
+     * @param p2 second point to add
      */
-    public void setSum(MutablePoint p, AffinePoint p2) {
-
+    public void setSum(MutablePoint p, MutablePoint p2) {
         IntegerModuloP zero = p.getField().get0();
         MutableIntegerModuloP t0 = zero.mutable();
         MutableIntegerModuloP t1 = zero.mutable();
         MutableIntegerModuloP t2 = zero.mutable();
         MutableIntegerModuloP t3 = zero.mutable();
         MutableIntegerModuloP t4 = zero.mutable();
-        setSum((ProjectivePoint.Mutable) p, p2, t0, t1, t2, t3, t4);
 
+        setSum((ProjectivePoint.Mutable) p, (ProjectivePoint.Mutable) p2,
+            t0, t1, t2, t3, t4);
     }
 
     /*
@@ -289,18 +306,18 @@ private void setSum(ProjectivePoint.Mutable p, AffinePoint p2,
         MutableIntegerModuloP t2, MutableIntegerModuloP t3,
         MutableIntegerModuloP t4) {
 
-        t0.setValue(p.getX()).setProduct(p2.getX());
-        t1.setValue(p.getY()).setProduct(p2.getY());
-        t3.setValue(p2.getX()).setSum(p2.getY());
+        t0.setValue(p.getX()).setProduct(p2.getX(false));
+        t1.setValue(p.getY()).setProduct(p2.getY(false));
+        t3.setValue(p2.getX(false)).setSum(p2.getY(false));
         t4.setValue(p.getX()).setSum(p.getY());
         t3.setProduct(t4);
         t4.setValue(t0).setSum(t1);
 
         t3.setDifference(t4);
-        t4.setValue(p2.getY()).setProduct(p.getZ());
+        t4.setValue(p2.getY(false)).setProduct(p.getZ());
         t4.setSum(p.getY());
 
-        p.getY().setValue(p2.getX()).setProduct(p.getZ());
+        p.getY().setValue(p2.getX(false)).setProduct(p.getZ());
         p.getY().setSum(p.getX());
         t2.setValue(p.getZ());
         p.getZ().setProduct(b);
@@ -412,11 +429,8 @@ public boolean checkOrder(ECPoint point) {
         return isNeutral(this.multiply(ap, scalar));
     }
 
-    sealed interface PointMultiplier {
-        Map<ECPoint, PointMultiplier> multipliers = Map.of(
-                Secp256R1GeneratorMultiplier.generator,
-                Secp256R1GeneratorMultiplier.multiplier);
-
+    sealed interface PointMultiplier
+        permits DefaultMultiplier, Secp256R1GeneratorMontgomeryMultiplier {
         // Multiply the point by a scalar and return the result as a mutable
         // point.  The multiplier point is specified by the implementation of
         // this interface, which could be a general EC point or EC generator
@@ -429,26 +443,6 @@ sealed interface PointMultiplier {
         // in little endian byte array representation.
         ProjectivePoint.Mutable pointMultiply(byte[] scalar);
 
-        static PointMultiplier of(ECOperations ecOps, AffinePoint affPoint) {
-            PointMultiplier multiplier = multipliers.get(affPoint.toECPoint());
-            if (multiplier == null) {
-                multiplier = new Default(ecOps, affPoint);
-            }
-
-            return multiplier;
-        }
-
-        static PointMultiplier of(ECOperations ecOps, ECPoint ecPoint) {
-            PointMultiplier multiplier = multipliers.get(ecPoint);
-            if (multiplier == null) {
-                AffinePoint affPoint =
-                        AffinePoint.fromECPoint(ecPoint, ecOps.getField());
-                multiplier = new Default(ecOps, affPoint);
-            }
-
-            return multiplier;
-        }
-
         private static void lookup(
                 ProjectivePoint.Immutable[] ips, int index,
                 ProjectivePoint.Mutable result) {
@@ -465,232 +459,249 @@ private static void lookup(
                 result.conditionalSet(pi, set);
             }
         }
+    }
 
-        final class Default implements PointMultiplier {
-            private final AffinePoint affineP;
-            private final ECOperations ecOps;
+    final static class DefaultMultiplier implements PointMultiplier {
+        private final ECOperations ecOps;
+        private final ProjectivePoint.Immutable[] pointMultiples;
 
-            private Default(ECOperations ecOps, AffinePoint affineP) {
-                this.ecOps = ecOps;
-                this.affineP = affineP;
-            }
+        DefaultMultiplier(ECOperations ecOps, AffinePoint affineP) {
+            this.ecOps = ecOps;
 
-            @Override
-            public ProjectivePoint.Mutable pointMultiply(byte[] s) {
-                // 4-bit windowed multiply with branchless lookup.
-                // The mixed addition is faster, so it is used to construct
-                // the array at the beginning of the operation.
-
-                IntegerFieldModuloP field = affineP.getX().getField();
-                ImmutableIntegerModuloP zero = field.get0();
-                // temporaries
-                MutableIntegerModuloP t0 = zero.mutable();
-                MutableIntegerModuloP t1 = zero.mutable();
-                MutableIntegerModuloP t2 = zero.mutable();
-                MutableIntegerModuloP t3 = zero.mutable();
-                MutableIntegerModuloP t4 = zero.mutable();
-
-                ProjectivePoint.Mutable result =
-                        new ProjectivePoint.Mutable(field);
-                result.getY().setValue(field.get1().mutable());
-
-                ProjectivePoint.Immutable[] pointMultiples =
-                        new ProjectivePoint.Immutable[16];
-                // 0P is neutral---same as initial result value
-                pointMultiples[0] = result.fixed();
-
-                ProjectivePoint.Mutable ps = new ProjectivePoint.Mutable(field);
-                ps.setValue(affineP);
-                // 1P = P
-                pointMultiples[1] = ps.fixed();
-
-                // the rest are calculated using mixed point addition
-                for (int i = 2; i < 16; i++) {
-                    ecOps.setSum(ps, affineP, t0, t1, t2, t3, t4);
-                    pointMultiples[i] = ps.fixed();
-                }
+            // Precompute and cache point multiples
+            this.pointMultiples = new ProjectivePoint.Immutable[16];
 
-                ProjectivePoint.Mutable lookupResult = ps.mutable();
+            IntegerFieldModuloP field = ecOps.getField();
+            ImmutableIntegerModuloP zero = field.get0();
+            // temporaries
+            MutableIntegerModuloP t0 = zero.mutable();
+            MutableIntegerModuloP t1 = zero.mutable();
+            MutableIntegerModuloP t2 = zero.mutable();
+            MutableIntegerModuloP t3 = zero.mutable();
+            MutableIntegerModuloP t4 = zero.mutable();
 
-                for (int i = s.length - 1; i >= 0; i--) {
-                    double4(result, t0, t1, t2, t3, t4);
+            ProjectivePoint.Mutable ps =
+                new ProjectivePoint.Mutable(field);
+            ps.getY().setValue(field.get1().mutable());
 
-                    int high = (0xFF & s[i]) >>> 4;
-                    lookup(pointMultiples, high, lookupResult);
-                    ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
+            // 0P is neutral---same as initial result value
+            pointMultiples[0] = ps.fixed();
 
-                    double4(result, t0, t1, t2, t3, t4);
+            ps.setValue(affineP);
+            // 1P = P
+            pointMultiples[1] = ps.fixed();
 
-                    int low = 0xF & s[i];
-                    lookup(pointMultiples, low, lookupResult);
-                    ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
-                }
+            // the rest are calculated using mixed point addition
+            for (int i = 2; i < 16; i++) {
+                ecOps.setSum(ps, affineP, t0, t1, t2, t3, t4);
+                pointMultiples[i] = ps.fixed();
+            }
+        }
 
-                return result;
+        @Override
+        public ProjectivePoint.Mutable pointMultiply(byte[] s) {
+            // 4-bit windowed multiply with branchless lookup.
+            // The table of point multiples was already built in the
+            // constructor, using the faster mixed addition.
+
+            IntegerFieldModuloP field = ecOps.getField();
+            ImmutableIntegerModuloP zero = field.get0();
+            // temporaries
+            MutableIntegerModuloP t0 = zero.mutable();
+            MutableIntegerModuloP t1 = zero.mutable();
+            MutableIntegerModuloP t2 = zero.mutable();
+            MutableIntegerModuloP t3 = zero.mutable();
+            MutableIntegerModuloP t4 = zero.mutable();
+
+            ProjectivePoint.Mutable result = new ProjectivePoint.Mutable(field);
+            result.getY().setValue(field.get1().mutable());
+            ProjectivePoint.Mutable lookupResult = new ProjectivePoint.Mutable(field);
+
+            for (int i = s.length - 1; i >= 0; i--) {
+                double4(result, t0, t1, t2, t3, t4);
+
+                int high = (0xFF & s[i]) >>> 4;
+                PointMultiplier.lookup(pointMultiples, high, lookupResult);
+                ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
+
+                double4(result, t0, t1, t2, t3, t4);
+
+                int low = 0xF & s[i];
+                PointMultiplier.lookup(pointMultiples, low, lookupResult);
+                ecOps.setSum(result, lookupResult, t0, t1, t2, t3, t4);
             }
 
-            private void double4(ProjectivePoint.Mutable p,
-                    MutableIntegerModuloP t0, MutableIntegerModuloP t1,
-                    MutableIntegerModuloP t2, MutableIntegerModuloP t3,
-                    MutableIntegerModuloP t4) {
-                for (int i = 0; i < 4; i++) {
-                    ecOps.setDouble(p, t0, t1, t2, t3, t4);
-                }
+            return result;
+        }
+
+        private void double4(ProjectivePoint.Mutable p,
+                MutableIntegerModuloP t0, MutableIntegerModuloP t1,
+                MutableIntegerModuloP t2, MutableIntegerModuloP t3,
+                MutableIntegerModuloP t4) {
+            for (int i = 0; i < 4; i++) {
+                ecOps.setDouble(p, t0, t1, t2, t3, t4);
+            }
+        }
+    }
+
+    // Represents a multiplier with a larger precomputed table. Intended to be
+    // used for Basepoint multiplication
+    final static class Secp256R1GeneratorMontgomeryMultiplier
+        implements PointMultiplier {
+        private static final ECOperations secp256r1Ops = new ECOperations(
+            MontgomeryIntegerPolynomialP256.ONE.getElement(
+                    CurveDB.P_256.getCurve().getB()), P256OrderField.ONE);
+        public static final AffinePoint generator = AffinePoint.fromECPoint(
+            CurveDB.P_256.getGenerator(), secp256r1Ops.getField());
+        public static final PointMultiplier multiplier =
+            new Secp256R1GeneratorMontgomeryMultiplier();
+
+        private final ImmutableIntegerModuloP zero;
+        private final ImmutableIntegerModuloP one;
+        private final ProjectivePoint.Immutable[][] points;
+        private final BigInteger[] base;
+
+        private Secp256R1GeneratorMontgomeryMultiplier() {
+            this(MontgomeryIntegerPolynomialP256.ONE,
+                new DefaultMultiplier(secp256r1Ops, generator));
+
+            // Check that the tables are correctly generated.
+            if (ECOperations.class.desiredAssertionStatus()) {
+                verifyTables(this);
             }
         }
 
-        final class Secp256R1GeneratorMultiplier implements PointMultiplier {
-            private static final ECPoint generator =
-                    CurveDB.P_256.getGenerator();
-            private static final PointMultiplier multiplier =
-                    new Secp256R1GeneratorMultiplier();
-
-            private static final ImmutableIntegerModuloP zero =
-                    IntegerPolynomialP256.ONE.get0();
-            private static final ImmutableIntegerModuloP one =
-                    IntegerPolynomialP256.ONE.get1();
-
-            @Override
-            public ProjectivePoint.Mutable pointMultiply(byte[] s) {
-                MutableIntegerModuloP t0 = zero.mutable();
-                MutableIntegerModuloP t1 = zero.mutable();
-                MutableIntegerModuloP t2 = zero.mutable();
-                MutableIntegerModuloP t3 = zero.mutable();
-                MutableIntegerModuloP t4 = zero.mutable();
-
-                ProjectivePoint.Mutable d = new ProjectivePoint.Mutable(
-                        zero.mutable(),
-                        one.mutable(),
-                        zero.mutable());
-                ProjectivePoint.Mutable r = d.mutable();
-                for (int i = 15; i >= 0; i--) {
-                    secp256r1Ops.setDouble(d, t0, t1, t2, t3, t4);
-                    for (int j = 3; j >= 0; j--) {
-                        int pos = i + j * 16;
-                        int index = (bit(s, pos + 192) << 3) |
-                                    (bit(s, pos + 128) << 2) |
-                                    (bit(s, pos +  64) << 1) |
-                                     bit(s, pos);
-
-                        lookup(P256.points[j], index, r);
-                        secp256r1Ops.setSum(d, r, t0, t1, t2, t3, t4);
+        private Secp256R1GeneratorMontgomeryMultiplier(
+            IntegerFieldModuloP field, PointMultiplier smallTableMultiplier) {
+            zero = field.get0();
+            one = field.get1();
+
+            // Pre-computed table to speed up the point multiplication.
+            //
+            // This is a 4x16 array of ProjectivePoint.Immutable elements.
+            // The first row contains the following multiples of the
+            // generator.
+            //
+            // index   |    point
+            // --------+----------------
+            // 0x0000  | 0G
+            // 0x0001  | 1G
+            // 0x0002  | (2^64)G
+            // 0x0003  | (2^64 + 1)G
+            // 0x0004  | 2^128G
+            // 0x0005  | (2^128 + 1)G
+            // 0x0006  | (2^128 + 2^64)G
+            // 0x0007  | (2^128 + 2^64 + 1)G
+            // 0x0008  | 2^192G
+            // 0x0009  | (2^192 + 1)G
+            // 0x000A  | (2^192 + 2^64)G
+            // 0x000B  | (2^192 + 2^64 + 1)G
+            // 0x000C  | (2^192 + 2^128)G
+            // 0x000D  | (2^192 + 2^128 + 1)G
+            // 0x000E  | (2^192 + 2^128 + 2^64)G
+            // 0x000F  | (2^192 + 2^128 + 2^64 + 1)G
+            //
+            // For the other 3 rows, points[i][j] = 2^16 * points[i-1][j].
+
+            // Generate the pre-computed tables.  This block may be
+            // replaced with hard-coded tables in order to speed up
+            // the class loading.
+            points = new ProjectivePoint.Immutable[4][16];
+            BigInteger[] factors = new BigInteger[] {
+                    BigInteger.ONE,
+                    BigInteger.TWO.pow(64),
+                    BigInteger.TWO.pow(128),
+                    BigInteger.TWO.pow(192)
+            };
+
+            base = new BigInteger[16];
+            base[0] = BigInteger.ZERO;
+            base[1] = BigInteger.ONE;
+            base[2] = factors[1];
+            for (int i = 3; i < 16; i++) {
+                base[i] = BigInteger.ZERO;
+                for (int k = 0; k < 4; k++) {
+                    if (((i >>> k) & 0x01) != 0) {
+                        base[i] = base[i].add(factors[k]);
                     }
                 }
+            }
 
-                return d;
+            for (int d = 0; d < 4; d++) {
+                for (int w = 0; w < 16; w++) {
+                    BigInteger bi = base[w];
+                    if (d != 0) {
+                        bi = bi.multiply(BigInteger.TWO.pow(d * 16));
+                    }
+                    if (w == 0) {
+                        points[d][0] = new ProjectivePoint.Immutable(
+                            zero.fixed(), one.fixed(), zero.fixed());
+                    } else {
+                        byte[] s = bi.toByteArray();
+                        ArrayUtil.reverse(s);
+                        ProjectivePoint.Mutable m = smallTableMultiplier.pointMultiply(s);
+                        points[d][w] = m.fixed();
+                    }
+                }
             }
+        }
 
-            private static int bit(byte[] k, int i) {
-                return (k[i >> 3] >> (i & 0x07)) & 0x01;
+        public ProjectivePoint.Mutable pointMultiply(byte[] s) {
+            MutableIntegerModuloP t0 = zero.mutable();
+            MutableIntegerModuloP t1 = zero.mutable();
+            MutableIntegerModuloP t2 = zero.mutable();
+            MutableIntegerModuloP t3 = zero.mutable();
+            MutableIntegerModuloP t4 = zero.mutable();
+
+            ProjectivePoint.Mutable d = new ProjectivePoint.Mutable(
+                    zero.mutable(),
+                    one.mutable(),
+                    zero.mutable());
+            ProjectivePoint.Mutable r = d.mutable();
+            for (int i = 15; i >= 0; i--) {
+                secp256r1Ops.setDouble(d, t0, t1, t2, t3, t4);
+                for (int j = 3; j >= 0; j--) {
+                    int pos = i + j * 16;
+                    int index = (bit(s, pos + 192) << 3) |
+                                (bit(s, pos + 128) << 2) |
+                                (bit(s, pos +  64) << 1) |
+                                    bit(s, pos);
+
+                    PointMultiplier.lookup(points[j], index, r);
+                    secp256r1Ops.setSum(d, r, t0, t1, t2, t3, t4);
+                }
             }
 
-            // Lazy loading of the tables.
-            private static final class P256 {
-                // Pre-computed table to speed up the point multiplication.
-                //
-                // This is a 4x16 array of ProjectivePoint.Immutable elements.
-                // The first row contains the following multiples of the
-                // generator.
-                //
-                // index   |    point
-                // --------+----------------
-                // 0x0000  | 0G
-                // 0x0001  | 1G
-                // 0x0002  | (2^64)G
-                // 0x0003  | (2^64 + 1)G
-                // 0x0004  | 2^128G
-                // 0x0005  | (2^128 + 1)G
-                // 0x0006  | (2^128 + 2^64)G
-                // 0x0007  | (2^128 + 2^64 + 1)G
-                // 0x0008  | 2^192G
-                // 0x0009  | (2^192 + 1)G
-                // 0x000A  | (2^192 + 2^64)G
-                // 0x000B  | (2^192 + 2^64 + 1)G
-                // 0x000C  | (2^192 + 2^128)G
-                // 0x000D  | (2^192 + 2^128 + 1)G
-                // 0x000E  | (2^192 + 2^128 + 2^64)G
-                // 0x000F  | (2^192 + 2^128 + 2^64 + 1)G
-                //
-                // For the other 3 rows, points[i][j] = 2^16 * (points[i-1][j].
-                private static final ProjectivePoint.Immutable[][] points;
-
-                // Generate the pre-computed tables.  This block may be
-                // replaced with hard-coded tables in order to speed up
-                // the class loading.
-                static {
-                    points = new ProjectivePoint.Immutable[4][16];
-                    BigInteger[] factors = new BigInteger[] {
-                            BigInteger.ONE,
-                            BigInteger.TWO.pow(64),
-                            BigInteger.TWO.pow(128),
-                            BigInteger.TWO.pow(192)
-                    };
-
-                    BigInteger[] base = new BigInteger[16];
-                    base[0] = BigInteger.ZERO;
-                    base[1] = BigInteger.ONE;
-                    base[2] = factors[1];
-                    for (int i = 3; i < 16; i++) {
-                        base[i] = BigInteger.ZERO;
-                        for (int k = 0; k < 4; k++) {
-                            if (((i >>> k) & 0x01) != 0) {
-                                base[i] = base[i].add(factors[k]);
-                            }
-                        }
-                    }
+            return d;
+        }
 
-                    for (int d = 0; d < 4; d++) {
-                        for (int w = 0; w < 16; w++) {
-                            BigInteger bi = base[w];
-                            if (d != 0) {
-                                bi = bi.multiply(BigInteger.TWO.pow(d * 16));
-                            }
-                            if (w == 0) {
-                                points[d][0] = new ProjectivePoint.Immutable(
-                                    zero.fixed(), one.fixed(), zero.fixed());
-                            } else {
-                                PointMultiplier multiplier = new Default(
-                                    secp256r1Ops, AffinePoint.fromECPoint(
-                                        generator, zero.getField()));
-                                byte[] s = bi.toByteArray();
-                                ArrayUtil.reverse(s);
-                                ProjectivePoint.Mutable m =
-                                        multiplier.pointMultiply(s);
-                                points[d][w] = m.setValue(m.asAffine()).fixed();
-                            }
-                        }
-                    }
+        private static int bit(byte[] k, int i) {
+            return (k[i >> 3] >> (i & 0x07)) & 0x01;
+        }
 
-                    // Check that the tables are correctly generated.
-                    if (ECOperations.class.desiredAssertionStatus()) {
-                        verifyTables(base);
+        protected void verifyTables(PointMultiplier multiplier) {
+            for (int d = 0; d < 4; d++) {
+                for (int w = 0; w < 16; w++) {
+                    BigInteger bi = base[w];
+                    if (d != 0) {
+                        bi = bi.multiply(BigInteger.TWO.pow(d * 16));
                     }
-                }
-
-                private static void verifyTables(BigInteger[] base) {
-                    for (int d = 0; d < 4; d++) {
-                        for (int w = 0; w < 16; w++) {
-                            BigInteger bi = base[w];
-                            if (d != 0) {
-                                bi = bi.multiply(BigInteger.TWO.pow(d * 16));
-                            }
-                            if (w != 0) {
-                                byte[] s = new byte[32];
-                                byte[] b = bi.toByteArray();
-                                ArrayUtil.reverse(b);
-                                System.arraycopy(b, 0, s, 0, b.length);
-
-                                ProjectivePoint.Mutable m =
-                                        multiplier.pointMultiply(s);
-                                ProjectivePoint.Immutable v =
-                                        m.setValue(m.asAffine()).fixed();
-                                if (!v.getX().asBigInteger().equals(
-                                        points[d][w].getX().asBigInteger()) ||
-                                    !v.getY().asBigInteger().equals(
-                                        points[d][w].getY().asBigInteger())) {
-                                    throw new RuntimeException();
-                                }
-                            }
+                    if (w != 0) {
+                        byte[] s = new byte[32];
+                        byte[] b = bi.toByteArray();
+                        ArrayUtil.reverse(b);
+                        System.arraycopy(b, 0, s, 0, b.length);
+
+                        // Compare this multiplier to the table
+                        // (generated by Default multiplier)
+                        AffinePoint m = multiplier.pointMultiply(s).asAffine();
+                        AffinePoint v = points[d][w].asAffine();
+                        if (!m.equals(v)) {
+                            java.util.HexFormat hex = java.util.HexFormat.of();
+                            throw new RuntimeException(
+                                "Bad multiple found at [" +d+"]["+w+"]" +
+                                hex.formatHex(s) + " " + m.getX().asBigInteger()
+                            );
                         }
                     }
                 }
diff --git a/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java b/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java
index bc227b0babfca..bc1530cd61bdb 100644
--- a/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java
+++ b/src/java.base/share/classes/sun/security/ec/point/AffinePoint.java
@@ -26,6 +26,7 @@
 
 import sun.security.util.math.ImmutableIntegerModuloP;
 import sun.security.util.math.IntegerFieldModuloP;
+import sun.security.util.math.IntegerMontgomeryFieldModuloP;
 
 import java.security.spec.ECPoint;
 import java.util.Objects;
@@ -54,14 +55,30 @@ public static AffinePoint fromECPoint(
     }
 
     public ECPoint toECPoint() {
-        return new ECPoint(x.asBigInteger(), y.asBigInteger());
+        return new ECPoint(getX().asBigInteger(), getY().asBigInteger());
     }
 
     public ImmutableIntegerModuloP getX() {
+        return getX(true);
+    }
+
+    public ImmutableIntegerModuloP getX(boolean fieldCheck) {
+        IntegerFieldModuloP field = x.getField();
+        if (fieldCheck && field instanceof IntegerMontgomeryFieldModuloP) {
+            return ((IntegerMontgomeryFieldModuloP)field).fromMontgomery(x);
+        }
         return x;
     }
 
     public ImmutableIntegerModuloP getY() {
+        return getY(true);
+    }
+
+    public ImmutableIntegerModuloP getY(boolean fieldCheck) {
+        IntegerFieldModuloP field = y.getField();
+        if (fieldCheck && field instanceof IntegerMontgomeryFieldModuloP) {
+            return ((IntegerMontgomeryFieldModuloP)field).fromMontgomery(y);
+        }
         return y;
     }
 
@@ -71,8 +88,30 @@ public boolean equals(Object obj) {
             return false;
         }
         AffinePoint p = (AffinePoint) obj;
-        boolean xEquals = x.asBigInteger().equals(p.x.asBigInteger());
-        boolean yEquals = y.asBigInteger().equals(p.y.asBigInteger());
+        boolean xEquals, yEquals;
+        boolean thisMont = x.getField() instanceof IntegerMontgomeryFieldModuloP;
+        boolean objMont = p.x.getField() instanceof IntegerMontgomeryFieldModuloP;
+        if (thisMont ^ objMont == false) {
+            // both fields same
+            xEquals = x.asBigInteger().equals(p.x.asBigInteger());
+            yEquals = y.asBigInteger().equals(p.y.asBigInteger());
+        } else if (thisMont) {
+            // mismatched fields should not happen in production, but useful in
+            // testing
+            IntegerMontgomeryFieldModuloP field =
+                (IntegerMontgomeryFieldModuloP)x.getField();
+            xEquals = x.asBigInteger().equals(
+                field.getElement(p.x.asBigInteger()).asBigInteger());
+            yEquals = y.asBigInteger().equals(
+                field.getElement(p.y.asBigInteger()).asBigInteger());
+        } else {
+            IntegerMontgomeryFieldModuloP field =
+                (IntegerMontgomeryFieldModuloP)p.x.getField();
+            xEquals = field.getElement(
+                x.asBigInteger()).asBigInteger().equals(p.x.asBigInteger());
+            yEquals = field.getElement(
+                y.asBigInteger()).asBigInteger().equals(p.y.asBigInteger());
+        }
         return xEquals && yEquals;
     }
 
diff --git a/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java b/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java
index a3ebc532d4693..fbb6681b724ac 100644
--- a/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java
+++ b/src/java.base/share/classes/sun/security/ec/point/ProjectivePoint.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.security.ec.point;
 
 import sun.security.util.math.*;
+import jdk.internal.vm.annotation.ForceInline;
 
 /**
  * Elliptic curve point in projective coordinates (X, Y, Z) where
@@ -145,6 +146,7 @@ public Mutable conditionalSet(Point p, int set) {
             return conditionalSet(pp, set);
         }
 
+        @ForceInline
         private <T extends IntegerModuloP>
         Mutable conditionalSet(ProjectivePoint<T> pp, int set) {
 
@@ -157,9 +159,9 @@ Mutable conditionalSet(ProjectivePoint<T> pp, int set) {
 
         @Override
         public Mutable setValue(AffinePoint p) {
-            x.setValue(p.getX());
-            y.setValue(p.getY());
-            z.setValue(p.getX().getField().get1());
+            x.setValue(p.getX(false));
+            y.setValue(p.getY(false));
+            z.setValue(p.getX(false).getField().get1());
 
             return this;
         }
diff --git a/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java b/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java
new file mode 100644
index 0000000000000..2987674a32b14
--- /dev/null
+++ b/src/java.base/share/classes/sun/security/util/math/IntegerMontgomeryFieldModuloP.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.security.util.math;
+
+import java.math.BigInteger;
+
+/**
+ * An interface for a field of integers modulo a prime number whose elements
+ * are kept in Montgomery form. Besides the IntegerFieldModuloP operations,
+ * an implementation converts elements out of Montgomery form (fromMontgomery)
+ * and returns the corresponding residue field (residueField).
+ */
+
+public interface IntegerMontgomeryFieldModuloP extends IntegerFieldModuloP {
+    ImmutableIntegerModuloP fromMontgomery(ImmutableIntegerModuloP m);
+    IntegerFieldModuloP residueField();
+}
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java
index 18ee87e0b240d..05b4a71bebb51 100644
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,9 @@
 import java.nio.ByteOrder;
 import java.util.Arrays;
 
+import jdk.internal.vm.annotation.ForceInline;
+import jdk.internal.vm.annotation.IntrinsicCandidate;
+
 /**
  * A large number polynomial representation using sparse limbs of signed
  * long (64-bit) values. Limb values will always fit within a long, so inputs
@@ -62,10 +65,9 @@
 public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
     permits IntegerPolynomial1305, IntegerPolynomial25519,
             IntegerPolynomial448, IntegerPolynomialP256,
-            IntegerPolynomialP384, IntegerPolynomialP521,
-            IntegerPolynomialModBinP, P256OrderField,
-            P384OrderField, P521OrderField,
-            Curve25519OrderField,
+            MontgomeryIntegerPolynomialP256, IntegerPolynomialP384,
+            IntegerPolynomialP521, IntegerPolynomialModBinP, P256OrderField,
+            P384OrderField, P521OrderField, Curve25519OrderField,
             Curve448OrderField {
 
     protected static final BigInteger TWO = BigInteger.valueOf(2);
@@ -74,7 +76,8 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
     private final BigInteger modulus;
     protected final int bitsPerLimb;
     private final long[] posModLimbs;
-    private final int maxAdds;
+    private final int maxAddsMul; // max additions before a multiplication
+    private final int maxAddsAdd; // max additions before an addition
 
     /**
      * Reduce an IntegerPolynomial representation (a) and store the result
@@ -87,11 +90,12 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
      * store the result in an IntegerPolynomial representation in a. Requires
      * that a.length == numLimbs.
      */
-    protected void multByInt(long[] a, long b) {
+    protected int multByInt(long[] a, long b) {
         for (int i = 0; i < a.length; i++) {
             a[i] *= b;
         }
         reduce(a);
+        return 0;
     }
 
     /**
@@ -100,7 +104,7 @@ protected void multByInt(long[] a, long b) {
      * a.length == b.length == r.length == numLimbs. It is allowed for a and r
      * to be the same array.
      */
-    protected abstract void mult(long[] a, long[] b, long[] r);
+    protected abstract int mult(long[] a, long[] b, long[] r);
 
     /**
      * Multiply an IntegerPolynomial representation (a) with itself and store
@@ -108,19 +112,23 @@ protected void multByInt(long[] a, long b) {
      * a.length == r.length == numLimbs. It is allowed for a and r
      * to be the same array.
      */
-    protected abstract void square(long[] a, long[] r);
+    protected abstract int square(long[] a, long[] r);
 
     IntegerPolynomial(int bitsPerLimb,
                       int numLimbs,
-                      int maxAdds,
+                      int maxAddsMul,
                       BigInteger modulus) {
 
 
         this.numLimbs = numLimbs;
         this.modulus = modulus;
         this.bitsPerLimb = bitsPerLimb;
-        this.maxAdds = maxAdds;
-
+        this.maxAddsMul = maxAddsMul;
+        if (bitsPerLimb>32) {
+            this.maxAddsAdd = 64 - bitsPerLimb;
+        } else {
+            this.maxAddsAdd = 32 - bitsPerLimb;
+        }
         posModLimbs = setPosModLimbs();
     }
 
@@ -135,7 +143,7 @@ protected int getNumLimbs() {
     }
 
     public int getMaxAdds() {
-        return maxAdds;
+        return maxAddsMul;
     }
 
     @Override
@@ -327,10 +335,9 @@ private void setLimbsValue(BigInteger v, long[] limbs) {
     }
 
     protected void setLimbsValuePositive(BigInteger v, long[] limbs) {
-        assert bitsPerLimb < 32;
         long limbMask = (1L << bitsPerLimb) - 1;
         for (int i = 0; i < limbs.length; i++) {
-            limbs[i] = v.intValue() & limbMask;
+            limbs[i] = v.longValue() & limbMask;
             v = v.shiftRight(bitsPerLimb);
         }
     }
@@ -449,6 +456,8 @@ protected void addLimbs(long[] a, long[] b, long[] dst) {
      * will be unchanged. If set==1, then the values of b will be assigned to a.
      * The behavior is undefined if swap has any value other than 0 or 1.
      */
+    @ForceInline
+    @IntrinsicCandidate
     protected static void conditionalAssign(int set, long[] a, long[] b) {
         int maskValue = -set;
         for (int i = 0; i < a.length; i++) {
@@ -557,14 +566,12 @@ public ImmutableElement add(IntegerModuloP genB) {
             Element b = (Element)genB;
 
             // Reduce if required.
-            // if (numAdds >= maxAdds) {
-            if (numAdds > 32 - bitsPerLimb) {
+            if (numAdds > maxAddsAdd) {
                reduce(limbs);
                numAdds = 0;
             }
 
-            // if (b.numAdds >= maxAdds) {
-            if (b.numAdds > 32 - bitsPerLimb) {
+            if (b.numAdds > maxAddsAdd) {
                 reduce(b.limbs);
                 b.numAdds = 0;
             }
@@ -586,7 +593,7 @@ public ImmutableElement additiveInverse() {
                 newLimbs[i] = -limbs[i];
             }
 
-            return new ImmutableElement(newLimbs, numAdds);
+            return new ImmutableElement(newLimbs, numAdds+1);
         }
 
         protected long[] cloneLow(long[] limbs) {
@@ -604,32 +611,32 @@ public ImmutableElement multiply(IntegerModuloP genB) {
             Element b = (Element)genB;
 
             // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                 reduce(limbs);
                 numAdds = 0;
             }
 
-            if (b.numAdds > maxAdds) {
+            if (b.numAdds > maxAddsMul) {
                 reduce(b.limbs);
                 b.numAdds = 0;
             }
 
             long[] newLimbs = new long[limbs.length];
-            mult(limbs, b.limbs, newLimbs);
-            return new ImmutableElement(newLimbs, 0);
+            int numAdds = mult(limbs, b.limbs, newLimbs);
+            return new ImmutableElement(newLimbs, numAdds);
         }
 
         @Override
         public ImmutableElement square() {
             // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                 reduce(limbs);
                 numAdds = 0;
             }
 
             long[] newLimbs = new long[limbs.length];
-            IntegerPolynomial.this.square(limbs, newLimbs);
-            return new ImmutableElement(newLimbs, 0);
+            int numAdds = IntegerPolynomial.this.square(limbs, newLimbs);
+            return new ImmutableElement(newLimbs, numAdds);
         }
 
         public void addModPowerTwo(IntegerModuloP arg, byte[] result) {
@@ -637,12 +644,12 @@ public void addModPowerTwo(IntegerModuloP arg, byte[] result) {
             Element other = (Element)arg;
 
             // Reduce if required.
-            if (numAdds > 32 - bitsPerLimb) {
+            if (numAdds > maxAddsAdd) {
                 reduce(limbs);
                 numAdds = 0;
             }
 
-            if (other.numAdds > 32 - bitsPerLimb) {
+            if (other.numAdds > maxAddsAdd) {
                 reduce(other.limbs);
                 other.numAdds = 0;
             }
@@ -734,32 +741,30 @@ public MutableElement setProduct(IntegerModuloP genB) {
             Element b = (Element)genB;
 
             // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                 reduce(limbs);
                 numAdds = 0;
             }
 
-            if (b.numAdds > maxAdds) {
+            if (b.numAdds > maxAddsMul) {
                 reduce(b.limbs);
                 b.numAdds = 0;
             }
 
-            mult(limbs, b.limbs, limbs);
-            numAdds = 0;
+            numAdds = mult(limbs, b.limbs, limbs);
             return this;
         }
 
         @Override
         public MutableElement setProduct(SmallValue v) {
             // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                 reduce(limbs);
                 numAdds = 0;
             }
 
             int value = ((Limb)v).value;
-            multByInt(limbs, value);
-            numAdds = 0;
+            numAdds += multByInt(limbs, value);
             return this;
         }
 
@@ -769,14 +774,12 @@ public MutableElement setSum(IntegerModuloP genB) {
             Element b = (Element)genB;
 
             // Reduce if required.
-            // if (numAdds >= maxAdds) {
-            if (numAdds > 32 - bitsPerLimb) {
+            if (numAdds > maxAddsAdd) {
                reduce(limbs);
                numAdds = 0;
             }
 
-            // if (b.numAdds >= maxAdds) {
-            if (b.numAdds > 32 - bitsPerLimb) {
+            if (b.numAdds > maxAddsAdd) {
                 reduce(b.limbs);
                 b.numAdds = 0;
             }
@@ -795,14 +798,12 @@ public MutableElement setDifference(IntegerModuloP genB) {
             Element b = (Element)genB;
 
             // Reduce if required.
-            // if (numAdds >= maxAdds) {
-            if (numAdds > 32 - bitsPerLimb) {
+            if (numAdds > maxAddsAdd) {
                reduce(limbs);
                numAdds = 0;
             }
 
-            // if (b.numAdds >= maxAdds) {
-            if (b.numAdds > 32 - bitsPerLimb) {
+            if (b.numAdds > maxAddsAdd) {
                 reduce(b.limbs);
                 b.numAdds = 0;
             }
@@ -818,13 +819,12 @@ public MutableElement setDifference(IntegerModuloP genB) {
         @Override
         public MutableElement setSquare() {
             // Reduce if required.
-            if (numAdds > maxAdds) {
+            if (numAdds > maxAddsMul) {
                 reduce(limbs);
                 numAdds = 0;
             }
 
-            IntegerPolynomial.this.square(limbs, limbs);
-            numAdds = 0;
+            numAdds = IntegerPolynomial.this.square(limbs, limbs);;
             return this;
         }
 
@@ -833,6 +833,7 @@ public MutableElement setAdditiveInverse() {
             for (int i = 0; i < limbs.length; i++) {
                 limbs[i] = -limbs[i];
             }
+            numAdds++;
             return this;
         }
     }
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java
index 5015d186d37b9..706651330d389 100644
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomial1305.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2018, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,7 @@ private IntegerPolynomial1305() {
         super(BITS_PER_LIMB, NUM_LIMBS, 1, MODULUS);
     }
 
-    protected void mult(long[] a, long[] b, long[] r) {
+    protected int mult(long[] a, long[] b, long[] r) {
 
         // Use grade-school multiplication into primitives to avoid the
         // temporary array allocation. This is equivalent to the following
@@ -73,6 +73,7 @@ protected void mult(long[] a, long[] b, long[] r) {
         long c8 = (a[4] * b[4]);
 
         carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);
+        return 0;
     }
 
     private void carryReduce(long[] r, long c0, long c1, long c2, long c3,
@@ -99,7 +100,7 @@ private void carryReduce(long[] r, long c0, long c1, long c2, long c3,
     }
 
     @Override
-    protected void square(long[] a, long[] r) {
+    protected int square(long[] a, long[] r) {
         // Use grade-school multiplication with a simple squaring optimization.
         // Multiply into primitives to avoid the temporary array allocation.
         // This is equivalent to the following code:
@@ -122,6 +123,7 @@ protected void square(long[] a, long[] r) {
         long c8 = (a[4] * a[4]);
 
         carryReduce(r, c0, c1, c2, c3, c4, c5, c6, c7, c8);
+        return 0;
     }
 
     @Override
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java
index c6e58322d7cad..e57316ed964f6 100644
--- a/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/IntegerPolynomialModBinP.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -131,11 +131,12 @@ private void multOnly(long[] a, long[] b, long[] c) {
     }
 
     @Override
-    protected void mult(long[] a, long[] b, long[] r) {
+    protected int mult(long[] a, long[] b, long[] r) {
 
         long[] c = new long[2 * numLimbs];
         multOnly(a, b, c);
         carryReduce(c, r);
+        return 0;
     }
 
     private void modReduceInBits(long[] limbs, int index, int bits, long x) {
@@ -188,7 +189,7 @@ protected void reduce(long[] a) {
     }
 
     @Override
-    protected void square(long[] a, long[] r) {
+    protected int square(long[] a, long[] r) {
 
         long[] c = new long[2 * numLimbs];
         for (int i = 0; i < numLimbs; i++) {
@@ -199,7 +200,7 @@ protected void square(long[] a, long[] r) {
         }
 
         carryReduce(c, r);
-
+        return 0;
     }
 
     /**
diff --git a/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java b/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java
new file mode 100644
index 0000000000000..d4c0348eb9d5a
--- /dev/null
+++ b/src/java.base/share/classes/sun/security/util/math/intpoly/MontgomeryIntegerPolynomialP256.java
@@ -0,0 +1,560 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sun.security.util.math.intpoly;
+
+import sun.security.util.math.ImmutableIntegerModuloP;
+import sun.security.util.math.IntegerMontgomeryFieldModuloP;
+import sun.security.util.math.SmallValue;
+import sun.security.util.math.IntegerFieldModuloP;
+import java.lang.Math;
+import java.math.BigInteger;
+import jdk.internal.vm.annotation.IntrinsicCandidate;
+
+// Reference:
+// - [1] Shay Gueron and Vlad Krasnov "Fast Prime Field Elliptic Curve
+//       Cryptography with 256 Bit Primes"
+//
+public final class MontgomeryIntegerPolynomialP256 extends IntegerPolynomial
+        implements IntegerMontgomeryFieldModuloP {
+    private static final int BITS_PER_LIMB = 52;
+    private static final int NUM_LIMBS = 5;
+    private static final int MAX_ADDS = 0;
+    public static final BigInteger MODULUS = evaluateModulus();
+    private static final long LIMB_MASK = -1L >>> (64 - BITS_PER_LIMB);
+
+    public static final MontgomeryIntegerPolynomialP256 ONE = new MontgomeryIntegerPolynomialP256();
+
+    // h = 2^(2*260)%p = 0x4fffffffdfffffffffffffffefffffffbffffffff000000000000000300
+    // oneActual = 1
+    // oneMont = (1*2^260) mod p
+    // modulus = p
+    private static final long[] h = new long[] {
+        0x0000000000000300L, 0x000ffffffff00000L, 0x000ffffefffffffbL,
+        0x000fdfffffffffffL, 0x0000000004ffffffL };
+    private static final long[] oneActual = new long[] {
+        0x0000000000000001L, 0x0000000000000000L, 0x0000000000000000L,
+        0x0000000000000000L, 0x0000000000000000L };
+    private static final long[] oneMont = new long[] {
+        0x0000000000000010L, 0x000f000000000000L, 0x000fffffffffffffL,
+        0x000ffeffffffffffL, 0x00000000000fffffL };
+    private static final long[] zero = new long[] {
+        0x0000000000000000L, 0x0000000000000000L, 0x0000000000000000L,
+        0x0000000000000000L, 0x0000000000000000L };
+    private static final long[] modulus = new long[] {
+        0x000fffffffffffffL, 0x00000fffffffffffL, 0x0000000000000000L,
+        0x0000001000000000L, 0x0000ffffffff0000L };
+
+    private MontgomeryIntegerPolynomialP256() { // used to build ONE
+        super(BITS_PER_LIMB, NUM_LIMBS, MAX_ADDS, MODULUS);
+    }
+
+    public IntegerFieldModuloP residueField() {
+        return IntegerPolynomialP256.ONE; // field fromMontgomery() maps into
+    }
+
+    // Modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1, i.e. 2^256 plus the terms
+    // (224%nat,-1)::(192%nat,1)::(96%nat,1)::(0%nat,-1)::nil.
+    private static BigInteger evaluateModulus() {
+        return BigInteger.ONE.shiftLeft(256)
+                .subtract(BigInteger.ONE.shiftLeft(224))
+                .add(BigInteger.ONE.shiftLeft(192))
+                .add(BigInteger.ONE.shiftLeft(96))
+                .subtract(BigInteger.ONE);
+    }
+
+    @Override
+    public ImmutableElement get0() {
+        return new ImmutableElement(zero, 0); // all-zero limbs, numAdds 0
+    }
+
+    // One in the Montgomery domain: (1 * 2^260) mod p, stored in oneMont
+    @Override
+    public ImmutableElement get1() {
+        return new ImmutableElement(oneMont, 0);
+    }
+
+    // Returns v in the Montgomery domain: congruent to (v * 2^260) mod p
+    @Override
+    public ImmutableElement getElement(BigInteger v) {
+        long[] vLimbs = new long[NUM_LIMBS];
+        long[] montLimbs = new long[NUM_LIMBS];
+        setLimbsValuePositive(v, vLimbs);
+
+        // v * h * 2^-260 = v * 2^260 (mod p), since h = 2^520 mod p
+        int numAdds = mult(vLimbs, h, montLimbs);
+        return new ImmutableElement(montLimbs, numAdds);
+    }
+
+    @Override
+    public SmallValue getSmallValue(int value) {
+        // Explicitly here as a reminder that SmallValue stays in residue domain
+        // See multByInt below for how this is used
+        return super.getSmallValue(value);
+    }
+
+    /*
+     * This function is used by IntegerPolynomial.setProduct(SmallValue v) to
+     * multiply by a small constant (i.e. (int) 1,2,3,4). Instead of doing a
+     * Montgomery conversion followed by a Montgomery multiplication, just use
+     * the spare top (64-BITS_PER_LIMB) bits to multiply by a constant. (See [1]
+     * Section 4.) The limbs stay unreduced; b - 1 is returned as the numAdds
+     * increment. The assert shifts 1L: an int shift distance is taken mod 32.
+     * NOTE(review): only 64-BITS_PER_LIMB bits are spare - tighter bound?
+     */
+    @Override
+    protected int multByInt(long[] a, long b) {
+        assert (b < (1L << BITS_PER_LIMB));
+        for (int i = 0; i < a.length; i++) {
+            a[i] *= b;
+        }
+        return (int) (b - 1);
+    }
+
+    @Override
+    public ImmutableIntegerModuloP fromMontgomery(ImmutableIntegerModuloP n) {
+        assert n.getField() == MontgomeryIntegerPolynomialP256.ONE;
+
+        ImmutableElement nn = (ImmutableElement) n;
+        long[] r1 = new long[NUM_LIMBS]; // n with the 2^260 factor removed
+        long[] r2 = new long[2 * NUM_LIMBS]; // r1 as half-width limbs
+        long[] limbs = nn.getLimbs();
+        reduce(limbs); // in place, on the array from nn.getLimbs()
+        MontgomeryIntegerPolynomialP256.ONE.mult(limbs, oneActual, r1);
+        reduce(r1);
+        halfLimbs(r1, r2); // 52-bit limbs -> pairs of 26-bit limbs
+        return IntegerPolynomialP256.ONE.new ImmutableElement(r2, 0);
+    }
+
+    // Splits every limb of a into two half-width limbs of r: r[2*i] gets the
+    // low BITS_PER_LIMB / 2 bits of a[i] and r[2*i + 1] gets the bits above
+    // them (arithmetic shift). Reads NUM_LIMBS entries of a and writes
+    // 2 * NUM_LIMBS entries of r.
+    private void halfLimbs(long[] a, long[] r) {
+        final long halfWidth = BITS_PER_LIMB / 2;
+        final long lowHalfMask = -1L >>> (64 - halfWidth);
+
+        for (int i = 0; i < NUM_LIMBS; i++) {
+            final int lo = 2 * i;
+            r[lo] = a[i] & lowHalfMask;
+            r[lo + 1] = a[i] >> halfWidth;
+        }
+    }
+
+    @Override
+    protected int square(long[] a, long[] r) {
+        return mult(a, a, r); // no separate squaring path; same as a * a
+    }
+
+    /**
+     * Unrolled Word-by-Word Montgomery Multiplication r = a * b * 2^-260 (mod P)
+     *
+     * See [1] Figure 5. "Algorithm 2: Word-by-Word Montgomery Multiplication
+     * for a Montgomery Friendly modulus p". Note: Step 6. Skipped; Instead use
+     * numAdds to reuse existing overflow logic.
+     */
+    @IntrinsicCandidate
+    protected int mult(long[] a, long[] b, long[] r) {
+        long aa0 = a[0];
+        long aa1 = a[1];
+        long aa2 = a[2];
+        long aa3 = a[3];
+        long aa4 = a[4];
+
+        long bb0 = b[0];
+        long bb1 = b[1];
+        long bb2 = b[2];
+        long bb3 = b[3];
+        long bb4 = b[4];
+
+        final long shift1 = 64 - BITS_PER_LIMB; // 12
+        final long shift2 = BITS_PER_LIMB; // 52
+
+        long d0, d1, d2, d3, d4;      // low digits from multiplication
+        long dd0, dd1, dd2, dd3, dd4; // high digits from multiplication
+        long n, n0, n1, n2, n3, n4,
+            nn0, nn1, nn2, nn3, nn4; // modulus multiple digits
+        long c0, c1, c2, c3, c4, c5, c6, c7, c8, c9; // multiplication result
+                                                     // digits for each column
+
+        // Row 0 - multiply by aa0 and reduce out c0
+        d0 = aa0 * bb0;
+        dd0 = Math.unsignedMultiplyHigh(aa0, bb0) << shift1 | (d0 >>> shift2);
+        d0 &= LIMB_MASK;
+        n = d0;
+        d1 = aa0 * bb1;
+        dd1 = Math.unsignedMultiplyHigh(aa0, bb1) << shift1 | (d1 >>> shift2);
+        d1 &= LIMB_MASK;
+        d2 = aa0 * bb2;
+        dd2 = Math.unsignedMultiplyHigh(aa0, bb2) << shift1 | (d2 >>> shift2);
+        d2 &= LIMB_MASK;
+        d3 = aa0 * bb3;
+        dd3 = Math.unsignedMultiplyHigh(aa0, bb3) << shift1 | (d3 >>> shift2);
+        d3 &= LIMB_MASK;
+        d4 = aa0 * bb4;
+        dd4 = Math.unsignedMultiplyHigh(aa0, bb4) << shift1 | (d4 >>> shift2);
+        d4 &= LIMB_MASK;
+
+        n0 = n * modulus[0];
+        nn0 = Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
+        n0 &= LIMB_MASK;
+        n1 = n * modulus[1];
+        nn1 = Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
+        n1 &= LIMB_MASK;
+        n2 = n * modulus[2];
+        nn2 = Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
+        n2 &= LIMB_MASK;
+        n3 = n * modulus[3];
+        nn3 = Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
+        n3 &= LIMB_MASK;
+        n4 = n * modulus[4];
+        nn4 = Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
+        n4 &= LIMB_MASK;
+
+        dd0 += nn0;
+        d0 += n0;
+        dd1 += nn1;
+        d1 += n1;
+        dd2 += nn2;
+        d2 += n2;
+        dd3 += nn3;
+        d3 += n3;
+        dd4 += nn4;
+        d4 += n4;
+
+        c1 = d1 + dd0 + (d0 >>> BITS_PER_LIMB);
+        c2 = d2 + dd1;
+        c3 = d3 + dd2;
+        c4 = d4 + dd3;
+        c5 = dd4;
+
+        // Row 1 - multiply by aa1 and reduce out c1
+        d0 = aa1 * bb0;
+        dd0 = Math.unsignedMultiplyHigh(aa1, bb0) << shift1 | (d0 >>> shift2);
+        d0 &= LIMB_MASK;
+        d0 += c1;
+        n = d0 & LIMB_MASK;
+        d1 = aa1 * bb1;
+        dd1 = Math.unsignedMultiplyHigh(aa1, bb1) << shift1 | (d1 >>> shift2);
+        d1 &= LIMB_MASK;
+        d2 = aa1 * bb2;
+        dd2 = Math.unsignedMultiplyHigh(aa1, bb2) << shift1 | (d2 >>> shift2);
+        d2 &= LIMB_MASK;
+        d3 = aa1 * bb3;
+        dd3 = Math.unsignedMultiplyHigh(aa1, bb3) << shift1 | (d3 >>> shift2);
+        d3 &= LIMB_MASK;
+        d4 = aa1 * bb4;
+        dd4 = Math.unsignedMultiplyHigh(aa1, bb4) << shift1 | (d4 >>> shift2);
+        d4 &= LIMB_MASK;
+
+        n0 = n * modulus[0];
+        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
+        d0 += n0 & LIMB_MASK;
+        n1 = n * modulus[1];
+        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
+        d1 += n1 & LIMB_MASK;
+        n2 = n * modulus[2];
+        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
+        d2 += n2 & LIMB_MASK;
+        n3 = n * modulus[3];
+        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
+        d3 += n3 & LIMB_MASK;
+        n4 = n * modulus[4];
+        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
+        d4 += n4 & LIMB_MASK;
+
+        c2 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
+        c3 += d2 + dd1;
+        c4 += d3 + dd2;
+        c5 += d4 + dd3;
+        c6 = dd4;
+
+        // Row 2 - multiply by aa2 and reduce out c2
+        d0 = aa2 * bb0;
+        dd0 = Math.unsignedMultiplyHigh(aa2, bb0) << shift1 | (d0 >>> shift2);
+        d0 &= LIMB_MASK;
+        d0 += c2;
+        n = d0 & LIMB_MASK;
+        d1 = aa2 * bb1;
+        dd1 = Math.unsignedMultiplyHigh(aa2, bb1) << shift1 | (d1 >>> shift2);
+        d1 &= LIMB_MASK;
+        d2 = aa2 * bb2;
+        dd2 = Math.unsignedMultiplyHigh(aa2, bb2) << shift1 | (d2 >>> shift2);
+        d2 &= LIMB_MASK;
+        d3 = aa2 * bb3;
+        dd3 = Math.unsignedMultiplyHigh(aa2, bb3) << shift1 | (d3 >>> shift2);
+        d3 &= LIMB_MASK;
+        d4 = aa2 * bb4;
+        dd4 = Math.unsignedMultiplyHigh(aa2, bb4) << shift1 | (d4 >>> shift2);
+        d4 &= LIMB_MASK;
+
+        n0 = n * modulus[0];
+        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
+        d0 += n0 & LIMB_MASK;
+        n1 = n * modulus[1];
+        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
+        d1 += n1 & LIMB_MASK;
+        n2 = n * modulus[2];
+        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
+        d2 += n2 & LIMB_MASK;
+        n3 = n * modulus[3];
+        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
+        d3 += n3 & LIMB_MASK;
+        n4 = n * modulus[4];
+        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
+        d4 += n4 & LIMB_MASK;
+
+        c3 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
+        c4 += d2 + dd1;
+        c5 += d3 + dd2;
+        c6 += d4 + dd3;
+        c7 = dd4;
+
+        // Row 3 - multiply by aa3 and reduce out c3
+        d0 = aa3 * bb0;
+        dd0 = Math.unsignedMultiplyHigh(aa3, bb0) << shift1 | (d0 >>> shift2);
+        d0 &= LIMB_MASK;
+        d0 += c3;
+        n = d0 & LIMB_MASK;
+        d1 = aa3 * bb1;
+        dd1 = Math.unsignedMultiplyHigh(aa3, bb1) << shift1 | (d1 >>> shift2);
+        d1 &= LIMB_MASK;
+        d2 = aa3 * bb2;
+        dd2 = Math.unsignedMultiplyHigh(aa3, bb2) << shift1 | (d2 >>> shift2);
+        d2 &= LIMB_MASK;
+        d3 = aa3 * bb3;
+        dd3 = Math.unsignedMultiplyHigh(aa3, bb3) << shift1 | (d3 >>> shift2);
+        d3 &= LIMB_MASK;
+        d4 = aa3 * bb4;
+        dd4 = Math.unsignedMultiplyHigh(aa3, bb4) << shift1 | (d4 >>> shift2);
+        d4 &= LIMB_MASK;
+
+        n0 = n * modulus[0];
+        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
+        d0 += n0 & LIMB_MASK;
+        n1 = n * modulus[1];
+        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
+        d1 += n1 & LIMB_MASK;
+        n2 = n * modulus[2];
+        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
+        d2 += n2 & LIMB_MASK;
+        n3 = n * modulus[3];
+        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
+        d3 += n3 & LIMB_MASK;
+        n4 = n * modulus[4];
+        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
+        d4 += n4 & LIMB_MASK;
+
+        c4 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
+        c5 += d2 + dd1;
+        c6 += d3 + dd2;
+        c7 += d4 + dd3;
+        c8 = dd4;
+
+        // Row 4 - multiply by aa4 and reduce out c4
+        d0 = aa4 * bb0;
+        dd0 = Math.unsignedMultiplyHigh(aa4, bb0) << shift1 | (d0 >>> shift2);
+        d0 &= LIMB_MASK;
+        d0 += c4;
+        n = d0 & LIMB_MASK;
+        d1 = aa4 * bb1;
+        dd1 = Math.unsignedMultiplyHigh(aa4, bb1) << shift1 | (d1 >>> shift2);
+        d1 &= LIMB_MASK;
+        d2 = aa4 * bb2;
+        dd2 = Math.unsignedMultiplyHigh(aa4, bb2) << shift1 | (d2 >>> shift2);
+        d2 &= LIMB_MASK;
+        d3 = aa4 * bb3;
+        dd3 = Math.unsignedMultiplyHigh(aa4, bb3) << shift1 | (d3 >>> shift2);
+        d3 &= LIMB_MASK;
+        d4 = aa4 * bb4;
+        dd4 = Math.unsignedMultiplyHigh(aa4, bb4) << shift1 | (d4 >>> shift2);
+        d4 &= LIMB_MASK;
+
+        n0 = n * modulus[0];
+        dd0 += Math.unsignedMultiplyHigh(n, modulus[0]) << shift1 | (n0 >>> shift2);
+        d0 += n0 & LIMB_MASK;
+        n1 = n * modulus[1];
+        dd1 += Math.unsignedMultiplyHigh(n, modulus[1]) << shift1 | (n1 >>> shift2);
+        d1 += n1 & LIMB_MASK;
+        n2 = n * modulus[2];
+        dd2 += Math.unsignedMultiplyHigh(n, modulus[2]) << shift1 | (n2 >>> shift2);
+        d2 += n2 & LIMB_MASK;
+        n3 = n * modulus[3];
+        dd3 += Math.unsignedMultiplyHigh(n, modulus[3]) << shift1 | (n3 >>> shift2);
+        d3 += n3 & LIMB_MASK;
+        n4 = n * modulus[4];
+        dd4 += Math.unsignedMultiplyHigh(n, modulus[4]) << shift1 | (n4 >>> shift2);
+        d4 += n4 & LIMB_MASK;
+
+        c5 += d1 + dd0 + (d0 >>> BITS_PER_LIMB);
+        c6 += d2 + dd1 + (c5 >>> BITS_PER_LIMB);
+        c7 += d3 + dd2 + (c6 >>> BITS_PER_LIMB);
+        c8 += d4 + dd3 + (c7 >>> BITS_PER_LIMB);
+        c9 = dd4 + (c8 >>> BITS_PER_LIMB);
+
+        c5 &= LIMB_MASK;
+        c6 &= LIMB_MASK;
+        c7 &= LIMB_MASK;
+        c8 &= LIMB_MASK;
+
+        // At this point, the result could overflow by one modulus.
+        c0 = c5 - modulus[0];
+        c1 = c6 - modulus[1] + (c0 >> BITS_PER_LIMB);
+        c0 &= LIMB_MASK;
+        c2 = c7 - modulus[2] + (c1 >> BITS_PER_LIMB);
+        c1 &= LIMB_MASK;
+        c3 = c8 - modulus[3] + (c2 >> BITS_PER_LIMB);
+        c2 &= LIMB_MASK;
+        c4 = c9 - modulus[4] + (c3 >> BITS_PER_LIMB);
+        c3 &= LIMB_MASK;
+
+        long mask = c4 >> BITS_PER_LIMB; // Signed shift!
+
+        r[0] = ((c5 & mask) | (c0 & ~mask));
+        r[1] = ((c6 & mask) | (c1 & ~mask));
+        r[2] = ((c7 & mask) | (c2 & ~mask));
+        r[3] = ((c8 & mask) | (c3 & ~mask));
+        r[4] = ((c9 & mask) | (c4 & ~mask));
+
+        return 0;
+    }
+
+    @Override
+    protected void finalCarryReduceLast(long[] limbs) {
+        reduce(limbs); // this step is delegated entirely to reduce(limbs)
+    }
+
+    @Override
+    protected long carryValue(long x) {
+        return x >> BITS_PER_LIMB; // arithmetic (sign-extending) shift
+    }
+
+    @Override
+    protected void postEncodeCarry(long[] v) {
+        // Intentionally empty: not needed because carry is unsigned
+    }
+
+    // Proof (carry is the part of limbs[4] from bit 48 up, i.e. from 2^256 up):
+    // carry * 2^256 (mod p) ==  carry * [2^256 - p] (mod p)
+    //                       ==  carry * [2^256 - (2^256 -2^224 +2^192 +2^96 -1)] (mod p)
+    //                       ==  carry * [2^224 -2^192 -2^96 +1] (mod p)
+    @Override
+    protected void reduce(long[] limbs) {
+        long b0 = limbs[0];
+        long b1 = limbs[1];
+        long b2 = limbs[2];
+        long b3 = limbs[3];
+        long b4 = limbs[4];
+        long carry = b4 >> 48; // max 16-bits; bit 48 of limb 4 is 2^(4*52+48) = 2^256
+        b4 -= carry << 48; // remove the extracted bits from limb 4
+
+        // +carry * 2^0: limb 0, bit 0
+        b0 += carry;
+        // -carry * 2^96: limb 1, bit 44 (96 = 52 + 44)
+        b1 -= carry << 44;
+        // -carry * 2^192: limb 3, bit 36 (192 = 3*52 + 36)
+        b3 -= carry << 36;
+        // +carry * 2^224: limb 4, bit 16 (224 = 4*52 + 16)
+        b4 += carry << 16;
+
+        b1 += b0 >> BITS_PER_LIMB; // signed shifts: a negative limb borrows from the next
+        b2 += b1 >> BITS_PER_LIMB;
+        b3 += b2 >> BITS_PER_LIMB;
+        b4 += b3 >> BITS_PER_LIMB;
+
+        b0 &= LIMB_MASK; // b4 is deliberately left unmasked; its high bits drive the mask below
+        b1 &= LIMB_MASK;
+        b2 &= LIMB_MASK;
+        b3 &= LIMB_MASK;
+
+        long c0, c1, c2, c3, c4; // c = b + modulus, computed limb by limb with carries
+        c0 = modulus[0] + b0;
+        c1 = modulus[1] + b1 + (c0 >> BITS_PER_LIMB);
+        c0 &= LIMB_MASK;
+        c2 = modulus[2] + b2 + (c1 >> BITS_PER_LIMB);
+        c1 &= LIMB_MASK;
+        c3 = modulus[3] + b3 + (c2 >> BITS_PER_LIMB);
+        c2 &= LIMB_MASK;
+        c4 = modulus[4] + b4 + (c3 >> BITS_PER_LIMB);
+        c3 &= LIMB_MASK;
+
+        long mask = b4 >> BITS_PER_LIMB; // Signed shift! Sign-extends b4 into the select mask
+
+        limbs[0] = (b0 & ~mask) | (c0 & mask); // mask bit 0 keeps b, mask bit 1 takes c
+        limbs[1] = (b1 & ~mask) | (c1 & mask);
+        limbs[2] = (b2 & ~mask) | (c2 & mask);
+        limbs[3] = (b3 & ~mask) | (c3 & mask);
+        limbs[4] = (b4 & ~mask) | (c4 & mask);
+    }
+
+    public ImmutableElement getElement(byte[] v, int offset, int length,
+            byte highByte) {
+        // Decode the input bytes into limbs with the superclass encoder.
+        long[] decoded = new long[NUM_LIMBS];
+        super.encode(v, offset, length, highByte, decoded);
+
+        // Convert to Montgomery domain: multiply by h; the value returned by
+        // mult() is passed on as the element's numAdds.
+        long[] converted = new long[NUM_LIMBS];
+        return new ImmutableElement(converted, mult(decoded, h, converted));
+    }
+
+    /*
+     * This function 'moves/reduces' digit 'v' to the 'lower' limbs
+     *
+     * The result is not reduced further. Carry propagation is not performed
+     * (see IntegerPolynomial.reduceHigh() for how this method is used)
+     *
+     * Proof:
+     *   v * 2^(i*52) (mod p) ==  v * 2^(52i) - v * 2^(52i-256) * p                               (mod p)
+     *                        ==  v * 2^(52i) - v * 2^(52i-256) * (2^256 -2^224 +2^192 +2^96 -1)  (mod p)
+     *                        ==  v * 2^(52i) - v * [2^(52i-256+256) -2^(52i-256+224) +2^(52i-256+192) +2^(52i-256+96) -2^(52i-256)] (mod p)
+     *                        ==  v * 2^(52i) - v * [2^(52i) -2^(52i-32) +2^(52i-64) +2^(52i-160) -2^(52i-256)]                      (mod p)
+     *   (the v * 2^(52i) terms cancel and the signs of the remaining terms flip)
+     *                        ==  v * [2^(52i-32) -2^(52i-52-12) -2^(52i-3*52-4) +2^(52i-4*52-48)] (mod p)
+     */
+    @Override
+    protected void reduceIn(long[] limbs, long v, int i) {
+        // Since top term (2^(52i-32)) will leave top 20 bits back in the same
+        // position i,
+        // "repeat same reduction on top 20 bits"
+        v += v >> 32;
+
+        // +2^(52i-32): bit 20 of limb i-1
+        limbs[i - 1] += (v << 20) & LIMB_MASK;
+
+        // -2^(52i-52-12): bit 40 of limb i-2, upper bits of v go to limb i-1
+        limbs[i - 2] -= (v << 40) & LIMB_MASK;
+        limbs[i - 1] -= v >> 12;
+
+        // -2^(52i-3*52-4): bit 48 of limb i-4, upper bits of v go to limb i-3
+        limbs[i - 4] -= (v << 48) & LIMB_MASK;
+        limbs[i - 3] -= v >> 4;
+
+        // +2^(52i-4*52-48): bit 4 of limb i-5, upper bits of v go to limb i-4
+        limbs[i - 5] += (v << 4) & LIMB_MASK;
+        limbs[i - 4] += v >> 48;
+    }
+}
\ No newline at end of file
diff --git a/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java b/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java
new file mode 100644
index 0000000000000..31d83815ed894
--- /dev/null
+++ b/test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2024, Intel Corporation. All rights reserved.
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.Random;
+import java.math.BigInteger;
+import java.lang.reflect.Field;
+import java.security.spec.ECParameterSpec;
+import sun.security.ec.ECOperations;
+import sun.security.util.ECUtil;
+import sun.security.util.NamedCurve;
+import sun.security.util.CurveDB;
+import sun.security.ec.point.*;
+import java.security.spec.ECPoint;
+import sun.security.util.KnownOIDs;
+import sun.security.util.math.IntegerMontgomeryFieldModuloP;
+import sun.security.util.math.intpoly.*;
+
+/*
+ * @test
+ * @key randomness
+ * @modules java.base/sun.security.ec java.base/sun.security.ec.point
+ *          java.base/sun.security.util java.base/sun.security.util.math
+ *          java.base/sun.security.util.math.intpoly
+ * @run main/othervm/timeout=1200 --add-opens
+ *      java.base/sun.security.ec=ALL-UNNAMED -XX:+UnlockDiagnosticVMOptions
+ *      -XX:-UseIntPolyIntrinsics ECOperationsFuzzTest
+ * @summary Unit test ECOperationsFuzzTest.
+ */
+
+/*
+ * @test
+ * @key randomness
+ * @modules java.base/sun.security.ec java.base/sun.security.ec.point
+ *          java.base/sun.security.util java.base/sun.security.util.math
+ *          java.base/sun.security.util.math.intpoly
+ * @run main/othervm/timeout=1200 --add-opens
+ *      java.base/sun.security.ec=ALL-UNNAMED -XX:+UnlockDiagnosticVMOptions
+ *      -XX:+UseIntPolyIntrinsics ECOperationsFuzzTest
+ * @summary Unit test ECOperationsFuzzTest.
+ */
+
+// This test case is NOT entirely deterministic: it seeds its pseudo-random
+// number generator with a random value. If a failure occurs, hardcode the
+// reported seed to make the test case deterministic.
+public class ECOperationsFuzzTest {
+    public static void main(String[] args) throws Exception {
+        // Note: it might be useful to increase this number during development
+        final int repeat = 10000;
+        test(repeat);
+        System.out.println("Fuzz Success");
+    }
+
+    /** Throws, naming seed and iteration, unless both points agree as affine. */
+    private static void check(MutablePoint reference, MutablePoint testValue,
+            long seed, int iter) {
+        AffinePoint affineRef = reference.asAffine();
+        AffinePoint affine = testValue.asAffine();
+        if (!affineRef.equals(affine)) {
+            throw new RuntimeException(
+                    "Found error with seed " + seed + " at iteration " + iter);
+        }
+    }
+
+    /** Runs {@code repeat} random multiply/add rounds on both ECOperations. */
+    public static void test(int repeat) throws Exception {
+        long seed = new Random().nextLong();
+        Random rnd = new Random(seed);
+        System.out.println("Random seed: " + seed); // to reproduce any failure
+
+        int keySize = 256;
+        ECParameterSpec params = ECUtil.getECParameterSpec(keySize);
+        NamedCurve curve = CurveDB.lookup(KnownOIDs.secp256r1.value());
+        ECPoint generator = curve.getGenerator();
+        BigInteger b = curve.getCurve().getB();
+        if (params == null || generator == null) {
+            throw new RuntimeException(
+                    "No EC parameters available for key size " + keySize + " bits");
+        }
+
+        ECOperations ops = ECOperations.forParameters(params).get();
+        ECOperations opsReference = new ECOperations(
+                IntegerPolynomialP256.ONE.getElement(b), P256OrderField.ONE);
+
+        boolean instanceTest1 = ops
+                .getField() instanceof IntegerMontgomeryFieldModuloP;
+        boolean instanceTest2 = opsReference
+                .getField() instanceof IntegerMontgomeryFieldModuloP;
+        if (!instanceTest1 || instanceTest2) {
+            throw new RuntimeException("Bad Initialization: ["
+                + instanceTest1 + "," + instanceTest2 + "]");
+        }
+
+        byte[] multiple = new byte[keySize / 8];
+        rnd.nextBytes(multiple);
+        multiple[keySize/8 - 1] &= 0x7f; // from opsReference.seedToScalar(multiple);
+
+        MutablePoint referencePoint = opsReference.multiply(generator, multiple);
+        MutablePoint point = ops.multiply(generator, multiple);
+        check(referencePoint, point, seed, -1);
+
+        AffinePoint refAffineGenerator = AffinePoint.fromECPoint(generator,
+                referencePoint.getField());
+        AffinePoint montAffineGenerator = AffinePoint.fromECPoint(generator,
+                point.getField());
+
+        MutablePoint refProjGenerator = new ProjectivePoint.Mutable(
+                refAffineGenerator.getX(false).mutable(),
+                refAffineGenerator.getY(false).mutable(),
+                referencePoint.getField().get1().mutable());
+
+        MutablePoint projGenerator = new ProjectivePoint.Mutable(
+                montAffineGenerator.getX(false).mutable(),
+                montAffineGenerator.getY(false).mutable(),
+                point.getField().get1().mutable());
+
+        for (int i = 0; i < repeat; i++) {
+            rnd.nextBytes(multiple);
+            multiple[keySize/8 - 1] &= 0x7f; // opsReference.seedToScalar(multiple);
+
+            MutablePoint nextReferencePoint = opsReference
+                    .multiply(referencePoint.asAffine(), multiple);
+            MutablePoint nextPoint = ops.multiply(point.asAffine().toECPoint(),
+                    multiple);
+            check(nextReferencePoint, nextPoint, seed, i);
+
+            if (rnd.nextBoolean()) {
+                opsReference.setSum(nextReferencePoint, referencePoint);
+                ops.setSum(nextPoint, point);
+                check(nextReferencePoint, nextPoint, seed, i);
+            }
+
+            if (rnd.nextBoolean()) {
+                opsReference.setSum(nextReferencePoint, refProjGenerator);
+                ops.setSum(nextPoint, projGenerator);
+                check(nextReferencePoint, nextPoint, seed, i);
+            }
+
+            if (rnd.nextInt(100) < 10) { // 10%: reset, testing generator multiply
+                referencePoint = opsReference.multiply(generator, multiple);
+                point = ops.multiply(generator, multiple);
+                check(referencePoint, point, seed, i);
+            } else {
+                referencePoint = nextReferencePoint;
+                point = nextPoint;
+            }
+        }
+    }
+}
+
+// make test TEST="test/jdk/com/sun/security/ec/ECOperationsFuzzTest.java"
\ No newline at end of file
diff --git a/test/jdk/com/sun/security/ec/ECOperationsKATTest.java b/test/jdk/com/sun/security/ec/ECOperationsKATTest.java
new file mode 100644
index 0000000000000..3c98b5f63cdbf
--- /dev/null
+++ b/test/jdk/com/sun/security/ec/ECOperationsKATTest.java
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2024, Intel Corporation. All rights reserved.
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.Random;
+import java.util.List;
+import java.util.LinkedList;
+import java.math.BigInteger;
+import java.lang.reflect.Field;
+import java.security.spec.ECParameterSpec;
+import sun.security.ec.ECOperations;
+import sun.security.util.ECUtil;
+import sun.security.util.NamedCurve;
+import sun.security.util.CurveDB;
+import sun.security.ec.point.*;
+import java.security.spec.ECPoint;
+import sun.security.util.KnownOIDs;
+import sun.security.util.math.IntegerMontgomeryFieldModuloP;
+import sun.security.util.math.intpoly.*;
+
+/*
+ * @test
+ * @modules java.base/sun.security.ec java.base/sun.security.ec.point
+ *          java.base/sun.security.util java.base/sun.security.util.math
+ *          java.base/sun.security.util.math.intpoly
+ * @run main/othervm --add-opens java.base/sun.security.ec=ALL-UNNAMED
+ *      ECOperationsKATTest
+ * @summary Unit test ECOperationsKATTest.
+ */
+
+/*
+ * @test
+ * @modules java.base/sun.security.ec java.base/sun.security.ec.point
+ *          java.base/sun.security.util java.base/sun.security.util.math
+ *          java.base/sun.security.util.math.intpoly
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xcomp
+ *      -XX:-TieredCompilation --add-opens java.base/sun.security.ec=ALL-UNNAMED
+ *      -XX:+UnlockDiagnosticVMOptions ECOperationsKATTest
+ * @summary Unit test ECOperationsKATTest.
+ */
+
+public class ECOperationsKATTest {
+    private static final java.util.HexFormat hex = java.util.HexFormat.of();
+
+    /** Runs every vector in testList; throws if any of them failed. */
+    public static void main(String[] args) throws Exception {
+        int testsPassed = 0;
+        int testNumber = 0;
+
+        for (TestData test : testList) {
+            System.out.println("*** Test " + ++testNumber + ": " + test.testName);
+            if (runSingleTest(test)) {
+                testsPassed++;
+            }
+        }
+        System.out.println();
+
+        if (testsPassed != testNumber) {
+            throw new RuntimeException(
+                    "One or more tests failed. Check output for details");
+        }
+    }
+
+    /** Returns true iff testValue matches reference; prints both otherwise. */
+    private static boolean check(MutablePoint testValue, ECPoint reference) {
+        AffinePoint affine = testValue.asAffine();
+        BigInteger x = affine.getX().asBigInteger();
+        BigInteger y = affine.getY().asBigInteger();
+        BigInteger refX = reference.getAffineX();
+        BigInteger refY = reference.getAffineY();
+
+        if (!refX.equals(x) || !refY.equals(y)) {
+            System.out.println("ERROR - Output Mismatch!");
+            System.out.println("Expected: X: " + refX.toString(16) + " Y: "
+                    + refY.toString(16));
+            System.out.println(
+                    "Result:   X: " + x.toString(16) + " Y: " + y.toString(16));
+            return false;
+        }
+        return true;
+    }
+
+    /** A multiplier plus the expected points after one and two multiplies. */
+    private static class TestData {
+        public TestData(String name, String keyStr, String xStr1, String yStr1,
+                String xStr2, String yStr2) {
+            testName = name;
+            // Parse the hex key, then reverse the byte order of the result
+            multiplier = hex.parseHex(keyStr);
+            sun.security.util.ArrayUtil.reverse(multiplier);
+            reference1 = new ECPoint(new BigInteger(xStr1, 16),
+                    new BigInteger(yStr1, 16));
+            reference2 = new ECPoint(new BigInteger(xStr2, 16),
+                    new BigInteger(yStr2, 16));
+        }
+
+        final String testName;
+        final byte[] multiplier;
+        final ECPoint reference1; // For generator multiplier test
+        final ECPoint reference2; // For non-generator multiplier test
+    }
+
+    public static final List<TestData> testList = List.of(
+        // (x1,y1) = mult*generator
+        // (x2,y2) = mult*mult*generator
+        new TestData("Test Vector #1",
+            "0000000000000000000000000000000000000000000000000000000000000012", // mult
+            "1057E0AB5780F470DEFC9378D1C7C87437BB4C6F9EA55C63D936266DBD781FDA", // x1
+            "F6F1645A15CBE5DC9FA9B7DFD96EE5A7DCC11B5C5EF4F1F78D83B3393C6A45A2", // y1
+            "4954047A366A91E3FD94E574DB6F2B04F3A8465883DBC55A816EA563BF54A324", // x2
+            "B5A54786FD9EA48C9FC38A0557B0C4D54F285908A7291B630D06BEE970F530D3" // y2
+        ),
+        new TestData("Test Vector #2",
+            "1200000000000000000000000000000000000000000000000000000000000000", // mult
+            "DF684E6D0D57AF8B89DA11E8F7436C3D360F531D62BDCE42C5A8B72D73D5C717", // x1
+            "9D3576BD03C09B8F416EE9C27D70AD4A425119271ACF549312CA48758F4E1FEC", // y1
+            "57C8257EEAABF5446DCFACB99DEE104367B6C9950C76797C372EB177D5FA23B3", // x2
+            "1CD3E8A34521C1C8E574EB4B99343CAA57E00725D8618F0231C7C79AA6837725" // y2
+        ),
+        new TestData("Test Vector #3",
+            "0000000000000000000000000000000120000000000000000000000000000012", // mult
+            "A69DFD47B24485E5F523BDA5FBACF03F5A7C3D22E0C2BC6705594B7B051A06D0", // x1
+            "ECF19629416BE5C9AF1E30988F3AA8B803809CF4D12944EB49C5E9892723798A", // y1
+            "1E28559F5B681C308632EE11A007B9891B3FD592C982C4926153795794295E58", // x2
+            "3C373046C27BB34609A43C91DF6D4B9AB9EB08F3B69A8F8FAE944211D8297F30" // y2
+        ),
+        new TestData("Test Vector #4",
+            "0000000000000000000000000000000000000000000000000000000000000001", // mult
+            "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", // x1
+            "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", // y1
+            "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", // x2
+            "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5" // y2
+        ),
+        new TestData("Test Vector #5",
+            "EFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", // mult
+            "66B71D0BD47344197CCFB0C9578EAF0ADB609E05BB4E8F87D56BD34F24EE7C47", // x1
+            "14A0ECB7F708C02B2BAE238D2C4607BB9D04FCE64E10A428C911D6FA25B2F0FD", // y1
+            "D25AAFD0FCC5B5E95C84C0702C138BC4D7FEB4E5F9C2DFB4301E313507EFDF44", // x2
+            "F3F04EBC7D308511B0392BB7171CF92688D6484A95A8100EDFC933613A359133" // y2
+        ),
+        new TestData("Test Vector #6",
+            "1111111111111111111111111111111111111111111111111111111111111111", // mult
+            "0217E617F0B6443928278F96999E69A23A4F2C152BDF6D6CDF66E5B80282D4ED", // x1
+            "194A7DEBCB97712D2DDA3CA85AA8765A56F45FC758599652F2897C65306E5794", // y1
+            "A83A07D6AE918359DEBCC385DA1E416EB83417435079CA8DB06005E107C309A0", // x2
+            "5AACDF816850C33EB3E54F3D0DD759B97B5E7065B2060016F73735E4A6AADE23" // y2
+        )
+    );
+
+    /** Checks one vector on both implementations: mult*G, then mult*(mult*G). */
+    private static boolean runSingleTest(TestData testData) {
+        int keySize = 256;
+        ECParameterSpec params = ECUtil.getECParameterSpec(keySize);
+        NamedCurve curve = CurveDB.lookup(KnownOIDs.secp256r1.value());
+        ECPoint generator = curve.getGenerator();
+        BigInteger b = curve.getCurve().getB();
+        if (params == null || generator == null) {
+            throw new RuntimeException(
+                    "No EC parameters available for key size " + keySize + " bits");
+        }
+
+        ECOperations ops = ECOperations.forParameters(params).get();
+        ECOperations opsReference = new ECOperations(
+                IntegerPolynomialP256.ONE.getElement(b), P256OrderField.ONE);
+
+        boolean instanceTest1 = ops
+                .getField() instanceof IntegerMontgomeryFieldModuloP;
+        boolean instanceTest2 = opsReference
+                .getField() instanceof IntegerMontgomeryFieldModuloP;
+        if (!instanceTest1 || instanceTest2) {
+            throw new RuntimeException("Bad Initialization: [" + instanceTest1 + ","
+                    + instanceTest2 + "]");
+        }
+
+        MutablePoint nextPoint = ops.multiply(generator, testData.multiplier);
+        MutablePoint nextReferencePoint = opsReference.multiply(generator,
+                testData.multiplier);
+        if (!check(nextReferencePoint, testData.reference1)
+                || !check(nextPoint, testData.reference1)) {
+            return false;
+        }
+
+        nextPoint = ops.multiply(nextPoint.asAffine(), testData.multiplier);
+        nextReferencePoint = opsReference.multiply(nextReferencePoint.asAffine(),
+                testData.multiplier);
+        return check(nextReferencePoint, testData.reference2)
+                && check(nextPoint, testData.reference2);
+    }
+}
+
+//make test TEST="test/jdk/com/sun/security/ec/ECOperationsKATTest.java"
+
+/*
+ * KAT generator using OpenSSL for reference vectors
+ * g++ ecpoint.cpp -g -lcrypto -Wno-deprecated-declarations && ./a.out
+ * (Some OpenSSL EC operations are marked internal i.e. deprecated)
+ *
+
+#include <openssl/obj_mac.h>
+#include <openssl/ec.h>
+
+void check(int rc, const char* locator) {
+  if (rc != 1) {
+    printf("Failed at %s\n", locator);
+    exit(55);
+  }
+}
+
+int main(){
+  BN_CTX* ctx = BN_CTX_new();
+  BIGNUM* k = BN_CTX_get(ctx);
+  BIGNUM* x1 = BN_CTX_get(ctx);
+  BIGNUM* y1 = BN_CTX_get(ctx);
+  BIGNUM* x2 = BN_CTX_get(ctx);
+  BIGNUM* y2 = BN_CTX_get(ctx);
+  EC_GROUP *ec_group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1);
+  EC_POINT* pubkey = EC_POINT_new(ec_group);
+  EC_POINT* pubkey2 = EC_POINT_new(ec_group);
+  int rc;
+
+  rc = BN_hex2bn(&k, "1111111111111111111111111111111111111111111111111111111111111111"); //check(rc, "set raw key");
+  rc = EC_POINT_mul(ec_group, pubkey, k, NULL, NULL, ctx);  check(rc, "mult public key");
+  rc = EC_POINT_get_affine_coordinates(ec_group, pubkey, x1, y1, ctx);   check(rc, "get affine coordinates");
+  rc = EC_POINT_mul(ec_group, pubkey2, NULL, pubkey, k, ctx);  check(rc, "mult public key");
+  rc = EC_POINT_get_affine_coordinates(ec_group, pubkey2, x2, y2, ctx);   check(rc, "get affine coordinates");
+  printf("k: %s\n", BN_bn2hex(k));
+  printf("x: %s\ny: %s\n", BN_bn2hex(x1), BN_bn2hex(y1));
+  printf("x: %s\ny: %s\n", BN_bn2hex(x2), BN_bn2hex(y2));
+
+  BN_CTX_free(ctx);
+  return 0;
+}
+ */
\ No newline at end of file
diff --git a/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java b/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java
new file mode 100644
index 0000000000000..237c0408c580c
--- /dev/null
+++ b/test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2024, Intel Corporation. All rights reserved.
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.Random;
+import java.math.BigInteger;
+import java.util.Arrays;
+import sun.security.util.math.*;
+import sun.security.util.math.intpoly.*;
+
+/*
+ * @test
+ * @key randomness
+ * @modules java.base/sun.security.util java.base/sun.security.util.math
+ * java.base/sun.security.util.math.intpoly
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-UseIntPolyIntrinsics
+ * IntegerPolynomialTest
+ * @summary Unit test
+ * IntegerPolynomial.MutableIntegerModuloP.conditionalAssign().
+ */
+
+/*
+ * @test
+ * @key randomness
+ * @modules java.base/sun.security.util java.base/sun.security.util.math
+ * java.base/sun.security.util.math.intpoly
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -Xcomp
+ * -XX:-TieredCompilation -XX:+UseIntPolyIntrinsics IntegerPolynomialTest
+ * @summary Unit test
+ * IntegerPolynomial.MutableIntegerModuloP.conditionalAssign().
+ */
+
+// Exercises conditionalSet() for each field in testFields. This test case is
+// NOT entirely deterministic: the PRNG seed is itself random. If a failure
+// occurs, hardcode the seed from the exception message to reproduce it.
+public class IntegerPolynomialTest {
+    public static void main(String[] args) throws Exception {
+        Random rnd = new Random();
+        long seed = rnd.nextLong(); // kept so failure messages can report it
+        rnd.setSeed(seed);
+
+        IntegerPolynomial testFields[] = new IntegerPolynomial[] {
+                IntegerPolynomial1305.ONE, IntegerPolynomial25519.ONE,
+                IntegerPolynomial448.ONE, IntegerPolynomialP256.ONE,
+                MontgomeryIntegerPolynomialP256.ONE, IntegerPolynomialP384.ONE,
+                IntegerPolynomialP521.ONE,
+                new IntegerPolynomialModBinP.Curve25519OrderField(),
+                new IntegerPolynomialModBinP.Curve448OrderField(),
+                P256OrderField.ONE, P384OrderField.ONE, P521OrderField.ONE,
+                Curve25519OrderField.ONE, Curve448OrderField.ONE };
+        // Per field: build two elements from random 2048-bit integers, then check both flag values
+        for (IntegerPolynomial field : testFields) {
+            ImmutableIntegerModuloP aRef = field
+                    .getElement(new BigInteger(32 * 64, rnd));
+            MutableIntegerModuloP a = aRef.mutable();
+            ImmutableIntegerModuloP bRef = field
+                    .getElement(new BigInteger(32 * 64, rnd));
+            MutableIntegerModuloP b = bRef.mutable();
+
+            a.conditionalSet(b, 0); // Flag 0: don't assign, a's limbs must still differ from b's
+            if (Arrays.equals(a.getLimbs(), b.getLimbs())) {
+                throw new RuntimeException(
+                        "[SEED " + seed + "]: Incorrect assign for " + field);
+            }
+            a.conditionalSet(b, 1); // Flag 1: assign, a's limbs must now equal b's
+            if (!Arrays.equals(a.getLimbs(), b.getLimbs())) {
+                throw new RuntimeException(
+                        "[SEED " + seed + "]: Incorrect assign for " + field);
+            }
+        }
+        System.out.println("Test Success");
+    }
+}
+
+//make test TEST="test/jdk/com/sun/security/util/math/intpoly/IntegerPolynomialTest.java"
\ No newline at end of file
diff --git a/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java b/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java
new file mode 100644
index 0000000000000..da5aa33d8310d
--- /dev/null
+++ b/test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2024, Intel Corporation. All rights reserved.
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import java.util.Random;
+import sun.security.util.math.IntegerMontgomeryFieldModuloP;
+import sun.security.util.math.ImmutableIntegerModuloP;
+import java.math.BigInteger;
+import sun.security.util.math.intpoly.*;
+
+/*
+ * @test
+ * @key randomness
+ * @modules java.base/sun.security.util java.base/sun.security.util.math
+ *          java.base/sun.security.util.math.intpoly
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:-UseIntPolyIntrinsics
+ *      MontgomeryPolynomialFuzzTest
+ * @summary Unit test MontgomeryPolynomialFuzzTest.
+ */
+
+/*
+ * @test
+ * @key randomness
+ * @modules java.base/sun.security.util java.base/sun.security.util.math
+ *          java.base/sun.security.util.math.intpoly
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UseIntPolyIntrinsics
+ *      MontgomeryPolynomialFuzzTest
+ * @summary Unit test MontgomeryPolynomialFuzzTest.
+ */
+
+// Fuzz test comparing MontgomeryIntegerPolynomialP256 results with BigInteger reference arithmetic.
+// NOT entirely deterministic: every run() draws a random seed. If a failure occurs, hardcode the reported seed
+public class MontgomeryPolynomialFuzzTest {
+    public static void main(String[] args) throws Exception {
+        // Note: it might be useful to increase this number during development
+        final int repeat = 1000000;
+        for (int i = 0; i < repeat; i++) {
+            run();
+        }
+        System.out.println("Fuzz Success");
+    }
+    // Throws, with the seed in the message, when testValue.asBigInteger() differs from reference
+    private static void check(BigInteger reference,
+            ImmutableIntegerModuloP testValue, long seed) {
+        if (!reference.equals(testValue.asBigInteger())) {
+            throw new RuntimeException("SEED: " + seed);
+        }
+    }
+    // One fuzz iteration: convert a random value below P, then randomly square and/or double it
+    public static void run() throws Exception {
+        Random rnd = new Random();
+        long seed = rnd.nextLong();
+        rnd.setSeed(seed);
+
+        IntegerMontgomeryFieldModuloP montField = MontgomeryIntegerPolynomialP256.ONE;
+        BigInteger P = MontgomeryIntegerPolynomialP256.ONE.MODULUS;
+        BigInteger r = BigInteger.ONE.shiftLeft(260).mod(P); // r = 2^260 mod P
+        BigInteger rInv = r.modInverse(P); // r^-1 mod P
+        BigInteger aRef = (new BigInteger(P.bitLength(), rnd)).mod(P);
+
+        // Test conversion to montgomery domain
+        ImmutableIntegerModuloP a = montField.getElement(aRef);
+        aRef = aRef.multiply(r).mod(P); // reference now tracks a*r mod P
+        check(aRef, a, seed);
+        // Randomly square: the reference computes (a*r)^2 * r^-1 = a^2 * r (mod P)
+        if (rnd.nextBoolean()) {
+            aRef = aRef.multiply(aRef).multiply(rInv).mod(P);
+            a = a.multiply(a);
+            check(aRef, a, seed);
+        }
+        // Randomly double: the reference uses plain modular addition
+        if (rnd.nextBoolean()) {
+            aRef = aRef.add(aRef).mod(P);
+            a = a.add(a);
+            check(aRef, a, seed);
+        }
+    }
+}
+
+//make test TEST="test/jdk/com/sun/security/util/math/intpoly/MontgomeryPolynomialFuzzTest.java"
\ No newline at end of file
diff --git a/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java b/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java
new file mode 100644
index 0000000000000..94c247c908022
--- /dev/null
+++ b/test/micro/org/openjdk/bench/javax/crypto/full/PolynomialP256Bench.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.javax.crypto.full;
+
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.annotations.Benchmark;
+import java.math.BigInteger;
+import java.util.concurrent.TimeUnit;
+import sun.security.util.math.intpoly.MontgomeryIntegerPolynomialP256;
+import sun.security.util.math.intpoly.IntegerPolynomialP256;
+import sun.security.util.math.MutableIntegerModuloP;
+import sun.security.util.math.ImmutableIntegerModuloP;
+
+@Fork(jvmArgsAppend = {"-XX:+AlwaysPreTouch",
+    "--add-exports", "java.base/sun.security.util.math.intpoly=ALL-UNNAMED",
+    "--add-exports", "java.base/sun.security.util.math=ALL-UNNAMED"}, value = 1)
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 8, time = 2)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@State(Scope.Thread)
+@BenchmarkMode(Mode.Throughput)
+public class PolynomialP256Bench { // JMH throughput benchmarks over montField and residueField elements
+    final MontgomeryIntegerPolynomialP256 montField = MontgomeryIntegerPolynomialP256.ONE;
+    final IntegerPolynomialP256 residueField = IntegerPolynomialP256.ONE;
+    final BigInteger refx =
+        new BigInteger("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16);
+    final ImmutableIntegerModuloP x = residueField.getElement(refx); // refx as a residueField element
+    final ImmutableIntegerModuloP X = montField.getElement(refx); // refx as a montField element
+    final ImmutableIntegerModuloP one = montField.get1();
+    // true: operate on the montField element X; false: on the residueField element x
+    @Param({"true", "false"})
+    private boolean isMontBench;
+    // 10000 chained setProduct() calls multiplying the selected element by itself
+    @Benchmark
+    public MutableIntegerModuloP benchMultiply() {
+        MutableIntegerModuloP test;
+        if (isMontBench) {
+            test = X.mutable();
+        } else {
+            test = x.mutable();
+        }
+
+        for (int i = 0; i< 10000; i++) {
+            test = test.setProduct(test);
+        }
+        return test;
+    }
+    // 10000 chained setSquare() calls on the selected element
+    @Benchmark
+    public MutableIntegerModuloP benchSquare() {
+        MutableIntegerModuloP test;
+        if (isMontBench) {
+            test = X.mutable();
+        } else {
+            test = x.mutable();
+        }
+
+        for (int i = 0; i< 10000; i++) {
+            test = test.setSquare();
+        }
+        return test;
+    }
+    // 10000 rounds of conditionalSet() in both directions with flags 0 and 1; uses montField elements only
+    @Benchmark
+    public MutableIntegerModuloP benchAssign() {
+        MutableIntegerModuloP test1 = X.mutable();
+        MutableIntegerModuloP test2 = one.mutable();
+        for (int i = 0; i< 10000; i++) {
+            test1.conditionalSet(test2, 0);
+            test1.conditionalSet(test2, 1);
+            test2.conditionalSet(test1, 0);
+            test2.conditionalSet(test1, 1);
+        }
+        return test2;
+    }
+}

From a0c5714dbc8a60d905f9deea153e7f31fbd64d06 Mon Sep 17 00:00:00 2001
From: Nizar Benalla <nbenalla@openjdk.org>
Date: Wed, 22 May 2024 16:31:17 +0000
Subject: [PATCH 8/9] 8332071: Convert package.html files in
 `java.management.rmi` to package-info.java 8332376: Add `@since` tags to
 `java.management.rmi`

Reviewed-by: kevinw, rriggs
---
 .../remote/rmi/RMIConnectorServer.java        |  36 +-
 .../management/remote/rmi/package-info.java   | 326 +++++++++++++++++
 .../javax/management/remote/rmi/package.html  | 329 ------------------
 3 files changed, 346 insertions(+), 345 deletions(-)
 create mode 100644 src/java.management.rmi/share/classes/javax/management/remote/rmi/package-info.java
 delete mode 100644 src/java.management.rmi/share/classes/javax/management/remote/rmi/package.html

diff --git a/src/java.management.rmi/share/classes/javax/management/remote/rmi/RMIConnectorServer.java b/src/java.management.rmi/share/classes/javax/management/remote/rmi/RMIConnectorServer.java
index 139b7653424c8..0502cb8a971ff 100644
--- a/src/java.management.rmi/share/classes/javax/management/remote/rmi/RMIConnectorServer.java
+++ b/src/java.management.rmi/share/classes/javax/management/remote/rmi/RMIConnectorServer.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -102,21 +102,23 @@ public class RMIConnectorServer extends JMXConnectorServer {
         "jmx.remote.rmi.server.socket.factory";
 
     /**
-    * Name of the attribute that specifies an
-    * {@link ObjectInputFilter} pattern string to filter classes acceptable
-    * for {@link RMIServer#newClient(java.lang.Object) RMIServer.newClient()}
-    * remote method call.
-    * <p>
-    * The filter pattern must be in same format as used in
-    * {@link java.io.ObjectInputFilter.Config#createFilter}
-    * <p>
-    * This list of classes allowed by filter should correspond to the
-    * transitive closure of the credentials class (or classes) used by the
-    * installed {@linkplain JMXAuthenticator} associated with the
-    * {@linkplain RMIServer} implementation.
-    * If the attribute is not set then any class is deemed acceptable.
-    * @see ObjectInputFilter
-    */
+     * Name of the attribute that specifies an
+     * {@link ObjectInputFilter} pattern string to filter classes acceptable
+     * for {@link RMIServer#newClient(java.lang.Object) RMIServer.newClient()}
+     * remote method call.
+     * <p>
+     * The filter pattern must be in same format as used in
+     * {@link java.io.ObjectInputFilter.Config#createFilter}
+     * <p>
+     * This list of classes allowed by filter should correspond to the
+     * transitive closure of the credentials class (or classes) used by the
+     * installed {@linkplain JMXAuthenticator} associated with the
+     * {@linkplain RMIServer} implementation.
+     * If the attribute is not set then any class is deemed acceptable.
+     * @see ObjectInputFilter
+     *
+     * @since 10
+     */
     public static final String CREDENTIALS_FILTER_PATTERN =
         "jmx.remote.rmi.server.credentials.filter.pattern";
 
@@ -152,6 +154,8 @@ public class RMIConnectorServer extends JMXConnectorServer {
      * an allow-list that is too narrow or a reject-list that is too wide may
      * prevent legitimate clients from interoperating with the
      * {@code JMXConnectorServer}.
+     *
+     * @since 10
      */
     public static final String SERIAL_FILTER_PATTERN =
        "jmx.remote.rmi.server.serial.filter.pattern";
diff --git a/src/java.management.rmi/share/classes/javax/management/remote/rmi/package-info.java b/src/java.management.rmi/share/classes/javax/management/remote/rmi/package-info.java
new file mode 100644
index 0000000000000..d26c797e6888e
--- /dev/null
+++ b/src/java.management.rmi/share/classes/javax/management/remote/rmi/package-info.java
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2002, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ *    <p>The RMI connector is a connector for the JMX Remote API that
+ *      uses RMI to transmit client requests to a remote MBean server.
+ *      This package defines the classes that the user of an RMI
+ *      connector needs to reference directly, for both the client and
+ *      server sides.  It also defines certain classes that the user
+ *      will not usually reference directly, but that must be defined so
+ *      that different implementations of the RMI connector can
+ *      interoperate.</p>
+ *
+ *    <p>The RMI connector supports the JRMP transport for RMI.</p>
+ *
+ *    <p>Like most connectors in the JMX Remote API, an RMI connector
+ *      usually has an address, which
+ *      is a {@link javax.management.remote.JMXServiceURL
+ *      JMXServiceURL}.  The protocol part of this address is
+ *      <code>rmi</code> for a connector that uses the default RMI
+ *      transport (JRMP).</p>
+ *
+ *    <p>There are two forms for RMI connector addresses:</p>
+ *
+ *    <ul>
+ *      <li>
+ *  In the <em>JNDI form</em>, the URL indicates <em>where to find
+ *  an RMI stub for the connector</em>.  This RMI stub is a Java
+ *  object of type {@link javax.management.remote.rmi.RMIServer
+ *  RMIServer} that gives remote access to the connector server.
+ *  With this address form, the RMI stub is obtained from an
+ *  external directory entry included in the URL.  An external
+ *  directory is any directory recognized by {@link javax.naming
+ *  JNDI}, typically the RMI registry, LDAP, or COS Naming.
+ *
+ *      <li>
+ *  In the <em>encoded form</em>, the URL directly includes the
+ *  information needed to connect to the connector server.  When
+ *  using RMI/JRMP, the encoded form is the serialized RMI stub
+ *  for the server object, encoded using BASE64 without embedded
+ *  newlines.
+ *    </ul>
+ *
+ *    <p>Addresses are covered in more detail below.</p>
+ *
+ *
+ *    <h2>Creating an RMI connector server</h2>
+ *
+ *    <p>The usual way to create an RMI connector server is to supply an
+ *      RMI connector address to the method {@link
+ *      javax.management.remote.JMXConnectorServerFactory#newJMXConnectorServer
+ *      JMXConnectorServerFactory.newJMXConnectorServer}.  The MBean
+ *      server to which the connector server is attached can be
+ *      specified as a parameter to that method.  Alternatively, the
+ *      connector server can be registered as an MBean in that MBean
+ *      server.</p>
+ *
+ *    <p>An RMI connector server can also be created by constructing an
+ *      instance of {@link
+ *      javax.management.remote.rmi.RMIConnectorServer
+ *      RMIConnectorServer}, explicitly or through the MBean server's
+ *      <code>createMBean</code> method.</p>
+ *
+ *    <h3>Choosing the RMI transport</h3>
+ *
+ *    <p>You can choose the RMI transport by specifying
+ *      <code>rmi</code> in the <code><em>protocol</em></code> part of the
+ *      <code>serviceURL</code> when creating the connector server.  You
+ *      can also create specialized connector servers by instantiating
+ *      an appropriate subclass of {@link
+ *      javax.management.remote.rmi.RMIServerImpl RMIServerImpl} and
+ *      supplying it to the <code>RMIConnectorServer</code>
+ *      constructor.</p>
+ *
+ *
+ *    <h3><a id="servergen">Connector addresses generated by the
+ *  server</a></h3>
+ *
+ *    <p>If the <code>serviceURL</code> you specify has an empty URL
+ *      path (after the optional host and port), or if you do not
+ *      specify a <code>serviceURL</code>, then the connector server
+ *      will fabricate a new <code>JMXServiceURL</code> that clients can
+ *      use to connect:</p>
+ *
+ *    <ul>
+ *
+ *      <li><p>If the <code>serviceURL</code> looks like:</p>
+ *
+ *  <pre>
+ *  <code>service:jmx:rmi://<em>host</em>:<em>port</em></code>
+ *  </pre>
+ *
+ *  <p>then the connector server will generate an {@link
+ *  javax.management.remote.rmi.RMIJRMPServerImpl
+ *  RMIJRMPServerImpl} and the returned <code>JMXServiceURL</code>
+ *  looks like:</p>
+ *
+ *  <pre>
+ *  <code>service:jmx:rmi://<em>host</em>:<em>port</em>/stub/<em>XXXX</em></code>
+ *  </pre>
+ *
+ *  <p>where <code><em>XXXX</em></code> is the serialized form of the
+ *  stub for the generated object, encoded in BASE64 without
+ *  newlines.</p>
+ *
+ *      <li><p>If there is no <code>serviceURL</code>, there must be a
+ *  user-provided <code>RMIServerImpl</code>.  The connector server
+ *        will generate a <code>JMXServiceURL</code> using the <code>rmi</code>
+ *  form.</p>
+ *
+ *    </ul>
+ *
+ *    <p>The <code><em>host</em></code> in a user-provided
+ *      <code>serviceURL</code> is optional.  If present, it is copied
+ *      into the generated <code>JMXServiceURL</code> but otherwise
+ *      ignored.  If absent, the generated <code>JMXServiceURL</code>
+ *      will have the local host name.</p>
+ *
+ *    <p>The <code><em>port</em></code> in a user-provided
+ *      <code>serviceURL</code> is also optional.  If present, it is
+ *      also copied into the generated <code>JMXServiceURL</code>;
+ *      otherwise, the generated <code>JMXServiceURL</code> has no port.
+ *      For a <code>serviceURL</code> using the <code>rmi</code>
+ *      protocol, the <code><em>port</em></code>, if present, indicates
+ *      what port the generated remote object should be exported on.  It
+ *      has no other effect.</p>
+ *
+ *    <p>If the user provides an <code>RMIServerImpl</code> rather than a
+ *      <code>JMXServiceURL</code>, then the generated
+ *      <code>JMXServiceURL</code> will have the local host name in its
+ *      <code><em>host</em></code> part and no
+ *      <code><em>port</em></code>.</p>
+ *
+ *
+ *    <h3><a id="directory">Connector addresses based on directory
+ *  entries</a></h3>
+ *
+ *    <p>As an alternative to the generated addresses just described,
+ *      the <code>serviceURL</code> address supplied when creating a
+ *      connector server can specify a <em>directory address</em> in
+ *      which to store the provided or generated <code>RMIServer</code>
+ *      stub.  This directory address is then used by both client and
+ *      server.</p>
+ *
+ *    <p>In this case, the <code>serviceURL</code> has the following form:</p>
+ *
+ *    <pre>
+ *    <code>service:jmx:rmi://<em>host</em>:<em>port</em>/jndi/<em>jndi-name</em></code>
+ *    </pre>
+ *
+ *    <p>Here, <code><em>jndi-name</em></code> is a string that can be
+ *      supplied to {@link javax.naming.InitialContext#bind
+ *      javax.naming.InitialContext.bind}.</p>
+ *
+ *    <p>As usual, the <code><em>host</em></code> and
+ *      <code>:<em>port</em></code> can be omitted.</p>
+ *
+ *    <p>The connector server will generate an
+ *      <code>RMIServerImpl</code> based on the protocol
+ *      (<code>rmi</code>) and the <code><em>port</em></code> if any.  When
+ *      the connector server is started, it will derive a stub from this
+ *      object using its {@link
+ *      javax.management.remote.rmi.RMIServerImpl#toStub toStub} method
+ *      and store the object using the given
+ *      <code><em>jndi-name</em></code>.  The properties defined by the
+ *      JNDI API are consulted as usual.</p>
+ *
+ *    <p>For example, if the <code>JMXServiceURL</code> is:
+ *
+ *      <pre>
+ *      <code>service:jmx:rmi://ignoredhost/jndi/rmi://myhost/myname</code>
+ *      </pre>
+ *
+ *      then the connector server will generate an
+ *      <code>RMIJRMPServerImpl</code> and store its stub using the JNDI
+ *      name
+ *
+ *      <pre>
+ *      <code>rmi://myhost/myname</code>
+ *      </pre>
+ *
+ *      which means entry <code>myname</code> in the RMI registry
+ *      running on the default port of host <code>myhost</code>.  Note
+ *      that the RMI registry only allows registration from the local
+ *      host.  So, in this case, <code>myhost</code> must be the name
+ *      (or a name) of the host that the connector server is running
+ *      on.
+ *
+ *    <p>In this <code>JMXServiceURL</code>, the first <code>rmi:</code>
+ *      specifies the RMI
+ *      connector, while the second <code>rmi:</code> specifies the RMI
+ *      registry.
+ *
+ *    <p>As another example, if the <code>JMXServiceURL</code> is:
+ *
+ *      <pre>
+ *      <code>service:jmx:rmi://ignoredhost/jndi/ldap://dirhost:9999/cn=this,ou=that</code>
+ *      </pre>
+ *
+ *      then the connector server will generate an
+ *      <code>RMIJRMPServerImpl</code> and store its stub using the JNDI
+ *      name
+ *
+ *      <pre>
+ *      <code>ldap://dirhost:9999/cn=this,ou=that</code>
+ *      </pre>
+ *
+ *      which means entry <code>cn=this,ou=that</code> in the LDAP
+ *      directory running on port 9999 of host <code>dirhost</code>.
+ *
+ *    <p>If the <code>JMXServiceURL</code> is:
+ *
+ *      <pre>
+ *      <code>service:jmx:rmi://ignoredhost/jndi/cn=this,ou=that</code>
+ *      </pre>
+ *
+ *      then the connector server will generate an
+ *      <code>RMIJRMPServerImpl</code> and store its stub using the JNDI
+ *      name
+ *
+ *      <pre>
+ *      <code>cn=this,ou=that</code>
+ *      </pre>
+ *
+ *      For this case to work, the JNDI API must have been configured
+ *      appropriately to supply the information about what directory to
+ *      use.
+ *
+ *    <p>In these examples, the host name <code>ignoredhost</code> is
+ *      not used by the connector server or its clients.  It can be
+ *      omitted, for example:</p>
+ *
+ *      <pre>
+ *      <code>service:jmx:rmi:///jndi/cn=this,ou=that</code>
+ *      </pre>
+ *
+ *    <p>However, it is good practice to use the name of the host
+ *      where the connector server is running.  This is often different
+ *      from the name of the directory host.</p>
+ *
+ *
+ *    <h3>Connector server attributes</h3>
+ *
+ *    <p>When using the default JRMP transport, RMI socket factories can
+ *      be specified using the attributes
+ *      <code>jmx.remote.rmi.client.socket.factory</code> and
+ *      <code>jmx.remote.rmi.server.socket.factory</code> in the
+ *      <code>environment</code> given to the
+ *      <code>RMIConnectorServer</code> constructor.  The values of these
+ *      attributes must be of type {@link
+ *      java.rmi.server.RMIClientSocketFactory} and {@link
+ *      java.rmi.server.RMIServerSocketFactory}, respectively.  These
+ *      factories are used when creating the RMI objects associated with
+ *      the connector.</p>
+ *
+ *    <h2>Creating an RMI connector client</h2>
+ *
+ *    <p>An RMI connector client is usually constructed using {@link
+ *      javax.management.remote.JMXConnectorFactory}, with a
+ *      <code>JMXServiceURL</code> that has <code>rmi</code> as its protocol.</p>
+ *
+ *    <p>If the <code>JMXServiceURL</code> was generated by the server,
+ *      as described above under <a href="#servergen">"connector
+ *      addresses generated by the server"</a>, then the client will
+ *      need to obtain it directly or indirectly from the server.
+ *      Typically, the server makes the <code>JMXServiceURL</code>
+ *      available by storing it in a file or a lookup service.</p>
+ *
+ *    <p>If the <code>JMXServiceURL</code> uses the directory syntax, as
+ *      described above under <a href="#directory">"connector addresses
+ *      based on directory entries"</a>, then the client may obtain it
+ *      as just explained, or client and server may both know the
+ *      appropriate directory entry to use.  For example, if the
+ *      connector server for the Whatsit agent uses the entry
+ *      <code>whatsit-agent-connector</code> in the RMI registry on host
+ *      <code>myhost</code>, then client and server can both know
+ *      that the appropriate <code>JMXServiceURL</code> is:</p>
+ *
+ *    <pre>
+ *    <code>service:jmx:rmi:///jndi/rmi://myhost/whatsit-agent-connector</code>
+ *    </pre>
+ *
+ *    <p>If you have an RMI stub of type {@link
+ *      javax.management.remote.rmi.RMIServer RMIServer}, you can
+ *      construct an RMI connection directly by using the appropriate
+ *      constructor of {@link javax.management.remote.rmi.RMIConnector
+ *      RMIConnector}.</p>
+ *
+ *    <h2>Dynamic code downloading</h2>
+ *
+ *    <p>If an RMI connector client or server receives from its peer an
+ *      instance of a class that it does not know, and if dynamic code
+ *      downloading is active for the RMI connection, then the class can
+ *      be downloaded from a codebase specified by the peer.
+ *      {@extLink rmi_guide Java RMI Guide} explains this in more detail.</p>
+ *
+ *    @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045,
+ *    section 6.8, "Base64 Content-Transfer-Encoding"</a>
+ *
+ *
+ *    @since 1.5
+ *
+ */
+package javax.management.remote.rmi;
diff --git a/src/java.management.rmi/share/classes/javax/management/remote/rmi/package.html b/src/java.management.rmi/share/classes/javax/management/remote/rmi/package.html
deleted file mode 100644
index 9afd476f2470b..0000000000000
--- a/src/java.management.rmi/share/classes/javax/management/remote/rmi/package.html
+++ /dev/null
@@ -1,329 +0,0 @@
-<html>
-<head>
-    <title>RMI connector</title>
-<!--
-Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
-DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-
-This code is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License version 2 only, as
-published by the Free Software Foundation.  Oracle designates this
-particular file as subject to the "Classpath" exception as provided
-by Oracle in the LICENSE file that accompanied this code.
-
-This code is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-version 2 for more details (a copy is included in the LICENSE file that
-accompanied this code).
-
-You should have received a copy of the GNU General Public License version
-2 along with this work; if not, write to the Free Software Foundation,
-Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-
-Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-or visit www.oracle.com if you need additional information or have any
-questions.
--->
-</head>
-<body bgcolor="white">
-    <p>The RMI connector is a connector for the JMX Remote API that
-      uses RMI to transmit client requests to a remote MBean server.
-      This package defines the classes that the user of an RMI
-      connector needs to reference directly, for both the client and
-      server sides.  It also defines certain classes that the user
-      will not usually reference directly, but that must be defined so
-      that different implementations of the RMI connector can
-      interoperate.</p>
-
-    <p>The RMI connector supports the JRMP transport for RMI.</p>
-
-    <p>Like most connectors in the JMX Remote API, an RMI connector
-      usually has an address, which
-      is a {@link javax.management.remote.JMXServiceURL
-      JMXServiceURL}.  The protocol part of this address is
-      <code>rmi</code> for a connector that uses the default RMI
-      transport (JRMP).</p>
-
-    <p>There are two forms for RMI connector addresses:</p>
-
-    <ul>
-      <li>
-	In the <em>JNDI form</em>, the URL indicates <em>where to find
-	an RMI stub for the connector</em>.  This RMI stub is a Java
-	object of type {@link javax.management.remote.rmi.RMIServer
-	RMIServer} that gives remote access to the connector server.
-	With this address form, the RMI stub is obtained from an
-	external directory entry included in the URL.  An external
-	directory is any directory recognized by {@link javax.naming
-	JNDI}, typically the RMI registry, LDAP, or COS Naming.
-
-      <li>
-	In the <em>encoded form</em>, the URL directly includes the
-	information needed to connect to the connector server.  When
-	using RMI/JRMP, the encoded form is the serialized RMI stub
-	for the server object, encoded using BASE64 without embedded
-	newlines.
-    </ul>
-
-    <p>Addresses are covered in more detail below.</p>
-
-
-    <h2>Creating an RMI connector server</h2>
-
-    <p>The usual way to create an RMI connector server is to supply an
-      RMI connector address to the method {@link
-      javax.management.remote.JMXConnectorServerFactory#newJMXConnectorServer
-      JMXConnectorServerFactory.newJMXConnectorServer}.  The MBean
-      server to which the connector server is attached can be
-      specified as a parameter to that method.  Alternatively, the
-      connector server can be registered as an MBean in that MBean
-      server.</p>
-
-    <p>An RMI connector server can also be created by constructing an
-      instance of {@link
-      javax.management.remote.rmi.RMIConnectorServer
-      RMIConnectorServer}, explicitly or through the MBean server's
-      <code>createMBean</code> method.</p>
-
-    <h3>Choosing the RMI transport</h3>
-
-    <p>You can choose the RMI transport by specifying
-      <code>rmi</code> in the <code><em>protocol</em></code> part of the
-      <code>serviceURL</code> when creating the connector server.  You
-      can also create specialized connector servers by instantiating
-      an appropriate subclass of {@link
-      javax.management.remote.rmi.RMIServerImpl RMIServerImpl} and
-      supplying it to the <code>RMIConnectorServer</code>
-      constructor.</p>
-
-
-    <h3><a id="servergen">Connector addresses generated by the
-	server</a></h3>
-
-    <p>If the <code>serviceURL</code> you specify has an empty URL
-      path (after the optional host and port), or if you do not
-      specify a <code>serviceURL</code>, then the connector server
-      will fabricate a new <code>JMXServiceURL</code> that clients can
-      use to connect:</p>
-
-    <ul>
-
-      <li><p>If the <code>serviceURL</code> looks like:</p>
-
-	<pre>
-	<code>service:jmx:rmi://<em>host</em>:<em>port</em></code>
-	</pre>
-
-	<p>then the connector server will generate an {@link
-	javax.management.remote.rmi.RMIJRMPServerImpl
-	RMIJRMPServerImpl} and the returned <code>JMXServiceURL</code>
-	looks like:</p>
-
-	<pre>
-	<code>service:jmx:rmi://<em>host</em>:<em>port</em>/stub/<em>XXXX</em></code>
-	</pre>
-
-	<p>where <code><em>XXXX</em></code> is the serialized form of the
-	stub for the generated object, encoded in BASE64 without
-	newlines.</p>
-
-      <li><p>If there is no <code>serviceURL</code>, there must be a
-	user-provided <code>RMIServerImpl</code>.  The connector server
-        will generate a <code>JMXServiceURL</code> using the <code>rmi</code>
-	form.</p>
-
-    </ul>
-
-    <p>The <code><em>host</em></code> in a user-provided
-      <code>serviceURL</code> is optional.  If present, it is copied
-      into the generated <code>JMXServiceURL</code> but otherwise
-      ignored.  If absent, the generated <code>JXMServiceURL</code>
-      will have the local host name.</p>
-
-    <p>The <code><em>port</em></code> in a user-provided
-      <code>serviceURL</code> is also optional.  If present, it is
-      also copied into the generated <code>JMXServiceURL</code>;
-      otherwise, the generated <code>JMXServiceURL</code> has no port.
-      For an <code>serviceURL</code> using the <code>rmi</code>
-      protocol, the <code><em>port</em></code>, if present, indicates
-      what port the generated remote object should be exported on.  It
-      has no other effect.</p>
-
-    <p>If the user provides an <code>RMIServerImpl</code> rather than a
-      <code>JMXServiceURL</code>, then the generated
-      <code>JMXServiceURL</code> will have the local host name in its
-      <code><em>host</em></code> part and no
-      <code><em>port</em></code>.</p>
-
-
-    <h3><a id="directory">Connector addresses based on directory
-	entries</a></h3>
-
-    <p>As an alternative to the generated addresses just described,
-      the <code>serviceURL</code> address supplied when creating a
-      connector server can specify a <em>directory address</em> in
-      which to store the provided or generated <code>RMIServer</code>
-      stub.  This directory address is then used by both client and
-      server.</p>
-
-    <p>In this case, the <code>serviceURL</code> has the following form:</p>
-
-    <pre>
-    <code>service:jmx:rmi://<em>host</em>:<em>port</em>/jndi/<em>jndi-name</em></code>
-    </pre>
-
-    <p>Here, <code><em>jndi-name</em></code> is a string that can be
-      supplied to {@link javax.naming.InitialContext#bind
-      javax.naming.InitialContext.bind}.</p>
-
-    <p>As usual, the <code><em>host</em></code> and
-      <code>:<em>port</em></code> can be omitted.</p>
-
-    <p>The connector server will generate an
-      <code>RMIServerImpl</code> based on the protocol
-      (<code>rmi</code>) and the <code><em>port</em></code> if any.  When
-      the connector server is started, it will derive a stub from this
-      object using its {@link
-      javax.management.remote.rmi.RMIServerImpl#toStub toStub} method
-      and store the object using the given
-      <code><em>jndi-name</em></code>.  The properties defined by the
-      JNDI API are consulted as usual.</p>
-
-    <p>For example, if the <code>JMXServiceURL</code> is:
-
-      <pre>
-      <code>service:jmx:rmi://ignoredhost/jndi/rmi://myhost/myname</code>
-      </pre>
-
-      then the connector server will generate an
-      <code>RMIJRMPServerImpl</code> and store its stub using the JNDI
-      name
-
-      <pre>
-      <code>rmi://myhost/myname</code>
-      </pre>
-
-      which means entry <code>myname</code> in the RMI registry
-      running on the default port of host <code>myhost</code>.  Note
-      that the RMI registry only allows registration from the local
-      host.  So, in this case, <code>myhost</code> must be the name
-      (or a name) of the host that the connector server is running
-      on.
-
-    <p>In this <code>JMXServiceURL</code>, the first <code>rmi:</code>
-      specifies the RMI
-      connector, while the second <code>rmi:</code> specifies the RMI
-      registry.
-
-    <p>As another example, if the <code>JMXServiceURL</code> is:
-
-      <pre>
-      <code>service:jmx:rmi://ignoredhost/jndi/ldap://dirhost:9999/cn=this,ou=that</code>
-      </pre>
-
-      then the connector server will generate an
-      <code>RMIJRMPServerImpl</code> and store its stub using the JNDI
-      name
-
-      <pre>
-      <code>ldap://dirhost:9999/cn=this,ou=that</code>
-      </pre>
-
-      which means entry <code>cn=this,ou=that</code> in the LDAP
-      directory running on port 9999 of host <code>dirhost</code>.
-
-    <p>If the <code>JMXServiceURL</code> is:
-
-      <pre>
-      <code>service:jmx:rmi://ignoredhost/jndi/cn=this,ou=that</code>
-      </pre>
-
-      then the connector server will generate an
-      <code>RMIJRMPServerImpl</code> and store its stub using the JNDI
-      name
-
-      <pre>
-      <code>cn=this,ou=that</code>
-      </pre>
-
-      For this case to work, the JNDI API must have been configured
-      appropriately to supply the information about what directory to
-      use.
-
-    <p>In these examples, the host name <code>ignoredhost</code> is
-      not used by the connector server or its clients.  It can be
-      omitted, for example:</p>
-
-      <pre>
-      <code>service:jmx:rmi:///jndi/cn=this,ou=that</code>
-      </pre>
-
-    <p>However, it is good practice to use the name of the host
-      where the connector server is running.  This is often different
-      from the name of the directory host.</p>
-
-
-    <h3>Connector server attributes</h3>
-
-    <p>When using the default JRMP transport, RMI socket factories can
-      be specified using the attributes
-      <code>jmx.remote.rmi.client.socket.factory</code> and
-      <code>jmx.remote.rmi.server.socket.factory</code> in the
-      <code>environment</code> given to the
-      <code>RMIConnectorServer</code> constructor.  The values of these
-      attributes must be of type {@link
-      java.rmi.server.RMIClientSocketFactory} and {@link
-      java.rmi.server.RMIServerSocketFactory}, respectively.  These
-      factories are used when creating the RMI objects associated with
-      the connector.</p>
-
-    <h2>Creating an RMI connector client</h2>
-
-    <p>An RMI connector client is usually constructed using {@link
-      javax.management.remote.JMXConnectorFactory}, with a
-      <code>JMXServiceURL</code> that has <code>rmi</code> as its protocol.</p>
-
-    <p>If the <code>JMXServiceURL</code> was generated by the server,
-      as described above under <a href="#servergen">"connector
-      addresses generated by the server"</a>, then the client will
-      need to obtain it directly or indirectly from the server.
-      Typically, the server makes the <code>JMXServiceURL</code>
-      available by storing it in a file or a lookup service.</p>
-
-    <p>If the <code>JMXServiceURL</code> uses the directory syntax, as
-      described above under <a href="#directory">"connector addresses
-      based on directory entries"</a>, then the client may obtain it
-      as just explained, or client and server may both know the
-      appropriate directory entry to use.  For example, if the
-      connector server for the Whatsit agent uses the entry
-      <code>whatsit-agent-connector</code> in the RMI registry on host
-      <code>myhost</code>, then client and server can both know
-      that the appropriate <code>JMXServiceURL</code> is:</p>
-
-    <pre>
-    <code>service:jmx:rmi:///jndi/rmi://myhost/whatsit-agent-connector</code>
-    </pre>
-
-    <p>If you have an RMI stub of type {@link
-      javax.management.remote.rmi.RMIServer RMIServer}, you can
-      construct an RMI connection directly by using the appropriate
-      constructor of {@link javax.management.remote.rmi.RMIConnector
-      RMIConnector}.</p>
-
-    <h2>Dynamic code downloading</h2>
-
-    <p>If an RMI connector client or server receives from its peer an
-      instance of a class that it does not know, and if dynamic code
-      downloading is active for the RMI connection, then the class can
-      be downloaded from a codebase specified by the peer.
-      {@extLink rmi_guide Java RMI Guide} explains this in more detail.</p>
-
-    @see <a href="http://www.ietf.org/rfc/rfc2045.txt">RFC 2045,
-    section 6.8, "Base64 Content-Transfer-Encoding"</a>
-
-
-    @since 1.5
-
-  </body>
-</html>

From 2170e99cb49a4ef2086ecec7515a72d56148d0f2 Mon Sep 17 00:00:00 2001
From: Liam Miller-Cushon <cushon@openjdk.org>
Date: Wed, 22 May 2024 18:02:37 +0000
Subject: [PATCH 9/9] 8331081: 'internal proprietary API' diagnostics if
 --system is configured to an earlier JDK version

Reviewed-by: jlahoda
---
 .../com/sun/tools/javac/code/ClassFinder.java |  23 ++--
 .../options/system/SystemSunProprietary.java  | 127 ++++++++++++++++++
 2 files changed, 141 insertions(+), 9 deletions(-)
 create mode 100644 test/langtools/tools/javac/options/system/SystemSunProprietary.java

diff --git a/src/jdk.compiler/share/classes/com/sun/tools/javac/code/ClassFinder.java b/src/jdk.compiler/share/classes/com/sun/tools/javac/code/ClassFinder.java
index 95f8f847923c6..972d6a1075b3d 100644
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/code/ClassFinder.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/code/ClassFinder.java
@@ -241,7 +241,7 @@ protected ClassFinder(Context context) {
      * available from the module system.
      */
     long getSupplementaryFlags(ClassSymbol c) {
-        if (jrtIndex == null || !jrtIndex.isInJRT(c.classfile) || c.name == names.module_info) {
+        if (c.name == names.module_info) {
             return 0;
         }
 
@@ -257,17 +257,22 @@ long getSupplementaryFlags(ClassSymbol c) {
             try {
                 ModuleSymbol owningModule = packge.modle;
                 if (owningModule == syms.noModule) {
-                    JRTIndex.CtSym ctSym = jrtIndex.getCtSym(packge.flatName());
-                    Profile minProfile = Profile.DEFAULT;
-                    if (ctSym.proprietary)
-                        newFlags |= PROPRIETARY;
-                    if (ctSym.minProfile != null)
-                        minProfile = Profile.lookup(ctSym.minProfile);
-                    if (profile != Profile.DEFAULT && minProfile.value > profile.value) {
-                        newFlags |= NOT_IN_PROFILE;
+                    if (jrtIndex != null && jrtIndex.isInJRT(c.classfile)) {
+                        JRTIndex.CtSym ctSym = jrtIndex.getCtSym(packge.flatName());
+                        Profile minProfile = Profile.DEFAULT;
+                        if (ctSym.proprietary)
+                            newFlags |= PROPRIETARY;
+                        if (ctSym.minProfile != null)
+                            minProfile = Profile.lookup(ctSym.minProfile);
+                        if (profile != Profile.DEFAULT && minProfile.value > profile.value) {
+                            newFlags |= NOT_IN_PROFILE;
+                        }
                     }
                 } else if (owningModule.name == names.jdk_unsupported) {
                     newFlags |= PROPRIETARY;
+                } else {
+                    // don't accumulate user modules in supplementaryFlags
+                    return 0;
                 }
             } catch (IOException ignore) {
             }
diff --git a/test/langtools/tools/javac/options/system/SystemSunProprietary.java b/test/langtools/tools/javac/options/system/SystemSunProprietary.java
new file mode 100644
index 0000000000000..0a16305aaba7b
--- /dev/null
+++ b/test/langtools/tools/javac/options/system/SystemSunProprietary.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2024, Alphabet LLC. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8331081
+ * @summary Verify 'internal proprietary API' diagnostics if --system is configured
+ * @library /tools/lib
+ * @modules jdk.compiler/com.sun.tools.javac.api jdk.compiler/com.sun.tools.javac.main
+ *     jdk.compiler/com.sun.tools.javac.jvm jdk.jdeps/com.sun.tools.javap
+ * @build toolbox.ToolBox toolbox.JarTask toolbox.JavacTask toolbox.JavapTask toolbox.TestRunner
+ * @run main SystemSunProprietary
+ */
+import toolbox.JavacTask;
+import toolbox.Task;
+import toolbox.Task.Expect;
+import toolbox.TestRunner;
+import toolbox.ToolBox;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.List;
+
+public class SystemSunProprietary extends TestRunner {
+
+    private final ToolBox tb = new ToolBox();
+
+    public SystemSunProprietary() {
+        super(System.err);
+    }
+
+    public static void main(String... args) throws Exception {
+        new SystemSunProprietary().runTests();
+    }
+    // Every javac run below must print exactly the 'expected' lines, else the test throws.
+    @Test
+    public void testUnsafe(Path base) throws IOException {
+        Path src = base.resolve("src");
+        tb.writeJavaFiles(
+                src,
+                "module m { requires jdk.unsupported; }",
+                "package test; public class Test { sun.misc.Unsafe unsafe; } ");
+        Path classes = base.resolve("classes");
+        tb.createDirectories(classes);
+
+        List<String> log;
+        List<String> expected =
+                Arrays.asList(
+                        "Test.java:1:43: compiler.warn.sun.proprietary: sun.misc.Unsafe",
+                        "1 warning");
+        // First run: no --system option is passed.
+        log =
+                new JavacTask(tb)
+                        .options("-XDrawDiagnostics")
+                        .outdir(classes)
+                        .files(tb.findJavaFiles(src))
+                        .run(Expect.SUCCESS)
+                        .writeAll()
+                        .getOutputLines(Task.OutputKind.DIRECT);
+
+        if (!expected.equals(log)) {
+            throw new AssertionError("Unexpected output: " + log);
+        }
+        // Second run: --system is set to the java.home system property.
+        log =
+                new JavacTask(tb)
+                        .options("-XDrawDiagnostics", "--system", System.getProperty("java.home"))
+                        .outdir(classes)
+                        .files(tb.findJavaFiles(src))
+                        .run(Expect.SUCCESS)
+                        .writeAll()
+                        .getOutputLines(Task.OutputKind.DIRECT);
+
+        if (!expected.equals(log)) {
+            throw new AssertionError("Unexpected output: " + log);
+        }
+
+        // Create a valid argument to --system that isn't the current java.home
+        Path originalSystem = Path.of(System.getProperty("java.home"));
+        Path system = base.resolve("system");
+        for (String path : List.of("release", "lib/modules", "lib/jrt-fs.jar")) {
+            Path to = system.resolve(path);
+            Files.createDirectories(to.getParent());
+            Files.copy(originalSystem.resolve(path), to);
+        }
+        // Third run: --system is set to the directory copied above.
+        log =
+                new JavacTask(tb)
+                        .options("-XDrawDiagnostics", "--system", system.toString())
+                        .outdir(classes)
+                        .files(tb.findJavaFiles(src))
+                        .run(Expect.SUCCESS)
+                        .writeAll()
+                        .getOutputLines(Task.OutputKind.DIRECT);
+
+        if (!expected.equals(log)) {
+            throw new AssertionError("Unexpected output: " + log);
+        }
+    }
+    // Delegates to runTests(...) with a supplier yielding one Path built from m.getName().
+    protected void runTests() throws Exception {
+        runTests(m -> new Object[] {Paths.get(m.getName())});
+    }
+}