refactor: multinode integration test refactor (#662)

* refactor multi node test suite Refactor the multinode test suite into the marketplace test suite. - Arbitrary number of nodes can be started with each test: clients, providers, validators - Hardhat can also be started locally with each test, usually for the purpose of saving and inspecting its log file. - Log files for all nodes can be persisted on disk, with configuration at the test-level - Log files, if persisted (as specified in the test), will be persisted to a CI artifact - Node config is specified at the test-level instead of the suite-level - Node/Hardhat process starting/stopping is now async, and runs much faster - Per-node config includes: - simulating proof failures - logging to file - log level - log topics - storage quota - debug (print logs to stdout) - Tests find next available ports when starting nodes, as closing ports on Windows can lag - Hardhat is no longer required to be running prior to starting the integration tests (as long as Hardhat is configured to run in the tests). - If Hardhat is already running, a snapshot will be taken and reverted before and after each test, respectively. - If Hardhat is not already running and configured to run at the test-level, a Hardhat process will be spawned and torn down before and after each test, respectively. * additional logging for debug purposes * address PR feedback - fix spelling - revert change from catching ProviderError to SignerError -- this should be handled more consistently in the Market abstraction, and will be handled in another PR. - remove method label from raiseAssert - remove unused import * Use API instead of command exec to test for free port Use chronos `createStreamServer` API to test for free port by binding localhost address and port. Use `ServerFlags.ReuseAddr` to enable reuse of same IP/Port on multiple test runs. * clean up * remove upraises annotations from tests * Update tests to work with updated erasure coding slot sizes * update dataset size, nodes, tolerance to match valid ec params Integration tests now have valid dataset sizes (blocks), tolerances, and number of nodes, to work with valid ec params. These values are validated when requested storage. Print the rest api failure message (via doAssert) when a rest api call fails (eg the rest api may validate some ec params). All integration tests pass when the async `clock.now` changes are reverted. * dont use async clock for now * fix workflow * move integration logs uplod to reusable --------- Co-authored-by: Dmitriy Ryajov <[email protected]>
codex-storage · Feb 19, 2024 · d70ab59 · d70ab59
1 parent 0497114
commit d70ab59
Show file tree

Hide file tree

Showing 23 changed files with 1,252 additions and 380 deletions.
diff --git a/.github/workflows/ci-reusable.yml b/.github/workflows/ci-reusable.yml
@@ -70,6 +70,14 @@ jobs:
         if: matrix.tests == 'integration' || matrix.tests == 'all'
         run: make -j${ncpu} testIntegration
 
+      - name: Upload integration tests log files
+        uses: actions/upload-artifact@v3
+        if: always()
+        with:
+          name: integration-tests-logs
+          path: tests/integration/logs/
+          retention-days: 1
+
   status:
     if: always()
     needs: [build]

diff --git a/.gitignore b/.gitignore
@@ -39,3 +39,4 @@ docker/hostdatadir
 docker/prometheus-data
 .DS_Store
 nim.cfg
+tests/integration/logs
diff --git a/codex/node.nim b/codex/node.nim
@@ -459,7 +459,8 @@ proc requestStorage*(
     reward            = reward
     proofProbability  = proofProbability
     collateral        = collateral
-    expiry            = expiry
+    expiry            = expiry.truncate(int64)
+    now               = self.clock.now
 
   trace "Received a request for storage!"
 

diff --git a/codex/purchasing/states/submitted.nim b/codex/purchasing/states/submitted.nim
@@ -35,6 +35,7 @@ method run*(state: PurchaseSubmitted, machine: Machine): Future[?State] {.async.
 
   proc withTimeout(future: Future[void]) {.async.} =
     let expiry = request.expiry.truncate(int64) + 1
+    trace "waiting for request fulfillment or expiry", expiry
     await future.withTimeout(clock, expiry)
 
   try:

diff --git a/codex/sales/states/downloading.nim b/codex/sales/states/downloading.nim
@@ -57,7 +57,8 @@ method run*(state: SaleDownloading, machine: Machine): Future[?State] {.async.}
     # update availability size
     var bytes: uint = 0
     for blk in blocks:
-      bytes += blk.data.len.uint
+      if not blk.cid.isEmpty:
+        bytes += blk.data.len.uint
 
     trace "Releasing batch of bytes written to disk", bytes
     return await reservations.release(reservation.id,

diff --git a/codex/sales/states/filling.nim b/codex/sales/states/filling.nim
@@ -34,3 +34,4 @@ method run(state: SaleFilling, machine: Machine): Future[?State] {.async.} =
 
   debug "Filling slot", requestId = data.requestId, slotIndex = data.slotIndex
   await market.fillSlot(data.requestId, data.slotIndex, state.proof, collateral)
+  debug "Waiting for slot filled event...", requestId = $data.requestId, slotIndex = $data.slotIndex
diff --git a/codex/sales/states/proving.nim b/codex/sales/states/proving.nim
@@ -81,6 +81,7 @@ proc proveLoop(
       debug "Proof is required", period = currentPeriod, challenge = challenge
       await state.prove(slot, challenge, onProve, market, currentPeriod)
 
+    debug "waiting until next period"
     await waitUntilPeriod(currentPeriod + 1)
 
 method `$`*(state: SaleProving): string = "SaleProving"
@@ -126,12 +127,12 @@ method run*(state: SaleProving, machine: Machine): Future[?State] {.async.} =
     debug "Stopping proving.", requestId = data.requestId, slotIndex = data.slotIndex
 
     if not state.loop.isNil:
-        if not state.loop.finished:
-          try:
-            await state.loop.cancelAndWait()
-          except CatchableError as e:
-            error "Error during cancelation of prooving loop", msg = e.msg
+      if not state.loop.finished:
+        try:
+          await state.loop.cancelAndWait()
+        except CatchableError as e:
+          error "Error during cancellation of proving loop", msg = e.msg
 
-        state.loop = nil
+      state.loop = nil
 
   return some State(SalePayout())
diff --git a/codex/sales/states/provingsimulated.nim b/codex/sales/states/provingsimulated.nim
@@ -8,6 +8,7 @@ when codex_enable_proof_failures:
   import ../../contracts/requests
   import ../../logutils
   import ../../market
+  import ../../utils/exceptions
   import ../salescontext
   import ./proving
 
@@ -20,7 +21,7 @@ when codex_enable_proof_failures:
       proofCount: int
 
   proc onSubmitProofError(error: ref CatchableError, period: UInt256, slotId: SlotId) =
-    error "Submitting invalid proof failed", period = period, slotId, msg = error.msg
+    error "Submitting invalid proof failed", period, slotId, msg = error.msgDetail
 
   method prove*(state: SaleProvingSimulated, slot: Slot, challenge: ProofChallenge, onProve: OnProve, market: Market, currentPeriod: Period) {.async.} =
     trace "Processing proving in simulated mode"
@@ -33,7 +34,7 @@ when codex_enable_proof_failures:
         warn "Submitting INVALID proof", period = currentPeriod, slotId = slot.id
         await market.submitProof(slot.id, Groth16Proof.default)
       except ProviderError as e:
-        if not e.revertReason.contains("Invalid proof"):
+        if not e.msgDetail.contains("Invalid proof"):
           onSubmitProofError(e, currentPeriod, slot.id)
       except CatchableError as e:
         onSubmitProofError(e, currentPeriod, slot.id)

diff --git a/tests/examples.nim b/tests/examples.nim
@@ -4,11 +4,21 @@ import std/times
 import std/typetraits
 
 import pkg/codex/contracts/requests
+import pkg/codex/rng
 import pkg/codex/contracts/proofs
 import pkg/codex/sales/slotqueue
 import pkg/codex/stores
+import pkg/codex/units
 
+import pkg/chronos
+import pkg/stew/byteutils
 import pkg/stint
+
+import ./codex/helpers/randomchunker
+
+export randomchunker
+export units
+
 proc exampleString*(length: int): string =
   let chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
   result = newString(length) # Create a new empty string with a given length
@@ -78,3 +88,16 @@ proc example*(_: type Groth16Proof): Groth16Proof =
     b: G2Point.example,
     c: G1Point.example
   )
+
+proc example*(_: type RandomChunker, blocks: int): Future[string] {.async.} =
+  # doAssert blocks >= 3, "must be more than 3 blocks"
+  let rng = Rng.instance()
+  let chunker = RandomChunker.new(
+    rng, size = DefaultBlockSize * blocks.NBytes, chunkSize = DefaultBlockSize)
+  var data: seq[byte]
+  while (let moar = await chunker.getBytes(); moar != []):
+    data.add moar
+  return byteutils.toHex(data)
+
+proc example*(_:  type RandomChunker): Future[string] {.async.} =
+  await RandomChunker.example(3)
diff --git a/tests/integration/clioption.nim b/tests/integration/clioption.nim
@@ -0,0 +1,13 @@
+import pkg/questionable
+
+type
+  CliOption* = object of RootObj
+    nodeIdx*: ?int
+    key*: string
+    value*: string
+
+proc `$`*(option: CliOption): string =
+  var res = option.key
+  if option.value.len > 0:
+    res &= "=" & option.value
+  return res
diff --git a/tests/integration/codexclient.nim b/tests/integration/codexclient.nim
@@ -1,6 +1,5 @@
 import std/httpclient
 import std/strutils
-import std/sequtils
 
 from pkg/libp2p import Cid, `$`, init
 import pkg/stint
@@ -109,25 +108,26 @@ proc requestStorage*(
   ## Call request storage REST endpoint
   ##
   let response = client.requestStorageRaw(cid, duration, reward, proofProbability, collateral, expiry, nodes, tolerance)
-  assert response.status == "200 OK"
+  if response.status != "200 OK":
+    doAssert(false, response.body)
   PurchaseId.fromHex(response.body).catch
 
 proc getPurchase*(client: CodexClient, purchaseId: PurchaseId): ?!RestPurchase =
   let url = client.baseurl & "/storage/purchases/" & purchaseId.toHex
-  let body = client.http.getContent(url)
-  let json = ? parseJson(body).catch
-  RestPurchase.fromJson(json)
+  try:
+    let body = client.http.getContent(url)
+    let json = ? parseJson(body).catch
+    return RestPurchase.fromJson(json)
+  except CatchableError as e:
+    return failure e.msg
 
 proc getSalesAgent*(client: CodexClient, slotId: SlotId): ?!RestSalesAgent =
   let url = client.baseurl & "/sales/slots/" & slotId.toHex
-  echo "getting sales agent for id, ", slotId.toHex
   try:
     let body = client.http.getContent(url)
-    echo "get sales agent body: ", body
     let json = ? parseJson(body).catch
     return RestSalesAgent.fromJson(json)
   except CatchableError as e:
-    echo "[client.getSalesAgent] error getting agent: ", e.msg
     return failure e.msg
 
 proc getSlots*(client: CodexClient): ?!seq[Slot] =

diff --git a/tests/integration/codexconfig.nim b/tests/integration/codexconfig.nim
@@ -0,0 +1,61 @@
+import std/options
+import std/sequtils
+import pkg/codex/units
+import ./clioption
+import ./nodeconfig
+
+export nodeconfig
+export clioption
+
+type
+  CodexConfig* = ref object of NodeConfig
+    numNodes*: int
+    cliOptions*: seq[CliOption]
+    logTopics*: seq[string]
+
+proc nodes*(config: CodexConfig, numNodes: int): CodexConfig =
+  if numNodes < 0:
+    raise newException(ValueError, "numNodes must be >= 0")
+
+  var startConfig = config
+  startConfig.numNodes = numNodes
+  return startConfig
+
+proc simulateProofFailuresFor*(
+  config: CodexConfig,
+  providerIdx: int,
+  failEveryNProofs: int
+): CodexConfig =
+
+  if providerIdx > config.numNodes - 1:
+    raise newException(ValueError, "provider index out of bounds")
+
+  var startConfig = config
+  startConfig.cliOptions.add(
+    CliOption(
+      nodeIdx: some providerIdx,
+      key: "--simulate-proof-failures",
+      value: $failEveryNProofs
+    )
+  )
+  return startConfig
+
+proc withLogTopics*(
+  config: CodexConfig,
+  topics: varargs[string]
+): CodexConfig =
+
+  var startConfig = config
+  startConfig.logTopics = startConfig.logTopics.concat(@topics)
+  return startConfig
+
+proc withStorageQuota*(
+  config: CodexConfig,
+  quota: NBytes
+): CodexConfig =
+
+  var startConfig = config
+  startConfig.cliOptions.add(
+    CliOption(key: "--storage-quota", value: $quota)
+  )
+  return startConfig
diff --git a/tests/integration/codexprocess.nim b/tests/integration/codexprocess.nim
@@ -0,0 +1,75 @@
+import pkg/questionable
+import pkg/questionable/results
+import pkg/confutils
+import pkg/chronicles
+import pkg/ethers
+import pkg/libp2p
+import std/os
+import std/strutils
+import codex/conf
+import ./codexclient
+import ./nodeprocess
+
+export codexclient
+export chronicles
+export nodeprocess
+
+logScope:
+  topics = "integration testing codex process"
+
+type
+  CodexProcess* = ref object of NodeProcess
+    client: ?CodexClient
+
+method workingDir(node: CodexProcess): string =
+  return currentSourcePath() / ".." / ".." / ".."
+
+method executable(node: CodexProcess): string =
+  return "build" / "codex"
+
+method startedOutput(node: CodexProcess): string =
+  return "REST service started"
+
+method processOptions(node: CodexProcess): set[AsyncProcessOption] =
+  return {AsyncProcessOption.StdErrToStdOut}
+
+method outputLineEndings(node: CodexProcess): string =
+  return "\n"
+
+method onOutputLineCaptured(node: CodexProcess, line: string) =
+  discard
+
+proc dataDir(node: CodexProcess): string =
+  let config = CodexConf.load(cmdLine = node.arguments)
+  return config.dataDir.string
+
+proc ethAccount*(node: CodexProcess): Address =
+  let config = CodexConf.load(cmdLine = node.arguments)
+  without ethAccount =? config.ethAccount:
+    raiseAssert "eth account not set"
+  return Address(ethAccount)
+
+proc apiUrl*(node: CodexProcess): string =
+  let config = CodexConf.load(cmdLine = node.arguments)
+  return "http://" & config.apiBindAddress & ":" & $config.apiPort & "/api/codex/v1"
+
+proc client*(node: CodexProcess): CodexClient =
+  if client =? node.client:
+    return client
+  let client = CodexClient.new(node.apiUrl)
+  node.client = some client
+  return client
+
+method stop*(node: CodexProcess) {.async.} =
+  logScope:
+    nodeName = node.name
+
+  await procCall NodeProcess(node).stop()
+
+  trace "stopping codex client"
+  if client =? node.client:
+    client.close()
+    node.client = none CodexClient
+
+method removeDataDir*(node: CodexProcess) =
+  removeDir(node.dataDir)
diff --git a/tests/integration/hardhatconfig.nim b/tests/integration/hardhatconfig.nim
@@ -0,0 +1,6 @@
+import ./nodeconfig
+
+export nodeconfig
+
+type
+  HardhatConfig* = ref object of NodeConfig
Original file line number	Diff line number	Diff line change
Expand Up		@@ -34,3 +34,4 @@ method run(state: SaleFilling, machine: Machine): Future[?State] {.async.} =

		debug "Filling slot", requestId = data.requestId, slotIndex = data.slotIndex
		await market.fillSlot(data.requestId, data.slotIndex, state.proof, collateral)
		debug "Waiting for slot filled event...", requestId = $data.requestId, slotIndex = $data.slotIndex