Skip to content

Commit

Permalink
refactor: multinode integration test refactor (#662)
Browse files Browse the repository at this point in the history
* refactor multi node test suite

Refactor the multinode test suite into the marketplace test suite.

- Arbitrary number of nodes can be started with each test: clients, providers, validators
- Hardhat can also be started locally with each test, usually for the purpose of saving and inspecting its log file.
- Log files for all nodes can be persisted on disk, with configuration at the test-level
- Log files, if persisted (as specified in the test), will be persisted to a CI artifact
- Node config is specified at the test-level instead of the suite-level
- Node/Hardhat process starting/stopping is now async, and runs much faster
- Per-node config includes:
  - simulating proof failures
  - logging to file
  - log level
  - log topics
  - storage quota
  - debug (print logs to stdout)
- Tests find next available ports when starting nodes, as closing ports on Windows can lag
- Hardhat is no longer required to be running prior to starting the integration tests (as long as Hardhat is configured to run in the tests).
  - If Hardhat is already running, a snapshot will be taken and reverted before and after each test, respectively.
  - If Hardhat is not already running and configured to run at the test-level, a Hardhat process will be spawned and torn down before and after each test, respectively.

* additional logging for debug purposes

* address PR feedback

- fix spelling
- revert change from catching ProviderError to SignerError -- this should be handled more consistently in the Market abstraction, and will be handled in another PR.
- remove method label from raiseAssert
- remove unused import

* Use API instead of command exec to test for free port

Use chronos `createStreamServer` API to test for free port by binding localhost address and port. Use `ServerFlags.ReuseAddr` to enable reuse of same IP/Port on multiple test runs.

* clean up

* remove upraises annotations from tests

* Update tests to work with updated erasure coding slot sizes

* update dataset size, nodes, tolerance to match valid ec params

Integration tests now have valid dataset sizes (blocks), tolerances, and number of nodes, to work with valid ec params. These values are validated when requested storage.

Print the rest api failure message (via doAssert) when a rest api call fails (eg the rest api may validate some ec params).

All integration tests pass when the async `clock.now` changes are reverted.

* dont use async clock for now

* fix workflow

* move integration logs uplod to reusable

---------

Co-authored-by: Dmitriy Ryajov <[email protected]>
  • Loading branch information
emizzle and dryajov authored Feb 19, 2024
1 parent 0497114 commit d70ab59
Show file tree
Hide file tree
Showing 23 changed files with 1,252 additions and 380 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/ci-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ jobs:
if: matrix.tests == 'integration' || matrix.tests == 'all'
run: make -j${ncpu} testIntegration

- name: Upload integration tests log files
uses: actions/upload-artifact@v3
if: always()
with:
name: integration-tests-logs
path: tests/integration/logs/
retention-days: 1

status:
if: always()
needs: [build]
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,4 @@ docker/hostdatadir
docker/prometheus-data
.DS_Store
nim.cfg
tests/integration/logs
3 changes: 2 additions & 1 deletion codex/node.nim
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,8 @@ proc requestStorage*(
reward = reward
proofProbability = proofProbability
collateral = collateral
expiry = expiry
expiry = expiry.truncate(int64)
now = self.clock.now

trace "Received a request for storage!"

Expand Down
1 change: 1 addition & 0 deletions codex/purchasing/states/submitted.nim
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ method run*(state: PurchaseSubmitted, machine: Machine): Future[?State] {.async.

proc withTimeout(future: Future[void]) {.async.} =
let expiry = request.expiry.truncate(int64) + 1
trace "waiting for request fulfillment or expiry", expiry
await future.withTimeout(clock, expiry)

try:
Expand Down
3 changes: 2 additions & 1 deletion codex/sales/states/downloading.nim
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ method run*(state: SaleDownloading, machine: Machine): Future[?State] {.async.}
# update availability size
var bytes: uint = 0
for blk in blocks:
bytes += blk.data.len.uint
if not blk.cid.isEmpty:
bytes += blk.data.len.uint

trace "Releasing batch of bytes written to disk", bytes
return await reservations.release(reservation.id,
Expand Down
1 change: 1 addition & 0 deletions codex/sales/states/filling.nim
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ method run(state: SaleFilling, machine: Machine): Future[?State] {.async.} =

debug "Filling slot", requestId = data.requestId, slotIndex = data.slotIndex
await market.fillSlot(data.requestId, data.slotIndex, state.proof, collateral)
debug "Waiting for slot filled event...", requestId = $data.requestId, slotIndex = $data.slotIndex
13 changes: 7 additions & 6 deletions codex/sales/states/proving.nim
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ proc proveLoop(
debug "Proof is required", period = currentPeriod, challenge = challenge
await state.prove(slot, challenge, onProve, market, currentPeriod)

debug "waiting until next period"
await waitUntilPeriod(currentPeriod + 1)

method `$`*(state: SaleProving): string = "SaleProving"
Expand Down Expand Up @@ -126,12 +127,12 @@ method run*(state: SaleProving, machine: Machine): Future[?State] {.async.} =
debug "Stopping proving.", requestId = data.requestId, slotIndex = data.slotIndex

if not state.loop.isNil:
if not state.loop.finished:
try:
await state.loop.cancelAndWait()
except CatchableError as e:
error "Error during cancelation of prooving loop", msg = e.msg
if not state.loop.finished:
try:
await state.loop.cancelAndWait()
except CatchableError as e:
error "Error during cancellation of proving loop", msg = e.msg

state.loop = nil
state.loop = nil

return some State(SalePayout())
5 changes: 3 additions & 2 deletions codex/sales/states/provingsimulated.nim
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ when codex_enable_proof_failures:
import ../../contracts/requests
import ../../logutils
import ../../market
import ../../utils/exceptions
import ../salescontext
import ./proving

Expand All @@ -20,7 +21,7 @@ when codex_enable_proof_failures:
proofCount: int

proc onSubmitProofError(error: ref CatchableError, period: UInt256, slotId: SlotId) =
error "Submitting invalid proof failed", period = period, slotId, msg = error.msg
error "Submitting invalid proof failed", period, slotId, msg = error.msgDetail

method prove*(state: SaleProvingSimulated, slot: Slot, challenge: ProofChallenge, onProve: OnProve, market: Market, currentPeriod: Period) {.async.} =
trace "Processing proving in simulated mode"
Expand All @@ -33,7 +34,7 @@ when codex_enable_proof_failures:
warn "Submitting INVALID proof", period = currentPeriod, slotId = slot.id
await market.submitProof(slot.id, Groth16Proof.default)
except ProviderError as e:
if not e.revertReason.contains("Invalid proof"):
if not e.msgDetail.contains("Invalid proof"):
onSubmitProofError(e, currentPeriod, slot.id)
except CatchableError as e:
onSubmitProofError(e, currentPeriod, slot.id)
Expand Down
23 changes: 23 additions & 0 deletions tests/examples.nim
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,21 @@ import std/times
import std/typetraits

import pkg/codex/contracts/requests
import pkg/codex/rng
import pkg/codex/contracts/proofs
import pkg/codex/sales/slotqueue
import pkg/codex/stores
import pkg/codex/units

import pkg/chronos
import pkg/stew/byteutils
import pkg/stint

import ./codex/helpers/randomchunker

export randomchunker
export units

proc exampleString*(length: int): string =
let chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
result = newString(length) # Create a new empty string with a given length
Expand Down Expand Up @@ -78,3 +88,16 @@ proc example*(_: type Groth16Proof): Groth16Proof =
b: G2Point.example,
c: G1Point.example
)

proc example*(_: type RandomChunker, blocks: int): Future[string] {.async.} =
# doAssert blocks >= 3, "must be more than 3 blocks"
let rng = Rng.instance()
let chunker = RandomChunker.new(
rng, size = DefaultBlockSize * blocks.NBytes, chunkSize = DefaultBlockSize)
var data: seq[byte]
while (let moar = await chunker.getBytes(); moar != []):
data.add moar
return byteutils.toHex(data)

proc example*(_: type RandomChunker): Future[string] {.async.} =
await RandomChunker.example(3)
13 changes: 13 additions & 0 deletions tests/integration/clioption.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pkg/questionable

type
CliOption* = object of RootObj
nodeIdx*: ?int
key*: string
value*: string

proc `$`*(option: CliOption): string =
var res = option.key
if option.value.len > 0:
res &= "=" & option.value
return res
16 changes: 8 additions & 8 deletions tests/integration/codexclient.nim
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import std/httpclient
import std/strutils
import std/sequtils

from pkg/libp2p import Cid, `$`, init
import pkg/stint
Expand Down Expand Up @@ -109,25 +108,26 @@ proc requestStorage*(
## Call request storage REST endpoint
##
let response = client.requestStorageRaw(cid, duration, reward, proofProbability, collateral, expiry, nodes, tolerance)
assert response.status == "200 OK"
if response.status != "200 OK":
doAssert(false, response.body)
PurchaseId.fromHex(response.body).catch

proc getPurchase*(client: CodexClient, purchaseId: PurchaseId): ?!RestPurchase =
let url = client.baseurl & "/storage/purchases/" & purchaseId.toHex
let body = client.http.getContent(url)
let json = ? parseJson(body).catch
RestPurchase.fromJson(json)
try:
let body = client.http.getContent(url)
let json = ? parseJson(body).catch
return RestPurchase.fromJson(json)
except CatchableError as e:
return failure e.msg

proc getSalesAgent*(client: CodexClient, slotId: SlotId): ?!RestSalesAgent =
let url = client.baseurl & "/sales/slots/" & slotId.toHex
echo "getting sales agent for id, ", slotId.toHex
try:
let body = client.http.getContent(url)
echo "get sales agent body: ", body
let json = ? parseJson(body).catch
return RestSalesAgent.fromJson(json)
except CatchableError as e:
echo "[client.getSalesAgent] error getting agent: ", e.msg
return failure e.msg

proc getSlots*(client: CodexClient): ?!seq[Slot] =
Expand Down
61 changes: 61 additions & 0 deletions tests/integration/codexconfig.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import std/options
import std/sequtils
import pkg/codex/units
import ./clioption
import ./nodeconfig

export nodeconfig
export clioption

type
CodexConfig* = ref object of NodeConfig
numNodes*: int
cliOptions*: seq[CliOption]
logTopics*: seq[string]

proc nodes*(config: CodexConfig, numNodes: int): CodexConfig =
if numNodes < 0:
raise newException(ValueError, "numNodes must be >= 0")

var startConfig = config
startConfig.numNodes = numNodes
return startConfig

proc simulateProofFailuresFor*(
config: CodexConfig,
providerIdx: int,
failEveryNProofs: int
): CodexConfig =

if providerIdx > config.numNodes - 1:
raise newException(ValueError, "provider index out of bounds")

var startConfig = config
startConfig.cliOptions.add(
CliOption(
nodeIdx: some providerIdx,
key: "--simulate-proof-failures",
value: $failEveryNProofs
)
)
return startConfig

proc withLogTopics*(
config: CodexConfig,
topics: varargs[string]
): CodexConfig =

var startConfig = config
startConfig.logTopics = startConfig.logTopics.concat(@topics)
return startConfig

proc withStorageQuota*(
config: CodexConfig,
quota: NBytes
): CodexConfig =

var startConfig = config
startConfig.cliOptions.add(
CliOption(key: "--storage-quota", value: $quota)
)
return startConfig
75 changes: 75 additions & 0 deletions tests/integration/codexprocess.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import pkg/questionable
import pkg/questionable/results
import pkg/confutils
import pkg/chronicles
import pkg/ethers
import pkg/libp2p
import std/os
import std/strutils
import codex/conf
import ./codexclient
import ./nodeprocess

export codexclient
export chronicles
export nodeprocess

logScope:
topics = "integration testing codex process"

type
CodexProcess* = ref object of NodeProcess
client: ?CodexClient

method workingDir(node: CodexProcess): string =
return currentSourcePath() / ".." / ".." / ".."

method executable(node: CodexProcess): string =
return "build" / "codex"

method startedOutput(node: CodexProcess): string =
return "REST service started"

method processOptions(node: CodexProcess): set[AsyncProcessOption] =
return {AsyncProcessOption.StdErrToStdOut}

method outputLineEndings(node: CodexProcess): string =
return "\n"

method onOutputLineCaptured(node: CodexProcess, line: string) =
discard

proc dataDir(node: CodexProcess): string =
let config = CodexConf.load(cmdLine = node.arguments)
return config.dataDir.string

proc ethAccount*(node: CodexProcess): Address =
let config = CodexConf.load(cmdLine = node.arguments)
without ethAccount =? config.ethAccount:
raiseAssert "eth account not set"
return Address(ethAccount)

proc apiUrl*(node: CodexProcess): string =
let config = CodexConf.load(cmdLine = node.arguments)
return "http://" & config.apiBindAddress & ":" & $config.apiPort & "/api/codex/v1"

proc client*(node: CodexProcess): CodexClient =
if client =? node.client:
return client
let client = CodexClient.new(node.apiUrl)
node.client = some client
return client

method stop*(node: CodexProcess) {.async.} =
logScope:
nodeName = node.name

await procCall NodeProcess(node).stop()

trace "stopping codex client"
if client =? node.client:
client.close()
node.client = none CodexClient

method removeDataDir*(node: CodexProcess) =
removeDir(node.dataDir)
6 changes: 6 additions & 0 deletions tests/integration/hardhatconfig.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import ./nodeconfig

export nodeconfig

type
HardhatConfig* = ref object of NodeConfig
Loading

0 comments on commit d70ab59

Please sign in to comment.