Skip to content

Commit

Permalink
Merge branch 'master' into dinghwah/DAOS-16268-Daostest
Browse files Browse the repository at this point in the history
Test-tag: DaosCoreTestRebuild
Allow-unstable-test: true
Doc-only: false
Required-githooks: true
Signed-off-by: Ding Ho <[email protected]>
  • Loading branch information
dinghwah committed Sep 12, 2024
2 parents a532b57 + f01114d commit 5a827bb
Show file tree
Hide file tree
Showing 28 changed files with 568 additions and 232 deletions.
2 changes: 1 addition & 1 deletion ci/gha_functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ provision_cluster() {
while [ $((SECONDS-START)) -lt $wait_seconds ]; do
if clush -B -S -l root -w "$nodestring" '[ -d /var/chef/reports ]'; then
# shellcheck disable=SC2016
clush -B -S -l root -w "$nodestring" --connect_timeout 30 --command_timeout 600 "if [ -e /root/job_info ]; then
clush -B -S -l root -w "$nodestring" --connect_timeout 30 --command_timeout 900 "if [ -e /root/job_info ]; then
cat /root/job_info
fi
echo \"Last provisioning run info:
Expand Down
6 changes: 1 addition & 5 deletions src/bio/bio_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,6 @@ dma_alloc_chunk(unsigned int cnt)

D_ASSERT(bytes > 0);

if (DAOS_FAIL_CHECK(DAOS_NVME_ALLOCBUF_ERR)) {
D_ERROR("Injected DMA buffer allocation error.\n");
return NULL;
}

D_ALLOC_PTR(chunk);
if (chunk == NULL) {
return NULL;
Expand Down Expand Up @@ -848,6 +843,7 @@ dma_map_one(struct bio_desc *biod, struct bio_iov *biov, void *arg)
bio_iov_set_raw_buf(biov, NULL);
return 0;
}
D_ASSERT(!BIO_ADDR_IS_GANG(&biov->bi_addr));

if (direct_scm_access(biod, biov)) {
struct umem_instance *umem = biod->bd_umem;
Expand Down
3 changes: 2 additions & 1 deletion src/bio/bio_bulk.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2021-2022 Intel Corporation.
* (C) Copyright 2021-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -640,6 +640,7 @@ bulk_map_one(struct bio_desc *biod, struct bio_iov *biov, void *data)
goto done;
}
D_ASSERT(!BIO_ADDR_IS_DEDUP(&biov->bi_addr));
D_ASSERT(!BIO_ADDR_IS_GANG(&biov->bi_addr));

hdl = bulk_get_hdl(biod, biov, roundup_pgs(pg_cnt), pg_off, arg);
if (hdl == NULL) {
Expand Down
3 changes: 1 addition & 2 deletions src/bio/bio_xstream.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
/* SPDK blob parameters */
#define DAOS_BS_CLUSTER_SZ (1ULL << 25) /* 32MB */
/* DMA buffer parameters */
#define DAOS_DMA_CHUNK_MB 8 /* 8MB DMA chunks */
#define DAOS_DMA_CHUNK_CNT_INIT 24 /* Per-xstream init chunks, 192MB */
#define DAOS_DMA_CHUNK_CNT_MAX 128 /* Per-xstream max chunks, 1GB */
#define DAOS_DMA_CHUNK_CNT_MIN 32 /* Per-xstream min chunks, 256MB */
Expand Down Expand Up @@ -207,7 +206,7 @@ bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size,
{
char *env;
int rc, fd;
unsigned int size_mb = DAOS_DMA_CHUNK_MB;
unsigned int size_mb = BIO_DMA_CHUNK_MB;

if (tgt_nr <= 0) {
D_ERROR("tgt_nr: %u should be > 0\n", tgt_nr);
Expand Down
4 changes: 4 additions & 0 deletions src/common/checksum.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,10 @@ daos_csummer_compare_csum_info(struct daos_csummer *obj,
match = daos_csummer_csum_compare(obj, ci_idx2csum(a, i),
ci_idx2csum(b, i),
a->cs_len);
if (unlikely(!match))
D_ERROR("Checksum mismatch at index %d/%d "DF_CI_BUF" != "DF_CI_BUF"\n", i,
a->cs_nr, DP_CI_BUF(ci_idx2csum(a, i), a->cs_len),
DP_CI_BUF(ci_idx2csum(b, i), b->cs_len));
}

return match;
Expand Down
5 changes: 2 additions & 3 deletions src/control/cmd/daos/pretty/selftest.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ func PrintSelfTestResult(out io.Writer, result *daos.SelfTestResult, verbose, sh
return errors.Errorf("nil %T", result)
}

rpcThroughput := float64(result.MasterLatency.Succeeded()) / result.Duration.Seconds()

rpcThroughput := result.RPCThroughput()
epRanks := ranklist.NewRankSet()
epTgts := hostlist.NewNumericSet()
for _, ep := range result.TargetEndpoints {
Expand All @@ -73,7 +72,7 @@ func PrintSelfTestResult(out io.Writer, result *daos.SelfTestResult, verbose, sh
}
if result.SendSize > 0 || result.ReplySize > 0 {
suffix := "B/s"
bw := rpcThroughput * (float64(result.SendSize) + float64(result.ReplySize))
bw := result.RPCBandwidth()
if !showBytes {
bw *= 8
suffix = "bps"
Expand Down
18 changes: 10 additions & 8 deletions src/control/cmd/daos/pretty/selftest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ func TestPretty_PrintSelfTestConfig(t *testing.T) {
Client/Server Network Test Parameters
-------------------------------------
Servers : All
Send RPC Size : 1.00 MiB
Reply RPC Size : 1.00 MiB
Send RPC Size : 1.00 KiB
Reply RPC Size : 1.00 KiB
RPCs Per Server: 10000
`,
Expand All @@ -56,8 +56,8 @@ Client/Server Network Test Parameters
Client/Server Network Test Parameters
-------------------------------------
Server : 1
Send RPC Size : 1.00 MiB
Reply RPC Size : 1.00 MiB
Send RPC Size : 1.00 KiB
Reply RPC Size : 1.00 KiB
RPCs Per Server: 10000
`,
Expand Down Expand Up @@ -85,8 +85,8 @@ Client/Server Network Test Parameters
Client/Server Network Test Parameters
-------------------------------------
Servers : All
Send RPC Size : 1.00 MiB
Reply RPC Size : 1.00 MiB
Send RPC Size : 1.00 KiB
Reply RPC Size : 1.00 KiB
RPCs Per Server : 10000
System Name : daos_server
Tag : 0
Expand Down Expand Up @@ -143,8 +143,8 @@ Client/Server Network Test Parameters
Client/Server Network Test Parameters
-------------------------------------
Servers : All
Send RPC Size : 1.00 MiB
Reply RPC Size : 1.00 MiB
Send RPC Size : 1.00 KiB
Reply RPC Size : 1.00 KiB
RPCs Per Server : 10000
System Name : daos_server
Tags : ERROR (0 tags)
Expand All @@ -169,6 +169,8 @@ Client/Server Network Test Parameters
func genResult(xfrm func(result *daos.SelfTestResult)) *daos.SelfTestResult {
cfg := &daos.SelfTestConfig{}
cfg.SetDefaults()
cfg.SendSizes = []uint64{1 << 20}
cfg.ReplySizes = cfg.SendSizes
result := &daos.SelfTestResult{
MasterEndpoint: daos.SelfTestEndpoint{Rank: 3, Tag: 0},
TargetEndpoints: []daos.SelfTestEndpoint{
Expand Down
17 changes: 16 additions & 1 deletion src/control/lib/daos/selftest.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ type (
var defaultLatencyPercentiles []uint64 = []uint64{50, 75, 90, 95, 99}

const (
defaultSendSize = 1 << 20 // 1MiB
defaultSendSize = 1 << 10 // 1KiB
defaultReplySize = defaultSendSize
defaultRepCount = 10000
defaultMaxInflight = 16
Expand Down Expand Up @@ -297,6 +297,8 @@ func (str *SelfTestResult) MarshalJSON() ([]byte, error) {
MasterEndpoint string `json:"master_endpoint"`
TargetEndpoints []string `json:"target_endpoints"`
EndpointLatencies map[string]*EndpointLatency `json:"target_latencies,omitempty"`
RPCThroughput float64 `json:"rpc_count_per_second"`
RPCBandwidth float64 `json:"rpc_bytes_per_second"`
*toJSON
}{
MasterEndpoint: str.MasterEndpoint.String(),
Expand All @@ -308,6 +310,8 @@ func (str *SelfTestResult) MarshalJSON() ([]byte, error) {
return eps
}(),
EndpointLatencies: epLatencies,
RPCThroughput: str.RPCThroughput(),
RPCBandwidth: str.RPCBandwidth(),
toJSON: (*toJSON)(str),
})
}
Expand Down Expand Up @@ -352,3 +356,14 @@ func (str *SelfTestResult) TargetRanks() (ranks []ranklist.Rank) {
}
return
}

// RPCThroughput calculates the number of RPCs per second.
//
// The rate is the count reported by MasterLatency.Succeeded() divided by the
// test duration in seconds. If the recorded duration is not positive the rate
// is undefined, so 0 is returned instead of +Inf/NaN: non-finite floats cannot
// be encoded by encoding/json, and this value is emitted as a JSON field.
func (str *SelfTestResult) RPCThroughput() float64 {
	seconds := str.Duration.Seconds()
	if seconds <= 0 {
		return 0
	}

	return float64(str.MasterLatency.Succeeded()) / seconds
}

// RPCBandwidth reports the transfer rate in bytes per second: the RPC
// throughput for the test multiplied by the combined send and reply sizes
// of a single RPC.
func (str *SelfTestResult) RPCBandwidth() float64 {
	bytesPerRPC := float64(str.SendSize) + float64(str.ReplySize)

	return str.RPCThroughput() * bytesPerRPC
}
2 changes: 2 additions & 0 deletions src/control/lib/daos/selftest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ func TestDaos_SelfTestResult_MarshalJSON(t *testing.T) {
"fail_count": 0
}
},
"rpc_count_per_second": 1500,
"rpc_bytes_per_second": 3072000,
"repetitions": 3000,
"send_size": 1024,
"reply_size": 1024,
Expand Down
2 changes: 1 addition & 1 deletion src/include/daos/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -851,7 +851,7 @@ enum {
#define DAOS_NVME_FAULTY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x50)
#define DAOS_NVME_WRITE_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x51)
#define DAOS_NVME_READ_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x52)
#define DAOS_NVME_ALLOCBUF_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x53)
#define DAOS_NVME_ALLOCBUF_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x53) /* deprecated */
#define DAOS_NVME_WAL_TX_LOST (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x54)

#define DAOS_POOL_CREATE_FAIL_CORPC (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x60)
Expand Down
84 changes: 78 additions & 6 deletions src/include/daos_srv/bio.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,47 @@
((addr)->ba_flags &= ~(BIO_FLAG_DEDUP_BUF))
#define BIO_ADDR_IS_CORRUPTED(addr) ((addr)->ba_flags & BIO_FLAG_CORRUPTED)
#define BIO_ADDR_SET_CORRUPTED(addr) ((addr)->ba_flags |= BIO_FLAG_CORRUPTED)
#define BIO_ADDR_IS_GANG(addr) ((addr)->ba_flags & BIO_FLAG_GANG)
#define BIO_ADDR_SET_GANG(addr) ((addr)->ba_flags |= BIO_FLAG_GANG)

/* Can support up to 16 flags for a BIO address */
enum BIO_FLAG {
/* The address is a hole */
BIO_FLAG_HOLE = (1 << 0),
/* The address is a deduped extent */
/* The address is a deduped extent, transient only flag */
BIO_FLAG_DEDUP = (1 << 1),
/* The address is a buffer for dedup verify */
/* The address is a buffer for dedup verify, transient only flag */
BIO_FLAG_DEDUP_BUF = (1 << 2),
/* The data located on the address is marked as corrupted */
BIO_FLAG_CORRUPTED = (1 << 3),
/* The address is a gang address */
BIO_FLAG_GANG = (1 << 4),
};

#define BIO_DMA_CHUNK_MB 8 /* 8MB DMA chunks */

/**
* It's used to represent an address on SCM, or an address on NVMe, or a gang address.
*
* A gang address consists of N addresses from scattered allocations. The scattered
* allocations may differ in size and media type; they are stored compactly on the
* SCM at the location pointed to by 'ba_off', laid out as follows:
*
* N 64bits offsets, N 32bits sizes, N 8bits media types
*/
typedef struct {
/*
* Byte offset within PMDK pmemobj pool for SCM;
* Byte offset within PMDK pmemobj pool for SCM or gang address;
* Byte offset within SPDK blob for NVMe.
*/
uint64_t ba_off;
/* DAOS_MEDIA_SCM or DAOS_MEDIA_NVME */
uint8_t ba_type;
uint8_t ba_pad1;
/* Number of addresses when BIO_FLAG_GANG is set */
uint8_t ba_gang_nr;
/* See BIO_FLAG enum */
uint16_t ba_flags;
uint32_t ba_pad2;
uint32_t ba_pad;
} bio_addr_t;

struct sys_db;
Expand Down Expand Up @@ -127,8 +144,63 @@ enum bio_bs_state {
BIO_BS_STATE_SETUP,
};

/*
 * Bytes needed to store 'gang_nr' gang entries (N offsets + N sizes + N media
 * types), rounded up to a multiple of 8 bytes. Returns 0 when there are no
 * entries.
 */
static inline unsigned int
bio_gaddr_size(uint8_t gang_nr)
{
	/* One entry is a 64-bit offset, a 32-bit size and an 8-bit media type */
	const unsigned int entry_sz = sizeof(uint64_t) + sizeof(uint32_t) + sizeof(uint8_t);

	return gang_nr ? roundup(entry_sz * gang_nr, sizeof(uint64_t)) : 0;
}

/*
 * Store entry 'i' (offset, size, media type) of a gang address.
 *
 * The entries live in the buffer that umem_off2ptr() resolves from
 * gaddr->ba_off, laid out as three consecutive arrays of ba_gang_nr items:
 * 64-bit offsets, then 32-bit sizes, then 8-bit media types.
 *
 * umm		umem instance used to translate gaddr->ba_off into a pointer
 * gaddr	gang address, BIO_FLAG_GANG must be set
 * i		entry index, must be within [0, gaddr->ba_gang_nr)
 * type		media type to store for the entry
 * len		size to store for the entry
 * off		offset to store for the entry
 */
static inline void
bio_gaddr_set(struct umem_instance *umm, bio_addr_t *gaddr, int i,
	      uint8_t type, uint32_t len, uint64_t off)
{
	uint8_t		*ptr;
	unsigned int	 ptr_off;

	D_ASSERT(BIO_ADDR_IS_GANG(gaddr));
	/* 'i' is signed: a negative index would write outside the entry arrays */
	D_ASSERT(i >= 0 && i < gaddr->ba_gang_nr);
	ptr = umem_off2ptr(umm, gaddr->ba_off);

	/* Offsets array starts at the beginning of the buffer */
	ptr_off = sizeof(uint64_t) * i;
	*((uint64_t *)(ptr + ptr_off)) = off;

	/* Sizes array follows the ba_gang_nr offsets */
	ptr_off = sizeof(uint64_t) * gaddr->ba_gang_nr + sizeof(uint32_t) * i;
	*((uint32_t *)(ptr + ptr_off)) = len;

	/* Media types array follows the offsets and sizes */
	ptr_off = (sizeof(uint64_t) + sizeof(uint32_t)) * gaddr->ba_gang_nr + i;
	*(ptr + ptr_off) = type;
}

/*
 * Load entry 'i' (offset, size, media type) of a gang address.
 *
 * The entries live in the buffer that umem_off2ptr() resolves from
 * gaddr->ba_off, laid out as three consecutive arrays of ba_gang_nr items:
 * 64-bit offsets, then 32-bit sizes, then 8-bit media types.
 *
 * umm		umem instance used to translate gaddr->ba_off into a pointer
 * gaddr	gang address, BIO_FLAG_GANG must be set
 * i		entry index, must be within [0, gaddr->ba_gang_nr)
 * type		output, media type of the entry
 * len		output, size of the entry
 * off		output, offset of the entry
 */
static inline void
bio_gaddr_get(struct umem_instance *umm, bio_addr_t *gaddr, int i,
	      uint8_t *type, uint32_t *len, uint64_t *off)
{
	uint8_t		*ptr;
	unsigned int	 ptr_off;

	D_ASSERT(BIO_ADDR_IS_GANG(gaddr));
	/* 'i' is signed: a negative index would read outside the entry arrays */
	D_ASSERT(i >= 0 && i < gaddr->ba_gang_nr);
	ptr = umem_off2ptr(umm, gaddr->ba_off);

	/* Offsets array starts at the beginning of the buffer */
	ptr_off = sizeof(uint64_t) * i;
	*off = *((uint64_t *)(ptr + ptr_off));

	/* Sizes array follows the ba_gang_nr offsets */
	ptr_off = sizeof(uint64_t) * gaddr->ba_gang_nr + sizeof(uint32_t) * i;
	*len = *((uint32_t *)(ptr + ptr_off));

	/* Media types array follows the offsets and sizes */
	ptr_off = (sizeof(uint64_t) + sizeof(uint32_t)) * gaddr->ba_gang_nr + i;
	*type = *(ptr + ptr_off);
}

static inline void
bio_addr_set(bio_addr_t *addr, uint16_t type, uint64_t off)
bio_addr_set(bio_addr_t *addr, uint8_t type, uint64_t off)
{
addr->ba_type = type;
addr->ba_off = umem_off2offset(off);
Expand Down
3 changes: 3 additions & 0 deletions src/include/daos_srv/vos_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#define VOS_POOL_DF_2_2 24
#define VOS_POOL_DF_2_4 25
#define VOS_POOL_DF_2_6 26
#define VOS_POOL_DF_2_8 28

struct dtx_rsrvd_uint {
void *dru_scm;
Expand Down Expand Up @@ -299,6 +300,8 @@ enum {
VOS_POOL_FEAT_EMBED_FIRST = (1ULL << 3),
/** Flat DKEY support enabled */
VOS_POOL_FEAT_FLAT_DKEY = (1ULL << 4),
/** Gang address for SV support */
VOS_POOL_FEAT_GANG_SV = (1ULL << 5),
};

/** Mask for any conditionals passed to the fetch */
Expand Down
3 changes: 3 additions & 0 deletions src/object/cli_csum.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
#include <daos/cont_props.h>
#include "obj_internal.h"

/** How many times to retry UPDATE RPCs on checksum error */
#define MAX_CSUM_RETRY 10

int dc_obj_csum_update(struct daos_csummer *csummer, struct cont_props props, daos_obj_id_t param,
daos_key_t *dkey, daos_iod_t *iods, d_sg_list_t *sgls, const uint32_t iod_nr,
struct dcs_layout *layout, struct dcs_csum_info **dkey_csum,
Expand Down
8 changes: 5 additions & 3 deletions src/object/cli_ec.c
Original file line number Diff line number Diff line change
Expand Up @@ -1408,13 +1408,15 @@ obj_ec_singv_split(daos_unit_oid_t oid, uint16_t layout_ver, struct daos_oclass_
{
uint64_t c_bytes = obj_ec_singv_cell_bytes(iod_size, oca);
uint32_t tgt_off = obj_ec_shard_off_by_layout_ver(layout_ver, dkey_hash, oca, oid.id_shard);
uint64_t tgt_size = min(c_bytes, iod_size - tgt_off * c_bytes);
char *data = sgl->sg_iovs[0].iov_buf;

D_ASSERT(iod_size != DAOS_REC_ANY);
D_ASSERTF(iod_size != DAOS_REC_ANY && iod_size == sgl->sg_iovs[0].iov_len,
DF_U64 " == %zu\n", iod_size, sgl->sg_iovs[0].iov_len);
if (tgt_off > 0)
memmove(data, data + tgt_off * c_bytes, c_bytes);
memmove(data, data + tgt_off * c_bytes, tgt_size);

sgl->sg_iovs[0].iov_len = c_bytes;
sgl->sg_iovs[0].iov_len = tgt_size;
return 0;
}

Expand Down
Loading

0 comments on commit 5a827bb

Please sign in to comment.