Skip to content

Commit

Permalink
add more logs to determine schedulable GPUs
Browse files Browse the repository at this point in the history
  • Loading branch information
kaigai committed Jan 8, 2025
1 parent ebc3b95 commit 6041a14
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 82 deletions.
37 changes: 29 additions & 8 deletions src/arrow_fdw.c
Original file line number Diff line number Diff line change
Expand Up @@ -2716,21 +2716,33 @@ GetOptimalGpusForArrowFdw(PlannerInfo *root, RelOptInfo *baserel)
if (baseRelIsArrowFdw(baserel) &&
IsA(priv_list, List) && list_length(priv_list) == 2)
{
List *af_list = linitial(priv_list);
const char *relname = getRelOptInfoName(root, baserel);
List *arrow_files_list = linitial(priv_list);
ListCell *lc;

foreach (lc, af_list)
foreach (lc, arrow_files_list)
{
ArrowFileState *af_state = lfirst(lc);
gpumask_t __optimal_gpus;
gpumask_t __optimal_gpus;

__optimal_gpus = GetOptimalGpuForFile(af_state->filename);
if (__optimal_gpus == INVALID_GPUMASK)
return 0;
if (lc == list_head(af_list))
__optimal_gpus = 0;
if (lc == list_head(arrow_files_list))
{
optimal_gpus = __optimal_gpus;
if (optimal_gpus == 0)
__hdbxLogDebug("foreign-table='%s' arrow-file='%s' has no schedulable GPUs", relname, af_state->filename);
}
else
optimal_gpus &= __optimal_gpus;
{
__optimal_gpus &= optimal_gpus;
if (optimal_gpus != __optimal_gpus)
__hdbxLogDebug("foreign-table='%s' arrow-file='%s' reduced GPUs-set %08lx => %08lx", relname, af_state->filename, optimal_gpus, __optimal_gpus);
optimal_gpus = __optimal_gpus;
}
if (optimal_gpus == 0)
break;
}
}
return optimal_gpus;
Expand Down Expand Up @@ -4457,10 +4469,19 @@ __arrowFdwExecInit(ScanState *ss,

if (__optimal_gpus == INVALID_GPUMASK)
optimal_gpus = 0;
else if (af_states_list == NIL)
if (af_states_list == NIL)
{
optimal_gpus = __optimal_gpus;
if (optimal_gpus == 0)
__hdbxLogDebug("foreign-table='%s' arrow-file='%s' has no schedulable GPUs", RelationGetRelationName(frel), fname);
}
else
optimal_gpus &= __optimal_gpus;
{
__optimal_gpus &= optimal_gpus;
if (optimal_gpus != __optimal_gpus)
__hdbxLogDebug("foreign-table='%s' arrow-file='%s' reduced GPUs-Set %08lx -> %08lx", RelationGetRelationName(frel), fname, optimal_gpus, __optimal_gpus);
optimal_gpus = __optimal_gpus;
}
}
else if ((pts->xpu_task_flags & DEVKIND__NVIDIA_DPU) != 0)
{
Expand Down
28 changes: 5 additions & 23 deletions src/executor.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,6 @@ pgstromBuildSessionInfo(pgstromTaskState *pts,
session->kcxt_kvecs_ndims = pp_info->kvecs_ndims;
session->kcxt_extra_bufsz = pp_info->extra_bufsz;
session->cuda_stack_size = pp_info->cuda_stack_size;
session->xpu_task_flags = pts->xpu_task_flags;
session->hostEpochTimestamp = SetEpochTimestamp();
session->xactStartTimestamp = GetCurrentTransactionStartTimestamp();
session->session_xact_state = __build_session_xact_state(&buf);
Expand Down Expand Up @@ -1463,7 +1462,6 @@ pgstromExecInitTaskState(CustomScanState *node, EState *estate, int eflags)
elog(ERROR, "PG-Strom does not support table access method: %s",
get_am_name(am_oid));
/* Is GPU-Cache available? */
//FIXME: Try GpuCache only when DEVTASK__USED_GPUCACHE is set by planner
pts->gcache_desc = pgstromGpuCacheExecInit(pts);
if (pts->gcache_desc)
pts->xpu_task_flags |= DEVTASK__USED_GPUCACHE;
Expand All @@ -1476,10 +1474,8 @@ pgstromExecInitTaskState(CustomScanState *node, EState *estate, int eflags)
pp_info->brin_index_quals);
if ((pts->xpu_task_flags & DEVKIND__NVIDIA_GPU) != 0)
{
//FIXME: Try GPU-Direct SQL only when DEVTASK__USED_GPUDIRECT is
// set by planner
pts->optimal_gpus = GetOptimalGpuForRelation(rel);
if (pts->optimal_gpus)
if (pts->optimal_gpus != 0)
{
/*
* If particular GPUs are optimal, we can use
Expand Down Expand Up @@ -2224,31 +2220,17 @@ pgstromGpuDirectExplain(pgstromTaskState *pts,
}
else
{
int head = -1;
int base;
uint64 count;

appendStringInfo(&buf, "enabled (N=%d,", numGpuDevAttrs);
appendStringInfo(&buf, "enabled (N=%d,", get_bitcount(pts->optimal_gpus));
base = buf.len;
for (int k=0; k <= numGpuDevAttrs; k++)
{
if (k < numGpuDevAttrs &&
(pts->optimal_gpus & (1UL<<k)) != 0)
{
if (head < 0)
{
appendStringInfo(&buf, "%s%d",
buf.len == base ? "GPU" : ",", k);
head = k;
}
}
else if (head >= 0)
if ((pts->optimal_gpus & (1UL<<k)) != 0)
{
if (head + 2 == k)
appendStringInfo(&buf, ",%d", k-1);
else if (head + 2 > k)
appendStringInfo(&buf, "-%d", k-1);
head = -1;
appendStringInfo(&buf, "%s%d",
base == buf.len ? "GPU" : ",", k);
}
}
if (es->analyze && ps_state)
Expand Down
57 changes: 22 additions & 35 deletions src/gpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "pg_strom.h"
#include "cuda_common.h"

#define COMPONENT_LABEL "gpu-device"
/* variable declarations */
GpuDevAttributes *gpuDevAttrs = NULL;
int numGpuDevAttrs = 0;
Expand Down Expand Up @@ -49,17 +50,6 @@ static struct {
#undef DEV_ATTR
};

#define __Info(format,...) \
do { \
if (heterodb_extra_ereport_level >= 1) \
elog(LOG, "gpu-device: [info]" format, ##__VA_ARGS__); \
} while(0)
#define __Debug(format,...) \
do { \
if (heterodb_extra_ereport_level >= 2) \
elog(LOG, "gpu-device: [debug]" format, ##__VA_ARGS__); \
} while(0)

static const char *
sysfs_read_line(const char *path)
{
Expand Down Expand Up @@ -580,14 +570,15 @@ GetOptimalGpuForFile(const char *pathname)
* GetOptimalGpuForTablespace
*/
static gpumask_t
GetOptimalGpuForTablespace(Oid tablespace_oid)
GetOptimalGpuForTablespace(Oid tablespace_oid, const char *relname)
{
tablespace_optimal_gpu_entry *hentry;
bool found;
gpumask_t optimal_gpus;
bool found;

if (!pgstrom_gpudirect_enabled)
{
__Info("GPU-Direct SQL disabled: pg_strom.gpudirect_enabled = off");
__hdbxLogInfo("GPU-Direct SQL disabled: pg_strom.gpudirect_enabled = off");
return 0UL;
}

Expand Down Expand Up @@ -621,12 +612,11 @@ GetOptimalGpuForTablespace(Oid tablespace_oid)
PG_TRY();
{
name = get_tablespace_name(tablespace_oid);

path = GetDatabasePath(MyDatabaseId, tablespace_oid);
if (name)
strncpy(hentry->tablespace_name, name, NAMEDATALEN);
else
sprintf(hentry->tablespace_name, "tablespace-%u", tablespace_oid);
path = GetDatabasePath(MyDatabaseId, tablespace_oid);
hentry->optimal_gpus = GetOptimalGpuForFile(path);
}
PG_CATCH();
Expand All @@ -639,16 +629,16 @@ GetOptimalGpuForTablespace(Oid tablespace_oid)
}
PG_END_TRY();
}
optimal_gpus = hentry->optimal_gpus;
if (optimal_gpus == INVALID_GPUMASK)
optimal_gpus = 0UL;
if (optimal_gpus == 0)
__hdbxLogInfo("GPU-Direct SQL disabled: no schedulable GPUs for '%s' on top of the tablespace '%s'",
relname, hentry->tablespace_name);
else
{
if (hentry->optimal_gpus == 0)
__Info("GPU-Direct SQL disabled: no assinable GPUs on the tablespace '%s'",
hentry->tablespace_name);
else
__Info("GPU-Direct SQL: tablespace='%s' optimal-GPUs=%08lx",
hentry->tablespace_name, hentry->optimal_gpus);
}
return hentry->optimal_gpus;
__hdbxLogInfo("GPU-Direct SQL: relation='%s' tablespace='%s' optimal-GPUs=%08lx",
relname, hentry->tablespace_name, optimal_gpus);
return optimal_gpus;
}

/*
Expand All @@ -657,16 +647,13 @@ GetOptimalGpuForTablespace(Oid tablespace_oid)
gpumask_t
GetOptimalGpuForRelation(Relation relation)
{
const char *relname = RelationGetRelationName(relation);
Oid tablespace_oid;
gpumask_t optimal_gpus;

/* only heap relation */
Assert(RelationGetForm(relation)->relam == HEAP_TABLE_AM_OID);
tablespace_oid = RelationGetForm(relation)->reltablespace;

if ((optimal_gpus = GetOptimalGpuForTablespace(tablespace_oid)) == INVALID_GPUMASK)
return 0;
return optimal_gpus;
return GetOptimalGpuForTablespace(tablespace_oid, relname);
}

/*
Expand All @@ -681,7 +668,7 @@ GetOptimalGpuForBaseRel(PlannerInfo *root, RelOptInfo *baserel)

if (!pgstrom_gpudirect_enabled)
{
__Info("GPU-Direct SQL disabled: pg_strom.gpudirect_enabled = off");
__hdbxLogInfo("GPU-Direct SQL disabled: pg_strom.gpudirect_enabled = off");
return 0UL;
}
if (baseRelIsArrowFdw(baserel))
Expand All @@ -690,14 +677,14 @@ GetOptimalGpuForBaseRel(PlannerInfo *root, RelOptInfo *baserel)
total_sz = (size_t)baserel->pages * (size_t)BLCKSZ;
if (total_sz < pgstrom_gpudirect_threshold)
{
__Info("GPU-Direct SQL disabled: estimated relation size (%s; %s) is smaller than the threshold (%s)",
__hdbxLogInfo("GPU-Direct SQL disabled: estimated relation size (%s; %s) is smaller than the threshold (%s)",
getRelOptInfoName(root, baserel),
format_bytesz(total_sz),
format_bytesz(pgstrom_gpudirect_threshold));
return 0UL; /* table is too small */
}

optimal_gpus = GetOptimalGpuForTablespace(baserel->reltablespace);
optimal_gpus = GetOptimalGpuForTablespace(baserel->reltablespace,
getRelOptInfoName(root, baserel));
if (optimal_gpus != INVALID_GPUMASK)
{
RangeTblEntry *rte = root->simple_rte_array[baserel->relid];
Expand All @@ -708,7 +695,7 @@ GetOptimalGpuForBaseRel(PlannerInfo *root, RelOptInfo *baserel)
relpersistence != RELPERSISTENCE_UNLOGGED)
{
optimal_gpus = 0;
__Info("GPU-Direct SQL disabled: not supported on temporary tables");
__hdbxLogInfo("GPU-Direct SQL disabled: not supported on temporary tables");
}
}
return optimal_gpus;
Expand Down
36 changes: 20 additions & 16 deletions src/gpu_service.c
Original file line number Diff line number Diff line change
Expand Up @@ -141,26 +141,28 @@ static const char *pgstrom_fatbin_image_filename = "/dev/null";
static void
gpuservLoggerReport(const char *fmt, ...) pg_attribute_printf(1, 2);

#define __gsLogNoCxt(fmt,...) \
gpuservLoggerReport("GPU-Serv|LOG|%s|%d|%s|" fmt "\n", \
__basename(__FILE__), \
__LINE__, \
__FUNCTION__, \
##__VA_ARGS__)

#define __gsLogCxt(gcontext,fmt,...) \
gpuservLoggerReport("GPU%d|LOG|%s|%d|%s|" fmt "\n", \
gcontext->cuda_dindex, \
__basename(__FILE__), \
__LINE__, \
__FUNCTION__, \
##__VA_ARGS__)
#define __gsLog(fmt, ...) \
do { \
if (!gcontext) \
gpuservLoggerReport("GPU-Serv|LOG|%s|%d|%s|" fmt "\n", \
__basename(__FILE__), \
__LINE__, \
__FUNCTION__, \
##__VA_ARGS__); \
if (GpuWorkerCurrentContext) \
__gsLogCxt(GpuWorkerCurrentContext,fmt,##__VA_ARGS__); \
else \
gpuservLoggerReport("GPU%d|LOG|%s|%d|%s|" fmt "\n", \
gcontext->cuda_dindex, \
__basename(__FILE__), \
__LINE__, \
__FUNCTION__, \
##__VA_ARGS__); \
__gsLogNoCxt(fmt,##__VA_ARGS__); \
} while(0)

#define __gsLog(fmt, ...) \
__gsLogCxt(GpuWorkerCurrentContext,fmt,##__VA_ARGS__)

#define __gsDebug(fmt, ...) \
do { \
if (gpuserv_shared_state && \
Expand Down Expand Up @@ -3286,7 +3288,9 @@ gpuservHandleOpenSession(gpuClient *gclient, XpuCommand *xcmd)
iov.iov_base = &resp;
iov.iov_len = resp.length;
__gpuClientWriteBack(gclient, &iov, 1);

__gsLogNoCxt("Open session (GPUs-set: %08lx, Task-flags: %08x)",
gclient->optimal_gpus,
gclient->xpu_task_flags);
return true;

error:
Expand Down
14 changes: 14 additions & 0 deletions src/pg_strom.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,20 @@ extern long PAGES_PER_BLOCK; /* (BLCKSZ / PAGE_SIZE) */
* extra.c
*/
extern int heterodb_extra_ereport_level;
/*
 * Logging helpers gated by heterodb_extra_ereport_level.
 *
 * Every helper emits through elog(LOG, ...) and prefixes the message with
 * the current source file name (__FILE__) and a severity tag:
 *
 *   __hdbxLogError - always emitted,                        tag "[error]"
 *   __hdbxLogInfo  - emitted when the level is 1 or higher, tag "[info]"
 *   __hdbxLogDebug - emitted when the level is 2 or higher, tag "[debug]"
 *
 * 'fmt' must be a string literal because it is concatenated with the
 * prefix at compile time; the remaining arguments are printf-style.
 */
#define __hdbxLogError(fmt,...) \
	do { \
		elog(LOG, __FILE__ ": [error] " fmt, ##__VA_ARGS__); \
	} while(0)
#define __hdbxLogInfo(fmt,...) \
	do { \
		if (heterodb_extra_ereport_level >= 1) \
			elog(LOG, __FILE__ ": [info] " fmt, ##__VA_ARGS__); \
	} while(0)
/* the tag must be "[debug]" so debug output is not mistaken for errors */
#define __hdbxLogDebug(fmt,...) \
	do { \
		if (heterodb_extra_ereport_level >= 2) \
			elog(LOG, __FILE__ ": [debug] " fmt, ##__VA_ARGS__); \
	} while(0)
extern void heterodbExtraEreport(int elevel);
extern heterodb_extra_ereport_callback_type
heterodbExtraRegisterEreportCallback(heterodb_extra_ereport_callback_type callback);
Expand Down

0 comments on commit 6041a14

Please sign in to comment.