Skip to content

Commit

Permalink
CUDA: fix mul_mat_vec for CC 6.0
Browse files: browse the repository at this point in the history
  • Loading branch information
JohannesGaessler committed Feb 9, 2025
1 parent 19d3c82 commit c80a441
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions ggml/src/ggml-cuda/mmv.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -255,7 +255,12 @@ void ggml_cuda_op_mul_mat_vec(
GGML_ASSERT(src1_ncols == 1);

const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc;
#ifdef GGML_CUDA_F16
const enum ggml_prec prec = fast_fp16_available(cc) ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
#else
// FIXME: by default there is no code for CC 6.0, so trying to use FP16 intrinsics results in a crash
const enum ggml_prec prec = fast_fp16_available(cc) && cc != 600 ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32;
#endif // GGML_CUDA_F16


// ggml_cuda_op provides single, contiguous matrices
Expand Down

0 comments on commit c80a441

Please sign in to comment.