diff --git a/llamafile/copy.sh b/llamafile/copy.sh index 4f0031fd5a..d0c464b93b 100755 --- a/llamafile/copy.sh +++ b/llamafile/copy.sh @@ -7,6 +7,7 @@ scp llama.cpp/ggml-cuda.cu \ llama.cpp/ggml-cuda.h \ llama.cpp/ggml-impl.h \ llama.cpp/ggml-alloc.h \ + llama.cpp/ggml-common.h \ llama.cpp/ggml-backend.h \ llama.cpp/ggml-backend-impl.h \ llama.cpp/ggml.h \ diff --git a/llamafile/cuda.bat b/llamafile/cuda.bat index d6936d1339..a47067c86c 100644 --- a/llamafile/cuda.bat +++ b/llamafile/cuda.bat @@ -8,7 +8,6 @@ nvcc -arch=all ^ --shared ^ --forward-unknown-to-host-compiler ^ -Xcompiler="/nologo /EHsc /O2 /GR /MT" ^ - -use_fast_math ^ -DNDEBUG ^ -DGGML_BUILD=1 ^ -DGGML_SHARED=1 ^ diff --git a/llamafile/rocm.bat b/llamafile/rocm.bat index 5a4b039b18..58874d3c69 100644 --- a/llamafile/rocm.bat +++ b/llamafile/rocm.bat @@ -19,7 +19,7 @@ :: :: TODO(jart): How do we get this to not depend on VCRUNTIME140? -%HIP_PATH%\bin\clang++.exe ^ +"%HIP_PATH%\bin\clang++.exe" ^ -fuse-ld=lld ^ -shared ^ -nostartfiles ^ @@ -36,7 +36,7 @@ -D_XOPEN_SOURCE=600 ^ -D__HIP_PLATFORM_AMD__=1 ^ -D__HIP_PLATFORM_HCC__=1 ^ - -isystem %HIP_PATH%\include ^ + -isystem "%HIP_PATH%\include" ^ -O3 ^ -DNDEBUG ^ -D_DLL ^ @@ -50,7 +50,5 @@ --offload-arch=gfx1010,gfx1012,gfx906,gfx1030,gfx1031,gfx1032,gfx1100,gfx1101,gfx1102,gfx1103 ^ -o ggml-rocm.dll ^ ggml-cuda.cu ^ - -l%HIP_PATH%\lib\hipblas.lib ^ - -l%HIP_PATH%\lib\rocblas.lib ^ - -l%HIP_PATH%\lib\amdhip64.lib ^ + "-l%HIP_PATH%\lib\amdhip64.lib" ^ -lkernel32 diff --git a/llamafile/version.h b/llamafile/version.h index 3f702f9040..a20f962edb 100644 --- a/llamafile/version.h +++ b/llamafile/version.h @@ -1,8 +1,8 @@ #pragma once #define LLAMAFILE_MAJOR 0 -#define LLAMAFILE_MINOR 6 -#define LLAMAFILE_PATCH 2 +#define LLAMAFILE_MINOR 7 +#define LLAMAFILE_PATCH 0 #define LLAMAFILE_VERSION \ (100000000 * LLAMAFILE_MAJOR + 1000000 * LLAMAFILE_MINOR + LLAMAFILE_PATCH)