
Commit

fix: api changes
jhen0409 committed Jan 7, 2025
1 parent e498e65 commit df47e14
Showing 7 changed files with 215 additions and 38 deletions.
1 change: 1 addition & 0 deletions android/src/main/CMakeLists.txt
@@ -22,6 +22,7 @@ set(
${RNLLAMA_LIB_DIR}/ggml-threading.cpp
${RNLLAMA_LIB_DIR}/ggml-quants.c
${RNLLAMA_LIB_DIR}/log.cpp
${RNLLAMA_LIB_DIR}/llama-impl.cpp
${RNLLAMA_LIB_DIR}/llama-grammar.cpp
${RNLLAMA_LIB_DIR}/llama-sampling.cpp
${RNLLAMA_LIB_DIR}/llama-vocab.cpp
22 changes: 16 additions & 6 deletions android/src/main/jni.cpp
@@ -19,7 +19,7 @@

#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
#define LOGW(...) __android_log_print(ANDROID_LOG_WARN, TAG, __VA_ARGS__)

#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
static inline int min(int a, int b) {
return (a < b) ? a : b;
}
@@ -334,13 +334,18 @@ Java_com_rnllama_LlamaContext_initContext(
llama_free(llama->ctx);
}

std::vector<common_lora_adapter_info> lora_adapters;
std::vector<common_lora_adapter_info> lora;
const char *lora_chars = env->GetStringUTFChars(lora_str, nullptr);
if (lora_chars != nullptr && lora_chars[0] != '\0') {
common_lora_adapter_info la;
la.path = lora_chars;
la.scale = lora_scaled;
lora_adapters.push_back(la);
la.ptr = llama_lora_adapter_init(llama->model, la.path.c_str());
if (la.ptr == nullptr) {
LOGE("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
return -1;
}
lora.push_back(la);
}

if (lora_list != nullptr) {
@@ -354,13 +359,18 @@ Java_com_rnllama_LlamaContext_initContext(
common_lora_adapter_info la;
la.path = path_chars;
la.scale = readablemap::getFloat(env, lora_adapter, "scaled", 1.0f);
lora_adapters.push_back(la);
la.ptr = llama_lora_adapter_init(llama->model, la.path.c_str());
if (la.ptr == nullptr) {
LOGE("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
return -1;
}
lora.push_back(la);
env->ReleaseStringUTFChars(path, path_chars);
}
}
}
env->ReleaseStringUTFChars(lora_str, lora_chars);
int result = llama->applyLoraAdapters(lora_adapters);
int result = llama->applyLoraAdapters(lora);
if (result != 0) {
LOGI("[RNLlama] Failed to apply lora adapters");
llama_free(llama->ctx);
@@ -944,7 +954,7 @@ Java_com_rnllama_LlamaContext_getLoadedLoraAdapters(
auto llama = context_map[(long) context_ptr];
auto loaded_lora_adapters = llama->getLoadedLoraAdapters();
auto result = createWritableArray(env);
for (common_lora_adapter_container &la : loaded_lora_adapters) {
for (common_lora_adapter_info &la : loaded_lora_adapters) {
auto map = createWriteableMap(env);
putString(env, map, "path", la.path.c_str());
putDouble(env, map, "scaled", la.scale);
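The jni.cpp hunks above move LoRA adapter initialization into the binding layer: every common_lora_adapter_info entry now carries a ptr that must be filled via llama_lora_adapter_init before the list is handed to applyLoraAdapters, and failures are reported through the new LOGE macro. A minimal C++ sketch of that pattern, assuming the vendored common.h/llama.h headers used by this repo (the load_adapter helper is hypothetical, not part of the commit):

// Sketch only: mirrors the per-adapter init/error path used in jni.cpp above.
#include <string>
#include <vector>

#include "common.h"  // common_lora_adapter_info (vendored llama.cpp "common")
#include "llama.h"   // llama_lora_adapter_init

// Hypothetical helper: initialize one adapter entry, or fail the way the JNI
// code does (it logs via LOGE and returns -1 from initContext).
static bool load_adapter(llama_model * model,
                         const std::string & path,
                         float scale,
                         std::vector<common_lora_adapter_info> & out) {
    common_lora_adapter_info la;
    la.path  = path;
    la.scale = scale;
    // New in this commit: the caller owns initialization of la.ptr.
    la.ptr = llama_lora_adapter_init(model, la.path.c_str());
    if (la.ptr == nullptr) {
        return false;
    }
    out.push_back(la);
    return true;
}
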
166 changes: 166 additions & 0 deletions cpp/llama-impl.cpp
@@ -0,0 +1,166 @@
#include "llama-impl.h"

#include "llama.h"

#include <cinttypes>
#include <climits>
#include <cstdarg>
#include <cstring>
#include <vector>
#include <sstream>

struct llama_logger_state {
lm_ggml_log_callback log_callback = llama_log_callback_default;
void * log_callback_user_data = nullptr;
};

static llama_logger_state g_logger_state;

time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : lm_ggml_time_us()), t_acc(t_acc) {}

time_meas::~time_meas() {
if (t_start_us >= 0) {
t_acc += lm_ggml_time_us() - t_start_us;
}
}

void llama_log_set(lm_ggml_log_callback log_callback, void * user_data) {
lm_ggml_log_set(log_callback, user_data);
g_logger_state.log_callback = log_callback ? log_callback : llama_log_callback_default;
g_logger_state.log_callback_user_data = user_data;
}

static void llama_log_internal_v(lm_ggml_log_level level, const char * format, va_list args) {
va_list args_copy;
va_copy(args_copy, args);
char buffer[128];
int len = vsnprintf(buffer, 128, format, args);
if (len < 128) {
g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
} else {
char * buffer2 = new char[len + 1];
vsnprintf(buffer2, len + 1, format, args_copy);
buffer2[len] = 0;
g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
delete[] buffer2;
}
va_end(args_copy);
}

void llama_log_internal(lm_ggml_log_level level, const char * format, ...) {
va_list args;
va_start(args, format);
llama_log_internal_v(level, format, args);
va_end(args);
}

void llama_log_callback_default(lm_ggml_log_level level, const char * text, void * user_data) {
(void) level;
(void) user_data;
fputs(text, stderr);
fflush(stderr);
}

void replace_all(std::string & s, const std::string & search, const std::string & replace) {
if (search.empty()) {
return;
}
std::string builder;
builder.reserve(s.length());
size_t pos = 0;
size_t last_pos = 0;
while ((pos = s.find(search, last_pos)) != std::string::npos) {
builder.append(s, last_pos, pos - last_pos);
builder.append(replace);
last_pos = pos + search.length();
}
builder.append(s, last_pos, std::string::npos);
s = std::move(builder);
}

std::string format(const char * fmt, ...) {
va_list ap;
va_list ap2;
va_start(ap, fmt);
va_copy(ap2, ap);
int size = vsnprintf(NULL, 0, fmt, ap);
LM_GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
std::vector<char> buf(size + 1);
int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
LM_GGML_ASSERT(size2 == size);
va_end(ap2);
va_end(ap);
return std::string(buf.data(), size);
}

std::string llama_format_tensor_shape(const std::vector<int64_t> & ne) {
char buf[256];
snprintf(buf, sizeof(buf), "%5" PRId64, ne.at(0));
for (size_t i = 1; i < ne.size(); i++) {
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, ne.at(i));
}
return buf;
}

std::string llama_format_tensor_shape(const struct lm_ggml_tensor * t) {
char buf[256];
snprintf(buf, sizeof(buf), "%5" PRId64, t->ne[0]);
for (int i = 1; i < LM_GGML_MAX_DIMS; i++) {
snprintf(buf + strlen(buf), sizeof(buf) - strlen(buf), ", %5" PRId64, t->ne[i]);
}
return buf;
}

static std::string lm_gguf_data_to_str(enum lm_gguf_type type, const void * data, int i) {
switch (type) {
case LM_GGUF_TYPE_UINT8: return std::to_string(((const uint8_t *)data)[i]);
case LM_GGUF_TYPE_INT8: return std::to_string(((const int8_t *)data)[i]);
case LM_GGUF_TYPE_UINT16: return std::to_string(((const uint16_t *)data)[i]);
case LM_GGUF_TYPE_INT16: return std::to_string(((const int16_t *)data)[i]);
case LM_GGUF_TYPE_UINT32: return std::to_string(((const uint32_t *)data)[i]);
case LM_GGUF_TYPE_INT32: return std::to_string(((const int32_t *)data)[i]);
case LM_GGUF_TYPE_UINT64: return std::to_string(((const uint64_t *)data)[i]);
case LM_GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]);
case LM_GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]);
case LM_GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]);
case LM_GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false";
default: return format("unknown type %d", type);
}
}

std::string lm_gguf_kv_to_str(const struct lm_gguf_context * ctx_gguf, int i) {
const enum lm_gguf_type type = lm_gguf_get_kv_type(ctx_gguf, i);

switch (type) {
case LM_GGUF_TYPE_STRING:
return lm_gguf_get_val_str(ctx_gguf, i);
case LM_GGUF_TYPE_ARRAY:
{
const enum lm_gguf_type arr_type = lm_gguf_get_arr_type(ctx_gguf, i);
int arr_n = lm_gguf_get_arr_n(ctx_gguf, i);
const void * data = lm_gguf_get_arr_data(ctx_gguf, i);
std::stringstream ss;
ss << "[";
for (int j = 0; j < arr_n; j++) {
if (arr_type == LM_GGUF_TYPE_STRING) {
std::string val = lm_gguf_get_arr_str(ctx_gguf, i, j);
// escape quotes
replace_all(val, "\\", "\\\\");
replace_all(val, "\"", "\\\"");
ss << '"' << val << '"';
} else if (arr_type == LM_GGUF_TYPE_ARRAY) {
ss << "???";
} else {
ss << lm_gguf_data_to_str(arr_type, data, j);
}
if (j < arr_n - 1) {
ss << ", ";
}
}
ss << "]";
return ss.str();
}
default:
return lm_gguf_data_to_str(type, lm_gguf_get_val_data(ctx_gguf, i), 0);
}
}
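llama-impl.cpp is newly vendored into cpp/ and provides the logging plumbing (llama_log_set, llama_log_internal, a default stderr callback) plus small string helpers (replace_all, format, and GGUF key/value stringification). A short usage sketch, assuming the lm_-prefixed headers produced by scripts/bootstrap.sh:

// Sketch only: exercises the helpers defined above.
#include <cstdio>
#include <string>

#include "llama.h"       // llama_log_set
#include "llama-impl.h"  // format, replace_all

// Route llama.cpp log output to a custom sink instead of the default stderr.
static void my_log_cb(lm_ggml_log_level level, const char * text, void * /*user_data*/) {
    std::fprintf(stdout, "[llama:%d] %s", (int) level, text);
}

int main() {
    llama_log_set(my_log_cb, nullptr);

    // printf-style formatting into a std::string.
    std::string s = format("applied %d adapter(s) at scale %.2f", 1, 0.75);

    // In-place search/replace, as used above when escaping GGUF string values.
    replace_all(s, "adapter", "LoRA adapter");
    std::fprintf(stdout, "%s\n", s.c_str());
    return 0;
}
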
34 changes: 10 additions & 24 deletions cpp/rn-llama.hpp
@@ -250,7 +250,7 @@ struct llama_rn_context
std::string stopping_word;
bool incomplete = false;

std::vector<common_lora_adapter_container> lora_adapters;
std::vector<common_lora_adapter_info> lora;

~llama_rn_context()
{
@@ -303,8 +303,8 @@ struct llama_rn_context
{
params = params_;
common_init_result result = common_init_from_params(params);
model = result.model;
ctx = result.context;
model = result.model.get();
ctx = result.context.get();
if (model == nullptr)
{
LOG_ERROR("unable to load model: %s", params_.model.c_str());
@@ -747,33 +747,19 @@ struct llama_rn_context
std::string("]");
}

int applyLoraAdapters(std::vector<common_lora_adapter_info> lora_adapters) {
this->lora_adapters.clear();
auto containers = std::vector<common_lora_adapter_container>();
for (auto & la : lora_adapters) {
common_lora_adapter_container loaded_la;
loaded_la.path = la.path;
loaded_la.scale = la.scale;
loaded_la.adapter = llama_lora_adapter_init(model, la.path.c_str());
if (loaded_la.adapter == nullptr) {
LOG_ERROR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
return -1;
}

this->lora_adapters.push_back(loaded_la);
containers.push_back(loaded_la);
}
common_lora_adapters_apply(ctx, containers);
int applyLoraAdapters(std::vector<common_lora_adapter_info> lora) {
this->lora = lora;
common_lora_adapters_apply(ctx, lora);
return 0;
}

void removeLoraAdapters() {
this->lora_adapters.clear();
common_lora_adapters_apply(ctx, this->lora_adapters); // apply empty list
this->lora.clear();
common_lora_adapters_apply(ctx, this->lora); // apply empty list
}

std::vector<common_lora_adapter_container> getLoadedLoraAdapters() {
return this->lora_adapters;
std::vector<common_lora_adapter_info> getLoadedLoraAdapters() {
return this->lora;
}
};

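After this refactor llama_rn_context no longer initializes adapters itself: applyLoraAdapters stores the already-initialized entries and forwards them to common_lora_adapters_apply, while model and ctx are taken from the smart-pointer members of common_init_result via .get(). A hypothetical call site (includes and namespace qualification simplified; this is not the exact binding code):

// Sketch only: how a host binding could drive the simplified adapter API.
#include <cstdio>
#include <utility>
#include <vector>

#include "rn-llama.hpp"  // llama_rn_context, common_lora_adapter_info

void reload_adapters(llama_rn_context & ctx,
                     std::vector<common_lora_adapter_info> adapters) {
    // Entries are expected to arrive with la.ptr already initialized
    // (see the jni.cpp / RNLlamaContext.mm hunks).
    if (ctx.applyLoraAdapters(std::move(adapters)) != 0) {
        // The real bindings log the failure and tear the context down.
        return;
    }

    // The same entries are reported back to the JS-facing API.
    for (const auto & la : ctx.getLoadedLoraAdapters()) {
        std::printf("%s (scale %.2f)\n", la.path.c_str(), (double) la.scale);
    }

    // Detaching all adapters simply re-applies an empty list.
    ctx.removeLoraAdapters();
}
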
2 changes: 1 addition & 1 deletion example/ios/.xcode.env.local
@@ -1 +1 @@
export NODE_BINARY=/var/folders/g8/v75_3l3n23g909mshlzdj4wh0000gn/T/yarn--1736228143880-0.46934568361956774/node
export NODE_BINARY=/var/folders/g8/v75_3l3n23g909mshlzdj4wh0000gn/T/yarn--1736232782592-0.9546752819894395/node
26 changes: 19 additions & 7 deletions ios/RNLlamaContext.mm
@@ -154,13 +154,17 @@ + (instancetype)initWithParams:(NSDictionary *)params onProgress:(void (^)(unsig
@throw [NSException exceptionWithName:@"LlamaException" reason:@"Embedding is not supported in encoder-decoder models" userInfo:nil];
}

std::vector<common_lora_adapter_info> lora_adapters;
std::vector<common_lora_adapter_info> lora;
if (params[@"lora"]) {
common_lora_adapter_info la;
la.path = [params[@"lora"] UTF8String];
la.scale = 1.0f;
if (params[@"lora_scaled"]) la.scale = [params[@"lora_scaled"] floatValue];
lora_adapters.push_back(la);
la.ptr = llama_lora_adapter_init(context->llama->model, la.path.c_str());
if (la.ptr == nullptr) {
@throw [NSException exceptionWithName:@"LlamaException" reason:@"Failed to apply lora adapter" userInfo:nil];
}
lora.push_back(la);
}
if (params[@"lora_list"] && [params[@"lora_list"] isKindOfClass:[NSArray class]]) {
NSArray *lora_list = params[@"lora_list"];
@@ -171,11 +175,15 @@ + (instancetype)initWithParams:(NSDictionary *)params onProgress:(void (^)(unsig
common_lora_adapter_info la;
la.path = [path UTF8String];
la.scale = scale;
lora_adapters.push_back(la);
la.ptr = llama_lora_adapter_init(context->llama->model, la.path.c_str());
if (la.ptr == nullptr) {
@throw [NSException exceptionWithName:@"LlamaException" reason:@"Failed to apply lora adapter" userInfo:nil];
}
lora.push_back(la);
}
}
if (lora_adapters.size() > 0) {
int result = context->llama->applyLoraAdapters(lora_adapters);
if (lora.size() > 0) {
int result = context->llama->applyLoraAdapters(lora);
if (result != 0) {
delete context->llama;
@throw [NSException exceptionWithName:@"LlamaException" reason:@"Failed to apply lora adapters" userInfo:nil];
@@ -566,6 +574,10 @@ - (void)applyLoraAdapters:(NSArray *)loraAdapters {
common_lora_adapter_info la;
la.path = [loraAdapter[@"path"] UTF8String];
la.scale = [loraAdapter[@"scaled"] doubleValue];
la.ptr = llama_lora_adapter_init(llama->model, la.path.c_str());
if (la.ptr == nullptr) {
@throw [NSException exceptionWithName:@"LlamaException" reason:@"Failed to apply lora adapter" userInfo:nil];
}
lora_adapters.push_back(la);
}
int result = llama->applyLoraAdapters(lora_adapters);
@@ -579,9 +591,9 @@ - (void)removeLoraAdapters {
}

- (NSArray *)getLoadedLoraAdapters {
std::vector<common_lora_adapter_container> loaded_lora_adapters = llama->getLoadedLoraAdapters();
std::vector<common_lora_adapter_info> loaded_lora_adapters = llama->getLoadedLoraAdapters();
NSMutableArray *result = [[NSMutableArray alloc] init];
for (common_lora_adapter_container &la : loaded_lora_adapters) {
for (common_lora_adapter_info &la : loaded_lora_adapters) {
[result addObject:@{
@"path": [NSString stringWithUTF8String:la.path.c_str()],
@"scale": @(la.scale)
2 changes: 2 additions & 0 deletions scripts/bootstrap.sh
@@ -69,6 +69,7 @@ cp ./llama.cpp/src/llama-cparams.cpp ./cpp/llama-cparams.cpp
cp ./llama.cpp/src/llama-hparams.h ./cpp/llama-hparams.h
cp ./llama.cpp/src/llama-hparams.cpp ./cpp/llama-hparams.cpp
cp ./llama.cpp/src/llama-impl.h ./cpp/llama-impl.h
cp ./llama.cpp/src/llama-impl.cpp ./cpp/llama-impl.cpp

cp ./llama.cpp/src/llama-vocab.h ./cpp/llama-vocab.h
cp ./llama.cpp/src/llama-vocab.cpp ./cpp/llama-vocab.cpp
@@ -92,6 +93,7 @@ cp ./llama.cpp/common/sampling.cpp ./cpp/sampling.cpp
# List of files to process
files_add_lm_prefix=(
"./cpp/llama-impl.h"
"./cpp/llama-impl.cpp"
"./cpp/llama-vocab.h"
"./cpp/llama-vocab.cpp"
"./cpp/llama-grammar.h"
