From b598cf84fa27e5610adce287ef0f29687abcf13d Mon Sep 17 00:00:00 2001 From: mike dupont Date: Wed, 22 Nov 2023 16:46:32 -0500 Subject: [PATCH] compiling and running --- Makefile | 4 +- common/common.h | 2 +- common/grammar-parser.cpp | 2 +- examples/batched-bench/batched-bench.cpp | 24 +++---- examples/batched/batched.cpp | 10 +-- examples/llava/llava.cpp | 24 +++---- examples/main/main.cpp | 5 ++ examples/save-load-state/save-load-state.cpp | 8 +-- examples/server/server.cpp | 73 ++++++++++++++------ examples/simple/simple.cpp | 14 ++-- llama.cpp | 70 +++++++++---------- llama.h | 25 ++++++- 12 files changed, 158 insertions(+), 103 deletions(-) diff --git a/Makefile b/Makefile index c61580b7fef65..2fed76e369e28 100644 --- a/Makefile +++ b/Makefile @@ -734,5 +734,5 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -tests/test-c.o: tests/test-c.c llama.h - $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ +tests/test-c.o: tests/test-c.cpp llama.h + $(CXX) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ diff --git a/common/common.h b/common/common.h index 88fa13fc067c2..77fe3093cd9e7 100644 --- a/common/common.h +++ b/common/common.h @@ -42,7 +42,7 @@ extern char const *LLAMA_BUILD_TARGET; // int32_t get_num_physical_cores(); -struct gpt_params { +struct gpt_params : refl::attr::usage::type{ uint32_t seed = -1; // RNG seed int32_t n_threads = get_num_physical_cores(); diff --git a/common/grammar-parser.cpp b/common/grammar-parser.cpp index 59bcf0d7392cb..503ed4212f313 100644 --- a/common/grammar-parser.cpp +++ b/common/grammar-parser.cpp @@ -219,7 +219,7 @@ namespace grammar_parser { // in original rule, replace previous symbol with reference to generated rule out_elements.resize(last_sym_start); - llama_grammar_element(LLAMA_GRETYPE_RULE_REF, sub_rule_id) a; + llama_grammar_element a(LLAMA_GRETYPE_RULE_REF, sub_rule_id); out_elements.push_back(a); pos = parse_space(pos + 1, is_nested); diff --git a/examples/batched-bench/batched-bench.cpp b/examples/batched-bench/batched-bench.cpp index b8758a5f41693..ee3ad3b8c8307 100644 --- a/examples/batched-bench/batched-bench.cpp +++ b/examples/batched-bench/batched-bench.cpp @@ -121,18 +121,18 @@ int main(int argc, char ** argv) { for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) { const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i)); - llama_batch batch_view = { - .n_tokens=n_tokens, - .token=batch.token + i, - .embd=nullptr, - .pos=batch.pos + i, - .n_seq_id=batch.n_seq_id + i, - .seq_id=batch.seq_id + i, - .logits=batch.logits + i, - .all_pos_0=0, - .all_pos_1=0, - .all_seq_id=0, // unused - }; + llama_batch batch_view( + /* .n_tokens= */ n_tokens, + /* .token= */ batch.token + i, + /* .embd= */ nullptr, + /* .pos= */ batch.pos + i, + /* .n_seq_id= */ batch.n_seq_id + i, + /* .seq_id= */ batch.seq_id + i, + /* .logits= */ batch.logits + i, + /* .all_pos_0= */0, + /* .all_pos_1= */0, + /* .all_seq_id= */0 // unused + ); const int ret = llama_decode(ctx, batch_view); if (ret != 0) { diff --git a/examples/batched/batched.cpp b/examples/batched/batched.cpp index 6ce3944f28986..2a872e72ddd86 100644 --- a/examples/batched/batched.cpp +++ b/examples/batched/batched.cpp @@ -169,13 +169,13 @@ int main(int argc, char ** argv) { candidates.reserve(n_vocab); for (llama_token token_id = 0; token_id < n_vocab; token_id++) { - 
candidates.emplace_back(llama_token_data{ - .id=token_id, - .logit=logits[token_id], - .p=0.0f }); + candidates.emplace_back(llama_token_data( + token_id, + logits[token_id], + 0.0f )); } - llama_token_data_array candidates_p = { .data=candidates.data(), .size=candidates.size(), .sorted=false }; + llama_token_data_array candidates_p (candidates.data(), candidates.size(), false ); const int top_k = 40; const float top_p = 0.9f; diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index e9bf9ee0998c2..9b3bbfd3c7049 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -75,18 +75,18 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_ if (n_eval > n_batch) { n_eval = n_batch; } - llama_batch batch = { - .n_tokens=int32_t(n_eval), - .token=nullptr, - .embd=(image_embed->embed+i*n_embd), - .pos=nullptr, - .n_seq_id=nullptr, - .seq_id=nullptr, - .logits=nullptr, - .all_pos_0=*n_past, - .all_pos_1=1, - .all_seq_id=0 - }; + llama_batch batch( + /* .n_tokens= */int32_t(n_eval), + /* .token= */nullptr, + /* .embd= */(image_embed->embed+i*n_embd), + /* .pos= */nullptr, + /* .n_seq_id= */nullptr, + /* .seq_id= */nullptr, + /* .logits= */nullptr, + /* .all_pos_0= */*n_past, + /* .all_pos_1= */1, + /* .all_seq_id= */0 + ); if (llama_decode(ctx_llama, batch)) { fprintf(stderr, "%s : failed to eval\n", __func__); return false; diff --git a/examples/main/main.cpp b/examples/main/main.cpp index de24283ab8020..98d07bd5f2d1d 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -99,11 +99,16 @@ static void sigint_handler(int signo) { } } #endif +using namespace refl; int main(int argc, char ** argv) { gpt_params params; g_params = ¶ms; + using Td = type_descriptor; + //constexpr auto tbl = descriptor::get_attribute(Td{}); + //constexpr auto tbl_name = REFL_MAKE_CONST_STRING(tbl.name); + if (!gpt_params_parse(argc, argv, params)) { return 1; } diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp index fb5a1066a0b60..4c2336f3b595d 100644 --- a/examples/save-load-state/save-load-state.cpp +++ b/examples/save-load-state/save-load-state.cpp @@ -67,10 +67,10 @@ int main(int argc, char ** argv) { std::vector candidates; candidates.reserve(n_vocab); for (llama_token token_id = 0; token_id < n_vocab; token_id++) { - candidates.emplace_back(llama_token_data{ - .id=token_id, - .logit=logits[token_id], - .p=0.0f}); + candidates.emplace_back(llama_token_data( + token_id, + logits[token_id], + 0.0f)); } llama_token_data_array candidates_p(candidates.data(), candidates.size(), false ); auto next_token = llama_sample_token(ctx, &candidates_p); diff --git a/examples/server/server.cpp b/examples/server/server.cpp index de8236ab47058..1bf37cf1a0e19 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -31,8 +31,16 @@ using json = nlohmann::json; -struct server_params +struct server_params : refl::attr::usage::type { + + server_params(): + hostname( "127.0.0.1"), + public_path(public_path), + port(port), + read_timeout(read_timeout), + write_timeout( 600) {}; + std::string hostname = "127.0.0.1"; std::string public_path = "examples/server/public"; int32_t port = 8080; @@ -522,6 +530,28 @@ struct llama_server_context std::vector queue_results; std::mutex mutex_tasks; std::mutex mutex_results; + llama_server_context(): + model(nullptr), + ctx(nullptr), + clp_ctx(nullptr), + params(params), + batch(batch), + multimodal(false), + clean_kv_cache( true), + all_slots_are_idle( false), + 
add_bos_token( true), + //int32_t id_gen; + //int32_t n_ctx; // total context for all clients / slots + system_need_update(false){} + //std::string system_prompt; + //std::vector system_tokens; + //std::string name_user; // this should be the antiprompt + //std::string name_assistant; + //std::vector slots; + //std::vector queue_tasks; + //std::vector queue_results; + //std::mutex mutex_tasks; + //std::mutex mutex_results; ~llama_server_context() { @@ -1303,7 +1333,7 @@ struct llama_server_context for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) { const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i)); - llama_batch batch_view = { + llama_batch batch_view( n_tokens, batch.token + i, nullptr, @@ -1311,8 +1341,8 @@ struct llama_server_context batch.n_seq_id + i, batch.seq_id + i, batch.logits + i, - 0, 0, 0, // unused - }; + 0, 0, 0 // unused + ); if (llama_decode(ctx, batch_view)) { LOG_TEE("%s : failed to eval\n", __func__); @@ -1665,19 +1695,18 @@ struct llama_server_context for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) { const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i)); - llama_batch batch_view = - { - .n_tokens=n_tokens, - .token=batch.token + i, - .embd=nullptr, - .pos=batch.pos + i, - .n_seq_id=batch.n_seq_id + i, - .seq_id=batch.seq_id + i, - .logits=batch.logits + i, - .all_pos_0=.0, - .all_pos_1=0, - .all_seq_id=0, // unused - }; + llama_batch batch_view( + /* .n_tokens= */n_tokens, + /* .token= */batch.token + i, + /* .embd= */nullptr, + /* .pos= */batch.pos + i, + /* .n_seq_id= */batch.n_seq_id + i, + /* .seq_id= */batch.seq_id + i, + /* .logits= */batch.logits + i, + /* .all_pos_0= */.0, + /* .all_pos_1= */0, + /* .all_seq_id= */0 // unused + ); const int ret = llama_decode(ctx, batch_view); if (ret != 0) @@ -1724,10 +1753,10 @@ struct llama_server_context slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3; } - llama_token_data_array cur_p = { - .data=slot.ctx_sampling->cur.data(), - .size=slot.ctx_sampling->cur.size(), - .sorted=false }; + llama_token_data_array cur_p( + slot.ctx_sampling->cur.data(), + slot.ctx_sampling->cur.size(), + false ); result.tok = id; const int32_t n_probs = slot.sparams.n_probs; @@ -2596,4 +2625,4 @@ int main(int argc, char **argv) llama_backend_free(); return 0; -} +} diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp index 0e30f32567ce5..92a5442604dd7 100644 --- a/examples/simple/simple.cpp +++ b/examples/simple/simple.cpp @@ -124,15 +124,15 @@ int main(int argc, char ** argv) { candidates.reserve(n_vocab); for (llama_token token_id = 0; token_id < n_vocab; token_id++) { - candidates.emplace_back(llama_token_data{ .id=token_id, - .logit=logits[token_id], - .p=0.0f }); + candidates.emplace_back(llama_token_data( token_id, + logits[token_id], + 0.0f )); } - llama_token_data_array candidates_p = { - .data=candidates.data(), - .size=candidates.size(), - .sorted=false }; + llama_token_data_array candidates_p( + candidates.data(), + candidates.size(), + false ); // sample the most likely token const llama_token new_token_id = llama_sample_token_greedy(ctx, &candidates_p); diff --git a/llama.cpp b/llama.cpp index 01e675019851c..9bf27d69e53d1 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9321,18 +9321,18 @@ int llama_eval_embd( int n_past) { llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1); - llama_batch batch = { - .n_tokens=n_tokens, - .token=nullptr, - .embd=embd, - .pos=nullptr, - .n_seq_id=nullptr, - .seq_id=nullptr, 
- .logits=nullptr, - .all_pos_0=n_past, - .all_pos_1=1, - .all_seq_id=0 - }; + llama_batch batch( + n_tokens, + nullptr, + embd, + nullptr, + nullptr, + nullptr, + nullptr, + n_past, + 1, + 0 + ); const int ret = llama_decode_internal(*ctx, batch); if (ret < 0) { @@ -9352,34 +9352,32 @@ struct llama_batch llama_batch_get_one( int32_t n_tokens, llama_pos pos_0, llama_seq_id seq_id) { - llama_batch b ={ - .n_tokens = n_tokens, - .token = tokens, - .embd = nullptr, - .pos = nullptr, - .n_seq_id = nullptr, - .seq_id = nullptr, - .logits = nullptr, - .all_pos_0 = pos_0, - .all_pos_1 = 1, - .all_seq_id = seq_id, - }; + llama_batch b( + n_tokens, + tokens, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + pos_0, + 1, + seq_id); return b; } struct llama_batch llama_batch_init(int32_t n_tokens, int32_t embd, int32_t n_seq_max) { - llama_batch batch = { - .n_tokens = 0, - .embd=nullptr, - .pos=nullptr, - .n_seq_id=nullptr, - .seq_id=nullptr, - .logits=nullptr, - .all_pos_0=0, - .all_pos_1=0, - .all_seq_id=0 - - }; + llama_batch batch( + /* .n_tokens = */ 0, + /* .token */ (llama_token *)nullptr, + /* .embd= */ (float *)nullptr, + /* .pos= */ (llama_pos *)nullptr, + /* .n_seq_id= */ (int32_t *)nullptr, + /* .seq_id= */ (llama_seq_id **)nullptr, + /* .logits= */ (int8_t *)nullptr, + /* .all_pos_0= */ 0, + /* .all_pos_1= */ 0 , + /* .all_seq_id= */ 0); if (embd) { batch.embd = (float *) malloc(sizeof(float) * n_tokens * embd); diff --git a/llama.h b/llama.h index 32d5dff8d49e7..ba8656341fbef 100644 --- a/llama.h +++ b/llama.h @@ -152,6 +152,29 @@ extern "C" { // - logits : if zero, the logits for the respective token will not be output // typedef struct llama_batch : refl::attr::usage::type{ + + llama_batch(int32_t n_tokens, + llama_token * token, + float * embd, + llama_pos * pos, + int32_t * n_seq_id, + llama_seq_id ** seq_id, + int8_t * logits, + llama_pos all_pos_0, + llama_pos all_pos_1, + llama_seq_id all_seq_id + ) : + n_tokens(n_tokens), + token(token), + embd(embd), + pos(pos), + n_seq_id(n_seq_id), + seq_id(seq_id), + logits(logits), + all_pos_0(all_pos_0), + all_pos_1(all_pos_1), + all_seq_id(all_seq_id) {} + int32_t n_tokens; llama_token * token; @@ -254,7 +277,7 @@ extern "C" { llama_grammar_element( enum llama_gretype type, uint32_t value // Unicode code point or rule ID ):type(type), value(value){} - llama_grammar_element( ):type(0), value(0){} + llama_grammar_element( ):type(llama_gretype(0)), value(0){} enum llama_gretype type; uint32_t value; // Unicode code point or rule ID } llama_grammar_element;
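
The recurring change in this patch is mechanical: C99 designated initializers such as { .n_tokens = ..., .token = ... } are replaced with explicit constructor calls, with the former field names kept as inline comments at each call site. Once a struct has a user-declared constructor (llama_batch gains one in the llama.h hunk above, and the structs now also inherit from refl::attr::usage::type), it is no longer a C++ aggregate, so designated-initializer syntax is rejected when these sources are built as C++. A minimal sketch of the pattern, using a hypothetical struct rather than one of the real llama.cpp types:

    // Hypothetical example; 'point' stands in for structs such as llama_batch.
    struct point {
        point(int x, int y) : x(x), y(y) {}  // user-declared constructor: not an aggregate
        int x;
        int y;
    };

    int main() {
        // Designated-initializer form, rejected once the constructor exists:
        //   point p = { .x = 1, .y = 2 };
        // Constructor-call form used throughout this patch:
        point p(/* .x = */ 1, /* .y = */ 2);
        return p.x + p.y;  // 3
    }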