compiling and running
mike dupont committed Nov 22, 2023
1 parent 09a1f05 commit b598cf8
Showing 12 changed files with 158 additions and 103 deletions.
4 changes: 2 additions & 2 deletions Makefile
@@ -734,5 +734,5 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-c.o: tests/test-c.c llama.h
$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
tests/test-c.o: tests/test-c.cpp llama.h
$(CXX) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
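
With tests/test-c.c renamed to tests/test-c.cpp, the target now runs through $(CXX). The renamed file's contents are not part of this hunk; a plausible sketch (an assumption) is a translation unit whose only job is to confirm that llama.h compiles as C++:

```cpp
// tests/test-c.cpp — assumed contents, not shown in this diff.
// Built with -c only, so no main() is required; the target just verifies
// that llama.h parses cleanly in a C++ translation unit.
#include "llama.h"
```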
2 changes: 1 addition & 1 deletion common/common.h
@@ -42,7 +42,7 @@ extern char const *LLAMA_BUILD_TARGET;
//
int32_t get_num_physical_cores();

struct gpt_params {
struct gpt_params : refl::attr::usage::type{
uint32_t seed = -1; // RNG seed

int32_t n_threads = get_num_physical_cores();
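Deriving gpt_params from refl::attr::usage::type marks it as usable as a refl-cpp type-level attribute, but member enumeration still needs a separate registration block. A minimal sketch of such a registration (an assumption — this commit does not show one; only the fields visible in this hunk are listed):

```cpp
// Hypothetical refl-cpp registration for gpt_params (not shown in this diff).
// Only registered fields become visible through refl::type_descriptor<gpt_params>.
#include <refl.hpp>

REFL_AUTO(
    type(gpt_params),
    field(seed),
    field(n_threads)
)
```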
2 changes: 1 addition & 1 deletion common/grammar-parser.cpp
@@ -219,7 +219,7 @@ namespace grammar_parser {

// in original rule, replace previous symbol with reference to generated rule
out_elements.resize(last_sym_start);
llama_grammar_element(LLAMA_GRETYPE_RULE_REF, sub_rule_id) a;
llama_grammar_element a(LLAMA_GRETYPE_RULE_REF, sub_rule_id);
out_elements.push_back(a);

pos = parse_space(pos + 1, is_nested);
24 changes: 12 additions & 12 deletions examples/batched-bench/batched-bench.cpp
@@ -121,18 +121,18 @@ int main(int argc, char ** argv) {
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));

llama_batch batch_view = {
.n_tokens=n_tokens,
.token=batch.token + i,
.embd=nullptr,
.pos=batch.pos + i,
.n_seq_id=batch.n_seq_id + i,
.seq_id=batch.seq_id + i,
.logits=batch.logits + i,
.all_pos_0=0,
.all_pos_1=0,
.all_seq_id=0, // unused
};
llama_batch batch_view(
/* .n_tokens= */ n_tokens,
/* .token= */ batch.token + i,
/* .embd= */ nullptr,
/* .pos= */ batch.pos + i,
/* .n_seq_id= */ batch.n_seq_id + i,
/* .seq_id= */ batch.seq_id + i,
/* .logits= */ batch.logits + i,
/* .all_pos_0= */0,
/* .all_pos_1= */0,
/* .all_seq_id= */0 // unused
);

const int ret = llama_decode(ctx, batch_view);
if (ret != 0) {
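Upstream llama_batch is a plain C aggregate, so the positional call above only compiles if a matching constructor exists. A sketch of the constructor this call shape implies (an assumption; it is not visible in this diff and would live in llama.h):

```cpp
// Assumed llama_batch constructor (not shown in this diff). Parameter order
// must mirror the call sites: n_tokens, token, embd, pos, n_seq_id, seq_id,
// logits, all_pos_0, all_pos_1, all_seq_id.
llama_batch(int32_t n_tokens, llama_token * token, float * embd,
            llama_pos * pos, int32_t * n_seq_id, llama_seq_id ** seq_id,
            int8_t * logits, llama_pos all_pos_0, llama_pos all_pos_1,
            llama_seq_id all_seq_id)
    : n_tokens(n_tokens), token(token), embd(embd), pos(pos),
      n_seq_id(n_seq_id), seq_id(seq_id), logits(logits),
      all_pos_0(all_pos_0), all_pos_1(all_pos_1), all_seq_id(all_seq_id) {}
```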
10 changes: 5 additions & 5 deletions examples/batched/batched.cpp
@@ -169,13 +169,13 @@ int main(int argc, char ** argv) {
candidates.reserve(n_vocab);

for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
candidates.emplace_back(llama_token_data{
.id=token_id,
.logit=logits[token_id],
.p=0.0f });
candidates.emplace_back(llama_token_data(
token_id,
logits[token_id],
0.0f ));
}

llama_token_data_array candidates_p = { .data=candidates.data(), .size=candidates.size(), .sorted=false };
llama_token_data_array candidates_p (candidates.data(), candidates.size(), false );

const int top_k = 40;
const float top_p = 0.9f;
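The same pattern applies to llama_token_data and llama_token_data_array: the emplace_back and candidates_p calls above assume constructors along these lines (a sketch, not shown in this diff):

```cpp
// Assumed constructors (not visible in this diff) implied by the call sites.
llama_token_data(llama_token id, float logit, float p)
    : id(id), logit(logit), p(p) {}

llama_token_data_array(llama_token_data * data, size_t size, bool sorted)
    : data(data), size(size), sorted(sorted) {}
```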
24 changes: 12 additions & 12 deletions examples/llava/llava.cpp
@@ -75,18 +75,18 @@ bool llava_eval_image_embed(llama_context * ctx_llama, const struct llava_image_
if (n_eval > n_batch) {
n_eval = n_batch;
}
llama_batch batch = {
.n_tokens=int32_t(n_eval),
.token=nullptr,
.embd=(image_embed->embed+i*n_embd),
.pos=nullptr,
.n_seq_id=nullptr,
.seq_id=nullptr,
.logits=nullptr,
.all_pos_0=*n_past,
.all_pos_1=1,
.all_seq_id=0
};
llama_batch batch(
/* .n_tokens= */int32_t(n_eval),
/* .token= */nullptr,
/* .embd= */(image_embed->embed+i*n_embd),
/* .pos= */nullptr,
/* .n_seq_id= */nullptr,
/* .seq_id= */nullptr,
/* .logits= */nullptr,
/* .all_pos_0= */*n_past,
/* .all_pos_1= */1,
/* .all_seq_id= */0
);
if (llama_decode(ctx_llama, batch)) {
fprintf(stderr, "%s : failed to eval\n", __func__);
return false;
5 changes: 5 additions & 0 deletions examples/main/main.cpp
@@ -99,11 +99,16 @@ static void sigint_handler(int signo) {
}
}
#endif
using namespace refl;

int main(int argc, char ** argv) {
gpt_params params;
g_params = &params;

using Td = type_descriptor<gpt_params>;
//constexpr auto tbl = descriptor::get_attribute<gpt_params>(Td{});
//constexpr auto tbl_name = REFL_MAKE_CONST_STRING(tbl.name);

if (!gpt_params_parse(argc, argv, params)) {
return 1;
}
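The commented-out lines suggest the reflection metadata is not consumed yet. For reference, a minimal sketch of iterating gpt_params members through the same descriptor (standard refl-cpp usage, assuming a REFL_AUTO/REFL_TYPE registration exists for gpt_params):

```cpp
// Sketch: enumerate reflected members of gpt_params and print their names.
// Requires a refl-cpp registration block for gpt_params; include <cstdio>.
constexpr auto td = refl::reflect<gpt_params>();   // equivalent to Td{} above
refl::util::for_each(td.members, [](auto member) {
    printf("gpt_params member: %s\n", member.name.c_str());
});
```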
8 changes: 4 additions & 4 deletions examples/save-load-state/save-load-state.cpp
@@ -67,10 +67,10 @@ int main(int argc, char ** argv) {
std::vector<llama_token_data> candidates;
candidates.reserve(n_vocab);
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
candidates.emplace_back(llama_token_data{
.id=token_id,
.logit=logits[token_id],
.p=0.0f});
candidates.emplace_back(llama_token_data(
token_id,
logits[token_id],
0.0f));
}
llama_token_data_array candidates_p(candidates.data(), candidates.size(), false );
auto next_token = llama_sample_token(ctx, &candidates_p);
73 changes: 51 additions & 22 deletions examples/server/server.cpp
@@ -31,8 +31,16 @@

using json = nlohmann::json;

struct server_params
struct server_params : refl::attr::usage::type
{

server_params():
hostname( "127.0.0.1"),
public_path(public_path),
port(port),
read_timeout(read_timeout),
write_timeout( 600) {};

std::string hostname = "127.0.0.1";
std::string public_path = "examples/server/public";
int32_t port = 8080;
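Note that public_path(public_path), port(port) and read_timeout(read_timeout) in the constructor above initialize those members from themselves, leaving them indeterminate rather than with the defaults declared below. A sketch of the likely intent (an assumption), relying on the in-class initializers instead:

```cpp
// Sketch only: let the in-class default initializers apply instead of
// self-initializing members in the mem-initializer list.
server_params() = default;
```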
@@ -522,6 +530,28 @@ struct llama_server_context
std::vector<task_result> queue_results;
std::mutex mutex_tasks;
std::mutex mutex_results;
llama_server_context():
model(nullptr),
ctx(nullptr),
clp_ctx(nullptr),
params(params),
batch(batch),
multimodal(false),
clean_kv_cache( true),
all_slots_are_idle( false),
add_bos_token( true),
//int32_t id_gen;
//int32_t n_ctx; // total context for all clients / slots
system_need_update(false){}
//std::string system_prompt;
//std::vector<llama_token> system_tokens;
//std::string name_user; // this should be the antiprompt
//std::string name_assistant;
//std::vector<llama_client_slot> slots;
//std::vector<task_server> queue_tasks;
//std::vector<task_result> queue_results;
//std::mutex mutex_tasks;
//std::mutex mutex_results;

~llama_server_context()
{
@@ -1303,16 +1333,16 @@ struct llama_server_context
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
{
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
llama_batch batch_view = {
llama_batch batch_view(
n_tokens,
batch.token + i,
nullptr,
batch.pos + i,
batch.n_seq_id + i,
batch.seq_id + i,
batch.logits + i,
0, 0, 0, // unused
};
0, 0, 0 // unused
);
if (llama_decode(ctx, batch_view))
{
LOG_TEE("%s : failed to eval\n", __func__);
@@ -1665,19 +1695,18 @@ struct llama_server_context
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
{
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
llama_batch batch_view =
{
.n_tokens=n_tokens,
.token=batch.token + i,
.embd=nullptr,
.pos=batch.pos + i,
.n_seq_id=batch.n_seq_id + i,
.seq_id=batch.seq_id + i,
.logits=batch.logits + i,
.all_pos_0=.0,
.all_pos_1=0,
.all_seq_id=0, // unused
};
llama_batch batch_view(
/* .n_tokens= */n_tokens,
/* .token= */batch.token + i,
/* .embd= */nullptr,
/* .pos= */batch.pos + i,
/* .n_seq_id= */batch.n_seq_id + i,
/* .seq_id= */batch.seq_id + i,
/* .logits= */batch.logits + i,
/* .all_pos_0= */.0,
/* .all_pos_1= */0,
/* .all_seq_id= */0 // unused
);

const int ret = llama_decode(ctx, batch_view);
if (ret != 0)
@@ -1724,10 +1753,10 @@ struct llama_server_context
slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
}

llama_token_data_array cur_p = {
.data=slot.ctx_sampling->cur.data(),
.size=slot.ctx_sampling->cur.size(),
.sorted=false };
llama_token_data_array cur_p(
slot.ctx_sampling->cur.data(),
slot.ctx_sampling->cur.size(),
false );
result.tok = id;

const int32_t n_probs = slot.sparams.n_probs;
@@ -2596,4 +2625,4 @@ int main(int argc, char **argv)

llama_backend_free();
return 0;
}
}
14 changes: 7 additions & 7 deletions examples/simple/simple.cpp
@@ -124,15 +124,15 @@ int main(int argc, char ** argv) {
candidates.reserve(n_vocab);

for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
candidates.emplace_back(llama_token_data{ .id=token_id,
.logit=logits[token_id],
.p=0.0f });
candidates.emplace_back(llama_token_data( token_id,
logits[token_id],
0.0f ));
}

llama_token_data_array candidates_p = {
.data=candidates.data(),
.size=candidates.size(),
.sorted=false };
llama_token_data_array candidates_p(
candidates.data(),
candidates.size(),
false );

// sample the most likely token
const llama_token new_token_id = llama_sample_token_greedy(ctx, &candidates_p);
70 changes: 34 additions & 36 deletions llama.cpp
@@ -9321,18 +9321,18 @@ int llama_eval_embd(
int n_past) {
llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);

llama_batch batch = {
.n_tokens=n_tokens,
.token=nullptr,
.embd=embd,
.pos=nullptr,
.n_seq_id=nullptr,
.seq_id=nullptr,
.logits=nullptr,
.all_pos_0=n_past,
.all_pos_1=1,
.all_seq_id=0
};
llama_batch batch(
n_tokens,
nullptr,
embd,
nullptr,
nullptr,
nullptr,
nullptr,
n_past,
1,
0
);

const int ret = llama_decode_internal(*ctx, batch);
if (ret < 0) {
@@ -9352,34 +9352,32 @@ struct llama_batch llama_batch_get_one(
int32_t n_tokens,
llama_pos pos_0,
llama_seq_id seq_id) {
llama_batch b ={
.n_tokens = n_tokens,
.token = tokens,
.embd = nullptr,
.pos = nullptr,
.n_seq_id = nullptr,
.seq_id = nullptr,
.logits = nullptr,
.all_pos_0 = pos_0,
.all_pos_1 = 1,
.all_seq_id = seq_id,
};
llama_batch b(
n_tokens,
tokens,
nullptr,
nullptr,
nullptr,
nullptr,
nullptr,
pos_0,
1,
seq_id);
return b;
}

struct llama_batch llama_batch_init(int32_t n_tokens, int32_t embd, int32_t n_seq_max) {
llama_batch batch = {
.n_tokens = 0,
.embd=nullptr,
.pos=nullptr,
.n_seq_id=nullptr,
.seq_id=nullptr,
.logits=nullptr,
.all_pos_0=0,
.all_pos_1=0,
.all_seq_id=0

};
llama_batch batch(
/* .n_tokens = */ 0,
/* .token */ (llama_token *)nullptr,
/* .embd= */ (float *)nullptr,
/* .pos= */ (llama_pos *)nullptr,
/* .n_seq_id= */ (int32_t *)nullptr,
/* .seq_id= */ (llama_seq_id **)nullptr,
/* .logits= */ (int8_t *)nullptr,
/* .all_pos_0= */ 0,
/* .all_pos_1= */ 0 ,
/* .all_seq_id= */ 0);

if (embd) {
batch.embd = (float *) malloc(sizeof(float) * n_tokens * embd);
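For reference, llama_batch_get_one keeps its established call shape after this change; a minimal usage sketch (ctx, tokens and n_past are assumed to exist, error handling trimmed):

```cpp
// Sketch: feed a span of tokens to the model with the one-off batch helper,
// then advance the position counter.
llama_batch batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size(),
                                        n_past, /* seq_id */ 0);
if (llama_decode(ctx, batch) != 0) {
    fprintf(stderr, "%s : llama_decode failed\n", __func__);
    return 1;
}
n_past += (int32_t) tokens.size();
```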