Skip to content

Commit

Permalink
comment
Browse files Browse the repository at this point in the history
  • Loading branch information
awni committed Jan 9, 2025
1 parent 5e8f88d commit edcc1bb
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 5 deletions.
2 changes: 0 additions & 2 deletions llms/export/mlxlm.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ std::function<mx::Args(mx::Args)> load_model(const std::string &path);

/// Builds and returns a BPETokenizer from the given filesystem path.
///
/// @param path Location to load from. NOTE(review): whether this is a model
///             directory or a single tokenizer file is not visible from this
///             declaration -- confirm against the definition.
/// @return The loaded tokenizer, returned by value.
BPETokenizer load_tokenizer(const std::string &path);

struct GenerationResponse {};

/// Entry point for text generation from a prompt.
///
/// @param model      Callable taking and returning mx::Args; this is the same
///                   type that load_model() is declared to return.
/// @param tokenizer  BPE tokenizer, taken by const reference.
/// @param prompt     Input text, taken by const reference.
/// @param max_tokens Defaults to 256. Presumably an upper bound on the number
///                   of generated tokens -- TODO confirm in the definition.
///
/// Returns void; how the generated text is surfaced (stdout, callback, ...)
/// cannot be determined from this declaration alone.
void generate(const std::function<mx::Args(mx::Args)> &model,
const BPETokenizer &tokenizer, const std::string &prompt,
int max_tokens = 256);
2 changes: 1 addition & 1 deletion llms/export/third_party/download_unicode.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ url=https://raw.githubusercontent.com/ggerganov/llama.cpp/${commit}/src/

# Download each of the four unicode source/header files from ${url}.
# curl flags: -O saves under the remote file name, -L follows redirects.
for file in 'unicode.cpp' 'unicode.h' 'unicode-data.cpp' 'unicode-data.h'
do
# NOTE(review): this listing is a diff rendering (1 addition & 1 deletion);
# the next line appears to be the pre-change command ...
curl -OL ${url}/${file}
# ... and this one its replacement, which additionally discards curl's stderr
# output via 2>/dev/null.
curl -OL ${url}/${file} 2>/dev/null
done

touch unicode_downloaded
8 changes: 6 additions & 2 deletions llms/export/tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,17 @@ std::vector<int> BPETokenizer::encode(std::string text) const {
auto one_step_merge = [this](std::string segment, std::vector<int> &splits) {
int merge_idx;
int rank = INT32_MAX;
std::string candidate;
for (int i = 0; i < splits.size() - 2; ++i) {
auto start = splits[i];
auto mid = splits[i + 1];
auto end = splits[i + 2];
std::string candidate = segment.substr(start, mid - start);
candidate.clear();
candidate.insert(candidate.end(), segment.begin() + start,
segment.begin() + mid);
candidate += " ";
candidate += segment.substr(mid, end - mid);
candidate.insert(candidate.end(), segment.begin() + mid,
segment.begin() + end);
if (auto it = merges_.find(candidate); it != merges_.end()) {
if (it->second < rank) {
merge_idx = i;
Expand Down

0 comments on commit edcc1bb

Please sign in to comment.