Skip to content

Commit

Permalink
Merge branch 'exo-explore:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
SethBurkart123 authored Oct 16, 2024
2 parents 8097e5c + 1e4524b commit 9e6cff6
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 0 deletions.
7 changes: 7 additions & 0 deletions exo/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,11 @@
"qwen-2.5-math-72b": {
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Qwen2.5-Math-72B-Instruct-4bit", start_layer=0, end_layer=0, n_layers=80),
},
### nemotron
"nemotron-70b": {
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/nvidia_Llama-3.1-Nemotron-70B-Instruct-HF_4bit", start_layer=0, end_layer=0, n_layers=80),
},
"nemotron-70b-bf16": {
"MLXDynamicShardInferenceEngine": Shard(model_id="mlx-community/Llama-3.1-Nemotron-70B-Instruct-HF-bf16", start_layer=0, end_layer=0, n_layers=80),
},
}
2 changes: 2 additions & 0 deletions exo/tinychat/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
<option value="llama-3.1-405b">Llama 3.1 405B</option>
<option value="llama-3-8b">Llama 3 8B</option>
<option value="llama-3-70b">Llama 3 70B</option>
<option value="nemotron-70b">Nemotron 70B</option>
<option value="nemotron-70b-bf16">Nemotron 70B (BF16)</option>
<option value="mistral-nemo">Mistral Nemo</option>
<option value="mistral-large">Mistral Large</option>
<option value="deepseek-coder-v2-lite">Deepseek Coder V2 Lite</option>
Expand Down
1 change: 1 addition & 0 deletions exo/tinychat/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ document.addEventListener("alpine:init", () => {
}
}
this.lastErrorMessage = null;
this.downloadProgress = null;
}
} else {
// No ongoing download
Expand Down
2 changes: 2 additions & 0 deletions exo/topology/device_capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ def to_dict(self):
"NVIDIA TITAN RTX": DeviceFlops(fp32=16.31*TFLOPS, fp16=32.62*TFLOPS, int8=65.24*TFLOPS),
# GTX 10 series
"NVIDIA GEFORCE GTX 1050 TI": DeviceFlops(fp32=2.0*TFLOPS, fp16=4.0*TFLOPS, int8=8.0*TFLOPS),
# GTX 16 series
"NVIDIA GEFORCE GTX 1660 TI": DeviceFlops(fp32=4.8*TFLOPS, fp16=9.6*TFLOPS, int8=19.2*TFLOPS),
# QUADRO RTX Ampere series
"NVIDIA RTX A2000": DeviceFlops(fp32=7.99*TFLOPS, fp16=7.99*TFLOPS, int8=31.91*TFLOPS),
"NVIDIA RTX A4000": DeviceFlops(fp32=19.17*TFLOPS, fp16=19.17*TFLOPS, int8=76.68*TFLOPS),
Expand Down

0 comments on commit 9e6cff6

Please sign in to comment.