-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 4396273
Showing
4 changed files
with
100 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
.idea |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# LLM Inference Speeds | ||
|
||
This repository contains benchmark data for various Large Language Models (LLMs), based on their inference speeds measured in tokens per second. The benchmarks are performed across different hardware configurations using the prompt "tell a story". | ||
|
||
## About the Data | ||
|
||
The data represents the performance of several LLMs, detailing the tokens processed per second on specific hardware setups. Each entry includes the model name, the hardware used, and the measured speed. | ||
|
||
## Explore the Benchmarks | ||
|
||
You can view and interact with the benchmark data through a searchable table on our GitHub Pages site. Use the search field to filter by model name and compare performance across different hardware. | ||
|
||
**[View the Inference Speeds Table](https://dmatora.github.io/inference-speed/)** | ||
|
||
## Contributing | ||
|
||
Contributions to the benchmark data are welcome! Please refer to the contributing guidelines for more information on how you can contribute. | ||
|
||
## License | ||
|
||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[ | ||
{"model": "Model Mistral Instruct 7B Q4", "hardware": "i7-7700HQ", "speed": "3 tokens/sec", "proof": "https://github.com/dmatora/inference-speed/issues/1"} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
<!DOCTYPE html> | ||
<html lang="en"> | ||
<head> | ||
<meta charset="UTF-8"> | ||
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||
<title>LLM Inference Speeds</title> | ||
<style> | ||
body { font-family: Arial, sans-serif; } | ||
input { margin: 20px 0; padding: 10px; width: 200px; } | ||
table { width: 100%; border-collapse: collapse; } | ||
th, td { padding: 10px; border: 1px solid #ddd; text-align: left; } | ||
th { background-color: #f2f2f2; } | ||
a { color: blue; text-decoration: none; } | ||
a:hover { text-decoration: underline; } | ||
</style> | ||
</head> | ||
<body> | ||
<h1>LLM Inference Speeds</h1> | ||
<input type="text" id="searchInput" onkeyup="filterModels()" placeholder="Search for models..."> | ||
|
||
<table id="dataTable"> | ||
<thead> | ||
<tr> | ||
<th>Model</th> | ||
<th>Hardware</th> | ||
<th>Speed</th> | ||
<th>Proof</th> | ||
</tr> | ||
</thead> | ||
<tbody> | ||
</tbody> | ||
</table> | ||
|
||
<script> | ||
document.addEventListener('DOMContentLoaded', function() { | ||
fetch('data.json') | ||
.then(response => response.json()) | ||
.then(data => { | ||
const tableBody = document.getElementById('dataTable').getElementsByTagName('tbody')[0]; | ||
data.forEach(item => { | ||
let row = tableBody.insertRow(); | ||
let cell1 = row.insertCell(0); | ||
let cell2 = row.insertCell(1); | ||
let cell3 = row.insertCell(2); | ||
let cell4 = row.insertCell(3); | ||
cell1.textContent = item.model; | ||
cell2.textContent = item.hardware; | ||
cell3.textContent = item.speed; | ||
cell4.innerHTML = '<a href="' + item.proof + '" target="_blank">Issue Link</a>'; | ||
}); | ||
}) | ||
.catch(error => console.error('Error loading the data:', error)); | ||
}); | ||
|
||
function filterModels() { | ||
var input, filter, table, tr, td, i, txtValue; | ||
input = document.getElementById("searchInput"); | ||
filter = input.value.toUpperCase(); | ||
table = document.getElementById("dataTable"); | ||
tr = table.getElementsByTagName("tr"); | ||
for (i = 0; i < tr.length; i++) { | ||
td = tr[i].getElementsByTagName("td")[0]; | ||
if (td) { | ||
txtValue = td.textContent || td.innerText; | ||
if (txtValue.toUpperCase().indexOf(filter) > -1) { | ||
tr[i].style.display = ""; | ||
} else { | ||
tr[i].style.display = "none"; | ||
} | ||
} | ||
} | ||
} | ||
</script> | ||
</body> | ||
</html> |