Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
s-mizuki-nlp committed Jun 28, 2024
2 parents 75ac1fb + 612a1ba commit 8313f73
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 85 deletions.
6 changes: 2 additions & 4 deletions _config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ description: >- # this means to ignore newlines until "baseurl:"
Write an awesome description for your new site here. You can edit this
line in _config.yml. It will appear in your document head meta (for
Google search results) and in your feed.xml site description.
#baseurl: "/evaluation" # the subpath of your site, e.g. /blog
#url: "https://swallow-llm.github.io" # the base hostname & protocol for your site, e.g. http://example.com
baseurl: "/temp/llm-evaluations/"
url: "https://www.chokkan.org"
baseurl: "/evaluation" # the subpath of your site, e.g. /blog
url: "https://swallow-llm.github.io" # the base hostname & protocol for your site, e.g. http://example.com
twitter_username: jekyllrb
github_username: jekyll

Expand Down
2 changes: 1 addition & 1 deletion _includes/view.html
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ <h3>{{ ui.models }}</h3>
aspectRatio: portrait ? aspect_portrait : aspect_landscape,
scales: portrait ?
{ x: { beginAtZero: true, min: 0, max: 1 }, xAxes: [{ ticks: {autoSkip: false } } ] } :
{ y: { beginAtZero: true, min: 0, max: 1 } },
{ y: { beginAtZero: true, min: 0, max: 1 }, yAxes: [{ ticks: {autoSkip: false } } ] },
layout: { padding: {left: 20, right: 20} },
plugins: { title: { display: true, position: 'bottom', text: '{{ ui.lang_avg_long }}' } },
},
Expand Down
118 changes: 40 additions & 78 deletions assets/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -879,15 +879,15 @@ const dataSet = [
"Name": "Llama 3 Swallow 8B",
"Base name": "",
"Size (B)": 8,
"Coding": NaN,
"Extraction": NaN,
"Humanities": NaN,
"Math": NaN,
"Reasoning": NaN,
"Roleplay": NaN,
"Stem": NaN,
"Writing": NaN,
"Ja MT-Bench": NaN,
"Coding": 0.1081,
"Extraction": 0.1147,
"Humanities": 0.1770,
"Math": 0.2047,
"Reasoning": 0.1080,
"Roleplay": 0.1203,
"Stem": 0.1912,
"Writing": 0.1473,
"Ja MT-Bench": 0.1464,
"JCom": 0.8945,
"JEMHopQA": 0.4848,
"NIILC": 0.564,
Expand Down Expand Up @@ -993,36 +993,36 @@ const dataSet = [
"Name": "Llama-3-ELYZA-JP-8B",
"Base name": "非公開",
"Size (B)": 8,
"Coding": NaN,
"Extraction": NaN,
"Humanities": NaN,
"Math": NaN,
"Reasoning": NaN,
"Roleplay": NaN,
"Stem": NaN,
"Writing": NaN,
"Ja MT-Bench": NaN,
"JCom": NaN,
"JEMHopQA": NaN,
"NIILC": NaN,
"JSQuAD": NaN,
"XL-Sum": NaN,
"MGSM": NaN,
"WMT20 (en-ja)": NaN,
"WMT20 (ja-en)": NaN,
"Ja Avg": NaN,
"JMMLU": NaN,
"JHumanEval": NaN,
"OpenBookQA": NaN,
"TriviaQA": NaN,
"HellaSwag": NaN,
"SQuAD2": NaN,
"XWINO": NaN,
"MMLU": NaN,
"GSM8K": NaN,
"BBH": NaN,
"HumanEval": NaN,
"En Avg": NaN,
"Coding": 0.2908,
"Extraction": 0.6421,
"Humanities": 0.6406,
"Math": 0.3088,
"Reasoning": 0.55,
"Roleplay": 0.674,
"Stem": 0.5251,
"Writing": 0.6744,
"Ja MT-Bench": 0.5382,
"JCom": 0.9017,
"JEMHopQA": 0.5124,
"NIILC": 0.5016,
"JSQuAD": 0.9113,
"XL-Sum": 0.1677,
"MGSM": 0.46,
"WMT20 (en-ja)": 0.2509,
"WMT20 (ja-en)": 0.1846,
"Ja Avg": 0.4754,
"JMMLU": 0.4829,
"JHumanEval": 0.3811,
"OpenBookQA": 0.32,
"TriviaQA": 0.5502,
"HellaSwag": 0.5224,
"SQuAD2": 0.3631,
"XWINO": 0.8809,
"MMLU": 0.5875,
"GSM8K": 0.5701,
"BBH": 0.3213,
"HumanEval": 0.4604,
"En Avg": 0.5084,
"SortKey": "Llama-3-ELYZA-JP-008"
},
{
Expand Down Expand Up @@ -1519,44 +1519,6 @@ const dataSet = [
"En Avg": NaN,
"SortKey": "Sarashina2-013"
},
{
"Type": "base",
"Model": "stockmark/stockmark-100b",
"Name": "Stockmark-100b",
"Base name": "",
"Size (B)": 100,
"Coding": NaN,
"Extraction": NaN,
"Humanities": NaN,
"Math": NaN,
"Reasoning": NaN,
"Roleplay": NaN,
"Stem": NaN,
"Writing": NaN,
"Ja MT-Bench": NaN,
"JCom": 0.1975,
"JEMHopQA": 0.4121,
"NIILC": 0.5527,
"JSQuAD": 0.5577,
"XL-Sum": 0.0629,
"MGSM": 0.008,
"WMT20 (en-ja)": 0.2043,
"WMT20 (ja-en)": 0.1184,
"Ja Avg": 0.2609,
"JMMLU": 0.2344,
"JHumanEval": NaN,
"OpenBookQA": 0.28,
"TriviaQA": 0.3675,
"HellaSwag": 0.4586,
"SQuAD2": 0.1855,
"XWINO": 0.8185,
"MMLU": 0.259,
"GSM8K": 0.0136,
"BBH": 0.2597,
"HumanEval": NaN,
"En Avg": 0.3303,
"SortKey": "Stockmark-100"
},
{
"Type": "base",
"Model": "tokyotech-llm/Swallow-7b-hf",
Expand Down Expand Up @@ -1827,7 +1789,7 @@ const dataSet = [
"Type": "inst",
"Model": "tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1",
"Name": "Swallow-MX-8x7b-NVE-v0.1",
"Base name": "指示チューニングモデルではない",
"Base name": "instruct・chatモデルではない",
"Size (B)": 47,
"Coding": -1.0,
"Extraction": -1.0,
Expand Down
4 changes: 2 additions & 2 deletions index.ja.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
- type: bar
target: avg
width: 12
aspect_portrait: 0.5
aspect_landscape: 2
aspect_portrait: 0.25
aspect_landscape: 1.77
persistent_group: index
select_all: true
instructions:
Expand Down

0 comments on commit 8313f73

Please sign in to comment.