Skip to content

Commit

Permalink
Updated with the latest experimental results.
Browse files Browse the repository at this point in the history
  • Loading branch information
chokkan committed Jun 29, 2024
1 parent 66c5b0c commit a1d628e
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 89 deletions.
16 changes: 5 additions & 11 deletions _data/models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
name: KARAKURI LM 70B Chat v0.1
size: 70
type: instruct
- basename: Swallow-MX-8x7b-NVE-v0.1
- basename: Swallow-MX 8x7B v0.1
missing: Japanese tasks, Japanese MT-bench tasks, English tasks
model: karakuri-ai/karakuri-lm-8x7b-instruct-v0.1
name: KARAKURI LM 8x7B Instruct v0.1
Expand Down Expand Up @@ -179,8 +179,7 @@
size: 47
type: instruct
- basename: ''
missing: JCom, JEMHopQA, NIILC, JSQuAD, Ja Avg, OpenBookQA, TriviaQA, HellaSwag,
SQuAD2, XWINO, GSM8K, BBH, HumanEval, En Avg
missing: BBH, HumanEval
model: mistralai/Mixtral-8x7B-v0.1
name: Mixtral-8x7B-v0.1
size: 47
Expand Down Expand Up @@ -234,7 +233,8 @@
size: 7
type: base
- basename: ''
missing: Japanese tasks, English tasks
missing: JCom, JEMHopQA, NIILC, JSQuAD, MGSM, WMT20 (en-ja), WMT20 (ja-en), Ja Avg,
OpenBookQA, TriviaQA, HellaSwag, SQuAD2, XWINO, MMLU, GSM8K, BBH, En Avg
model: sbintuitions/sarashina2-13b
name: Sarashina2-13B
size: 13
Expand Down Expand Up @@ -278,15 +278,9 @@
- basename: ''
missing: ''
model: tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1
name: Swallow-MX v0.1
name: Swallow-MX 8x7B v0.1
size: 47
type: base
- basename: instruct・chatモデルではない
missing: Japanese MT-bench tasks
model: tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1
name: Swallow-MX-8x7b-NVE-v0.1
size: 47
type: instruct
- basename: ''
missing: ''
model: 01-ai/Yi-1.5-6B
Expand Down
117 changes: 39 additions & 78 deletions assets/data.js
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ const dataSet = [
"Type": "inst",
"Model": "karakuri-ai/karakuri-lm-8x7b-instruct-v0.1",
"Name": "KARAKURI LM 8x7B Instruct v0.1",
"Base name": "Swallow-MX-8x7b-NVE-v0.1",
"Base name": "Swallow-MX 8x7B v0.1",
"Size (B)": 47,
"Coding": NaN,
"Extraction": NaN,
Expand Down Expand Up @@ -1184,29 +1184,29 @@ const dataSet = [
"Stem": NaN,
"Writing": NaN,
"Ja MT-Bench": NaN,
"JCom": -1.0,
"JEMHopQA": -1.0,
"NIILC": -1.0,
"JSQuAD": -1.0,
"JCom": 0.8436,
"JEMHopQA": 0.5038,
"NIILC": 0.3745,
"JSQuAD": 0.8952,
"XL-Sum": 0.2205,
"MGSM": 0.328,
"WMT20 (en-ja)": 0.1948,
"WMT20 (ja-en)": 0.2024,
"Ja Avg": -0.5054,
"JMMLU": -1.0,
"Ja Avg": 0.3079,
"JMMLU": 0.5162,
"JHumanEval": -1.0,
"OpenBookQA": -1.0,
"TriviaQA": -1.0,
"HellaSwag": -1.0,
"SQuAD2": -1.0,
"XWINO": -1.0,
"OpenBookQA": 0.388,
"TriviaQA": 0.7938,
"HellaSwag": 0.664,
"SQuAD2": 0.3612,
"XWINO": 0.9217,
"MMLU": 0.7038,
"GSM8K": -1.0,
"GSM8K": 0.5823,
"BBH": -1.0,
"HumanEval": -1.0,
"En Avg": -0.8107,
"En Avg": 0.2683,
"SortKey": "Mixtral--v0.1047",
"Missing": "JCom, JEMHopQA, NIILC, JSQuAD, Ja Avg, OpenBookQA, TriviaQA, HellaSwag, SQuAD2, XWINO, GSM8K, BBH, HumanEval, En Avg"
"Missing": "BBH, HumanEval"
},
{
"Type": "base",
Expand Down Expand Up @@ -1535,29 +1535,29 @@ const dataSet = [
"Stem": NaN,
"Writing": NaN,
"Ja MT-Bench": NaN,
"JCom": NaN,
"JEMHopQA": NaN,
"NIILC": NaN,
"JSQuAD": NaN,
"XL-Sum": NaN,
"MGSM": NaN,
"WMT20 (en-ja)": NaN,
"WMT20 (ja-en)": NaN,
"Ja Avg": NaN,
"JMMLU": NaN,
"JHumanEval": NaN,
"OpenBookQA": NaN,
"TriviaQA": NaN,
"HellaSwag": NaN,
"SQuAD2": NaN,
"XWINO": NaN,
"MMLU": NaN,
"GSM8K": NaN,
"BBH": NaN,
"HumanEval": NaN,
"En Avg": NaN,
"JCom": -1.0,
"JEMHopQA": -1.0,
"NIILC": -1.0,
"JSQuAD": -1.0,
"XL-Sum": 0.1588,
"MGSM": -1.0,
"WMT20 (en-ja)": -1.0,
"WMT20 (ja-en)": -1.0,
"Ja Avg": -0.7841,
"JMMLU": -1.0,
"JHumanEval": 0.0,
"OpenBookQA": -1.0,
"TriviaQA": -1.0,
"HellaSwag": -1.0,
"SQuAD2": -1.0,
"XWINO": -1.0,
"MMLU": -1.0,
"GSM8K": -1.0,
"BBH": -1.0,
"HumanEval": 0.0,
"En Avg": -0.8889,
"SortKey": "Sarashina2-013",
"Missing": "Japanese tasks, English tasks"
"Missing": "JCom, JEMHopQA, NIILC, JSQuAD, MGSM, WMT20 (en-ja), WMT20 (ja-en), Ja Avg, OpenBookQA, TriviaQA, HellaSwag, SQuAD2, XWINO, MMLU, GSM8K, BBH, En Avg"
},
{
"Type": "base",
Expand Down Expand Up @@ -1796,7 +1796,7 @@ const dataSet = [
{
"Type": "base",
"Model": "tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1",
"Name": "Swallow-MX v0.1",
"Name": "Swallow-MX 8x7B v0.1",
"Base name": "",
"Size (B)": 47,
"Coding": NaN,
Expand Down Expand Up @@ -1829,48 +1829,9 @@ const dataSet = [
"BBH": 0.6822,
"HumanEval": 0.4,
"En Avg": 0.5977,
"SortKey": "Swallow-MX v0.1047",
"SortKey": "Swallow-MX v0.1047",
"Missing": ""
},
{
"Type": "inst",
"Model": "tokyotech-llm/Swallow-MX-8x7b-NVE-v0.1",
"Name": "Swallow-MX-8x7b-NVE-v0.1",
"Base name": "instruct・chatモデルではない",
"Size (B)": 47,
"Coding": -1.0,
"Extraction": -1.0,
"Humanities": -1.0,
"Math": -1.0,
"Reasoning": -1.0,
"Roleplay": -1.0,
"Stem": -1.0,
"Writing": -1.0,
"Ja MT-Bench": -1.0,
"JCom": 0.9223,
"JEMHopQA": 0.5519,
"NIILC": 0.5779,
"JSQuAD": 0.9146,
"XL-Sum": 0.262,
"MGSM": 0.42,
"WMT20 (en-ja)": 0.2718,
"WMT20 (ja-en)": 0.209,
"Ja Avg": 0.5048,
"JMMLU": 0.5623,
"JHumanEval": 0.3567,
"OpenBookQA": 0.342,
"TriviaQA": 0.7734,
"HellaSwag": 0.6505,
"SQuAD2": 0.3479,
"XWINO": 0.9166,
"MMLU": 0.6941,
"GSM8K": 0.5724,
"BBH": 0.6822,
"HumanEval": 0.4,
"En Avg": 0.5977,
"SortKey": "Swallow-MX--NVE-v0.1047",
"Missing": "Japanese MT-bench tasks"
},
{
"Type": "base",
"Model": "01-ai/Yi-1.5-6B",
Expand Down

0 comments on commit a1d628e

Please sign in to comment.