-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
mike dupont
committed
Feb 21, 2024
1 parent
cbf2281
commit 57f8668
Showing
9 changed files
with
190 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
GRAMMAR=`cat /mnt/data1/time2/time/2023/11/13/llama.cpp/grammars/chess.gbnf` | ||
GRAMMAR=`cat ./grammars/chess.gbnf` | ||
dune exec ./bin/simple_grammar.exe -- --llamacpp -s test4 -u "http://localhost:8080" -p "consider a consecutive series of types to describe the universe and universe of universes, what is your ordering?" -n 4 -g "${GRAMMAR}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
GRAMMAR=`cat /mnt/data1/time2/time/2023/11/13/llama.cpp/grammars/c.gbnf` | ||
GRAMMAR=`cat ./grammars/c.gbnf` | ||
dune exec ./bin/simple_grammar.exe -- --llamacpp -s clang2 -u "http://localhost:8080" -p "consider a consecutive series of types to describe the universe and universe of universes, what is your ordering? please create .c language declarations. " -n 4 -g "${GRAMMAR}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
root ::= (declaration)* | ||
|
||
declaration ::= dataType identifier "(" parameter? ")" "{" statement* "}" | ||
|
||
dataType ::= "int" ws | "float" ws | "char" ws | ||
identifier ::= [a-zA-Z_] [a-zA-Z_0-9]* | ||
|
||
parameter ::= dataType identifier | ||
|
||
statement ::= | ||
( dataType identifier ws "=" ws expression ";" ) | | ||
( identifier ws "=" ws expression ";" ) | | ||
( identifier ws "(" argList? ")" ";" ) | | ||
( "return" ws expression ";" ) | | ||
( "while" "(" condition ")" "{" statement* "}" ) | | ||
( "for" "(" forInit ";" ws condition ";" ws forUpdate ")" "{" statement* "}" ) | | ||
( "if" "(" condition ")" "{" statement* "}" ("else" "{" statement* "}")? ) | | ||
( singleLineComment ) | | ||
( multiLineComment ) | ||
|
||
forInit ::= dataType identifier ws "=" ws expression | identifier ws "=" ws expression | ||
forUpdate ::= identifier ws "=" ws expression | ||
|
||
condition ::= expression relationOperator expression | ||
relationOperator ::= ("<=" | "<" | "==" | "!=" | ">=" | ">") | ||
|
||
expression ::= term (("+" | "-") term)* | ||
term ::= factor(("*" | "/") factor)* | ||
|
||
factor ::= identifier | number | unaryTerm | funcCall | parenExpression | ||
unaryTerm ::= "-" factor | ||
funcCall ::= identifier "(" argList? ")" | ||
parenExpression ::= "(" ws expression ws ")" | ||
|
||
argList ::= expression ("," ws expression)* | ||
|
||
number ::= [0-9]+ | ||
|
||
singleLineComment ::= "//" [^\n]* "\n" | ||
# A block comment: "/*", then text that does not contain "*/", then "*/".
# Runs of "*" are matched as a unit so that a run of stars directly before the
# closing "/" (for example "/***/" or "/* note **/") still ends the comment.
multiLineComment ::= "/*" ( [^*] | "*"+ [^*/] )* "*"+ "/"
|
||
ws ::= ([ \t\n]+) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Specifies chess moves as a list in algebraic notation, using PGN conventions | ||
|
||
# Force first move to "1. ", then any 1-2 digit number after, relying on model to follow the pattern | ||
root ::= "1. " move " " move "\n" ([1-9] [0-9]? ". " move " " move "\n")+ | ||
move ::= (pawn | nonpawn | castle) [+#]? | ||
|
||
# piece type, optional file/rank, optional capture, dest file & rank | ||
nonpawn ::= [NBKQR] [a-h]? [1-8]? "x"? [a-h] [1-8] | ||
|
||
# optional file & capture, dest file & rank, optional promotion | ||
pawn ::= ([a-h] "x")? [a-h] [1-8] ("=" [NBKQR])? | ||
|
||
castle ::= "O-O" "-O"? |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
We are bootstrapping a new system using EBNF grammars. | ||
We want to make an ebnf grammar that is super detailed and self expressive. | ||
Here is the code we wrote so far | ||
# GBNF (GGML BNF) is a format for defining formal grammars to constrain model outputs in llama.cpp. | ||
# Backus-Naur Form (BNF) is a notation for describing the syntax of formal languages like programming languages, file formats, and protocols. GBNF is an extension of BNF that primarily adds a few modern regex-like features. | ||
# In GBNF, we define production rules that specify how a non-terminal (rule name) can be replaced with sequences of terminals (characters, specifically Unicode code points) and other non-terminals. The basic format of a production rule is nonterminal ::= sequence.... | ||
|
||
production_rule ::= alternation | ||
lhs ::= identifier | ||
rule ::= lhs S "=" S production_rule S | comment | ||
root ::= ( S rule S ) * | ||
|
||
# Terminals support the full range of Unicode. Unicode characters can be specified directly in the grammar, for example hiragana ::= [ぁ-ゟ], or with escapes: 8-bit (\xXX), 16-bit (\uXXXX) or 32-bit (\UXXXXXXXX). | ||
range ::= "-" | ||
factor_range ::= term S range S term | ||
|
||
# Character ranges can be negated with ^: | ||
negate ::= "^" | ||
|
||
#Sequences and Alternatives | ||
#The order of symbols in a sequence matters. For example, in "1. " move " " move "\n", the "1. " must come before the first move, etc. | ||
concatenation ::= ( S factor S ? ) + | ||
|
||
# Alternatives, denoted by |, give different sequences that are acceptable.
# The "|" token has its own rule name: when both rules were called
# `alternation`, the sequence rule referred to itself where the "|" token was meant.
alternation_symbol ::= "|"
alternation ::= ( S concatenation S alternation_symbol ? ) +
|
||
# Parentheses () can be used to group sequences, which allows for embedding alternatives in a larger rule or applying repetition and optional symbols (below) to a sequence. | ||
parens_open ::= "(" | ||
parens_close ::= ")" | ||
parens ::= parens_open | parens_close | ||
|
||
#Repetition and Optional Symbols | ||
repetition_symbols ::= repetition_plus | repetition_star | repetition_optional | ||
|
||
#* after a symbol or sequence means that it can be repeated zero or more times. | ||
repetition_star ::= "*" | ||
|
||
#+ denotes that the symbol or sequence should appear one or more times. | ||
repetition_plus ::= "+" | ||
|
||
#? makes the preceding symbol or sequence optional. | ||
repetition_optional ::= "?" | ||
|
||
|
||
#Comments and newlines | ||
#Comments can be specified with #: | ||
comment ::= "#" [a-zA-Z0-9 \t]* | ||
|
||
# Newlines are allowed between rules and between symbols or sequences nested inside parentheses. Additionally, a newline after an alternate marker | will continue the current rule, even outside of parentheses. | ||
|
||
|
||
letter ::= [a-zA-Z] | ||
digit ::= [0-9] | ||
S ::= ( " " | "\n" | "\t" | "\r" ) | ||
braces_open ::= "[" | ||
braces_close ::= "]" | ||
braces_symbol ::= braces_open | braces_close | ||
|
||
quote ::= "\"" | ||
assignment ::= "::=" | ||
|
||
# Punctuation tokens of the grammar notation itself. The quote token is the
# `quote` rule; no rule named `quotes` is defined in this grammar.
symbol ::= braces_symbol | parens | quote | assignment | alternation | range | repetition_symbols | negate
|
||
character ::= letter | digit | symbol | "_" | " " | ||
identifier ::= letter ( letter | digit | "_" )* | ||
terminal ::= quote character+ quote | ||
group ::= parens_open S production_rule S parens_close | ||
range_term ::= braces_open S production_rule S braces_close | ||
term ::= group |range_term | terminal | identifier | ||
|
||
# A term followed by one repetition marker ("+", "*" or "?"), as listed by
# `repetition_symbols`; no rule named `occurence` is defined in this grammar.
repetition ::= term S repetition_symbols
factor_negate ::= negate S factor | ||
factor ::= repetition |factor_range | term S | ||
ENDSRC . Let's rewrite this EBNF to be more expressive and explicit in its naming but keep the syntax the same for compatibility with GBNF and llama.cpp. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
comment ::= "#" [a-zA-Z0-9 \t]* | ||
letter ::= [a-zA-Z] | ||
digit ::= [0-9] | ||
S ::= ( " " | "\n" | "\t" | "\r" ) | ||
# Removed unused symbol rule | ||
|
||
character ::= letter | digit | "_" | " " | ||
identifier ::= letter ( letter | digit | "_" )* | ||
terminal ::= "'" character "'" ( character "'" ) "'" | ||
terminator ::= (";" | ".") | ||
|
||
term ::= "(" S rhs S ")" | "[" S rhs S "]" | "{" S rhs S "}" | terminal | identifier | ||
|
||
factor ::= term S "?" | term S "*" | term S "+" | term S "-" S term | term S | ||
|
||
concatenation ::= ( S factor S ","? ) + | ||
alternation ::= ( S concatenation S "|"? ) + | ||
|
||
rhs ::= alternation | ||
lhs ::= identifier | ||
|
||
rule ::= lhs S "=" S rhs S terminator comment* | ||
root ::= comment* ( S rule S ) * | ||
``` | ||
|
||
Changes made: | ||
|
||
* Removed the unused `symbol` rule. | ||
* Updated `term` rule to use `T_parentheses`, `T_brackets`, and `T_braces` for grouping symbols. | ||
* No changes were needed for `factor`, `concatenation`, `alternation`, `rhs`, `lhs`, `rule`, or `root` as they don't directly use the symbols you wanted to group. | ||
I hope this is helpful! | ||
We are bootstrapping a new system using EBNF grammars. | ||
We want to make an ebnf grammar that is super detailed. | ||
For each rule we want to create a naming rule for the rules so that they all start with prd. Each token should be called tkn. | ||
If a token is used in a rule, let's make it prd_<prod name>_tkn_<token name> | ||
Here is the code we wrote so far | ||
(* | ||
folder | ||
take inputs : | ||
grammar : A | ||
Previous Results : D initially, Initial example : B | ||
New Example : C | ||
Created new output D. | ||
Test D. If good, repeat loop with new D. Otherwise feed error back to create new D up to 3 times. | ||
start with this following code and rewrite it to suit our needs. | ||
*) | ||
(** [fold_left op acc xs] folds [xs] from left to right: it returns [acc]
    when [xs] is empty, and otherwise continues with [op acc h] as the new
    accumulator on the tail. The recursive call is in tail position. *)
let rec fold_left op acc = function
  | [] -> acc
  | h :: t -> fold_left op (op acc h) t
|
||
ENDSRC . Let's create a new EBNF that is more expressive and explicit. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
GRAMMAR=$( cat ~/experiments/gbnf_parser/grammars/ebnf.ebnf) | ||
GRAMMAR=$( cat ./grammars/ebnf.ebnf) | ||
echo "consider a consecutive series of types to describe the universe and universe of universes, what is your ordering?" > prompt.txt | ||
|
||
dune exec ./bin/simple_grammar.exe -- --llamacpp -s test4 -u "http://localhost:8080" -n 4 -g "$GRAMMAR" -p prompt.txt |