1
0
Fork 0
mirror of synced 2025-09-24 04:40:05 +00:00
ZoKrates/zokrates_parser/src/zokrates.pest
2022-11-21 17:39:19 +01:00

185 lines
7.6 KiB
Text

// Entry point: an optional curve pragma followed by any number of top-level
// declarations, anchored at the start and the end of the input.
file = { SOI ~ pragma? ~ symbol_declaration* ~ EOI }
// `#pragma curve <name>`
pragma = { "#pragma" ~ "curve" ~ curve }
// Curve name: one or more letters, digits or underscores. At least one
// character is required so that the rule can never produce an empty name.
curve = @{ (ASCII_ALPHANUMERIC | "_")+ }
// Contents of a string literal: everything up to the next double quote.
// There is no escape mechanism, so a string cannot contain `"`.
string = @{ (!"\"" ~ ANY)* }
// Compound-atomic: no implicit whitespace is skipped inside the quotes, so
// leading and trailing blanks belong to `string`.
quoted_string = ${ "\"" ~ string ~ "\"" }
semicolon = _{ ";" }
// A top-level item. Imports, constants and type aliases are terminated by `;`,
// struct and function definitions are not.
symbol_declaration = {
    (
        ((import_directive | const_definition | type_definition) ~ semicolon)
      | (ty_struct_definition | function_definition)
    )
}
// Either `import "path" [as alias]` or `from "path" import a [as b], ...`
import_directive = { main_import_directive | from_import_directive }
from_import_directive = {
    "from" ~ quoted_string ~ "import" ~ import_symbol_list
}
main_import_directive = {
    "import" ~ quoted_string ~ ("as" ~ identifier)?
}
// One imported symbol, optionally renamed with `as`.
import_symbol = { identifier ~ ("as" ~ identifier)? }
// Comma-separated, at least one symbol, no trailing comma.
import_symbol_list = _{ import_symbol ~ ("," ~ import_symbol)* }
// `def name<GENERICS>(params) -> ty { ... }`; the generics and the return
// type are both optional.
function_definition = {
    "def" ~ identifier ~ constant_generics_declaration?
    ~ "(" ~ parameter_list ~ ")"
    ~ ("->" ~ ty)?
    ~ block_statement
}
// `const <type> <name> = <expression>`
const_definition = { "const" ~ typed_identifier ~ "=" ~ expression }
// `type <name><GENERICS> = <type>`
type_definition = {
    "type" ~ identifier ~ constant_generics_declaration? ~ "=" ~ ty
}
// `<A, B, ...>`: one or more generic names, no trailing comma.
constant_generics_declaration = _{ "<" ~ constant_generics_list ~ ">" }
constant_generics_list = _{ identifier ~ ("," ~ identifier)* }
// Possibly empty, comma-separated, no trailing comma.
parameter_list = _{ (parameter ~ ("," ~ parameter)*)? }
// Optional visibility, type, optional `mut`, then the parameter name.
parameter = { vis? ~ ty ~ _mut? ~ identifier }
// basic types
// Each type keyword is atomic and refuses to match when it is directly followed
// by an identifier character. Implicit whitespace between a type and the name
// that follows it may be empty, so without this check an assignment such as
// `boolean = true` would be read as the declaration `bool ean = true`, and a
// struct type named `u8x` could never be referenced.
ty_field = @{ "field" ~ !(ASCII_ALPHANUMERIC | "_") }
ty_bool = @{ "bool" ~ !(ASCII_ALPHANUMERIC | "_") }
ty_u8 = @{ "u8" ~ !(ASCII_ALPHANUMERIC | "_") }
ty_u16 = @{ "u16" ~ !(ASCII_ALPHANUMERIC | "_") }
ty_u32 = @{ "u32" ~ !(ASCII_ALPHANUMERIC | "_") }
ty_u64 = @{ "u64" ~ !(ASCII_ALPHANUMERIC | "_") }
ty_basic = { ty_field | ty_bool | ty_u8 | ty_u16 | ty_u32 | ty_u64 }
// Anything that may be used as the element type of an array.
ty_basic_or_struct_or_tuple = { ty_basic | ty_struct | ty_tuple }
// Element type followed by one or more `[size]` dimensions.
ty_array = { ty_basic_or_struct_or_tuple ~ ("[" ~ expression ~ "]")+ }
// `ty_array` is tried first because it begins with the same tokens as the
// other alternatives.
ty = { ty_array | ty_basic | ty_struct | ty_tuple }
// tuples
// The multi-element form is tried first, then the single-element form, then
// the empty tuple.
ty_tuple = {
    "("
    ~ (ty_tuple_multiple_inner | ty_tuple_single_inner | ty_tuple_empty_inner)
    ~ ")"
}
// `()`
ty_tuple_empty_inner = _{ "" }
// `(T,)`: the trailing comma is mandatory.
ty_tuple_single_inner = _{ ty ~ "," }
// `(T, U, ...)`: two or more types, optional trailing comma.
ty_tuple_multiple_inner = _{ ty ~ ("," ~ ty)+ ~ ","? }
// structs
// Reference to a struct type: its name, optionally with explicit generics.
ty_struct = { identifier ~ explicit_generics? }
// type definitions
// `struct Name<GENERICS> { <type> <field>; ... }`
ty_struct_definition = {
    "struct" ~ identifier ~ constant_generics_declaration?
    ~ "{" ~ struct_field_list ~ "}"
}
// Zero or more fields, each terminated by `;`.
struct_field_list = _{ (struct_field ~ semicolon)* }
struct_field = { typed_identifier }
// Visibility and mutability markers.
// Each keyword is atomic and must not be directly followed by an identifier
// character. Implicit whitespace after the keyword may be empty, so without
// this check `field mutable` would be read as `field mut able`, and a
// parameter of struct type `publickey` as `public key`.
vis_private = @{ "private" ~ !(ASCII_ALPHANUMERIC | "_") }
vis_public = @{ "public" ~ !(ASCII_ALPHANUMERIC | "_") }
vis = { vis_private | vis_public }
_mut = @{ "mut" ~ !(ASCII_ALPHANUMERIC | "_") }
// Statements
// A `for` loop stands on its own; every other statement must be followed by
// a semicolon.
statement = {
    (
        iteration_statement
      | (
            (log_statement | return_statement | definition_statement | assertion_statement)
            ~ semicolon
        )
    )
}
// `log("format")` or `log("format", a, b, ...)`.
// The comma is only required when it introduces the argument list, so a format
// string without arguments no longer needs a dangling `,`. The old spelling
// `log("format",)` is still accepted because `expression_list` may be empty.
log_statement = { "log" ~ "(" ~ quoted_string ~ ("," ~ expression_list)? ~ ")" }
// Braces around zero or more statements.
block_statement = _{ "{" ~ statement* ~ "}" }
// `for <type> <name> in <from>..<to> { ... }`
iteration_statement = {
    "for" ~ typed_identifier
    ~ "in" ~ expression ~ ".." ~ expression
    ~ block_statement
}
// `return` or `return <expression>`.
// The `!identifier` guard rejects words that merely start with `return`, such
// as `returned` or `return_value`. `identifier` is atomic and never matches a
// bare keyword, so a real `return` still passes the guard. Without it, the
// keyword matched the prefix of such names, the statement alternative was
// committed, and an assignment like `returned = 1;` failed to parse.
return_statement = { !identifier ~ "return" ~ expression? }
// Declaration (`field x = ...`) or assignment (`x[0] = ...`).
definition_statement = {
    typed_identifier_or_assignee ~ "=" ~ expression
}
// `assert(condition)` or `assert(condition, "message")`
assertion_statement = {
    "assert" ~ "(" ~ expression ~ ("," ~ quoted_string)? ~ ")"
}
// The typed form is tried before the plain assignee.
typed_identifier_or_assignee = { typed_identifier | assignee }
// Expressions
// Possibly empty, comma-separated, no trailing comma.
expression_list = _{ (expression ~ ("," ~ expression)*)? }
// A flat chain of operands joined by binary operators; the relative
// precedence of those operators is not encoded in this rule.
expression = { unaried_term ~ (op_binary ~ unaried_term)* }
// At most one prefix operator per operand.
unaried_term = { op_unary? ~ powered_term }
// Optional `** exponent` applied to the postfixed term.
powered_term = { postfixed_term ~ (op_pow ~ exponent_expression)? }
// A term followed by any number of index, call or member accesses.
postfixed_term = { term ~ access* }
// Alternatives are tried in this order; a parenthesised expression wins over
// a tuple, and an inline array over an array initializer.
term = {
    ("(" ~ expression ~ ")")
  | inline_tuple_expression
  | inline_struct_expression
  | if_else_expression
  | primary_expression
  | inline_array_expression
  | array_initializer_expression
}
// `...expr` inside an inline array.
spread = { "..." ~ expression }
// `a..b`, `a..`, `..b` or `..`
range = { from_expression? ~ ".." ~ to_expression? }
from_expression = { expression }
to_expression = { expression }
// Tuple values mirror the tuple type syntax: the multi-element form is tried
// first, then the single-element form, then the empty tuple.
inline_tuple_expression = {
    "("
    ~ (
        inline_tuple_multiple_expression_inner
      | inline_tuple_single_expression_inner
      | inline_tuple_empty_expression_inner
    )
    ~ ")"
}
// `()`
inline_tuple_empty_expression_inner = _{ "" }
// `(e,)`: the trailing comma is mandatory.
inline_tuple_single_expression_inner = _{ expression ~ "," }
// `(e1, e2, ...)`: two or more expressions, optional trailing comma.
inline_tuple_multiple_expression_inner = _{
    expression ~ ("," ~ expression)+ ~ ","?
}
// Zero or more statements followed by a mandatory final expression.
block_expression = _{ "{" ~ statement* ~ expression ~ "}" }
// The `else` branch is mandatory.
if_else_expression = {
    "if" ~ expression ~ block_expression
    ~ "else" ~ block_expression
}
// Postfix operations that may follow a term.
access = { array_access | call_access | dot_access }
// `[index]` or `[from..to]`
array_access = { "[" ~ range_or_expression ~ "]" }
// `(args)` or `::<generics>(args)`
call_access = {
    ("::" ~ explicit_generics)? ~ "(" ~ arguments ~ ")"
}
arguments = { expression_list }
// `<v1, v2, ...>`: one or more values, no trailing comma.
explicit_generics = { "<" ~ constant_generics_values ~ ">" }
constant_generics_values = _{
    constant_generics_value ~ ("," ~ constant_generics_value)*
}
// A literal, a name, or the `_` placeholder.
constant_generics_value = { literal | identifier | underscore }
underscore = { "_" }
// `.member` for a named member, `.0` for a numeric one.
dot_access = { "." ~ identifier_or_decimal }
identifier_or_decimal = { identifier | decimal_number }
// A bare name or a literal; the name is tried first.
primary_expression = { identifier | literal }
// `Name { a: 1, b: 2 }`
inline_struct_expression = {
    identifier ~ "{" ~ inline_struct_member_list ~ "}"
}
// Possibly empty; a trailing comma is allowed.
inline_struct_member_list = _{
    (inline_struct_member ~ ("," ~ inline_struct_member)*)? ~ ","?
}
inline_struct_member = { identifier ~ ":" ~ expression }
// `[a, b, ...c]`
inline_array_expression = { "[" ~ inline_array_inner ~ "]" }
// Possibly empty, comma-separated, no trailing comma.
inline_array_inner = _{
    (spread_or_expression ~ ("," ~ spread_or_expression)*)?
}
spread_or_expression = { spread | expression }
range_or_expression = { range | expression }
// Right-hand side of `**`: a parenthesised expression, a name or a literal.
exponent_expression = { ("(" ~ expression ~ ")") | primary_expression }
// `[value; count]`
array_initializer_expression = {
    "[" ~ expression ~ semicolon ~ expression ~ "]"
}
// End Expressions
// A type, an optional `mut` marker, then the name being declared.
typed_identifier = { ty ~ _mut? ~ identifier }
// Target of an assignment: a name followed by any number of index or member accesses.
assignee = { identifier ~ assignee_access* }
// Calls are not allowed on the left-hand side, only `[...]` and `.member`.
assignee_access = { array_access | dot_access }
// Atomic: no whitespace inside a name. An identifier starts with a letter and
// continues with letters, digits or underscores. The first alternative accepts
// a letter at a position where no keyword starts; the second accepts a keyword
// only when at least one more identifier character follows it (e.g. `format`,
// `u8x`), so a bare keyword is never an identifier.
identifier = @{ ((!keyword ~ ASCII_ALPHA) | (keyword ~ (ASCII_ALPHANUMERIC | "_"))) ~ (ASCII_ALPHANUMERIC | "_")* }
// Literals for all types
// Hex is tried before decimal so that the leading `0` of `0x..` is not
// consumed as a decimal number.
literal = { hex_literal | decimal_literal | boolean_literal }
// `42`, `42u8`, `42_u32`, `1f`; compound-atomic, so no whitespace is allowed
// between the digits and the suffix.
decimal_literal = ${
    decimal_number ~ ("_"? ~ decimal_suffix)?
}
// Either a single `0` or a digit sequence that does not start with `0`.
decimal_number = @{ "0" | (ASCII_NONZERO_DIGIT ~ ASCII_DIGIT*) }
decimal_suffix = {
    decimal_suffix_u8
  | decimal_suffix_u16
  | decimal_suffix_u32
  | decimal_suffix_u64
  | decimal_suffix_field
}
decimal_suffix_u8 = { "u8" }
decimal_suffix_u16 = { "u16" }
decimal_suffix_u32 = { "u32" }
decimal_suffix_u64 = { "u64" }
decimal_suffix_field = { "f" }
boolean_literal = { "true" | "false" }
// Hexadecimal literals: `0x` followed by 16, 8, 4 or 2 hex digits.
// Compound-atomic (`$`): the digit rules below inherit atomicity, so no
// whitespace or comment may appear between `0x` and the digits or between two
// digits, while the inner `hex_number*` pairs are still produced. The previous
// non-atomic form accepted input such as `0x f f`.
hex_literal = ${ "0x" ~ hex_number }
// Widest form first, so that a 16-digit literal is not cut short as a narrower one.
hex_number = { hex_number_u64 | hex_number_u32 | hex_number_u16 | hex_number_u8 }
hex_number_u8 = { ASCII_HEX_DIGIT{2} }
hex_number_u16 = { ASCII_HEX_DIGIT{4} }
hex_number_u32 = { ASCII_HEX_DIGIT{8} }
hex_number_u64 = { ASCII_HEX_DIGIT{16} }
// Operators
// Multi-character operators are atomic so that no whitespace can split them.

// logical
op_or          = @{ "||" }
op_and         = @{ "&&" }
// bitwise
op_bit_xor     =  { "^" }
op_bit_and     =  { "&" }
op_bit_or      =  { "|" }
// comparison
op_equal       = @{ "==" }
op_not_equal   = @{ "!=" }
op_lt          =  { "<" }
op_lte         = @{ "<=" }
op_gt          =  { ">" }
op_gte         = @{ ">=" }
// arithmetic
op_add         =  { "+" }
op_sub         =  { "-" }
op_mul         =  { "*" }
op_div         =  { "/" }
op_rem         =  { "%" }
op_pow         = @{ "**" }
// unary
op_not         =  { "!" }
op_neg         =  { "-" }
op_pos         =  { "+" }
// shifts
op_left_shift  = @{ "<<" }
op_right_shift = @{ ">>" }
// `cond ? a : b` is parsed as if `? a :` were a binary operator between `cond` and `b`.
op_ternary     =  { "?" ~ expression ~ ":" }

// `op_pow` is deliberately absent from `op_binary`: its precedence is handled
// by this grammar (see `powered_term`) rather than later by precedence climbing.
// The order matters: every operator that is a prefix of a longer one
// (`|`, `&`, `<`, `>`) is listed after the longer one.
op_binary = _{
    op_or | op_and
  | op_bit_xor | op_bit_and | op_bit_or
  | op_left_shift | op_right_shift
  | op_equal | op_not_equal
  | op_lte | op_lt | op_gte | op_gt
  | op_add | op_sub | op_mul | op_div | op_rem
  | op_ternary
}
op_unary = { op_pos | op_neg | op_not }
// Implicit whitespace that pest skips between the tokens of non-atomic rules:
// spaces, tabs, backslashes, comments and newlines.
WHITESPACE = _{ " " | "\t" | "\\" | COMMENT | NEWLINE }
// Block comments `/* ... */` end at the first `*/` (no nesting); line comments
// `// ...` run up to, but do not include, the end of the line.
COMMENT = _{ ("/*" ~ (!"*/" ~ ANY)* ~ "*/") | ("//" ~ (!NEWLINE ~ ANY)*) }
// Reserved words. The order of the alternatives matters because the choice is
// ordered and the first match wins: a keyword that is a prefix of another one
// must come after it. If "as" were listed before "assert", the word `assert`
// would be read as the keyword `as` followed by `sert` and would wrongly be
// accepted as an identifier.
keyword = @{
"log"|"assert"|"as"|"bool"|"const"|"def"|"else"|"false"|"field"|"for"|"if"|"import"|"from"|
"in"|"mut"|"private"|"public"|"return"|"struct"|"true"|"type"|"u8"|"u16"|"u32"|"u64"
}