1
0
Fork 0
mirror of synced 2025-09-23 12:18:44 +00:00
ZoKrates/zokrates_parser/src/zokrates.pest
2021-07-08 15:14:18 +02:00

168 lines
7.2 KiB
Text

file = { SOI ~ NEWLINE* ~ pragma? ~ NEWLINE* ~ symbol_declaration* ~ EOI }
pragma = { "#pragma" ~ "curve" ~ curve }
curve = @{ (ASCII_ALPHANUMERIC | "_") * }
symbol_declaration = { (import_directive | ty_struct_definition | const_definition | function_definition) ~ NEWLINE* }
import_directive = { main_import_directive | from_import_directive }
from_import_directive = { "from" ~ "\"" ~ import_source ~ "\"" ~ "import" ~ import_symbol_list ~ NEWLINE* }
main_import_directive = { "import" ~ "\"" ~ import_source ~ "\"" ~ ("as" ~ identifier)? ~ NEWLINE+ }
import_source = @{(!"\"" ~ ANY)*}
import_symbol = { identifier ~ ("as" ~ identifier)? }
import_symbol_list = _{ import_symbol ~ ("," ~ import_symbol)* }
function_definition = {"def" ~ identifier ~ constant_generics_declaration? ~ "(" ~ parameter_list ~ ")" ~ return_types ~ ":" ~ NEWLINE* ~ statement* }
const_definition = {"const" ~ ty ~ identifier ~ "=" ~ expression ~ NEWLINE*}
return_types = _{ ( "->" ~ ( "(" ~ type_list ~ ")" | ty ))? }
constant_generics_declaration = _{ "<" ~ constant_generics_list ~ ">" }
constant_generics_list = _{ identifier ~ ("," ~ identifier)* }
parameter_list = _{(parameter ~ ("," ~ parameter)*)?}
parameter = {vis? ~ ty ~ identifier}
// basic types
ty_field = {"field"}
ty_bool = {"bool"}
ty_u8 = {"u8"}
ty_u16 = {"u16"}
ty_u32 = {"u32"}
ty_u64 = {"u64"}
ty_basic = { ty_field | ty_bool | ty_u8 | ty_u16 | ty_u32 | ty_u64 }
ty_basic_or_struct = { ty_basic | ty_struct }
ty_array = { ty_basic_or_struct ~ ("[" ~ expression ~ "]")+ }
ty = { ty_array | ty_basic | ty_struct }
type_list = _{(ty ~ ("," ~ ty)*)?}
// structs
ty_struct = { identifier ~ explicit_generics? }
// type definitions
ty_struct_definition = { "struct" ~ identifier ~ constant_generics_declaration? ~ "{" ~ NEWLINE* ~ struct_field_list ~ NEWLINE* ~ "}" ~ NEWLINE* }
struct_field_list = _{(struct_field ~ (NEWLINE+ ~ struct_field)*)? }
struct_field = { ty ~ identifier }
vis_private = {"private"}
vis_public = {"public"}
vis = { vis_private | vis_public }
// Statements
statement = { (return_statement // does not require subsequent newline
| (iteration_statement
| definition_statement
| expression_statement
) ~ NEWLINE
) ~ NEWLINE* }
iteration_statement = { "for" ~ ty ~ identifier ~ "in" ~ expression ~ ".." ~ expression ~ "do" ~ NEWLINE* ~ statement* ~ "endfor"}
return_statement = { "return" ~ expression_list}
definition_statement = { typed_identifier_or_assignee_list ~ "=" ~ expression } // declare and assign, so only identifiers are allowed, unlike `assignment_statement`
expression_statement = {"assert" ~ "(" ~ expression ~ ")"}
typed_identifier_or_assignee_list = _{ typed_identifier_or_assignee ~ ("," ~ typed_identifier_or_assignee)* }
typed_identifier_or_assignee = { typed_identifier | assignee } // we don't use { ty? ~ identifier } as with a single token, it gets parsed as `ty` but we want `identifier`
// Expressions
expression_list = _{(expression ~ ("," ~ expression)*)?}
expression = { unaried_term ~ (op_binary ~ unaried_term)* }
unaried_term = { op_unary? ~ powered_term }
powered_term = { term ~ (op_pow ~ exponent_expression)? }
term = { ("(" ~ expression ~ ")") | inline_struct_expression | conditional_expression | postfix_expression | primary_expression | inline_array_expression | array_initializer_expression }
spread = { "..." ~ expression }
range = { from_expression? ~ ".." ~ to_expression? }
from_expression = { expression }
to_expression = { expression }
conditional_expression = { "if" ~ expression ~ "then" ~ expression ~ "else" ~ expression ~ "fi"}
postfix_expression = { identifier ~ access+ } // we force there to be at least one access, otherwise this matches single identifiers
access = { array_access | call_access | member_access }
array_access = { "[" ~ range_or_expression ~ "]" }
call_access = { ("::" ~ explicit_generics)? ~ "(" ~ arguments ~ ")" }
arguments = { expression_list }
explicit_generics = { "<" ~ constant_generics_values ~ ">" }
constant_generics_values = _{ constant_generics_value ~ ("," ~ constant_generics_value)* }
constant_generics_value = { literal | identifier | underscore }
underscore = { "_" }
member_access = { "." ~ identifier }
primary_expression = { identifier
| literal
}
inline_struct_expression = { identifier ~ "{" ~ NEWLINE* ~ inline_struct_member_list ~ NEWLINE* ~ "}" }
inline_struct_member_list = _{(inline_struct_member ~ ("," ~ NEWLINE* ~ inline_struct_member)*)? ~ ","? }
inline_struct_member = { identifier ~ ":" ~ expression }
inline_array_expression = { "[" ~ NEWLINE* ~ inline_array_inner ~ NEWLINE* ~ "]" }
inline_array_inner = _{(spread_or_expression ~ ("," ~ NEWLINE* ~ spread_or_expression)*)?}
spread_or_expression = { spread | expression }
range_or_expression = { range | expression }
exponent_expression = { "(" ~ expression ~ ")" | primary_expression }
array_initializer_expression = { "[" ~ expression ~ ";" ~ expression ~ "]" }
// End Expressions
typed_identifier = { ty ~ identifier }
assignee = { identifier ~ assignee_access* }
assignee_access = { array_access | member_access }
identifier = @{ ((!keyword ~ ASCII_ALPHA) | (keyword ~ (ASCII_ALPHANUMERIC | "_"))) ~ (ASCII_ALPHANUMERIC | "_")* }
// Literals for all types
literal = { hex_literal | decimal_literal | boolean_literal }
decimal_literal = ${ decimal_number ~ ("_"? ~ decimal_suffix)? }
decimal_number = @{ "0" | ASCII_NONZERO_DIGIT ~ ASCII_DIGIT* }
decimal_suffix = { decimal_suffix_u8 | decimal_suffix_u16 | decimal_suffix_u32 | decimal_suffix_u64 | decimal_suffix_field }
decimal_suffix_u8 = { "u8" }
decimal_suffix_u16 = { "u16" }
decimal_suffix_u32 = { "u32" }
decimal_suffix_u64 = { "u64" }
decimal_suffix_field = { "f" }
boolean_literal = { "true" | "false" }
hex_literal = !{ "0x" ~ hex_number }
hex_number = { hex_number_u64 | hex_number_u32 | hex_number_u16 | hex_number_u8 }
hex_number_u8 = { ASCII_HEX_DIGIT{2} }
hex_number_u16 = { ASCII_HEX_DIGIT{4} }
hex_number_u32 = { ASCII_HEX_DIGIT{8} }
hex_number_u64 = { ASCII_HEX_DIGIT{16} }
// Operators
op_or = @{"||"}
op_and = @{"&&"}
op_bit_xor = {"^"}
op_bit_and = {"&"}
op_bit_or = {"|"}
op_equal = @{"=="}
op_not_equal = @{"!="}
op_lt = {"<"}
op_lte = @{"<="}
op_gt = {">"}
op_gte = @{">="}
op_add = {"+"}
op_sub = {"-"}
op_mul = {"*"}
op_div = {"/"}
op_rem = {"%"}
op_pow = @{"**"}
op_not = {"!"}
op_neg = {"-"}
op_pos = {"+"}
op_left_shift = @{"<<"}
op_right_shift = @{">>"}
// `op_pow` is *not* in `op_binary` because its precedence is handled in this parser rather than down the line in precedence climbing
op_binary = _ { op_or | op_and | op_bit_xor | op_bit_and | op_bit_or | op_left_shift | op_right_shift | op_equal | op_not_equal | op_lte | op_lt | op_gte | op_gt | op_add | op_sub | op_mul | op_div | op_rem }
op_unary = { op_pos | op_neg | op_not }
WHITESPACE = _{ " " | "\t" | "\\" ~ COMMENT? ~ NEWLINE}
COMMENT = _{ ("/*" ~ (!"*/" ~ ANY)* ~ "*/") | ("//" ~ (!NEWLINE ~ ANY)*) }
// the ordering of reserved keywords matters: if "as" is before "assert", then "assert" gets parsed as (as)(sert) and incorrectly
// accepted
keyword = @{"assert"|"as"|"bool"|"byte"|"const"|"def"|"do"|"else"|"endfor"|"export"|"false"|"field"|"for"|"if"|"then"|"fi"|"import"|"from"|
"in"|"private"|"public"|"return"|"struct"|"true"|"u8"|"u16"|"u32"|"u64"
}