-- Copyright 2023 Sam Nystrom
-- Hare LPeg lexer
--
-- Builds and returns a lexer named 'hare'. Rules are tried in the order they
-- are added, so comments are registered before operators (both start with
-- '/') and the word lists are registered before anything more generic.

local l = require('lexer')
-- `lpeg` is expected to be provided as a global by the host environment.
local S = lpeg.S

local lex = l.new('hare')

-- Whitespace
lex:add_rule('whitespace', l.token(l.WHITESPACE, l.space^1))

-- Comments: `//` up to the end of the line.
lex:add_rule('comment', l.token(l.COMMENT, l.to_eol('//', true)))

-- Keywords
lex:add_rule('keyword', l.token(l.KEYWORD, l.word_match{
  -- Type tests and casts
  'as', 'is',
  -- Control flow
  'if', 'else', 'match', 'switch', 'case', 'for',
  'break', 'continue', 'defer', 'return', 'yield',
  -- Declarations and imports
  'const', 'def', 'let', 'fn', 'type', 'use',
  'export', 'static',
  -- Aggregate types
  'enum', 'struct', 'union',
}))

-- Builtins
lex:add_rule('function', l.token(l.FUNCTION, l.word_match{
  'abort', 'assert',
  'align', 'len', 'offset',
  'alloc', 'free',
  'append', 'insert', 'delete',
  -- C ABI
  'vastart', 'vaarg', 'vaend',
}))

-- Types
lex:add_rule('type', l.token(l.TYPE, l.word_match{
  'bool',
  'f32', 'f64',
  'i8', 'i16', 'i32', 'i64',
  'u8', 'u16', 'u32', 'u64',
  'size', 'int', 'uint', 'uintptr',
  'never', 'nullable', 'opaque',
  'rune', 'str', 'void',
  -- C ABI
  'valist',
}))

-- Constants
lex:add_rule('constant', l.token(l.CONSTANT, l.word_match{'true', 'false', 'null'}))

-- Numbers
-- Uses the generic float/integer patterns supplied by the lexer module.
-- NOTE(review): Hare's typed literal suffixes (e.g. `1u8`, `1z`) are presumably
-- not covered by these generic patterns -- confirm against the lexer module.
lex:add_rule('number', l.token(l.NUMBER, l.float + l.integer))

-- Strings
local dq_str = l.range('"')
local rune_lit = l.range('\'')
-- Raw strings are backtick-delimited; backslash has no special meaning inside
-- them, so escapes are disabled, and they may span several lines.
local raw_str = l.range('`', false, false)
lex:add_rule('string', l.token(l.STRING, dq_str + rune_lit + raw_str))

-- Operators ('%' is Hare's modulo operator)
lex:add_rule('operator', l.token(l.OPERATOR, S('+-/*%<>!=@~&|^?:;,.()[]{}')))

-- Folding: brace-delimited blocks and runs of consecutive `//` comment lines.
lex:add_fold_point(l.OPERATOR, '{', '}')
lex:add_fold_point(l.COMMENT, l.fold_consecutive_lines('//'))

return lex