1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
-- Copyright 2023 Sam Nystrom <sam@samnystrom.dev>
-- Hare LPeg lexer
-- LPeg lexer framework module; it supplies new(), token(), word_match(),
-- range(), to_eol() and the token-name constants used throughout this file.
local l = require('lexer')
-- Short aliases for LPeg constructors. Within this file only S is used by
-- active code; C, Cmt, P and R appear solely inside the commented-out number
-- sketch further down.
-- NOTE(review): `lpeg` is read as a global here (there is no require for it in
-- this file); presumably the host or the `lexer` module defines it -- confirm.
local C, Cmt, P, R, S = lpeg.C, lpeg.Cmt, lpeg.P, lpeg.R, lpeg.S
-- Lexer object for the 'hare' language. Every rule and fold point below is
-- registered on it, and it is the value returned at the end of the file.
local lex = l.new('hare')
-- Whitespace: a run of one or more `l.space` characters forms a single token.
local blank_run = l.space^1
lex:add_rule('whitespace', l.token(l.WHITESPACE, blank_run))
-- Comments
-- Hare only has line comments: `//` up to the end of the line. The language
-- has no backslash line continuation, so to_eol()'s escape flag is left off;
-- with it enabled, a comment ending in `\` would also swallow the next line.
lex:add_rule('comment', l.token(l.COMMENT, l.to_eol('//')))
-- Keywords
-- Reserved words that are not builtins, types or constants (each of those has
-- its own rule in this file). Grouped by role for readability; word_match
-- treats the list as a set, so the grouping has no effect on matching.
lex:add_rule('keyword', l.token(l.KEYWORD, l.word_match{
  -- Casts and type tests
  'as', 'is',
  -- Branching
  'if', 'else', 'match', 'switch',
  -- Control transfer
  'break', 'continue', 'defer', 'return', 'yield',
  -- Bindings
  'const', 'def', 'let',
  -- Functions
  'fn',
  -- Switch/match arms
  'case',
  -- Loops
  'for',
  -- Declaration modifiers
  'export', 'static',
  -- Aggregate types
  'enum', 'struct', 'union',
  -- Type declarations
  'type',
  -- Module imports (`use fmt;`); previously missing, so import lines were
  -- left unhighlighted.
  'use',
}))
-- Builtins
-- Words of the language's built-in expressions; they are tagged with the
-- FUNCTION token under the rule name 'function'.
local builtin_names = {
  -- Assertions and termination
  'abort', 'assert',
  -- Layout queries
  'align', 'len', 'offset',
  -- Allocation
  'alloc', 'free',
  -- Slice manipulation
  'append', 'insert', 'delete',
  -- C ABI variadic helpers
  'vastart', 'vaarg', 'vaend',
}
lex:add_rule('function', l.token(l.FUNCTION, l.word_match(builtin_names)))
-- Types
-- Names of the built-in types (plus the `nullable` pointer modifier), tagged
-- with the TYPE token.
do
  local type_names = {
    'bool',
    -- Floating point
    'f32', 'f64',
    -- Fixed-width integers
    'i8', 'i16', 'i32', 'i64',
    'u8', 'u16', 'u32', 'u64',
    -- Platform-sized integers
    'size', 'int', 'uint', 'uintptr',
    -- Miscellaneous
    'never', 'nullable', 'opaque',
    -- Text
    'rune', 'str',
    'void',
    -- C ABI
    'valist',
  }
  local type_word = l.word_match(type_names)
  lex:add_rule('type', l.token(l.TYPE, type_word))
end
-- Constants: the two boolean literals and `null`.
local literal_words = l.word_match{'true', 'false', 'null'}
lex:add_rule('constant', l.token(l.CONSTANT, literal_words))
-- Numbers
-- The block comment below is a disabled draft of more specific number
-- patterns, kept for reference only. It refers to `decimal_literal`, which is
-- not defined anywhere in this file, so it cannot be enabled as-is.
--[[local identifier = P('r#')^-1 * l.word
local function opt_cap(patt) return C(patt^-1) end
local float = l.digit *
(Cmt(opt_cap('.' * l.digit) * opt_cap(S('eE') * S('+-')^-1 * l.digit) *
opt_cap(P('f32') + 'f64'), function(input, index, decimals, exponent, type)
return decimals ~= "" or exponent ~= "" or type ~= ""
end) + '.' * -(S('._') + identifier))
local bin = P('0b') * S('01')
local oct = P('0o') * lpeg.R('07')
local hex = P('0x') * l.xdigit
local integer = (bin + oct + hex + decimal_literal) *
(S('iu') * (P('8') + '16' + '32' + '64' + '128' + 'size'))^-1
--]]
-- Active rule: try the lexer module's generic float pattern first, then its
-- generic integer pattern.
local numeric_literal = l.float + l.integer
lex:add_rule('number', l.token(l.NUMBER, numeric_literal))
-- Strings
-- Three delimited literal forms share the STRING token:
--   "..."  ordinary string literals
--   '...'  rune literals
--   `...`  raw string literals
-- Raw strings have no escape sequences, so range() is called with its escapes
-- argument explicitly false (the first false leaves them multi-line); without
-- this alternative the contents of a raw string were tokenised as code.
local quoted_str = l.range('"')
local rune_lit = l.range('\'')
local raw_str = l.range('`', false, false)
lex:add_rule('string', l.token(l.STRING, quoted_str + rune_lit + raw_str))
-- Operators
-- Every operator and punctuation character is matched one character at a
-- time, so multi-character operators such as `<<=` or `=>` come out as
-- consecutive OPERATOR tokens. `%` (modulo, and the first character of `%=`)
-- is part of the language and was previously missing from the set.
lex:add_rule('operator', l.token(l.OPERATOR, S('+-/*%<>!=@~&|^?:;,.()[]{}')))
-- Fold points
-- Register `{` / `}` operator tokens as the start and end of a fold region.
lex:add_fold_point(l.OPERATOR, '{', '}')
-- Register folding for `//` comments via the lexer module's
-- fold_consecutive_lines helper (presumably folds runs of adjacent comment
-- lines -- confirm against the lexer module). The call is kept as the last
-- argument so that everything it returns is forwarded to add_fold_point.
lex:add_fold_point(l.COMMENT, l.fold_consecutive_lines('//'))
-- Hand the configured lexer object back to whoever loaded this file.
return lex
|