Peeking lexer optimizations (#263)
* Provide PeekingLexer.Checkpoint as an allocation-free Clone alternative

The state itself is still in private fields, so exposing Checkpoint as a
public field doesn't break encapsulation.

* Remove PeekingLexer.Clone and avoid an allocation when branching

Roughly 5% less CPU time and 12% fewer allocations in the Thrift
benchmark with the generated lexer.

* Optimize PeekingLexer to provide constant-time Peek

The attached benchmark is over 3x faster after the optimization.

* Add methods to create and restore PeekingLexer checkpoints
petee-d authored Dec 3, 2022
1 parent e748387 commit 5adbb7c
Showing 5 changed files with 85 additions and 60 deletions.
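
Taken together, these changes replace the old allocate-and-copy Clone() with a
cheap copy-in/copy-out of a small cursor struct. A minimal caller-side sketch of
the new API (not part of the commit; it assumes participle v2's MustSimple,
Upgrade and the methods added below):

package main

import (
    "fmt"
    "strings"

    "github.com/alecthomas/participle/v2/lexer"
)

func main() {
    def := lexer.MustSimple([]lexer.SimpleRule{
        {"Ident", `\w+`},
        {"Whitespace", `\s+`},
    })
    lex, err := def.Lex("", strings.NewReader("first second"))
    if err != nil {
        panic(err)
    }
    // Elide whitespace, as the tests in this commit do.
    plex, err := lexer.Upgrade(lex, def.Symbols()["Whitespace"])
    if err != nil {
        panic(err)
    }
    cp := plex.MakeCheckpoint()    // plain struct copy, no heap allocation
    fmt.Println(plex.Next().Value) // first
    plex.LoadCheckpoint(cp)        // rewind; only the three cursor fields move back
    fmt.Println(plex.Peek().Value) // first again
}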

context.go: 5 changes (2 additions, 3 deletions)

@@ -18,7 +18,7 @@ type contextFieldSet struct {
 
 // Context for a single parse.
 type parseContext struct {
-    *lexer.PeekingLexer
+    lexer.PeekingLexer
     depth        int
     trace        io.Writer
     deepestError error

@@ -31,7 +31,7 @@ type parseContext struct {
 
 func newParseContext(lex *lexer.PeekingLexer, lookahead int, caseInsensitive map[lexer.TokenType]bool) *parseContext {
     return &parseContext{
-        PeekingLexer:    lex,
+        PeekingLexer:    *lex,
         caseInsensitive: caseInsensitive,
         lookahead:       lookahead,
     }

@@ -78,7 +78,6 @@ func (p *parseContext) Branch() *parseContext {
     branch := &parseContext{}
     *branch = *p
     branch.apply = nil
-    branch.PeekingLexer = p.PeekingLexer.Clone()
     return branch
 }

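This is the allocation win from the second commit-message bullet: parseContext now
embeds PeekingLexer by value, so Branch's existing struct copy (*branch = *p) also
duplicates the lexer state, and the separate Clone() heap allocation disappears. A
self-contained sketch of the value-embedding pattern (hypothetical names, not
commit code):

package main

import "fmt"

// cursors stands in for PeekingLexer's mutable state.
type cursors struct{ cursor int }

// context embeds cursors by value, as parseContext now embeds PeekingLexer.
type context struct {
    cursors
    tokens []string // large shared data; a copy shares the backing array
}

func main() {
    parent := context{tokens: []string{"a", "b", "c"}}
    branch := parent  // one struct copy, no heap allocation
    branch.cursor = 2 // advancing the branch...
    fmt.Println(parent.cursor, branch.cursor) // 0 2 ...leaves the parent alone
}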

lexer/peek.go: 107 changes (57 additions, 50 deletions)
@@ -2,16 +2,23 @@ package lexer
 
 // PeekingLexer supports arbitrary lookahead as well as cloning.
 type PeekingLexer struct {
-    rawCursor RawCursor
-    cursor    int
-    eof       Token
-    tokens    []Token
-    elide     map[TokenType]bool
+    Checkpoint
+    tokens []Token
+    elide  map[TokenType]bool
 }
 
 // RawCursor index in the token stream.
 type RawCursor int
 
+// Checkpoint wraps the mutable state of the PeekingLexer.
+//
+// Copying and restoring just this state is a bit faster than copying the entire PeekingLexer.
+type Checkpoint struct {
+    rawCursor  RawCursor // The raw position of the next possibly elided token
+    nextCursor RawCursor // The raw position of the next non-elided token
+    cursor     int       // Index of the next non-elided token among other non-elided tokens
+}
+
 // Upgrade a Lexer to a PeekingLexer with arbitrary lookahead.
 //
 // "elide" is a slice of token types to elide from processing.

@@ -27,12 +34,12 @@ func Upgrade(lex Lexer, elide ...TokenType) (*PeekingLexer, error) {
         if err != nil {
             return r, err
         }
+        r.tokens = append(r.tokens, t)
         if t.EOF() {
-            r.eof = t
             break
         }
-        r.tokens = append(r.tokens, t)
     }
+    r.advanceToNonElided()
     return r, nil
 }
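
Note the reordering inside Upgrade: the EOF token is now appended to tokens before
the loop breaks, and the old eof field is gone. The slice therefore always ends
with an EOF sentinel, which is what lets Peek, RawPeek and advanceToNonElided
below index tokens unconditionally. A generic sketch of the sentinel idea
(illustration only, not commit code):

package main

import "fmt"

// firstEven returns the index of the first even value. Appending a sentinel that
// always matches lets the scan drop its bounds check, like the EOF token above.
func firstEven(values []int) int {
    values = append(values, 0) // sentinel: 0 is even, so the loop must stop
    i := 0
    for values[i]%2 != 0 {
        i++
    }
    return i
}

func main() {
    fmt.Println(firstEven([]int{1, 3, 4})) // 2
    fmt.Println(firstEven([]int{1, 3, 5})) // 3: the sentinel position, i.e. "not found"
}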

@@ -42,39 +49,48 @@ func (p *PeekingLexer) Range(rawStart, rawEnd RawCursor) []Token {
 }
 
 // Cursor position in tokens, excluding elided tokens.
-func (p *PeekingLexer) Cursor() int {
-    return p.cursor
+func (c Checkpoint) Cursor() int {
+    return c.cursor
 }
 
 // RawCursor position in tokens, including elided tokens.
-func (p *PeekingLexer) RawCursor() RawCursor {
-    return p.rawCursor
+func (c Checkpoint) RawCursor() RawCursor {
+    return c.rawCursor
 }
 
 // Next consumes and returns the next token.
 func (p *PeekingLexer) Next() *Token {
-    for int(p.rawCursor) < len(p.tokens) {
-        t := &p.tokens[p.rawCursor]
-        p.rawCursor++
-        if p.elide[t.Type] {
-            continue
-        }
-        p.cursor++
-        return t
-    }
-    return &p.eof
+    t := &p.tokens[p.nextCursor]
+    if t.EOF() {
+        return t
+    }
+    p.nextCursor++
+    p.rawCursor = p.nextCursor
+    p.cursor++
+    p.advanceToNonElided()
+    return t
 }
 
-// Peek ahead at the next token.
+// Peek ahead at the next non-elided token.
 func (p *PeekingLexer) Peek() *Token {
-    for i := int(p.rawCursor); i < len(p.tokens); i++ {
-        t := &p.tokens[i]
-        if p.elide[t.Type] {
-            continue
-        }
-        return t
-    }
-    return &p.eof
+    return &p.tokens[p.nextCursor]
+}
+
+// RawPeek peeks ahead at the next raw token.
+//
+// Unlike Peek, this will include elided tokens.
+func (p *PeekingLexer) RawPeek() *Token {
+    return &p.tokens[p.rawCursor]
+}
+
+// advanceToNonElided advances nextCursor to the closest non-elided token
+func (p *PeekingLexer) advanceToNonElided() {
+    for ; ; p.nextCursor++ {
+        t := &p.tokens[p.nextCursor]
+        if t.EOF() || !p.elide[t.Type] {
+            return
+        }
+    }
+}
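
This hunk delivers the constant-time Peek from the third commit-message bullet:
the elide-skipping loop that previously ran on every Peek now runs at most once
per Next (in advanceToNonElided), leaving Peek as a single slice index. A worked
trace of the three cursor fields, derived from the code above (token values
assumed for illustration):

// Token stream: [0] Ident "a", [1] Whitespace " " (elided), [2] Ident "b", [3] EOF
//
// after Upgrade: rawCursor=0, nextCursor=0, cursor=0    Peek() -> "a"
// after Next():  rawCursor=1, nextCursor=2, cursor=1    Peek() -> "b"
//                (advanceToNonElided skipped the elided whitespace at index 1)
// after Next():  rawCursor=3, nextCursor=3, cursor=2    Peek() -> EOF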

@@ -85,42 +101,33 @@
 // PeekAny peeks forward over elided and non-elided tokens.
 //
 // The returned RawCursor position is the location of the returned token.
 // Use FastForward to move the internal cursors forward.
 func (p *PeekingLexer) PeekAny(match func(Token) bool) (t Token, rawCursor RawCursor) {
-    tokenCount := RawCursor(len(p.tokens))
-    for i := p.rawCursor; i < tokenCount; i++ {
+    for i := p.rawCursor; ; i++ {
         t = p.tokens[i]
-        if match(t) || !p.elide[t.Type] {
+        if t.EOF() || match(t) || !p.elide[t.Type] {
             return t, i
         }
     }
-    return p.eof, tokenCount
 }
 
 // FastForward the internal cursors to this RawCursor position.
 func (p *PeekingLexer) FastForward(rawCursor RawCursor) {
-    tokenCount := RawCursor(len(p.tokens))
-    for ; p.rawCursor <= rawCursor && p.rawCursor < tokenCount; p.rawCursor++ {
-        t := p.tokens[p.rawCursor]
-        if p.elide[t.Type] {
-            continue
-        }
-        p.cursor++
+    for ; p.rawCursor <= rawCursor; p.rawCursor++ {
+        t := &p.tokens[p.rawCursor]
+        if t.EOF() {
+            break
+        }
+        if !p.elide[t.Type] {
+            p.cursor++
+        }
     }
+    p.nextCursor = p.rawCursor
+    p.advanceToNonElided()
 }
 
-// RawPeek peeks ahead at the next raw token.
-//
-// Unlike Peek, this will include elided tokens.
-func (p *PeekingLexer) RawPeek() *Token {
-    if int(p.rawCursor) < len(p.tokens) {
-        return &p.tokens[p.rawCursor]
-    }
-    return &p.eof
-}
-
-// Clone creates a clone of this PeekingLexer at its current token.
-//
-// The parent and clone are completely independent.
-func (p *PeekingLexer) Clone() *PeekingLexer {
-    clone := *p
-    return &clone
+func (p *PeekingLexer) MakeCheckpoint() Checkpoint {
+    return p.Checkpoint
+}
+
+func (p *PeekingLexer) LoadCheckpoint(checkpoint Checkpoint) {
+    p.Checkpoint = checkpoint
 }
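
PeekAny keeps its contract but now leans on the EOF sentinel, and FastForward has
to re-establish nextCursor afterwards. A hedged sketch of how the pair is used
(hypothetical helper, not commit code; assumes the participle v2 lexer import):

// tryConsumeElided consumes the next raw token if it has the given type, even
// when that type is normally elided. PeekAny returns the first token that matches
// or is non-elided; FastForward then consumes through its raw position.
func tryConsumeElided(plex *lexer.PeekingLexer, typ lexer.TokenType) bool {
    tok, raw := plex.PeekAny(func(t lexer.Token) bool { return t.Type == typ })
    if tok.Type != typ {
        return false // the next significant token is something else (possibly EOF)
    }
    plex.FastForward(raw) // cursors end up just past the consumed token
    return true
}

MakeCheckpoint and LoadCheckpoint are deliberately thin: Checkpoint is an exported
embedded field, so plex.MakeCheckpoint() and plex.LoadCheckpoint(cp) are
equivalent to reading and assigning plex.Checkpoint directly, which is what the
updated test below does.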

lexer/peek_test.go: 25 changes (22 additions, 3 deletions)

@@ -4,6 +4,7 @@ import (
     "testing"
 
     require "github.com/alecthomas/assert/v2"
+
     "github.com/alecthomas/participle/v2/lexer"
 )

@@ -32,7 +33,7 @@ func TestUpgrade(t *testing.T) {
     require.Equal(t, tokens, l.Range(0, 3))
 }
 
-func TestPeekAndNextAny(t *testing.T) {
+func TestPeekingLexer_Peek_Next_Checkpoint(t *testing.T) {
     slexdef := lexer.MustSimple([]lexer.SimpleRule{
         {"Ident", `\w+`},
         {"Whitespace", `\s+`},

@@ -48,7 +49,25 @@
         {Type: -3, Value: " ", Pos: lexer.Position{Line: 1, Column: 12, Offset: 11}},
         {Type: -2, Value: "last", Pos: lexer.Position{Line: 1, Column: 13, Offset: 12}},
     }
-    tok := plex.Next()
-    require.Equal(t, expected[0], *tok)
+    checkpoint := plex.Checkpoint
+    require.Equal(t, expected[0], *plex.Next())
+    require.Equal(t, expected[2], *plex.Peek(), "should have skipped whitespace")
+    plex.Checkpoint = checkpoint
+    require.Equal(t, expected[0], *plex.Peek(), "should have reverted to pre-Next state")
+}
+
+func BenchmarkPeekingLexer_Peek(b *testing.B) {
+    tokens := []lexer.Token{{Type: 1, Value: "x"}, {Type: 3, Value: " "}, {Type: 2, Value: "y"}}
+    l, err := lexer.Upgrade(&staticLexer{tokens: tokens}, 3)
+    require.NoError(b, err)
+    l.Next()
+    t := l.Peek()
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        t = l.Peek()
+        if t.EOF() {
+            return
+        }
+    }
+    require.Equal(b, lexer.Token{Type: 2, Value: "y"}, *t)
+}
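
To reproduce the "over 3x faster" claim from the commit message, this benchmark
can be run before and after the change with the standard toolchain, e.g.:

    go test -bench=BenchmarkPeekingLexer_Peek -benchmem ./lexer

(-bench and -benchmem are stock go test flags; absolute numbers will vary by
machine.)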

nodes.go: 4 changes (2 additions, 2 deletions)

@@ -63,7 +63,7 @@ func (p *parseable) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
     defer ctx.printTrace(p)()
     rv := reflect.New(p.t)
     v := rv.Interface().(Parseable)
-    err = v.Parse(ctx.PeekingLexer)
+    err = v.Parse(&ctx.PeekingLexer)
     if err != nil {
         if err == NextMatch {
             return nil, nil

@@ -84,7 +84,7 @@ func (c *custom) GoString() string { return c.typ.Name() }
 
 func (c *custom) Parse(ctx *parseContext, parent reflect.Value) (out []reflect.Value, err error) {
     defer ctx.printTrace(c)()
-    results := c.parseFn.Call([]reflect.Value{reflect.ValueOf(ctx.PeekingLexer)})
+    results := c.parseFn.Call([]reflect.Value{reflect.ValueOf(&ctx.PeekingLexer)})
     if err, _ := results[1].Interface().(error); err != nil {
         if err == NextMatch {
             return nil, nil

parser.go: 4 changes (2 additions, 2 deletions)

@@ -169,7 +169,7 @@ func (p *Parser[G]) ParseFromLexer(lex *lexer.PeekingLexer, options ...ParseOption) error {
         }
     }
     ctx := newParseContext(lex, p.useLookahead, caseInsensitive)
-    defer func() { *lex = *ctx.PeekingLexer }()
+    defer func() { *lex = ctx.PeekingLexer }()
     for _, option := range options {
         option(ctx)
     }

@@ -268,7 +268,7 @@ func (p *Parser[G]) parseInto(ctx *parseContext, parseNode node, rv reflect.Value) error {
 }
 
 func (p *Parser[G]) rootParseable(ctx *parseContext, parseable Parseable) error {
-    if err := parseable.Parse(ctx.PeekingLexer); err != nil {
+    if err := parseable.Parse(&ctx.PeekingLexer); err != nil {
         if err == NextMatch {
             err = &UnexpectedTokenError{Unexpected: *ctx.Peek()}
         } else {
