// Copyright 2020 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.

package ast

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"math/big"
	"net/url"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"unicode/utf8"

	"gopkg.in/yaml.v2"

	"github.com/open-policy-agent/opa/ast/internal/scanner"
	"github.com/open-policy-agent/opa/ast/internal/tokens"
	"github.com/open-policy-agent/opa/ast/location"
)

// Note: This state is kept isolated from the parser so that we
// can do efficient shallow copies of these values when doing a
// save() and restore().
type state struct {
	s         *scanner.Scanner
	lastEnd   int
	skippedNL bool
	tok       tokens.Token
	tokEnd    int
	lit       string
	loc       Location
	errors    Errors
	hints     []string
	comments  []*Comment
	wildcard  int
}

func (s *state) String() string {
	return fmt.Sprintf("<s: %v, tok: %v, lit: %q, loc: %v, errors: %d, comments: %d>",
		s.s, s.tok, s.lit, s.loc, len(s.errors), len(s.comments))
}

func (s *state) Loc() *location.Location {
	cpy := s.loc
	return &cpy
}

func (s *state) Text(offset, end int) []byte {
	bs := s.s.Bytes()
	if offset >= 0 && offset < len(bs) {
		if end >= offset && end <= len(bs) {
			return bs[offset:end]
		}
	}
	return nil
}

// Parser is used to parse Rego statements.
type Parser struct {
	r     io.Reader
	s     *state
	po    ParserOptions
	cache parsedTermCache
}

type parsedTermCacheItem struct {
	t      *Term
	post   *state // post is the post-state that's restored on a cache-hit
	offset int
	next   *parsedTermCacheItem
}

type parsedTermCache struct {
	m *parsedTermCacheItem
}

func (c parsedTermCache) String() string {
	s := strings.Builder{}
	s.WriteRune('{')
	var e *parsedTermCacheItem
	for e = c.m; e != nil; e = e.next {
		fmt.Fprintf(&s, "%v", e)
	}
	s.WriteRune('}')
	return s.String()
}

func (e *parsedTermCacheItem) String() string {
	return fmt.Sprintf("<%d:%v>", e.offset, e.t)
}

// ParserOptions defines the options for parsing Rego statements.
type ParserOptions struct {
	Capabilities       *Capabilities
	ProcessAnnotation  bool
	AllFutureKeywords  bool
	FutureKeywords     []string
	SkipRules          bool
	unreleasedKeywords bool // TODO(sr): cleanup
}

// NewParser creates and initializes a Parser.
func NewParser() *Parser {
	p := &Parser{
		s:  &state{},
		po: ParserOptions{},
	}
	return p
}

// WithFilename provides the filename for Location details
// on parsed statements.
func (p *Parser) WithFilename(filename string) *Parser {
	p.s.loc.File = filename
	return p
}

// WithReader provides the io.Reader that the parser will
// use as its source.
func (p *Parser) WithReader(r io.Reader) *Parser {
	p.r = r
	return p
}

// WithProcessAnnotation enables or disables the processing of
// annotations by the Parser
func (p *Parser) WithProcessAnnotation(processAnnotation bool) *Parser {
	p.po.ProcessAnnotation = processAnnotation
	return p
}

// WithFutureKeywords enables "future" keywords, i.e., keywords that can
// be imported via
//
//	import future.keywords.kw
//	import future.keywords.other
//
// but in a more direct way.
// The equivalent of this import would be
//
//	WithFutureKeywords("kw", "other")
func (p *Parser) WithFutureKeywords(kws ...string) *Parser {
	p.po.FutureKeywords = kws
	return p
}

// WithAllFutureKeywords enables all "future" keywords, i.e., the
// ParserOption equivalent of
//
//	import future.keywords
func (p *Parser) WithAllFutureKeywords(yes bool) *Parser {
	p.po.AllFutureKeywords = yes
	return p
}

// withUnreleasedKeywords allows using keywords that haven't surfaced
// as future keywords (see above) yet, but have tests that require
// them to be parsed
func (p *Parser) withUnreleasedKeywords(yes bool) *Parser {
	p.po.unreleasedKeywords = yes
	return p
}

// WithCapabilities sets the capabilities structure on the parser.
func (p *Parser) WithCapabilities(c *Capabilities) *Parser {
	p.po.Capabilities = c
	return p
}

// WithSkipRules instructs the parser not to attempt to parse Rule statements.
func (p *Parser) WithSkipRules(skip bool) *Parser {
	p.po.SkipRules = skip
	return p
}

func (p *Parser) parsedTermCacheLookup() (*Term, *state) {
	l := p.s.loc.Offset
	// stop comparing once the cached offsets are lower than l
	for h := p.cache.m; h != nil && h.offset >= l; h = h.next {
		if h.offset == l {
			return h.t, h.post
		}
	}
	return nil, nil
}

func (p *Parser) parsedTermCachePush(t *Term, s0 *state) {
	s1 := p.save()
	o0 := s0.loc.Offset
	entry := parsedTermCacheItem{t: t, post: s1, offset: o0}

	// find the first one whose offset is smaller than ours
	var e *parsedTermCacheItem
	for e = p.cache.m; e != nil; e = e.next {
		if e.offset < o0 {
			break
		}
	}
	entry.next = e
	p.cache.m = &entry
}

// futureParser returns a shallow copy of `p` with an empty
// cache, and a scanner that knows all future keywords.
// It's used to present hints in errors, when statements would
// only parse successfully if some future keyword is enabled.
func (p *Parser) futureParser() *Parser {
	q := *p
	q.s = p.save()
	q.s.s = p.s.s.WithKeywords(futureKeywords)
	q.cache = parsedTermCache{}
	return &q
}

// presentParser returns a shallow copy of `p` with an empty
// cache, and a scanner that knows none of the future keywords.
// It is used to successfully parse keyword imports, like
//
//	import future.keywords.in
//
// even when the parser has already been informed about the
// future keyword "in". This parser won't error out because
// "in" is an identifier.
func (p *Parser) presentParser() (*Parser, map[string]tokens.Token) {
	var cpy map[string]tokens.Token
	q := *p
	q.s = p.save()
	q.s.s, cpy = p.s.s.WithoutKeywords(futureKeywords)
	q.cache = parsedTermCache{}
	return &q, cpy
}

// Parse will read the Rego source and parse statements and
// comments as they are found. Any errors encountered while
// parsing will be accumulated and returned as a list of Errors.
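//
// A minimal usage sketch (illustrative only; the policy text and filename
// below are assumptions made for the example, not taken from this file):
//
//	p := NewParser().
//		WithReader(strings.NewReader("package example\n\nallow { input.admin }")).
//		WithFilename("example.rego").
//		WithProcessAnnotation(true)
//	stmts, comments, errs := p.Parse()
//	if len(errs) > 0 {
//		// handle parse errors before using stmts
//	}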
func (p *Parser) Parse() ([]Statement, []*Comment, Errors) { if p.po.Capabilities == nil { p.po.Capabilities = CapabilitiesForThisVersion() } allowedFutureKeywords := map[string]tokens.Token{} for _, kw := range p.po.Capabilities.FutureKeywords { var ok bool allowedFutureKeywords[kw], ok = futureKeywords[kw] if !ok { return nil, nil, Errors{ &Error{ Code: ParseErr, Message: fmt.Sprintf("illegal capabilities: unknown keyword: %v", kw), Location: nil, }, } } } var err error p.s.s, err = scanner.New(p.r) if err != nil { return nil, nil, Errors{ &Error{ Code: ParseErr, Message: err.Error(), Location: nil, }, } } selected := map[string]tokens.Token{} if p.po.AllFutureKeywords { for kw, tok := range allowedFutureKeywords { selected[kw] = tok } } else { for _, kw := range p.po.FutureKeywords { tok, ok := allowedFutureKeywords[kw] if !ok { return nil, nil, Errors{ &Error{ Code: ParseErr, Message: fmt.Sprintf("unknown future keyword: %v", kw), Location: nil, }, } } selected[kw] = tok } } p.s.s = p.s.s.WithKeywords(selected) // read the first token to initialize the parser p.scan() var stmts []Statement // Read from the scanner until the last token is reached or no statements // can be parsed. Attempt to parse package statements, import statements, // rule statements, and then body/query statements (in that order). If a // statement cannot be parsed, restore the parser state before trying the // next type of statement. If a statement can be parsed, continue from that // point trying to parse packages, imports, etc. in the same order. for p.s.tok != tokens.EOF { s := p.save() if pkg := p.parsePackage(); pkg != nil { stmts = append(stmts, pkg) continue } else if len(p.s.errors) > 0 { break } p.restore(s) s = p.save() if imp := p.parseImport(); imp != nil { if FutureRootDocument.Equal(imp.Path.Value.(Ref)[0]) { p.futureImport(imp, allowedFutureKeywords) } stmts = append(stmts, imp) continue } else if len(p.s.errors) > 0 { break } p.restore(s) if !p.po.SkipRules { s = p.save() if rules := p.parseRules(); rules != nil { for i := range rules { stmts = append(stmts, rules[i]) } continue } else if len(p.s.errors) > 0 { break } p.restore(s) } if body := p.parseQuery(true, tokens.EOF); body != nil { stmts = append(stmts, body) continue } break } if p.po.ProcessAnnotation { stmts = p.parseAnnotations(stmts) } return stmts, p.s.comments, p.s.errors } func (p *Parser) parseAnnotations(stmts []Statement) []Statement { annotStmts, errs := parseAnnotations(p.s.comments) for _, err := range errs { p.error(err.Location, err.Message) } for _, annotStmt := range annotStmts { stmts = append(stmts, annotStmt) } return stmts } func parseAnnotations(comments []*Comment) ([]*Annotations, Errors) { var hint = []byte("METADATA") var curr *metadataParser var blocks []*metadataParser for i := 0; i < len(comments); i++ { if curr != nil { if comments[i].Location.Row == comments[i-1].Location.Row+1 && comments[i].Location.Col == 1 { curr.Append(comments[i]) continue } curr = nil } if bytes.HasPrefix(bytes.TrimSpace(comments[i].Text), hint) { curr = newMetadataParser(comments[i].Location) blocks = append(blocks, curr) } } var stmts []*Annotations var errs Errors for _, b := range blocks { a, err := b.Parse() if err != nil { errs = append(errs, &Error{ Code: ParseErr, Message: err.Error(), Location: b.loc, }) } else { stmts = append(stmts, a) } } return stmts, errs } func (p *Parser) parsePackage() *Package { var pkg Package pkg.SetLoc(p.s.Loc()) if p.s.tok != tokens.Package { return nil } p.scan() if p.s.tok != tokens.Ident { 
p.illegalToken() return nil } term := p.parseTerm() if term != nil { switch v := term.Value.(type) { case Var: pkg.Path = Ref{ DefaultRootDocument.Copy().SetLocation(term.Location), StringTerm(string(v)).SetLocation(term.Location), } case Ref: pkg.Path = make(Ref, len(v)+1) pkg.Path[0] = DefaultRootDocument.Copy().SetLocation(v[0].Location) first, ok := v[0].Value.(Var) if !ok { p.errorf(v[0].Location, "unexpected %v token: expecting var", TypeName(v[0].Value)) return nil } pkg.Path[1] = StringTerm(string(first)).SetLocation(v[0].Location) for i := 2; i < len(pkg.Path); i++ { switch v[i-1].Value.(type) { case String: pkg.Path[i] = v[i-1] default: p.errorf(v[i-1].Location, "unexpected %v token: expecting string", TypeName(v[i-1].Value)) return nil } } default: p.illegalToken() return nil } } if pkg.Path == nil { if len(p.s.errors) == 0 { p.error(p.s.Loc(), "expected path") } return nil } return &pkg } func (p *Parser) parseImport() *Import { var imp Import imp.SetLoc(p.s.Loc()) if p.s.tok != tokens.Import { return nil } p.scan() if p.s.tok != tokens.Ident { p.error(p.s.Loc(), "expected ident") return nil } q, prev := p.presentParser() term := q.parseTerm() if term != nil { switch v := term.Value.(type) { case Var: imp.Path = RefTerm(term).SetLocation(term.Location) case Ref: for i := 1; i < len(v); i++ { if _, ok := v[i].Value.(String); !ok { p.errorf(v[i].Location, "unexpected %v token: expecting string", TypeName(v[i].Value)) return nil } } imp.Path = term } } // keep advanced parser state, reset known keywords p.s = q.s p.s.s = q.s.s.WithKeywords(prev) if imp.Path == nil { p.error(p.s.Loc(), "expected path") return nil } path := imp.Path.Value.(Ref) if !RootDocumentNames.Contains(path[0]) && !FutureRootDocument.Equal(path[0]) { p.errorf(imp.Path.Location, "unexpected import path, must begin with one of: %v, got: %v", RootDocumentNames.Union(NewSet(FutureRootDocument)), path[0]) return nil } if p.s.tok == tokens.As { p.scan() if p.s.tok != tokens.Ident { p.illegal("expected var") return nil } if alias := p.parseTerm(); alias != nil { v, ok := alias.Value.(Var) if ok { imp.Alias = v return &imp } } p.illegal("expected var") return nil } return &imp } func (p *Parser) parseRules() []*Rule { var rule Rule rule.SetLoc(p.s.Loc()) if p.s.tok == tokens.Default { p.scan() rule.Default = true } if p.s.tok != tokens.Ident { return nil } usesContains := false if rule.Head, usesContains = p.parseHead(rule.Default); rule.Head == nil { return nil } if rule.Default { if !p.validateDefaultRuleValue(&rule) { return nil } rule.Body = NewBody(NewExpr(BooleanTerm(true).SetLocation(rule.Location)).SetLocation(rule.Location)) return []*Rule{&rule} } if usesContains && !rule.Head.Reference.IsGround() { p.error(p.s.Loc(), "multi-value rules need ground refs") return nil } // back-compat with `p[x] { ... }`` hasIf := p.s.tok == tokens.If // p[x] if ... 
becomes a single-value rule p[x] if hasIf && !usesContains && len(rule.Head.Ref()) == 2 { if rule.Head.Value == nil { rule.Head.Value = BooleanTerm(true).SetLocation(rule.Head.Location) } else { // p[x] = y if becomes a single-value rule p[x] with value y, but needs name for compat v, ok := rule.Head.Ref()[0].Value.(Var) if !ok { return nil } rule.Head.Name = v } } // p[x] becomes a multi-value rule p if !hasIf && !usesContains && len(rule.Head.Args) == 0 && // not a function len(rule.Head.Ref()) == 2 { // ref like 'p[x]' v, ok := rule.Head.Ref()[0].Value.(Var) if !ok { return nil } rule.Head.Name = v rule.Head.Key = rule.Head.Ref()[1] if rule.Head.Value == nil { rule.Head.SetRef(rule.Head.Ref()[:len(rule.Head.Ref())-1]) } } switch { case hasIf: p.scan() s := p.save() if expr := p.parseLiteral(); expr != nil { // NOTE(sr): set literals are never false or undefined, so parsing this as // p if { true } // ^^^^^^^^ set of one element, `true` // isn't valid. isSetLiteral := false if t, ok := expr.Terms.(*Term); ok { _, isSetLiteral = t.Value.(Set) } // expr.Term is []*Term or Every if !isSetLiteral { rule.Body.Append(expr) break } } // parsing as literal didn't work out, expect '{ BODY }' p.restore(s) fallthrough case p.s.tok == tokens.LBrace: p.scan() if rule.Body = p.parseBody(tokens.RBrace); rule.Body == nil { return nil } p.scan() case usesContains: rule.Body = NewBody(NewExpr(BooleanTerm(true).SetLocation(rule.Location)).SetLocation(rule.Location)) return []*Rule{&rule} default: return nil } if p.s.tok == tokens.Else { if r := rule.Head.Ref(); len(r) > 1 && !r[len(r)-1].Value.IsGround() { p.error(p.s.Loc(), "else keyword cannot be used on rules with variables in head") return nil } if rule.Head.Key != nil { p.error(p.s.Loc(), "else keyword cannot be used on multi-value rules") return nil } if rule.Else = p.parseElse(rule.Head); rule.Else == nil { return nil } } rule.Location.Text = p.s.Text(rule.Location.Offset, p.s.lastEnd) rules := []*Rule{&rule} for p.s.tok == tokens.LBrace { if rule.Else != nil { p.error(p.s.Loc(), "expected else keyword") return nil } loc := p.s.Loc() p.scan() var next Rule if next.Body = p.parseBody(tokens.RBrace); next.Body == nil { return nil } p.scan() loc.Text = p.s.Text(loc.Offset, p.s.lastEnd) next.SetLoc(loc) // Chained rule head's keep the original // rule's head AST but have their location // set to the rule body. 
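// For example (illustrative Rego, not taken from this file):
//
//	p { input.x == 1 } { input.y == 2 }
//
// yields one Rule per body; each chained Rule reuses a copy of the head `p`,
// with wildcard arguments regenerated below so they stay unique per rule.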
next.Head = rule.Head.Copy() for i := range next.Head.Args { if v, ok := next.Head.Args[i].Value.(Var); ok && v.IsWildcard() { next.Head.Args[i].Value = Var(p.genwildcard()) } } setLocRecursive(next.Head, loc) rules = append(rules, &next) } return rules } func (p *Parser) parseElse(head *Head) *Rule { var rule Rule rule.SetLoc(p.s.Loc()) rule.Head = head.Copy() for i := range rule.Head.Args { if v, ok := rule.Head.Args[i].Value.(Var); ok && v.IsWildcard() { rule.Head.Args[i].Value = Var(p.genwildcard()) } } rule.Head.SetLoc(p.s.Loc()) defer func() { rule.Location.Text = p.s.Text(rule.Location.Offset, p.s.lastEnd) }() p.scan() switch p.s.tok { case tokens.LBrace, tokens.If: // no value, but a body follows directly rule.Head.Value = BooleanTerm(true) case tokens.Assign, tokens.Unify: rule.Head.Assign = tokens.Assign == p.s.tok p.scan() rule.Head.Value = p.parseTermInfixCall() if rule.Head.Value == nil { return nil } rule.Head.Location.Text = p.s.Text(rule.Head.Location.Offset, p.s.lastEnd) default: p.illegal("expected else value term or rule body") return nil } hasIf := p.s.tok == tokens.If if hasIf { p.scan() s := p.save() if expr := p.parseLiteral(); expr != nil { // NOTE(sr): set literals are never false or undefined, so parsing this as // p if false else if { true } // ^^^^^^^^ set of one element, `true` // isn't valid. isSetLiteral := false if t, ok := expr.Terms.(*Term); ok { _, isSetLiteral = t.Value.(Set) } // expr.Term is []*Term or Every if !isSetLiteral { rule.Body.Append(expr) setLocRecursive(rule.Body, rule.Location) return &rule } } p.restore(s) } if p.s.tok != tokens.LBrace { rule.Body = NewBody(NewExpr(BooleanTerm(true))) setLocRecursive(rule.Body, rule.Location) return &rule } p.scan() if rule.Body = p.parseBody(tokens.RBrace); rule.Body == nil { return nil } p.scan() if p.s.tok == tokens.Else { if rule.Else = p.parseElse(head); rule.Else == nil { return nil } } return &rule } func (p *Parser) parseHead(defaultRule bool) (*Head, bool) { head := &Head{} loc := p.s.Loc() defer func() { if head != nil { head.SetLoc(loc) head.Location.Text = p.s.Text(head.Location.Offset, p.s.lastEnd) } }() term := p.parseVar() if term == nil { return nil, false } ref := p.parseTermFinish(term, true) if ref == nil { p.illegal("expected rule head name") return nil, false } switch x := ref.Value.(type) { case Var: head = NewHead(x) case Ref: head = RefHead(x) case Call: op, args := x[0], x[1:] var ref Ref switch y := op.Value.(type) { case Var: ref = Ref{op} case Ref: if _, ok := y[0].Value.(Var); !ok { p.illegal("rule head ref %v invalid", y) return nil, false } ref = y } head = RefHead(ref) head.Args = append([]*Term{}, args...) default: return nil, false } name := head.Ref().String() switch p.s.tok { case tokens.Contains: // NOTE: no Value for `contains` heads, we return here // Catch error case of using 'contains' with a function definition rule head. if head.Args != nil { p.illegal("the contains keyword can only be used with multi-value rule definitions (e.g., %s contains { ... })", name) } p.scan() head.Key = p.parseTermInfixCall() if head.Key == nil { p.illegal("expected rule key term (e.g., %s contains { ... })", name) } return head, true case tokens.Unify: p.scan() head.Value = p.parseTermInfixCall() if head.Value == nil { // FIX HEAD.String() p.illegal("expected rule value term (e.g., %s[%s] = { ... 
})", name, head.Key) } case tokens.Assign: s := p.save() p.scan() head.Assign = true head.Value = p.parseTermInfixCall() if head.Value == nil { p.restore(s) switch { case len(head.Args) > 0: p.illegal("expected function value term (e.g., %s(...) := { ... })", name) case head.Key != nil: p.illegal("expected partial rule value term (e.g., %s[...] := { ... })", name) case defaultRule: p.illegal("expected default rule value term (e.g., default %s := )", name) default: p.illegal("expected rule value term (e.g., %s := { ... })", name) } } } if head.Value == nil && head.Key == nil { if len(head.Ref()) != 2 || len(head.Args) > 0 { head.Value = BooleanTerm(true).SetLocation(head.Location) } } return head, false } func (p *Parser) parseBody(end tokens.Token) Body { return p.parseQuery(false, end) } func (p *Parser) parseQuery(requireSemi bool, end tokens.Token) Body { body := Body{} if p.s.tok == end { p.error(p.s.Loc(), "found empty body") return nil } for { expr := p.parseLiteral() if expr == nil { return nil } body.Append(expr) if p.s.tok == tokens.Semicolon { p.scan() continue } if p.s.tok == end || requireSemi { return body } if !p.s.skippedNL { // If there was already an error then don't pile this one on if len(p.s.errors) == 0 { p.illegal(`expected \n or %s or %s`, tokens.Semicolon, end) } return nil } } } func (p *Parser) parseLiteral() (expr *Expr) { offset := p.s.loc.Offset loc := p.s.Loc() defer func() { if expr != nil { loc.Text = p.s.Text(offset, p.s.lastEnd) expr.SetLoc(loc) } }() var negated bool if p.s.tok == tokens.Not { p.scan() negated = true } switch p.s.tok { case tokens.Some: if negated { p.illegal("illegal negation of 'some'") return nil } return p.parseSome() case tokens.Every: if negated { p.illegal("illegal negation of 'every'") return nil } return p.parseEvery() default: s := p.save() expr := p.parseExpr() if expr != nil { expr.Negated = negated if p.s.tok == tokens.With { if expr.With = p.parseWith(); expr.With == nil { return nil } } // If we find a plain `every` identifier, attempt to parse an every expression, // add hint if it succeeds. if term, ok := expr.Terms.(*Term); ok && Var("every").Equal(term.Value) { var hint bool t := p.save() p.restore(s) if expr := p.futureParser().parseEvery(); expr != nil { _, hint = expr.Terms.(*Every) } p.restore(t) if hint { p.hint("`import future.keywords.every` for `every x in xs { ... 
}` expressions") } } return expr } return nil } } func (p *Parser) parseWith() []*With { withs := []*With{} for { with := With{ Location: p.s.Loc(), } p.scan() if p.s.tok != tokens.Ident { p.illegal("expected ident") return nil } with.Target = p.parseTerm() if with.Target == nil { return nil } switch with.Target.Value.(type) { case Ref, Var: break default: p.illegal("expected with target path") } if p.s.tok != tokens.As { p.illegal("expected as keyword") return nil } p.scan() if with.Value = p.parseTermInfixCall(); with.Value == nil { return nil } with.Location.Text = p.s.Text(with.Location.Offset, p.s.lastEnd) withs = append(withs, &with) if p.s.tok != tokens.With { break } } return withs } func (p *Parser) parseSome() *Expr { decl := &SomeDecl{} decl.SetLoc(p.s.Loc()) // Attempt to parse "some x in xs", which will end up in // SomeDecl{Symbols: ["member(x, xs)"]} s := p.save() p.scan() if term := p.parseTermInfixCall(); term != nil { if call, ok := term.Value.(Call); ok { switch call[0].String() { case Member.Name: if len(call) != 3 { p.illegal("illegal domain") return nil } case MemberWithKey.Name: if len(call) != 4 { p.illegal("illegal domain") return nil } default: p.illegal("expected `x in xs` or `x, y in xs` expression") return nil } decl.Symbols = []*Term{term} expr := NewExpr(decl).SetLocation(decl.Location) if p.s.tok == tokens.With { if expr.With = p.parseWith(); expr.With == nil { return nil } } return expr } } p.restore(s) s = p.save() // new copy for later var hint bool p.scan() if term := p.futureParser().parseTermInfixCall(); term != nil { if call, ok := term.Value.(Call); ok { switch call[0].String() { case Member.Name, MemberWithKey.Name: hint = true } } } // go on as before, it's `some x[...]` or illegal p.restore(s) if hint { p.hint("`import future.keywords.in` for `some x in xs` expressions") } for { // collecting var args p.scan() if p.s.tok != tokens.Ident { p.illegal("expected var") return nil } decl.Symbols = append(decl.Symbols, p.parseVar()) p.scan() if p.s.tok != tokens.Comma { break } } return NewExpr(decl).SetLocation(decl.Location) } func (p *Parser) parseEvery() *Expr { qb := &Every{} qb.SetLoc(p.s.Loc()) // TODO(sr): We'd get more accurate error messages if we didn't rely on // parseTermInfixCall here, but parsed "var [, var] in term" manually. p.scan() term := p.parseTermInfixCall() if term == nil { return nil } call, ok := term.Value.(Call) if !ok { p.illegal("expected `x[, y] in xs { ... }` expression") return nil } switch call[0].String() { case Member.Name: // x in xs if len(call) != 3 { p.illegal("illegal domain") return nil } qb.Value = call[1] qb.Domain = call[2] case MemberWithKey.Name: // k, v in xs if len(call) != 4 { p.illegal("illegal domain") return nil } qb.Key = call[1] qb.Value = call[2] qb.Domain = call[3] if _, ok := qb.Key.Value.(Var); !ok { p.illegal("expected key to be a variable") return nil } default: p.illegal("expected `x[, y] in xs { ... }` expression") return nil } if _, ok := qb.Value.Value.(Var); !ok { p.illegal("expected value to be a variable") return nil } if p.s.tok == tokens.LBrace { // every x in xs { ... 
} p.scan() body := p.parseBody(tokens.RBrace) if body == nil { return nil } p.scan() qb.Body = body expr := NewExpr(qb).SetLocation(qb.Location) if p.s.tok == tokens.With { if expr.With = p.parseWith(); expr.With == nil { return nil } } return expr } p.illegal("missing body") return nil } func (p *Parser) parseExpr() *Expr { lhs := p.parseTermInfixCall() if lhs == nil { return nil } if op := p.parseTermOp(tokens.Assign, tokens.Unify); op != nil { if rhs := p.parseTermInfixCall(); rhs != nil { return NewExpr([]*Term{op, lhs, rhs}) } return nil } // NOTE(tsandall): the top-level call term is converted to an expr because // the evaluator does not support the call term type (nested calls are // rewritten by the compiler.) if call, ok := lhs.Value.(Call); ok { return NewExpr([]*Term(call)) } return NewExpr(lhs) } // parseTermInfixCall consumes the next term from the input and returns it. If a // term cannot be parsed the return value is nil and error will be recorded. The // scanner will be advanced to the next token before returning. // By starting out with infix relations (==, !=, <, etc) and further calling the // other binary operators (|, &, arithmetics), it constitutes the binding // precedence. func (p *Parser) parseTermInfixCall() *Term { return p.parseTermIn(nil, true, p.s.loc.Offset) } func (p *Parser) parseTermInfixCallInList() *Term { return p.parseTermIn(nil, false, p.s.loc.Offset) } func (p *Parser) parseTermIn(lhs *Term, keyVal bool, offset int) *Term { // NOTE(sr): `in` is a bit special: besides `lhs in rhs`, it also // supports `key, val in rhs`, so it can have an optional second lhs. // `keyVal` triggers if we attempt to parse a second lhs argument (`mhs`). if lhs == nil { lhs = p.parseTermRelation(nil, offset) } if lhs != nil { if keyVal && p.s.tok == tokens.Comma { // second "lhs", or "middle hand side" s := p.save() p.scan() if mhs := p.parseTermRelation(nil, offset); mhs != nil { if op := p.parseTermOpName(MemberWithKey.Ref(), tokens.In); op != nil { if rhs := p.parseTermRelation(nil, p.s.loc.Offset); rhs != nil { call := p.setLoc(CallTerm(op, lhs, mhs, rhs), lhs.Location, offset, p.s.lastEnd) switch p.s.tok { case tokens.In: return p.parseTermIn(call, keyVal, offset) default: return call } } } } p.restore(s) } if op := p.parseTermOpName(Member.Ref(), tokens.In); op != nil { if rhs := p.parseTermRelation(nil, p.s.loc.Offset); rhs != nil { call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd) switch p.s.tok { case tokens.In: return p.parseTermIn(call, keyVal, offset) default: return call } } } } return lhs } func (p *Parser) parseTermRelation(lhs *Term, offset int) *Term { if lhs == nil { lhs = p.parseTermOr(nil, offset) } if lhs != nil { if op := p.parseTermOp(tokens.Equal, tokens.Neq, tokens.Lt, tokens.Gt, tokens.Lte, tokens.Gte); op != nil { if rhs := p.parseTermOr(nil, p.s.loc.Offset); rhs != nil { call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd) switch p.s.tok { case tokens.Equal, tokens.Neq, tokens.Lt, tokens.Gt, tokens.Lte, tokens.Gte: return p.parseTermRelation(call, offset) default: return call } } } } return lhs } func (p *Parser) parseTermOr(lhs *Term, offset int) *Term { if lhs == nil { lhs = p.parseTermAnd(nil, offset) } if lhs != nil { if op := p.parseTermOp(tokens.Or); op != nil { if rhs := p.parseTermAnd(nil, p.s.loc.Offset); rhs != nil { call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd) switch p.s.tok { case tokens.Or: return p.parseTermOr(call, offset) default: return call } } } 
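// Illustrative precedence note (inferred from the call chain above, not part
// of the original comments): operators parsed closer to parseTerm bind
// tighter, so `a + b * c == d | e` is parsed as eq(plus(a, mul(b, c)), or(d, e)),
// i.e. *, /, % bind tighter than + and -, which bind tighter than & and |,
// which in turn bind tighter than the relational operators.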
return lhs } return nil } func (p *Parser) parseTermAnd(lhs *Term, offset int) *Term { if lhs == nil { lhs = p.parseTermArith(nil, offset) } if lhs != nil { if op := p.parseTermOp(tokens.And); op != nil { if rhs := p.parseTermArith(nil, p.s.loc.Offset); rhs != nil { call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd) switch p.s.tok { case tokens.And: return p.parseTermAnd(call, offset) default: return call } } } return lhs } return nil } func (p *Parser) parseTermArith(lhs *Term, offset int) *Term { if lhs == nil { lhs = p.parseTermFactor(nil, offset) } if lhs != nil { if op := p.parseTermOp(tokens.Add, tokens.Sub); op != nil { if rhs := p.parseTermFactor(nil, p.s.loc.Offset); rhs != nil { call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd) switch p.s.tok { case tokens.Add, tokens.Sub: return p.parseTermArith(call, offset) default: return call } } } } return lhs } func (p *Parser) parseTermFactor(lhs *Term, offset int) *Term { if lhs == nil { lhs = p.parseTerm() } if lhs != nil { if op := p.parseTermOp(tokens.Mul, tokens.Quo, tokens.Rem); op != nil { if rhs := p.parseTerm(); rhs != nil { call := p.setLoc(CallTerm(op, lhs, rhs), lhs.Location, offset, p.s.lastEnd) switch p.s.tok { case tokens.Mul, tokens.Quo, tokens.Rem: return p.parseTermFactor(call, offset) default: return call } } } } return lhs } func (p *Parser) parseTerm() *Term { if term, s := p.parsedTermCacheLookup(); s != nil { p.restore(s) return term } s0 := p.save() var term *Term switch p.s.tok { case tokens.Null: term = NullTerm().SetLocation(p.s.Loc()) case tokens.True: term = BooleanTerm(true).SetLocation(p.s.Loc()) case tokens.False: term = BooleanTerm(false).SetLocation(p.s.Loc()) case tokens.Sub, tokens.Dot, tokens.Number: term = p.parseNumber() case tokens.String: term = p.parseString() case tokens.Ident, tokens.Contains: // NOTE(sr): contains anywhere BUT in rule heads gets no special treatment term = p.parseVar() case tokens.LBrack: term = p.parseArray() case tokens.LBrace: term = p.parseSetOrObject() case tokens.LParen: offset := p.s.loc.Offset p.scan() if r := p.parseTermInfixCall(); r != nil { if p.s.tok == tokens.RParen { r.Location.Text = p.s.Text(offset, p.s.tokEnd) term = r } else { p.error(p.s.Loc(), "non-terminated expression") } } default: p.illegalToken() } term = p.parseTermFinish(term, false) p.parsedTermCachePush(term, s0) return term } func (p *Parser) parseTermFinish(head *Term, skipws bool) *Term { if head == nil { return nil } offset := p.s.loc.Offset p.doScan(skipws) switch p.s.tok { case tokens.LParen, tokens.Dot, tokens.LBrack: return p.parseRef(head, offset) case tokens.Whitespace: p.scan() fallthrough default: if _, ok := head.Value.(Var); ok && RootDocumentNames.Contains(head) { return RefTerm(head).SetLocation(head.Location) } return head } } func (p *Parser) parseNumber() *Term { var prefix string loc := p.s.Loc() if p.s.tok == tokens.Sub { prefix = "-" p.scan() switch p.s.tok { case tokens.Number, tokens.Dot: break default: p.illegal("expected number") return nil } } if p.s.tok == tokens.Dot { prefix += "." 
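// Illustrative note (inferred from the surrounding code): a literal such as
// `-.5` reaches parseNumber as three tokens (Sub, Dot, Number); the sign and
// dot are collected in `prefix` and rejoined with the digits below, producing
// the number term -.5. Numbers with multiple leading zeros (e.g. `007`) are
// rejected by the check further down.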
p.scan() if p.s.tok != tokens.Number { p.illegal("expected number") return nil } } // Check for multiple leading 0's, parsed by math/big.Float.Parse as decimal 0: // https://golang.org/pkg/math/big/#Float.Parse if ((len(prefix) != 0 && prefix[0] == '-') || len(prefix) == 0) && len(p.s.lit) > 1 && p.s.lit[0] == '0' && p.s.lit[1] == '0' { p.illegal("expected number") return nil } // Ensure that the number is valid s := prefix + p.s.lit f, ok := new(big.Float).SetString(s) if !ok { p.illegal("invalid float") return nil } // Put limit on size of exponent to prevent non-linear cost of String() // function on big.Float from causing denial of service: https://github.com/golang/go/issues/11068 // // n == sign * mantissa * 2^exp // 0.5 <= mantissa < 1.0 // // The limit is arbitrary. exp := f.MantExp(nil) if exp > 1e5 || exp < -1e5 || f.IsInf() { // +/- inf, exp is 0 p.error(p.s.Loc(), "number too big") return nil } // Note: Use the original string, do *not* round trip from // the big.Float as it can cause precision loss. r := NumberTerm(json.Number(s)).SetLocation(loc) return r } func (p *Parser) parseString() *Term { if p.s.lit[0] == '"' { var s string err := json.Unmarshal([]byte(p.s.lit), &s) if err != nil { p.errorf(p.s.Loc(), "illegal string literal: %s", p.s.lit) return nil } term := StringTerm(s).SetLocation(p.s.Loc()) return term } return p.parseRawString() } func (p *Parser) parseRawString() *Term { if len(p.s.lit) < 2 { return nil } term := StringTerm(p.s.lit[1 : len(p.s.lit)-1]).SetLocation(p.s.Loc()) return term } // this is the name to use for instantiating an empty set, e.g., `set()`. var setConstructor = RefTerm(VarTerm("set")) func (p *Parser) parseCall(operator *Term, offset int) (term *Term) { loc := operator.Location var end int defer func() { p.setLoc(term, loc, offset, end) }() p.scan() // steps over '(' if p.s.tok == tokens.RParen { // no args, i.e. set() or any.func() end = p.s.tokEnd p.scanWS() if operator.Equal(setConstructor) { return SetTerm() } return CallTerm(operator) } if r := p.parseTermList(tokens.RParen, []*Term{operator}); r != nil { end = p.s.tokEnd p.scanWS() return CallTerm(r...) } return nil } func (p *Parser) parseRef(head *Term, offset int) (term *Term) { loc := head.Location var end int defer func() { p.setLoc(term, loc, offset, end) }() switch h := head.Value.(type) { case Var, *Array, Object, Set, *ArrayComprehension, *ObjectComprehension, *SetComprehension, Call: // ok default: p.errorf(loc, "illegal ref (head cannot be %v)", TypeName(h)) } ref := []*Term{head} for { switch p.s.tok { case tokens.Dot: p.scanWS() if p.s.tok != tokens.Ident { p.illegal("expected %v", tokens.Ident) return nil } ref = append(ref, StringTerm(p.s.lit).SetLocation(p.s.Loc())) p.scanWS() case tokens.LParen: term = p.parseCall(p.setLoc(RefTerm(ref...), loc, offset, p.s.loc.Offset), offset) if term != nil { switch p.s.tok { case tokens.Whitespace: p.scan() end = p.s.lastEnd return term case tokens.Dot, tokens.LBrack: term = p.parseRef(term, offset) } } end = p.s.tokEnd return term case tokens.LBrack: p.scan() if term := p.parseTermInfixCall(); term != nil { if p.s.tok != tokens.RBrack { p.illegal("expected %v", tokens.LBrack) return nil } ref = append(ref, term) p.scanWS() } else { return nil } case tokens.Whitespace: end = p.s.lastEnd p.scan() return RefTerm(ref...) default: end = p.s.lastEnd return RefTerm(ref...) 
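// Illustrative trace (inferred from the cases above): for input `data.a[x].b`,
// the head var `data` seeds the ref, `.a` appends StringTerm("a"), `[x]`
// appends the parsed index term, and `.b` appends StringTerm("b"), so the
// loop returns the ref data.a[x].b.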
} } } func (p *Parser) parseArray() (term *Term) { loc := p.s.Loc() offset := p.s.loc.Offset defer func() { p.setLoc(term, loc, offset, p.s.tokEnd) }() p.scan() if p.s.tok == tokens.RBrack { return ArrayTerm() } potentialComprehension := true // Skip leading commas, eg [, x, y] // Supported for backwards compatibility. In the future // we should make this a parse error. if p.s.tok == tokens.Comma { potentialComprehension = false p.scan() } s := p.save() // NOTE(tsandall): The parser cannot attempt a relational term here because // of ambiguity around comprehensions. For example, given: // // {1 | 1} // // Does this represent a set comprehension or a set containing binary OR // call? We resolve the ambiguity by prioritizing comprehensions. head := p.parseTerm() if head == nil { return nil } switch p.s.tok { case tokens.RBrack: return ArrayTerm(head) case tokens.Comma: p.scan() if terms := p.parseTermList(tokens.RBrack, []*Term{head}); terms != nil { return NewTerm(NewArray(terms...)) } return nil case tokens.Or: if potentialComprehension { // Try to parse as if it is an array comprehension p.scan() if body := p.parseBody(tokens.RBrack); body != nil { return ArrayComprehensionTerm(head, body) } if p.s.tok != tokens.Comma { return nil } } // fall back to parsing as a normal array definition } p.restore(s) if terms := p.parseTermList(tokens.RBrack, nil); terms != nil { return NewTerm(NewArray(terms...)) } return nil } func (p *Parser) parseSetOrObject() (term *Term) { loc := p.s.Loc() offset := p.s.loc.Offset defer func() { p.setLoc(term, loc, offset, p.s.tokEnd) }() p.scan() if p.s.tok == tokens.RBrace { return ObjectTerm() } potentialComprehension := true // Skip leading commas, eg {, x, y} // Supported for backwards compatibility. In the future // we should make this a parse error. if p.s.tok == tokens.Comma { potentialComprehension = false p.scan() } s := p.save() // Try parsing just a single term first to give comprehensions higher // priority to "or" calls in ambiguous situations. Eg: { a | b } // will be a set comprehension. // // Note: We don't know yet if it is a set or object being defined. head := p.parseTerm() if head == nil { return nil } switch p.s.tok { case tokens.Or: if potentialComprehension { return p.parseSet(s, head, potentialComprehension) } case tokens.RBrace, tokens.Comma: return p.parseSet(s, head, potentialComprehension) case tokens.Colon: return p.parseObject(head, potentialComprehension) } p.restore(s) head = p.parseTermInfixCallInList() if head == nil { return nil } switch p.s.tok { case tokens.RBrace, tokens.Comma: return p.parseSet(s, head, false) case tokens.Colon: // It still might be an object comprehension, eg { a+1: b | ... } return p.parseObject(head, potentialComprehension) } p.illegal("non-terminated set") return nil } func (p *Parser) parseSet(s *state, head *Term, potentialComprehension bool) *Term { switch p.s.tok { case tokens.RBrace: return SetTerm(head) case tokens.Comma: p.scan() if terms := p.parseTermList(tokens.RBrace, []*Term{head}); terms != nil { return SetTerm(terms...) } case tokens.Or: if potentialComprehension { // Try to parse as if it is a set comprehension p.scan() if body := p.parseBody(tokens.RBrace); body != nil { return SetComprehensionTerm(head, body) } if p.s.tok != tokens.Comma { return nil } } // Fall back to parsing as normal set definition p.restore(s) if terms := p.parseTermList(tokens.RBrace, nil); terms != nil { return SetTerm(terms...) 
} } return nil } func (p *Parser) parseObject(k *Term, potentialComprehension bool) *Term { // NOTE(tsandall): Assumption: this function is called after parsing the key // of the head element and then receiving a colon token from the scanner. // Advance beyond the colon and attempt to parse an object. if p.s.tok != tokens.Colon { panic("expected colon") } p.scan() s := p.save() // NOTE(sr): We first try to parse the value as a term (`v`), and see // if we can parse `{ x: v | ...}` as a comprehension. // However, if we encounter either a Comma or an RBace, it cannot be // parsed as a comprehension -- so we save double work further down // where `parseObjectFinish(k, v, false)` would only exercise the // same code paths once more. v := p.parseTerm() if v == nil { return nil } potentialRelation := true if potentialComprehension { switch p.s.tok { case tokens.RBrace, tokens.Comma: potentialRelation = false fallthrough case tokens.Or: if term := p.parseObjectFinish(k, v, true); term != nil { return term } } } p.restore(s) if potentialRelation { v := p.parseTermInfixCallInList() if v == nil { return nil } switch p.s.tok { case tokens.RBrace, tokens.Comma: return p.parseObjectFinish(k, v, false) } } p.illegal("non-terminated object") return nil } func (p *Parser) parseObjectFinish(key, val *Term, potentialComprehension bool) *Term { switch p.s.tok { case tokens.RBrace: return ObjectTerm([2]*Term{key, val}) case tokens.Or: if potentialComprehension { p.scan() if body := p.parseBody(tokens.RBrace); body != nil { return ObjectComprehensionTerm(key, val, body) } } else { p.illegal("non-terminated object") } case tokens.Comma: p.scan() if r := p.parseTermPairList(tokens.RBrace, [][2]*Term{{key, val}}); r != nil { return ObjectTerm(r...) } } return nil } func (p *Parser) parseTermList(end tokens.Token, r []*Term) []*Term { if p.s.tok == end { return r } for { term := p.parseTermInfixCallInList() if term != nil { r = append(r, term) switch p.s.tok { case end: return r case tokens.Comma: p.scan() if p.s.tok == end { return r } continue default: p.illegal(fmt.Sprintf("expected %q or %q", tokens.Comma, end)) return nil } } return nil } } func (p *Parser) parseTermPairList(end tokens.Token, r [][2]*Term) [][2]*Term { if p.s.tok == end { return r } for { key := p.parseTermInfixCallInList() if key != nil { switch p.s.tok { case tokens.Colon: p.scan() if val := p.parseTermInfixCallInList(); val != nil { r = append(r, [2]*Term{key, val}) switch p.s.tok { case end: return r case tokens.Comma: p.scan() if p.s.tok == end { return r } continue default: p.illegal(fmt.Sprintf("expected %q or %q", tokens.Comma, end)) return nil } } default: p.illegal(fmt.Sprintf("expected %q", tokens.Colon)) return nil } } return nil } } func (p *Parser) parseTermOp(values ...tokens.Token) *Term { for i := range values { if p.s.tok == values[i] { r := RefTerm(VarTerm(fmt.Sprint(p.s.tok)).SetLocation(p.s.Loc())).SetLocation(p.s.Loc()) p.scan() return r } } return nil } func (p *Parser) parseTermOpName(ref Ref, values ...tokens.Token) *Term { for i := range values { if p.s.tok == values[i] { for _, r := range ref { r.SetLocation(p.s.Loc()) } t := RefTerm(ref...) 
t.SetLocation(p.s.Loc()) p.scan() return t } } return nil } func (p *Parser) parseVar() *Term { s := p.s.lit term := VarTerm(s).SetLocation(p.s.Loc()) // Update wildcard values with unique identifiers if term.Equal(Wildcard) { term.Value = Var(p.genwildcard()) } return term } func (p *Parser) genwildcard() string { c := p.s.wildcard p.s.wildcard++ return fmt.Sprintf("%v%d", WildcardPrefix, c) } func (p *Parser) error(loc *location.Location, reason string) { p.errorf(loc, reason) } func (p *Parser) errorf(loc *location.Location, f string, a ...interface{}) { msg := strings.Builder{} fmt.Fprintf(&msg, f, a...) switch len(p.s.hints) { case 0: // nothing to do case 1: msg.WriteString(" (hint: ") msg.WriteString(p.s.hints[0]) msg.WriteRune(')') default: msg.WriteString(" (hints: ") for i, h := range p.s.hints { if i > 0 { msg.WriteString(", ") } msg.WriteString(h) } msg.WriteRune(')') } p.s.errors = append(p.s.errors, &Error{ Code: ParseErr, Message: msg.String(), Location: loc, Details: newParserErrorDetail(p.s.s.Bytes(), loc.Offset), }) p.s.hints = nil } func (p *Parser) hint(f string, a ...interface{}) { p.s.hints = append(p.s.hints, fmt.Sprintf(f, a...)) } func (p *Parser) illegal(note string, a ...interface{}) { tok := p.s.tok.String() if p.s.tok == tokens.Illegal { p.errorf(p.s.Loc(), "illegal token") return } tokType := "token" if tokens.IsKeyword(p.s.tok) { tokType = "keyword" } if _, ok := futureKeywords[p.s.tok.String()]; ok { tokType = "keyword" } note = fmt.Sprintf(note, a...) if len(note) > 0 { p.errorf(p.s.Loc(), "unexpected %s %s: %s", tok, tokType, note) } else { p.errorf(p.s.Loc(), "unexpected %s %s", tok, tokType) } } func (p *Parser) illegalToken() { p.illegal("") } func (p *Parser) scan() { p.doScan(true) } func (p *Parser) scanWS() { p.doScan(false) } func (p *Parser) doScan(skipws bool) { // NOTE(tsandall): the last position is used to compute the "text" field for // complex AST nodes. Whitespace never affects the last position of an AST // node so do not update it when scanning. if p.s.tok != tokens.Whitespace { p.s.lastEnd = p.s.tokEnd p.s.skippedNL = false } var errs []scanner.Error for { var pos scanner.Position p.s.tok, pos, p.s.lit, errs = p.s.s.Scan() p.s.tokEnd = pos.End p.s.loc.Row = pos.Row p.s.loc.Col = pos.Col p.s.loc.Offset = pos.Offset p.s.loc.Text = p.s.Text(pos.Offset, pos.End) for _, err := range errs { p.error(p.s.Loc(), err.Message) } if len(errs) > 0 { p.s.tok = tokens.Illegal } if p.s.tok == tokens.Whitespace { if p.s.lit == "\n" { p.s.skippedNL = true } if skipws { continue } } if p.s.tok != tokens.Comment { break } // For backwards compatibility leave a nil // Text value if there is no text rather than // an empty string. 
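// Illustrative note (inferred from the slicing below): the scanner's lit for a
// comment keeps the leading '#', so a policy line `# METADATA` becomes a
// Comment with Text " METADATA"; parseAnnotations above trims that space
// before matching the METADATA hint.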
var commentText []byte if len(p.s.lit) > 1 { commentText = []byte(p.s.lit[1:]) } comment := NewComment(commentText) comment.SetLoc(p.s.Loc()) p.s.comments = append(p.s.comments, comment) } } func (p *Parser) save() *state { cpy := *p.s s := *cpy.s cpy.s = &s return &cpy } func (p *Parser) restore(s *state) { p.s = s } func setLocRecursive(x interface{}, loc *location.Location) { NewGenericVisitor(func(x interface{}) bool { if node, ok := x.(Node); ok { node.SetLoc(loc) } return false }).Walk(x) } func (p *Parser) setLoc(term *Term, loc *location.Location, offset, end int) *Term { if term != nil { cpy := *loc term.Location = &cpy term.Location.Text = p.s.Text(offset, end) } return term } func (p *Parser) validateDefaultRuleValue(rule *Rule) bool { if rule.Head.Value == nil { p.error(rule.Loc(), "illegal default rule (must have a value)") return false } valid := true vis := NewGenericVisitor(func(x interface{}) bool { switch x.(type) { case *ArrayComprehension, *ObjectComprehension, *SetComprehension: // skip closures return true case Ref, Var, Call: p.error(rule.Loc(), fmt.Sprintf("illegal default rule (value cannot contain %v)", TypeName(x))) valid = false return true } return false }) vis.Walk(rule.Head.Value.Value) return valid } // We explicitly use yaml unmarshalling, to accommodate for the '_' in 'related_resources', // which isn't handled properly by json for some reason. type rawAnnotation struct { Scope string `yaml:"scope"` Title string `yaml:"title"` Entrypoint bool `yaml:"entrypoint"` Description string `yaml:"description"` Organizations []string `yaml:"organizations"` RelatedResources []interface{} `yaml:"related_resources"` Authors []interface{} `yaml:"authors"` Schemas []rawSchemaAnnotation `yaml:"schemas"` Custom map[string]interface{} `yaml:"custom"` } type rawSchemaAnnotation map[string]interface{} type metadataParser struct { buf *bytes.Buffer comments []*Comment loc *location.Location } func newMetadataParser(loc *Location) *metadataParser { return &metadataParser{loc: loc, buf: bytes.NewBuffer(nil)} } func (b *metadataParser) Append(c *Comment) { b.buf.Write(bytes.TrimPrefix(c.Text, []byte(" "))) b.buf.WriteByte('\n') b.comments = append(b.comments, c) } var yamlLineErrRegex = regexp.MustCompile(`^yaml:(?: unmarshal errors:[\n\s]*)? 
line ([[:digit:]]+):`) func (b *metadataParser) Parse() (*Annotations, error) { var raw rawAnnotation if len(bytes.TrimSpace(b.buf.Bytes())) == 0 { return nil, fmt.Errorf("expected METADATA block, found whitespace") } if err := yaml.Unmarshal(b.buf.Bytes(), &raw); err != nil { var comment *Comment match := yamlLineErrRegex.FindStringSubmatch(err.Error()) if len(match) == 2 { n, err2 := strconv.Atoi(match[1]) if err2 == nil { index := n - 1 // line numbering is 1-based so subtract one from row if index >= len(b.comments) { comment = b.comments[len(b.comments)-1] } else { comment = b.comments[index] } b.loc = comment.Location } } return nil, augmentYamlError(err, b.comments) } var result Annotations result.comments = b.comments result.Scope = raw.Scope result.Entrypoint = raw.Entrypoint result.Title = raw.Title result.Description = raw.Description result.Organizations = raw.Organizations for _, v := range raw.RelatedResources { rr, err := parseRelatedResource(v) if err != nil { return nil, fmt.Errorf("invalid related-resource definition %s: %w", v, err) } result.RelatedResources = append(result.RelatedResources, rr) } for _, pair := range raw.Schemas { k, v := unwrapPair(pair) var a SchemaAnnotation var err error a.Path, err = ParseRef(k) if err != nil { return nil, fmt.Errorf("invalid document reference") } switch v := v.(type) { case string: a.Schema, err = parseSchemaRef(v) if err != nil { return nil, err } case map[interface{}]interface{}: w, err := convertYAMLMapKeyTypes(v, nil) if err != nil { return nil, fmt.Errorf("invalid schema definition: %w", err) } a.Definition = &w default: return nil, fmt.Errorf("invalid schema declaration for path %q", k) } result.Schemas = append(result.Schemas, &a) } for _, v := range raw.Authors { author, err := parseAuthor(v) if err != nil { return nil, fmt.Errorf("invalid author definition %s: %w", v, err) } result.Authors = append(result.Authors, author) } result.Custom = make(map[string]interface{}) for k, v := range raw.Custom { val, err := convertYAMLMapKeyTypes(v, nil) if err != nil { return nil, err } result.Custom[k] = val } result.Location = b.loc return &result, nil } // augmentYamlError augments a YAML error with hints intended to help the user figure out the cause of an otherwise cryptic error. // These are hints, instead of proper errors, because they are educated guesses, and aren't guaranteed to be correct. func augmentYamlError(err error, comments []*Comment) error { // Adding hints for when key/value ':' separator isn't suffixed with a legal YAML space symbol for _, comment := range comments { txt := string(comment.Text) parts := strings.Split(txt, ":") if len(parts) > 1 { parts = parts[1:] var invalidSpaces []string for partIndex, part := range parts { if len(part) == 0 && partIndex == len(parts)-1 { invalidSpaces = []string{} break } r, _ := utf8.DecodeRuneInString(part) if r == ' ' || r == '\t' { invalidSpaces = []string{} break } invalidSpaces = append(invalidSpaces, fmt.Sprintf("%+q", r)) } if len(invalidSpaces) > 0 { err = fmt.Errorf( "%s\n Hint: on line %d, symbol(s) %v immediately following a key/value separator ':' is not a legal yaml space character", err.Error(), comment.Location.Row, invalidSpaces) } } } return err } func unwrapPair(pair map[string]interface{}) (k string, v interface{}) { for k, v = range pair { } return } var errInvalidSchemaRef = fmt.Errorf("invalid schema reference") // NOTE(tsandall): 'schema' is not registered as a root because it's not // supported by the compiler or evaluator today. 
Once we fix that, we can remove // this function. func parseSchemaRef(s string) (Ref, error) { term, err := ParseTerm(s) if err == nil { switch v := term.Value.(type) { case Var: if term.Equal(SchemaRootDocument) { return SchemaRootRef.Copy(), nil } case Ref: if v.HasPrefix(SchemaRootRef) { return v, nil } } } return nil, errInvalidSchemaRef } func parseRelatedResource(rr interface{}) (*RelatedResourceAnnotation, error) { rr, err := convertYAMLMapKeyTypes(rr, nil) if err != nil { return nil, err } switch rr := rr.(type) { case string: if len(rr) > 0 { u, err := url.Parse(rr) if err != nil { return nil, err } return &RelatedResourceAnnotation{Ref: *u}, nil } return nil, fmt.Errorf("ref URL may not be empty string") case map[string]interface{}: description := strings.TrimSpace(getSafeString(rr, "description")) ref := strings.TrimSpace(getSafeString(rr, "ref")) if len(ref) > 0 { u, err := url.Parse(ref) if err != nil { return nil, err } return &RelatedResourceAnnotation{Description: description, Ref: *u}, nil } return nil, fmt.Errorf("'ref' value required in object") } return nil, fmt.Errorf("invalid value type, must be string or map") } func parseAuthor(a interface{}) (*AuthorAnnotation, error) { a, err := convertYAMLMapKeyTypes(a, nil) if err != nil { return nil, err } switch a := a.(type) { case string: return parseAuthorString(a) case map[string]interface{}: name := strings.TrimSpace(getSafeString(a, "name")) email := strings.TrimSpace(getSafeString(a, "email")) if len(name) > 0 || len(email) > 0 { return &AuthorAnnotation{name, email}, nil } return nil, fmt.Errorf("'name' and/or 'email' values required in object") } return nil, fmt.Errorf("invalid value type, must be string or map") } func getSafeString(m map[string]interface{}, k string) string { if v, found := m[k]; found { if s, ok := v.(string); ok { return s } } return "" } const emailPrefix = "<" const emailSuffix = ">" // parseAuthor parses a string into an AuthorAnnotation. If the last word of the input string is enclosed within <>, // it is extracted as the author's email. The email may not contain whitelines, as it then will be interpreted as // multiple words. func parseAuthorString(s string) (*AuthorAnnotation, error) { parts := strings.Fields(s) if len(parts) == 0 { return nil, fmt.Errorf("author is an empty string") } namePartCount := len(parts) trailing := parts[namePartCount-1] var email string if len(trailing) >= len(emailPrefix)+len(emailSuffix) && strings.HasPrefix(trailing, emailPrefix) && strings.HasSuffix(trailing, emailSuffix) { email = trailing[len(emailPrefix):] email = email[0 : len(email)-len(emailSuffix)] namePartCount = namePartCount - 1 } name := strings.Join(parts[0:namePartCount], " ") return &AuthorAnnotation{Name: name, Email: email}, nil } func convertYAMLMapKeyTypes(x interface{}, path []string) (interface{}, error) { var err error switch x := x.(type) { case map[interface{}]interface{}: result := make(map[string]interface{}, len(x)) for k, v := range x { str, ok := k.(string) if !ok { return nil, fmt.Errorf("invalid map key type(s): %v", strings.Join(path, "/")) } result[str], err = convertYAMLMapKeyTypes(v, append(path, str)) if err != nil { return nil, err } } return result, nil case []interface{}: for i := range x { x[i], err = convertYAMLMapKeyTypes(x[i], append(path, fmt.Sprintf("%d", i))) if err != nil { return nil, err } } return x, nil default: return x, nil } } // futureKeywords is the source of truth for future keywords that will // eventually become standard keywords inside of Rego. 
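//
// Illustration (assumption-free summary of the mechanism, example policy is
// hypothetical): once a keyword is selected, either via
// `import future.keywords.in` in the policy or via WithFutureKeywords("in")
// on the parser, the scanner emits tokens.In for the identifier `in`, which
// lets membership expressions parse:
//
//	x := [1, 2, 3]
//	2 in x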
var futureKeywords = map[string]tokens.Token{
	"in":       tokens.In,
	"every":    tokens.Every,
	"contains": tokens.Contains,
	"if":       tokens.If,
}

func (p *Parser) futureImport(imp *Import, allowedFutureKeywords map[string]tokens.Token) {
	path := imp.Path.Value.(Ref)

	if len(path) == 1 || !path[1].Equal(StringTerm("keywords")) {
		p.errorf(imp.Path.Location, "invalid import, must be `future.keywords`")
		return
	}

	if imp.Alias != "" {
		p.errorf(imp.Path.Location, "`future` imports cannot be aliased")
		return
	}

	kwds := make([]string, 0, len(allowedFutureKeywords))
	for k := range allowedFutureKeywords {
		kwds = append(kwds, k)
	}

	switch len(path) {
	case 2: // all keywords imported, nothing to do
	case 3: // one keyword imported
		kw, ok := path[2].Value.(String)
		if !ok {
			p.errorf(imp.Path.Location, "invalid import, must be `future.keywords.x`, e.g. `import future.keywords.in`")
			return
		}
		keyword := string(kw)
		_, ok = allowedFutureKeywords[keyword]
		if !ok {
			sort.Strings(kwds) // so the error message is stable
			p.errorf(imp.Path.Location, "unexpected keyword, must be one of %v", kwds)
			return
		}

		kwds = []string{keyword} // overwrite
	}
	for _, kw := range kwds {
		p.s.s.AddKeyword(kw, allowedFutureKeywords[kw])
	}
}
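
// futureKeywordsExample is an illustrative usage sketch; the function name and
// the policy text below are assumptions made for the example and are not part
// of the parser's API. It shows how a caller might enable the `in` keyword
// without an explicit `import future.keywords.in` in the policy source.
func futureKeywordsExample() ([]Statement, []*Comment, Errors) {
	policy := "package example\n\nallowed { 1 in [1, 2, 3] }"
	return NewParser().
		WithReader(strings.NewReader(policy)).
		WithFutureKeywords("in").
		Parse()
}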