Files
kubesphere/vendor/github.com/open-policy-agent/opa/internal/edittree/edittree.go
KubeSphere CI Bot 447a51f08b feat: kubesphere 4.0 (#6115)
* feat: kubesphere 4.0

Signed-off-by: ci-bot <ci-bot@kubesphere.io>

* feat: kubesphere 4.0

Signed-off-by: ci-bot <ci-bot@kubesphere.io>

---------

Signed-off-by: ci-bot <ci-bot@kubesphere.io>
Co-authored-by: ks-ci-bot <ks-ci-bot@example.com>
Co-authored-by: joyceliu <joyceliu@yunify.com>
2024-09-06 11:05:52 +08:00

1186 lines
38 KiB
Go

// Copyright 2023 The OPA Authors. All rights reserved.
// Use of this source code is governed by an Apache2
// license that can be found in the LICENSE file.
// Package edittree implements a specialized tree data structure that
// allows for cheap edits and modifications of nested Term structures.
//
// # Overview
//
// The EditTree data structure exists to solve an ugly problem in Rego:
// modification/deletion of a Term can be very expensive, because we have
// to rebuild the whole Term, sans the modified/deleted parts.
//
// To work around that problem, the EditTree allows simple add, modify, or
// delete operations on Term structures, and then at the end of a series of
// edits, the caller can pay the cost of generating a new Term from the
// tree of edits relatively efficiently. (Essentially a recursive, DFS
// traversal of the tree.)
//
// The data structure preserves basic type/safety properties, the same as
// working on the real underlying Term values. To do this, recursive
// lookups are used. On average, these are fairly straightforward and
// cheap to do.
//
// Basic Operations:
// - Insert/update EditTree node
// - Delete EditTree node
// - Unfold EditTree nodes along a path
// - Render EditTree node
//
// These operations provide all of the basic utilities required to
// recursively construct the tree. Ref-based convenience functions are also
// provided, to make rendering subtrees at a particular JSON path easier.
//
// Path-Based Convenience Functions:
// - InsertAtPath
// - DeleteAtPath
// - RenderAtPath
//
// Additionally, a few "optional" (but nice to have) functions have been
// added, to allow replacing slower/less-efficient equivalents elsewhere.
//
// Optional Functions:
// - Exists (a more efficient boolean alternative to Unfold)
// - Filter (an alternative to Object.Filter that efficiently renders
// paths out of an EditTree)
//
// # Storing scalar children "inline"
//
// The original design for the EditTree allocated a new EditTree node for
// each Term stored in the tree, but this was found to be inefficient when
// dealing with large arrays and objects. The current design of the
// EditTree separates children based on their types, with scalars stored in
// a hash -> Term map, and composites stored in a hash -> EditTree map.
//
// This results in dramatically fewer heap allocations and faster access
// times for "shallow" Term structures, without penalizing nested Term
// structures noticeably.
//
// # Object operations
//
// Objects are the most straightforward composite type, as their key-value
// structure maps naturally onto trees. Their inserts/deletes are recorded
// directly in the appropriate child maps, with almost no additional
// complexity required.
//
// Object EditTree nodes use the child key and value maps, and will not
// initialize the bit-vectors, since those are only used for Arrays.
//
// # Set operations
//
// Set data types have a major problem: they're *content-addressed*. This
// means that we often have to render/materialize the sub-terms before
// carrying out inserts or deletes, in order to know if the path to the
// destination Term exists. This forces a tree collapse at the Set's
// EditTree node, and is brutally inefficient.
//
// Example:
//
// Source set: {[0], "a"}
// {"op": "add", "path": [[0], 1], "value": 1} -> result value: {[0, 1], "a"}
// {"op": "add", "path": [[0, 1], 3], "value": 3} -> result value: {[0, 1, 3], "a"}
//
// We mitigate this somewhat by only collapsing a Set when a composite
// value is being used for indexing. Scalars imply a shallow access, which
// we can look up directly in the appropriate child map.
//
// Set EditTree nodes use the child key and value maps, and will not
// initialize the bit-vectors, since those are only used for Arrays.
//
// # Array operations
//
// Arrays can have elements inserted/replaced/deleted, and this requires
// some bookkeeping work to keep everything straight. We do this
// bookkeeping work using two bit vectors to track all the
// insertions/deletions.
//
// One bit-vector tracks which indexes are preserved/eliminated from the
// original Array, and the second bit-vector tracks which indexes have
// insertions. We can record inserts and deletes *directly* on the second
// bit-vector, "bleeding through" deletions to the preserved/eliminated bit
// vector when there's not an insert to wipe out first.
//
// For bleed-through deletes, a linear scan is required to find the index
// of which original element will be knocked out. We then mark that bit in
// the preserved/eliminated bit-vector. This is a fair bit of bookkeeping,
// but greatly reduces the cost and complexity of tracking Array state.
// There can only be insertions, or original values present. Any other
// "deletion" is an error.
//
// Insert and Delete operations also imply a linear "index rewriting" pass
// for an Array's child maps, where indexes that occur above the affected
// index of the insertion/deletion must be incremented or decremented
// appropriately. This ensures that when rendered later, the
// original/inserted values will be spliced in at the correct offsets in
// the final Array value.
//
// Due to optimizations discussed later, Array EditTree nodes do not use
// the child key map (leaving it uninitialized), but will initialize and
// use the child value maps normally. Array EditTree nodes are the only
// types of EditTree nodes that should ever be expected to have initialized
// bit-vectors present.
//
// # Scalar operations
//
// Scalars are fairly simple: just a term stored in an EditTree node, or in
// the scalar child map of a composite type's EditTree node. They cannot
// have children, and normally do not exist as independent EditTree nodes,
// except to satisfy certain EditTree APIs.
//
// Scalar EditTree nodes can only be expected to have a valid Term value;
// all other fields will be left uninitialized.
//
// # Optimization: Direct Array Indexing with ints
//
// Arrays are unique in Rego, because the only valid Terms that can index
// into them are integer, numeric values. When processing the key Terms for
// Objects and Sets, we have to identify children by their hash values
// (which hash to integers). Because the only valid key Terms for Arrays
// work as ints as well, we can skip the hashing step entirely, and just
// use the int indexes *directly*.
//
// This provides a substantial CPU savings in benchmarks, because the
// "index rewriting" passes become much cheaper from not having to rehash
// every child's index.
package edittree
import (
"encoding/json"
"fmt"
"math/big"
"sort"
"strings"
"github.com/open-policy-agent/opa/ast"
"github.com/open-policy-agent/opa/internal/edittree/bitvector"
)
// EditTree is a node in a tree of pending edits layered over an ast.Term.
// Deletions are encoded with a nil value pointer.
type EditTree struct {
// value is the base term this node wraps; nil marks a deleted node.
value *ast.Term
// childKeys maps a (collision-resolved) key hash to the key term.
// Only initialized for Object and Set nodes.
childKeys map[int]*ast.Term
// childScalarValues maps a key hash (or, for Arrays, an index) to a
// scalar child term. For Objects/Sets a nil entry records a deletion.
childScalarValues map[int]*ast.Term
// childCompositeValues maps a key hash (or, for Arrays, an index) to a
// composite child node. For Objects/Sets a nil entry records a deletion.
childCompositeValues map[int]*EditTree
eliminated *bitvector.BitVector // Which original indexes have been knocked out?
insertions *bitvector.BitVector // Which indexes have a new value inserted at them? (also used for "live" bookkeeping)
}
// NewEditTree wraps term in a fresh EditTree node and returns it.
// A nil term yields a nil node.
//
// Which fields get initialized depends on the term's type:
//   - Object/Set: the key map and both child value maps.
//   - Array: both child value maps plus the two bit-vectors, each sized to
//     the array's length (all bits zero).
//   - anything else (scalars): only the value itself.
func NewEditTree(term *ast.Term) *EditTree {
	if term == nil {
		return nil
	}

	node := &EditTree{value: term}
	switch v := term.Value.(type) {
	case ast.Object, ast.Set:
		node.childKeys = map[int]*ast.Term{}
		node.childScalarValues = map[int]*ast.Term{}
		node.childCompositeValues = map[int]*EditTree{}
	case *ast.Array:
		node.childScalarValues = map[int]*ast.Term{}
		node.childCompositeValues = map[int]*EditTree{}
		// One bit per element, rounded up to whole bytes (minimum one byte).
		size := v.Len()
		numBytes := (size-1)/8 + 1
		node.eliminated = bitvector.NewBitVector(make([]byte, numBytes), size)
		node.insertions = bitvector.NewBitVector(make([]byte, numBytes), size)
	}
	return node
}
// getKeyHash returns the correct (collision-resolved) hash for key, plus
// whether or not the key was found in the childKeys table already.
//
// When the key is not found, the returned hash is the first free slot
// reached by linear probing, i.e. the slot a new entry for key should use.
// Panics with "illegal value" if a Number key cannot be parsed.
func (e *EditTree) getKeyHash(key *ast.Term) (int, bool) {
hash := key.Hash()
// This `equal` utility is duplicated and manually inlined a number of
// times in this file. Inlining it avoids heap allocations, so it makes
// a big performance difference: some operations like lookup become twice
// as slow without it.
var equal func(v ast.Value) bool
switch x := key.Value.(type) {
case ast.Null, ast.Boolean, ast.String, ast.Var:
// These value types are compared directly with ==.
equal = func(y ast.Value) bool { return x == y }
case ast.Number:
// Fast path: the key fits in an int64, so compare as int64s.
if xi, err := json.Number(x).Int64(); err == nil {
equal = func(y ast.Value) bool {
if y, ok := y.(ast.Number); ok {
if yi, err := json.Number(y).Int64(); err == nil {
return xi == yi
}
}
return false
}
break
}
// We use big.Rat for comparing big numbers.
// It replaces big.Float for the following reason:
// big.Float comes with a default precision of 64, and setting a
// larger precision results in more memory being allocated
// (regardless of the actual number we are parsing with SetString).
//
// Note: If we're so close to zero that big.Float says we are zero, do
// *not* call (*big.Rat).SetString on the original string, since it
// could potentially take a very long time.
var a *big.Rat
fa, ok := new(big.Float).SetString(string(x))
if !ok {
panic("illegal value")
}
if fa.IsInt() {
if i, _ := fa.Int64(); i == 0 {
a = new(big.Rat).SetInt64(0)
}
}
if a == nil {
a, ok = new(big.Rat).SetString(string(x))
if !ok {
panic("illegal value")
}
}
// Slow path: parse the candidate the same way and compare as rationals.
equal = func(b ast.Value) bool {
if bNum, ok := b.(ast.Number); ok {
var b *big.Rat
fb, ok := new(big.Float).SetString(string(bNum))
if !ok {
panic("illegal value")
}
if fb.IsInt() {
if i, _ := fb.Int64(); i == 0 {
b = new(big.Rat).SetInt64(0)
}
}
if b == nil {
b, ok = new(big.Rat).SetString(string(bNum))
if !ok {
panic("illegal value")
}
}
return a.Cmp(b) == 0
}
return false
}
default:
// Everything else falls back to the generic ast.Compare.
equal = func(y ast.Value) bool { return ast.Compare(x, y) == 0 }
}
// Look through childKeys, looking up the original hash
// value first, and then use linear-probing to iterate
// through the keys until we either find the Term we're
// after, or run out of candidates.
for curr, ok := e.childKeys[hash]; ok; {
if equal(curr.Value) {
return hash, true
}
hash++
curr, ok = e.childKeys[hash]
}
// Didn't find any matches in childKeys. Hash will be
// the first open slot after the linear probing loop.
return hash, false
}
// isComposite reports whether t holds an Object, Set, or Array value.
// Every other value type is treated as a scalar.
//
//gcassert:inline
func isComposite(t *ast.Term) bool {
	switch t.Value.(type) {
	case ast.Object, ast.Set, *ast.Array:
		return true
	}
	return false
}
// setChildKey stores key in the child key map at slot hash, replacing any
// key term previously recorded there.
//
//gcassert:inline
func (e *EditTree) setChildKey(hash int, key *ast.Term) {
e.childKeys[hash] = key
}
// setChildScalarValue stores value in the scalar child map at slot hash.
// Callers in this file pass a nil value to record a deletion.
//
//gcassert:inline
func (e *EditTree) setChildScalarValue(hash int, value *ast.Term) {
e.childScalarValues[hash] = value
}
// setChildCompositeValue stores child in the composite child map at slot
// hash. Callers in this file pass a nil child to record a deletion.
//
//gcassert:inline
func (e *EditTree) setChildCompositeValue(hash int, child *EditTree) {
e.childCompositeValues[hash] = child
}
// deleteChildValue removes the entry at hash from BOTH the scalar and the
// composite child value maps. The key map is left untouched.
//
// We don't have a deleteChildKeys method, because once a key is inserted,
// it can only be replaced with either another value, or a delete node from
// then on.
//
//gcassert:inline
func (e *EditTree) deleteChildValue(hash int) {
delete(e.childScalarValues, hash)
delete(e.childCompositeValues, hash)
}
// Insert adds value as a child of e under key and returns the EditTree node
// created for the new child.
//
// Behavior by the type of e's value:
//   - Object: the key is inserted or overwritten.
//   - Set: key must equal value. If the key is composite, the node is first
//     collapsed (rendered, with all child maps reset) before inserting.
//   - Array: key is converted to an index via toIndex; the index may equal
//     the current length (append) but may not exceed it or be negative.
//   - Scalars: always an error.
//
// An error is also returned when e is a deleted node, or key/value is nil.
func (e *EditTree) Insert(key, value *ast.Term) (*EditTree, error) {
	switch {
	case e.value == nil:
		return nil, fmt.Errorf("deleted node encountered during insert operation")
	case key == nil:
		return nil, fmt.Errorf("nil key provided for insert operation")
	case value == nil:
		return nil, fmt.Errorf("nil value provided for insert operation")
	}

	switch base := e.value.Value.(type) {
	case ast.Object:
		return e.unsafeInsertObject(key, value), nil

	case ast.Set:
		if !key.Equal(value) {
			return nil, fmt.Errorf("set key %v does not equal value to be inserted %v", key, value)
		}
		// The Set-typed node is only collapsed when a composite type is involved.
		if isComposite(key) {
			// TODO: Investigate re-rendering *only* the immediate composite children.
			e.value = e.Render()
			e.childKeys = map[int]*ast.Term{}
			e.childScalarValues = map[int]*ast.Term{}
			e.childCompositeValues = map[int]*EditTree{}
		}
		return e.unsafeInsertSet(key, value), nil

	case *ast.Array:
		length := e.insertions.Length()
		idx, err := toIndex(length, key)
		if err != nil {
			return nil, err
		}
		if idx < 0 || idx > length {
			return nil, fmt.Errorf("index for array insertion out of bounds")
		}
		return e.unsafeInsertArray(idx, value), nil

	default:
		// Catch all primitive types.
		return nil, fmt.Errorf("expected composite type, found value: %v (type: %T)", base, base)
	}
}
// unsafeInsertObject records key -> value in an Object node's child maps,
// without validating e or its arguments, and returns a new EditTree node
// wrapping value. Scalar values are stored inline in the scalar map; only
// composite values have the returned node stored in the tree.
func (e *EditTree) unsafeInsertObject(key, value *ast.Term) *EditTree {
	node := NewEditTree(value)

	slot, exists := e.getKeyHash(key)
	if exists {
		// Drop the previous value so it cannot linger in the other map.
		e.deleteChildValue(slot)
	}
	e.setChildKey(slot, key)

	if !isComposite(value) {
		e.setChildScalarValue(slot, value)
		return node
	}
	e.setChildCompositeValue(slot, node)
	return node
}
// unsafeInsertSet records a Set member in e's child maps, without
// validating e or its arguments, and returns a new EditTree node wrapping
// value. A scalar member is kept inline in the scalar map; for a composite
// member the returned node itself is stored.
func (e *EditTree) unsafeInsertSet(key, value *ast.Term) *EditTree {
	member := NewEditTree(value)

	slot, alreadyPresent := e.getKeyHash(key)
	if alreadyPresent {
		// Clear any earlier value (or delete marker) held for this member.
		delete(e.childScalarValues, slot)
		delete(e.childCompositeValues, slot)
	}
	e.childKeys[slot] = key

	switch {
	case isComposite(value):
		e.childCompositeValues[slot] = member
	default:
		e.childScalarValues[slot] = value
	}
	return member
}
// unsafeInsertArray inserts value at position idx of an Array node, without
// validating idx, and returns a new EditTree node wrapping value.
//
// All previously inserted children at or above idx are shifted up by one
// slot in the child maps, and a 1 bit is inserted into (or appended to) the
// insertions bit-vector at idx. Panics if the insertions bit-vector marks
// an index as inserted but neither child map holds a value for it.
func (e *EditTree) unsafeInsertArray(idx int, value *ast.Term) *EditTree {
	child := NewEditTree(value)

	// Collect insertion indexes at/above the insertion site for rewriting.
	var rewritesScalars, rewritesComposites []int
	for i := idx; i < e.insertions.Length(); i++ {
		if e.insertions.Element(i) != 1 {
			continue
		}
		if _, ok := e.childScalarValues[i]; ok {
			rewritesScalars = append(rewritesScalars, i)
			continue
		}
		if _, ok := e.childCompositeValues[i]; ok {
			rewritesComposites = append(rewritesComposites, i)
			continue
		}
		panic(fmt.Errorf("invalid index %d during Insert operation", i))
	}

	// Do rewrites in reverse order to make room for the newly-inserted element.
	//
	// Each pass must only touch its OWN map. Clearing both maps here (as
	// deleteChildValue does) would let the composite pass wipe out a scalar
	// that the scalar pass had just moved into the composite's old slot.
	for i := len(rewritesScalars) - 1; i >= 0; i-- {
		from := rewritesScalars[i]
		v := e.childScalarValues[from]
		delete(e.childScalarValues, from)
		e.setChildScalarValue(from+1, v)
	}
	for i := len(rewritesComposites) - 1; i >= 0; i-- {
		from := rewritesComposites[i]
		v := e.childCompositeValues[from]
		delete(e.childCompositeValues, from)
		e.setChildCompositeValue(from+1, v)
	}

	// Insert new element in children array, bump bit-vector over by 1.
	if idx == e.insertions.Length() {
		e.insertions.Append(1)
	} else {
		e.insertions.Insert(1, idx)
	}
	if isComposite(value) {
		e.setChildCompositeValue(idx, child)
	} else {
		e.setChildScalarValue(idx, value)
	}
	return child
}
// Delete removes a child of e, or else creates a delete node for a term
// already present in e.
//
// Return value by the type of e's value:
//   - Object/Set: an EditTree node for the deleted child.
//   - Array: e itself (not the deleted element).
//
// Errors are returned when e is a deleted node, key is nil, the key does
// not exist (or was already deleted), the array index is invalid or out of
// bounds, or e holds a scalar. Panics if the internal bookkeeping between
// the key map, child maps and bit-vectors is found to be inconsistent.
func (e *EditTree) Delete(key *ast.Term) (*EditTree, error) {
	if e.value == nil {
		return nil, fmt.Errorf("deleted node encountered during delete operation")
	}
	if key == nil {
		return nil, fmt.Errorf("nil key provided for delete operation")
	}

	switch e.value.Value.(type) {
	case ast.Object:
		keyHash, found := e.getKeyHash(key)
		// If child found, replace with delete node. If delete node already existed, error.
		if found {
			if child, ok := e.childScalarValues[keyHash]; ok {
				if child == nil {
					return nil, fmt.Errorf("cannot delete the already deleted scalar node for key %v", key)
				}
				e.setChildKey(keyHash, key)
				e.setChildScalarValue(keyHash, nil)
				return NewEditTree(child), nil
			}
			if child, ok := e.childCompositeValues[keyHash]; ok {
				if child == nil {
					return nil, fmt.Errorf("cannot delete the already deleted composite node for key %v", key)
				}
				e.setChildKey(keyHash, key)
				e.setChildCompositeValue(keyHash, nil)
				return child, nil
			}
			// Note(philipc): We panic here, because the only way to reach
			// this panic is to have broken the bookkeeping around the key
			// and child maps in a way that is not recoverable.
			// For example, if we have an Object EditTree node, and mess up
			// the bookkeeping elsewhere by deleting just the value from
			// the child maps, *without* also deleting the key from the key
			// map, we would reach this place, where the data structure
			// *expects* a value to exist, but nothing is present.
			panic(fmt.Errorf("hash value %d not found in scalar or composite child maps", keyHash))
		}
		// No child, lookup the key in e.value, and put in a delete if present.
		// Error if key does not exist in e.value.
		return e.fallbackDelete(key)

	case ast.Set:
		// We only collapse this Set-typed node if a composite type is involved.
		if isComposite(key) {
			// TODO: Investigate re-rendering *only* the immediate composite children.
			collapsed := e.Render()
			e.value = collapsed
			e.childKeys = map[int]*ast.Term{}
			e.childScalarValues = map[int]*ast.Term{}
			e.childCompositeValues = map[int]*EditTree{}
		} else {
			keyHash, found := e.getKeyHash(key)
			// If child found, replace with delete node. If delete node already existed, error.
			if found {
				if child, ok := e.childScalarValues[keyHash]; ok {
					if child == nil {
						return nil, fmt.Errorf("cannot delete the already deleted scalar node for key %v", key)
					}
					if key.Equal(child) {
						e.setChildKey(keyHash, key)
						e.setChildScalarValue(keyHash, nil)
						return NewEditTree(child), nil
					}
				}
			}
		}
		// No child, lookup the key in e.value, and put in a delete if present.
		// Error if key does not exist in e.value.
		return e.fallbackDelete(key)

	case *ast.Array:
		idx, err := toIndex(e.insertions.Length(), key)
		if err != nil {
			return nil, err
		}
		if idx < 0 || idx > e.insertions.Length()-1 {
			return nil, fmt.Errorf("index for array delete out of bounds")
		}

		// Collect insertion indexes above the delete site for rewriting.
		var rewritesScalars, rewritesComposites []int
		for i := idx + 1; i < e.insertions.Length(); i++ {
			if e.insertions.Element(i) != 1 {
				continue
			}
			if _, ok := e.childScalarValues[i]; ok {
				rewritesScalars = append(rewritesScalars, i)
				continue
			}
			if _, ok := e.childCompositeValues[i]; ok {
				rewritesComposites = append(rewritesComposites, i)
				continue
			}
			panic(fmt.Errorf("invalid index %d during Delete operation", i))
		}

		// Do rewrites to clear out the newly-removed element.
		e.deleteChildValue(idx)
		// Each pass below must only touch its OWN map. Clearing both maps
		// would let the composite pass wipe out a scalar that the scalar
		// pass had just moved down into the composite's old slot.
		for _, from := range rewritesScalars {
			v := e.childScalarValues[from]
			delete(e.childScalarValues, from)
			e.setChildScalarValue(from-1, v)
		}
		for _, from := range rewritesComposites {
			v := e.childCompositeValues[from]
			delete(e.childCompositeValues, from)
			e.setChildCompositeValue(from-1, v)
		}

		// "bleed through" to the underlying array if needed.
		// To do this, we sum up the zeroes below the current index, and use that value
		// to index through the `eliminated` array until we find a surviving index.
		if e.insertions.Element(idx) == 0 {
			zeroesSeen := 1 + sumZeroesBelowIndex(idx, e.insertions)
			// Mark appropriate position in `eliminated` array, or error.
			elimIdx, found := findIndexOfNthZero(zeroesSeen, e.eliminated)
			if !found {
				panic(fmt.Errorf("could not successfully eliminate index %d from array", idx))
			}
			e.eliminated.Set(1, elimIdx)
		}
		// Delete element from insertions array, bump bit-vec over by 1.
		e.insertions.Delete(idx)
		return e, nil

	default:
		// Catch all primitive types.
		return nil, fmt.Errorf("expected composite type, found value: %v (type: %T)", e.value.Value, e.value.Value)
	}
}
// sumZeroesBelowIndex counts how many bits of bv at positions
// [0, index) are zero.
//
//gcassert:inline
func sumZeroesBelowIndex(index int, bv *bitvector.BitVector) int {
	total := 0
	for pos := 0; pos < index; pos++ {
		if bv.Element(pos) != 0 {
			continue
		}
		total++
	}
	return total
}
// findIndexOfNthZero returns the position of the n-th zero bit in bv
// (counting from 1) and true, or (0, false) when bv holds fewer than n zero
// bits. A non-positive n never matches.
func findIndexOfNthZero(n int, bv *bitvector.BitVector) (int, bool) {
	// There is no "0th" zero; without this guard the first index would be
	// reported as a match regardless of the bit stored there.
	if n <= 0 {
		return 0, false
	}
	zeroesSeen := 0
	for i := 0; i < bv.Length(); i++ {
		if bv.Element(i) != 0 {
			continue
		}
		zeroesSeen++
		if zeroesSeen == n {
			return i, true
		}
	}
	return 0, false
}
// fallbackDelete handles deletes on sets/objects when the key isn't present
// in either child map. It looks the key up in the base term e.value; if the
// key is missing there, an error is returned. Otherwise a delete marker
// (nil) is recorded in the scalar or composite child map, depending on the
// type of the value being removed, and a new EditTree node wrapping that
// value is returned.
func (e *EditTree) fallbackDelete(key *ast.Term) (*EditTree, error) {
	found, err := e.value.Value.Find(ast.Ref{key})
	if err != nil {
		return nil, fmt.Errorf("cannot delete child key %v that does not exist", key)
	}
	removed := ast.NewTerm(found)

	slot, _ := e.getKeyHash(key)
	e.setChildKey(slot, key)
	if isComposite(removed) {
		e.setChildCompositeValue(slot, nil)
	} else {
		e.setChildScalarValue(slot, nil)
	}
	return NewEditTree(removed), nil
}
// Unfold unfurls a chain of EditTree nodes down the given path, or else
// returns an error. An empty path returns e itself.
//
// At each step the child is taken from the child maps if present;
// otherwise the key is looked up in the base term e.value and, if it
// exists there, a child node is created for it in the tree. Scalar children
// are returned as detached nodes, since scalars are stored inline.
//
// Note that unfolding through a Set with a composite key collapses the Set
// node (renders it and resets its child maps) as a side effect.
//
// Errors are returned when the path does not exist, passes through a
// deleted node, or passes through a scalar. Panics if the key map and child
// maps of an Object node are inconsistent.
func (e *EditTree) Unfold(path ast.Ref) (*EditTree, error) {
	// 0 path segments base case. (Root hits this.)
	if len(path) == 0 {
		return e, nil
	}
	// 1+ path segment case.
	if e.value == nil {
		return nil, fmt.Errorf("nil value encountered where composite value was expected")
	}

	// Switch behavior based on types.
	key := path[0]
	switch x := e.value.Value.(type) {
	case ast.Object:
		keyHash, found := e.getKeyHash(key)
		if found {
			if term, ok := e.childScalarValues[keyHash]; ok {
				if term == nil {
					return nil, fmt.Errorf("cannot unfold the already deleted scalar node for key %v", key)
				}
				child := NewEditTree(term)
				return child.Unfold(path[1:])
			}
			if child, ok := e.childCompositeValues[keyHash]; ok {
				if child == nil {
					return nil, fmt.Errorf("cannot unfold the already deleted composite node for key %v", key)
				}
				return child.Unfold(path[1:])
			}
			// Note(philipc): We panic here, because the only way to reach
			// this panic is to have broken the bookkeeping around the key
			// and child maps in a way that is not recoverable.
			// For example, if we have an Object EditTree node, and mess up
			// the bookkeeping elsewhere by deleting just the value from
			// the child maps, *without* also deleting the key from the key
			// map, we would reach this place, where the data structure
			// *expects* a value to exist, but nothing is present.
			panic(fmt.Errorf("hash value %d not found in scalar or composite child maps", keyHash))
		}
		// Fall back to looking up the key in e.value.
		// Extend the tree if key is present. Error otherwise.
		if v, err := x.Find(ast.Ref{path[0]}); err == nil {
			child, err := e.Insert(path[0], ast.NewTerm(v))
			if err != nil {
				return nil, err
			}
			return child.Unfold(path[1:])
		}
		return nil, fmt.Errorf("path %v does not exist in object term %v", ast.Ref{path[0]}, e.value.Value)

	case ast.Set:
		// Sets' keys *are* their values, so in order to allow accurate
		// traversal, we have to collapse the tree beneath this node,
		// so that we can accurately unfold it again for an update,
		// once we know that the key we care about is present.
		if isComposite(key) {
			collapsed := e.Render()
			e.value = collapsed
			e.childKeys = map[int]*ast.Term{}
			e.childScalarValues = map[int]*ast.Term{}
			e.childCompositeValues = map[int]*EditTree{}
		} else {
			keyHash, found := e.getKeyHash(key)
			if found {
				if term, ok := e.childScalarValues[keyHash]; ok {
					// A nil entry is a delete marker. NewEditTree(nil)
					// would yield a nil node and a nil dereference (or a
					// nil result with no error), so report it explicitly,
					// as the Object branch does.
					if term == nil {
						return nil, fmt.Errorf("cannot unfold the already deleted scalar node for key %v", key)
					}
					child := NewEditTree(term)
					return child.Unfold(path[1:])
				}
			}
		}
		// Fall back to looking up the key in e.value.
		// Extend the tree if key is present. Error otherwise.
		if v, err := e.value.Value.Find(ast.Ref{path[0]}); err == nil {
			child, err := e.Insert(path[0], ast.NewTerm(v))
			if err != nil {
				return nil, err
			}
			return child.Unfold(path[1:])
		}
		return nil, fmt.Errorf("path %v does not exist in set term %v", ast.Ref{path[0]}, e.value.Value)

	case *ast.Array:
		idx, err := toIndex(e.insertions.Length(), path[0])
		if err != nil {
			return nil, err
		}
		if term, ok := e.childScalarValues[idx]; ok {
			child := NewEditTree(term)
			return child.Unfold(path[1:])
		}
		if child, ok := e.childCompositeValues[idx]; ok {
			return child.Unfold(path[1:])
		}
		// Fall back to the original array in e.value. idx is a position in
		// the *current* (edited) array, so it must first be translated to a
		// position in the original array: the k-th zero bit of `insertions`
		// corresponds to the k-th zero (surviving) bit of `eliminated`.
		// This mirrors the mapping used by Delete and Render.
		if idx >= 0 && idx < e.insertions.Length() && e.insertions.Element(idx) == 0 {
			origIdx, ok := findIndexOfNthZero(1+sumZeroesBelowIndex(idx, e.insertions), e.eliminated)
			if ok {
				if v, err := x.Find(ast.Ref{ast.IntNumberTerm(origIdx)}); err == nil {
					// Extend the tree: replace the original element with an
					// explicit child node at the same (current) position.
					// TODO: Consider a more efficient "Replace" function that special-cases this for arrays instead?
					if _, err := e.Delete(ast.IntNumberTerm(idx)); err != nil {
						return nil, err
					}
					child, err := e.Insert(ast.IntNumberTerm(idx), ast.NewTerm(v))
					if err != nil {
						return nil, err
					}
					return child.Unfold(path[1:])
				}
			}
		}
		return nil, fmt.Errorf("path %v does not exist in array term %v", ast.Ref{ast.IntNumberTerm(idx)}, e.value.Value)

	default:
		// Catch all primitive types.
		return nil, fmt.Errorf("expected composite type for path %v, found value: %v (type: %T)", ast.Ref{path[0]}, x, x)
	}
}
// Render generates the effective value for the term at e by recursively
// rendering the children of e, and then copying over any leftover keys
// from the original term stored at e.
//
// A deleted node (nil value) renders as nil. Scalars render as their
// stored term. Objects and Sets with no recorded child keys return the
// original term unchanged. Panics if an Array node's bit-vectors and child
// maps are inconsistent.
func (e *EditTree) Render() *ast.Term {
if e.value == nil {
return nil
}
switch x := e.value.Value.(type) {
case ast.Object:
// Early exit if no modifications.
if len(e.childKeys) == 0 {
return e.value
}
// Build a new Object with modified/deleted keys.
// We do this by adding the modified/deleted keys first, then
// skipping those keys later when iterating over the original base
// term in e.value.
skipKeysList := make([]*ast.Term, 0, len(e.childKeys))
out := make([][2]*ast.Term, 0, len(e.childKeys)+x.Len())
for hash, term := range e.childScalarValues {
skipKeysList = append(skipKeysList, e.childKeys[hash])
if term == nil {
continue // Delete case.
}
// Normal value case.
out = append(out, [2]*ast.Term{e.childKeys[hash], term})
}
for hash, child := range e.childCompositeValues {
skipKeysList = append(skipKeysList, e.childKeys[hash])
if child == nil {
continue // Delete case.
}
// Normal value case.
subtreeResult := child.Render()
out = append(out, [2]*ast.Term{e.childKeys[hash], subtreeResult})
}
skipKeys := ast.NewSet(skipKeysList...)
// Copy over all keys that weren't deleted/modified.
x.Foreach(func(k, v *ast.Term) {
if skipKeys.Contains(k) {
return
}
out = append(out, [2]*ast.Term{k, v})
})
return ast.ObjectTerm(out...)
case ast.Set:
// Early exit if no modifications.
if len(e.childKeys) == 0 {
return e.value
}
// Build a new Set.
// Sets only can have deletions/new insertions, because the value
// *is* its own key.
skipKeysList := make([]*ast.Term, 0, len(e.childKeys))
out := make([]*ast.Term, 0, x.Len()+len(e.childKeys))
for hash, term := range e.childScalarValues {
skipKeysList = append(skipKeysList, e.childKeys[hash])
if term == nil {
continue // Delete case.
}
// Normal value case.
out = append(out, e.childScalarValues[hash])
}
// Only happens when this set hasn't been collapsed yet.
for hash, child := range e.childCompositeValues {
skipKeysList = append(skipKeysList, e.childKeys[hash])
if child == nil {
continue // Delete case.
}
// Normal value case.
subtreeResult := child.Render()
out = append(out, subtreeResult)
}
skipKeys := ast.NewSet(skipKeysList...)
// Copy over all keys that weren't deleted/modified.
x.Foreach(func(key *ast.Term) {
if skipKeys.Contains(key) {
return
}
out = append(out, key)
})
return ast.SetTerm(out...)
case *ast.Array:
// No early exit here, because we might have just deletes on the
// original array. We build a new Array with modified/deleted keys.
out := make([]*ast.Term, 0, e.insertions.Length())
// eIdx is the cursor into the original array / e.eliminated bit-vec.
eIdx := 0
for i := 0; i < e.insertions.Length(); i++ {
// If the bit == 0, that indicates we should look up the next
// surviving original element.
// If the bit == 1, that indicates we should look up that
// index in the child maps.
if e.insertions.Element(i) == 0 {
// Scan through the e.eliminated bit-vec from eIdx, and pick
// the first index whose bit is zero (i.e. not eliminated).
// Then use that index to look up the element we should
// append from e.value.
foundIdx := false
for j := eIdx; j < e.eliminated.Length(); j++ {
if e.eliminated.Element(j) == 0 {
foundIdx = true
eIdx = j
break
}
}
if !foundIdx {
panic(fmt.Errorf("too many eliminated indexes in array, expected to find uneliminated index %d", i))
}
// Append element from original term.
out = append(out, x.Elem(eIdx))
eIdx++ // Bump the counter, so that we monotonically advance through the array.
} else {
// Append value from rendered child index.
// Since deletions are not possible as children for an Array,
// we don't need to check for nils here.
if t, ok := e.childScalarValues[i]; ok {
out = append(out, t)
} else if child, ok := e.childCompositeValues[i]; ok {
t := child.Render()
out = append(out, t)
} else {
panic(fmt.Errorf("invalid index %d does not exist in array", i))
}
}
}
return ast.ArrayTerm(out...)
default:
// Scalars have no children; the stored term is the rendered value.
return e.value
}
}
// InsertAtPath traverses down the tree from e and uses the last path
// segment as the key to insert value into the tree.
// Returns the inserted EditTree node.
//
// With an empty path, e itself is overwritten: its value becomes value, all
// three child maps are reset, and, if value is an Array, both bit-vectors
// are re-created for the new length. A nil value is rejected with an error.
func (e *EditTree) InsertAtPath(path ast.Ref, value *ast.Term) (*EditTree, error) {
	if value == nil {
		return nil, fmt.Errorf("cannot insert nil value into EditTree")
	}

	// Non-root case: unfold down to the parent, then insert under the last segment.
	if n := len(path); n > 0 {
		parent, err := e.Unfold(path[:n-1])
		if err != nil {
			return nil, err
		}
		return parent.Insert(path[n-1], value)
	}

	// Root case: replace this node's contents wholesale.
	e.value = value
	e.childKeys = map[int]*ast.Term{}
	e.childScalarValues = map[int]*ast.Term{}
	e.childCompositeValues = map[int]*EditTree{}
	if arr, ok := value.Value.(*ast.Array); ok {
		// One bit per element, rounded up to whole bytes (minimum one byte).
		size := arr.Len()
		numBytes := (size-1)/8 + 1
		e.eliminated = bitvector.NewBitVector(make([]byte, numBytes), size)
		e.insertions = bitvector.NewBitVector(make([]byte, numBytes), size)
	}
	return e, nil
}
// DeleteAtPath traverses down the tree from e and uses the last path
// segment as the key to delete a node from the tree.
// Returns the result of the final Delete call, or e itself when the path
// is empty.
//
// With an empty path, e itself is turned into a deleted node: its value,
// child maps and bit-vectors are all set to nil. Deleting an already
// deleted root is an error.
func (e *EditTree) DeleteAtPath(path ast.Ref) (*EditTree, error) {
	// Non-root case: unfold down to the parent, then delete the last segment.
	if n := len(path); n > 0 {
		parent, err := e.Unfold(path[:n-1])
		if err != nil {
			return nil, err
		}
		return parent.Delete(path[n-1])
	}

	// Root document case.
	if e.value == nil {
		return nil, fmt.Errorf("deleted node encountered during delete operation")
	}
	e.value = nil
	e.childKeys, e.childScalarValues, e.childCompositeValues = nil, nil, nil
	e.eliminated, e.insertions = nil, nil
	return e, nil
}
// RenderAtPath traverses down the tree from e and renders the EditTree
// node at the end of path.
//
// Any error from Unfold is returned as-is. If Unfold yields no node at all
// (a nil node without an error), an error is returned instead of
// dereferencing the nil node.
func (e *EditTree) RenderAtPath(path ast.Ref) (*ast.Term, error) {
	dest, err := e.Unfold(path)
	if err != nil {
		return nil, err
	}
	if dest == nil {
		return nil, fmt.Errorf("cannot render path %v: no node exists there", path)
	}
	return dest.Render(), nil
}
// String renders e and returns the result wrapped as "EditTree[...]".
// A node that renders to nil (a deleted node) yields the empty string.
func (e *EditTree) String() string {
	rendered := e.Render()
	if rendered == nil {
		return ""
	}
	return "EditTree[" + rendered.String() + "]"
}
// Exists reports whether path resolves to a live (non-deleted) value in the
// tree rooted at e. It is a boolean alternative to Unfold that does not
// extend the tree.
//
// A deleted node never contains anything; an empty path on a live node
// always exists. Children recorded in the child maps take precedence, and
// anything else is looked up in the base term e.value.
//
// Note that checking a Set with a composite key collapses the Set node
// (renders it and resets its child maps) as a side effect. Panics if the
// key map and child maps of an Object node are inconsistent.
func (e *EditTree) Exists(path ast.Ref) bool {
	if e.value == nil {
		return false
	}
	// 0 path segments base case. (Root hits this.)
	if len(path) == 0 {
		return true
	}

	// 1+ path segments case. Switch behavior based on types.
	key := path[0]
	switch x := e.value.Value.(type) {
	case ast.Object:
		keyHash, found := e.getKeyHash(key)
		if found {
			if term, ok := e.childScalarValues[keyHash]; ok {
				if term == nil {
					return false
				}
				return len(path) == 1
			}
			if child, ok := e.childCompositeValues[keyHash]; ok {
				if child == nil {
					return false
				}
				return child.Exists(path[1:])
			}
			// Note(philipc): We panic here, because the only way to reach
			// this panic is to have broken the bookkeeping around the key
			// and child maps in a way that is not recoverable.
			// For example, if we have an Object EditTree node, and mess up
			// the bookkeeping elsewhere by deleting just the value from
			// the child maps, *without* also deleting the key from the key
			// map, we would reach this place, where the data structure
			// *expects* a value to exist, but nothing is present.
			panic(fmt.Errorf("hash value %d not found in scalar or composite child maps", keyHash))
		}
		// Fallback if child lookup failed.
		_, err := x.Find(path)
		return err == nil

	case ast.Set:
		// Sets' keys *are* their values, so in order to allow accurate
		// traversal, we have to collapse the tree beneath this node,
		// so that we can accurately unfold it again for an update,
		// once we know that the key we care about is present.
		if isComposite(key) {
			collapsed := e.Render()
			e.value = collapsed
			e.childKeys = map[int]*ast.Term{}
			e.childScalarValues = map[int]*ast.Term{}
			e.childCompositeValues = map[int]*EditTree{}
		} else {
			keyHash, found := e.getKeyHash(key)
			if found {
				if term, ok := e.childScalarValues[keyHash]; ok {
					// A nil entry is a delete marker: the member is gone.
					if term == nil {
						return false
					}
					return len(path) == 1
				}
			}
		}
		// Fallback if child lookup failed.
		_, err := e.value.Value.Find(path)
		return err == nil

	case *ast.Array:
		idx, err := toIndex(e.insertions.Length(), key)
		if err != nil {
			return false
		}
		// Positions outside the current (edited) array never exist, even if
		// the original array was long enough to contain them.
		if idx < 0 || idx >= e.insertions.Length() {
			return false
		}
		if _, ok := e.childScalarValues[idx]; ok {
			return len(path) == 1
		}
		if child, ok := e.childCompositeValues[idx]; ok {
			return child.Exists(path[1:])
		}
		// Fallback if child lookup failed: the element must be a surviving
		// original. idx is a position in the current array, so translate it
		// to a position in the original array first: the k-th zero bit of
		// `insertions` corresponds to the k-th zero (surviving) bit of
		// `eliminated`. This mirrors the mapping used by Delete and Render.
		if e.insertions.Element(idx) != 0 {
			return false
		}
		origIdx, ok := findIndexOfNthZero(1+sumZeroesBelowIndex(idx, e.insertions), e.eliminated)
		if !ok {
			return false
		}
		// We have to ensure that the lookup term is a number here, or Find will fail.
		k := ast.Ref{ast.IntNumberTerm(origIdx)}.Concat(path[1:])
		_, err = x.Find(k)
		return err == nil

	default:
		// Catch all primitive types.
		return false
	}
}
// --------------------------------------------------------------------
// Utility functions
// toIndex converts a path element into an array index.
//
// Number terms are converted with Int. String terms are accepted as
// well: "-" yields arrayLength, and any other string is parsed as a
// number, with leading zeros rejected (except for "0" itself). Terms of
// any other type produce an error.
func toIndex(arrayLength int, term *ast.Term) (int, error) {
	switch raw := term.Value.(type) {
	case ast.Number:
		n, ok := raw.Int()
		if !ok {
			return 0, fmt.Errorf("invalid number type for indexing")
		}
		return n, nil

	case ast.String:
		if raw == "-" {
			return arrayLength, nil
		}
		n, ok := ast.Number(raw).Int()
		if !ok {
			return 0, fmt.Errorf("invalid string for indexing")
		}
		// Checked after parsing so a non-numeric string reports the
		// parse error rather than the leading-zero error.
		if raw != "0" && strings.HasPrefix(string(raw), "0") {
			return 0, fmt.Errorf("leading zeros are not allowed in JSON paths")
		}
		return n, nil
	}

	return 0, fmt.Errorf("invalid type for indexing")
}
// --------------------------------------------------------------------
// Term-level utility functions
// Filter pulls out only the values selected by paths and returns them as
// a new Term of the same kind as the node's value (Object, Set, or
// Array). Nodes holding any other kind of value are returned as-is, and
// a node without a value yields nil.
//
// This is done recursively by plucking off one level from the paths each
// time we descend a level:
//   - Empty paths, such as "" or [], are ignored. (legacy behavior)
//   - Single-segment paths select the whole child under that key.
//   - Longer paths are grouped by their first segment, and the child
//     under that segment is filtered with the remaining segments.
//
// In the event of paths like "a", "a/b", "a/b/c", the "a" path wins out.
// Keys that do not exist in the tree, or that cannot be rendered or
// unfolded, are left out of the result.
//
// Note: Values pulled from arrays will have the same approximate
// ordering in the final term.
func (e *EditTree) Filter(paths []ast.Ref) *ast.Term {
	if e.value == nil {
		return nil
	}

	// Separate out keys for this level.
	pathMap := make(map[ast.Value][]ast.Ref, len(paths))
	var renderNowList []*ast.Term
	for _, path := range paths {
		switch len(path) {
		case 0:
			continue // ignore nil paths, such as "" and [].
		case 1:
			renderNowList = append(renderNowList, path[0])
		default:
			pathMap[path[0].Value] = append(pathMap[path[0].Value], path[1:])
		}
	}
	renderNow := ast.NewSet(renderNowList...)

	// Clear everything out of the pathMap that has a renderNow candidate.
	for k := range pathMap {
		if renderNow.Contains(ast.NewTerm(k)) {
			delete(pathMap, k)
		}
	}

	// renderChild renders the complete child stored under key. The second
	// result is false when the child does not exist or cannot be rendered,
	// so that callers never place a nil Term into the output.
	renderChild := func(key *ast.Term) (*ast.Term, bool) {
		ref := ast.Ref{key}
		if !e.Exists(ref) {
			return nil, false
		}
		rendered, err := e.RenderAtPath(ref)
		if err != nil {
			return nil, false
		}
		return rendered, true
	}

	// filterChild descends into the child stored under key and filters it
	// with the remaining sub-paths. The second result is false when the
	// child does not exist or cannot be unfolded, which avoids calling
	// Filter on a nil node.
	filterChild := func(key *ast.Term, sub []ast.Ref) (*ast.Term, bool) {
		ref := ast.Ref{key}
		if !e.Exists(ref) {
			return nil, false
		}
		child, err := e.Unfold(ref)
		if err != nil || child == nil {
			return nil, false
		}
		return child.Filter(sub), true
	}

	// Now that we've reached the target, we can start rendering everything beneath us in the tree.
	switch e.value.Value.(type) {
	case ast.Object:
		out := make([][2]*ast.Term, 0, renderNow.Len()+len(pathMap))
		// Render any finished paths.
		renderNow.Foreach(func(k *ast.Term) {
			if rendered, ok := renderChild(k); ok {
				out = append(out, [2]*ast.Term{k, rendered})
			}
		})
		// Recursively descend remaining paths.
		for k, sub := range pathMap {
			key := ast.NewTerm(k)
			if filtered, ok := filterChild(key, sub); ok {
				out = append(out, [2]*ast.Term{key, filtered})
			}
		}
		return ast.ObjectTerm(out...)

	case ast.Set:
		out := make([]*ast.Term, 0, renderNow.Len()+len(pathMap))
		// Render any finished paths.
		renderNow.Foreach(func(k *ast.Term) {
			if rendered, ok := renderChild(k); ok {
				out = append(out, rendered)
			}
		})
		// Recursively descend remaining paths.
		for k, sub := range pathMap {
			if filtered, ok := filterChild(ast.NewTerm(k), sub); ok {
				out = append(out, filtered)
			}
		}
		return ast.SetTerm(out...)

	case *ast.Array:
		// No early exit here, because we might have just deletes on the
		// original array. We build a new Array with modified/deleted keys.
		total := renderNow.Len() + len(pathMap)
		out := make([]*ast.Term, 0, total)

		// Sort array indexes before descending, so that the output keeps
		// roughly the ordering of the source array.
		idxList := make([]*ast.Term, 0, total)
		renderNow.Foreach(func(k *ast.Term) {
			idxList = append(idxList, k)
		})
		for k := range pathMap {
			idxList = append(idxList, ast.NewTerm(k))
		}
		sort.Sort(termSlice(idxList))

		// Render child or recursively descend as needed.
		for _, k := range idxList {
			var (
				result *ast.Term
				ok     bool
			)
			if renderNow.Contains(k) {
				result, ok = renderChild(k)
			} else {
				result, ok = filterChild(k, pathMap[k.Value])
			}
			if ok {
				out = append(out, result)
			}
		}
		return ast.ArrayTerm(out...)

	default:
		// Scalars have nothing beneath them to select from.
		return e.value
	}
}
// termSlice implements sort.Interface for a slice of Terms, ordering the
// elements by ast.Compare applied to their values.
type termSlice []*ast.Term

// Len returns the number of terms in the slice.
func (s termSlice) Len() int { return len(s) }

// Less reports whether the value of term i compares lower than that of term j.
func (s termSlice) Less(i, j int) bool { return ast.Compare(s[i].Value, s[j].Value) < 0 }

// Swap exchanges the terms at positions i and j.
func (s termSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] }