Files
kubesphere/vendor/github.com/jszwec/csvutil/decoder.go
2021-03-16 10:27:20 +08:00

488 lines
13 KiB
Go

package csvutil
import (
"io"
"reflect"
)
// decField describes how one CSV column is decoded into one struct field.
// Values of this type are built by Decoder.fields and consumed by
// Decoder.unmarshal.
type decField struct {
// columnIndex is the position of the matching column; it is used to index
// both the header and every record.
columnIndex int
// field is the embedded description of the struct field. The Decoder reads
// its name, tag, index path and types.
field
// decodeFunc is the embedded function that is called with the column's string
// value and the destination reflect.Value.
decodeFunc
// zero is the value passed to Decoder.Map as its v argument. It is only
// populated when Map is set at the time the field list is built.
zero interface{}
}
// A Decoder reads and decodes string records into structs.
type Decoder struct {
// Tag defines which key in the struct field's tag to scan for names and
// options (Default: 'csv').
Tag string
// If true, Decoder will return a MissingColumnsError if it discovers
// that any of the columns are missing. This means that a CSV input
// will be required to contain all columns that were defined in the
// provided struct.
DisallowMissingColumns bool
// If not nil, Map is a function that is called for each field in the csv
// record before decoding the data. It allows mapping certain string values
// for specific columns or types to a known format. Decoder calls Map with
// the field's string value, the current column name (taken from header) and
// a zero non-pointer value of the type into which the data is going to be
// decoded. Implementations should use type assertions to recognize the type.
//
// A good example of a use case for Map is when NaN values are represented
// by e.g. an 'n/a' string: implementing a specific Map function for all
// floats could map 'n/a' back into 'NaN' to allow successful decoding.
//
// Use Map with caution. If the requirements of column or type are not met
// Map should return 'field' unchanged: it is the original value that was
// read from the csv input, so returning it indicates no change.
//
// If struct field is an interface v will be of type string, unless the
// struct field contains a settable pointer value - then v will be a zero
// value of that type.
//
// Map must be set before the first call to Decode and not changed after it.
Map func(field, col string, v interface{}) string
// r is the source from which records are read.
r Reader
// typeKey identifies the tag/type pair for which cache was built.
typeKey typeKey
// hmap maps a header column name to its index in header.
hmap map[string]int
// header is the Decoder's own copy of the header.
header []string
// record is the most recently read record.
record []string
// cache holds the field list built for typeKey so that it is not rebuilt
// while the same tag/type pair keeps being decoded.
cache []decField
// unused lists the column indexes that matched no struct field the last
// time the field list was built.
unused []int
// funcMap holds the functions added with Register, keyed by the type of
// their second argument.
funcMap map[reflect.Type]reflect.Value
// ifaceFuncs holds the registered functions whose second argument is an
// interface type, in registration order.
ifaceFuncs []reflect.Value
}
// NewDecoder creates a Decoder that reads its records from r.
//
// Struct fields are matched against the given header. When no header is
// passed, NewDecoder reads one record from r and uses it as the header; an
// error from that read (io.EOF when r holds no data) is returned to the
// caller as is.
//
// Every record subsequently read from r must have the same length as the
// header.
//
// The header is copied, so the Decoder does not retain the caller's slice.
// If a column name occurs more than once, the last occurrence is the one
// used for matching.
func NewDecoder(r Reader, header ...string) (dec *Decoder, err error) {
	if len(header) == 0 {
		if header, err = r.Read(); err != nil {
			return nil, err
		}
	}

	// Private copy of the header with length and capacity equal to its size.
	columns := append(make([]string, 0, len(header)), header...)

	// Column name -> position lookup used when matching struct fields.
	index := make(map[string]int, len(columns))
	for pos, name := range columns {
		index[name] = pos
	}

	dec = &Decoder{
		r:      r,
		header: columns,
		hmap:   index,
		unused: make([]int, 0, len(columns)),
	}
	return dec, nil
}
// Decode reads the next string record (or records) from the input and stores
// the result in the value pointed to by v. v must be a non-nil pointer to a
// struct, a struct slice or a struct array; anything else is reported with an
// *InvalidDecodeError.
//
// Decode matches all exported struct fields based on the header. Matching can
// be adjusted with struct field tags.
//
// The "omitempty" option specifies that the field should be omitted from
// the decoding if the record's field is an empty string.
//
// Examples of struct field tags and their meanings:
//
//	// Decode matches this field with "myName" header column.
//	Field int `csv:"myName"`
//
//	// Decode matches this field with "Field" header column.
//	Field int
//
//	// Decode matches this field with "myName" header column and decoding is
//	// not called if record's field is an empty string.
//	Field int `csv:"myName,omitempty"`
//
//	// Decode matches this field with "Field" header column and decoding is
//	// not called if record's field is an empty string.
//	Field int `csv:",omitempty"`
//
//	// Decode ignores this field.
//	Field int `csv:"-"`
//
//	// Decode treats this field exactly as if it was an embedded field and
//	// matches header columns that start with "my_prefix_" to all fields of
//	// this type.
//	Field Struct `csv:"my_prefix_,inline"`
//
//	// Decode treats this field exactly as if it was an embedded field.
//	Field Struct `csv:",inline"`
//
// By default Decode looks for the "csv" tag; this can be changed by setting
// the Decoder.Tag field.
//
// To decode into a custom type, that type must implement csvutil.Unmarshaler
// or encoding.TextUnmarshaler.
//
// Anonymous struct fields with tags are treated like normal fields and they
// must implement csvutil.Unmarshaler or encoding.TextUnmarshaler unless the
// inline tag is specified.
//
// Anonymous struct fields without tags are populated just as if they were
// part of the main struct. However, fields in the main struct have higher
// priority and are populated first. If the main struct and an anonymous
// struct field have the same fields, the main struct's fields are populated.
//
// Fields of type []byte expect the data to be base64 encoded strings.
//
// Float fields are decoded to NaN if the string value is 'NaN'. This check
// is case insensitive.
//
// Interface fields are decoded to strings unless they contain a settable
// pointer value.
//
// Pointer fields are decoded to nil if the string value is empty.
//
// If v is a slice, Decode resets it and reads the input until EOF, storing
// all decoded values in the given slice. Decode returns nil on EOF unless no
// records were decoded, in which case it returns io.EOF.
//
// If v is an array, Decode reads the input until EOF or until it decodes all
// corresponding array elements. If the input contains fewer elements than the
// array, the remaining array elements are set to zero values. Decode returns
// nil on EOF unless no records were decoded, in which case it returns io.EOF.
//
// Fields with inline tags that have a non-empty prefix must not be cyclic
// structures. Passing such values to Decode will result in an infinite loop.
func (d *Decoder) Decode(v interface{}) (err error) {
	ptr := reflect.ValueOf(v)
	if ptr.Kind() != reflect.Ptr || ptr.IsNil() {
		return &InvalidDecodeError{Type: reflect.TypeOf(v)}
	}

	// Resolve pointers (allocating nil ones) down to the value to fill in.
	target := indirect(ptr.Elem())

	switch kind := target.Kind(); kind {
	case reflect.Struct:
		return d.decodeStruct(target)
	case reflect.Slice:
		return d.decodeSlice(target)
	case reflect.Array:
		return d.decodeArray(target)
	case reflect.Interface, reflect.Invalid:
		// Report the type found inside the interface when there is one;
		// otherwise fall back to the type of the pointer that was passed in.
		if inner := walkValue(target); inner.Kind() != reflect.Invalid {
			return &InvalidDecodeError{Type: inner.Type()}
		}
		return &InvalidDecodeError{Type: ptr.Type()}
	}
	return &InvalidDecodeError{Type: reflect.PtrTo(target.Type())}
}
// Record returns the most recently read record. The returned slice is the
// Decoder's internal one, not a copy; it is valid until the next call to
// Decode.
func (d *Decoder) Record() []string {
return d.record
}
// Header returns the header in use: either the first line that came from the
// reader or the header supplied by the caller of NewDecoder. The result is a
// fresh copy, so modifying it does not affect the Decoder.
func (d *Decoder) Header() []string {
	return append(make([]string, 0, len(d.header)), d.header...)
}
// Unused returns the indexes of the columns that were not used during
// decoding because no struct field matched them. It returns nil when there
// are none; otherwise the result is a copy that the caller may modify.
func (d *Decoder) Unused() []int {
	if n := len(d.unused); n > 0 {
		out := make([]int, n)
		copy(out, d.unused)
		return out
	}
	return nil
}
// Register registers a custom decoding function for a concrete type or
// interface. The argument f must be of type:
//
//	func([]byte, T) error
//
// T must be a concrete type such as *time.Time, or an interface that has at
// least one method.
//
// During decoding, fields are matched by the concrete type first. If no match
// is found then Decoder looks if the field implements any of the registered
// interfaces, in the order they were registered.
//
// Register panics if:
//   - f is nil or does not match the right signature
//   - T is an empty interface
//   - a function for T was already registered
//
// Register is based on the encoding/json proposal:
// https://github.com/golang/go/issues/5901.
func (d *Decoder) Register(f interface{}) {
	// reflect.TypeOf returns nil for an untyped nil f. Checking for that here
	// reports the documented signature error instead of the unrelated panic
	// that calling Type on the zero reflect.Value would raise.
	typ := reflect.TypeOf(f)
	if typ == nil || typ.Kind() != reflect.Func ||
		typ.NumIn() != 2 || typ.NumOut() != 1 ||
		typ.In(0) != _bytes || typ.Out(0) != _error {
		panic("csvutil: func must be of type func([]byte, T) error")
	}

	argType := typ.In(1)
	if argType.Kind() == reflect.Interface && argType.NumMethod() == 0 {
		panic("csvutil: func argument type must not be an empty interface")
	}

	// The map is created lazily so that the zero funcMap costs nothing for
	// Decoders that never register a function.
	if d.funcMap == nil {
		d.funcMap = make(map[reflect.Type]reflect.Value)
	}
	if _, ok := d.funcMap[argType]; ok {
		panic("csvutil: func " + typ.String() + " already registered")
	}

	v := reflect.ValueOf(f)
	d.funcMap[argType] = v

	// Interface-typed functions are additionally kept in registration order.
	if argType.Kind() == reflect.Interface {
		d.ifaceFuncs = append(d.ifaceFuncs, v)
	}
}
// decodeSlice resets slice to length zero and then decodes records into new
// elements until the reader reports io.EOF.
//
// It returns an *InvalidDecodeError when the slice's element type does not
// resolve to a struct, io.EOF when not a single record could be read, and the
// first decoding error otherwise. On success the slice's capacity is trimmed
// to its length.
func (d *Decoder) decodeSlice(slice reflect.Value) error {
	elemType := slice.Type().Elem()
	if walkType(elemType).Kind() != reflect.Struct {
		return &InvalidDecodeError{Type: reflect.PtrTo(slice.Type())}
	}

	slice.SetLen(0)

	decoded := 0
	for {
		item := reflect.New(elemType)

		err := d.decodeStruct(indirect(item))
		if err == io.EOF {
			break
		}

		// The element is appended even when it was only partially decoded
		// because of an error. This is how the JSON package does it.
		slice.Set(reflect.Append(slice, item.Elem()))
		if err != nil {
			return err
		}
		decoded++
	}

	if decoded == 0 {
		return io.EOF
	}

	slice.Set(slice.Slice3(0, decoded, decoded))
	return nil
}
// decodeArray decodes one record into each element of the array v, stopping
// at io.EOF or when the array is full. Elements that received no record are
// set to the zero value of the element type.
//
// It returns an *InvalidDecodeError when the array's element type does not
// resolve to a struct, io.EOF when the very first read hits EOF, and the
// first decoding error otherwise.
func (d *Decoder) decodeArray(v reflect.Value) error {
	elemType := v.Type().Elem()
	if walkType(elemType).Kind() != reflect.Struct {
		return &InvalidDecodeError{Type: reflect.PtrTo(v.Type())}
	}

	size := v.Len()
	filled := 0

read:
	for filled < size {
		err := d.decodeStruct(indirect(v.Index(filled)))
		switch {
		case err == io.EOF:
			if filled == 0 {
				return io.EOF
			}
			break read
		case err != nil:
			return err
		}
		filled++
	}

	// Clear whatever the input did not cover.
	zero := reflect.Zero(elemType)
	for rest := filled; rest < size; rest++ {
		v.Index(rest).Set(zero)
	}
	return nil
}
// decodeStruct reads the next record from the reader, remembers it as the
// Decoder's current record and decodes it into the struct value v.
//
// Any error from the reader is returned as is. ErrFieldCount is returned when
// the record's length differs from the header's length.
func (d *Decoder) decodeStruct(v reflect.Value) (err error) {
	if d.record, err = d.r.Read(); err != nil {
		return err
	}

	if len(d.record) == len(d.header) {
		return d.unmarshal(d.record, v)
	}
	return ErrFieldCount
}
// unmarshal decodes record into the struct value v, one matched field at a
// time. It returns the error from building the field list, an error for a nil
// pointer on the field path that cannot be set, or the first error returned
// by a field's decode function.
func (d *Decoder) unmarshal(record []string, v reflect.Value) error {
// The field list depends on both the active tag and v's type.
fields, err := d.fields(typeKey{d.tag(), v.Type()})
if err != nil {
return err
}
fieldLoop:
for _, f := range fields {
isBlank := record[f.columnIndex] == ""
// omitempty: an empty column leaves the field untouched.
if f.tag.omitEmpty && isBlank {
continue
}
// Walk the index path from v down to the destination field, allocating
// nil pointers met on the way.
fv := v
for n, i := range f.index {
fv = fv.Field(i)
if fv.Kind() == reflect.Ptr {
if fv.IsNil() {
// A nil pointer leaf with an empty column stays nil.
if isBlank && n == len(f.index)-1 { // ensure we are on the leaf.
continue fieldLoop
}
// this can happen if a field is an unexported embedded
// pointer type. In Go prior to 1.10 it was possible to
// set such value because of a bug in the reflect package
// https://github.com/golang/go/issues/21353
if !fv.CanSet() {
return errPtrUnexportedStruct(fv.Type())
}
fv.Set(reflect.New(fv.Type().Elem()))
}
// A non-nil pointer leaf with an empty column is reset to the zero
// value of its pointer type.
if isBlank && n == len(f.index)-1 { // ensure we are on the leaf.
fv.Set(reflect.Zero(fv.Type()))
continue fieldLoop
}
if n != len(f.index)-1 {
fv = fv.Elem() // walk pointer until we are on the leaf.
}
}
}
s := record[f.columnIndex]
// Give Map a chance to rewrite the raw value. f.zero is only populated
// when Map was set while the field list was built.
if d.Map != nil && f.zero != nil {
zero := f.zero
// For a non-nil interface field whose underlying value is settable, hand
// Map a zero value of that underlying type instead.
// NOTE(review): walkPtr and walkValue are defined elsewhere; presumably
// they follow pointers and interfaces respectively - confirm.
if fv := walkPtr(fv); fv.Kind() == reflect.Interface && !fv.IsNil() {
if v := walkValue(fv); v.CanSet() {
zero = reflect.Zero(v.Type()).Interface()
}
}
s = d.Map(s, d.header[f.columnIndex], zero)
}
if err := f.decodeFunc(s, fv); err != nil {
return err
}
}
return nil
}
// fields returns the list of decodable fields for the tag/type pair k, i.e.
// the struct fields whose names occur in the header, each paired with its
// column index and decode function.
//
// The list built by the previous call is reused as long as k does not change.
// Building a new list also recomputes the set of unused columns. Neither the
// cached list nor the unused columns are touched when an error is returned.
//
// An error is returned when no decode function can be obtained for a matched
// field, or, with DisallowMissingColumns set, when struct fields have no
// matching column (*MissingColumnsError).
func (d *Decoder) fields(k typeKey) ([]decField, error) {
	if d.typeKey == k {
		return d.cache, nil
	}

	structFields := cachedFields(k)
	plan := make([]decField, 0, len(structFields))
	matched := make([]bool, len(d.header))
	var missing []string

	for _, sf := range structFields {
		col, found := d.hmap[sf.name]
		if !found {
			if d.DisallowMissingColumns {
				missing = append(missing, sf.name)
			}
			continue
		}

		decode, err := decodeFn(sf.baseType, d.funcMap, d.ifaceFuncs)
		if err != nil {
			return nil, err
		}

		// The zero value is only needed as an argument for Map.
		var zero interface{}
		if d.Map != nil {
			if sf.typ.Kind() == reflect.Interface {
				zero = "" // interface values are decoded to strings
			} else {
				zero = reflect.Zero(walkType(sf.typ)).Interface()
			}
		}

		plan = append(plan, decField{
			columnIndex: col,
			field:       sf,
			decodeFunc:  decode,
			zero:        zero,
		})
		matched[col] = true
	}

	if len(missing) > 0 {
		return nil, &MissingColumnsError{
			Columns: missing,
		}
	}

	// Rebuild the unused column list in place, reusing its backing array.
	d.unused = d.unused[:0]
	for col, ok := range matched {
		if ok {
			continue
		}
		d.unused = append(d.unused, col)
	}

	d.cache, d.typeKey = plan, k
	return d.cache, nil
}
// tag returns the struct tag key to scan: Decoder.Tag when it is set,
// defaultTag otherwise.
func (d *Decoder) tag() string {
	if name := d.Tag; name != "" {
		return name
	}
	return defaultTag
}
// indirect follows pointers starting at v until it reaches a value that is
// not a pointer, and returns that value. Nil pointers met on the way are
// replaced with newly allocated values, so v must be settable wherever that
// happens.
//
// An interface is only looked into when it holds a non-nil pointer, in which
// case walking continues with that pointer. A nil interface, or one holding
// anything else, is returned as is.
func indirect(v reflect.Value) reflect.Value {
	cur := v
	for {
		kind := cur.Kind()

		if kind == reflect.Ptr {
			if cur.IsNil() {
				cur.Set(reflect.New(cur.Type().Elem()))
			}
			cur = cur.Elem()
			continue
		}

		if kind != reflect.Interface || cur.IsNil() {
			return cur
		}

		inner := cur.Elem()
		if inner.Kind() != reflect.Ptr || inner.IsNil() {
			return cur
		}
		cur = inner
	}
}