Newer
Older
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Represents JSON data structure using native Go types: booleans, floats,
// strings, arrays, and maps.
package json
import (
	"container/vector"
	"encoding/base64"
	"os"
	"reflect"
	"runtime"
	"strconv"
	"strings"
	"unicode"
	"utf16"
	"utf8"
)
// Unmarshal parses the JSON-encoded data and stores the result
// in the value pointed to by v.
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
// Unmarshal traverses the value v recursively.
// If an encountered value implements the Unmarshaler interface,
// Unmarshal calls its UnmarshalJSON method with a well-formed
// JSON encoding.
//
// Otherwise, Unmarshal uses the inverse of the encodings that
// Marshal uses, allocating maps, slices, and pointers as necessary,
// with the following additional rules:
//
// To unmarshal a JSON value into a nil interface value, the
// type stored in the interface value is one of:
//
// bool, for JSON booleans
// float64, for JSON numbers
// string, for JSON strings
// []interface{}, for JSON arrays
// map[string]interface{}, for JSON objects
// nil for JSON null
//
// If a JSON value is not appropriate for a given target type,
// or if a JSON number overflows the target type, Unmarshal
// skips that field and completes the unmarshalling as best it can.
// If no more serious errors are encountered, Unmarshal returns
// an UnmarshalTypeError describing the earliest such error.
//
func Unmarshal(data []byte, v interface{}) os.Error {
d := new(decodeState).init(data)
// Quick check for well-formedness.
// Avoids filling out half a data structure
// before discovering a JSON syntax error.
err := checkValid(data, &d.scan)
if err != nil {
return err
// Unmarshaler is the interface implemented by objects
// that can unmarshal a JSON description of themselves.
// The input can be assumed to be a valid JSON object
// encoding. UnmarshalJSON must copy the JSON data
// if it wishes to retain the data after returning.
type Unmarshaler interface {
UnmarshalJSON([]byte) os.Error
// An UnmarshalTypeError describes a JSON value that was
// not appropriate for a value of a specific Go type.
type UnmarshalTypeError struct {
	Value string       // description of JSON value - "bool", "array", "number -5"
	Type  reflect.Type // type of Go value it could not be assigned to
}
// String renders the error as a message naming the offending JSON value
// and the Go type it could not be stored in.
func (e *UnmarshalTypeError) String() string {
	goType := e.Type.String()
	return "json: cannot unmarshal " + e.Value + " into Go value of type " + goType
}
// An UnmarshalFieldError describes a JSON object key that
// led to an unexported (and therefore unwritable) struct field.
type UnmarshalFieldError struct {
	Key   string              // JSON object key that selected the field
	Type  reflect.Type        // struct type that contains the field
	Field reflect.StructField // the unexported field the key matched
}
// String renders the error as a message naming the quoted JSON key, the
// unexported field it matched, and the type recorded in the error.
func (e *UnmarshalFieldError) String() string {
	quotedKey := strconv.Quote(e.Key)
	fieldName := e.Field.Name
	return "json: cannot unmarshal object key " + quotedKey + " into unexported field " + fieldName + " of type " + e.Type.String()
}
// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal.
// (The argument to Unmarshal must be a non-nil pointer.)
type InvalidUnmarshalError struct {
// Type is the type of the rejected argument. It may be nil, in which
// case String reports "json: Unmarshal(nil)".
Type reflect.Type
}
func (e *InvalidUnmarshalError) String() string {
if e.Type == nil {
return "json: Unmarshal(nil)"
}
return "json: Unmarshal(non-pointer " + e.Type.String() + ")"
}
return "json: Unmarshal(nil " + e.Type.String() + ")"
}
// unmarshal decodes the JSON held by d into v, which must be a non-nil
// pointer; otherwise an *InvalidUnmarshalError is returned.
func (d *decodeState) unmarshal(v interface{}) (err os.Error) {
// Decoding errors are raised by panicking. Convert a recovered
// os.Error into the named result, but let genuine runtime errors
// keep propagating as panics.
defer func() {
if r := recover(); r != nil {
if _, ok := r.(runtime.Error); ok {
panic(r)
}
err = r.(os.Error)
}
}()
// NOTE(review): rv is not declared anywhere in the visible text;
// presumably it is the reflect.Value of v and the line defining it
// was lost - confirm against the upstream file.
pv := rv
if pv.Kind() != reflect.Ptr ||
pv.IsNil() {
return &InvalidUnmarshalError{reflect.Typeof(v)}
}
// We decode rv not pv.Elem because the Unmarshaler interface
// test must be applied at the top level of the value.
d.value(rv)
// NOTE(review): the final return and the closing brace of this
// function are not visible here - confirm against the upstream file.
// decodeState represents the state while decoding a JSON value.
type decodeState struct {
data []byte // input being decoded; set by init
off int // read offset in data
scan scanner // scanner stepped byte-by-byte by scanWhile
nextscan scanner // for calls to nextValue
savedError os.Error // first error passed to saveError, if any
}
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
// errPhase is used for errors that should not happen unless
// there is a bug in the JSON decoder or something is editing
// the data slice while the decoder executes.
// The decoder raises it through d.error, i.e. by panicking, whenever
// the scanner reports an op code that is not valid at that point.
var errPhase = os.NewError("JSON decoder out of sync - data changing underfoot?")
// init points the decoder at data, rewinds the read offset and forgets
// any previously saved error. It returns d so that the call can be
// chained onto an allocation.
func (d *decodeState) init(data []byte) *decodeState {
	d.data, d.off, d.savedError = data, 0, nil
	return d
}
// error aborts the decoding by panicking with err.
// The deferred function in unmarshal recovers such a panic and turns
// the os.Error back into unmarshal's return value.
func (d *decodeState) error(err os.Error) {
panic(err)
}
// saveError remembers err only when no error has been remembered yet,
// so that the earliest problem is the one reported once the unmarshal
// has finished. Later calls leave the stored error untouched.
func (d *decodeState) saveError(err os.Error) {
	if d.savedError != nil {
		return
	}
	d.savedError = err
}
// next cuts off and returns the next full JSON value in d.data[d.off:].
// The next value is known to be an object or array, not a literal.
func (d *decodeState) next() []byte {
c := d.data[d.off]
item, rest, err := nextValue(d.data[d.off:], &d.nextscan)
if err != nil {
d.error(err)
}
d.off = len(d.data) - len(rest)
// Our scanner has seen the opening brace/bracket
// and thinks we're still in the middle of the object.
// invent a closing brace/bracket to get it out.
if c == '{' {
d.scan.step(&d.scan, '}')
} else {
d.scan.step(&d.scan, ']')
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
return item
}
// scanWhile feeds bytes from d.data[d.off:] to the scanner for as long
// as the scanner keeps answering with op, and returns the first scan
// code that differs. At least one step is always taken.
// d.off is advanced past every byte consumed; once the input is
// exhausted the scanner's eof code is used and d.off is set to
// len(d.data)+1 to record that end-of-input has been processed.
func (d *decodeState) scanWhile(op int) int {
	// Seeding with op guarantees the loop body runs at least once.
	code := op
	for code == op {
		if d.off < len(d.data) {
			b := int(d.data[d.off])
			d.off++
			code = d.scan.step(&d.scan, b)
		} else {
			code = d.scan.eof()
			d.off = len(d.data) + 1 // mark processed EOF with len+1
		}
	}
	return code
}
// value decodes a JSON value from d.data[d.off:] into the value.
// it updates d.off to point past the decoded value.
func (d *decodeState) value(v reflect.Value) {
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
_, rest, err := nextValue(d.data[d.off:], &d.nextscan)
if err != nil {
d.error(err)
}
d.off = len(d.data) - len(rest)
// d.scan thinks we're still at the beginning of the item.
// Feed in an empty string - the shortest, simplest value -
// so that it knows we got to the end of the value.
if d.scan.step == stateRedo {
panic("redo")
}
d.scan.step(&d.scan, '"')
d.scan.step(&d.scan, '"')
return
}
switch op := d.scanWhile(scanSkipSpace); op {
default:
d.error(errPhase)
case scanBeginArray:
d.array(v)
case scanBeginObject:
d.object(v)
case scanBeginLiteral:
d.literal(v)
}
}
// indirect walks down v allocating pointers as needed,
// until it gets to a non-pointer.
// if it encounters an Unmarshaler, indirect stops and returns that.
// if wantptr is true, indirect stops at the last pointer.
//
// NOTE(review): this function is incomplete in the visible text. pv is
// used without a visible declaration and several blocks are never
// closed, so lines appear to have been lost - confirm against the
// upstream file before relying on the comments below.
func (d *decodeState) indirect(v reflect.Value, wantptr bool) (Unmarshaler, reflect.Value) {
for {
var isUnmarshaler bool
// Only a type with methods can satisfy Unmarshaler.
if v.Type().NumMethod() > 0 {
// Remember that this is an unmarshaler,
// but wait to return it until after allocating
// the pointer (if necessary).
_, isUnmarshaler = v.Interface().(Unmarshaler)
}
if iv := v; iv.Kind() == reflect.Interface && !iv.IsNil() {
if pv.Elem().Kind() != reflect.Ptr &&
wantptr && !isUnmarshaler {
pv.Set(reflect.Zero(pv.Type().Elem()).Addr())
if isUnmarshaler {
// Using v.Interface().(Unmarshaler)
// here means that we have to use a pointer
// as the struct field. We cannot use a value inside
// a pointer to a struct, because in that case
// v.Interface() is the value (x.f) not the pointer (&x.f).
// This is an unfortunate consequence of reflect.
// An alternative would be to look up the
// UnmarshalJSON method and return a FuncValue.
return v.Interface().(Unmarshaler), reflect.Value{}
// array consumes an array from d.data[d.off-1:], decoding into the value v.
// the first byte of the array ('[') has been read already.
func (d *decodeState) array(v reflect.Value) {
// Check for unmarshaler.
unmarshaler, pv := d.indirect(v, false)
if unmarshaler != nil {
d.off--
err := unmarshaler.UnmarshalJSON(d.next())
if err != nil {
d.error(err)
}
return
}
v = pv
// Decoding into nil interface? Switch to non-reflect code.
iv := v
ok := iv.Kind() == reflect.Interface
if ok {
iv.Set(reflect.NewValue(d.arrayInterface()))
return
}
// Check type of target.
av := v
if av.Kind() != reflect.Array && av.Kind() != reflect.Slice {
d.saveError(&UnmarshalTypeError{"array", v.Type()})
i := 0
for {
// Look ahead for ] - can only happen on first iteration.
op := d.scanWhile(scanSkipSpace)
if op == scanEndArray {
break
}
// Back up so d.value can have the byte we just read.
d.off--
d.scan.undo(op)
// Get element of array, growing if necessary.
newcap := sv.Cap() + sv.Cap()/2
if newcap < 4 {
newcap = 4
}
newv := reflect.MakeSlice(sv.Type(), sv.Len(), newcap)
reflect.Copy(newv, sv)
// Must be slice; gave up on array during i >= av.Cap().
sv.SetLen(i + 1)
}
// Decode into element.
if i < av.Len() {
} else {
// Ran out of fixed array: skip.
}
i++
// Next token must be , or ].
op = d.scanWhile(scanSkipSpace)
if op == scanEndArray {
break
}
if op != scanArrayValue {
d.error(errPhase)
}
}
if i < av.Len() {
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
}
} else {
sv.SetLen(i)
}
}
}
// matchName reports whether a JSON object key selects the struct field
// called name. The two are compared after lower-casing both, so the
// match ignores letter case.
func matchName(key, name string) bool {
	lowerKey := strings.ToLower(key)
	lowerName := strings.ToLower(name)
	return lowerKey == lowerName
}
// object consumes an object from d.data[d.off-1:], decoding into the value v.
// the first byte of the object ('{') has been read already.
func (d *decodeState) object(v reflect.Value) {
// Check for unmarshaler.
unmarshaler, pv := d.indirect(v, false)
if unmarshaler != nil {
d.off--
err := unmarshaler.UnmarshalJSON(d.next())
if err != nil {
d.error(err)
}
return
}
v = pv
// Decoding into nil interface? Switch to non-reflect code.
iv := v
if iv.Kind() == reflect.Interface {
iv.Set(reflect.NewValue(d.objectInterface()))
return
}
// Check type of target: struct or map[string]T
var (
if t.Key() != reflect.Typeof("") {
d.saveError(&UnmarshalTypeError{"object", v.Type()})
break
}
mv = v
if mv.IsNil() {
sv = v
default:
d.saveError(&UnmarshalTypeError{"object", v.Type()})
}
d.off--
d.next() // skip over { } in input
return
}
for {
// Read opening " of string key or closing }.
op := d.scanWhile(scanSkipSpace)
if op == scanEndObject {
// closing } - can only happen on first iteration.
break
}
if op != scanBeginLiteral {
d.error(errPhase)
}
// Read string key.
start := d.off - 1
op = d.scanWhile(scanContinue)
item := d.data[start : d.off-1]
key, ok := unquote(item)
if !ok {
d.error(errPhase)
}
// Figure out field corresponding to key.
if mv.IsValid() {
subv = reflect.Zero(mv.Type().Elem())
var f reflect.StructField
var ok bool
// First try for field with that tag.
if isValidTag(key) {
for i := 0; i < sv.NumField(); i++ {
f = st.Field(i)
if f.Tag == key {
ok = true
break
}
f, ok = st.FieldByName(key)
}
if !ok {
// Third, case-insensitive match.
f, ok = st.FieldByNameFunc(func(s string) bool { return matchName(key, s) })
}
// Extract value; name must be exported.
if ok {
if f.PkgPath != "" {
d.saveError(&UnmarshalFieldError{key, st, f})
} else {
subv = sv.FieldByIndex(f.Index)
}
}
// Read : before value.
if op == scanSkipSpace {
op = d.scanWhile(scanSkipSpace)
}
if op != scanObjectKey {
d.error(errPhase)
}
// Read value.
d.value(subv)
// Write value back to map;
// if using struct, subv points into struct already.
if mv.IsValid() {
mv.SetMapIndex(reflect.NewValue(key), subv)
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
}
// Next token must be , or }.
op = d.scanWhile(scanSkipSpace)
if op == scanEndObject {
break
}
if op != scanObjectValue {
d.error(errPhase)
}
}
}
// literal consumes a literal from d.data[d.off-1:], decoding into the value v.
// The first byte of the literal has been read already
// (that's how the caller knows it's a literal).
func (d *decodeState) literal(v reflect.Value) {
// All bytes inside literal return scanContinue op code.
start := d.off - 1
op := d.scanWhile(scanContinue)
// Scan read one byte too far; back up.
d.off--
d.scan.undo(op)
item := d.data[start:d.off]
// Check for unmarshaler.
wantptr := item[0] == 'n' // null
unmarshaler, pv := d.indirect(v, wantptr)
if unmarshaler != nil {
err := unmarshaler.UnmarshalJSON(item)
if err != nil {
d.error(err)
}
return
}
v = pv
switch c := item[0]; c {
case 'n': // null
default:
d.saveError(&UnmarshalTypeError{"null", v.Type()})
case reflect.Interface, reflect.Ptr, reflect.Map:
v.Set(reflect.Zero(v.Type()))
}
case 't', 'f': // true, false
value := c == 't'
default:
d.saveError(&UnmarshalTypeError{"bool", v.Type()})
case reflect.Bool:
v.SetBool(value)
case reflect.Interface:
v.Set(reflect.NewValue(value))
}
case '"': // string
if !ok {
d.error(errPhase)
}
default:
d.saveError(&UnmarshalTypeError{"string", v.Type()})
if v.Type() != byteSliceType {
d.saveError(&UnmarshalTypeError{"string", v.Type()})
break
}
b := make([]byte, base64.StdEncoding.DecodedLen(len(s)))
n, err := base64.StdEncoding.Decode(b, s)
if err != nil {
d.saveError(err)
break
}
v.Set(reflect.NewValue(b[0:n]))
case reflect.String:
v.SetString(string(s))
case reflect.Interface:
}
default: // number
if c != '-' && (c < '0' || c > '9') {
d.error(errPhase)
}
s := string(item)
default:
d.error(&UnmarshalTypeError{"number", v.Type()})
n, err := strconv.Atof64(s)
if err != nil {
d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
break
}
v.Set(reflect.NewValue(n))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
break
}
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
break
}
n, err := strconv.AtofN(s, v.Type().Bits())
d.saveError(&UnmarshalTypeError{"number " + s, v.Type()})
break
}
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
// The xxxInterface routines build up a value to be stored
// in an empty interface. They are not strictly necessary,
// but they avoid the weight of reflection in this common case.
// valueInterface is the reflection-free counterpart of value: it skips
// leading space, decodes the next JSON value and returns it as an
// interface{}. Any token other than the start of an array, object or
// literal aborts decoding with errPhase.
func (d *decodeState) valueInterface() interface{} {
	op := d.scanWhile(scanSkipSpace)
	if op == scanBeginArray {
		return d.arrayInterface()
	}
	if op == scanBeginObject {
		return d.objectInterface()
	}
	if op == scanBeginLiteral {
		return d.literalInterface()
	}
	d.error(errPhase)
	panic("unreachable")
}
// arrayInterface is the reflection-free counterpart of array: it
// decodes the elements of a JSON array and returns them as a
// []interface{}.
func (d *decodeState) arrayInterface() []interface{} {
	var elems vector.Vector
	// Each pass starts by peeking at the next token; a ] seen here can
	// only be the one closing an empty array.
	for op := d.scanWhile(scanSkipSpace); op != scanEndArray; op = d.scanWhile(scanSkipSpace) {
		// Un-read the byte so valueInterface sees the start of the element.
		d.off--
		d.scan.undo(op)

		elems.Push(d.valueInterface())

		// After an element only , or ] may follow.
		sep := d.scanWhile(scanSkipSpace)
		if sep == scanEndArray {
			break
		}
		if sep != scanArrayValue {
			d.error(errPhase)
		}
	}
	return elems
}
// objectInterface is like object but returns map[string]interface{}.
func (d *decodeState) objectInterface() map[string]interface{} {
m := make(map[string]interface{})
for {
// Read opening " of string key or closing }.
op := d.scanWhile(scanSkipSpace)
if op == scanEndObject {
// closing } - can only happen on first iteration.
break
}
if op != scanBeginLiteral {
d.error(errPhase)
}
// Read string key.
start := d.off - 1
op = d.scanWhile(scanContinue)
item := d.data[start : d.off-1]
key, ok := unquote(item)
if !ok {
d.error(errPhase)
}
// Read : before value.
if op == scanSkipSpace {
op = d.scanWhile(scanSkipSpace)
}
if op != scanObjectKey {
d.error(errPhase)
}
// Read value.
m[key] = d.valueInterface()
// Next token must be , or }.
op = d.scanWhile(scanSkipSpace)
if op == scanEndObject {
break
}
if op != scanObjectValue {
d.error(errPhase)
}
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
// literalInterface is the reflection-free counterpart of literal: it
// consumes the literal whose first byte has already been read and
// returns nil, a bool, a string or a float64 depending on that byte.
// A number that fails to parse is recorded with saveError and the
// parser's result is still returned.
func (d *decodeState) literalInterface() interface{} {
	// Every byte inside the literal yields scanContinue.
	begin := d.off - 1
	code := d.scanWhile(scanContinue)

	// The scan consumed one byte beyond the literal; give it back.
	d.off--
	d.scan.undo(code)
	lit := d.data[begin:d.off]

	first := lit[0]
	switch {
	case first == 'n': // null
		return nil

	case first == 't' || first == 'f': // true, false
		return first == 't'

	case first == '"': // string
		str, ok := unquote(lit)
		if !ok {
			d.error(errPhase)
		}
		return str

	default: // number
		if first != '-' && (first < '0' || first > '9') {
			d.error(errPhase)
		}
		text := string(lit)
		num, err := strconv.Atof64(text)
		if err != nil {
			d.saveError(&UnmarshalTypeError{"number " + text, reflect.Typeof(0.0)})
		}
		return num
	}
	panic("unreachable")
}
// getu4 reads a \uXXXX escape at the start of s and returns the value of
// its four hexadecimal digits. It returns -1 when s is shorter than six
// bytes, does not begin with \u, or the digits do not parse as hex.
func getu4(s []byte) int {
	if len(s) >= 6 && s[0] == '\\' && s[1] == 'u' {
		if v, err := strconv.Btoui64(string(s[2:6]), 16); err == nil {
			return int(v)
		}
	}
	return -1
}
// unquote turns a quoted JSON string literal s into the Go string it
// denotes, delegating the byte-level work to unquoteBytes.
// JSON's escaping rules differ from Go's, which is why strconv.Unquote
// is not used. ok is whatever unquoteBytes reported.
func unquote(s []byte) (t string, ok bool) {
	raw, valid := unquoteBytes(s)
	return string(raw), valid
}
func unquoteBytes(s []byte) (t []byte, ok bool) {
if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
return
}
s = s[1 : len(s)-1]
// Check for unusual characters. If there are none,
// then no unquoting is needed, so return a slice of the
// original bytes.
r := 0
for r < len(s) {
c := s[r]
if c == '\\' || c == '"' || c < ' ' {
break
}
if c < utf8.RuneSelf {
r++
continue
}
rune, size := utf8.DecodeRune(s[r:])
if rune == utf8.RuneError && size == 1 {
break
}
r += size
}
if r == len(s) {
return s, true
}
b := make([]byte, len(s)+2*utf8.UTFMax)
w := copy(b, s[0:r])
for r < len(s) {
// Out of room? Can only happen if s is full of
// malformed UTF-8 and we're replacing each
// byte with RuneError.
if w >= len(b)-2*utf8.UTFMax {
nb := make([]byte, (len(b)+utf8.UTFMax)*2)
copy(nb, b[0:w])
b = nb
}
switch c := s[r]; {
case c == '\\':
r++
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
return
}
switch s[r] {
default:
return
case '"', '\\', '/', '\'':
b[w] = s[r]
r++
w++
case 'b':
b[w] = '\b'
r++
w++
case 'f':
b[w] = '\f'
r++
w++
case 'n':
b[w] = '\n'
r++
w++
case 'r':
b[w] = '\r'
r++
w++
case 't':
b[w] = '\t'
r++
w++
case 'u':
r--
rune := getu4(s[r:])
if rune < 0 {
return
}
r += 6
if utf16.IsSurrogate(rune) {
rune1 := getu4(s[r:])
if dec := utf16.DecodeRune(rune, rune1); dec != unicode.ReplacementChar {
// A valid pair; consume.
r += 6
w += utf8.EncodeRune(b[w:], dec)
break
}
// Invalid surrogate; fall back to replacement rune.
rune = unicode.ReplacementChar
}
w += utf8.EncodeRune(b[w:], rune)
}
// Quote, control characters are invalid.
case c == '"', c < ' ':
return
// ASCII
case c < utf8.RuneSelf:
b[w] = c
r++
w++
// Coerce to well-formed UTF-8.
default:
rune, size := utf8.DecodeRune(s[r:])
r += size
w += utf8.EncodeRune(b[w:], rune)