Plan 9 from Bell Labs’s /usr/web/sources/contrib/ericvh/go-plan9/src/pkg/json/parse.go

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// JSON (JavaScript Object Notation) parser.
// See http://www.json.org/

// The json package implements a simple parser and
// representation for JSON (JavaScript Object Notation),
// as defined at http://www.json.org/.
package json

import (
	"bytes";
	"strconv";
	"utf8";
)

// Strings
//
//   Double quoted with escapes: \" \\ \/ \b \f \n \r \t \uXXXX.
//   No literal control characters, supposedly.
//   Have also seen \' and embedded newlines.

// _UnHex interprets the bytes p[r:l] as a hexadecimal number, most
// significant digit first, accepting both upper and lower case digits.
// It returns ok == false (and v == 0) if the range runs past the end
// of p or contains a byte that is not a hex digit.  An empty range
// yields 0, true.
func _UnHex(p string, r, l int) (v int, ok bool) {
	for pos := r; pos < l; pos++ {
		if pos >= len(p) {
			return 0, false
		}
		c := p[pos];
		var digit int;
		if '0' <= c && c <= '9' {
			digit = int(c - '0')
		} else if 'a' <= c && c <= 'f' {
			digit = int(c-'a') + 10
		} else if 'A' <= c && c <= 'F' {
			digit = int(c-'A') + 10
		} else {
			return 0, false
		}
		// Shift the digits seen so far up one nibble and append this one.
		v = v<<4 | digit;
	}
	return v, true;
}

// _ToHex stores the low 16 bits of rune in b[0:4] as four lower-case
// hexadecimal digits, most significant digit first.  Bits above the
// low 16 are ignored.  b must have room for at least four bytes.
func _ToHex(b []byte, rune int) {
	const hexDigits = "0123456789abcdef";
	for i := 0; i < 4; i++ {
		// Digit i is the nibble 12, 8, 4, 0 bits up from the bottom.
		b[i] = hexDigits[(rune>>uint(12-4*i))&0xf]
	}
}

// Unquote unquotes the JSON-quoted string s,
// returning a raw string t.  If s is not a valid
// JSON-quoted string, Unquote returns with ok set to false.
//
// Besides the standard JSON escapes, Unquote accepts \' and a raw
// newline inside the string; any other raw control character makes
// the string invalid.  A \uXXXX escape holding a high surrogate that
// is immediately followed by a \uXXXX escape holding a low surrogate
// is decoded as the single rune the pair stands for.  Bytes that do
// not form valid UTF-8 are replaced by utf8.RuneError.
func Unquote(s string) (t string, ok bool) {
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return "", false
	}
	end := len(s) - 1;	// index of the closing quote

	// Decoding usually shrinks the text, but every malformed UTF-8
	// byte is replaced by the three-byte encoding of utf8.RuneError,
	// so the output can outgrow the input and the buffer must be
	// able to grow.
	b := make([]byte, len(s)+utf8.UTFMax);
	w := 0;
	for r := 1; r < end; {
		// Guarantee room for the longest encoding that a single
		// iteration can write.
		if len(b)-w < utf8.UTFMax {
			nb := make([]byte, 2*len(b));
			for i := 0; i < w; i++ {
				nb[i] = b[i]
			}
			b = nb;
		}

		c := s[r];
		switch {
		case c == '\\':
			r++;
			if r >= end {
				// The backslash escapes the closing quote.
				return "", false
			}
			switch s[r] {
			default:
				return "", false
			case '"', '\\', '/', '\'':
				b[w] = s[r]
			case 'b':
				b[w] = '\b'
			case 'f':
				b[w] = '\f'
			case 'n':
				b[w] = '\n'
			case 'r':
				b[w] = '\r'
			case 't':
				b[w] = '\t'
			case 'u':
				r++;
				// _UnHex fails if the four digits would run into
				// or past the closing quote, since '"' is not hex.
				rune, valid := _UnHex(s, r, r+4);
				if !valid {
					return "", false
				}
				r += 4;
				// A high surrogate followed by an escaped low
				// surrogate is the UTF-16 spelling of one rune
				// beyond U+FFFF; join the two halves.
				if 0xD800 <= rune && rune < 0xDC00 && r+1 < end && s[r] == '\\' && s[r+1] == 'u' {
					lo, lok := _UnHex(s, r+2, r+6);
					if lok && 0xDC00 <= lo && lo < 0xE000 {
						rune = 0x10000 + ((rune - 0xD800) << 10) + (lo - 0xDC00);
						r += 6;
					}
				}
				w += utf8.EncodeRune(rune, b[w:]);
				continue;
			}
			// Single-character escape: one byte read, one written.
			r++;
			w++;

		// Control characters are invalid, but we've seen raw \n,
		// which is let through to the ASCII case below.
		case c < ' ' && c != '\n':
			return "", false

		// ASCII
		case c < utf8.RuneSelf:
			b[w] = c;
			r++;
			w++;

		// Coerce to well-formed UTF-8.
		default:
			rune, size := utf8.DecodeRuneInString(s[r:]);
			r += size;
			w += utf8.EncodeRune(rune, b[w:]);
		}
	}
	return string(b[0:w]), true;
}

// Quote quotes the raw string s using JSON syntax,
// so that Unquote(Quote(s)) = s, true for any s that is valid UTF-8.
//
// The quote and backslash characters and the control characters
// \b \f \n \r \t get their two-character escapes.  Other ASCII from
// 0x20 up is copied as is.  Every remaining rune up to U+FFFF,
// including the other control characters, is written as \uXXXX.
// Runes above U+FFFF do not fit in four hex digits and are copied
// through as UTF-8.  Invalid UTF-8 in s comes out as \ufffd.
func Quote(s string) string {
	chr := make([]byte, 6);
	chr0 := chr[0:1];
	b := new(bytes.Buffer);
	chr[0] = '"';
	b.Write(chr0);

	for i, rune := range s {
		// esc is the letter of the two-character escape for rune,
		// or 0 if rune has none.
		var esc byte;
		switch rune {
		case '"', '\\':
			esc = byte(rune)
		case '\b':
			esc = 'b'
		case '\f':
			esc = 'f'
		case '\n':
			esc = 'n'
		case '\r':
			esc = 'r'
		case '\t':
			esc = 't'
		}

		switch {
		case esc != 0:
			chr[0] = '\\';
			chr[1] = esc;
			b.Write(chr[0:2]);

		case 0x20 <= rune && rune < utf8.RuneSelf:
			chr[0] = byte(rune);
			b.Write(chr0);

		case rune > 0xFFFF:
			// \uXXXX would silently drop the high bits.  range only
			// yields such a rune for a well-formed sequence, so its
			// bytes in s can be copied unchanged; JSON allows them
			// unescaped and Unquote decodes them to the same rune.
			b.WriteString(s[i : i+utf8.RuneLen(rune)]);

		default:
			chr[0] = '\\';
			chr[1] = 'u';
			_ToHex(chr[2:6], rune);
			b.Write(chr);
		}
	}
	chr[0] = '"';
	b.Write(chr0);
	return b.String();
}


// _Lexer

// A _Lexer splits a JSON text into tokens.  Each call to Next scans
// one token starting at offset i and records it in kind and token.
type _Lexer struct {
	s	string;	// the complete input text
	i	int;	// offset in s at which the next scan starts
	// kind classifies the current token: 0 at end of input, '1' for
	// a number, 'a' for a word, '"' for a string, the punctuation
	// byte itself for [ ] : { } , and '?' for anything else.
	kind	int;
	token	string;	// text of the current token; "" at end of input
}

// punct reports whether c is a byte that ends a bare token:
// the double quote or one of the structural characters [ ] : { } ,
func punct(c byte) bool {
	switch c {
	case '"', '[', ']', ':', '{', '}', ',':
		return true
	}
	return false;
}

// white reports whether c is a white space byte:
// space, tab, carriage return, newline or vertical tab.
func white(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n', '\v':
		return true
	}
	return false;
}

// skipwhite returns the index of the first byte of p at or after i
// that is not white space, or len(p) if only white space remains.
func skipwhite(p string, i int) int {
	for ; i < len(p); i++ {
		if !white(p[i]) {
			break
		}
	}
	return i;
}

// skiptoken returns the index of the first byte of p at or after i
// that is punctuation or white space, or len(p) if there is none.
// That index is one past the end of a bare token beginning at i.
func skiptoken(p string, i int) int {
	for i < len(p) {
		c := p[i];
		if punct(c) || white(c) {
			break
		}
		i++;
	}
	return i;
}

// skipstring returns the index just past the quoted string whose
// opening quote is p[i].  A backslash hides the byte after it, so an
// escaped quote does not end the string.  If the string is not
// terminated the result is len(p); it is never larger, even when the
// input ends in a backslash, so p[i:result] is always a valid slice.
func skipstring(p string, i int) int {
	for i++; i < len(p); i++ {
		switch p[i] {
		case '"':
			return i + 1
		case '\\':
			// Step over the escaped byte.  This may move i to
			// len(p), after which the loop simply ends.
			i++
		}
	}
	return len(p);
}

// Next scans the token that begins at or after offset t.i, skipping
// leading white space, and stores its class in t.kind and its text
// in t.token.  At end of input it sets kind to 0 and token to "".
// Otherwise t.i is left just past the token, except for an
// unrecognized byte (kind '?'), which is reported but not consumed.
func (t *_Lexer) Next() {
	src := t.s;
	start := skipwhite(src, t.i);
	if start >= len(src) {
		// Nothing but white space remained.
		t.i = len(src);
		t.kind = 0;
		t.token = "";
		return;
	}

	c := src[start];
	end := start + 1;	// punctuation tokens are a single byte
	switch {
	case c == '"':
		t.kind = '"';
		end = skipstring(src, start);

	case c == '-' || ('0' <= c && c <= '9'):
		t.kind = '1';
		end = skiptoken(src, start);

	case ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'):
		t.kind = 'a';
		end = skiptoken(src, start);

	case c == '[' || c == ']' || c == ':' || c == '{' || c == '}' || c == ',':
		t.kind = int(c);

	default:
		// Unrecognized byte: hand it back as the token but leave
		// the position on it rather than after it.
		t.kind = '?';
		t.token = src[start : start+1];
		t.i = start;
		return;
	}

	t.token = src[start:end];
	t.i = end;
}


// Parser
//
// Implements parsing but not the actions.  Those are
// carried out by the implementation of the Builder interface.
// A Builder represents the object being created.
// Calling a method like Int64(i) sets that object to i.
// Calling a method like Elem(i) or Key(s) creates a
// new builder for a subpiece of the object (logically,
// an array element or a map key).
//
// There are two Builders, in other files.
// The JsonBuilder builds a generic Json structure
// in which maps are maps.
// The StructBuilder copies data into a possibly
// nested data structure, using the "map keys"
// as struct field names.

// _Value is the empty interface, so a value of any type satisfies it.
// Nothing in this file refers to it; presumably the Builder
// implementations in other files do -- TODO confirm.
type _Value interface{}

// BUG(rsc): The json Builder interface needs to be
// reconciled with the xml Builder interface.

// A Builder is an interface implemented by clients and passed
// to the JSON parser.  It gives clients full control over the
// eventual representation returned by the parser.
//
// For each JSON value the parser calls exactly one of the
// "set value" methods, unless the value is malformed, and then
// calls Flush once, whether or not the value parsed successfully.
type Builder interface {
	// Set value

	// Int64 is called for a number that parses as a signed
	// 64-bit integer.
	Int64(i int64);
	// Uint64 is called for a number that does not parse as an
	// int64 but does parse as an unsigned 64-bit integer.
	Uint64(i uint64);
	// Float64 is called for a number that parses as neither kind
	// of integer but does parse as floating point.
	Float64(f float64);
	// String is called for a string, with the quoting removed.
	String(s string);
	// Bool is called for the words true and false.
	Bool(b bool);
	// Null is called for the word null.
	Null();
	// Array is called at the opening [ of an array,
	// before any of its elements is parsed.
	Array();
	// Map is called at the opening { of an object,
	// before any of its members is parsed.
	Map();

	// Create sub-Builders

	// Elem returns the Builder for array element i, counting from 0.
	Elem(i int) Builder;
	// Key returns the Builder for the object member whose
	// unquoted name is s.
	Key(s string) Builder;

	// Flush changes to parent Builder if necessary.
	Flush();
}

// parse parses one JSON value whose first token is the lexer's
// current token and reports it to build.  Array elements and object
// members are parsed recursively with the sub-Builders obtained from
// build.Elem and build.Key.
//
// On success parse advances the lexer past the value and returns
// true.  On failure it returns false and leaves the lexer on the
// token at which parsing stopped.  build.Flush is called once in
// either case.
func parse(lex *_Lexer, build Builder) bool {
	ok := false;
Switch:
	switch lex.kind {
	case 0:
		// End of input where a value was expected.
		break
	case '1':
		// If the number is exactly an integer, use that.
		if i, err := strconv.Atoi64(lex.token); err == nil {
			build.Int64(i);
			ok = true;
		} else if i, err := strconv.Atoui64(lex.token); err == nil {
			build.Uint64(i);
			ok = true;
		} else
		// Fall back to floating point.
		if f, err := strconv.Atof64(lex.token); err == nil {
			build.Float64(f);
			ok = true;
		}

	case 'a':
		// Only these three words are values; any other word
		// leaves ok false.
		switch lex.token {
		case "true":
			build.Bool(true);
			ok = true;
		case "false":
			build.Bool(false);
			ok = true;
		case "null":
			build.Null();
			ok = true;
		}

	case '"':
		if str, ok1 := Unquote(lex.token); ok1 {
			build.String(str);
			ok = true;
		}

	case '[':
		// array
		build.Array();
		lex.Next();
		n := 0;
		for lex.kind != ']' {
			// Every element after the first must follow a comma.
			if n > 0 {
				if lex.kind != ',' {
					break Switch
				}
				lex.Next();
			}
			// The recursive call consumes the element's tokens.
			if !parse(lex, build.Elem(n)) {
				break Switch
			}
			n++;
		}
		ok = true;

	case '{':
		// map
		lex.Next();
		build.Map();
		n := 0;
		for lex.kind != '}' {
			// Every member after the first must follow a comma.
			if n > 0 {
				if lex.kind != ',' {
					break Switch
				}
				lex.Next();
			}
			// A member is a quoted key, a colon, and a value.
			if lex.kind != '"' {
				break Switch
			}
			// This ok is local to the loop body and shadows the
			// function's result; the break below therefore
			// leaves the outer ok false.
			key, ok := Unquote(lex.token);
			if !ok {
				break Switch
			}
			lex.Next();
			if lex.kind != ':' {
				break Switch
			}
			lex.Next();
			if !parse(lex, build.Key(key)) {
				break Switch
			}
			n++;
		}
		ok = true;
	}

	// Step past the value's last token only on success, so that
	// after a failure the lexer still holds the offending token.
	if ok {
		lex.Next()
	}
	build.Flush();
	return ok;
}

// Parse parses the JSON text s, reporting what it finds through
// calls on builder, which constructs the parsed representation.
// The text must consist of exactly one JSON value; anything but
// white space after that value is an error.
// On success Parse returns ok == true, errindx == 0 and errtok == "".
// On error it returns ok == false, with errindx set to the lexer's
// byte offset in s when parsing stopped and errtok set to the
// offending token.
func Parse(s string, builder Builder) (ok bool, errindx int, errtok string) {
	lex := new(_Lexer);
	lex.s = s;
	lex.Next();
	// Fail if the value is malformed or is not followed by EOF.
	if !parse(lex, builder) || lex.kind != 0 {
		return false, lex.i, lex.token
	}
	return true, 0, "";
}

Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].