JSON pointer

Revision as of 01:33, 20 August 2023 by Petelomax (talk | contribs) (→‎{{header|Phix}}: rogue trailing comma)

JSON Pointer is a syntax for targeting a value in a JSON (JavaScript Object Notation) document.

JSON pointer is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Standardized in RFC 6901, a JSON Pointer is a Unicode string containing slash (/) separated tokens. Each token is either a potential property name for a JSON object, or a potential index for a JSON array. When a property name contains a slash (/) or a tilde (~), they are encoded as ~1 and ~0, respectively.

Pseudocode

Evaluating or resolving a JSON Pointer against an arbitrary JSON document might look like this.

set the current node to the document root
FOR each token in the pointer
  decode the token
  IF the current node is an array
    IF the token is a string representation of an array index AND the index is in range
      set the current node to node[index]
    ELSE
      error condition
    ENDIF
  ELSE IF the current node is an object
    IF the current node has a property matching token
      set the current node to node[token]
    ELSE
      error condition
    ENDIF
  ELSE
    error condition
  ENDIF
ENDFOR

Barring any error conditions, the result is the value of the current node after the loop has completed. For example, evaluating the JSON Pointer /foo/bar/1 against the JSON document {"foo": {"bar": ["hello", "world"]}} would result in the value "world".

See RFC 6901 for details.

Task

Demonstrate parsing and evaluation of JSON Pointers using, at least, the following examples. Display, on this page, either the resulting JSON value for each test case or a helpful error message.

Example JSON document
{
  "wiki": {
    "links": [
      "https://rosettacode.org/wiki/Rosetta_Code",
      "https://discord.com/channels/1011262808001880065"
    ]
  },
  "": "Rosetta",
  " ": "Code",
  "g/h": "chrestomathy",
  "i~j": "site",
  "abc": ["is", "a"],
  "def": { "": "programming" }
}
Example JSON pointers

JSON pointers are surrounded by quotes to emphasize whitespace and the empty pointer. The quotes are not part of the pointer.

JSON pointer Expected result
"" The entire input document.
"/" "Rosetta"
"/ " "Code"
"/abc" ["is", "a"]
"/def/" "programming"
"/g~1h" "chrestomathy"
"/i~0j" "site"
"/wiki/links/0" "https://rosettacode.org/wiki/Rosetta_Code"
"/wiki/links/1" "https://discord.com/channels/1011262808001880065"
"/wiki/links/2" Error condition.
"/wiki/name" Error condition.
"/no/such/thing" Error condition.
"bad/pointer" Error condition.
Reference
Related tasks

Go

package main

import (
	"encoding/json"
	"fmt"
	"log"
	"os"
	"regexp"
	"strconv"
	"strings"
)

// reIndex matches the decimal text of a non-negative array index: either a
// lone "0" or a run of digits that does not begin with "0".
var reIndex = regexp.MustCompile(`^(0|[1-9][0-9]*)$`)

// JSONPointer is the ordered list of decoded reference tokens that make up a
// JSON Pointer. A pointer with no tokens refers to the whole document.
type JSONPointer []string

// NewJSONPointer parses the text form of a JSON Pointer into its decoded
// reference tokens.
//
// The empty string is the whole-document pointer and yields a pointer with no
// tokens. Every other pointer must begin with "/"; if it does not, an error is
// returned and the result is nil.
func NewJSONPointer(pointer string) (*JSONPointer, error) {
	var decoded JSONPointer

	switch {
	case pointer == "":
		return &decoded, nil
	case !strings.HasPrefix(pointer, "/"):
		return nil, fmt.Errorf(
			"\"%s\" pointers must start with a slash or be the empty string", pointer)
	}

	// Drop the leading slash, then split what remains into raw tokens.
	for _, raw := range strings.Split(pointer[1:], "/") {
		// "~1" has to be expanded before "~0" so that an encoded "~01"
		// decodes to "~1" rather than "/".
		raw = strings.ReplaceAll(raw, "~1", "/")
		raw = strings.ReplaceAll(raw, "~0", "~")
		decoded = append(decoded, raw)
	}
	return &decoded, nil
}

// Resolve walks data one token at a time and returns the value the pointer
// targets. If a step cannot be taken, it returns a nil value and an error
// naming the encoded prefix of the pointer up to and including that step.
func (p JSONPointer) Resolve(data interface{}) (interface{}, error) {
	current := data
	for depth := range p {
		next, ok := getItem(current, p[depth])
		if !ok {
			return nil, fmt.Errorf("\"%s\" does not exist", encode(p[:depth+1]))
		}
		current = next
	}
	return current, nil
}

// String returns the pointer in its encoded text form, so a JSONPointer can
// be printed with the fmt verbs that call String.
func (p JSONPointer) String() string {
	return encode(p)
}

// encode renders decoded tokens as JSON Pointer text: each token is escaped
// ("~" becomes "~0", then "/" becomes "~1") and prefixed with a slash. An
// empty token list produces the empty string.
func encode(tokens []string) string {
	if len(tokens) == 0 {
		return ""
	}

	var sb strings.Builder
	for _, token := range tokens {
		sb.WriteByte('/')
		// Escape "~" first; otherwise the "~" of a freshly written "~1"
		// would be escaped a second time.
		escaped := strings.ReplaceAll(token, "~", "~0")
		sb.WriteString(strings.ReplaceAll(escaped, "/", "~1"))
	}
	return sb.String()
}

func getItem(data interface{}, token string) (interface{}, bool) {
	switch data.(type) {
	case []interface{}:
		return getArrayItem(data.([]interface{}), token)
	case map[string]interface{}:
		return getObjectItem(data.(map[string]interface{}), token)
	default:
		return nil, false
	}
}

// getArrayItem returns the element of array selected by token. The token
// must match reIndex, convert to an int, and be smaller than the array
// length; otherwise the result is (nil, false).
func getArrayItem(array []interface{}, token string) (interface{}, bool) {
	if !reIndex.MatchString(token) {
		return nil, false
	}

	// Atoi can still fail here when the digits do not fit in an int.
	idx, err := strconv.Atoi(token)
	if err != nil || idx >= len(array) {
		return nil, false
	}
	return array[idx], true
}

// getObjectItem returns the value stored under token in object, and whether
// such a key exists. A key whose value is nil still counts as present.
func getObjectItem(object map[string]interface{}, token string) (interface{}, bool) {
	val, ok := object[token]
	if !ok {
		return nil, false
	}
	return val, true
}

// prettyJSON renders data as JSON indented by two spaces per level. If the
// value cannot be marshalled, the error is logged and the program exits.
func prettyJSON(data interface{}) string {
	encoded, err := json.MarshalIndent(data, "", "  ")
	if err == nil {
		return string(encoded)
	}
	log.Fatal(err)
	return "" // not reached: log.Fatal exits the process
}

// loadJSON reads the file named f and decodes its contents as JSON into a
// generic value. A read or decode failure is logged and ends the program.
func loadJSON(f string) interface{} {
	raw, err := os.ReadFile(f)
	if err != nil {
		log.Fatal(err)
	}

	var doc interface{}
	if err := json.Unmarshal(raw, &doc); err != nil {
		log.Fatal(err)
	}
	return doc
}

// examples lists the JSON Pointers that main parses and resolves, in order.
var examples = []string{
	"",
	"/",
	"/ ",
	"/abc",
	"/def/",
	"/g~1h",
	"/i~0j",
	"/wiki/links/0",
	"/wiki/links/1",
	"/wiki/links/2",
	"/wiki/name",
	"/no/such/thing",
	"bad/pointer",
}

// main loads example.json and, for each entry in examples, prints either the
// resolved value as indented JSON or the parse/resolve error.
func main() {
	document := loadJSON("example.json")
	for _, raw := range examples {
		pointer, err := NewJSONPointer(raw)
		if err != nil {
			fmt.Printf("error: %v\n\n", err)
			continue
		}

		value, err := pointer.Resolve(document)
		if err != nil {
			fmt.Printf("error: %v\n\n", err)
			continue
		}
		fmt.Printf("\"%s\" -> %s\n\n", pointer, prettyJSON(value))
	}
}
Output:
"" -> {
  "": "Rosetta",
  " ": "Code",
  "abc": [
    "is",
    "a"
  ],
  "def": {
    "": "programming"
  },
  "g/h": "chrestomathy",
  "i~j": "site",
  "wiki": {
    "links": [
      "https://rosettacode.org/wiki/Rosetta_Code",
      "https://discord.com/channels/1011262808001880065"
    ]
  }
}

"/" -> "Rosetta"

"/ " -> "Code"

"/abc" -> [
  "is",
  "a"
]

"/def/" -> "programming"

"/g~1h" -> "chrestomathy"

"/i~0j" -> "site"

"/wiki/links/0" -> "https://rosettacode.org/wiki/Rosetta_Code"

"/wiki/links/1" -> "https://discord.com/channels/1011262808001880065"

error: "/wiki/links/2" does not exist

error: "/wiki/name" does not exist

error: "/no" does not exist

error: "bad/pointer" pointers must start with a slash or be the empty string

JavaScript

ES2022

/**
 * A parsed JSON Pointer (RFC 6901) that can be resolved against JSON-like
 * data (objects, arrays, strings, numbers, booleans and null).
 */
class JSONPointer {
  /** Decoded reference tokens in order; empty for the whole-document pointer. */
  #tokens;

  /**
   * @param {string} pointer Pointer text: either "" or a string starting with "/".
   * @throws {JSONPointerError} If the pointer is non-empty and does not start with "/".
   */
  constructor(pointer) {
    this.#tokens = this.#parse(pointer);
  }

  /**
   * Follow the pointer through `data` and return the value it targets.
   *
   * @param {*} data The document to resolve against.
   * @returns {*} The targeted value; `data` itself for the empty pointer.
   * @throws {JSONPointerError} If any step of the pointer does not exist.
   */
  resolve(data) {
    return this.#tokens.reduce(this.#getItem.bind(this), data);
  }

  /** @returns {string} The pointer re-encoded as text. */
  toString() {
    return this.#encode(this.#tokens);
  }

  /**
   * Split pointer text into decoded tokens.
   *
   * @param {string} pointer
   * @returns {string[]}
   */
  #parse(pointer) {
    if (pointer.length && !pointer.startsWith("/")) {
      throw new JSONPointerError(
        `\"${pointer}\" pointers must start with a slash or be the empty string`
      );
    }

    // "~1" is decoded before "~0" so that "~01" becomes "~1", not "/".
    // slice(1) discards the piece in front of the leading slash (and the
    // lone "" produced by splitting the empty pointer).
    return pointer
      .split("/")
      .map((token) => token.replaceAll("~1", "/").replaceAll("~0", "~"))
      .slice(1);
  }

  /**
   * Reducer step: return `obj[token]`, or throw if there is no such member.
   *
   * @param {*} obj The current node.
   * @param {string} token The decoded token to follow.
   * @param {number} idx Position of `token` in the pointer, for error text.
   * @returns {*}
   * @throws {JSONPointerError} If `obj` has no own member named `token`.
   */
  #getItem(obj, token, idx) {
    // NOTE:
    //   - `typeof null` is "object", and Object.hasOwn(null, ...) throws a
    //     TypeError, so null must be rejected explicitly.
    //   - string primitives "have own" indices and `length`; the typeof
    //     check keeps them out.
    //   - Arrays have an own `length` property that is not an element.
    //   - A property might exist with the value `undefined`, hence hasOwn
    //     rather than comparing obj[token] with undefined.
    //   - obj[1] is equivalent to obj["1"].
    if (
      typeof obj === "object" &&
      obj !== null &&
      !(Array.isArray(obj) && token === "length") &&
      Object.hasOwn(obj, token)
    )
      return obj[token];
    throw new JSONPointerError(
      `\"${this.#encode(this.#tokens.slice(0, idx + 1))}\" does not exist`
    );
  }

  /**
   * Encode tokens as pointer text, escaping "~" as "~0" and "/" as "~1".
   *
   * @param {string[]} tokens
   * @returns {string} "" when there are no tokens.
   */
  #encode(tokens) {
    if (!tokens.length) return "";
    return (
      "/" +
      tokens
        .map((token) => token.replaceAll("~", "~0").replaceAll("/", "~1"))
        .join("/")
    );
  }
}

/** Thrown for malformed pointers and for pointers that cannot be resolved. */
class JSONPointerError extends Error {}

// The example document that every pointer in `examples` is resolved against.
const doc = {
  wiki: {
    links: [
      "https://rosettacode.org/wiki/Rosetta_Code",
      "https://discord.com/channels/1011262808001880065",
    ],
  },
  "": "Rosetta",
  " ": "Code",
  "g/h": "chrestomathy",
  "i~j": "site",
  abc: ["is", "a"],
  def: { "": "programming" },
};

// JSON Pointers to try, in the order their results are printed.
const examples = [
  "",
  "/",
  "/ ",
  "/abc",
  "/def/",
  "/g~1h",
  "/i~0j",
  "/wiki/links/0",
  "/wiki/links/1",
  "/wiki/links/2",
  "/wiki/name",
  "/no/such/thing",
  "bad/pointer",
];

// Parse and resolve each example, printing the value or the pointer error.
// Any error that is not a JSONPointerError is a genuine bug and is rethrown.
examples.forEach((p) => {
  try {
    const value = new JSONPointer(p).resolve(doc);
    console.log(`"${p}" -> ${JSON.stringify(value, undefined, 2)}\n`);
  } catch (err) {
    if (!(err instanceof JSONPointerError)) throw err;
    console.log(`error: ${err.message}\n`);
  }
});
Output:
"" -> {
  "wiki": {
    "links": [
      "https://rosettacode.org/wiki/Rosetta_Code",
      "https://discord.com/channels/1011262808001880065"
    ]
  },
  "": "Rosetta",
  " ": "Code",
  "g/h": "chrestomathy",
  "i~j": "site",
  "abc": [
    "is",
    "a"
  ],
  "def": {
    "": "programming"
  }
}

"/" -> "Rosetta"

"/ " -> "Code"

"/abc" -> [
  "is",
  "a"
]

"/def/" -> "programming"

"/g~1h" -> "chrestomathy"

"/i~0j" -> "site"

"/wiki/links/0" -> "https://rosettacode.org/wiki/Rosetta_Code"

"/wiki/links/1" -> "https://discord.com/channels/1011262808001880065"

error: "/wiki/links/2" does not exist

error: "/wiki/name" does not exist

error: "/no" does not exist

error: "bad/pointer" pointers must start with a slash or be the empty string

Phix

Note that parse_json() in version 1.0.3 and earlier does not support unquoted keys or trailing commas. This is fixed in 1.0.4, but for now at least the dtxt constant below has a few additional quotes added and trailing commas removed.

with javascript_semantics
include builtins\json.e
-- Parse JSON Pointer p into a sequence of decoded tokens.
-- The empty pointer yields {}; any other pointer that does not begin
-- with '/' causes an exception to be thrown.
function parseJSONPointer(string p)
    if p="" then return {} end if
    if p[1]!='/' then throw("pointers must start with a slash or be the empty string") end if
    -- Split on '/', replace "~1" with "/" and "~0" with "~" in each piece,
    -- then drop the first piece ([2..$]), which precedes the leading slash.
    return apply(true,substitute_all,{split(p,'/',false),{{"~1","~0"}},{{"/","~"}}})[2..$]
end function

-- Return s with every "%XX" escape replaced by the single character whose
-- code is the hex value XX. Only the digits "0123456789ABCDEF" (upper case)
-- are recognised; a '%' not followed by two such digits is left untouched.
function uri_percent_decode(string s)
    -- eg `/k%22l` -> `/k"l`
    -- pc holds the position of each '%' in s; rc holds a one-character
    -- replacement string for each of those positions.
    sequence pc = find_all('%',s), 
             rc = repeat(" ",length(pc))
    for j,i in pc do
        integer nb1 = 0, nb2 = 0
        -- only look at the two following characters if they exist
        if i<=length(s)-2 then
            nb1 = find(s[i+1],"0123456789ABCDEF")
            nb2 = find(s[i+2],"0123456789ABCDEF")
        end if
        if nb1=0 or nb2=0 then
            -- not a valid escape: zero the position so it is skipped below
            pc[j] = 0
        else
            -- find() results are 1-based, hence the -1 on each digit
            rc[j][1] = (nb1-1)*16+nb2-1
        end if
    end for
    -- Replace from the last '%' backwards so that shrinking s does not
    -- shift the positions still to be processed; rc[-i] is the entry of
    -- rc that corresponds to the i-th element of the reversed pc.
    for i,j in reverse(pc) do
        if j then s[j..j+2] = rc[-i] end if
    end for
    return s
end function

-- Follow the decoded tokens through json (as produced by parse_json())
-- and return the value reached. Throws if an array index is out of range,
-- an object has no matching field, or the current value is neither an
-- array nor an object.
function resolve(object json, sequence tokens)
    for t in tokens do
        -- the first element of a non-empty sequence is its type tag
        integer jtype = iff(sequence(json) and length(json)?json[1]:0)
        if jtype=JSON_ARRAY then
            -- Note: we expect "JSON Pointers" to contain 0-based indices,
            -- but Phix indices are 1-based, //and// parse_json() returns
            -- sequences with first element of JSON_ARRAY/OBJECT/KEYWORD.
            -- (presumably to_integer() yields the -1 default when t is not
            --  an integer, making k 0 and failing the test below - confirm)
            integer k = to_integer(t,-1)+1
            if k<1 or k>=length(json) then throw("bad index: "&t) end if
            json = json[k+1]
        elsif jtype=JSON_OBJECT then
            object jt = extract_json_field(json, t, {})  
            if jt={} then
                -- no field named t: retry with any %XX escapes decoded
                jt = extract_json_field(json,uri_percent_decode(t),{})
            end if
            -- Aside: parse_json() returns {JSON_OBJECT} not {}, and
            --  likewise {JSON_ARRAY} to represent the empty array [],
            --  hence {} is a sound choice for the "not found" value.
            if jt={} then throw(t&": no such field") end if
            json = jt
        else -- native/keyword
            throw("non subscriptable: %v [%s]",{json,t})
        end if
    end for
    return json
end function

-- dtxt is the example document as JSON text; doc is the result of parsing it.
constant dtxt = """
{
  "wiki": {
    "links": [
      "https://rosettacode.org/wiki/Rosetta_Code",
      "https://discord.com/channels/1011262808001880065"
    ]
  },
  "": "Rosetta",
  " ": "Code",
  "g/h": "chrestomathy",
  "i~j": "site",
  "abc": ["is", "a"],
  "def": { "": "programming" }
}""", doc = parse_json(dtxt)

-- The JSON Pointers to try, in the order their results are printed.
constant tests = {
  "",
  "/",
  "/ ",
  "/abc",
  "/def/",
  "/g~1h",  -- or "/g%2Fh",
  "/i~0j",  -- or "/i%7Ej",
  "/wiki/links/0",
  "/wiki/links/1",
  "/wiki/links/2",
  "/wiki/name",
  "/no/such/thing",
  "bad/pointer",
}

-- Parse and resolve each test pointer against doc, printing either the
-- resulting JSON value or the message of the exception that was thrown.
for p in tests do
  try 
    sequence ptr = parseJSONPointer(p)
    object result = resolve(doc,ptr)
    printf(1,"%v -> %s\n",{p,print_json("",result)})
  catch e 
    printf(1,"%v => Error %s\n\n",{p,e[E_USER]})
  end try
end for
Output:
"" -> {
 "wiki":{
         "links":[
                  "https://rosettacode.org/wiki/Rosetta_Code",
                  "https://discord.com/channels/1011262808001880065"
                 ]
        },
 "":"Rosetta",
 " ":"Code",
 "g/h":"chrestomathy",
 "i~j":"site",
 "abc":[
        "is",
        "a"
       ],
 "def":{
        "":"programming"
       }
}

"/" -> "Rosetta"

"/ " -> "Code"

"/abc" -> [
 "is",
 "a"
]

"/def/" -> "programming"

"/g~1h" -> "chrestomathy"

"/i~0j" -> "site"

"/wiki/links/0" -> "https://rosettacode.org/wiki/Rosetta_Code"

"/wiki/links/1" -> "https://discord.com/channels/1011262808001880065"

"/wiki/links/2" => Error bad index: 2

"/wiki/name" => Error name: no such field

"/no/such/thing" => Error no: no such field

"bad/pointer" => Error pointers must start with a slash or be the empty string