S-expressions: Difference between revisions

From Rosetta Code
Content added Content deleted
m (→‎{{header|Pike}}: make output more readable)
m (→‎{{header|Scheme}}: make output more readable)
Line 1,076: Line 1,076:
Output:
Output:
"((data \"quoted data\" 123 4.5)\n (data (!@# (4.5) \"(more\" \"data)\")))"
"((data \"quoted data\" 123 4.5)\n (data (!@# (4.5) \"(more\" \"data)\")))"

((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))
((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))

"((data \"quoted data\" 123 4.5) (data (!@# (4.5) \"(more\" \"data)\")))"
"((data \"quoted data\" 123 4.5) (data (!@# (4.5) \"(more\" \"data)\")))"



Revision as of 00:17, 21 October 2011

S-expressions is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

S-Expressions are one convenient way to parse and store data.

Write a simple reader and writer for S-Expressions that handles quoted and unquoted strings, integers and floats.

The reader should read a single but nested S-Expression from a string and store it in a suitable datastructure (list, array, etc). Newlines and other whitespace may be ignored unless contained within a quoted string. () inside quoted strings are not interpreted, but treated as part of the string. Handling escaped quotes inside a string is optional. thus (foo"bar) maybe treated as a string 'foo"bar', or as an error.

For this, the reader need not recognise '\' for escaping, but should, in addition, recognize numbers if the language has appropriate datatypes.


Languages that support it may treat unquoted strings as symbols.

Note that with the exception of ()" (\ if escaping is supported) and whitespace there are no special characters. Anything else is allowed without quotes.

The reader should be able to read the following input <lang lisp>((data "quoted data" 123 4.5)

(data (!@# (4.5) "(more" "data)")))</lang>

and turn it into a native datastructure.

The writer should be able to take the produced list and turn it into a new S-Expression. Strings that don't contain whitespace or parentheses () don't need to be quoted in the resulting S-Expression, but as a simplification, any string may be quoted.

C

<lang c>#include <stdio.h>

  1. include <stdlib.h>
  2. include <ctype.h>
  3. include <string.h>

enum { S_NONE, S_LIST, S_STRING, S_SYMBOL };

typedef struct { int type; size_t len; void *buf; } s_expr, *expr;

void whine(char *s) { fprintf(stderr, "parse error before ==>%.10s\n", s); }

expr parse_string(char *s, char **e) { expr ex = calloc(sizeof(s_expr), 1); char buf[256] = {0}; int i = 0;

while (*s) { if (i >= 256) { fprintf(stderr, "string too long:\n"); whine(s); goto fail; } switch (*s) { case '\\': switch (*++s) { case '\\': case '"': buf[i++] = *s++; continue;

default: whine(s); goto fail; } case '"': goto success; default: buf[i++] = *s++; } } fail: free(ex); return 0;

success: *e = s + 1; ex->type = S_STRING; ex->buf = strdup(buf); ex->len = strlen(buf); return ex; }

expr parse_symbol(char *s, char **e) { expr ex = calloc(sizeof(s_expr), 1); char buf[256] = {0}; int i = 0;

while (*s) { if (i >= 256) { fprintf(stderr, "symbol too long:\n"); whine(s); goto fail; } if (isspace(*s)) goto success; if (*s == ')' || *s == '(') { s--; goto success; }

switch (*s) { case '\\': switch (*++s) { case '\\': case '"': case '(': case ')': buf[i++] = *s++; continue; default: whine(s); goto fail; } case '"': whine(s); goto success; default: buf[i++] = *s++; } } fail: free(ex); return 0;

success: *e = s + 1; ex->type = S_SYMBOL; ex->buf = strdup(buf); ex->len = strlen(buf); return ex; }

void append(expr list, expr ele) { list->buf = realloc(list->buf, sizeof(expr) * ++list->len); ((expr*)(list->buf))[list->len - 1] = ele; }

expr parse_list(char *s, char **e) { expr ex = calloc(sizeof(s_expr), 1), chld; char *next;

ex->len = 0;

while (*s) { if (isspace(*s)) { s++; continue; }

switch (*s) { case '"': chld = parse_string(s+1, &next); if (!chld) goto fail; append(ex, chld); s = next; continue; case '(': chld = parse_list(s+1, &next); if (!chld) goto fail; append(ex, chld); s = next; continue; case ')': goto success;

default: chld = parse_symbol(s, &next); if (!chld) goto fail; append(ex, chld); s = next; continue; } }

fail: whine(s); free(ex); return 0;

success: *e = s+1; ex->type = S_LIST; return ex; }

expr parse_term(char *s, char **e) { while (*s) { if (isspace(*s)) { s++; continue; } switch(*s) { case '(': return parse_list(s+1, e); case '"': return parse_string(s+1, e); default: return parse_symbol(s+1, e); } } return 0; }

void print_expr(expr e, int depth) {

  1. define sep() for(i = 0; i < depth; i++) printf(" ")

int i; if (!e) return;


switch(e->type) { case S_LIST: sep(); puts("("); for (i = 0; i < e->len; i++) print_expr(((expr*)e->buf)[i], depth + 1); sep(); puts(")"); return; case S_SYMBOL: case S_STRING: sep(); if (e->type == S_STRING) putchar('"'); for (i = 0; i < e->len; i++) { switch(((char*)e->buf)[i]) { case '"': case '\\': putchar('\\'); break; case ')': case '(': if (e->type == S_SYMBOL) putchar('\\'); }

putchar(((char*)e->buf)[i]); } if (e->type == S_STRING) putchar('"'); putchar('\n'); return; } }

int main() { char *next, *in = "((data da\\(\\)ta \"quot\\\\ed data\" 123 4.5)\n" " (\"data\" (!@# (4.5) \"(mo\\\"re\" \"data)\")))";

expr x = parse_term(in, &next);

printf("input is:\n%s\n", in); printf("parsed as:\n"); print_expr(x, 0); return 0; }</lang>output<lang>input is: ((data da\(\)ta "quot\\ed data" 123 4.5)

("data" (!@# (4.5) "(mo\"re" "data)")))

parsed as: (

   (
       data
       da\(\)ta
       "quot\\ed data"
       123
       4.5
   )
   (
       "data"
       (
           !@#
           (
               4.5
           )
           "(mo\"re"
           "data)"
       )
   )

)</lang>

Common Lisp

This example is in need of improvement:

Please demonstrate how to write a parser in lisp. see Talk:S-Expressions#lisp_solutions.

Seeing as Lisp syntax is made up of s-expressions, it has built-in facilities for parsing them. The read function parses an s-expression from an input stream. <lang lisp>(setq input "((data \"quoted data\" 123 4.5)

(data (!@# (4.5) \"(more\" \"data)\")))")

(setq data (read (make-string-input-stream input))) (setq output (format nil "~S" data)) (print input) (print data) (print output)</lang> Output:

"((data \"quoted data\" 123 4.5)
 (data (!@# (4.5) \"(more\" \"data)\")))" 
((DATA "quoted data" 123 4.5) (DATA (|!@#| (4.5) "(more" "data)"))) 
"((DATA \"quoted data\" 123 4.5) (DATA (|!@#| (4.5) \"(more\" \"data)\")))"

Icon and Unicon

The following should suffice as a demonstration. String escaping and quoting could be handled more robustly. The example takes single and double qoutes. Single quotes were used instead of doubles in the input. <lang Icon>link ximage

procedure main()

 in := "((data 'quoted data' 123 4.5) (data (!@# (4.5) '(more' 'data)')))"
 # in := map(in,"'","\"") # uncomment to put back double quotes if desired
 write("Input:  ",image(in))
 write("Structure: \n",ximage(S := string2sexp(in)))
 write("Output:  ",image(sexp2string(S)))

end

procedure sexp2string(S) #: return a string representing the s-expr

  s := ""
  every t := !S do {
     if type(t) == "list" then 
        s ||:= "(" || trim(sexp2string(t)) || ")"
     else 
        if upto('() \t\r\n',t) then 
           s ||:= "'" || t || "'" 
        else
           s ||:= t 
     s ||:= " "
     }
  return trim(s)

end

procedure string2sexp(s) #: return a s-expression nested list

  if s ? ( sexptokenize(T := []), pos(0) ) then 
     return sexpnest(T)
  else
     write("Malformed: ",s)

end

procedure sexpnest(T,L) #: transform s-expr token list to nested list

  /L := []
  while t := get(T) do           
     case t of {
        "("      :  {
                    put(L,[])
                    sexpnest(T,L[*L])
                    }
        ")"      :  return L
        default  :  put(L, numeric(t) | t)
     }
     return L

end

procedure sexptokenize(T) #: return list of tokens parsed from an s-expr string static sym initial sym := &letters++&digits++'~`!@#$%^&*_-+|;:.,<>[]{}'

  until pos(0) do 
     case &subject[&pos] of {
        " "   :  tab(many(' \t\r\n'))                     # consume whitespace
        "'"|"\""  : 
           (q := move(1)) & put(T,tab(find(q))) & move(1) # quotes
        "("   :  put(T,move(1)) & sexptokenize(T)         # open      
        ")"   :  put(T,move(1)) &return T                 # close
        default  : put(T, tab(many(sym)))                 # other symbols
        } 
  return T

end</lang>

ximage.icn formats arbitrary structures into printable strings

Output:

Input:  "((data 'quoted data' 123 4.5) (data (!@# (4.5) '(more' 'data)')))"
Structure:
L2 := list(1)
   L2[1] := L3 := list(2)
      L3[1] := L4 := list(4)
         L4[1] := "data"
         L4[2] := "quoted data"
         L4[3] := 123
         L4[4] := 4.5
      L3[2] := L5 := list(2)
         L5[1] := "data"
         L5[2] := L6 := list(4)
            L6[1] := "!@#"
            L6[2] := L7 := list(1)
               L7[1] := 4.5
            L6[3] := "(more"
            L6[4] := "data)"
Output:  "((data 'quoted data' 123 4.5) (data (!@# (4.5) '(more' 'data)')))"

J

Since J already has a way of expressing nested lists, this implementation is for illustration purposes only. No attempt is made to handle arrays which are not representable using sexpr syntax.

This implementation does not support escape characters. If escape characters were added, we would need additional support in the tokenizer (an extra character class, and in the state table an extra column and two extra rows, or almost double the number of state transitions: 35 instead of 20), and additional support in the data language (unfmt would need to strip out escape characters and fmt would need to insert escape characters -- so each of these routines would also perhaps double in size.) And that's a lot of bulk for serialize/deserialize mechanism which, by design, cannot represent frequently used data elements (such as matrices and gerunds).

<lang j>NB. character classes: 0: paren, 1: quote, 2: whitespace, 3: wordforming (default) chrMap=: '()';'"';' ',LF,TAB,CR

NB. state columns correspond to the above character classes NB. first digit chooses next state. NB. second digit is action 0: do nothing, 1: start word, 2: end word states=: 10 10#: ".;._2]0 :0

 11  21  00  31  NB. state 0: initial state
 12  22  02  32  NB. state 1: after () or after closing "
 40  10  40  40  NB. state 2: after opening "
 12  22  02  30  NB. state 3: after word forming character
 40  10  40  40  NB. state 4: between opening " and closing "

)

tokenize=: (0;states;<chrMap)&;:

rdSexpr=:3 :0 :.wrSexpr

 s=. r=.  [ 'L R'=. ;:'()'
 for_token. tokenize y do.
   select. token
     case. L do. r=.   [ s=. s,<r
     case. R do. s=. }:s [ r=. (_1{::s),<r
     case.   do. r=. r,token
   end.
 end.
 >{.r

)

wrSexpr=: ('(' , ;:^:_1 , ')'"_)^:L.L:1^:L. :.rdSexpr

fmt=: 3 :0 :.unfmt

 if. '"' e. {.y     do. }.,}: y  NB. quoted string
 elseif. 0=#$n=.".y do. n        NB. number or character
 elseif.            do. s:<y     NB. symbol
 end.

)

unfmt=: 3 :0 :.fmt

 select. 3!:0 y
   case. 1;4;8;16;128 do. ":!.20 y
   case. 2;131072     do.
     select. #$y
       case. 0 do. ',y,'
       case. 1 do. '"',y,'"'
     end.
   case. 64           do. (":y),'x'
   case. 65536        do. >s:inv y
 end.

)

readSexpr=: fmt L:0 @rdSexpr :.writeSexpr writeSexpr=: wrSexpr @(unfmt L:0) :.readSexpr</lang>


Example use:

<lang j> readSexpr '((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))' ┌───────────────────────────┬────────────────────────────────┐ │┌─────┬───────────┬───┬───┐│┌─────┬────────────────────────┐│ ││`data│quoted data│123│4.5│││`data│┌────┬─────┬─────┬─────┐││ │└─────┴───────────┴───┴───┘││ ││`!@#│┌───┐│(more│data)│││ │ ││ ││ ││4.5││ │ │││ │ ││ ││ │└───┘│ │ │││ │ ││ │└────┴─────┴─────┴─────┘││ │ │└─────┴────────────────────────┘│ └───────────────────────────┴────────────────────────────────┘

  writeSexpr readSexpr '((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))'

((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))</lang>

OCaml

The file SExpr.mli containing the interface:

<lang ocaml>(** This module is a very simple parsing library for S-expressions. *) (* Copyright (C) 2009 Florent Monnier, released under MIT license. *)

type sexpr = Atom of string | Expr of sexpr list (** the type of S-expressions *)

val parse_string : string -> sexpr list (** parse from a string *)

val parse_ic : in_channel -> sexpr list (** parse from an input channel *)

val parse_file : string -> sexpr list (** parse from a file *)

val parse : (unit -> char option) -> sexpr list (** parse from a custom function, [None] indicates the end of the flux *)

val print_sexpr : sexpr list -> unit (** a dump function for the type [sexpr] *)

val print_sexpr_indent : sexpr list -> unit (** same than [print_sexpr] but with indentation *)

val string_of_sexpr : sexpr list -> string (** convert an expression of type [sexpr] into a string *)

val string_of_sexpr_indent : sexpr list -> string (** same than [string_of_sexpr] but with indentation *)</lang>

The file SExpr.ml containing the implementation:

<lang ocaml>(** This module is a very simple parsing library for S-expressions. *) (* Copyright (C) 2009 Florent Monnier, released under MIT license. *) (* modified to match the task description *)

type sexpr = Atom of string | Expr of sexpr list

type state =

 | Parse_root of sexpr list
 | Parse_content of sexpr list
 | Parse_word of Buffer.t * sexpr list
 | Parse_string of bool * Buffer.t * sexpr list

let parse pop_char =

 let rec aux st =
   match pop_char() with
   | None ->
       begin match st with
       | Parse_root sl -> (List.rev sl)
       | Parse_content _
       | Parse_word _
       | Parse_string _ ->
           failwith "Parsing error: content not closed by parenthesis"
       end
   | Some c ->
       match c with
       | '(' ->
           begin match st with
           | Parse_root sl ->
               let this = aux(Parse_content []) in
               aux(Parse_root((Expr this)::sl))
           | Parse_content sl ->
               let this = aux(Parse_content []) in
               aux(Parse_content((Expr this)::sl))
           | Parse_word(w, sl) ->
               let this = aux(Parse_content []) in
               aux(Parse_content((Expr this)::Atom(Buffer.contents w)::sl))
           | Parse_string(_, s, sl) ->
               Buffer.add_char s c;
               aux(Parse_string(false, s, sl))
           end
       | ')' ->
           begin match st with
           | Parse_root sl ->
               failwith "Parsing error: closing parenthesis without openning"
           | Parse_content sl -> (List.rev sl)
           | Parse_word(w, sl) -> List.rev(Atom(Buffer.contents w)::sl)
           | Parse_string(_, s, sl) ->
               Buffer.add_char s c;
               aux(Parse_string(false, s, sl))
           end
       | ' ' | '\n' | '\r' | '\t' ->
           begin match st with
           | Parse_root sl -> aux(Parse_root sl)
           | Parse_content sl -> aux(Parse_content sl)
           | Parse_word(w, sl) -> aux(Parse_content(Atom(Buffer.contents w)::sl))
           | Parse_string(_, s, sl) ->
               Buffer.add_char s c;
               aux(Parse_string(false, s, sl))
           end
       | '"' ->
           (* '"' *)
           begin match st with
           | Parse_root _ -> failwith "Parse error: double quote at root level"
           | Parse_content sl ->
               let s = Buffer.create 74 in
               aux(Parse_string(false, s, sl))
           | Parse_word(w, sl) ->
               let s = Buffer.create 74 in
               aux(Parse_string(false, s, Atom(Buffer.contents w)::sl))
           | Parse_string(true, s, sl) ->
               Buffer.add_char s c;
               aux(Parse_string(false, s, sl))
           | Parse_string(false, s, sl) ->
               aux(Parse_content(Atom(Buffer.contents s)::sl))
           end
       | '\\' ->
           begin match st with
           | Parse_string(true, s, sl) ->
               Buffer.add_char s c;
               aux(Parse_string(false, s, sl))
           | Parse_string(false, s, sl) ->
               aux(Parse_string(true, s, sl))
           | _ ->
               failwith "Parsing error: escape character in wrong place"
           end
       | _ ->
           begin match st with
           | Parse_root _ ->
               failwith(Printf.sprintf "Parsing error: char '%c' at root level" c)
           | Parse_content sl ->
               let w = Buffer.create 16 in
               Buffer.add_char w c;
               aux(Parse_word(w, sl))
           | Parse_word(w, sl) ->
               Buffer.add_char w c;
               aux(Parse_word(w, sl))
           | Parse_string(_, s, sl) ->
               Buffer.add_char s c;
               aux(Parse_string(false, s, sl))
           end
 in
 aux (Parse_root [])


let string_pop_char str =

 let len = String.length str in
 let i = ref(-1) in
 (function () -> incr i; if !i >= len then None else Some(str.[!i]))


let parse_string str =

 parse (string_pop_char str)


let ic_pop_char ic =

 (function () ->
    try Some(input_char ic)
    with End_of_file -> (None))


let parse_ic ic =

 parse (ic_pop_char ic)


let parse_file filename =

 let ic = open_in filename in
 let res = parse_ic ic in
 close_in ic;
 (res)


let contains s ch =

 let len = String.length s in
 let rec aux i =
   if i >= len then false
   else if s.[i] = ch then true
   else aux (succ i)
 in
 aux 0

let contains_whitespace s =

 List.exists (contains s) [' '; '\n'; '\r'; '\t'; '('; ')']

let quote s =

 "\"" ^ s ^ "\""

let protect s =

 let s = String.escaped s in
 if contains_whitespace s then quote s else s


let string_of_sexpr s =

 let rec aux acc = function
 | (Atom tag)::tl -> aux ((protect tag)::acc) tl
 | (Expr e)::tl ->
     let s =
       "(" ^
       (String.concat " " (aux [] e))
       ^ ")"
     in
     aux (s::acc) tl
 | [] -> (List.rev acc)
 in
 String.concat " " (aux [] s)


let print_sexpr s =

 print_endline (string_of_sexpr s)


let string_of_sexpr_indent s =

 let rec aux i acc = function
 | (Atom tag)::tl -> aux i ((protect tag)::acc) tl
 | (Expr e)::tl ->
     let s =
       "\n" ^ (String.make i ' ') ^ "(" ^
       (String.concat " " (aux (succ i) [] e))
       ^ ")"
     in
     aux i (s::acc) tl
 | [] -> (List.rev acc)
 in
 String.concat "\n" (aux 0 [] s)


let print_sexpr_indent s =

 print_endline (string_of_sexpr_indent s)</lang>

Then we compile this small module and test it in the interactive loop:

$ ocamlc -c SExpr.mli
$ ocamlc -c SExpr.ml
$ ocaml SExpr.cmo 
        Objective Caml version 3.11.2

# open SExpr ;;

# let s = read_line () ;;
((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))
val s : string =
  "((data \"quoted data\" 123 4.5) (data (!@# (4.5) \"(more\" \"data)\")))"

# let se = SExpr.parse_string s ;;
val se : SExpr.sexpr list =
  [Expr
    [Expr [Atom "data"; Atom "quoted data"; Atom "123"; Atom "4.5"];
     Expr
      [Atom "data";
       Expr [Atom "!@#"; Expr [Atom "4.5"]; Atom "(more"; Atom "data)"]]]]

# SExpr.print_sexpr se ;;
((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))
- : unit = ()

# SExpr.print_sexpr_indent se ;;

(
 (data "quoted data" 123 4.5) 
 (data 
  (!@# 
   (4.5) "(more" "data)")))
- : unit = ()

PicoLisp

This example is in need of improvement:

Please demonstrate how to write a parser in lisp. see Talk:S-Expressions#lisp_solutions.

The 'any' function parses an s-expression from a string (indentical to the way 'read' does this from an input stream). <lang PicoLisp>: (any "((data \"quoted data\" 123 4.5) (data (!@# (4.5) \"(more\" \"data)\")))") -> ((data "quoted data" 123 5) (data (!@# (5) "(more" "data)")))

(view @)

+---+-- data | | | +-- "quoted data" | | | +-- 123 | | | +-- 5 | +---+-- data

   |
   +---+-- !@#
       |
       +---+-- 5
       |
       +-- "(more"
       |
       +-- "data)"</lang>

'sym' does the reverse (i.e. builds a symbol (string) from an expression). <lang PicoLisp>: (sym @@) -> "((data \"quoted data\" 123 5) (data (!@# (5) \"(more\" \"data)\")))"</lang>

Pike

<lang pike>class Symbol(string name) {

   string _sprintf(int type)
   { 
       switch(type)
       { 
           case 's': return name; 
           case 'O': return sprintf("(Symbol: %s)", name||"");
           case 'q': return name; 
           case 't': return "Symbol";
           default:  return sprintf("%"+int2char(type), name);
       } 
   }
   mixed cast(string type)
   { 
       switch(type)
       { 
           case "string": return name;
           default: throw(sprintf("can not cast 'Symbol' to '%s'", type)); 
       }  
   }

}

mixed value(string token) {

   if ((string)(int)token==token)
       return (int)token;
   array result = array_sscanf(token, "%f%s");
   if (sizeof(result) && floatp(result[0]) && ! sizeof(result[1]))
       return result[0];
   else
       return Symbol(token);

}

array tokenizer(string input) {

   array output = ({}); 
   for(int i=0; i<sizeof(input); i++)
   { 
       switch(input[i])
       { 
           case '(': output+= ({"("}); break; 
           case ')': output += ({")"}); break; 
           case '"': //"
                     output+=array_sscanf(input[++i..], "%s\"%[ \t\n]")[0..0]; 
                     i+=sizeof(output[-1]); 
                     break; 
           case ' ': 
           case '\t': 
           case '\n': break; 
           default: string token = array_sscanf(input[i..], "%s%[) \t\n]")[0]; 
                    output+=({ value(token) }); 
                    i+=sizeof(token)-1; 
                    break; 
       }
   }
   return output;

}

// this function is based on the logic in Parser.C.group() in the pike library; array group(array tokens) {

   ADT.Stack stack=ADT.Stack();
   array ret =({});
   foreach(tokens;; string token)
   {
       switch(token)
       {
           case "(": stack->push(ret); ret=({}); break;
           case ")":
                   if (!sizeof(ret) || !stack->ptr) 
                   {
                     // Mismatch
                       werror ("unmatched close parenthesis\n");
                       return ret;
                   }
                   ret=stack->pop()+({ ret }); 
                   break;
           default: ret+=({token}); break;
       }
   }
   return ret;

}

string sexp(array input) {

   array output = ({});
   foreach(input;; mixed item)
   {
       if (arrayp(item))
           output += ({ sexp(item) });
       else if (intp(item))
           output += ({ sprintf("%d", item) });
       else if (floatp(item))
           output += ({ sprintf("%f", item) });
       else
           output += ({ sprintf("%q", item) });
   }
   return "("+output*" "+")";

}

string input = "((data \"quoted data\" 123 4.5)\n (data (!@# (4.5) \"(more\" \"data)\")))"; array data = group(tokenizer(input))[0]; string output = sexp(data);</lang>

Output:

((data "quoted data" 123 4.5)
 (data (!@# (4.5) "(more" "data)")))
({ ({ (Symbol: data), "quoted data", 123, 4.5 }), ({ (Symbol: data), ({ (Symbol: !@#), ({ 4.5 }), "(more", "data)" }) }) })
((data "quoted data" 123 4.5) (data (!@# (45) "(more" "data)")))

Python

<lang python>import re

dbg = False

term_regex = r(?mx)

   \s*(?:
       (?P<brackl>\()|
       (?P<brackr>\))|
       (?P<num>\d+\.\d+|\d+)\b|
       (?P<sq>"[^"]*")|
       (?P\S+)\b
      )

def parse_sexp(sexp):

   stack = []
   out = []
   if dbg: print("%-6s %-14s %-44s %-s" % tuple("term value out stack".split()))
   for termtypes in re.finditer(term_regex, sexp):
       term, value = [(t,v) for t,v in termtypes.groupdict().items() if v][0]
       if dbg: print("%-7s %-14s %-44r %-r" % (term, value, out, stack))
       if   term == 'brackl':
           stack.append(out)
           out = []
       elif term == 'brackr':
           assert stack, "Trouble with nesting of brackets"
           tmpout, out = out, stack.pop(-1)
           out.append(tmpout)
       elif term == 'num':
           v = float(value)
           if v.is_integer(): v = int(v)
           out.append(v)
       elif term == 'sq':
           out.append(value[1:-1])
       elif term == 's':
           out.append(value)
       else:
           raise NotImplementedError("Error: %r" % (term, value))
   assert not stack, "Trouble with nesting of brackets"
   return out[0]

def print_sexp(exp):

   out = 
   if type(exp) == type([]):
       out += '(' + ' '.join(print_sexp(x) for x in exp) + ')'
   elif type(exp) == type() and re.search(r'[\s()]', exp):
       out += '"%s"' % repr(exp)[1:-1].replace('"', '\"')
   else:
       out += '%s' % exp
   return out
       
   

if __name__ == '__main__':

   sexp =  ( ( data "quoted data" 123 4.5)
        (data (123 (4.5) "(more" "data)")))
   print('Input S-expression: %r' % (sexp, ))
   parsed = parse_sexp(sexp)
   print("\nParsed to Python:", parsed)
   print("\nThen back to: '%s'" % print_sexp(parsed))</lang>
Output
Input S-expression: '((data "quoted data" 123 4.5)\n         (data (123 (4.5) "(more" "data)")))'

Parsed to Python: [['data', 'quoted data', 123, 4.5], ['data', [123, [4.5], '(more', 'data)']]]

Then back to: '((data "quoted data" 123 4.5) (data (123 (4.5) "(more" "data)")))'

Ruby

Works with: Ruby version 1.9

<lang ruby>class SExpr

 def initialize(str)
   @original = str
   @data = parse_sexpr(str)
 end
 attr_reader :data, :original
 def to_sexpr
   @data.to_sexpr
 end
 private
 def parse_sexpr(str)
   state = :token_start
   tokens = []
   word = ""
   str.each_char do |char|
     case state
     when :token_start
       case char
       when "(" 
         tokens << :lbr
       when ")" 
         tokens << :rbr
       when /\s/
         # do nothing, just consume the whitespace
       when  '"'
         state = :read_quoted_string
         word = ""
       else
         state = :read_string_or_number
         word = char
       end
     when :read_quoted_string
       case char
       when '"'
         tokens << word
         state = :token_start
       else
         word << char
       end
     when :read_string_or_number
       case char
       when /\s/
         tokens << symbol_or_number(word)
         state = :token_start
       when ')'
         tokens << symbol_or_number(word)
         tokens << :rbr
         state = :token_start
       else
         word << char
       end
     end
   end
   sexpr_tokens_to_array(tokens)
 end
 def symbol_or_number(word)
   begin
     Integer(word)
   rescue ArgumentError
     begin 
       Float(word)
     rescue ArgumentError
       word.to_sym
     end
   end
 end
 def sexpr_tokens_to_array(tokens, idx = 0)
   result = []
   while idx < tokens.length
     case tokens[idx]
     when :lbr
       tmp, idx = sexpr_tokens_to_array(tokens, idx + 1)
       result << tmp
     when :rbr
       return [result, idx]
     else 
       result << tokens[idx]
     end
     idx += 1
   end
   result[0]
 end

end

class Object

 def to_sexpr
   self
 end

end

class String

 def to_sexpr
   self.match(/[\s()]/) ? self.inspect : self
 end

end

class Symbol

 alias :to_sexpr :to_s

end

class Array

 def to_sexpr
   "(%s)" % inject([]) {|a, elem| a << elem.to_sexpr}.join(" ")
 end

end


sexpr = SExpr.new <<END ((data "quoted data" 123 4.5)

(data (!@# (4.5) "(more" "data)")))

END

puts "original sexpr:\n#{sexpr.original}" puts "\nruby data structure:\n#{sexpr.data.inspect}" puts "\nand back to S-Expr:\n#{sexpr.to_sexpr}"</lang>

outputs

original sexpr:
((data "quoted data" 123 4.5)
 (data (!@# (4.5) "(more" "data)")))

ruby data structure:
[[:data, "quoted data", 123, 4.5], [:data, [:"!@#", [4.5], "(more", "data)"]]]

and back to S-Expr:
((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))

Scheme

This example is in need of improvement:

Please demonstrate how to write a parser in lisp. see Talk:S-Expressions#lisp_solutions.

Like Common Lisp, R5RS Scheme has a read function parses an s-expression from an input stream.

(uses SRFI 6, Basic String Ports) <lang scheme>(define input "((data \"quoted data\" 123 4.5)

(data (!@# (4.5) \"(more\" \"data)\")))")

(define data (read (open-input-string input))) (define output (let ((out (open-output-string)))

                 (write data out)
                 (get-output-string out)))

(write input) (newline) (write data) (newline) (write output) (newline)</lang> Output:

"((data \"quoted data\" 123 4.5)\n (data (!@# (4.5) \"(more\" \"data)\")))"
((data "quoted data" 123 4.5) (data (!@# (4.5) "(more" "data)")))
"((data \"quoted data\" 123 4.5) (data (!@# (4.5) \"(more\" \"data)\")))"