1;;; -*- mode:scheme; coding:utf-8; -*-
2;;;
3;;; text/sql/simplifier - Simplify SQL
4;;;
5;;;   Copyright (c) 2015  Takashi Kato  <ktakashi@ymail.com>
6;;;
7;;;   Redistribution and use in source and binary forms, with or without
8;;;   modification, are permitted provided that the following conditions
9;;;   are met:
10;;;
11;;;   1. Redistributions of source code must retain the above copyright
12;;;      notice, this list of conditions and the following disclaimer.
13;;;
14;;;   2. Redistributions in binary form must reproduce the above copyright
15;;;      notice, this list of conditions and the following disclaimer in the
16;;;      documentation and/or other materials provided with the distribution.
17;;;
18;;;   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19;;;   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20;;;   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21;;;   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22;;;   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23;;;   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
24;;;   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25;;;   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26;;;   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27;;;   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28;;;   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29;;;
30
31(library (text sql simplifier)
32    (export simplify-ssql)
33    (import (rnrs)
34	    (srfi :1 lists) ;; for reverse!
35	    (srfi :13 strings)
36	    (srfi :14 char-sets)
37	    (match))
38
39;; what we do here is basically 2 things (may increase later)
40;;  - concatenate identifier with dot instead of (~ ...) form
41;;  - unescape unicode character and normal character (like condision)
42(define (simplify-ssql ssql)
43  (cond ((pair? ssql)
44	 (case (car ssql)
45	   ((~) (simplify-identifier ssql))
46	   ((unicode)
47	    (let ((r (unescape-unicode ssql)))
48	      (if (and (pair? r) (eq? (car r) '!))
49		  (simplify-identifier (list '~ r))
50		  r)))
51	   ((escape) (unescape-string ssql))
52	   (else => (lambda (e) (cons (simplify-ssql e)
53				      (simplify-ssql (cdr ssql)))))))
54	;; we don't care this
55	(else ssql)))
56
57(define (simplify-identifier ssql)
58  (define (handle-identifier ssql)
59    (define (has-dot? s) (string-any #\. s))
60    (cond ((symbol? ssql) (symbol->string ssql))
61	  ((pair? ssql)
62	   (case (car ssql)
63	     ((!)
64	      (let ((s (cadr ssql)))
65		;; if the value contains '.' then we can't simply
66		;; make it symbol so keep it delimited
67		(if (has-dot? s) ssql s)))
68	     ((unicode) (handle-identifier (unescape-unicode ssql)))
69	     ;; we can't simplify method invocation or some weird case
70	     ;; e.g. 'select (select *).*' or so
71	     (else ssql)))
72	  ;; most likely invalid case but this process shouldn't
73	  ;; raise an error
74	  (else ssql)))
75  (define (concat ids) (string->symbol (string-join (reverse! ids) ".")))
76  (define (check-identifier id acc r)
77    (if (string? id)
78	(values (cons id acc) r)
79	(values '() (if (null? acc) (cons id r) (cons* id (concat acc) r)))))
80  (let loop ((converted (map handle-identifier (cdr ssql)))
81	     (acc '()) ;; list of strings
82	     (r '()))
83    (if (null? converted)
84	(if (null? r)
85	     (concat acc)
86	     (let ((i `(,@(reverse! r)
87			,@(if (null? acc) '() (list (concat acc))))))
88	       ;; (~ id) -> id
89	       (if (null? (cdr i))
90		   (car i)
91		   (cons '~ i))))
92	(let-values (((nacc nr) (check-identifier (car converted) acc r)))
93	  (loop (cdr converted) nacc nr)))))
94
95(define hexit (string->char-set "1234567890abcdefABCDEF"))
96
97(define (unescape-unicode ssql)
98  ;; unescaping may raise an error because of invalid format of
99  ;; either unicode escape or escape character
100  (define (handle-escape in e)
101    (define (read-it in n)
102      (let loop ((i 0) (r '()))
103	(if (= i n)
104	    (reverse! r)
105	    (let ((ch (get-char in)))
106	      (cond ((eof-object? ch)
107		     (error
108		      'simplify-ssql
109		      "unexpected EOF during reading unicode escape character"
110		      ssql))
111		    ((char-set-contains? hexit ch)
112		     (loop (+ i 1) (cons ch r)))
113		    (else
114		     (assertion-violation 'simplify-ssql "invalid hexit value"
115					  ch ssql)))))))
116    (define (convert chars)
117      ;; TODO make it better
118      (integer->char (string->number (list->string chars) 16)))
119    (let ((nc (get-char in)))
120      (cond ((eof-object? nc)
121	     (error 'simplify-ssql
122		    "unexpected EOF during reading unicode escape character"
123		    ssql))
124	    ((char=? nc e) e) ;; escaped escape
125	    ((char=? nc #\+) (convert (read-it in 6)))
126	    (else (convert (cons nc (read-it in 3)))))))
127
128  (define (unescape str escape)
129    (unless (= (string-length escape) 1)
130      (assertion-violation 'simplify-ssql
131       "unicode escape character must be one length string" escape ssql))
132    (let-values (((out extract) (open-string-output-port))
133		 ((in)          (open-string-input-port str))
134		 ((e)           (string-ref escape 0)))
135      (when (or (char=? e #\+)
136		(char-set-contains? hexit e) (char-whitespace? e))
137	(assertion-violation 'simplify-ssql
138			     "invalid uescape character" e ssql))
139      (let loop ()
140	(let ((ch (get-char in)))
141	  (cond ((eof-object? ch) (extract))
142		((and (char=? ch #\\) (not (char=? e #\\)))
143		 ;; we need to escape this one with '\'
144		 (put-string out "\\\\")
145		 (loop))
146		((char=? ch e) (put-char out (handle-escape in e)) (loop))
147		(else (put-char out ch) (loop)))))))
148  (match ssql
149    (('unicode ('! id) 'uescape s) (list '! (unescape id s)))
150    (('unicode ('! id))            (list '! (unescape id "\\")))
151    (('unicode s 'uescape e)       (unescape s e))
152    (('unicode s)                  (unescape s "\\"))
153    ;; sorry but we can't handle this one
154    (else ssql)))
155
156(define (unescape-string ssql)
157  (define (unescape str escape)
158    (unless (= (string-length escape) 1)
159      (assertion-violation 'simplify-ssql
160       "escape character must be one length string" escape ssql))
161    (let-values (((out extract) (open-string-output-port))
162		 ((in)          (open-string-input-port str))
163		 ((e)           (string-ref escape 0)))
164      (let loop ()
165	(let ((ch (get-char in)))
166	  (cond ((eof-object? ch) (extract))
167		;; if user specifies '%' as escape. very unlikely but
168		;; may happen
169		((and (char=? ch #\\) (not (char=? e #\\)))
170		 (put-string out "\\\\") (loop))
171		((char=? ch e)
172		 (let ((nc (get-char in)))
173		   (cond ((eof-object? nc) (extract)) ;; how should we handle?
174			 (else
175			  (put-char out #\\)
176			  (put-char out nc) (loop)))))
177		(else (put-char out ch) (loop)))))))
178  (match ssql
179    (('escape (? string? s) (? string? e))
180     (unescape s e))
181    ;; incorrect but intensional?
182    (else ssql)))
183
184)
185