Coverage report: /development/source/library/org/datagraph/spocq-shard/src/core/encoding/terms/xml-shard-term.lisp
| Kind | Covered | All | % |
| expression | 500 | 667 | 75.0 |
| branch | 27 | 40 | 67.5 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
2
;;; (load "patches/xml-shard-term.lisp")
4
;;; encode xml terms using direct access to persistent term definitions
6
(in-package :org.datagraph.spocq.implementation)
8
;;; results+xml serializer
9
;;; http://www.w3.org/TR/rdf-sparql-XMLres/
11
;;; replace the xml field encoding operator with one which retrieves terms directly from lmdb.
13
(defmethod write-sparql-results+xml :around ((results solution-generator) (stream t))
14
(rlmdb::with-string-database (sdb)
17
(defmethod write-rdf+xml :around ((results solution-generator) (stream t))
18
(rlmdb::with-string-database (sdb)
21
(fmakunbound 'write-sparql-results-field+xml)
23
(defun write-sparql-results-field+xml (page variables stream &optional (index 0) (start 0) end)
24
(dotimes (page-index (array-dimension page 0))
25
(when (>= index start)
26
(when (and end (>= index end))
28
(write-string " <result>" stream)
29
(loop for value-index from 0
31
for term-id = (aref page page-index value-index)
32
unless (= term-id +null-term-id+)
34
(format stream " <binding name='~a'>" name)
35
(encode-xml-term-number term-id stream)
36
(write-string "</binding>" stream)))
37
(write-string " </result>" stream))
41
(fmakunbound 'write-sparql-results-field-rdf+xml)
43
(defun write-sparql-results-field-rdf+xml (page stream &optional (index 0) (start 0) end)
44
(dotimes (page-index (array-dimension page 0))
45
(when (>= index start)
46
(when (and end (>= index end))
48
(let ((subject-term-id (aref page page-index 0))
49
(predicate-term-id (aref page page-index 1))
50
(object-term-id (aref page page-index 2)))
51
(unless (or (= subject-term-id +null-term-id+)
52
(= predicate-term-id +null-term-id+)
53
(= object-term-id +null-term-id+))
54
(write-string " " stream)
55
(encode-rdf-xml-description-subject-term-id subject-term-id stream)
56
(encode-rdf-xml-description-predicate-and-object-term-ids predicate-term-id object-term-id stream)
57
(format stream "</rdf:Description>~%"))
61
(defmethod rdf+xml-iri-parts ((iri-term-number integer))
62
"given a term id, retrieve and deconstruct the lexical form string"
63
(let* ((%term (rlmdb:shard-term-fetch iri-term-number))
64
(term-type (rlmdb:%shard-term-type %term))
65
(%term-data (rlmdb:%shard-term-data %term)))
66
(declare (type cffi:foreign-pointer %term)
67
(type symbol term-type)
68
(type cffi:foreign-pointer %term-data))
71
(let ((lexical-form (rlmdb:shard-string-fetch (rlmdb:%shard-term-data-string-value-offset %term-data))))
72
(rdf+xml-iri-parts lexical-form))))))
74
(defun encode-rdf-xml-description-subject-term-id (subject-term-number stream)
75
"Encode the complete description element start tag with an about or a node id attribute respective
76
the subject term type"
78
(declare (type fixnum subject-term-number))
79
(format stream " <rdf:Description ")
80
(let* ((%term (rlmdb:shard-term-fetch subject-term-number))
81
(term-type (rlmdb:%shard-term-type %term))
82
(%term-data (rlmdb:%shard-term-data %term)))
83
(declare (type cffi:foreign-pointer %term)
84
(type symbol term-type)
85
(type cffi:foreign-pointer %term-data))
89
(ecase (rlmdb:%shard-term-subtype-node-subtype %term)
91
(format stream "rdf:nodeID='~@[~a_~]genid~d'"
93
(rlmdb:%shard-term-data-node-genid %term-data)))
95
(format stream "rdf:nodeID='~@[~a_~]~V/%format-xml-iri-string/~d'"
97
;; need a byte-count limited version of stream-write-turtle-external-utf8-string
98
4 (rlmdb:%shard-term-data-node-gensym-prefix %term-data)
99
(rlmdb:%shard-term-data-node-gensym-suffix %term-data)))
101
(format stream "rdf:nodeID='~@[~a_~]~V/%format-xml-iri-string/'"
103
8 (rlmdb:%shard-term-data-node-label %term-data)))
105
(format stream "rdf:nodeID='~@[~a_~]~/format-xml-iri-string-id/'"
107
(rlmdb:%shard-term-data-node-label-offset %term-data)))))
109
(format stream "rdf:about='~/format-xml-iri-string-id/'"
110
(rlmdb:%shard-term-data-uri-string-offset %term-data)))
112
(error "RDF encoding error: invalid subject term id type: ~s: ~s." subject-term-number term-type))))
113
(write-char #\> stream))
115
(defun encode-rdf-xml-description-predicate-and-object-term-ids (predicate-term-number object-term-number stream)
116
"Encode the predicate as the rdf element start with a generic identifier and namespace binding.
117
Leave the start tag incomplete in order that the object can be encoded as additional attributes
119
Delegate the object envoding to encode-rdf-xml-object.
120
Finally, close the predicate element."
122
(multiple-value-bind (prefix namespace-name local-part)
123
(rdf+xml-iri-parts predicate-term-number)
124
(format stream "<~a:~a xmlns:~a='~a' " ;; leave it open for the object to complete
125
prefix local-part prefix namespace-name)
126
(let* ((%term (rlmdb:shard-term-fetch object-term-number))
127
(term-type (rlmdb:%shard-term-type %term))
128
(%term-data (rlmdb:%shard-term-data %term)))
129
(declare (type cffi:foreign-pointer %term)
130
(type symbol term-type)
131
(type cffi:foreign-pointer %term-data))
135
(ecase (rlmdb:%shard-term-subtype-node-subtype %term)
137
(format stream "rdf:nodeID='~@[~a_~]genid~d'/>"
139
(rlmdb:%shard-term-data-node-genid %term-data)))
141
(format stream "rdf:nodeID='~@[~a_~]~V/%format-xml-iri-string/~d'/>"
143
;; need a byte-count limited version of stream-write-turtle-external-utf8-string
144
4 (rlmdb:%shard-term-data-node-gensym-prefix %term-data)
145
(rlmdb:%shard-term-data-node-gensym-suffix %term-data)))
147
(format stream "rdf:nodeID='~@[~a_~]~V/%format-xml-iri-string/'/>"
149
8 (rlmdb:%shard-term-data-node-label %term-data)))
151
(format stream "rdf:nodeID='~@[~a_~]~/format-xml-iri-string-id/'/>"
153
(rlmdb:%shard-term-data-node-label-offset %term-data)))))
155
(format stream "rdf:resource='~/format-xml-iri-string-id/'/>"
156
(rlmdb:%shard-term-data-uri-string-offset %term-data)))
158
(ecase (rlmdb:%shard-term-subtype-string-subtype %term)
160
(format stream ">~V/%format-xml-character-data/</~a:~a>"
161
8 %term-data prefix local-part))
163
(let* ((string-id (rlmdb:%shard-term-data-string-value-offset %term-data))
164
(language-id (rlmdb:%shard-term-data-string-language-offset %term-data)))
165
(if (plusp language-id)
166
(format stream "xml:lang='~/format-xml-character-data-id/'>~/format-xml-character-data-id/</~a:~a>"
167
language-id string-id prefix local-part)
168
(format stream ">~/format-xml-character-data-id/</~a:~a>"
169
string-id prefix local-part))))))
172
(let ((lexical-form-id (rlmdb:%shard-term-data-literal-value-offset %term-data))
173
(datatype-string-id (rlmdb:%shard-term-data-literal-datatype-offset %term-data)))
174
(if (is-string-datatype-string-id datatype-string-id)
175
(format stream ">~/format-xml-character-data-id/</~a:~a>"
178
(format stream "rdf:datatype='~/format-xml-iri-string-id/'>~/format-xml-character-data-id/</~a:~a>"
179
datatype-string-id lexical-form-id
180
prefix local-part))))
182
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#boolean'>~:[false~;true~]</~a:~a>"
183
(rlmdb:%shard-term-data-boolean %term-data)
186
(let ((value (rlmdb:%shard-term-data-integer %term-data)))
187
(format stream "rdf:datatype='~a'>~a</~a:~a>"
188
(if *encode-object-subtypes*
190
((signed-byte 16) "http://www.w3.org/2001/XMLSchema#short")
191
(t "http://www.w3.org/2001/XMLSchema#integer"))
192
"http://www.w3.org/2001/XMLSchema#integer")
196
(ecase (rlmdb:%shard-term-subtype-decimal-subtype %term)
198
(let* ((options (rlmdb:%shard-term-options %term))
199
(minusp (zerop (logand options #x01)))
200
(scale (ash options -1))
201
(value (rlmdb:%shard-term-data-integer %term-data)))
202
(when minusp (setf value (- value)))
203
(setf scale (if (zerop value) 1 (expt 10 scale)))
205
#+sbcl (sb-kernel::%make-ratio value scale)
206
#-sbcl (/ value scale))
207
(let ((*read-default-float-format* 'single-float))
208
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#decimal'>~f</~a:~a>"
210
prefix local-part))))
212
(let* ((integer (rlmdb:%shard-term-data-decimal-integer %term-data))
213
(fraction (if (zerop integer) 1 (rlmdb:%shard-term-data-decimal-fraction %term-data))))
214
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#decimal'>~d.~d</~a:~a>"
216
prefix local-part)))))
218
(let ((value (rlmdb:%shard-term-data-float %term-data))
219
(*read-default-float-format* 'single-float))
220
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#float'>~f</~a:~a>"
224
(let ((value (rlmdb:%shard-term-data-double %term-data))
225
(*read-default-float-format* 'double-float))
226
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#double'>~f</~a:~a>"
231
(let ((value (timeline-location-date-time (rlmdb:%shard-term-data-time %term-data))))
232
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#dateTime'>~a</~a:~a>"
233
(term-lexical-form value)
236
(let ((value (timeline-location-date (rlmdb:%shard-term-data-time %term-data))))
237
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#date'>~a</~a:~a>"
238
(term-lexical-form value)
241
(let ((value (timeline-location-time (rlmdb:%shard-term-data-time %term-data))))
242
(format stream "rdf:datatype='http://www.w3.org/2001/XMLSchema#time'>~a</~a:~a>"
243
(term-lexical-form value)
244
prefix local-part)))))
249
(defun write-xml-character-data-char (char stream)
251
(#\< (write-string "<" stream))
252
(#\> (write-string ">" stream))
253
(#\& (write-string "&" stream))
254
(t (write-char char stream))))
256
(defun %write-xml-character-data (%string stream &optional (byte-count most-positive-fixnum))
257
(let ((dsu:*utf8-iso8859-allowed* t)
258
(dsu:*utf8-surrogates-allowed* t)
259
(decoder (load-time-value (dsu:content-encoding-byte-decoder (dsu:content-encoding :utf-8))))
261
(declare (type (function (function cffi:foreign-pointer) (or character null)) decoder)
262
(type cffi:foreign-pointer %string)
263
(type fixnum byte-count)
264
(optimize (speed 3) (safety 0)))
265
(multiple-value-bind (writer arg) (stream-writer stream)
266
(labels ((get-byte (%string)
267
(declare (type cffi:foreign-pointer %string))
268
(when (< i byte-count)
269
(let ((byte (cffi:mem-aref %string :uint8 i)))
271
(when (plusp byte) byte))))
274
(#\< (loop for char across "<" do (funcall writer arg char)))
275
(#\> (loop for char across ">" do (funcall writer arg char)))
276
(#\& (loop for char across "&" do (funcall writer arg char)))
277
((#\newline #\return #\tab) (funcall writer arg char))
279
;; do not constrain any other characters
280
(funcall writer arg char)))))
281
(loop for char = (funcall decoder #'get-byte %string)
283
do (put-char char))))))
285
(defun write-xml-character-data-id (string-id stream)
286
"Given an rdf db string id, retrieve the external data and write it
287
to the stream as xml character data."
288
(flet ((%write-string-data (k raw-value)
290
(let* ((size (rlmdb.i::%mdb-val-size raw-value))
291
(%data (rlmdb.i::%mdb-val-data raw-value)))
292
(%write-xml-character-data %data stream size)
294
(declare (dynamic-extent #'%write-string-data))
295
(rlmdb::call-with-shard-string string-id #'%write-string-data)))
297
(defun cl-user::format-xml-character-data-id (stream string-id &optional option arg)
298
(declare (ignore option arg))
299
(write-xml-character-data-id string-id stream))
301
(defun cl-user::%format-xml-character-data (stream %string &optional option arg byte-count)
302
(declare (ignore option arg))
303
(assert (integerp byte-count))
304
(%write-xml-character-data %string stream byte-count))
306
(defun write-xml-iri-char (char stream)
307
(labels ((encode-xml-char-entity-escape (char)
308
(format stream "&#~d;" (char-code char)))
309
(encode-urlencode-charcode-escape (code)
310
(format stream "%~2,'0x" code)))
312
((#\< #\> #\" #\' #\&)
313
(encode-xml-char-entity-escape char))
314
(t (let ((code (char-code char)))
315
(if (<= code #x1f) ; C0 control : http://en.wikipedia.org/wiki/Control_character
316
(encode-urlencode-charcode-escape code)
317
(write-char char stream)))))))
319
(defun %write-xml-iri-string (%string stream byte-count)
320
"Emit an external character data string to an utf-8 encoded stream with xml escaping.
321
This escapes the string terminator, whitespace-format, and control characters.
322
All else is passed to the stream for utf-8 encoding.
323
Perform lax unicode decoding"
324
(unless byte-count (setf byte-count most-positive-fixnum))
325
(let ((dsu:*utf8-iso8859-allowed* t)
326
(dsu:*utf8-surrogates-allowed* t)
327
(decoder (load-time-value (dsu:content-encoding-byte-decoder (dsu:content-encoding :utf-8))))
329
(declare (type (function (function cffi:foreign-pointer) (or character null)) decoder)
330
(type cffi:foreign-pointer %string)
331
(type fixnum byte-count)
332
(optimize (speed 3) (safety 0)))
333
(flet ((get-byte (%string)
334
(declare (type cffi:foreign-pointer %string))
335
(when (< i byte-count)
336
(let ((byte (cffi:mem-aref %string :uint8 i)))
338
(when (plusp byte) byte)))))
339
(loop for char = (funcall decoder #'get-byte %string)
341
do (write-xml-iri-char char stream)))))
343
(defun write-xml-iri-string-id (string-id stream)
344
(flet ((%write-maybe-null-terminated-string (k raw-value)
346
(let* ((size (rlmdb.i::%mdb-val-size raw-value))
347
(%data (rlmdb.i::%mdb-val-data raw-value))
348
(terminated-size (if (zerop (cffi:mem-aref %data :uint8 (1- size))) (1- size) size)))
349
(%write-xml-iri-string %data stream terminated-size)
351
(declare (dynamic-extent #'%write-maybe-null-terminated-string))
352
(rlmdb::call-with-shard-string string-id #'%write-maybe-null-terminated-string)))
355
(defun cl-user::format-xml-iri-string-id (stream string-id &optional option arg)
356
(declare (ignore option arg))
357
(write-xml-iri-string-id string-id stream))
359
(defun cl-user::%format-xml-iri-string (stream %string &optional option arg byte-count)
360
(declare (ignore option arg))
361
(assert (integerp byte-count))
362
(%write-xml-iri-string %string stream byte-count))
365
(defun encode-xml-term-number (term-number stream)
366
(declare (type fixnum term-number))
367
(let* ((%term (rlmdb:shard-term-fetch term-number))
368
(term-type (rlmdb:%shard-term-type %term))
369
(%term-data (rlmdb:%shard-term-data %term)))
370
(declare (type cffi:foreign-pointer %term)
371
(type symbol term-type)
372
(type cffi:foreign-pointer %term-data))
376
(ecase (rlmdb:%shard-term-subtype-node-subtype %term)
378
(format stream "<bnode>~@[~a_~]genid~d</bnode>"
380
(rlmdb:%shard-term-data-node-genid %term-data)))
382
(format stream "<bnode>~@[~a_~]~V/%format-xml-iri-string/~d</bnode>"
384
;; need a byte-count limited version of stream-write-turtle-external-utf8-string
385
4 (rlmdb:%shard-term-data-node-gensym-prefix %term-data)
386
(rlmdb:%shard-term-data-node-gensym-suffix %term-data)))
388
(format stream "<bnode>~@[~a_~]~V/%format-xml-iri-string/</bnode>"
390
8 (rlmdb:%shard-term-data-node-label %term-data)))
392
(format stream "<bnode>~@[~a_~]~/format-xml-iri-string-id/</bnode>"
394
(rlmdb:%shard-term-data-node-label-offset %term-data)))))
396
(format stream "<uri>~/format-xml-iri-string-id/</uri>"
397
(rlmdb:%shard-term-data-uri-string-offset %term-data)))
399
(ecase (rlmdb:%shard-term-subtype-string-subtype %term)
401
(format stream "<literal>~V/%format-xml-character-data/</literal>"
404
(let* ((string-id (rlmdb:%shard-term-data-string-value-offset %term-data))
405
(language-id (rlmdb:%shard-term-data-string-language-offset %term-data)))
406
(if (plusp language-id)
407
(format stream "<literal xml:lang='~/format-xml-character-data-id/'>~/format-xml-character-data-id/</literal>" language-id string-id)
408
(format stream "<literal>~/format-xml-character-data-id/</literal>" string-id))))))
410
(let ((lexical-form-id (rlmdb:%shard-term-data-literal-value-offset %term-data))
411
(datatype-string-id (rlmdb:%shard-term-data-literal-datatype-offset %term-data)))
412
(if (is-string-datatype-string-id datatype-string-id)
413
(format stream "<literal>~/format-xml-character-data-id/</literal>"
415
(format stream "<literal datatype='~/format-xml-iri-string-id/'>~/format-xml-character-data-id/</literal>"
416
datatype-string-id lexical-form-id))))
418
(format stream "<literal datatype='http://www.w3.org/2001/XMLSchema#boolean'>~:[false~;true~]</literal>"
419
(rlmdb:%shard-term-data-boolean %term-data)))
421
(encode-xml-object (rlmdb:%shard-term-data-integer %term-data) stream))
423
(ecase (rlmdb:%shard-term-subtype-decimal-subtype %term)
425
(let* ((options (rlmdb:%shard-term-options %term))
426
(minusp (zerop (logand options #x01)))
427
(scale (ash options -1))
428
(value (rlmdb:%shard-term-data-integer %term-data)))
429
(when minusp (setf value (- value)))
430
(setf scale (if (zerop value) 1 (expt 10 scale)))
432
#+sbcl (sb-kernel::%make-ratio value scale)
433
#-sbcl (/ value scale))
434
(encode-xml-object value stream)))
436
(let* ((integer (rlmdb:%shard-term-data-decimal-integer %term-data))
437
(fraction (if (zerop integer) 1 (rlmdb:%shard-term-data-decimal-fraction %term-data))))
438
(format stream "<literal datatype='http://www.w3.org/2001/XMLSchema#decimal'>~d.~d</literal>" integer fraction)))))
441
(encode-xml-object (rlmdb:%shard-term-data-float %term-data) stream))
443
(encode-xml-object (rlmdb:%shard-term-data-double %term-data) stream))
445
(encode-xml-object (timeline-location-date-time (rlmdb:%shard-term-data-time %term-data)) stream))
447
(encode-xml-object (timeline-location-date (rlmdb:%shard-term-data-time %term-data)) stream))
449
(encode-xml-object (timeline-location-time (rlmdb:%shard-term-data-time %term-data)) stream))