Coverage report: /development/source/library/org/datagraph/spocq-shard/src/core/encoding/terms/tsv-shard-term.lisp
| Kind | Covered | All | % |
| expression | 244 | 312 | 78.2 |
| branch | 17 | 24 | 70.8 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
3
(in-package :org.datagraph.spocq.implementation)
5
;;; replace the tsv field encoding operator with one which retrieves terms directly from lmdb.
6
;;; term encoding delegates to turtle methods
8
;;; there is no tsv rfc.
9
;;; there is an iana definition description. in that fields are unexcaped and do not permit tabs
10
;;; https://www.iana.org/assignments/media-types/text/tab-separated-values
11
;;, the older encoding in src/encodinc/sparql-results-tsv.lisp is wrong. it uses turtle escapes
13
(remove-method #'write-sparql-results+tsv
14
(find-method #'write-sparql-results+tsv
16
(list (find-class 'solution-generator)
19
(defmethod write-sparql-results+tsv ((results solution-generator) (stream t))
20
(rlmdb::with-string-database (sdb)
21
(let* ((dimensions (solution-generator-dimensions results))
22
(channel (solution-generator-channel results))
23
(variable-count (length dimensions))
25
(start (or (response-offset) 0))
27
(*expand-literal-values* nil))
28
(write-tsv-dimensions dimensions stream)
30
(do-pages (page channel)
31
(if (and end (>= index end))
33
(if (>= (+ index (array-dimension page 0)) start)
34
(cond ((= variable-count (array-dimension page 1))
35
(dotimes (page-index (array-dimension page 0))
36
(when (>= index start)
37
(when (and end (>= index end))
39
(loop for value-index from 0 below variable-count
41
(unless (zerop value-index) (write-char #\tab stream))
42
(let ((term-number (aref page page-index value-index)))
43
(cond ((= term-number +null-term-id+)) ; skip it
45
(encode-tsv-term-number term-number stream))))))
49
(log-warn "field width mismatch: ~s : ~s."
50
dimensions (array-dimension page 1))
51
(incf index (array-dimension page 0))))
52
; otherwise skip the entire page
53
(incf index (array-dimension page 0)))))
54
(incf-stat *statements-returned* index))))
57
(defun write-tsv-char (char stream)
59
contrary to the iana document, sparql results stipulate limited escaping.
60
https://www.w3.org/TR/sparql11-results-csv-tsv/#tsv-terms"
62
(#\linefeed (write-string "\\n" stream))
63
(#\return (write-string "\\r" stream))
64
(#\tab (write-string "\\t" stream))
65
(t (write-char char stream)))
68
(defun %write-tsv-string (%string stream byte-count)
69
"Emit an external character data string to an utf-8 encoded stream with tsv escaping.
70
This escapes the string terminator, whitespace-format, and control characters.
71
All else is passed to the stream for utf-8 encoding."
72
(unless byte-count (setf byte-count most-positive-fixnum)) ;; presume null-terminated
73
(let ((dsu:*utf8-iso8859-allowed* t)
74
(dsu:*utf8-surrogates-allowed* t)
75
(decoder (load-time-value (dsu:content-encoding-byte-decoder (dsu:content-encoding :utf-8))))
77
(declare (type (function (function cffi:foreign-pointer) (or character null)) decoder)
78
(type cffi:foreign-pointer %string)
79
(type fixnum byte-count)
80
(optimize (speed 3) (safety 0)))
81
(flet ((get-byte (%string)
82
(declare (type cffi:foreign-pointer %string))
83
(when (< i byte-count)
84
(let ((byte (cffi:mem-aref %string :uint8 i)))
86
(when (plusp byte) byte)))))
87
(loop for char = (funcall decoder #'get-byte %string)
89
do (write-tsv-char char stream))
92
(defun write-tsv-string-id (string-id stream)
93
"Given an rdf db string id, retrieve the external data and write it
94
to the stream as turtle in utf8."
95
(flet ((%write-string-data (k raw-value)
97
(let* ((size (rlmdb.i::%mdb-val-size raw-value))
98
(%data (rlmdb.i::%mdb-val-data raw-value)))
99
(%write-tsv-string %data stream size))))
100
(declare (dynamic-extent #'%write-string-data))
101
(rlmdb::call-with-shard-string string-id #'%write-string-data)))
103
(defun cl-user::format-tsv-string-id (stream string-id &optional option arg)
104
(declare (ignore option arg))
105
(write-tsv-string-id string-id stream))
107
(defun cl-user::%format-tsv-string (stream %string &optional option arg byte-count)
108
(declare (ignore option arg))
109
(assert (integerp byte-count))
110
(%write-tsv-string %string stream byte-count))
113
(defun encode-tsv-term-number (term-number stream)
114
"Given a term number and a stream, encode the stored term properties
115
(type, string, immediate value) to the given stream."
116
(declare (type fixnum term-number))
117
(let* ((%term (rlmdb:shard-term-fetch term-number))
118
(term-type (rlmdb:%shard-term-type %term))
119
(%term-data (rlmdb:%shard-term-data %term)))
120
(declare (type cffi:foreign-pointer %term)
121
(type symbol term-type)
122
(type cffi:foreign-pointer %term-data))
126
(ecase (rlmdb:%shard-term-subtype-node-subtype %term)
128
(format stream "_:~@[~a_~]genid~d"
130
(rlmdb:%shard-term-data-node-genid %term-data)))
132
(format stream "_:~@[~a_~]~V/%format-tsv-string/~d"
134
;; need a byte-count limited version of stream-write-turtle-external-utf8-string
135
4 (rlmdb:%shard-term-data-node-gensym-prefix %term-data)
136
(rlmdb:%shard-term-data-node-gensym-suffix %term-data)))
138
(format stream "_:~@[~a_~]~V/%format-tsv-string/"
140
8 (rlmdb:%shard-term-data-node-label %term-data)))
142
(format stream "_:~@[~a_~]~/format-tsv-string-id/"
144
(rlmdb:%shard-term-data-node-label-offset %term-data)))))
146
(format stream "<~/format-tsv-string-id/>"
147
(rlmdb:%shard-term-data-uri-string-offset %term-data)))
149
(ecase (rlmdb:%shard-term-subtype-string-subtype %term)
151
(format stream "\"~V/%format-tsv-string/\""
154
(let* ((string-offset (rlmdb:%shard-term-data-string-value-offset %term-data))
155
(language-offset (rlmdb:%shard-term-data-string-language-offset %term-data)))
156
(if (plusp language-offset)
157
(format stream "\"~/format-tsv-string-id/@~/format-tsv-string-id/\"" string-offset language-offset)
158
(format stream "\"~/format-tsv-string-id/\"" string-offset))))))
160
;; introduce quotations for all literals even though it should matter for strings only
161
;; in distinction to csv, include data types
162
(let ((lexical-form-id (rlmdb:%shard-term-data-literal-value-offset %term-data))
163
(datatype-string-id (rlmdb:%shard-term-data-literal-datatype-offset %term-data)))
164
(if (is-string-datatype-string-id datatype-string-id)
165
(format stream "\"~/format-tsv-string-id/\""
167
;; delegate to turtle iri encoding
168
(format stream "\"~/format-tsv-string-id/\"^^<~/format-turtle-iri-string-id/>"
169
lexical-form-id datatype-string-id))))
171
;; must return it directly to avoid anomolous appearances
172
(format stream "\"~:[false~;true~]\"^^<http://www.w3.org/2001/XMLSchema#boolean>"
173
(rlmdb:%shard-term-data-boolean %term-data)))
175
(format stream "~a" (rlmdb:%shard-term-data-integer %term-data)))
178
(ecase (rlmdb:%shard-term-subtype-decimal-subtype %term)
180
(let* ((options (rlmdb:%shard-term-options %term))
181
(minusp (zerop (logand options #x01)))
182
(scale (ash options -1))
183
(value (rlmdb:%shard-term-data-integer %term-data)))
184
(when minusp (setf value (- value)))
185
(setf scale (if (zerop value) 1 (expt 10 scale)))
187
#+sbcl (sb-kernel::%make-ratio value scale)
188
#-sbcl (/ value scale))
189
(let ((*read-default-float-format* 'single-float))
190
(format stream "~f" (float value 1.0s0)))))
192
(let* ((integer (rlmdb:%shard-term-data-decimal-integer %term-data))
193
(fraction (if (zerop integer) 1 (rlmdb:%shard-term-data-decimal-fraction %term-data))))
194
(format stream "~d.~d" integer fraction)))))
196
(let ((*read-default-float-format* 'single-float))
197
(format stream "~f" (rlmdb:%shard-term-data-float %term-data))))
200
(let ((*read-default-float-format* 'double-float))
201
(format stream "~f"(rlmdb:%shard-term-data-double %term-data))))
204
(format stream "\"~a\"^^<http://www.w3.org/2001/XMLSchema#dateTime>"
205
(term-lexical-form (timeline-location-date-time (rlmdb:%shard-term-data-time %term-data)))))
208
(format stream "\"~a\"^^<http://www.w3.org/2001/XMLSchema#date>"
209
(term-lexical-form (timeline-location-date (rlmdb:%shard-term-data-time %term-data)))))
212
(format stream "\"~a\"^^<http://www.w3.org/2001/XMLSchema#time>"
213
(term-lexical-form (timeline-location-time (rlmdb:%shard-term-data-time %term-data)))))