Coverage report: /development/source/library/org/datagraph/spocq-shard/src/core/encoding/terms/csv-shard-term.lisp
| Kind | Covered | All | % |
| expression | 230 | 298 | 77.2 |
| branch | 15 | 22 | 68.2 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
3
(in-package :org.datagraph.spocq.implementation)
5
;;; replace the csv field encoding operator with one which retrieves terms directly from lmdb.
7
#+(or) ;; carried over from jspn -> nexperia#266
8
(defun write-csv-char (char stream)
9
"emit a possibly escaped character to a csv-encoded stream:
11
All Unicode characters may be placed within the
12
quotation marks except for the characters that must be escaped:
13
quotation mark, reverse solidus, and the control characters (U+0000
16
(#\" (write-string "\"\"" stream))
17
(#\backspace (write-string "\\b" stream))
18
(#\page (write-string "\\f" stream))
19
(#\linefeed (write-string "\\n" stream))
20
(#\return (write-string "\\r" stream))
21
(#\tab (write-string "\\t" stream))
22
(t (let ((code (char-code char)))
23
(if (or (<= code #x1f) ; C0 control : http://en.wikipedia.org/wiki/Control_character
24
(and (<= #x80 code) (<= code #x9f))) ; C1 control
25
(format stream "\\u~4,'0x" code)
26
(write-char char stream)))))
28
(defun write-csv-char (char stream)
29
"emit a possibly escaped character to a csv-encoded stream:
31
double quotes are doubled."
33
(#\" (write-string "\"\"" stream))
34
(t (write-char char stream)))
37
(defun %write-csv-string (%string stream byte-count)
38
"Emit an external character data string to an utf-8 encoded stream with csv escaping.
39
This escapes the string terminator, whitespace-format, and control characters.
40
All else is passed to the stream for utf-8 encoding."
41
(unless byte-count (setf byte-count most-positive-fixnum)) ;; presume null-terminated
42
(let ((dsu:*utf8-iso8859-allowed* t)
43
(dsu:*utf8-surrogates-allowed* t)
44
(decoder (load-time-value (dsu:content-encoding-byte-decoder (dsu:content-encoding :utf-8))))
46
(declare (type (function (function cffi:foreign-pointer) (or character null)) decoder)
47
(type cffi:foreign-pointer %string)
48
(type fixnum byte-count)
49
(optimize (speed 3) (safety 0)))
50
(flet ((get-byte (%string)
51
(declare (type cffi:foreign-pointer %string))
52
(when (< i byte-count)
53
(let ((byte (cffi:mem-aref %string :uint8 i)))
55
(when (plusp byte) byte)))))
56
(loop for char = (funcall decoder #'get-byte %string)
58
do (write-csv-char char stream))
61
(defun write-csv-string-id (string-id stream)
62
"Given an rdf db string id, retrieve the external data and write it
63
to the stream as turtle in utf8."
64
(flet ((%write-string-data (k raw-value)
66
(let* ((size (rlmdb.i::%mdb-val-size raw-value))
67
(%data (rlmdb.i::%mdb-val-data raw-value)))
68
(%write-csv-string %data stream size))))
69
(declare (dynamic-extent #'%write-string-data))
70
(rlmdb::call-with-shard-string string-id #'%write-string-data)))
72
(defun cl-user::format-csv-string-id (stream string-id &optional option arg)
73
(declare (ignore option arg))
74
(write-csv-string-id string-id stream))
76
(defun cl-user::%format-csv-string (stream %string &optional option arg byte-count)
77
(declare (ignore option arg))
78
(assert (integerp byte-count))
79
(%write-csv-string %string stream byte-count))
82
(defun encode-csv-term-number (term-number stream)
83
"Given a term number and a stream, encode the stored term properties
84
(type, string, immediate value) to the given stream."
85
(declare (type fixnum term-number))
86
(let* ((%term (rlmdb:shard-term-fetch term-number))
87
(term-type (rlmdb:%shard-term-type %term))
88
(%term-data (rlmdb:%shard-term-data %term)))
89
(declare (type cffi:foreign-pointer %term)
90
(type symbol term-type)
91
(type cffi:foreign-pointer %term-data))
95
(ecase (rlmdb:%shard-term-subtype-node-subtype %term)
97
(format stream "_:~@[~a_~]genid~d"
99
(rlmdb:%shard-term-data-node-genid %term-data)))
101
(format stream "_:~@[~a_~]~V/%format-csv-string/~d"
103
;; need a byte-count limited version of stream-write-turtle-external-utf8-string
104
4 (rlmdb:%shard-term-data-node-gensym-prefix %term-data)
105
(rlmdb:%shard-term-data-node-gensym-suffix %term-data)))
107
(format stream "_:~@[~a_~]~V/%format-csv-string/"
109
8 (rlmdb:%shard-term-data-node-label %term-data)))
111
(format stream "_:~@[~a_~]~/format-csv-string-id/"
113
(rlmdb:%shard-term-data-node-label-offset %term-data)))))
115
(format stream "~/format-csv-string-id/"
116
(rlmdb:%shard-term-data-uri-string-offset %term-data)))
118
(ecase (rlmdb:%shard-term-subtype-string-subtype %term)
120
(format stream "\"~V/%format-csv-string/\""
123
(let* ((string-offset (rlmdb:%shard-term-data-string-value-offset %term-data))
124
(language-offset (rlmdb:%shard-term-data-string-language-offset %term-data)))
125
(if (plusp language-offset)
126
(format stream "\"~/format-csv-string-id/@~/format-csv-string-id/\"" string-offset language-offset)
127
(format stream "\"~/format-csv-string-id/\"" string-offset))))))
129
;; must return it directly to avoid anomolous appearances
130
(format stream "~:[false~;true~]" (rlmdb:%shard-term-data-boolean %term-data)))
132
;; introduce quotations for all literals
133
(let ((lexical-form-id (rlmdb:%shard-term-data-literal-value-offset %term-data)))
134
(format stream "\"~/format-csv-string-id/\""
137
(format stream "~a" (rlmdb:%shard-term-data-integer %term-data)))
139
(ecase (rlmdb:%shard-term-subtype-decimal-subtype %term)
141
(let* ((options (rlmdb:%shard-term-options %term))
142
(minusp (zerop (logand options #x01)))
143
(scale (ash options -1))
144
(value (rlmdb:%shard-term-data-integer %term-data)))
145
(when minusp (setf value (- value)))
146
(setf scale (if (zerop value) 1 (expt 10 scale)))
148
#+sbcl (sb-kernel::%make-ratio value scale)
149
#-sbcl (/ value scale))
150
(let ((*read-default-float-format* 'single-float))
151
(format stream "~f" (float value 1.0s0)))))
153
(let* ((integer (rlmdb:%shard-term-data-decimal-integer %term-data))
154
(fraction (if (zerop integer) 1 (rlmdb:%shard-term-data-decimal-fraction %term-data))))
155
(format stream "~d.~d" integer fraction)))))
157
(let ((*read-default-float-format* 'single-float))
158
(format stream "~f" (rlmdb:%shard-term-data-float %term-data))))
161
(let ((*read-default-float-format* 'double-float))
162
(format stream "~f" (rlmdb:%shard-term-data-double %term-data))))
166
(term-lexical-form (timeline-location-date-time (rlmdb:%shard-term-data-time %term-data)))))
170
(term-lexical-form (timeline-location-date (rlmdb:%shard-term-data-time %term-data)))))
174
(term-lexical-form (timeline-location-time (rlmdb:%shard-term-data-time %term-data)))))
179
(remove-method #'write-sparql-results+csv
180
(find-method #'write-sparql-results+tsv
182
(list (find-class 'solution-generator)
185
(defmethod write-sparql-results+csv ((results solution-generator) (stream t))
186
(let* ((dimensions (solution-generator-dimensions results))
187
(channel (solution-generator-channel results))
188
(variable-count (length dimensions))
190
(start (or (response-offset) 0))
191
(end (response-end)))
192
(format stream "~{~a~^,~}" dimensions)
194
(rlmdb::with-string-database (sdb)
195
(do-pages (page channel)
196
(if (and end (>= index end))
198
(if (>= (+ index (array-dimension page 0)) start)
199
(cond ((= variable-count (array-dimension page 1))
200
(dotimes (page-index (array-dimension page 0))
201
(when (>= index start)
202
(when (and end (>= index end))
204
(loop for value-index from 0 below variable-count
205
do (progn (unless (zerop value-index) (write-char #\, stream))
206
(let ((term-number (aref page page-index value-index)))
207
(cond ((= term-number +null-term-id+)
208
;; no content as per https://www.w3.org/TR/sparql11-results-csv-tsv/#csv-example
209
;; as it were, an empty string will be encoded with quotes
210
;; (write-string "\"\"" stream)
213
(encode-csv-term-number term-number stream))))))
217
(log-warn "field width mismatch: ~s : ~s."
218
dimensions (array-dimension page 1))
219
(incf index (array-dimension page 0))))
220
; otherwise skip the entire page
221
(incf index (array-dimension page 0)))))
222
(incf-stat *statements-returned* index))))