Coverage report: /development/source/library/org/datagraph/spocq-shard/src/core/encoding/terms/tsv-shard-term.lisp

KindCoveredAll%
expression244312 78.2
branch1724 70.8
Key
Not instrumented
Conditionalized out
Executed
Not executed
 
Both branches taken
One branch taken
Neither branch taken
1
 ;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
2
 
3
 (in-package :org.datagraph.spocq.implementation)
4
 
5
 ;;; replace the tsv field encoding operator with one which retrieves terms directly from lmdb.
6
 ;;; term encoding delegates to turtle methods
7
 
8
 ;;; there is no tsv rfc.
9
 ;;; there is an iana definition description. in that fields are unexcaped and do not permit tabs
10
 ;;;    https://www.iana.org/assignments/media-types/text/tab-separated-values
11
 ;;, the older encoding in src/encodinc/sparql-results-tsv.lisp is wrong. it uses turtle escapes
12
 
13
 (remove-method #'write-sparql-results+tsv
14
                (find-method #'write-sparql-results+tsv
15
                             nil
16
                             (list (find-class 'solution-generator)
17
                                   (find-class 't))))
18
 
19
 (defmethod write-sparql-results+tsv ((results solution-generator) (stream t))
20
   (rlmdb::with-string-database (sdb)
21
     (let* ((dimensions (solution-generator-dimensions results))
22
            (channel (solution-generator-channel results))
23
            (variable-count (length dimensions))
24
            (index 0)
25
            (start (or (response-offset) 0))
26
            (end (response-end))
27
            (*expand-literal-values* nil))
28
       (write-tsv-dimensions dimensions stream)
29
       (tsv-eol stream)
30
       (do-pages (page channel)
31
                 (if (and end (>= index end))
32
                     (return)
33
                     (if (>= (+ index (array-dimension page 0)) start)
34
                         (cond ((= variable-count (array-dimension page 1))
35
                                (dotimes (page-index (array-dimension page 0))
36
                                  (when (>= index start)
37
                                    (when (and end (>= index end))
38
                                      (return))
39
                                    (loop for value-index from 0 below variable-count
40
                                      do (progn
41
                                           (unless (zerop value-index) (write-char #\tab stream))
42
                                           (let ((term-number (aref page page-index value-index)))
43
                                             (cond ((= term-number +null-term-id+)) ; skip it
44
                                                   (t
45
                                                    (encode-tsv-term-number term-number stream))))))
46
                                    (tsv-eol stream))
47
                                  (incf index)))
48
                               (t
49
                                (log-warn "field width mismatch: ~s : ~s."
50
                                          dimensions (array-dimension page 1))
51
                                (incf index (array-dimension page 0))))
52
                         ; otherwise skip the entire page
53
                         (incf index (array-dimension page 0)))))
54
       (incf-stat *statements-returned* index))))
55
 
56
 
57
 (defun write-tsv-char (char stream)
58
   "emit a character.
59
    contrary to the iana document, sparql results stipulate limited escaping.
60
    https://www.w3.org/TR/sparql11-results-csv-tsv/#tsv-terms"
61
   (case char
62
     (#\linefeed (write-string "\\n" stream))
63
     (#\return (write-string "\\r" stream))
64
     (#\tab (write-string "\\t" stream))
65
     (t (write-char char stream)))
66
   char)
67
 
68
 (defun %write-tsv-string (%string stream byte-count)
69
   "Emit an external character data string to an utf-8 encoded stream with tsv escaping.
70
  This escapes the string terminator, whitespace-format, and control characters.
71
  All else is passed to the stream for utf-8 encoding."
72
   (unless byte-count (setf byte-count most-positive-fixnum)) ;; presume null-terminated
73
   (let ((dsu:*utf8-iso8859-allowed* t)
74
         (dsu:*utf8-surrogates-allowed* t)
75
         (decoder (load-time-value (dsu:content-encoding-byte-decoder (dsu:content-encoding :utf-8))))
76
         (i 0))
77
     (declare (type (function (function cffi:foreign-pointer) (or character null)) decoder)
78
              (type cffi:foreign-pointer %string)
79
              (type fixnum byte-count)
80
              (optimize (speed 3) (safety 0)))
81
     (flet ((get-byte (%string)
82
              (declare (type cffi:foreign-pointer %string))
83
              (when (< i byte-count)
84
                (let ((byte (cffi:mem-aref %string :uint8 i)))
85
                  (incf i)
86
                  (when (plusp byte) byte)))))
87
       (loop for char = (funcall decoder #'get-byte %string)
88
         while char
89
         do (write-tsv-char char stream))
90
       i)))
91
 
92
 (defun write-tsv-string-id (string-id stream)
93
   "Given an rdf db string id, retrieve the external data and write it
94
   to the stream as turtle in utf8."
95
   (flet ((%write-string-data (k raw-value)
96
            (declare (ignore k))
97
            (let* ((size (rlmdb.i::%mdb-val-size raw-value))
98
                   (%data (rlmdb.i::%mdb-val-data raw-value)))
99
              (%write-tsv-string %data stream size))))
100
     (declare (dynamic-extent #'%write-string-data))
101
     (rlmdb::call-with-shard-string string-id #'%write-string-data)))
102
 
103
 (defun cl-user::format-tsv-string-id (stream string-id &optional option arg)
104
   (declare (ignore option arg))
105
   (write-tsv-string-id string-id stream))
106
 
107
 (defun cl-user::%format-tsv-string (stream %string &optional option arg byte-count)
108
   (declare (ignore option arg))
109
   (assert (integerp byte-count))
110
   (%write-tsv-string %string stream byte-count))
111
 
112
 
113
 (defun encode-tsv-term-number (term-number stream)
114
   "Given a term number and a stream, encode the stored term properties
115
    (type, string, immediate value) to the given stream."
116
   (declare (type fixnum term-number))
117
   (let* ((%term (rlmdb:shard-term-fetch term-number))
118
          (term-type (rlmdb:%shard-term-type %term))
119
          (%term-data (rlmdb:%shard-term-data %term)))
120
     (declare (type cffi:foreign-pointer %term)
121
              (type symbol term-type)
122
              (type cffi:foreign-pointer %term-data))
123
 
124
     (ecase term-type
125
       (:node
126
        (ecase (rlmdb:%shard-term-subtype-node-subtype %term)
127
          (:node-genid
128
           (format stream "_:~@[~a_~]genid~d"
129
                   (blank-node-prefix)
130
                   (rlmdb:%shard-term-data-node-genid %term-data)))
131
          (:node-gensym
132
           (format stream "_:~@[~a_~]~V/%format-tsv-string/~d"
133
                   (blank-node-prefix)
134
                   ;; need a byte-count limited version of stream-write-turtle-external-utf8-string
135
                   4 (rlmdb:%shard-term-data-node-gensym-prefix %term-data)
136
                   (rlmdb:%shard-term-data-node-gensym-suffix %term-data)))
137
          (:node-short
138
           (format stream "_:~@[~a_~]~V/%format-tsv-string/"
139
                   (blank-node-prefix)
140
                   8 (rlmdb:%shard-term-data-node-label %term-data)))
141
          ((:node-long :none)
142
           (format stream "_:~@[~a_~]~/format-tsv-string-id/"
143
                   (blank-node-prefix)
144
                   (rlmdb:%shard-term-data-node-label-offset %term-data)))))
145
       (:uri
146
        (format stream "<~/format-tsv-string-id/>"
147
                (rlmdb:%shard-term-data-uri-string-offset %term-data)))
148
       (:string
149
        (ecase (rlmdb:%shard-term-subtype-string-subtype %term)
150
          (:string-short
151
           (format stream "\"~V/%format-tsv-string/\""
152
                   8 %term-data))
153
          (:string-long
154
           (let* ((string-offset (rlmdb:%shard-term-data-string-value-offset %term-data))
155
                  (language-offset (rlmdb:%shard-term-data-string-language-offset %term-data)))
156
             (if (plusp language-offset)
157
                 (format stream "\"~/format-tsv-string-id/@~/format-tsv-string-id/\"" string-offset language-offset)
158
                 (format stream "\"~/format-tsv-string-id/\"" string-offset))))))
159
       (:literal
160
        ;; introduce quotations for all literals even though it should matter for strings only
161
        ;; in distinction to csv, include data types
162
        (let ((lexical-form-id (rlmdb:%shard-term-data-literal-value-offset %term-data))
163
              (datatype-string-id (rlmdb:%shard-term-data-literal-datatype-offset %term-data)))
164
          (if (is-string-datatype-string-id datatype-string-id)
165
              (format stream "\"~/format-tsv-string-id/\""
166
                      lexical-form-id )
167
              ;; delegate to turtle iri encoding
168
              (format stream "\"~/format-tsv-string-id/\"^^<~/format-turtle-iri-string-id/>"
169
                      lexical-form-id datatype-string-id))))
170
       (:boolean
171
        ;; must return it directly to avoid anomolous appearances
172
        (format stream "\"~:[false~;true~]\"^^<http://www.w3.org/2001/XMLSchema#boolean>"
173
               (rlmdb:%shard-term-data-boolean %term-data)))
174
       (:integer
175
        (format stream "~a" (rlmdb:%shard-term-data-integer %term-data)))
176
 
177
       (:decimal
178
        (ecase (rlmdb:%shard-term-subtype-decimal-subtype %term)
179
          (:decimal-scaled
180
           (let* ((options (rlmdb:%shard-term-options %term))
181
                  (minusp (zerop (logand options #x01)))
182
                  (scale (ash options -1))
183
                  (value (rlmdb:%shard-term-data-integer %term-data)))
184
             (when minusp (setf value (- value)))
185
             (setf scale (if (zerop value) 1 (expt 10 scale)))
186
             (setf value
187
                   #+sbcl (sb-kernel::%make-ratio value scale)
188
                   #-sbcl (/ value scale))
189
             (let ((*read-default-float-format* 'single-float))
190
               (format stream "~f" (float value 1.0s0)))))
191
          ((:broken :none)
192
           (let* ((integer (rlmdb:%shard-term-data-decimal-integer %term-data))
193
                  (fraction (if (zerop integer) 1 (rlmdb:%shard-term-data-decimal-fraction %term-data))))
194
             (format stream "~d.~d" integer fraction)))))
195
       (:float
196
         (let ((*read-default-float-format* 'single-float))
197
           (format stream "~f"   (rlmdb:%shard-term-data-float %term-data))))
198
 
199
       (:double
200
        (let ((*read-default-float-format* 'double-float))
201
          (format stream "~f"(rlmdb:%shard-term-data-double %term-data))))
202
 
203
       (:datetime
204
        (format stream "\"~a\"^^<http://www.w3.org/2001/XMLSchema#dateTime>"
205
                (term-lexical-form (timeline-location-date-time (rlmdb:%shard-term-data-time %term-data)))))
206
 
207
       (:date
208
        (format stream "\"~a\"^^<http://www.w3.org/2001/XMLSchema#date>"
209
                (term-lexical-form (timeline-location-date (rlmdb:%shard-term-data-time %term-data)))))
210
 
211
       (:time
212
        (format stream "\"~a\"^^<http://www.w3.org/2001/XMLSchema#time>"
213
                (term-lexical-form (timeline-location-time (rlmdb:%shard-term-data-time %term-data)))))
214
 
215
       (:none
216
         nil))))