Coverage report: /development/source/library/org/datagraph/spocq-shard/src/core/rlmdb/utility.lisp
| Kind | Covered | All | % |
| expression | 145 | 600 | 24.2 |
| branch | 0 | 52 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
3
(in-package :org.datagraph.rdf.lmdb.implementation)
5
(:documentation "Utility operators:
7
- native (lmdb) <-> lisp conversion")
9
(:documentation "Temporal operators:
10
- combine timestamp locations with durations to yield target timestamps
13
(cffi:defcstruct log_record_v0
16
(transaction_uuid :unsigned-char :count 16)
17
(timestamp_begun :uint64 :offset 18)
18
(timestamp_committed :uint64 :offset 26)
19
(visible_count :uint64 :offset 34)
20
(removed_count :uint64 :offset 42)
21
(inserted_count :uint64 :offset 50))
22
;;; (+ 1 1 16 8 8 8 8 8)
24
(defparameter +log_record_v0-size+ 58)
26
(defmacro with-lmdb-value ((name size-form value-form) &body body)
27
`(lmdb::with-empty-value (,name)
28
(setf (cffi:foreign-slot-value ,name '(:struct liblmdb:val) 'liblmdb:mv-size) (cffi:make-pointer ,size-form))
29
(setf (cffi:foreign-slot-value ,name '(:struct liblmdb:val) 'liblmdb:mv-data) ,value-form)
32
(defmacro with-lmdb-values (bindings &body body)
33
`(with-lmdb-value ,(pop bindings)
34
,@(if bindings `((with-lmdb-values ,bindings ,@body)) body)))
36
(defmacro %mdb-val-size (%val)
37
`(cffi:pointer-address (cffi:foreign-slot-value ,%val '(:struct liblmdb:val) 'liblmdb:mv-size)))
38
(defsetf %mdb-val-size (%val) (size)
39
`(setf (cffi:foreign-slot-value ,%val '(:struct liblmdb:val) 'liblmdb:mv-size) (cffi:make-pointer ,size)))
41
(defmacro %mdb-val-data (%val)
42
`(cffi:foreign-slot-value ,%val '(:struct liblmdb:val) 'liblmdb:mv-data))
43
(defsetf %mdb-val-data (%val) (%data)
44
`(setf (cffi:foreign-slot-value ,%val '(:struct liblmdb:val) 'liblmdb:mv-data) ,%data))
46
(defmacro rlmdb::with-string-database ((db &rest args) &body body)
47
(let ((op (gensym "wsb-")))
49
(declare (ignorable ,db))
51
(declare (dynamic-extent #',op))
52
(rlmdb::call-with-string-database #',op ,@args))))
54
(defstruct rlmdb:revision-record
55
"A revision-record collects the minimal revision description:
57
- the respective UUID (as string)
58
- the respective commit timestamp as :uint64
59
This is sufficient to capture the metadata"
60
(ordinal 0 :type integer)
61
(uuid +null-uuid-string+ :type string)
62
(timestamp 0 :type (unsigned-byte 64))
63
(timestamp-begun 0 :type (unsigned-byte 64)))
65
(defstruct (rlmdb:metadata-record (:include rlmdb:revision-record)
66
(:constructor make-metadata-record
67
(&key |revision-id| (ordinal |revision-id|)
68
|revision-uuid| (uuid |revision-uuid|)
69
|revision-time| (timestamp |revision-time|)
70
(timestamp-begun timestamp)
71
|class| (class |class|)
72
;; the metadata db can contain other properties
74
"A metadata-record captures global information about the repository
75
which reflects it s current state and type.
76
One is constructed for repositories which have no revision
77
as a minimal revision record based on the content of their metadata db.
78
In addition to the revision state, it can indicate a class."
79
(class nil :type symbol)
82
(defstruct (rlmdb:revision-log-record (:include rlmdb:revision-record))
83
"A revision-log-record adds the full revision information to the
84
basic revision record."
85
(version 0 :type (unsigned-byte 8))
86
(flags 0 :type (unsigned-byte 8))
87
(visible-count 0 :type (unsigned-byte 64))
88
(removed-count 0 :type (unsigned-byte 64))
89
(inserted-count 0 :type (unsigned-byte 64)))
91
(defstruct (rlmdb::bitemporal-revision-log-record (:include rlmdb:revision-log-record))
92
"A bitmporal revision-log-record adds validity time"
93
(timestamp-valid 0 :type (unsigned-byte 64)))
95
(defgeneric rlmdb:is-valid-revision-record (record)
96
(:method ((record t)) nil)
97
(:method ((record rlmdb:revision-record))
98
(typep (revision-record-timestamp record) '(integer 1))))
100
(defgeneric rlmdb:revision-record-date-time (record)
101
(:method ((record rlmdb:revision-record))
102
(spocq.i:timeline-location-date-time (rlmdb:revision-record-timestamp record))))
104
(defgeneric rlmdb:revision-record-begun-date-time (record)
105
(:method ((record rlmdb:revision-record))
106
(spocq.i:timeline-location-date-time (rlmdb:revision-record-timestamp-begun record))))
108
(defgeneric rlmdb:revision-record-valid-date-time (record)
109
(:method ((record rlmdb:revision-record))
110
(spocq.i:timeline-location-date-time (rlmdb:revision-record-timestamp record)))
111
(:method ((record rlmdb:bitemporal-revision-log-record))
112
(spocq.i:timeline-location-date-time (rlmdb:bitemporal-revision-log-record-timestamp-valid record))))
114
(defun %print-vector (%vector count stream)
115
(format stream "~&[~s](~{~2,'0x~^ ~})~%"
117
(loop for i below count
118
collect (cffi:mem-aref %vector :uint8 i)))
121
(defgeneric location-plus-duration (date-time duration)
122
(:method ((date-time integer) (duration null))
124
(:method ((date-time integer) (duration integer))
125
(spocq.i::date-time-timeline-location
126
(spocq.e:+ (spocq.i::timeline-location-date-time date-time)
127
(spocq.i::timeline-location-day-time-duration duration))))
128
(:method ((date-time integer) (duration spocq:day-time-duration))
129
(spocq.i::date-time-timeline-location
130
(spocq.e:+ (spocq.i::timeline-location-date-time date-time)
132
(:method ((date-time spocq:date-time) (duration null))
133
(spocq.i::date-time-timeline-location date-time))
134
(:method ((date-time spocq:date-time) (duration integer))
135
(spocq.i::date-time-timeline-location
137
(spocq.i::timeline-location-day-time-duration duration))))
138
(:method ((date-time spocq:date-time) (duration spocq:day-time-duration))
139
(spocq.i::date-time-timeline-location
144
(defun decode-metadata-symbol (data)
145
"Decode a package-qualified symbol.
147
- do not unescape any characters!"
148
(let* ((colon (position (char-code #\:) data))
149
(package-name (when colon (map 'string #'code-char (subseq data 0 colon))))
150
(symbol-name (map 'string #'code-char (if colon (subseq data (1+ colon)) data))))
152
(intern symbol-name package-name)
153
(intern symbol-name))))
155
(defun decode-metadata-string (data)
156
"Decode as a string. metadata is w/o unicode & sized, so w/o null termination!!!"
157
(map 'string #'code-char data))
159
(defun decode-metadata-unicode-string (data)
160
"Decode as a unicode string with possible null termination"
161
(let* ((decoder (load-time-value (content-encoding-byte-decoder (dsu:content-encoding :utf-8))))
162
(byte-count (length data))
163
(string (make-array byte-count :element-type 'character :fill-pointer 0))
165
(flet ((get-byte (data)
166
(when (< i byte-count)
167
(let ((byte (aref data i)))
169
(when (plusp byte) byte)))))
170
(loop for char = (funcall decoder #'get-byte data)
172
do (vector-push-extend char string))
176
(defgeneric rlmdb:decode-metadata (name %data-vector)
177
(:documentation "LMDB data is handled as an external byte vector.
178
Conversions are per-property:
179
- timestamps : 8byte unsigned
180
- ordinals : 4-byte unsigned
181
- uuids : 16-byte vector to string through uuid conversion
183
(:method ((name string) (data t))
184
(rlmdb:decode-metadata (intern name :keyword) data))
185
(:method ((name (eql :|revision-id|)) (data vector))
186
(dsu.codecs::BUFFER-GET-UNSIGNED-BYTE-32-LE data 0))
187
(:method ((name (eql :|revision-time|)) (data vector))
188
"For a revision timestamp, return the timeline location"
189
(dsu.codecs::BUFFER-GET-UNSIGNED-BYTE-64-LE data 0))
190
(:method ((name (eql :|revision-uuid|)) (%data SB-SYS:SYSTEM-AREA-POINTER))
191
(string-downcase (uuid:byte-array-to-string (%get-vector %data 16))))
192
(:method ((name (eql :|revision-uuid|)) (data vector))
193
(string-downcase (uuid:byte-array-to-string data)))
194
(:method ((name (eql :|source-repository|)) (data vector))
195
(decode-metadata-string data))
196
(:method ((name (eql :|view-name|)) (data vector))
197
(decode-metadata-string data))
198
(:method ((name (eql :|storage-class|)) (data vector))
199
(decode-metadata-symbol data))
200
(:method ((name (eql :|class|)) (data vector))
201
(decode-metadata-symbol data))
202
(:method ((name t) (data vector))
206
(defun encode-metadata-symbol (data)
207
"Decode a package-qualified symbol.
209
- do not unescape any characters!"
210
(let* ((symbol-name (symbol-name data))
211
(package-name (package-name (symbol-package data)))
212
(buffer (make-array (+ (length symbol-name) 1 (length package-name)) :element-type '(unsigned-byte 8))))
213
(loop for char across package-name
215
do (assert (not (eql char #\:)) ()
216
"encode-metadata-symbol: package name includes #\: ~s" package-name)
217
do (setf (aref buffer i) (char-code char)))
218
(setf (aref buffer (length package-name)) (char-code #\:))
219
(loop for char across symbol-name
220
for i from (1+ (length package-name))
221
do (setf (aref buffer i) (char-code char)))
224
(defgeneric rlmdb:encode-metadata (name value)
225
(:documentation "LMDB data is handled as a byte vector.
226
Conversions are per-property:
227
- timestamps : 8byte unsigned
228
- ordinals : 4-byte unsigned
229
- uuids : 16-byte vector to string through uuid conversion
231
(:method ((name string) (value t))
232
(rlmdb:encode-metadata (intern name :keyword) value))
233
(:method ((name (eql :|revision-id|)) (value integer))
234
(let ((buffer (make-array 4 :element-type '(unsigned-byte 8))))
235
(dsu.codecs::BUFFER-SET-UNSIGNED-BYTE-32-LE buffer value 0)
237
(:method ((name (eql :|revision-time|)) (value integer))
238
(let ((buffer (make-array 8 :element-type '(unsigned-byte 8))))
239
(dsu.codecs::BUFFER-SET-UNSIGNED-BYTE-64-LE buffer value 0)
241
(:method ((name (eql :|revision-uuid|)) (data string))
242
(spocq.i::string-to-uuid data (spocq.i::make-uuid-vector)))
243
(:method ((name (eql :|class|)) (data symbol))
244
(encode-metadata-symbol data))
245
(:method ((name (eql :|storage-class|)) (data symbol))
246
(encode-metadata-symbol data))
247
(:method ((name t) (data vector))
250
(defgeneric %put-uuid (uuid %uuid)
251
(:method ((uuid string) (%uuid SB-SYS:SYSTEM-AREA-POINTER))
252
(%put-uuid (rlmdb:encode-metadata uuid :|revision-uuid|) %uuid))
253
(:method ((uuid vector) (%uuid SB-SYS:SYSTEM-AREA-POINTER))
254
(%put-vector uuid %uuid)))
256
(defun %put-vector (vector %data)
257
(loop for i from 0 below (length vector)
258
do (setf (cffi:mem-aref %data :uint8 i) (elt vector i)))
261
(defun %get-vector (%data size)
262
(let ((vec (make-array size :element-type '(unsigned-byte 8))))
263
(loop for i from 0 to (1- size) do
264
(setf (elt vec i) (cffi:mem-aref %data :unsigned-char i)))
267
(defun %decode-revision-record (k v)
268
(let ((record (cffi:foreign-slot-value v '(:struct liblmdb:val) 'liblmdb:mv-data)))
269
(make-revision-record
270
:ordinal (cffi:mem-ref (cffi:foreign-slot-value k '(:struct liblmdb:val) 'liblmdb:mv-data) :uint32)
271
:timestamp (cffi:foreign-slot-value record '(:struct log_record_v0) 'timestamp_committed)
272
:uuid (rlmdb:decode-metadata :|revision-uuid| (cffi:foreign-slot-value record '(:struct log_record_v0) 'transaction_uuid)))))
274
(defun %decode-revision-log-record (record)
275
(make-revision-log-record
276
:version (cffi:foreign-slot-value record '(:struct log_record_v0) 'version)
277
:flags (cffi:foreign-slot-value record '(:struct log_record_v0) 'flags)
278
:uuid (rlmdb:decode-metadata :|revision-uuid| (cffi:foreign-slot-value record '(:struct log_record_v0) 'transaction_uuid))
279
:timestamp (cffi:foreign-slot-value record '(:struct log_record_v0) 'timestamp_committed)
280
:timestamp-begun (cffi:foreign-slot-value record '(:struct log_record_v0) 'timestamp_begun)
281
;; nb. the visible count is not maintained by the rdfcache implementation
282
:visible-count (cffi:foreign-slot-value record '(:struct log_record_v0) 'visible_count)
283
:removed-count (cffi:foreign-slot-value record '(:struct log_record_v0) 'removed_count)
284
:inserted-count (cffi:foreign-slot-value record '(:struct log_record_v0) 'inserted_count)))
286
(cffi:with-foreign-object (%record '(:struct log_record_v0))
287
(%decode-revision-log-record
288
(%encode-revision-log-record
289
(make-revision-log-record :ordinal 1
290
:uuid "11111111-2222-3333-4444-555555555555"
300
(defgeneric %encode-revision-log-record (record %record)
301
(:method ((record rlmdb:revision-log-record) (%record SB-SYS:SYSTEM-AREA-POINTER))
302
(cffi:with-foreign-slots ((version flags
303
timestamp_committed timestamp_begun
304
visible_count removed_count inserted_count)
306
(:struct log_record_v0))
307
(spocq.i::%encode-uuid (rlmdb:revision-log-record-uuid record) ;; is case-independent
308
(cffi:foreign-slot-pointer %record '(:struct log_record_v0) 'transaction_uuid))
309
(setf version (rlmdb:revision-log-record-version record)
310
flags (rlmdb:revision-log-record-flags record)
311
timestamp_committed (rlmdb:revision-log-record-timestamp record)
312
timestamp_begun (rlmdb:revision-log-record-timestamp-begun record)
313
visible_count (rlmdb:revision-log-record-visible-count record)
314
removed_count (rlmdb:revision-log-record-removed-count record)
315
inserted_count (rlmdb:revision-log-record-inserted-count record))
318
(:documentation "LMDB rdf revision verification."
319
"A repository with revisions stores a presence vector as the respective
320
quad value. This is a vector of revision ordinalities which takes on one of three forms:
321
- [] : no revisions, a quad is always present
322
- [ordinal]: one revision, a quad is present at that revision and all successors
323
- [ordinalStart, ordinalEnd, ...] the quad is present at or after the start and before the end.
325
the search process is
326
- find the highest at or below a given ordinal,
327
- if the index is odd, the quad is present, while if it is even, the quad is absent.
332
(defun %test-visibility (ordinal %vector length)
333
"Locate the highest ordinal less than that given.
334
Iff it is even, then the quad is visible in that revision.
335
Handle the degenerate cases without searching:
336
- length 0 implies unrevisioned
337
- length 1 implies presence at or after after that ordinal
338
- length 2 implies between the two"
339
(labels ((test-ordinal (test-position)
340
(let ((test-ordinal (cffi:mem-aref %vector :uint32 test-position)))
341
(cond ((< test-ordinal ordinal) -1)
342
((> test-ordinal ordinal) 1)
344
(binary-search (start end)
345
(let* ((test-position (ash (+ start end) -1))
346
(result (test-ordinal test-position)))
347
;; (format t "~&next: start ~s end ~s test-position ~s result ~s~%" start end test-position result)
350
(-1 (if (= test-position start)
352
(binary-search test-position end)))
353
(+1 (if (= test-position start)
355
(binary-search start test-position)))))))
358
(let ((vector (make-array length)))
359
(loop for i below length
360
do (setf (aref vector i) (cffi:mem-aref %vector :uint32 i)))
365
(1 (>= ordinal (cffi:mem-aref %vector :uint32 0)))
366
;; for a very small repository, this specal case was worth 0.17 elapsed time
367
(2 (and (>= ordinal (cffi:mem-aref %vector :uint32 0))
368
(< ordinal (cffi:mem-aref %vector :uint32 1))))
369
(t (let ((position (binary-search 0 length)))
371
(cond ((evenp position)
373
((and (oddp position) (> length (1+ position)) (= ordinal (cffi:mem-aref %vector :uint32 (1+ position))))
374
;; if the found value was a delete, but the next is an insert, then it is present
375
(values t (1+ position)))
377
(values nil position)))))))))
381
(flet ((trace-%test-visibility (original ordinal %vector length)
382
(let ((result (multiple-value-list (funcall original
383
ordinal %vector length))))
384
(print (list '%test-visibility result
386
:vector (let ((vector (make-array length)))
387
(loop for i below length
388
do (setf (aref vector i) (cffi:mem-aref %vector :uint32 i)))
390
(apply #'values result))))
391
(sb-int:encapsulate '%test-visibility 'trace #'trace-%test-visibility))
392
(sb-int:unencapsulate '%test-visibility 'trace ))
394
(defun test-visibility (ordinal vector length)
395
(test-ordinal-visibility ordinal vector length))
397
(defun test-ordinal-visibility (ordinal vector length)
398
(labels ((test-ordinal (test-position)
399
(let ((test-ordinal (aref vector test-position)))
400
(cond ((< test-ordinal ordinal) -1)
401
((> test-ordinal ordinal) +1)
403
(binary-search (start end)
404
(let* ((test-position (floor (ash (+ start end) -1)))
405
(result (test-ordinal test-position)))
406
; (format t "~&next: start ~s end ~s test-position ~s result ~s~%" start end test-position result)
409
(-1 (if (= test-position start)
411
(binary-search test-position end)))
412
(+1 (if (= test-position start)
414
(binary-search start test-position)))))))
417
(1 (values (>= ordinal (aref vector 0))
419
(t (let ((position (binary-search 0 length)))
421
(cond ((evenp position)
423
((and (oddp position) (> length (1+ position)) (= ordinal (aref vector (1+ position))))
424
(values t (1+ position)))
426
(values nil position)))))))))
427
;;; (trace test-visibility)
428
;;; (loop for ordinal below 10 collect (cons ordinal (multiple-value-list (test-visibility ordinal #(1 3 5 7) 4))))
429
;;; (test-visibility 24 #(21 22 22 23 23 24 24) 7)
430
;;; (test-visibility 24 #(21 22 22 23 23 24) 6)
431
;;; (test-visibility 24 #(21 22 22 23 23 24 25) 7)
434
(defgeneric repository-ordinal-timestamp (repository ordinal)
435
(:documentation "given an ordinal locate the respective revision record
436
and return its timestamp"))
438
(defun test-timestamp-visibility (timestamp vector length)
439
"Given a timestamp and a sized ordinal presence vector, locate the highest position
440
for which the respective present timestamp is <= that given"
441
(labels ((test-ordinal (test-position)
442
(let* ((test-ordinal (aref vector test-position))
443
(test-timestamp (repository-ordinal-timestamp *metadata-repository* test-ordinal)))
444
(cond ((< test-timestamp timestamp) -1)
445
((> test-timestamp timestamp) +1)
447
(binary-search (start end)
448
(let* ((test-position (floor (ash (+ start end) -1)))
449
(result (test-ordinal test-position)))
450
; (format t "~&next: start ~s end ~s test-position ~s result ~s~%" start end test-position result)
453
(-1 (if (= test-position start)
455
(binary-search test-position end)))
456
(+1 (if (= test-position start)
458
(binary-search start test-position)))))))
461
(1 (values (>= timestamp (aref vector 0))
463
(t (let ((position (binary-search 0 length)))
465
(cond ((evenp position)
467
((and (oddp position) (> length (1+ position)) (= timestamp (aref vector (1+ position))))
468
(values t (1+ position)))
470
(values nil position))))))))))
472
(defun get-visibility-set (vector length last-ordinal)
473
"Locate the highest ordinal less that that given.
474
Iff it is even, then the quad is visible in that revision.
475
Handle the degenerate cases without searching."
476
(loop for index from 0 below length by 2
477
if (< index (1- length))
478
append (loop for i from (aref vector index)
479
below (aref vector (1+ index))
481
else append (loop for i from (aref vector index) to last-ordinal
483
finally (return set)))
484
;;; (get-visibility-set #() 0 9)
485
;;; (get-visibility-set #(1) 1 9)
486
;;; (get-visibility-set #(1 3 5 7) 4 9)
487
;;; (get-visibility-set #(1 3 5 7 8) 5 9)
490
(defun %test-visibility-range (min-ordinal max-ordinal %vector length &key (from-end nil))
491
"Given inclusive ordinal bounds, test their visibility with respect to a revision history"
492
(if (= min-ordinal max-ordinal)
493
(%test-visibility min-ordinal %vector length)
495
(loop for ordinal from max-ordinal downto min-ordinal
496
for position = (%test-visibility ordinal %vector length)
497
when position return position)
498
(loop for ordinal from min-ordinal to max-ordinal
499
for position = (%test-visibility ordinal %vector length)
500
when position return position))))
503
(defun %get-visibility-set (%vector length last-ordinal)
504
"Locate the highest ordinal less that that given.
505
Iff it is even, then thequad is visible in that revision.
506
Handle the degenerate cases without searching."
507
(loop for index from 0 below length by 2
508
if (< index (1- length))
509
append (loop for i from (cffi:mem-aref %vector :uint32 index)
510
below (cffi:mem-aref %vector :uint32 (1+ index))
512
else append (loop for i from (cffi:mem-aref %vector :uint32 index) to last-ordinal
514
finally (return set)))
516
(defun %visibility-record-to-vector (%vector vector)
517
(loop for index from 0 below (length vector)
518
do (setf (aref vector index) (cffi:mem-aref %vector :uint32 index))
519
finally (return vector)))
521
(defun decode-visibility-record (vector %vector count)
522
(when (> count (array-dimension vector 0))
523
(setf vector (adjust-array vector count)))
524
(when (array-has-fill-pointer-p vector) (setf (fill-pointer vector) count))
525
(%visibility-record-to-vector %vector vector)
528
(defun decode-ordinal-record (%vector count &aux (vector (make-array count)))
529
(%visibility-record-to-vector %vector vector)
532
(defun encode-visibility-record (vector %vector)
533
(loop for index from 0 below (length vector)
534
do (setf (cffi:mem-aref %vector :uint32 index) (aref vector index))
535
finally (return %vector)))
537
(defun decode-db-name (string)
538
(map 'string #'code-char
539
(map 'vector #'(lambda (string) (parse-integer string :radix 16))
540
(loop for i below (length string) by 2
541
collect (subseq string i (+ i 2))))))
542
;;; /srv/dydra/storage/strings.mdb
543
;;; (decode-db-name "736861313a753332") : "sha1:u32"
544
;;; (decode-db-name "7533323a63737472") : "u32:cstr"
545
;;; (decode-db-name "7533323a73686131") : "u32:sha1"