Coverage report: /development/source/library/org/datagraph/spocq-shard/src/store/rlmdb/string-database.lisp
| Kind | Covered | All | % |
| expression | 214 | 444 | 48.2 |
| branch | 6 | 10 | 60.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.rdf.lmdb.implementation; -*-
2
;;; (load "patches/shard-term.lisp")
4
;;; implement direct access to persistent term definitions
8
(defclass rlmdb::string-dictionary (lmdb:environment)
10
:accessor dictionary-id-sha1-database)
12
:accessor dictionary-id-utf8-database)
14
:accessor dictionary-sha1-id-database)
16
:initform 4 :initarg :id-size
17
:reader dictionary-id-size)
19
:initform nil :initarg :read-only
20
:reader dictionary-read-only-p)))
23
;;; /srv/dydra/storage/strings.mdb
24
;;; (decode-db-name "736861313a753332") : "sha1:u32"
25
;;; (decode-db-name "7533323a63737472") : "u32:cstr"
26
;;; (decode-db-name "7533323a73686131") : "u32:sha1"
29
(defmethod initialize-instance ((instance rlmdb::string-dictionary) &rest initargs
30
&key (directory *string-db-pathname*)
31
(mapsize *string-db-mapsize*)
32
(max-readers rlmdb:*max-readers*))
33
(apply #'call-next-method instance
37
:max-readers max-readers
40
(lmdb:open-environment instance)
41
;; when working with existing string databases this is not the right thing to do...
42
;; there is no creation operator. this ensure string 0 is ""
43
;; )) (rlmdb::shard-string-store instance "")
46
;;; (defparameter *term-dictionary* (make-instance 'rlmdb::term-dictionary))
47
;;; (lmdb:close-environment *term-dictionary*)
50
(defmethod lmdb:open-environment ((dictionary rlmdb::string-dictionary) &rest args)
52
(apply #'rlmdb:open-environment-databases dictionary args))
56
(defmethod rlmdb:open-environment-databases progn ((dictionary rlmdb::string-dictionary)
57
&key (if-does-not-exist :create))
58
;; this process creates a database instance based on the given transaction,
59
;; initializes it and then leaves it open for use with other transactions.
60
;; the initial transaction remains in place, but is not used subsequently.
61
(setf (dictionary-id-sha1-database dictionary)
62
(ensure-environment-database dictionary "u32:sha1"
63
:if-does-not-exist if-does-not-exist
64
:class 'rlmdb:database))
65
(setf (dictionary-id-utf8-database dictionary)
66
(ensure-environment-database dictionary "u32:cstr"
67
:if-does-not-exist if-does-not-exist
68
:class 'rlmdb:database))
69
(setf (dictionary-sha1-id-database dictionary)
70
(ensure-environment-database dictionary "sha1:u32"
71
:if-does-not-exist if-does-not-exist
72
:class 'rlmdb:database)))
75
"This intends a process to open just one u23:cstr database in just one string
76
dictionary environment. Both are bound lazily to global variables.
77
Threads operate on the database by establishing respective transactions.
78
Should it become necessary to open more than on string database, a thread must
79
rebind *string-db* and *string-db-environment* to nil and *string-db-pathname*
80
to the local dictionary location and proceed with the dynamic bindings.")
82
(defun rlmdb::string-database-environment ()
83
"Lazily instantiate and open a string dictionary and its database and bind
84
it to the global *string-db-environment*."
85
(or *string-db-environment*
86
(setf *string-db-environment* (make-instance 'rlmdb::string-dictionary :directory *string-db-pathname*))))
88
(defun rlmdb::string-database ()
89
"Lazily bind the u32:cstr database from the current environment to the global
93
(dictionary-id-utf8-database (rlmdb::string-database-environment)))))
95
(defparameter *string-sha1-db* nil)
96
(defun rlmdb::string-sha1-database ()
97
"Lazily bind the sha1:u32 database from the current environment to the global
100
(setf *string-sha1-db*
101
(dictionary-sha1-id-database (rlmdb::string-database-environment)))))
106
(defun funcall-sdo (operator database)
107
(funcall operator database))
109
(defun rlmdb::call-with-string-database (operator &key (flags liblmdb:+rdonly+) (normal-disposition :abort))
110
"Invoke the given operator on the open id->utf8 database within a transaction.
111
If a transaction is already active, then that context is used with the global database.
112
Otherwise ensure that the environment and database are open, establish a
113
transaction and invoke the operator on the database in that new context."
114
(cond ((and *string-db-transaction* (= flags liblmdb:+rdonly+))
115
(funcall-sdo operator (rlmdb::string-database)))
117
(rlmdb::string-database)
118
(lmdb:with-transaction ((*string-db-transaction* (lmdb:make-transaction *string-db-environment* :flags flags))
119
:normal-disposition normal-disposition)
120
(funcall-sdo operator *string-db*)))))
122
(defmacro rlmdb::with-string-sha1-database ((db &rest args) &body body)
123
(let ((op (gensym "wsb-")))
125
(declare (ignorable ,db))
127
(declare (dynamic-extent #',op))
128
(rlmdb::call-with-string-sha1-database #',op ,@args))))
130
(defun rlmdb::call-with-string-sha1-database (operator &key (flags liblmdb:+rdonly+) (normal-disposition :abort))
131
"Invoke the given operator on the open id->utf8 database within a transaction.
132
If a transaction is already active, then that context is used with the global database.
133
Otherwise ensure that the environment and database are open, establish a
134
transaction and invoke the operator on the database in that new context."
135
(cond ((and *string-db-transaction* (= flags liblmdb:+rdonly+))
136
(funcall-sdo operator (rlmdb::string-sha1-database)))
138
(rlmdb::string-sha1-database)
139
(lmdb:with-transaction ((*string-db-transaction* (lmdb:make-transaction *string-db-environment* :flags flags))
140
:normal-disposition normal-disposition)
141
(funcall-sdo operator *string-sha1-db*)))))
143
;;; (rlmdb:with-string-database (sdb) (spocq.i::encode-turtle-term-number 1 *trace-output*))
146
the dynamic state during query evaluation is such that, in order that transactions on the string database
147
extend beyond a single fetch, they must be interleaved with index transactions.
148
for example, in a case where a thread which is iterating through a bgp also interprets pushed filters,
149
the bgp is compiled to establish a string-db transaction at the outset.
150
in the course of the iteration, any index scan established its own transaction over the index database.
151
this shadows the string-db transaction.
152
to allow for this, call-with-string-database binds also the string-db transaction and tests for
155
at the point of the fetch the state can be
156
(:DB #<LMDB:DATABASE "u32:cstr" {101879E403}> :SDB
157
#<LMDB:DATABASE "u32:cstr" {101879E403}> :SDBE
158
#<ORG.DATAGRAPH.RDF.LMDB::STRING-DICTIONARY "strings.mdb" #.(SB-SYS:INT-SAP #X7AE92C008960)[#.(SB-SYS:INT-SAP #X7AE92C001D00)] {101879B093}>
160
#<ORG.DATAGRAPH.RDF.LMDB:TRANSACTION #<ORG.DATAGRAPH.RDF.LMDB:REPOSITORY openrdf-sesame/collation "76745d0a-7b23-e94d-ab49-6b7c9a4d6bdb" #.(SB-SYS:INT-SAP #X0257C2D0)[#.(SB-SYS:INT-SAP #X0227AEB0)] {1015BAE303}>@7 {101879F4C3}>
162
#<ORG.DATAGRAPH.RDF.LMDB:TRANSACTION #<ORG.DATAGRAPH.RDF.LMDB::STRING-DICTIONARY "strings.mdb" #.(SB-SYS:INT-SAP #X7AE92C008960)[#.(SB-SYS:INT-SAP #X7AE92C001D00)] {101879B093}>@19263978 {101879E653}>)
164
if the is performed on the index database transaction, but supplies the string database, the operation complete, but returns a not-found error code. (!?!)
167
(defun rlmdb:shard-string-fetch (string-id)
168
"Fetch the identified string from the current *string-db*"
169
(labels ((decode-null-terminated-string (k raw-value)
171
(let* ((size (rlmdb.i::%mdb-val-size raw-value))
172
(%data (rlmdb.i::%mdb-val-data raw-value)))
173
(cffi:foreign-string-to-lisp %data :count (if (zerop (cffi:mem-aref %data :uint8 (1- size))) (1- size) size)))))
174
(declare (dynamic-extent #'decode-null-terminated-string))
175
(rlmdb:with-string-database (db)
176
(cond ((lmdb:get-with db string-id #'decode-null-terminated-string :transaction *string-db-transaction*))
178
;; if it was not found, reset the transaction and try again
179
(lmdb:reset-transaction *string-db-transaction*)
180
(lmdb:renew-transaction *string-db-transaction*)
181
(lmdb:get-with db string-id #'decode-null-terminated-string :transaction *string-db-transaction*))))))
183
;; (rlmdb::with-string-database (db) (shard-string-fetch 98427199))
185
(defun rlmdb::call-with-shard-string (string-id op)
186
"In order to include strings commited subsequent to the primary transaction,
187
perform the get retry with a new transaction if the first one does not produce a result
188
the operator must requrn non-nil when it has been invoked.
189
The operator must return non-null if it runs"
190
(declare (dynamic-extent op))
191
(rlmdb:with-string-database (db)
192
(cond ((lmdb:get-with db string-id op :transaction *string-db-transaction*))
194
;; if it was not found, reset the transaction and try again
195
(lmdb:reset-transaction *string-db-transaction*)
196
(lmdb:renew-transaction *string-db-transaction*)
197
(lmdb:get-with db string-id op :transaction *string-db-transaction*)))))
200
(defgeneric rlmdb::string-dictionary-get (key)
201
(:method ((key integer))
202
(rlmdb:with-string-database (db)
203
(cffi:with-foreign-object (%id :uint32)
204
(setf (cffi:mem-ref %id :uint32) key)
205
(with-lmdb-values ((%id-key&value (dictionary-id-size *string-db-environment*) %id)
206
(%utf8-data 0 (cffi:null-pointer)))
207
(let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
211
(alexandria:switch (return-code)
213
(let* ((size (rlmdb.i::%mdb-val-size %utf8-data))
214
(%data (rlmdb.i::%mdb-val-data %utf8-data)))
215
(cffi:foreign-string-to-lisp %data :count (if (zerop (cffi:mem-aref %data :uint8 (1- size))) (1- size) size))))
219
(lmdb::unknown-error return-code))))))))
221
(:method ((key string))
222
(let* ((length (length key))
223
(buffer-length (1+ (* length 4)))
224
(buffer (make-array buffer-length :element-type '(unsigned-byte 8) :fill-pointer 0))
225
(encoder (load-time-value (content-encoding-byte-encoder (content-encoding :utf-8)))))
226
(flet ((buffer-insert-byte (buffer byte)
227
(declare (type (array (unsigned-byte 8) (*)) buffer))
228
(declare (type (unsigned-byte 8) byte))
229
;; check bounds here as it's finally the encoded positioning
230
(vector-push-extend byte buffer)))
231
(declare (dynamic-extent #'buffer-insert-byte))
233
(funcall encoder (char key i) #'buffer-insert-byte buffer))
235
(vector-push-extend 0 buffer)
236
(rlmdb::string-dictionary-get buffer))))
238
(:method ((utf8-vector vector))
239
"Given the encoded string, compute its sha1 (w/o null termination),
240
open a transaction over the id->string and sha1->id databases
241
retrieve the integer id"
242
(let ((sha1-vector (ironclad:digest-sequence 'ironclad:sha1 utf8-vector :end (1- (length utf8-vector)))))
243
(cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
244
(with-lmdb-values ((%sha1-key 20 %sha1))
245
(rlmdb::with-string-sha1-database (db)
247
(format *trace-output* "rlmdb::string-dictionary-get ~s"
248
(list lmdb::*transaction* (lmdb::handle lmdb::*transaction*)
249
*string-db-transaction* (lmdb::handle *string-db-transaction*)
250
:id-sha1 (dictionary-id-sha1-database *string-db-environment*) (lmdb::handle (dictionary-id-sha1-database *string-db-environment*))
251
:id-utf8 (dictionary-id-utf8-database *string-db-environment*) (lmdb::handle (dictionary-id-utf8-database *string-db-environment*))
252
:sha1-id (dictionary-sha1-id-database *string-db-environment*) (lmdb::handle (dictionary-sha1-id-database *string-db-environment*))
254
(with-lmdb-values ((%id-key&value 0 (cffi:null-pointer)))
255
(let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
256
(lmdb::handle *string-sha1-db*)
259
(alexandria:switch (return-code)
261
(cffi:mem-ref (CFFI:FOREIGN-SLOT-VALUE %ID-KEY&VALUE '(:STRUCT LIBLMDB:VAL)
267
(lmdb::unknown-error return-code)))))))))))
270
;; (rlmdb::string-dictionary-get "http://www.w3.org/2001/XMLSchema#string")
271
;; (rlmdb::string-dictionary-get 1)
272
;;; (rlmdb::string-dictionary-get "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
273
;; (rlmdb::string-dictionary-put "one-two-three")
274
;; (rlmdb::string-dictionary-put "one-two-three-four")
276
(defgeneric rlmdb::string-dictionary-put (string)
277
(:method ((string string))
278
(let* ((length (length string))
279
(buffer-length (1+ (* length 4)))
280
(buffer (make-array buffer-length :element-type '(unsigned-byte 8) :fill-pointer 0))
281
(encoder (load-time-value (content-encoding-byte-encoder (content-encoding :utf-8)))))
282
(flet ((buffer-insert-byte (buffer byte)
283
(declare (type (array (unsigned-byte 8) (*)) buffer))
284
(declare (type (unsigned-byte 8) byte))
285
;; check bounds here as it's finally the encoded positioning
286
(vector-push-extend byte buffer)))
287
(declare (dynamic-extent #'buffer-insert-byte))
289
(funcall encoder (char string i) #'buffer-insert-byte buffer))
291
(vector-push-extend 0 buffer)
292
(rlmdb::string-dictionary-put buffer))))
294
(:method ((utf8-vector vector))
295
"Given the encoded string, compute its sha1 (w/o null termination),
296
open a transaction over the id->string and sha1->id databases
298
(let ((sha1-vector (ironclad:digest-sequence 'ironclad:sha1 utf8-vector :end (1- (length utf8-vector)))))
299
(cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
300
(cffi:with-foreign-array (%utf8 utf8-vector `(:array :uint8 ,(length utf8-vector)))
301
(with-lmdb-values ((%sha1-key 20 %sha1)
302
(%utf8-value (length utf8-vector) %utf8))
303
;; use the standard interface to ensure the environment and its databases are open
304
(rlmdb:with-string-database (id-utf8-database :flags 0 :normal-disposition :commit)
305
;; to this point all databases are open and a write transaction exists
306
(let ((sha1-id-database (dictionary-sha1-id-database *string-db-environment*))
307
(id-sha1-database (dictionary-id-sha1-database *string-db-environment*)))
308
(with-lmdb-values ((%id-key&value 0 (cffi:null-pointer)))
310
(let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
311
(lmdb::handle sha1-id-database)
314
(alexandria:switch (return-code)
316
(cffi:mem-ref (CFFI:FOREIGN-SLOT-VALUE %ID-KEY&VALUE '(:STRUCT LIBLMDB:VAL)
320
(when (dictionary-read-only-p *string-db-environment*)
321
(error "string database is read-only: ~s" *string-db-environment*))
322
(let ((id (rlmdb::entry-count id-utf8-database)))
323
(cffi:with-foreign-object (%id :uint32)
324
(setf (cffi:mem-ref %id :uint32) id)
325
(with-lmdb-values ((%id-key&value (dictionary-id-size *string-db-environment*) %id))
326
(let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
327
(lmdb::handle sha1-id-database)
331
(alexandria:switch (return-code)
334
(let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
335
(lmdb::handle id-utf8-database)
339
(alexandria:switch (return-code)
341
(let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
342
(lmdb::handle id-sha1-database)
346
(alexandria:switch (return-code)
350
(lmdb::unknown-error return-code)))))
352
(lmdb::unknown-error return-code)))))
354
(lmdb::unknown-error return-code))))))))
356
(lmdb::unknown-error return-code)))))))))))))
359
(defgeneric rlmdb::string-dictionary-patch (id)
360
(:documentation "Given a value - string, integer id, or sha1 id, patch the other two databases")
363
(:method ((id integer))
364
(let ((string (rlmdb::string-dictionary-get id)))
366
(let* ((length (length string))
367
(buffer-length (1+ (* length 4)))
368
(utf8-buffer (make-array buffer-length :element-type '(unsigned-byte 8) :fill-pointer 0))
369
(encoder (load-time-value (content-encoding-byte-encoder (content-encoding :utf-8)))))
370
(flet ((buffer-insert-byte (buffer byte)
371
(declare (type (array (unsigned-byte 8) (*)) buffer))
372
(declare (type (unsigned-byte 8) byte))
373
;; check bounds here as it's finally the encoded positioning
374
(vector-push-extend byte buffer)))
375
(declare (dynamic-extent #'buffer-insert-byte))
377
(funcall encoder (char string i) #'buffer-insert-byte utf8-buffer))
379
(vector-push-extend 0 utf8-buffer)
380
(let ((sha1-vector (ironclad:digest-sequence 'ironclad:sha1 utf8-buffer :end (1- (length utf8-buffer)))))
381
(cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
382
(rlmdb::with-string-sha1-database (sha1-id-database :flags 0 :normal-disposition :commit)
383
(cffi:with-foreign-object (%id :uint32)
384
(with-lmdb-values ((%sha1-key 20 %sha1)
385
(%id-key&value (dictionary-id-size *string-db-environment*) %id))
386
(let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
387
(lmdb::handle sha1-id-database)
390
(alexandria:switch (return-code)
392
(values (cffi:mem-ref %id :uint32) sha1-vector nil))
393
(liblmdb:+notfound+ ; add it
394
(setf (cffi:mem-ref %id :uint32) id)
395
(let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
396
(lmdb::handle sha1-id-database)
400
(alexandria:switch (return-code)
402
(values (cffi:mem-ref %id :uint32) sha1-vector t))
404
(lmdb::unknown-error return-code))))))))))))))))))
407
;;; echo -n "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" | shasum
408
;;; -> 3c197cb1f6842dc41aa48dc8b9032284bcf39a27
409
;;; (IRONCLAD:BYTE-ARRAY-TO-HEX-STRING #(60 25 124 177 246 132 45 196 26 164 141 200 185 3 34 132 188 243 154 39))
412
;;; test that string db access works for pushed filters
415
(spocq.i::test-sparql "select *
417
{ ?s <http://example.org/location> ?location }.
418
#bind (lang(?location) as ?lang)
419
filter ('da' = lang(?location))
420
}" :repository-id "openrdf-sesame/collation")
422
(transaction-object-term-number nil "346")
425
(ransaction-object-term-number nil "1")
427
(cffi:foreign-slot-value (shard-term-fetch (rlmdb:value-term-number "346"))
428
'(:struct shard-term) 'type)
430
(rlmdb:with-string-database (db)
431
(encode-turtle-term-number 872946 *trace-output*))
433
(spocq.i::test-sparql "select ?s ?p ?o
435
bind (<http://exmaple.org/s> as ?s)
436
bind (<http://exmaple.org/p> as ?p)
438
}" :repository-id "james/system"
439
:response-content-type mime:application/n-quads)