Coverage report: /development/source/library/org/datagraph/spocq-shard/src/store/rlmdb/string-database.lisp

KindCoveredAll%
expression214444 48.2
branch610 60.0
Key
Not instrumented
Conditionalized out
Executed
Not executed
 
Both branches taken
One branch taken
Neither branch taken
1
 ;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.rdf.lmdb.implementation; -*-
2
 ;;; (load "patches/shard-term.lisp")
3
 
4
 ;;; implement direct access to persistent term definitions
5
 
6
 (in-package :rlmdb.i)
7
 
8
 (defclass rlmdb::string-dictionary (lmdb:environment)
9
   ((id-sha1-database
10
     :accessor dictionary-id-sha1-database)
11
    (id-utf8-database
12
     :accessor dictionary-id-utf8-database)
13
    (sha1-id-database
14
     :accessor dictionary-sha1-id-database)
15
    (id-size
16
     :initform 4 :initarg :id-size
17
     :reader dictionary-id-size)
18
    (read-only
19
     :initform nil :initarg :read-only
20
     :reader dictionary-read-only-p)))
21
   
22
 
23
 ;;; /srv/dydra/storage/strings.mdb
24
 ;;; (decode-db-name "736861313a753332") : "sha1:u32"
25
 ;;; (decode-db-name "7533323a63737472") : "u32:cstr"
26
 ;;; (decode-db-name "7533323a73686131") : "u32:sha1" 
27
 
28
 
29
 (defmethod initialize-instance ((instance rlmdb::string-dictionary) &rest initargs
30
                                 &key (directory *string-db-pathname*)
31
                                 (mapsize *string-db-mapsize*)
32
                                 (max-readers rlmdb:*max-readers*))
33
   (apply #'call-next-method instance
34
          :directory directory
35
          :max-databases 3
36
          :mapsize mapsize
37
          :max-readers max-readers
38
          initargs)
39
          
40
   (lmdb:open-environment instance)
41
   ;; when working with existing string databases this is not the right thing to do...
42
   ;; there is no creation operator. this ensure string 0 is ""
43
   ;; )) (rlmdb::shard-string-store instance "")
44
   )
45
 
46
 ;;; (defparameter *term-dictionary* (make-instance 'rlmdb::term-dictionary))
47
 ;;; (lmdb:close-environment *term-dictionary*)
48
 
49
 
50
 (defmethod lmdb:open-environment ((dictionary rlmdb::string-dictionary) &rest args)
51
   (call-next-method)
52
   (apply #'rlmdb:open-environment-databases dictionary args))
53
 
54
 
55
 
56
 (defmethod rlmdb:open-environment-databases progn ((dictionary rlmdb::string-dictionary)
57
                                                    &key (if-does-not-exist :create))
58
   ;; this process creates a database instance based on the given transaction,
59
   ;; initializes it and then leaves it open for use with other transactions.
60
   ;; the initial transaction remains in place, but is not used subsequently.
61
   (setf (dictionary-id-sha1-database dictionary)
62
         (ensure-environment-database dictionary "u32:sha1"
63
                                      :if-does-not-exist if-does-not-exist
64
                                      :class 'rlmdb:database))
65
   (setf (dictionary-id-utf8-database dictionary)
66
         (ensure-environment-database dictionary "u32:cstr"
67
                                      :if-does-not-exist if-does-not-exist
68
                                      :class 'rlmdb:database))
69
   (setf (dictionary-sha1-id-database dictionary)
70
         (ensure-environment-database dictionary "sha1:u32"
71
                                      :if-does-not-exist if-does-not-exist
72
                                      :class 'rlmdb:database)))
73
 
74
 (:documentation
75
  "This intends a process to open just one u23:cstr database in just one string
76
  dictionary environment. Both are bound lazily to global variables.
77
  Threads operate on the database by establishing respective transactions.
78
  Should it become necessary to open more than on string database, a thread must
79
  rebind *string-db* and *string-db-environment* to nil and *string-db-pathname*
80
  to the local dictionary location and proceed with the dynamic bindings.")
81
 
82
 (defun rlmdb::string-database-environment ()
83
   "Lazily instantiate and open a string dictionary and its database and bind
84
   it to the global *string-db-environment*."
85
   (or *string-db-environment*
86
       (setf *string-db-environment* (make-instance 'rlmdb::string-dictionary :directory *string-db-pathname*))))
87
 
88
 (defun rlmdb::string-database ()
89
   "Lazily bind the u32:cstr database from the current environment to the global
90
   *string-db*"
91
   (or *string-db*
92
       (setf *string-db*
93
             (dictionary-id-utf8-database (rlmdb::string-database-environment)))))
94
 
95
 (defparameter *string-sha1-db* nil)
96
 (defun rlmdb::string-sha1-database ()
97
   "Lazily bind the sha1:u32 database from the current environment to the global
98
   *string-sha1-db*"
99
   (or *string-sha1-db*
100
       (setf *string-sha1-db*
101
             (dictionary-sha1-id-database (rlmdb::string-database-environment)))))
102
 
103
 
104
 
105
 
106
 (defun funcall-sdo (operator database)
107
   (funcall operator database))
108
 
109
 (defun rlmdb::call-with-string-database (operator &key (flags liblmdb:+rdonly+) (normal-disposition :abort))
110
   "Invoke the given operator on the open id->utf8 database within a transaction.
111
   If a transaction is already active, then that context is used with the global database.
112
   Otherwise ensure that the environment and database are open, establish a
113
   transaction and invoke the operator on the database in that new context."
114
   (cond ((and *string-db-transaction* (= flags liblmdb:+rdonly+))
115
          (funcall-sdo operator (rlmdb::string-database)))
116
         (t
117
          (rlmdb::string-database)
118
          (lmdb:with-transaction ((*string-db-transaction* (lmdb:make-transaction *string-db-environment* :flags flags))
119
                                  :normal-disposition normal-disposition)
120
            (funcall-sdo operator *string-db*)))))
121
 
122
 (defmacro rlmdb::with-string-sha1-database ((db &rest args) &body body)
123
   (let ((op (gensym "wsb-")))
124
     `(flet ((,op (,db)
125
               (declare (ignorable ,db))
126
               ,@body))
127
        (declare (dynamic-extent #',op))
128
        (rlmdb::call-with-string-sha1-database #',op ,@args))))
129
 
130
 (defun rlmdb::call-with-string-sha1-database (operator &key (flags liblmdb:+rdonly+) (normal-disposition :abort))
131
   "Invoke the given operator on the open id->utf8 database within a transaction.
132
   If a transaction is already active, then that context is used with the global database.
133
   Otherwise ensure that the environment and database are open, establish a
134
   transaction and invoke the operator on the database in that new context."
135
   (cond ((and *string-db-transaction* (= flags liblmdb:+rdonly+))
136
          (funcall-sdo operator (rlmdb::string-sha1-database)))
137
         (t
138
          (rlmdb::string-sha1-database)
139
          (lmdb:with-transaction ((*string-db-transaction* (lmdb:make-transaction *string-db-environment* :flags flags))
140
                                  :normal-disposition normal-disposition)
141
            (funcall-sdo operator *string-sha1-db*)))))
142
 
143
 ;;; (rlmdb:with-string-database (sdb) (spocq.i::encode-turtle-term-number 1 *trace-output*))
144
 
145
 #|
146
 the dynamic state during query evaluation is such that, in order that transactions on the string database
147
 extend beyond a single fetch, they must be interleaved with index transactions.
148
 for example, in a case where a thread which is iterating through a bgp also interprets pushed filters,
149
 the bgp is compiled to establish a string-db transaction at the outset.
150
 in the course of the iteration, any index scan established its own transaction over the index database.
151
 this shadows the string-db transaction.
152
 to allow for this, call-with-string-database binds also the string-db transaction and tests for
153
 its presence.
154
 
155
 at the point of the fetch the state can be
156
 (:DB #<LMDB:DATABASE "u32:cstr" {101879E403}> :SDB
157
  #<LMDB:DATABASE "u32:cstr" {101879E403}> :SDBE
158
  #<ORG.DATAGRAPH.RDF.LMDB::STRING-DICTIONARY "strings.mdb" #.(SB-SYS:INT-SAP #X7AE92C008960)[#.(SB-SYS:INT-SAP #X7AE92C001D00)] {101879B093}>
159
  :XACT
160
  #<ORG.DATAGRAPH.RDF.LMDB:TRANSACTION #<ORG.DATAGRAPH.RDF.LMDB:REPOSITORY openrdf-sesame/collation "76745d0a-7b23-e94d-ab49-6b7c9a4d6bdb" #.(SB-SYS:INT-SAP #X0257C2D0)[#.(SB-SYS:INT-SAP #X0227AEB0)] {1015BAE303}>@7 {101879F4C3}>
161
  :SDB-XACT
162
  #<ORG.DATAGRAPH.RDF.LMDB:TRANSACTION #<ORG.DATAGRAPH.RDF.LMDB::STRING-DICTIONARY "strings.mdb" #.(SB-SYS:INT-SAP #X7AE92C008960)[#.(SB-SYS:INT-SAP #X7AE92C001D00)] {101879B093}>@19263978 {101879E653}>)
163
 
164
 if the is performed on the index database transaction, but supplies the string database, the operation complete, but returns a not-found error code. (!?!)
165
 |#
166
 
167
 (defun rlmdb:shard-string-fetch (string-id)
168
   "Fetch the identified string from the current *string-db*"
169
   (labels ((decode-null-terminated-string (k raw-value)
170
              (declare (ignore k))
171
              (let* ((size (rlmdb.i::%mdb-val-size raw-value))
172
                     (%data (rlmdb.i::%mdb-val-data raw-value)))
173
                (cffi:foreign-string-to-lisp %data :count (if (zerop (cffi:mem-aref %data :uint8 (1- size))) (1- size) size)))))
174
     (declare (dynamic-extent #'decode-null-terminated-string))
175
     (rlmdb:with-string-database (db)
176
       (cond ((lmdb:get-with db string-id #'decode-null-terminated-string :transaction *string-db-transaction*))
177
             (t
178
              ;; if it was not found, reset the transaction and try again
179
              (lmdb:reset-transaction *string-db-transaction*)
180
              (lmdb:renew-transaction *string-db-transaction*)
181
              (lmdb:get-with db string-id #'decode-null-terminated-string :transaction *string-db-transaction*))))))
182
 
183
 ;; (rlmdb::with-string-database (db) (shard-string-fetch 98427199))
184
 
185
 (defun rlmdb::call-with-shard-string (string-id op)
186
   "In order to include strings commited subsequent to the primary transaction,
187
    perform the get retry with a new transaction if the first one does not produce a result
188
    the operator must requrn non-nil when it has been invoked.
189
    The operator must return non-null if it runs"
190
   (declare (dynamic-extent op))
191
   (rlmdb:with-string-database (db)
192
     (cond ((lmdb:get-with db string-id op :transaction *string-db-transaction*))
193
           (t
194
            ;; if it was not found, reset the transaction and try again
195
            (lmdb:reset-transaction *string-db-transaction*)
196
            (lmdb:renew-transaction *string-db-transaction*)
197
            (lmdb:get-with db string-id op :transaction *string-db-transaction*)))))
198
 
199
 
200
 (defgeneric rlmdb::string-dictionary-get (key)
201
   (:method ((key integer))
202
     (rlmdb:with-string-database (db)
203
       (cffi:with-foreign-object (%id :uint32)
204
         (setf (cffi:mem-ref %id :uint32) key)
205
         (with-lmdb-values ((%id-key&value (dictionary-id-size *string-db-environment*) %id)
206
                            (%utf8-data 0 (cffi:null-pointer)))
207
           (let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
208
                                           (lmdb::handle db)
209
                                           %id-key&value
210
                                           %utf8-data)))
211
             (alexandria:switch (return-code)
212
                                (0
213
                                 (let* ((size (rlmdb.i::%mdb-val-size %utf8-data))
214
                                        (%data (rlmdb.i::%mdb-val-data %utf8-data)))
215
                                   (cffi:foreign-string-to-lisp %data :count (if (zerop (cffi:mem-aref %data :uint8 (1- size))) (1- size) size))))
216
                                (liblmdb:+notfound+
217
                                 nil)
218
                                (t
219
                                 (lmdb::unknown-error return-code))))))))
220
 
221
   (:method ((key string))
222
     (let* ((length (length key))
223
            (buffer-length (1+ (* length 4)))
224
            (buffer (make-array buffer-length :element-type '(unsigned-byte 8) :fill-pointer 0))
225
            (encoder (load-time-value (content-encoding-byte-encoder (content-encoding :utf-8)))))
226
       (flet ((buffer-insert-byte (buffer byte)
227
                (declare (type (array (unsigned-byte 8) (*)) buffer))
228
                (declare (type (unsigned-byte 8) byte))
229
                ;; check bounds here as it's finally the encoded positioning
230
                (vector-push-extend byte buffer)))
231
         (declare (dynamic-extent #'buffer-insert-byte))
232
         (dotimes (i length) 
233
           (funcall encoder (char key i) #'buffer-insert-byte buffer))
234
         ;; null-terminate
235
         (vector-push-extend 0 buffer)
236
         (rlmdb::string-dictionary-get buffer))))
237
 
238
   (:method ((utf8-vector vector))
239
     "Given the encoded string, compute its sha1 (w/o null termination),
240
     open a transaction over the id->string and sha1->id databases
241
     retrieve the integer id"
242
     (let ((sha1-vector (ironclad:digest-sequence 'ironclad:sha1 utf8-vector :end (1- (length utf8-vector)))))
243
       (cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
244
         (with-lmdb-values ((%sha1-key 20 %sha1))
245
           (rlmdb::with-string-sha1-database (db)
246
             #+(or)
247
             (format *trace-output* "rlmdb::string-dictionary-get ~s"
248
                     (list lmdb::*transaction*  (lmdb::handle lmdb::*transaction*)
249
                           *string-db-transaction* (lmdb::handle *string-db-transaction*)
250
                           :id-sha1 (dictionary-id-sha1-database *string-db-environment*) (lmdb::handle (dictionary-id-sha1-database *string-db-environment*))
251
                           :id-utf8 (dictionary-id-utf8-database *string-db-environment*) (lmdb::handle (dictionary-id-utf8-database *string-db-environment*))
252
                           :sha1-id (dictionary-sha1-id-database *string-db-environment*) (lmdb::handle (dictionary-sha1-id-database *string-db-environment*))
253
                           ))
254
             (with-lmdb-values ((%id-key&value 0 (cffi:null-pointer)))
255
               (let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
256
                                               (lmdb::handle *string-sha1-db*)
257
                                               %sha1-key
258
                                               %id-key&value)))
259
                 (alexandria:switch (return-code)
260
                                    (0
261
                                     (cffi:mem-ref (CFFI:FOREIGN-SLOT-VALUE %ID-KEY&VALUE '(:STRUCT LIBLMDB:VAL)
262
                                                                            'LIBLMDB:MV-DATA)
263
                                                   :uint32))
264
                                    (liblmdb:+notfound+
265
                                     nil)
266
                                    (t
267
                                     (lmdb::unknown-error return-code)))))))))))
268
 
269
 
270
 ;; (rlmdb::string-dictionary-get "http://www.w3.org/2001/XMLSchema#string")
271
 ;; (rlmdb::string-dictionary-get 1)
272
 ;;; (rlmdb::string-dictionary-get "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
273
 ;; (rlmdb::string-dictionary-put "one-two-three")
274
 ;; (rlmdb::string-dictionary-put "one-two-three-four")
275
 
276
 (defgeneric rlmdb::string-dictionary-put (string)
277
   (:method ((string string))
278
     (let* ((length (length string))
279
            (buffer-length (1+ (* length 4)))
280
            (buffer (make-array buffer-length :element-type '(unsigned-byte 8) :fill-pointer 0))
281
            (encoder (load-time-value (content-encoding-byte-encoder (content-encoding :utf-8)))))
282
       (flet ((buffer-insert-byte (buffer byte)
283
                (declare (type (array (unsigned-byte 8) (*)) buffer))
284
                (declare (type (unsigned-byte 8) byte))
285
                ;; check bounds here as it's finally the encoded positioning
286
                (vector-push-extend byte buffer)))
287
         (declare (dynamic-extent #'buffer-insert-byte))
288
         (dotimes (i length) 
289
           (funcall encoder (char string i) #'buffer-insert-byte buffer))
290
         ;; null-terminate
291
         (vector-push-extend 0 buffer)
292
         (rlmdb::string-dictionary-put buffer))))
293
 
294
   (:method ((utf8-vector vector))
295
     "Given the encoded string, compute its sha1 (w/o null termination),
296
     open a transaction over the id->string and sha1->id databases
297
     store the two pairs"
298
     (let ((sha1-vector (ironclad:digest-sequence 'ironclad:sha1 utf8-vector :end (1- (length utf8-vector)))))
299
       (cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
300
         (cffi:with-foreign-array (%utf8 utf8-vector `(:array :uint8 ,(length utf8-vector)))
301
           (with-lmdb-values ((%sha1-key 20 %sha1)
302
                              (%utf8-value (length utf8-vector) %utf8))
303
             ;; use the standard interface to ensure the environment and its databases are open
304
             (rlmdb:with-string-database (id-utf8-database :flags 0 :normal-disposition :commit)
305
               ;; to this point all databases are open and a write transaction exists
306
               (let ((sha1-id-database (dictionary-sha1-id-database *string-db-environment*))
307
                     (id-sha1-database (dictionary-id-sha1-database *string-db-environment*)))
308
                 (with-lmdb-values ((%id-key&value 0 (cffi:null-pointer)))
309
                   ;; probe first                                    nil)
310
                   (let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
311
                                                   (lmdb::handle sha1-id-database)
312
                                                   %sha1-key
313
                                                   %id-key&value)))
314
                     (alexandria:switch (return-code)
315
                                        (0
316
                                         (cffi:mem-ref (CFFI:FOREIGN-SLOT-VALUE %ID-KEY&VALUE '(:STRUCT LIBLMDB:VAL)
317
                                                                                'LIBLMDB:MV-DATA)
318
                                                       :uint32))
319
                                        (liblmdb:+notfound+
320
                                         (when (dictionary-read-only-p *string-db-environment*)
321
                                           (error "string database is read-only: ~s" *string-db-environment*))
322
                                         (let ((id (rlmdb::entry-count id-utf8-database)))
323
                                           (cffi:with-foreign-object (%id :uint32)
324
                                             (setf (cffi:mem-ref %id :uint32) id)
325
                                             (with-lmdb-values ((%id-key&value (dictionary-id-size *string-db-environment*) %id))
326
                                               (let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
327
                                                                               (lmdb::handle sha1-id-database)
328
                                                                               %sha1-key
329
                                                                               %id-key&value
330
                                                                               0)))
331
                                                 (alexandria:switch (return-code)
332
                                                                    (0
333
                                                                     ;; continue
334
                                                                     (let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
335
                                                                                                     (lmdb::handle id-utf8-database)
336
                                                                                                     %id-key&value
337
                                                                                                     %utf8-value
338
                                                                                                     0)))
339
                                                                       (alexandria:switch (return-code)
340
                                                                                          (0
341
                                                                                           (let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
342
                                                                                                     (lmdb::handle id-sha1-database)
343
                                                                                                     %id-key&value
344
                                                                                                     %sha1-key
345
                                                                                                     0)))
346
                                                                                             (alexandria:switch (return-code)
347
                                                                                                                (0
348
                                                                                                                 id)
349
                                                                                                                (t
350
                                                                                                                 (lmdb::unknown-error return-code)))))
351
                                                                                          (t
352
                                                                                           (lmdb::unknown-error return-code)))))
353
                                                                    (t
354
                                                                     (lmdb::unknown-error return-code))))))))
355
                                        (t
356
                                         (lmdb::unknown-error return-code)))))))))))))
357
 
358
 
359
 (defgeneric rlmdb::string-dictionary-patch (id)
360
   (:documentation "Given a value - string, integer id, or sha1 id, patch the other two databases")
361
   ;; just u32->sha1
362
   
363
   (:method ((id integer))
364
     (let ((string (rlmdb::string-dictionary-get id)))
365
       (when string
366
         (let* ((length (length string))
367
                (buffer-length (1+ (* length 4)))
368
                (utf8-buffer (make-array buffer-length :element-type '(unsigned-byte 8) :fill-pointer 0))
369
                (encoder (load-time-value (content-encoding-byte-encoder (content-encoding :utf-8)))))
370
           (flet ((buffer-insert-byte (buffer byte)
371
                    (declare (type (array (unsigned-byte 8) (*)) buffer))
372
                    (declare (type (unsigned-byte 8) byte))
373
                    ;; check bounds here as it's finally the encoded positioning
374
                    (vector-push-extend byte buffer)))
375
             (declare (dynamic-extent #'buffer-insert-byte))
376
             (dotimes (i length) 
377
               (funcall encoder (char string i) #'buffer-insert-byte utf8-buffer))
378
             ;; null-terminate
379
             (vector-push-extend 0 utf8-buffer)
380
             (let ((sha1-vector (ironclad:digest-sequence 'ironclad:sha1 utf8-buffer :end (1- (length utf8-buffer)))))
381
               (cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
382
                 (rlmdb::with-string-sha1-database (sha1-id-database :flags 0 :normal-disposition :commit)
383
                   (cffi:with-foreign-object (%id :uint32)
384
                     (with-lmdb-values ((%sha1-key 20 %sha1)
385
                                        (%id-key&value (dictionary-id-size *string-db-environment*) %id))
386
                       (let ((return-code (liblmdb:get (lmdb::handle *string-db-transaction*)
387
                                                       (lmdb::handle sha1-id-database)
388
                                                       %sha1-key
389
                                                       %id-key&value)))
390
                         (alexandria:switch (return-code)
391
                                            (0 ; exists
392
                                             (values (cffi:mem-ref %id :uint32) sha1-vector nil))
393
                                            (liblmdb:+notfound+ ; add it
394
                                             (setf (cffi:mem-ref %id :uint32) id)
395
                                             (let ((return-code (liblmdb:put (lmdb::handle *string-db-transaction*)
396
                                                                             (lmdb::handle sha1-id-database)
397
                                                                             %sha1-key
398
                                                                             %id-key&value
399
                                                                             0)))
400
                                               (alexandria:switch (return-code)
401
                                                                  (0
402
                                                                   (values (cffi:mem-ref %id :uint32) sha1-vector t))
403
                                                                  (t
404
                                                                   (lmdb::unknown-error return-code))))))))))))))))))
405
 
406
                      
407
 ;;; echo -n "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" | shasum
408
 ;;; -> 3c197cb1f6842dc41aa48dc8b9032284bcf39a27
409
 ;;; (IRONCLAD:BYTE-ARRAY-TO-HEX-STRING #(60 25 124 177 246 132 45 196 26 164 141 200 185 3 34 132 188 243 154 39))
410
 ;;; -> same
411
 
412
 ;;; test that string db access works for pushed filters 
413
 #+(or)
414
 (
415
 (spocq.i::test-sparql "select *
416
  where {
417
   { ?s <http://example.org/location> ?location }.
418
   #bind (lang(?location) as ?lang)
419
   filter ('da' = lang(?location))
420
  }" :repository-id "openrdf-sesame/collation")
421
 
422
 (transaction-object-term-number nil "346")
423
 872946
424
 
425
 (ransaction-object-term-number nil "1")
426
 ;; on nl12
427
 (cffi:foreign-slot-value (shard-term-fetch (rlmdb:value-term-number "346"))
428
                          '(:struct shard-term) 'type)
429
 
430
 (rlmdb:with-string-database (db)
431
   (encode-turtle-term-number 872946 *trace-output*))
432
 
433
 (spocq.i::test-sparql "select ?s ?p ?o
434
  where {
435
   bind (<http://exmaple.org/s> as ?s)
436
   bind (<http://exmaple.org/p> as ?p)
437
   bind ('346' as ?o)
438
  }" :repository-id "james/system"
439
  :response-content-type mime:application/n-quads)
440
 
441
 )