Coverage report: /development/source/library/org/datagraph/spocq-shard/src/store/rlmdb/term-database.lisp

KindCoveredAll%
expression0406 0.0
branch04 0.0
Key
Not instrumented
Conditionalized out
Executed
Not executed
 
Both branches taken
One branch taken
Neither branch taken
1
 ;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.rdf.lmdb.implementation; -*-
2
 ;;; (load "patches/term-database.lisp")
3
 
4
 ;;; implement direct access to persistent term definitions
5
 
6
 (in-package :rlmdb.i)
7
 
8
 (defclass rlmdb::term-dictionary (lmdb:environment)
9
   ((id-term-database
10
     :accessor dictionary-id-term-database)
11
    (sha1-id-database
12
     :accessor dictionary-sha1-id-database)
13
    (id-size
14
     :initform 4 :initarg :id-size
15
     :reader dictionary-id-size)))
16
 
17
 (defclass rlmdb::sha1-dictionary (lmdb:environment)
18
   ((sha1-database
19
     :accessor dictionary-sha1-database)))
20
 
21
 (cffi:defcfun ("ntriples_term_serialize" %ntriples-term-serialize) :size_t (term rdfcache::term) (buffer :pointer) (size :size_t))
22
 (cffi:defcfun ("ntriples_term_length" %ntriples-term-length) :size_t (term rdfcache::term))
23
 
24
 (defun compute-rdf-term-sha1 (%rdf-term %sha1)
25
   "Given an rdf term (simplified form) compute the sha1 into the given buffer.
26
   Return the serialized term length"
27
 
28
   (let ((size (%ntriples-term-length %rdf-term)))
29
     (unless (plusp size)
30
       (log-error "compute-rdf-term-sha1: invalid term: ~/format-term/" %rdf-term)
31
       (error "compute-rdf-term-sha1: invalid term: ~s" %rdf-term))
32
     (incf size)
33
     (cffi:with-foreign-pointer (%buffer size)
34
       (%ntriples-term-serialize %rdf-term %buffer size)
35
       (let ((sha1 (make-array 40 :element-type '(unsigned-byte 8)))
36
             (buffer (make-array size :element-type '(unsigned-byte 8))))
37
         (declare (dynamic-extent sha1))
38
         (loop for i below size
39
           do (setf (aref buffer i) (cffi:mem-ref %buffer :uint8 i)))
40
         (ironclad:digest-sequence 'ironclad:sha1 buffer :end size :digest sha1)
41
         (cffi:lisp-array-to-foreign sha1 %sha1 '(:uint 40)))
42
       size)))
43
 
44
 ;;; /srv/dydra/storage/termid.mdb
45
 ;;; (decode-db-name "736861313a753332") : "sha1:u32"
46
 ;;; (decode-db-name "7533323a63737472") : "u32:cstr"
47
 ;;; (decode-db-name "7533323a73686131") : "u32:sha1" 
48
 
49
 (defparameter *term-dictionary-pathname* #p"/srv/dydra/storage/term-dictionary/")
50
 (defparameter *term-segment-pathname* #p"/srv/dydra/storage/terms")
51
 (defparameter *term-map-pathname* #p"/srv/dydra/storage/terms.mdb/")
52
 
53
 (defparameter +term-record-size+ (CFFI:FOREIGN-TYPE-SIZE '(:struct shard-term)))
54
 
55
 (defparameter *term-transaction* nil
56
   "Available to bind a term dictionary transaction in parallel with a one which
57
   covers the string dictionary.")
58
 
59
 (defmethod initialize-instance ((instance rlmdb::term-dictionary) &rest initargs
60
                                 &key (directory *term-dictionary-pathname*))
61
   (apply #'call-next-method instance
62
          :directory directory
63
          :max-databases 3
64
          :mapsize rlmdb:*mapsize*
65
          initargs)
66
          
67
   (lmdb:open-environment instance)
68
   ;; there is no creation operator. this ensure string 0 is ""
69
   )
70
 
71
 
72
 (defmethod initialize-instance ((instance rlmdb::sha1-dictionary) &rest initargs
73
                                 &key (directory *term-map-pathname*))
74
   (apply #'call-next-method instance
75
          :directory directory
76
          :max-databases 1
77
          :mapsize rlmdb:*mapsize*
78
          initargs)
79
          
80
   (lmdb:open-environment instance)
81
   ;; there is no creation operator. this ensure string 0 is ""
82
   )
83
 
84
 (defmethod lmdb:open-environment ((dictionary rlmdb::term-dictionary) &rest args)
85
   (call-next-method)
86
   (apply #'rlmdb:open-environment-databases dictionary args))
87
 
88
 (defmethod rlmdb:open-environment-databases progn ((dictionary rlmdb::term-dictionary) &key (if-does-not-exist :create))
89
   (setf (dictionary-id-term-database dictionary)
90
         (ensure-environment-database dictionary "u32:term"
91
                                      :if-does-not-exist if-does-not-exist
92
                                      :class 'rlmdb:database))
93
   (setf (dictionary-sha1-id-database dictionary)
94
         (ensure-environment-database dictionary "sha1:u32"
95
                                      :if-does-not-exist if-does-not-exist
96
                                      :class 'rlmdb:database)))
97
 
98
 (defmethod lmdb:open-environment ((dictionary rlmdb::sha1-dictionary) &rest args)
99
   (call-next-method)
100
   (apply #'rlmdb:open-environment-databases dictionary args))
101
 
102
 (defmethod rlmdb:open-environment-databases progn ((dictionary rlmdb::sha1-dictionary) &key (if-does-not-exist :error))
103
   (setf (dictionary-sha1-database dictionary)
104
         (ensure-environment-database dictionary "sha1:u32"
105
                                      :if-does-not-exist if-does-not-exist
106
                                      :class 'rlmdb:database)))
107
 
108
 
109
 (defmethod rlmdb::clear-environment-databases ((environment rlmdb::term-dictionary))
110
   (call-next-method)
111
   (flet ((clear-db (db) (when db (lmdb:drop-database db :delete 0))))
112
     (clear-db (dictionary-id-sha1-database environment))
113
     (clear-db (dictionary-sha1-id-database environment))))
114
 
115
 
116
 (defgeneric rlmdb::call-with-dictionary-entry (operator dictionary key)
117
   (:documentation
118
    "Call the given operator with the identified term record")
119
   (:method (operator (location pathname) (key t))
120
     (rlmdb::call-with-dictionary-entry operator
121
                                        (make-instance 'rlmdb::term-dictionary :directory location)
122
                                        key))
123
 
124
   (:method (operator (dictionary rlmdb::term-dictionary) (key integer))
125
     (flet ((do-call (transaction)
126
              (let ((id-term-database (dictionary-id-term-database dictionary)))
127
                (lmdb:with-database (id-term-database)
128
                  (cffi:with-foreign-object (%id :uint32)
129
                    (setf (cffi:mem-ref %id :uint32) key)
130
                    (with-lmdb-values ((%id-key (dictionary-id-size dictionary) %id)
131
                                       (%term-data 0 (cffi:null-pointer)))
132
                      (let ((return-code (liblmdb:get (lmdb::handle transaction)
133
                                                      (lmdb::handle id-term-database)
134
                                                      %id-key
135
                                                      %term-data)))
136
                        (alexandria:switch (return-code)
137
                                           (0
138
                                            (let* ((size (rlmdb.i::%mdb-val-size %term-data))
139
                                                   (%data (rlmdb.i::%mdb-val-data %term-data)))
140
                                              (assert (= size +term-record-size+) ()
141
                                                      "Invalid term record size: ~s" size)
142
                                              (funcall operator %data)))
143
                                           (liblmdb:+notfound+
144
                                            nil)
145
                                           (t
146
                                            (lmdb::unknown-error return-code))))))))))
147
       (cond (*term-transaction*
148
              (do-call *term-transaction*))
149
             (*transaction*
150
              (do-call *transaction*))
151
             (t
152
              (lmdb:with-transaction ((*transaction* (lmdb:make-transaction dictionary)))
153
               (let ((*term-transaction* *transaction*))
154
                 (do-call *transaction*)))))))
155
   
156
   (:method (operator (dictionary rlmdb::term-dictionary) (key vector))
157
     "interpret a vector as the term sha1"
158
     (flet ((do-call (transaction)
159
              (cffi:with-foreign-array (%sha1 key '(:array :uint8 20))
160
                (with-lmdb-values ((%sha1-key 20 %sha1))
161
                  (let ((sha1-id-database (dictionary-sha1-id-database dictionary)))
162
                    (with-lmdb-values ((%id-value 0 (cffi:null-pointer)))
163
                      (let ((return-code (liblmdb:get (lmdb::handle transaction)
164
                                                      (lmdb::handle sha1-id-database)
165
                                                      %sha1-key
166
                                                      %id-value)))
167
                        (alexandria:switch (return-code)
168
                                           (0
169
                                            (let ((id (cffi:mem-ref (CFFI:FOREIGN-SLOT-VALUE %ID-VALUE '(:STRUCT LIBLMDB:VAL)
170
                                                                                             'LIBLMDB:MV-DATA)
171
                                                                    :uint32)))
172
                                              (rlmdb::call-with-dictionary-entry operator dictionary id)))
173
                                            (liblmdb:+notfound+
174
                                             nil)
175
                                            (t
176
                                             (lmdb::unknown-error return-code))))))))))
177
       (cond (*term-transaction*
178
              (do-call *term-transaction*))
179
             (*transaction*
180
              (do-call *transaction*))
181
             (t
182
              (lmdb:with-transaction ((*transaction* (lmdb:make-transaction dictionary)))
183
               (let ((*term-transaction* *transaction*))
184
                 (do-call *transaction*))))))))
185
                    
186
 (defmacro rlmdb::with-dictionary-entry ((term-record dictionary term-key) &body body)
187
   (let ((op (gensym "dictionary-op")))
188
     `(flet ((,op (,term-record)
189
               ,@body))
190
        (declare (dynamic-extent #',op))
191
        (rlmdb::call-with-dictionary-entry #',op ,dictionary ,term-key))))
192
 
193
 
194
 (defmethod rlmdb::dictionary-get-object ((dictionary rlmdb::term-dictionary) (key t))
195
   "Given a term dictionary fetch the respective term, constuct and return the object.
196
   If the key does not identify one, return NIL
197
   to compare:
198
      %fetch-term-record-> term::fetch_record-> ... ->linear_term_segment::fetch
199
   returns a pointer to the mapped terms file.
200
   in this case, the record had dynamic extent and copying it is not necessary."
201
 
202
   (rlmdb::with-dictionary-entry (%term-record dictionary key)
203
     (term-record-value %term-record)))
204
 
205
 
206
 ;;; this must include a term segment file and the sha1:uint32 map
207
 ;;; the term is appended to the segment, which yields the integer id.
208
 ;;; this is then written into the sha1:uint32 map
209
 
210
 (defgeneric rlmdb::term-dictionary-put (dictionary object)
211
   (:method ((location pathname) (content t))
212
     (rlmdb::term-dictionary-put (make-instance 'rlmdb::term-dictionary :directory location)
213
                                 content))
214
 
215
   (:method ((dictionary rlmdb::term-dictionary) (object spocq:unbound-variable))
216
     0)
217
 
218
   (:method ((dictionary rlmdb::term-dictionary) (object string))
219
     (constrain-string-length object)
220
     (call-next-method))
221
   (:method ((dictionary rlmdb::term-dictionary) (object t))
222
     (spocq.i::with-term-record (%term)
223
       (rdfcache::%clear-term %term)
224
       (unwind-protect
225
         (let ((sha1-vector (make-array 20 :element-type '(unsigned-byte 8) :initial-element 0)))
226
           ;; use the compact term as input to the hash key calculation
227
           (spocq.i::set-optional-term %term object)
228
           (cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
229
             (with-shard-term-record (%term-record)
230
               (set-term-record-value object %term-record)
231
               (cffi:with-foreign-object (%id :uint32)
232
                 (with-lmdb-values ((%id-key&value (dictionary-id-size dictionary) %id)
233
                                    (%sha1-key 20 %sha1)
234
                                    (%term-value +term-record-size+ %term-record))
235
                   (compute-rdf-term-sha1 %term %sha1)
236
                   (let* ((id-term-database (dictionary-id-term-database dictionary))
237
                          (sha1-id-database (dictionary-sha1-id-database dictionary))
238
                          (return-code (liblmdb:get (lmdb::handle *transaction*)
239
                                                    (lmdb::handle sha1-id-database)
240
                                                    %sha1-key
241
                                                    %id-key&value)))
242
                     (alexandria:switch (return-code)
243
                                        (0
244
                                         (cffi:mem-ref %id :uint32))
245
                                        (liblmdb:+notfound+
246
                                         (let ((id (rlmdb::entry-count sha1-id-database)))
247
                                           (cffi:with-foreign-object (%id :uint32)
248
                                             (setf (cffi:mem-ref %id :uint32) id)
249
                                             ;; continue
250
                                             (let ((return-code (liblmdb:put (lmdb::handle *transaction*)
251
                                                                             (lmdb::handle sha1-id-database)
252
                                                                             %sha1-key
253
                                                                             %id-key&value
254
                                                                             0)))
255
                                               (alexandria:switch (return-code)
256
                                                                  (0
257
                                                                   (let ((return-code (liblmdb:put (lmdb::handle lmdb:*transaction*)
258
                                                                                                   (lmdb::handle id-term-database)
259
                                                                                                   %id-key&value
260
                                                                                                   %term-value
261
                                                                                                   0)))
262
                                                                     (alexandria:switch (return-code)
263
                                                                                        (0
264
                                                                                         id)
265
                                                                                        (t
266
                                                                                         (lmdb::unknown-error return-code)))))
267
                                                                  (t
268
                                                                   (lmdb::unknown-error return-code)))))))
269
                                        (t
270
                                         (lmdb::unknown-error return-code)))))))))
271
         (spocq.i::clear-optional-term %term)))))
272
 
273
 (defgeneric copy-term-segment (dictionary)
274
   (:documentation "Copy just the terms records from a term segment to a 'uint32:term' database.")
275
   (:method ((location pathname))
276
     (copy-term-segment (make-instance 'rlmdb::term-dictionary :directory location)))
277
 
278
   (:method ((dictionary rlmdb::term-dictionary))
279
     (let ((transaction (lmdb:make-transaction dictionary :flags 0))
280
           (id-term-database (dictionary-id-term-database dictionary)))
281
       (lmdb:with-database (id-term-database)
282
         (lmdb:with-transaction (transaction :initial-disposition :begin :normal-disposition :commit)
283
           (loop with count = (%term-record-count)
284
             for id from 0 below count
285
             for %term-record = (shard-term-fetch id)
286
             do (cffi:with-foreign-object (%id :uint32)
287
                  (with-lmdb-values ((%id-key (dictionary-id-size dictionary) %id)
288
                                     (%term-value +term-record-size+ %term-record))
289
                    (setf (cffi:mem-ref %id :uint32) id)
290
                    (let ((return-code (liblmdb:put (lmdb::handle transaction)
291
                                                    (lmdb::handle id-term-database)
292
                                                    %id-key
293
                                                    %term-value
294
                                                    0)))
295
                      (alexandria:switch (return-code)
296
                                         (0
297
                                          id)
298
                                         (t
299
                                          (lmdb::unknown-error return-code))))))
300
             finally (return count)))))))
301
 ;;; (copy-term-segment *term-dictionary-pathname*)
302
 
303
 (defgeneric reconstruct-term-dictionary (simple-dictionary compund-dictionary segment-location)
304
   (:documentation "Transform the simple dictionary, in which the 'sha1:unint32'
305
   relation is the only database, into one in which it is a sub-database.")
306
   (:method ((simple pathname) (compound t) (segment t))
307
     (reconstruct-term-dictionary (make-instance 'rlmdb::sha1-dictionary :directory simple)
308
                                  compound
309
                                  segment))
310
   (:method ((simple t) (compound pathname) (segment t))
311
     (reconstruct-term-dictionary simple
312
                                  (make-instance 'rlmdb::term-dictionary :directory compound)
313
                                  segment))
314
   
315
   (:method ((simple rlmdb::sha1-dictionary) (compound rlmdb::term-dictionary) segment)
316
     (let ((sha1-vector (make-array 20 :element-type '(unsigned-byte 8) :initial-element 0))
317
           (count 0))
318
       (cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
319
         (let ((sha1-id-database (dictionary-sha1-id-database compound)))
320
           (let ((sha1-database (dictionary-sha1-database simple)))
321
           (lmdb:with-transaction ((simple-transaction (lmdb:make-transaction simple :flags liblmdb:+rdonly+)))
322
             (lmdb:with-transaction ((compound-transaction (lmdb:make-transaction compound :flags 0)
323
                                                           :initial-disposition :begin :normal-disposition :commit))
324
               (lmdb:with-database (sha1-id-database)
325
                 (lmdb:with-database (sha1-database)
326
                 (let ((cur (lmdb:make-cursor sha1-database :transaction simple-transaction)))
327
                   (lmdb:with-cursor (cur)
328
                     (let ((%cursor (lmdb::handle cur)))
329
                       (cffi:with-foreign-object (%id :uint32)
330
                         (with-lmdb-values ((%id-value (dictionary-id-size compound) %id)
331
                                            (%sha1-key 20 %sha1))
332
                           (setf (cffi:mem-ref %id :uint32) 0)
333
                           (loop for get-op = :+set-range+ then :+next+
334
                             while (let ((return-code (liblmdb:cursor-get %cursor
335
                                                                       %sha1-key
336
                                                                       %id-value
337
                                                                       get-op)))
338
                                  (alexandria:switch (return-code)
339
                                                     (0
340
                                                      (let ((return-code (liblmdb:put (lmdb::handle compound-transaction)
341
                                                                                      (lmdb::handle sha1-id-database)
342
                                                                                      %sha1-key
343
                                                                                      %id-value
344
                                                                                      0)))
345
                                                        (alexandria:switch (return-code)
346
                                                                           (0
347
                                                                            (incf count))
348
                                                                           (t
349
                                                                            (warn "put failed: ~s" return-code)
350
                                                                            (lmdb::unknown-error return-code)))))
351
                                                     (liblmdb:+notfound+ ;; complete
352
                                                      nil)
353
                                                     (t
354
                                                      (warn "get failed: ~s" return-code)
355
                                                      (lmdb::unknown-error return-code)))))
356
                           count)))))))))))))))
357
 ;;; (reconstruct-term-dictionary *term-map-pathname* *term-dictionary-pathname* *term-segment-pathname*)
358
 
359
 #+(or)
360
 (
361
 (let ((count (term-record-count))
362
       (stats (make-hash-table)))
363
   (time (loop
364
           for x below 100000000
365
           for term-number = (random count)
366
           for %term = (shard-term-fetch term-number)
367
           for term-type = (cffi:foreign-slot-value %term '(:struct shard-term) 'type)
368
              do (incf (gethash term-type stats 0))))
369
   (loop for type being each hash-key of stats using (hash-value type-count)
370
     append (list type type-count)))
371
  ;; = 20.625sec
372
 
373
 (let ((count (term-record-count))
374
       (stats (make-hash-table))
375
       (dictionary (make-instance 'rlmdb::term-dictionary :directory *term-dictionary-pathname*)))
376
   (flet ((count-term (%term)
377
            (let ((term-type (cffi:foreign-slot-value %term '(:struct shard-term) 'type)))
378
              (incf (gethash term-type stats 0)))))
379
     (declare (dynamic-extent #'count-term))
380
     (time (loop
381
             for x below 100000000
382
             for term-number = (random count)
383
             do (rlmdb::call-with-dictionary-entry #'count-term dictionary term-number))))
384
   (loop for type being each hash-key of stats using (hash-value type-count)
385
     append (list type type-count)))
386
 )
387
 
388
 #|
389
 * (let ((count (term-record-count))
390
       (stats (make-hash-table)))
391
   (time (loop
392
           for x below 100000000
393
           for term-number = (random count)
394
           for %term = (shard-term-fetch term-number)
395
           for term-type = (cffi:foreign-slot-value %term '(:struct shard-term) 'type)
396
              do (incf (gethash term-type stats 0))))
397
   (loop for type being each hash-key of stats using (hash-value type-count)
398
     append (list type type-count)))
399
 Evaluation took:
400
   20.625 seconds of real time
401
   20.624000 seconds of total run time (18.360000 user, 2.264000 system)
402
   [ Run times consist of 0.028 seconds GC time, and 20.596 seconds non-GC time. ]
403
   100.00% CPU
404
   72,188,810,838 processor cycles
405
   1,599,995,904 bytes consed
406
   
407
 (:NODE 6919517 :URI 52009439 :STRING 31078242 :LITERAL 5721147 :DOUBLE 3192767
408
  :FLOAT 273979 :INTEGER 534306 :DATE 31441 :DECIMAL 238780 :TIME 379 :BOOLEAN 3)
409
 
410
 v/s
411
 
412
 * (let ((count (term-record-count))
413
       (stats (make-hash-table))
414
       (dictionary (make-instance 'rlmdb::term-dictionary :directory *term-dictionary-pathname*)))
415
   (flet ((count-term (%term)
416
            (let ((term-type (cffi:foreign-slot-value %term '(:struct shard-term) 'type)))
417
              (incf (gethash term-type stats 0)))))
418
     (declare (dynamic-extent #'count-term))
419
     (time (loop
420
             for x below 100000000
421
             for term-number = (random count)
422
             do (rlmdb::call-with-dictionary-entry #'count-term dictionary term-number))))
423
   (loop for type being each hash-key of stats using (hash-value type-count)
424
     append (list type type-count)))
425
 Evaluation took:
426
   445.223 seconds of real time
427
   445.252000 seconds of total run time (443.496000 user, 1.756000 system)
428
   [ Run times consist of 0.244 seconds GC time, and 445.008 seconds non-GC time. ]
429
   100.01% CPU
430
   1,558,309,320,443 processor cycles
431
   1,242 page faults
432
   40,000,176,128 bytes consed
433
   
434
 (:URI 52012808 :STRING 31082022 :DOUBLE 3193920 :LITERAL 5713698 :NODE 6917442
435
  :DECIMAL 239210 :INTEGER 533950 :FLOAT 275364 :DATE 31227 :TIME 355 :BOOLEAN 4)
436
 
437
 |#
438