Coverage report: /development/source/library/org/datagraph/spocq-shard/src/store/rlmdb/term-database.lisp
| Kind | Covered | All | % |
| expression | 0 | 406 | 0.0 |
| branch | 0 | 4 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.rdf.lmdb.implementation; -*-
2
;;; (load "patches/term-database.lisp")
4
;;; implement direct access to persistent term definitions
8
(defclass rlmdb::term-dictionary (lmdb:environment)
10
:accessor dictionary-id-term-database)
12
:accessor dictionary-sha1-id-database)
14
:initform 4 :initarg :id-size
15
:reader dictionary-id-size)))
17
(defclass rlmdb::sha1-dictionary (lmdb:environment)
19
:accessor dictionary-sha1-database)))
21
(cffi:defcfun ("ntriples_term_serialize" %ntriples-term-serialize) :size_t (term rdfcache::term) (buffer :pointer) (size :size_t))
22
(cffi:defcfun ("ntriples_term_length" %ntriples-term-length) :size_t (term rdfcache::term))
24
(defun compute-rdf-term-sha1 (%rdf-term %sha1)
25
"Given an rdf term (simplified form) compute the sha1 into the given buffer.
26
Return the serialized term length"
28
(let ((size (%ntriples-term-length %rdf-term)))
30
(log-error "compute-rdf-term-sha1: invalid term: ~/format-term/" %rdf-term)
31
(error "compute-rdf-term-sha1: invalid term: ~s" %rdf-term))
33
(cffi:with-foreign-pointer (%buffer size)
34
(%ntriples-term-serialize %rdf-term %buffer size)
35
(let ((sha1 (make-array 40 :element-type '(unsigned-byte 8)))
36
(buffer (make-array size :element-type '(unsigned-byte 8))))
37
(declare (dynamic-extent sha1))
38
(loop for i below size
39
do (setf (aref buffer i) (cffi:mem-ref %buffer :uint8 i)))
40
(ironclad:digest-sequence 'ironclad:sha1 buffer :end size :digest sha1)
41
(cffi:lisp-array-to-foreign sha1 %sha1 '(:uint 40)))
44
;;; /srv/dydra/storage/termid.mdb
45
;;; (decode-db-name "736861313a753332") : "sha1:u32"
46
;;; (decode-db-name "7533323a63737472") : "u32:cstr"
47
;;; (decode-db-name "7533323a73686131") : "u32:sha1"
49
(defparameter *term-dictionary-pathname* #p"/srv/dydra/storage/term-dictionary/")
50
(defparameter *term-segment-pathname* #p"/srv/dydra/storage/terms")
51
(defparameter *term-map-pathname* #p"/srv/dydra/storage/terms.mdb/")
53
(defparameter +term-record-size+ (CFFI:FOREIGN-TYPE-SIZE '(:struct shard-term)))
55
(defparameter *term-transaction* nil
56
"Available to bind a term dictionary transaction in parallel with a one which
57
covers the string dictionary.")
59
(defmethod initialize-instance ((instance rlmdb::term-dictionary) &rest initargs
60
&key (directory *term-dictionary-pathname*))
61
(apply #'call-next-method instance
64
:mapsize rlmdb:*mapsize*
67
(lmdb:open-environment instance)
68
;; there is no creation operator. this ensure string 0 is ""
72
(defmethod initialize-instance ((instance rlmdb::sha1-dictionary) &rest initargs
73
&key (directory *term-map-pathname*))
74
(apply #'call-next-method instance
77
:mapsize rlmdb:*mapsize*
80
(lmdb:open-environment instance)
81
;; there is no creation operator. this ensure string 0 is ""
84
(defmethod lmdb:open-environment ((dictionary rlmdb::term-dictionary) &rest args)
86
(apply #'rlmdb:open-environment-databases dictionary args))
88
(defmethod rlmdb:open-environment-databases progn ((dictionary rlmdb::term-dictionary) &key (if-does-not-exist :create))
89
(setf (dictionary-id-term-database dictionary)
90
(ensure-environment-database dictionary "u32:term"
91
:if-does-not-exist if-does-not-exist
92
:class 'rlmdb:database))
93
(setf (dictionary-sha1-id-database dictionary)
94
(ensure-environment-database dictionary "sha1:u32"
95
:if-does-not-exist if-does-not-exist
96
:class 'rlmdb:database)))
98
(defmethod lmdb:open-environment ((dictionary rlmdb::sha1-dictionary) &rest args)
100
(apply #'rlmdb:open-environment-databases dictionary args))
102
(defmethod rlmdb:open-environment-databases progn ((dictionary rlmdb::sha1-dictionary) &key (if-does-not-exist :error))
103
(setf (dictionary-sha1-database dictionary)
104
(ensure-environment-database dictionary "sha1:u32"
105
:if-does-not-exist if-does-not-exist
106
:class 'rlmdb:database)))
109
(defmethod rlmdb::clear-environment-databases ((environment rlmdb::term-dictionary))
111
(flet ((clear-db (db) (when db (lmdb:drop-database db :delete 0))))
112
(clear-db (dictionary-id-sha1-database environment))
113
(clear-db (dictionary-sha1-id-database environment))))
116
(defgeneric rlmdb::call-with-dictionary-entry (operator dictionary key)
118
"Call the given operator with the identified term record")
119
(:method (operator (location pathname) (key t))
120
(rlmdb::call-with-dictionary-entry operator
121
(make-instance 'rlmdb::term-dictionary :directory location)
124
(:method (operator (dictionary rlmdb::term-dictionary) (key integer))
125
(flet ((do-call (transaction)
126
(let ((id-term-database (dictionary-id-term-database dictionary)))
127
(lmdb:with-database (id-term-database)
128
(cffi:with-foreign-object (%id :uint32)
129
(setf (cffi:mem-ref %id :uint32) key)
130
(with-lmdb-values ((%id-key (dictionary-id-size dictionary) %id)
131
(%term-data 0 (cffi:null-pointer)))
132
(let ((return-code (liblmdb:get (lmdb::handle transaction)
133
(lmdb::handle id-term-database)
136
(alexandria:switch (return-code)
138
(let* ((size (rlmdb.i::%mdb-val-size %term-data))
139
(%data (rlmdb.i::%mdb-val-data %term-data)))
140
(assert (= size +term-record-size+) ()
141
"Invalid term record size: ~s" size)
142
(funcall operator %data)))
146
(lmdb::unknown-error return-code))))))))))
147
(cond (*term-transaction*
148
(do-call *term-transaction*))
150
(do-call *transaction*))
152
(lmdb:with-transaction ((*transaction* (lmdb:make-transaction dictionary)))
153
(let ((*term-transaction* *transaction*))
154
(do-call *transaction*)))))))
156
(:method (operator (dictionary rlmdb::term-dictionary) (key vector))
157
"interpret a vector as the term sha1"
158
(flet ((do-call (transaction)
159
(cffi:with-foreign-array (%sha1 key '(:array :uint8 20))
160
(with-lmdb-values ((%sha1-key 20 %sha1))
161
(let ((sha1-id-database (dictionary-sha1-id-database dictionary)))
162
(with-lmdb-values ((%id-value 0 (cffi:null-pointer)))
163
(let ((return-code (liblmdb:get (lmdb::handle transaction)
164
(lmdb::handle sha1-id-database)
167
(alexandria:switch (return-code)
169
(let ((id (cffi:mem-ref (CFFI:FOREIGN-SLOT-VALUE %ID-VALUE '(:STRUCT LIBLMDB:VAL)
172
(rlmdb::call-with-dictionary-entry operator dictionary id)))
176
(lmdb::unknown-error return-code))))))))))
177
(cond (*term-transaction*
178
(do-call *term-transaction*))
180
(do-call *transaction*))
182
(lmdb:with-transaction ((*transaction* (lmdb:make-transaction dictionary)))
183
(let ((*term-transaction* *transaction*))
184
(do-call *transaction*))))))))
186
(defmacro rlmdb::with-dictionary-entry ((term-record dictionary term-key) &body body)
187
(let ((op (gensym "dictionary-op")))
188
`(flet ((,op (,term-record)
190
(declare (dynamic-extent #',op))
191
(rlmdb::call-with-dictionary-entry #',op ,dictionary ,term-key))))
194
(defmethod rlmdb::dictionary-get-object ((dictionary rlmdb::term-dictionary) (key t))
195
"Given a term dictionary fetch the respective term, constuct and return the object.
196
If the key does not identify one, return NIL
198
%fetch-term-record-> term::fetch_record-> ... ->linear_term_segment::fetch
199
returns a pointer to the mapped terms file.
200
in this case, the record had dynamic extent and copying it is not necessary."
202
(rlmdb::with-dictionary-entry (%term-record dictionary key)
203
(term-record-value %term-record)))
206
;;; this must include a term segment file and the sha1:uint32 map
207
;;; the term is appended to the segment, which yields the integer id.
208
;;; this is then written into the sha1:uint32 map
210
(defgeneric rlmdb::term-dictionary-put (dictionary object)
211
(:method ((location pathname) (content t))
212
(rlmdb::term-dictionary-put (make-instance 'rlmdb::term-dictionary :directory location)
215
(:method ((dictionary rlmdb::term-dictionary) (object spocq:unbound-variable))
218
(:method ((dictionary rlmdb::term-dictionary) (object string))
219
(constrain-string-length object)
221
(:method ((dictionary rlmdb::term-dictionary) (object t))
222
(spocq.i::with-term-record (%term)
223
(rdfcache::%clear-term %term)
225
(let ((sha1-vector (make-array 20 :element-type '(unsigned-byte 8) :initial-element 0)))
226
;; use the compact term as input to the hash key calculation
227
(spocq.i::set-optional-term %term object)
228
(cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
229
(with-shard-term-record (%term-record)
230
(set-term-record-value object %term-record)
231
(cffi:with-foreign-object (%id :uint32)
232
(with-lmdb-values ((%id-key&value (dictionary-id-size dictionary) %id)
234
(%term-value +term-record-size+ %term-record))
235
(compute-rdf-term-sha1 %term %sha1)
236
(let* ((id-term-database (dictionary-id-term-database dictionary))
237
(sha1-id-database (dictionary-sha1-id-database dictionary))
238
(return-code (liblmdb:get (lmdb::handle *transaction*)
239
(lmdb::handle sha1-id-database)
242
(alexandria:switch (return-code)
244
(cffi:mem-ref %id :uint32))
246
(let ((id (rlmdb::entry-count sha1-id-database)))
247
(cffi:with-foreign-object (%id :uint32)
248
(setf (cffi:mem-ref %id :uint32) id)
250
(let ((return-code (liblmdb:put (lmdb::handle *transaction*)
251
(lmdb::handle sha1-id-database)
255
(alexandria:switch (return-code)
257
(let ((return-code (liblmdb:put (lmdb::handle lmdb:*transaction*)
258
(lmdb::handle id-term-database)
262
(alexandria:switch (return-code)
266
(lmdb::unknown-error return-code)))))
268
(lmdb::unknown-error return-code)))))))
270
(lmdb::unknown-error return-code)))))))))
271
(spocq.i::clear-optional-term %term)))))
273
(defgeneric copy-term-segment (dictionary)
274
(:documentation "Copy just the terms records from a term segment to a 'uint32:term' database.")
275
(:method ((location pathname))
276
(copy-term-segment (make-instance 'rlmdb::term-dictionary :directory location)))
278
(:method ((dictionary rlmdb::term-dictionary))
279
(let ((transaction (lmdb:make-transaction dictionary :flags 0))
280
(id-term-database (dictionary-id-term-database dictionary)))
281
(lmdb:with-database (id-term-database)
282
(lmdb:with-transaction (transaction :initial-disposition :begin :normal-disposition :commit)
283
(loop with count = (%term-record-count)
284
for id from 0 below count
285
for %term-record = (shard-term-fetch id)
286
do (cffi:with-foreign-object (%id :uint32)
287
(with-lmdb-values ((%id-key (dictionary-id-size dictionary) %id)
288
(%term-value +term-record-size+ %term-record))
289
(setf (cffi:mem-ref %id :uint32) id)
290
(let ((return-code (liblmdb:put (lmdb::handle transaction)
291
(lmdb::handle id-term-database)
295
(alexandria:switch (return-code)
299
(lmdb::unknown-error return-code))))))
300
finally (return count)))))))
301
;;; (copy-term-segment *term-dictionary-pathname*)
303
(defgeneric reconstruct-term-dictionary (simple-dictionary compund-dictionary segment-location)
304
(:documentation "Transform the simple dictionary, in which the 'sha1:unint32'
305
relation is the only database, into one in which it is a sub-database.")
306
(:method ((simple pathname) (compound t) (segment t))
307
(reconstruct-term-dictionary (make-instance 'rlmdb::sha1-dictionary :directory simple)
310
(:method ((simple t) (compound pathname) (segment t))
311
(reconstruct-term-dictionary simple
312
(make-instance 'rlmdb::term-dictionary :directory compound)
315
(:method ((simple rlmdb::sha1-dictionary) (compound rlmdb::term-dictionary) segment)
316
(let ((sha1-vector (make-array 20 :element-type '(unsigned-byte 8) :initial-element 0))
318
(cffi:with-foreign-array (%sha1 sha1-vector '(:array :uint8 20))
319
(let ((sha1-id-database (dictionary-sha1-id-database compound)))
320
(let ((sha1-database (dictionary-sha1-database simple)))
321
(lmdb:with-transaction ((simple-transaction (lmdb:make-transaction simple :flags liblmdb:+rdonly+)))
322
(lmdb:with-transaction ((compound-transaction (lmdb:make-transaction compound :flags 0)
323
:initial-disposition :begin :normal-disposition :commit))
324
(lmdb:with-database (sha1-id-database)
325
(lmdb:with-database (sha1-database)
326
(let ((cur (lmdb:make-cursor sha1-database :transaction simple-transaction)))
327
(lmdb:with-cursor (cur)
328
(let ((%cursor (lmdb::handle cur)))
329
(cffi:with-foreign-object (%id :uint32)
330
(with-lmdb-values ((%id-value (dictionary-id-size compound) %id)
331
(%sha1-key 20 %sha1))
332
(setf (cffi:mem-ref %id :uint32) 0)
333
(loop for get-op = :+set-range+ then :+next+
334
while (let ((return-code (liblmdb:cursor-get %cursor
338
(alexandria:switch (return-code)
340
(let ((return-code (liblmdb:put (lmdb::handle compound-transaction)
341
(lmdb::handle sha1-id-database)
345
(alexandria:switch (return-code)
349
(warn "put failed: ~s" return-code)
350
(lmdb::unknown-error return-code)))))
351
(liblmdb:+notfound+ ;; complete
354
(warn "get failed: ~s" return-code)
355
(lmdb::unknown-error return-code)))))
357
;;; (reconstruct-term-dictionary *term-map-pathname* *term-dictionary-pathname* *term-segment-pathname*)
361
(let ((count (term-record-count))
362
(stats (make-hash-table)))
364
for x below 100000000
365
for term-number = (random count)
366
for %term = (shard-term-fetch term-number)
367
for term-type = (cffi:foreign-slot-value %term '(:struct shard-term) 'type)
368
do (incf (gethash term-type stats 0))))
369
(loop for type being each hash-key of stats using (hash-value type-count)
370
append (list type type-count)))
373
(let ((count (term-record-count))
374
(stats (make-hash-table))
375
(dictionary (make-instance 'rlmdb::term-dictionary :directory *term-dictionary-pathname*)))
376
(flet ((count-term (%term)
377
(let ((term-type (cffi:foreign-slot-value %term '(:struct shard-term) 'type)))
378
(incf (gethash term-type stats 0)))))
379
(declare (dynamic-extent #'count-term))
381
for x below 100000000
382
for term-number = (random count)
383
do (rlmdb::call-with-dictionary-entry #'count-term dictionary term-number))))
384
(loop for type being each hash-key of stats using (hash-value type-count)
385
append (list type type-count)))
389
* (let ((count (term-record-count))
390
(stats (make-hash-table)))
392
for x below 100000000
393
for term-number = (random count)
394
for %term = (shard-term-fetch term-number)
395
for term-type = (cffi:foreign-slot-value %term '(:struct shard-term) 'type)
396
do (incf (gethash term-type stats 0))))
397
(loop for type being each hash-key of stats using (hash-value type-count)
398
append (list type type-count)))
400
20.625 seconds of real time
401
20.624000 seconds of total run time (18.360000 user, 2.264000 system)
402
[ Run times consist of 0.028 seconds GC time, and 20.596 seconds non-GC time. ]
404
72,188,810,838 processor cycles
405
1,599,995,904 bytes consed
407
(:NODE 6919517 :URI 52009439 :STRING 31078242 :LITERAL 5721147 :DOUBLE 3192767
408
:FLOAT 273979 :INTEGER 534306 :DATE 31441 :DECIMAL 238780 :TIME 379 :BOOLEAN 3)
412
* (let ((count (term-record-count))
413
(stats (make-hash-table))
414
(dictionary (make-instance 'rlmdb::term-dictionary :directory *term-dictionary-pathname*)))
415
(flet ((count-term (%term)
416
(let ((term-type (cffi:foreign-slot-value %term '(:struct shard-term) 'type)))
417
(incf (gethash term-type stats 0)))))
418
(declare (dynamic-extent #'count-term))
420
for x below 100000000
421
for term-number = (random count)
422
do (rlmdb::call-with-dictionary-entry #'count-term dictionary term-number))))
423
(loop for type being each hash-key of stats using (hash-value type-count)
424
append (list type type-count)))
426
445.223 seconds of real time
427
445.252000 seconds of total run time (443.496000 user, 1.756000 system)
428
[ Run times consist of 0.244 seconds GC time, and 445.008 seconds non-GC time. ]
430
1,558,309,320,443 processor cycles
432
40,000,176,128 bytes consed
434
(:URI 52012808 :STRING 31082022 :DOUBLE 3193920 :LITERAL 5713698 :NODE 6917442
435
:DECIMAL 239210 :INTEGER 533950 :FLOAT 275364 :DATE 31227 :TIME 355 :BOOLEAN 4)