Coverage report: /development/source/library/org/datagraph/spocq-shard/src/core/agp.lisp
| Kind | Covered | All | % |
| expression | 642 | 1931 | 33.2 |
| branch | 53 | 204 | 26.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
3
(in-package :org.datagraph.spocq.implementation)
5
(:documentation "This file defines augmented graph patterns for the 'org.datagraph.spocq' RDF engine."
8
"Copyright 2010 [james anderson](mailto:james.anderson@setf.de) All Rights Reserved.")
11
"An `augmented graph pattern` (AGP) provides a context to annotate the bgp
12
triple/quad patterns, incorporate additional forms and provide interface hooks:
13
- a graph element to indicate an optional GRAPH term
14
- filter terms to be interleaved with the statement matching
15
- slice terms to limit retrieval
16
- variable constraints to provide initial values for pattern terms.
18
The AGP instance is produced by BGP macro-expansion, carried through the
19
algebra evaluation process to the point where its solutions are required, at
20
which point its is compiled on-the-fly into a generator function and
22
The generator function is applied to possible s-i-p constraint inputs and
23
controls the interative match/scan/filter operations on the argument
24
data sources (repositories, external datasets, inline functions, etc)
25
to yield a solution stream for further query evaluation.
27
The compilation process relies on various analysis and manipulation functions
28
which are implemented here:
30
: bgp element ordering
31
: collated pattern isolation
32
based on sensitivity analysis and statement topology with the aim to effect
33
parallel or iterative execution as the combinations of topology and
36
In early versions, AGP instances were delegates for remote execution and
37
they were arranges in a scheduling graph to initiate remote execution and
38
interpret the responses.
39
In the current implementation, they act as leaves to match/scan solutions from
40
the external storage instances
42
Field propagation between them is constrained by the respective BGP roles.
43
For example, while it is possible to propagate solutions from the base left
44
join clause to the optional one, propagation in the other direction is not
45
permitted, as that is the nature of its optionality:
47
join bgp1 <-> bgp2 : permits propagation between siblings and promotion
48
leftjoin bgp1 -> bgp2 : permits propagation from main to optional and promotion of main or of combination
49
union bgp1 - bgp2 : permits no sibling propagation and promotion of combination only
62
(defun agp-initialized-p (agp) (eq (agp-state agp) :initialize))
63
(defun agp-delegated-p (agp) (eq (agp-state agp) :delegate))
64
(defun agp-completed-p (agp) (eq (agp-state agp) :complete))
67
(defmethod agp-solutions ((agp agp))
70
(multiple-value-bind (solutions request-count response-count new-state)
71
(apply-agp-pattern-function agp)
72
(declare (ignore request-count response-count))
73
(values (setf (agp-solutions agp) solutions)
74
(setf (agp-state agp) new-state))))
76
(values (get-agp-solutions agp) :complete))
78
nil (agp-state agp))))
81
(defun apply-agp-pattern-function (agp)
82
(funcall (agp-pattern-function agp)
84
:solutions (agp-effective-bindings agp)))
87
(defgeneric agp-effective-bindings (agp)
89
(or (agp-bindings agp) '(nil))))
91
(defgeneric agp-reset (agp)
93
(setf (agp-solutions agp) nil
94
(agp-state agp) :initialize)))
96
(defun receive-agp-field (agp)
97
(source-receive-agp-field *store-io* agp))
99
(defgeneric source-receive-agp-field (source agp)
100
(:method ((source amqp:channel) (agp agp))
101
(labels ((receive-message-content (stream content-type)
102
(setf content-type mime:application/sparql-query+sse)
103
(setf (agp-solutions agp)
104
(third (nth-value 1 (receive-message stream content-type))))
105
(return-from source-receive-agp-field (agp-state agp)))
106
(handle-store-response (channel class method &rest args)
107
(declare (dynamic-extent args)
108
(ignore channel method))
109
(apply #'amqp:respond-to-deliver class
110
:body #'receive-message-content
112
(setf (de.setf.amqp.implementation::channel-command source 'amqp:deliver)
113
#'handle-store-response)
114
(amqp.u:process-connection-loop (amqp.u:channel-connection source)))))
116
(defmethod (setf agp-solutions) ((solutions list) (agp agp))
117
(setf (agp-state agp) :complete)
118
(setf-agp-solutions solutions agp)
119
(propagate-solution-field agp solutions)
124
(defgeneric agp-form (agp)
126
(cons 'spocq.a:|agp| (agp-body agp))))
128
(defgeneric agp-repository (agp)
130
(if (slot-boundp agp 'repository)
131
(get-agp-repository agp)
132
(setf-agp-repository (task-revision (agp-query agp)) agp))))
134
(defgeneric agp-propagated-base-dimensions (agp)
135
(:documentation "Return the list of dimensions to be used for the source field for bgp pattern matching.
136
The intrinsic value is the list of base dimensions provided when instantiated, but that is suppressed
137
when a graph variable is present in the pattern but not in the source field.
138
This is to allow that iteration across all named graphs is slow when matching v/s scanning and
139
causes to compile for propagation only if w/o a graph or the graph is propagated.")
141
(let ((base-dimensions (agp-base-dimensions agp))
142
(graph (agp-graph agp)))
143
(when (or (not (variable-p graph)) (member graph base-dimensions))
148
(inspect (spocq-compile (lambda () (spocq.a:|bgp| (spocq.a:|triple| ?::s ?::p ?::o)))))
152
(defgeneric propagate-solution-field (agp solution-field)
153
(:method ((agp agp) solution-field)
154
(dolist (successor (agp-successors agp))
155
(setf (agp-bindings successor) solution-field))
156
(dolist (successor (agp-successors agp))
157
(when (and (agp-initialized-p successor)
158
(every #'agp-completed-p (agp-predecessors successor)))
159
(agp-solutions successor)))))
162
;;; (defun compute-pattern-partitions (patterns) (list patterns))
164
(defun annotate-statement-patterns (revision patterns &key (graph |urn:dydra|:|all|))
165
(flet ((dimension-pattern (pattern)
166
;; generate on-demand, but return the pattern
167
(statement-dimensions pattern)
169
(count-pattern (pattern)
170
(destructuring-bind (tag subject predicate object . rest) pattern
171
(declare (ignore tag rest))
172
(setf (statement-count pattern)
173
(cond ((extension-operator-p predicate) 1)
174
;; ((property-path-p predicate) 1) ;; cache actual count
176
(repository-pattern-count revision subject predicate object graph))
179
(loop for pattern in patterns
180
if (bgp-statement-form-p pattern)
181
collect (count-pattern (dimension-pattern pattern))
185
(defun compute-bgp-cardinality (forms)
186
"combine the respective cardinalities to yield a composite estimate"
187
(let ((cardinality 1))
188
(flet ((pattern-count (pattern)
189
(or (statement-count pattern) 1)))
190
(loop for form in forms
191
if (bgp-statement-form-p form)
192
do (setf cardinality (* cardinality (pattern-count form))))
195
;;; (reduce-pattern-expressions '((spocq.a:|triple| ?::s ?::p ?::o)) '((spocq.a:|filter| (spocq.a:= ?::o <http://example.org>))))
198
(defun expand-bgp-property-paths (statements)
199
(labels ((expand-sequence-path (subject verb-sequence object statement-rest)
200
(destructuring-bind (first . rest) verb-sequence
202
(let ((node (ecase *nondistinguished-marker-type*
203
(:blank-node (cons-blank-node "b"))
204
(:variable (cons-variable "PP")))))
205
(cons `(spocq.a:|triple| ,@(arrange-terms subject first node))
206
(expand-sequence-path node rest object statement-rest))))
208
`((spocq.a:|triple| ,@(arrange-terms subject first object)
209
,@(plist-difference statement-rest '(:dimensions))))))))
210
(expandable-p (property)
211
(or (property-path-verb-p property)
212
(inverted-property-path-p property)))
213
(arrange-terms (subject predicate object)
215
(inverted-property-path `(,object ,(unary-property-path-element predicate) ,subject))
216
(property-path-verb `(,subject ,(property-path-verb-iri predicate) ,object)))))
218
(loop for pattern in statements
219
for (subject predicate object . rest) = (rest pattern)
220
if (and (sequence-property-path-p predicate)
221
(every #'expandable-p (sequence-property-path-elements predicate)))
222
append (expand-sequence-path subject (sequence-property-path-elements predicate) object rest)
223
else collect pattern))))
226
;;; impute probability from a pattern to its variables to transfer to other patterns
227
;;; use any initial bindings to specify actual probability
228
;;; needs repository size
230
(defun fold-filter-constants (body)
231
"Given as a BODY a list of statement patterns and filters, rewrite the
232
patterns to integrate any reducible filters components.
233
These any same term forms with terms which allow identity v/s equality tests
234
and any constant comparisons.
235
Return a list of the rewritten patterns, the remaining filters, and an
236
annotation with an alist for equivalents.
237
There is also code to arrange logical operators, but they are not now used.
239
BODY : (list triple-pattern)
240
(list filter-constraint)
241
RESULT : (values triple-pattern*
242
(variable . constraint)*
245
(let* ((patterns (remove-if-not #'bgp-statement-form-p body))
246
(filters (remove-if-not #'filter-form-p body))
247
(other-forms (set-difference (set-difference body filters) patterns)))
248
;; short-circuit logical forms
249
(cond ((or (null filters) (null *fold-agp-filters*))
252
((some #'logical-bgp-statement-form-p patterns)
253
(values patterns filters))
255
(let ((variables (expression-variables patterns))
257
(labels ((expression-logicals (expression)
258
(if (consp expression)
259
(if (member (first expression) '(spocq.a:|&&| spocq.a:|exprlist|))
260
(append (expression-logicals (second expression))
261
(expression-logicals (third expression)))
264
(compute-conjunction (expressions)
265
(if (rest expressions)
266
(destructuring-bind (first . rest) expressions
267
`(spocq.a:|&&| ,first ,(compute-conjunction rest)))
268
(first expressions)))
269
(reducible-argument-p (arg)
270
;; a reducible argument is one for which the 'same term' logic of the query operator
271
;; wwill produce the same result as '=' or 'sameTerm' in the algebra.
272
;; this is true of resource nodes and string literals. it may not be true of
274
(or (iri-p arg) (stringp arg)))
275
(reducible-expression-p (expression)
276
;; reduce only those expression which can guaranteed to be equivalent
277
;; to a sameTerm predicate:
278
;; - sameTerm for two variables or involving a type with intrinsic identity or
279
;; - = for a variable and a literal with intrinsic identity
280
;; require an equality with at least one variable and otherwise either
281
;; variable of constant.
282
(and (consp expression) (= (length expression) 3)
283
(not (temporal-expression-p expression))
284
(destructuring-bind (op arg1 arg2) expression
285
(let ((arg1-is-variable (variable-p arg1))
286
(arg2-is-variable (variable-p arg2))
287
(arg1-is-reducible (reducible-argument-p arg1))
288
(arg2-is-reducible (reducible-argument-p arg2)))
291
(or (and arg1-is-variable arg2-is-reducible)
292
(and arg1-is-reducible arg2-is-variable)))
294
(or ;; do not reduce identical variables (and arg1-is-variable arg2-is-variable)
295
(and arg1-is-variable arg2-is-reducible)
296
(and arg1-is-reducible arg2-is-variable)))
298
(fold-expression (expression)
299
;; fold immediately evaluable expressions into a constant
300
(if (and (null (expression-variables expression))
301
(not (temporal-expression-p expression)))
302
(ebv (ignore-errors (eval expression)))
304
(reduce-filter (filter)
305
;; attempt to eliminate the filter by resolving terms into the patterns
306
;; if successful, return nil
307
;; if any (sub)expressions remains, return them as filters.
308
(let* ((constraint (second filter))
309
(logicals (mapcar #'fold-expression (expression-logicals constraint)))
310
(non-reducibles (remove-if #'reducible-expression-p logicals))
311
(reducibles (set-difference logicals non-reducibles)))
312
;; beta reduce the filter / pattern combination
313
(loop for (nil arg1 arg2) in reducibles
314
if (and (variable-p arg1) (not (variable-p arg2)))
315
do (setf variables (remove arg1 variables)
316
equivalents (acons arg1 arg2 equivalents))
317
else if (and (not (variable-p arg1)) (variable-p arg2))
318
do (setf variables (remove arg2 variables)
319
equivalents (acons arg2 arg1 equivalents)))
321
`(spocq.a:|filter| ,(compute-conjunction non-reducibles))))))
323
;; substitute variable equality and constant constraints into the patterns
324
(setf filters (remove nil (mapcar #'reduce-filter filters)))
326
(setf patterns (loop with equivalent-variables = (mapcar #'first equivalents)
327
for pattern in patterns
328
for pevs = (intersection equivalent-variables pattern)
329
collect (if pevs ;; replace variables with equivalents
330
(sublis equivalents pattern)
332
(setf filters (sublis equivalents filters)))
333
(values (append other-forms patterns)
338
;;; 20120206 : exercise option between splitting bgp into joins and
339
;;; propagating constraints within it.
341
;;; (conicidentally) with scaling, the scan rate has increased and the match rate has dropped.
342
;;; this causes intra-bgp constraint passing to increase the match times. an alternative is
343
;;; to first sort the patterns af per the now last step in compute-agp-form and then, given the
344
;;; estimates selectivities, if the initial statement would produce more solutions than the
345
;;; second on its own - or even some factor which reflects the ration of match to scan, then split the
348
;;; the order would be
350
;;; (compute-pattern-partitions (compute-agp-form (reduce-pattern-expressions (agp-expressions agp))))
352
;;; which would fold constants, then order by selectivity, then partition
354
(defparameter *compute-pattern-partitions-criteria* :by-term)
355
;;; (setq *compute-pattern-partitions-criteria* nil)
358
(defun compute-pattern-partitions (patterns)
359
(compute-repository-pattern-partitions *repository* *compute-pattern-partitions-criteria* patterns))
361
(defun compute-pattern-partitions (patterns)
362
(compute-repository-pattern-partitions *repository* patterns))
365
(defgeneric compute-repository-pattern-partitions (repository patterns)
366
(:documentation "Given a list of patterns, partition them according to a given criteria, or do nothing.")
368
(:method ((repository t) patterns)
372
(:method ((repository repository) body)
373
"The simplest partition process separates statements by shared bindings -
374
both distinguished and non-distinguished variables."
375
(partition-statements-by-variables body))
377
#+(or) ;; more complex than necessary given other mechanisms
378
(:method ((repository repository) patterns)
379
"Given a list of PATTERNS, return a list of its partitions into paths which share no variable,
381
- in the first pass, collect the patterns for each term.
382
- in the second pass,
383
-- start with an arbitrary pattern and compute its variable closure
384
-- repeat for each statement not yet in a partition.
385
- in the third pass, compute the analogous closure for constants
386
In the first phase, partition for variables and blank nodes - as the latter act as nondistinguished
387
variables. In the second phase, repartition all isolated statements for constants.
388
return the list of partitions."
389
(let ((term-patterns ())
391
(labels ((expression-roots (pattern)
392
(append (statement-variables pattern)
393
(statement-blank-nodes pattern)
394
(statement-constants pattern)))
395
(pattern-links (pattern)
396
(append (statement-variables pattern)
397
(statement-blank-nodes pattern)))
399
(pattern-constants (pattern)
400
(let ((subject (second pattern)))
401
(when (spocq.e:constantp subject)
403
(term-patterns (term)
404
(rest (assoc term term-patterns)))
405
((setf term-patterns) (patterns term)
406
(let ((entry (assoc term term-patterns)))
408
(setf (rest entry) patterns)
409
(cdar (setf term-patterns (acons term patterns term-patterns))))))
410
(ensure-pattern-partition (pattern term-selector unary-p)
411
(labels ((pattern-in-partition-p (pattern)
412
(loop for partition in partitions
413
when (member pattern partition)
416
;; collect a tree from the dimensions of the initial pattern
417
(let ((partition ()))
418
(labels ((walk-partition (pattern)
419
(unless (member pattern partition)
420
(push pattern partition)
421
(loop for term in (funcall term-selector pattern)
422
for term-position = (position term pattern)
423
do ;; (print (list :walk term term-position pattern))
424
(loop for linked-pattern in (term-patterns term)
425
for linked-term-position = (position term linked-pattern)
426
unless (or (eq linked-pattern pattern)
427
(pattern-in-partition-p linked-pattern))
428
;; do (print (list :test linked-term-position linked-pattern))
429
when (case term-position
430
(1 (case linked-term-position
432
(3 (case linked-term-position
434
do (walk-partition linked-pattern))))))
435
(walk-partition pattern)
437
(declare (dynamic-extent #'pattern-in-partition-p))
438
(unless (pattern-in-partition-p pattern)
439
(let ((new-partition (new-partition)))
440
(when (or unary-p (rest new-partition))
441
(push new-partition partitions))))))
442
(pivot-partition-p (partition)
443
;; true if two terms are identical and the other is a variable
444
(flet ((identical-of (accessor predicate)
445
(let ((first (funcall accessor (first partition))))
446
(and (funcall predicate first)
447
(dolist (pattern (rest partition) t)
448
(unless (eql first (funcall accessor pattern)) (return nil)))))))
449
;; ? why require identity among predicates?
450
;; the only criteria should be iri-p
451
(and (identical-of #'third #'iri-p)
452
(or (identical-of #'second #'variable-p)
453
(identical-of #'fourth #'variable-p))))))
454
;; first, collect the patterns which contain a term
455
(dolist (pattern patterns)
456
(dolist (term (statement-dimensions pattern))
457
(push pattern (term-patterns term))))
458
#+(or) ;; why eliminate these?
459
(dolist (tp term-patterns)
460
(when (pivot-partition-p (rest tp))
461
(setf (rest tp) nil)))
462
;; (pprint (list :term-patterns term-patterns))
463
;; then build partitions, first for variables, then for constants
464
(dolist (pattern patterns)
465
(ensure-pattern-partition pattern #'pattern-links nil))
466
;; (pprint (list :partitions/links partitions))
467
(dolist (pattern patterns)
468
(ensure-pattern-partition pattern #'statement-constants t))
469
;; (pprint (list :partitions/constants partitions))
470
;; (mapcar #'(lambda (p) (cons (first p) (length (rest p)))) term-patterns)
474
(defun partition-statements-by-variables (body)
475
"partition according to shared variables/blank nodes.
476
If forms other other than statement patterns are present, distribute them.
477
!!! they should be relevant everywhere"
478
(let ((variable-statements (make-hash-table))
479
(non-statements (remove-if #'bgp-statement-form-p body))
480
(variables-and-nodes ()))
481
(loop for form in body
482
when (bgp-statement-form-p form)
484
(loop for variable in (statement-variables form)
485
do (push form (gethash variable variable-statements)))
486
(loop for variable in (statement-blank-nodes form)
487
do (push form (gethash variable variable-statements)))))
488
(setf variables-and-nodes
489
(loop for key being each hash-key of variable-statements
491
(labels ((collect-partition (key collected)
492
(let ((partition (gethash key variable-statements)))
493
(remhash key variable-statements)
494
(setf collected (union collected partition))
495
(loop for key in (partition-keys partition)
496
do (setf collected (collect-partition key collected)))
498
(partition-keys (statements)
500
(loop for statement in statements
502
(loop for variable in (statement-variables statement)
503
do (pushnew variable keys))
504
(loop for node in (statement-blank-nodes statement)
505
do (pushnew node keys))))
507
(let ((partitions (remove nil
508
(loop for key in variables-and-nodes
509
collect (collect-partition key ())))))
510
(loop for partition in partitions
511
collect (append non-statements partition))))))
513
(equalp (partition-statements-by-variables
514
'((spocq.a:|graph| x)
515
(spocq.a:|triple| ?::s p1 ?::o1)
516
(spocq.a:|triple| ?::s p2 ?::o2)
517
(spocq.a:|triple| ?::o2 p3 ?::o3)))
518
'(((spocq.a:|graph| x)
519
(ORG.DATAGRAPH.SPOCQ.ALGEBRA:|triple| ?::S P1 ?::O1)
520
(ORG.DATAGRAPH.SPOCQ.ALGEBRA:|triple| ?::S P2 ?::O2)
521
(ORG.DATAGRAPH.SPOCQ.ALGEBRA:|triple| ?::O2 P3 ?::O3))))
523
(equalp (partition-statements-by-variables
524
'((spocq.a:|graph| x)
525
(spocq.a:|triple| ?::s p1 ?::o1)
526
(spocq.a:|triple| ?::s p2 ?::o2)
527
(spocq.a:|triple| ?::s2 p3 ?::o3)))
528
'(((spocq.a:|graph| x)
529
(ORG.DATAGRAPH.SPOCQ.ALGEBRA:|triple| ?::S P2 ?::O2)
530
(ORG.DATAGRAPH.SPOCQ.ALGEBRA:|triple| ?::S P1 ?::O1))
532
(ORG.DATAGRAPH.SPOCQ.ALGEBRA:|triple| ?::S2 P3 ?::O3))))
535
(defun collate-indexed-bgp-forms (body)
536
"Given a basic graph pattern body - a list of pattern and filter statement, examine the statements to
537
combine patterns and filters for those predicates which indicate an index, construct the effective
538
index statements and return two statement sets, the matched and the filtered.
540
Retain the original order given the point of first appearance of some predicate."
542
(let* ((indexed-statements ())
544
(statements-by-index (loop with cache = ()
546
for operator = (first form)
548
((spocq.a:|quad| spocq.a:|triple|)
549
(let* ((predicate (statement-predicate form))
550
(index (repository-index *transaction* predicate)))
552
(push form indexed-statements)
553
(push form (getf cache index))))))
554
finally (return cache)))
555
(variables-by-index (loop for (index statements) on statements-by-index by #'cddr
556
append (list index (expression-variables statements))))
557
(filters-by-index (loop with cache = ()
559
for operator = (first form)
562
(let* ((variables (expression-variables form))
563
(index (loop for (index index-variables) on variables-by-index by #'cddr
564
when (null (set-difference variables index-variables))
567
(push form indexed-filters)
568
(push form (getf cache index))))))
569
finally (return cache)))
570
;; isolate statement-filter combinations and just statements
571
;; a filter over a vraible which is not present in any statement is invalid
572
(indexed-forms (loop for (index statements) on statements-by-index by #'cddr
573
for statement-filters = (getf filters-by-index index)
574
;; defer to bgp compilation
575
;; append (compute-index-statements index1 statements filters))))
576
collect (cons `(spocq.a::|declare| (index ,index)) (append statements statement-filters)))))
577
;; check for unrelated filters.
578
;; this would mean, they were pushed, but do not concern variables from any statement
579
(loop with unrelated-filters = ()
580
for (index filters) on filters-by-index by #'cddr
581
unless (getf statements-by-index index)
582
do (push filters unrelated-filters)
583
finally (assert (null unrelated-filters) ()
584
"BGP includes unrelated index filters: ~s" unrelated-filters))
585
(let* ((slice-form nil)
586
(other-forms (loop for form in body
587
unless (or (when (slice-form-p form)
588
(setf slice-form form))
589
(loop for forms in indexed-forms
590
when (find form forms)
593
;; then isolated indexed statements from the standard matching process
594
(values indexed-forms
598
(defparameter *sip-cardinality-limit* 1000)
600
(defmethod collate-subject-bgp-forms (patterns)
601
"A time-series repository collates results from graph patterns which pertain to
602
its declared predecates. Statements which involve those predicates are isolated
603
into paritions which share subject."
604
(let ((triples (remove-if-not #'triple-form-p patterns))
605
(non-triples (remove-if #'triple-form-p patterns)))
606
;; partition based on three interacting indicators
607
;; -- a statement pattern with a |urn:dydra|:|event| predicate
608
;; -- statements with declared event predicates
609
;; -- an event filter
611
;; separate those triples which involve a constant time-series predicate and a variable object.
612
;; the ts index keys omit the object values and substitute a event designator term
613
;; (ordinal, uuid, or timestamp) which is understood as the complex event id.
614
;; when matching, the event id may be constant or variable depending on the term
615
;; which appears in the event statement pattern and/or the boounds expressed in any event filter
616
;; the graph and the subject may be constant or variable.
617
;; consistent with rsp, the graph identifes the "simple event object", or sample,
618
;; while the event designator identifies the complex event
619
;; - http://ontologydesignpatterns.org/wiki/Submissions:EventProcessing
620
;; - https://www.w3.org/community/rsp/wiki/RDF_Stream_Models
621
;; different patterns yield different match processes and different binding streams.
622
;; in any case, the basic partition is those which are present in the time-series indices.
624
;; the partition process expects at most one set of patterns which pertains to a given subject
625
;; and produces one of
626
;; - no partition, if no event indicator is present
627
;; - a two-statement partition with the event statement pattern and a wild pattern
628
;; - a multi-statement partition with the event pattern and those which include egent predicates.
629
(labels ((cons-cardinality (body)
630
(let ((cardinality (compute-bgp-cardinality body)))
631
(cons `(declare (spocq.e::cardinality ,cardinality)) body)))
632
(partition-variables (partition)
633
(loop for statement in partition
635
do (setf variables (union variables (statement-variables statement)))
636
finally (return variables)))
637
(merge-path-statement (triple partition partition-variables)
638
(when (and (implies-bounded-property-path (statement-predicate triple))
639
(intersection (statement-variables triple) partition-variables))
640
(setf (rest (last partition)) (list triple))
642
(loop with collections = (make-hash-table)
643
for triple in triples
644
for (tag . spo) = triple
645
;; allow elementary iri's only in the predicate partitions
646
;; a nested loop in either direction is not likely to perform well
647
;; for example nxp's 40da4ed939d965f771cc6c91b4c55381df55733e includes paths
648
;; with result cardinalities of 126k
649
if (and (eq tag 'spocq.a:|triple|) (iri-p (second spo)))
650
;; factor constant predicate by subject
651
do (push triple (gethash (first spo) collections))
652
else collect triple into other-triples
653
finally (let* ((subject-partitions (loop for partition-triples being each hash-value of collections
654
if (> (length partition-triples) 1)
655
collect partition-triples
656
else do (push (first partition-triples) other-triples)))
657
(partitions-variables (loop for partition in subject-partitions
658
collect (partition-variables partition)))
659
;; discard declarations
660
(non-triples-ncd (remove-if #'(lambda (s) (or (and (eq (first s) 'declare)
661
(eq (first (second s)) 'spocq.e::cardinality))
664
(slice-form (find-if #'slice-form-p non-triples)))
665
;; merge kleene paths. other paths will have been expanded
666
(let ((other-autonomous (loop for triple in other-triples
667
unless (loop for partition in subject-partitions
668
for partition-variables in partitions-variables
669
when (merge-path-statement triple partition partition-variables)
672
(setf subject-partitions (loop for partition in subject-partitions
673
collect (append non-triples-ncd (cons-cardinality partition))))
674
(setf other-autonomous (when other-autonomous
675
(append non-triples-ncd (cons-cardinality other-autonomous))))
676
(return (values subject-partitions other-autonomous slice-form))))))))
679
(defun compute-nested-join (forms)
680
"Given a list of partitions (that is, statement pattern lists), compute an
681
operator tree which combines them with joins and wraps any simple pattern
683
!!! it should use cardinality estimate to establish s-i-p joins where possible."
685
(let* ((dimensioned-forms (loop for form in forms
686
collect (cons (expression-dimensions form) form)))
687
(paired-dimensioned-forms (loop for list on dimensioned-forms
688
for first = (first list)
689
append (loop for second in (rest list)
690
collect (list first second))))
691
(ordered-pdfs (sort (copy-list paired-dimensioned-forms)
692
#'> :key #'(lambda (pair)
693
(destructuring-bind (df1 df2) pair
694
(length (intersection (first df1) (first df2))))))))
695
(destructuring-bind (((dl . left) (dr . right)) . others) ordered-pdfs
696
(declare (ignore others))
697
(let* ((join-form `(spocq.a:|join| ,left ,right))
698
(form-variables (union dl dr)))
699
(setf dimensioned-forms (remove left (remove right dimensioned-forms :key #'rest) :key #'rest))
700
(loop (unless dimensioned-forms (return join-form))
701
(when (rest dimensioned-forms)
702
(setf dimensioned-forms (sort dimensioned-forms #'>
703
:key #'(lambda (d.other) (length (intersection form-variables (first d.other)))))))
704
(destructuring-bind ((d . other) . others) dimensioned-forms
705
(setf join-form `(spocq.a:|join| ,join-form ,other))
706
(setf form-variables (union form-variables d))
707
(setf dimensioned-forms others)))
711
(defparameter *bgp-statement-pattern-limit* 256)
712
(defparameter *bgp-sip-paths* nil)
714
;;; (setf (get 'compute-match-scan-partitions :verbose) t)
715
;;; (setf (get 'compute-match-scan-partitions :verbose) nil)
716
(defgeneric compute-match-scan-partitions (repository body &key)
717
(:method ((repository repository) body &key
718
(match-rate (repository-match-rate repository))
719
(scan-rate (repository-scan-rate repository))
720
(verbose (get 'compute-match-scan-partitions :verbose)))
721
"Given a list of patterns, generate a decimated list according to estimates of
723
Assume that any blank nodes have already been replaced across all patterns
724
in some preliminary expansion step."
725
(let ((partitions ())
726
(current-partition ())
727
(repository-count nil)
729
(limit *bgp-statement-pattern-limit*)
730
(patterns (remove-if-not #'bgp-statement-form-p body))
731
(non-patterns (remove-if #'bgp-statement-form-p body)))
732
(labels ((repository-count ()
734
(setf repository-count
735
(if repository (repository-statement-count repository) 1))))
736
(pattern-count (pattern)
737
(or (statement-count pattern) 1))
738
(extension-pattern-p (pattern)
739
(destructuring-bind (subject predicate object) (statement-terms pattern)
740
(declare (ignore subject object))
741
(extension-operator-p predicate)))
742
(path-pattern-p (pattern)
743
(destructuring-bind (subject predicate object) (statement-terms pattern)
744
(declare (ignore subject object))
745
(property-path-p predicate)))
746
(add-to-partition (pattern)
748
(push pattern current-partition))
751
(when current-partition (close-partition)))
753
(push (nreverse current-partition) partitions)
754
(setf current-partition ())))
755
(cond ((rest patterns)
756
(loop for (this next) on patterns
757
for (this-count next-count) on (mapcar #'pattern-count patterns)
758
for open-new = (and next
759
(not (extension-pattern-p this))
760
(not (extension-pattern-p next))
761
(or (and (not *bgp-sip-paths*) (path-pattern-p next))
762
(and (> this-count 2)
763
(> (/ this-count match-rate) (/ next-count scan-rate)))
765
do (when verbose (format *trace-output* "~&~s~%"
766
(list :open-new open-new
767
:match-rate match-rate :scan-rate scan-rate
768
:this-count this-count :next-count next-count
769
:count count :limit limit
770
:this this :next next)))
771
do (add-to-partition this)
772
when open-new do (new-partition))
774
(loop for partition in (nreverse partitions)
775
collect (append non-patterns partition)))
777
;; just one partition
781
(defgeneric compute-pattern-order (repository patterns &key base-dimensions)
782
(:method ((repository repository) patterns &key (base-dimensions nil))
783
(let* ((ordered-patterns ())
784
(pattern-effective-counts ())
785
(repository-count nil))
786
(labels ((repository-count ()
788
(setf repository-count
789
(if repository (repository-statement-count repository) 1))))
790
(pattern-count (pattern)
791
(or (getf (statement-properties pattern) :count) 1))
792
(pattern-predecessor (pattern)
793
(find pattern ordered-patterns :test #'intersection :key #'expression-variables))
794
(pattern-effective-count (pattern)
795
(or (rest (assoc pattern pattern-effective-counts))
796
(let* ((count (pattern-count pattern))
797
(predecessor (pattern-predecessor pattern))
798
(predecessor-count (when predecessor (pattern-count predecessor)))
799
(effective-count (if predecessor-count
800
(* predecessor-count count)
802
(setf (pattern-effective-count pattern) effective-count))))
803
((setf pattern-effective-count) (count pattern)
804
(setf (rest (or (assoc pattern pattern-effective-counts)
805
(first (setf pattern-effective-counts (acons pattern count pattern-effective-counts)))))
807
(bound-variable-p (v) (member v base-dimensions))
808
(pattern-variable-count (pattern)
809
(loop for term in (statement-terms pattern)
810
when (and (variable-p term) (not (bound-variable-p term)))
812
(pattern-preceeds (pattern1 pattern2)
813
;; precedence involves a number of factors
814
;; variable count - but not variables which have been propagated in to the bgp
815
;; selectivity of the pattern
816
;; the expected selectivity based on the statements which proceed.
817
(let* ((pec1 (pattern-effective-count pattern1))
818
(pec2 (pattern-effective-count pattern2))
819
(pvc1 (pattern-variable-count pattern1))
820
(pvc2 (pattern-variable-count pattern2))
821
(pp1 (pattern-predecessor pattern1))
822
(pp2 (pattern-predecessor pattern2))
823
(inherent-preceeds (or (< pec1 pec2) (and (= pec1 pec2) (< pvc1 pvc2)))))
825
(or inherent-preceeds (member pp2 (member pp1 ordered-patterns)))
827
(and (not pp2) inherent-preceeds))))))
829
(declare (dynamic-extent #'pattern-count))
830
;; iff there is more than one pattern statement, the repository contains
831
;; enough to make it worthwhile, and there are not logical combination operators
833
;; sort the patterns such that those with more literals/resources preceed
834
(cond ((and *agp-sort-patterns*
836
(or (null repository) (> (repository-count) *agp-sort-statement-count-minimum*))
837
(notany #'logical-bgp-statement-form-p patterns))
838
(setf patterns (sort (copy-list patterns) #'> :key #'pattern-variable-count))
839
(loop (unless patterns (return))
840
(when (rest patterns)
841
(setf pattern-effective-counts ())
842
(setf patterns (stable-sort patterns #'pattern-preceeds))
843
(log-trace "sort agp patterns: count ~d; ordered: ~a; remaining: ~a"
845
(mapcar #'(lambda (pattern) (cons (ceiling (pattern-count pattern)) pattern))
847
(mapcar #'(lambda (pattern) (cons (ceiling (pattern-effective-count pattern)) pattern))
849
(let ((selected (pop patterns)))
850
(push selected ordered-patterns)))
851
(nreverse ordered-patterns))
855
(defgeneric compute-agp-form (agp)
856
(:documentation "Given an AGP, collect and arrange its bindings, patterns, and filters for delegation to
857
a store. The result is an 'agp' form in which an optional 'bind' term appears first, followed by 'triple',
858
'quad' and 'filter' terms. These latter are arranged such that more specific patterns appear earlier and
859
a filter term appears immediately after all of its variables.
861
see [http://www.ntu.edu.sg/home/bshe/ScalableJoin.pdf] for improvements to the sensitivty computation
862
by precomuting combinations.")
865
(let* ((repository (agp-repository agp))
866
(graph (agp-graph agp))
867
(variables (expression-variables agp))
868
(base-dimensions (agp-propagated-base-dimensions agp))
869
(equivalents (agp-equivalents agp))
870
(query (agp-query agp))
871
(filters (agp-filters agp))
872
(binds (agp-binds agp))
873
(slice (agp-slice agp))
874
(patterns (copy-list (agp-statements agp)))
875
(ordered-patterns ())
877
(pattern-effective-counts ())
878
(repository-count nil)
879
(reference-dimensions (agp-reference-dimensions agp)))
880
(declare (ignorable query))
881
(labels ((repository-count ()
883
(setf repository-count
884
(if repository (repository-statement-count repository) 1))))
885
(pattern-count (pattern)
886
(or (rest (assoc pattern pattern-counts))
887
(setf (pattern-count pattern)
888
(if (triple-form-p pattern)
889
(destructuring-bind (subject predicate object) (statement-terms pattern)
890
(if (extension-operator-p predicate)
892
(repository-pattern-count repository subject predicate object graph)))
894
((setf pattern-count) (count pattern)
895
(setf (rest (or (assoc pattern pattern-counts)
896
(first (setf pattern-counts (acons pattern count pattern-counts)))))
898
(pattern-predecessor (pattern)
899
(find pattern ordered-patterns :test #'intersection :key #'expression-variables))
900
(pattern-effective-count (pattern)
901
(or (rest (assoc pattern pattern-effective-counts))
902
(let* ((count (pattern-count pattern))
903
(predecessor (pattern-predecessor pattern))
904
(predecessor-count (when predecessor (pattern-count predecessor)))
905
(effective-count (if predecessor-count
906
(+ predecessor-count count)
908
(setf (pattern-effective-count pattern) effective-count))))
909
((setf pattern-effective-count) (count pattern)
910
(setf (rest (or (assoc pattern pattern-effective-counts)
911
(first (setf pattern-effective-counts (acons pattern count pattern-effective-counts)))))
913
(bound-variable-p (v) (member v base-dimensions))
914
(binding-count (pattern) (count-if #'bound-variable-p pattern))
915
(pattern-preceeds (pattern1 pattern2)
916
;; precedence involves a number of factors
917
;; variable count - but not variables which have been propagated in to the bgp
918
;; selectivity of the pattern
919
;; the expected selectivity based on the statements which proceed.
920
(let* ((pc1 (pattern-effective-count pattern1))
921
(pc2 (pattern-effective-count pattern2))
922
(pp1 (pattern-predecessor pattern1))
923
(pp2 (pattern-predecessor pattern2))
924
(pbc1 (binding-count pattern1))
925
(pbc2 (binding-count pattern2))
926
(inherent-preceeds (or (< pc1 pc2)
928
(< (count-if #'variable-p pattern1)
929
(count-if #'variable-p pattern2))))))
931
(unless (> pbc2 pbc1)
936
inherent-preceeds))))))))
938
(declare (dynamic-extent #'pattern-count #'bound-variable-p))
939
;; iff there is more than one pattern statement, the repository contains
940
;; enough to make it worthwhile, and there are not logical combination operators
942
;; sort the patterns such that those with more literals/resources preceed
943
(cond ((and *agp-sort-patterns*
945
(> (repository-count) *agp-sort-statement-count-minimum*)
946
(notany #'logical-bgp-statement-form-p patterns))
947
(loop (unless patterns (return))
948
(when (rest patterns)
949
(setf pattern-effective-counts ())
950
(setf patterns (stable-sort patterns #'pattern-preceeds))
951
(log-trace "sort agp patterns: count ~d; ordered: ~a; remaining: ~a"
953
(mapcar #'(lambda (pattern) (cons (ceiling (pattern-count pattern)) pattern))
955
(mapcar #'(lambda (pattern) (cons (ceiling (pattern-effective-count pattern)) pattern))
957
(let ((selected (pop patterns)))
958
(push selected ordered-patterns)
959
(setf (pattern-count selected) (pattern-effective-count selected))))
960
(setf (agp-field-size-estimate agp)
961
(* (pattern-effective-count (first ordered-patterns)) (length variables)))
962
(setf ordered-patterns (nreverse ordered-patterns)))
964
(setf ordered-patterns patterns)
965
(setf (agp-field-size-estimate agp) (length variables))))
967
;; insert each filter immediately after all of its variables have a binding
968
(dolist (filter filters)
969
(let ((filter-variables (expression-variables filter))
970
(more-patterns ordered-patterns))
971
(loop (let ((pattern (first more-patterns)))
972
(cond ((null pattern)
974
((or (null (setf filter-variables
975
(set-difference filter-variables (expression-variables pattern))))
976
(null (rest more-patterns)))
977
(push filter (rest more-patterns))
979
(pop more-patterns))))
980
#+(or) ; moved to compute-decimated-bgp-lambda
981
(let ((rewrite-operators (query-bgp-rewrite-operators query)))
982
(when rewrite-operators
983
(loop for operator in rewrite-operators
984
do (setf ordered-patterns (or (funcall operator ordered-patterns) ordered-patterns)))))
985
(let ((agp-form `(spocq.a::|agp|
986
,@(when (agp-graph agp) `((spocq.a:|graph| ,(agp-graph agp))))
987
,@(agp-declarations agp)
988
(spocq.a::|id| ,(agp-id agp))
989
,@(when slice `((spocq.a:|slice| ,@slice)))
990
,@(let ((opaque-variables (set-difference variables reference-dimensions)))
991
;; declare opaque variables. these are those variables from the patterns only, for which the
992
;; query does not require transparency due to their interpretation elsewhere.
993
(when opaque-variables
994
`((spocq.a::|declare| (spocq.a::|opaque| ,@opaque-variables)))))
996
`((spocq.a::|equivalents| ,@equivalents)))
999
,@(let ((binds (agp-temporal-binds agp)))
1000
(when binds `((spocq.a::|temporal-bind| ,@binds))))
1001
,@(let ((constraints (agp-version-constraints agp)))
1002
(when constraints `((spocq.a::|version-constraint| ,@constraints)))))))
1003
(log-trace "compute-agp-form: ~:[unchanged~;reordered~]: ~s"
1004
(eq ordered-patterns patterns) agp-form)
1007
(defun compute-agp-order (agps)
1008
;; first predict the solution field size
1010
;; compute an initial form to use for sorting based on solution count
1012
(setf (agp-predecessors agp) nil)
1013
(setf (agp-successors agp) nil))
1015
(when *propagate-agp-bindings*
1016
(flet ((bound-count-statements (agp bind-variables)
1017
(loop for pattern in (agp-statements agp)
1018
for pattern-variables = (expression-variables pattern)
1019
when (and pattern-variables
1020
(null (set-difference pattern-variables bind-variables)))
1022
;; should take into account the size of the source solution field - eg through a slice
1023
;; shouild connect not just bgp, but also subselects (for the slice benefit)
1024
(loop for agp in (remove :synchronous agps :key #'agp-processing-mode)
1025
do (loop for other-agp in (rest (member agp agps))
1026
when (intersection (agp-variables agp) (agp-variables other-agp))
1027
;; if the agps are in the same scope and share variables, then if propagation
1028
;; from one to the other would render a pattern to a constant
1029
;; then the one should proceed the other
1030
do (cond ((equal (agp-join-scope agp) (agp-join-scope other-agp))
1031
(let ((agp-counts (bound-count-statements agp (agp-projection-dimensions other-agp)))
1032
(other-counts (bound-count-statements other-agp (agp-projection-dimensions agp))))
1033
(cond ((> agp-counts other-counts)
1034
(push agp (agp-predecessors other-agp))
1035
(push other-agp (agp-successors agp)))
1036
((> other-counts agp-counts)
1037
(push other-agp (agp-predecessors agp))
1038
(push agp (agp-successors other-agp)))
1040
(push agp (agp-predecessors other-agp))
1041
(push other-agp (agp-successors agp))))))
1042
((tailp (agp-join-scope agp) (agp-join-scope other-agp))
1043
;; if agp is a higher scope, propagate from it
1044
(push agp (agp-predecessors other-agp))
1045
(push other-agp (agp-successors agp)))
1046
((tailp (agp-join-scope other-agp) (agp-join-scope agp))
1047
;; if the other one is higher, propagate the other way
1048
(push other-agp (agp-predecessors agp))
1049
(push agp (agp-successors other-agp)))))))
1051
;; sort the list based on precedence
1052
(setf agps (sort (copy-list agps) #'(lambda (agp1 agp2) (member agp2 (agp-successors agp1)))))
1054
;; reduce predecessor/successor relation to just one
1055
(loop for agps on agps
1056
for agp = (first agps)
1057
for other-agps = (rest agps)
1058
do (loop for successor in (agp-successors agp) do
1059
(loop for other-agp in other-agps
1060
when (find successor (agp-successors other-agp))
1061
do (setf (agp-successors other-agp) (remove successor (agp-successors other-agp))
1062
(agp-predecessors successor) (remove other-agp (agp-predecessors successor))))
1063
(when (setf (agp-base-dimensions successor) (agp-projection-dimensions agp))
1064
;; recompute the form of any which have base dimensions from a predecessor
1065
(slot-makunbound successor 'form)
1066
(agp-form successor)))))
1068
;; log the result for posterity
1069
(mapcar #'(lambda (agp)
1070
(log-trace "ordered agp: ~a/~a [@ ~a][= ~a] ~a predecessors: ~a successors: ~a"
1071
(agp-id agp) (agp-join-scope agp) (agp-state agp) (agp-field-size-estimate agp) (agp-variables agp)
1072
(mapcar #'agp-id (agp-predecessors agp))
1073
(mapcar #'agp-id (agp-successors agp)))
1075
(format *trace-output* "~&~%ordered agp: ~a/~a [@ ~a][= ~a] ~a predecessors: ~a successors: ~a~%"
1076
(agp-id agp) (agp-join-scope agp) (agp-state agp) (agp-field-size-estimate agp) (agp-variables agp)
1077
(mapcar #'agp-id (agp-predecessors agp))
1078
(mapcar #'agp-id (agp-successors agp))))
1086
(defgeneric compute-bgp-lambda (store pattern-elements &key
1091
projection-dimensions
1094
(:documentation "Given a set of quad patterns, compute a function of one required argument, a data store,
1095
and an optional initial solution field, which returns the matched graph.
1096
PATTERN-ELEMENS : (list triple) : triple patterns with literal, variable, or blank node terms
1097
:GRAPH : a variable or uri designator for the context to be bound or asserts
1098
:GRAPHS : the domain for the statement pattern's graph term.
1099
:BASE-DIMENSIONS : (list variable) : the variables to beincluded as initial bindings
1100
:PROJECTION-VARIABLES : (list variable) : the variables to project from the result field.
1101
defaults to all those in the statement patterns plus any graph variable.
1103
The result is a filter over over a statement source.
1104
The bgp graph pattern is rewritten into a succession of individual matches and continuation delegations.
1105
In addition to the individual patterns, the translation recognizes bindings and filter expressions.
1106
It arranges to feed bindings into the process as the initial field. Filter evaluation is interleaved with
1107
matching to reduce the solution field size.
1109
Each pattern of the bgp set is transformed into a function which delegates a single pattern match request
1110
to the store. The request arguments are the terms from the pattern abstracted over its variables.
1111
The successive functions act as continuations for the earlier terms, whereby variables bound by the earlier
1112
terms become literals in the successive queries. The operations are executed as expressed in the initial
1113
form, without repording. That is left to the caller.
1115
If a continuation is supplied, it must evaluate to a function of one argument, the pattern's solution field.
1116
The final continuation constructs a solution from each combination of the bound variables for which all
1117
queries succeed and collects a field of all combinations.") )
1119
(defgeneric compute-logical-bgp-lambda (store pattern-elements &key
1124
projection-dimensions
1127
(defgeneric agp-pattern-function (agp)
1128
(:documentation "return the compiled implmentation for the given agp as applied to
1129
its repository. the operator is identified by statement patterns, graph constraint,
1130
the request graphs and any input dimensions. These serve as a cache key to
1131
save the computed operator for re-use")
1132
(:method ((agp agp))
1133
(let* ((repository (agp-repository agp))
1134
(cache (repository-bgp-cache repository))
1135
;; eliminate all non-pertinent data from the tree, eg. the extended triple properties
1136
;; now does not reiterate the agp analysis. the bgp expansion should do it all
1137
(body (agp-body agp))
1138
(pattern-forms (loop for form in body
1139
for key = (case (first form)
1140
((spocq.a:|filter| spocq.a:|bind|) form)
1141
(spocq.a:|triple| (statement-terms form)) ; skip type, extract terms
1142
(spocq.a:|graph| form)
1143
(spocq.a:|slice| form))
1144
when key collect key))
1145
;; (type (type-of repository)) ; type is redundant as the cache is owned by the repository
1146
(key (list pattern-forms
1148
(agp-propagated-base-dimensions agp)
1149
(agp-dataset-graphs agp)
1150
(first (query-dynamic-bindings (agp-query agp)))
1151
(agp-version-constraints agp))))
1153
(or (gethash key cache)
1154
(setf (gethash key cache)
1155
(compute-agp-pattern-function agp repository)))
1156
(compute-agp-pattern-function agp repository)))))
1158
(defmethod agp-pattern-function :around (agp)
1159
(let* ((repository (agp-repository agp))
1160
(cache (repository-bgp-cache repository)))
1162
(print (list :before (loop for key being each hash-key of cache
1163
using (hash-value function) collect (list key function))
1165
(prog1 (print (call-next-method))
1166
(print (list :after (loop for key being each hash-key of cache
1167
using (hash-value function) collect (list key function))))))))
1170
(defparameter *agp-pattern-lambda* nil
1171
"Save the latest pattern lambda expression")
1172
(defparameter *trace-pattern-forms* ()
1173
"If the list of pattern forms intersects with a given bgp,
1174
log the patterns and the generated lambda")
1178
(defgeneric compute-agp-pattern-function (agp repository)
1179
(:documentation "Generate and compile the bgp interpretation operator")
1180
(:method ((agp agp) (repository t))
1181
;; log before and after to get timestamps in the syslog
1182
(log-trace "agp pattern function: patterns ~a" (agp-statements agp))
1183
(log-trace "agp pattern function: filters ~a" (agp-filters agp))
1184
(let* ((lambda (compute-agp-pattern-lambda agp repository)))
1185
(log-trace "agp pattern function: form ~a" (agp-form agp))
1186
(log-trace "agp pattern function: lambda ~a" lambda)
1187
(setq *agp-pattern-lambda* lambda)
1188
(values (ecase *query-evaluation-mode*
1189
(:compiled (spocq-compile lambda))
1190
(:interpreted (spocq-compile `(lambda (&rest args)
1191
(eval (cons ',lambda args))))))
1194
(defgeneric compute-agp-pattern-lambda (agp repository)
1195
(:method ((agp agp) repository)
1196
(let* ((pattern-forms (rest (agp-form agp))))
1197
(if (some #'logical-bgp-statement-form-p pattern-forms)
1198
(compute-logical-bgp-lambda repository
1200
:graph (agp-graph agp)
1201
:base-dimensions (agp-propagated-base-dimensions agp)
1202
:projection-dimensions (agp-projection-dimensions agp)
1203
:dataset-graphs (agp-dataset-graphs agp)
1204
:dynamic-variables (first (query-dynamic-bindings (agp-query agp))))
1205
(multiple-value-bind (lambda dimensions)
1206
(compute-bgp-lambda repository
1208
:graph (agp-graph agp)
1209
:base-dimensions (agp-propagated-base-dimensions agp)
1210
;; these need to be correct
1211
:projection-dimensions (agp-projection-dimensions agp)
1212
:dataset-graphs (agp-dataset-graphs agp)
1213
:variables (if (variable-p (agp-graph agp))
1214
(cons (agp-graph agp) (agp-variables agp))
1215
(agp-variables agp))
1216
:dynamic-variables (first (query-dynamic-bindings (agp-query agp))))
1217
(setf (agp-projection-dimensions agp) dimensions)
1218
(when (and *trace-pattern-forms*
1219
(log-level-qualifies? :trace)
1220
(intersection pattern-forms *trace-pattern-forms* :test #'equalp))
1221
(log-trace "agp: patterns: ~a lambda ~a" pattern-forms lambda))
1224
(defgeneric agp-sort-dimensions (agp)
1225
(:documentation "return the projection dimensions reordered to reflect the implicit sort
1226
which results from bgp processing. this depends on the statement reordering and from the the order
1227
and nesting of the match requests to the store.")
1229
#+(or) ; does not work for logical bgp forms
1230
(:method ((agp agp))
1231
(sort-bgp-dimensions (agp-projection-dimensions agp)
1232
(remove-if-not #'(lambda (s)
1233
(or (bgp-statement-form-p s)
1237
(:method ((agp agp))
1238
(agp-projection-dimensions agp)))
1241
(defun sort-bgp-dimensions (dimensions statements)
1242
(sort (copy-list dimensions) #'<
1243
:key #'(lambda (dimension) (let ((stmt (find-if #'(lambda (s) (find dimension s)) statements)))
1245
(+ (1+ (position stmt statements)) (* .1 (position dimension stmt)))
1246
(progn (warn "lost dimension: ~a ~a." dimension statements) 0))))))
1249
;; to get a profile hook on it
1250
(defun call-agp-function (agp function)
1251
(restart-bind ((terminate-task (lambda (task-to-terminate &optional condition)
1252
(when (eq (agp-query agp) task-to-terminate)
1253
(log-info "task terminated in agp thread: ~a" task-to-terminate)
1254
(return-from call-agp-function condition)))))
1256
#+(or) ;; for bgp error debugging
1257
(handler-bind ((error (lambda (c)
1258
(warn "error processing bgp: ~a" c)
1259
(list-thread-operations :verbose nil)
1260
(break "error: ~s" c))))
1262
#+(or) ;; for per-thread tracing
1263
(with-open-file (*trace-output* (format nil "/tmp/cursor.~a" (agp-id agp))
1265
:if-exists :supersede
1266
:if-does-not-exist :create)
1267
(funcall function))))
1271
;;; (defparameter *agp-channels* ())
1272
;;; (defun agp-channel-put (result-channel page) (channel-put result-channel page))
1275
(defun run-pattern-function (&rest args)
1276
(apply (first args) (rest args)))
1278
(defun run-agp-thread (agp pattern-function result-channel base-channel)
1279
(trace-algebra run-agp-thread result-channel base-channel)
1280
(setf (agp-state agp) :delegate)
1281
(setf (agp-start-time agp) (rdfcache:time-in-thread))
1282
(setf (agp-solution-count agp)
1284
(run-pattern-function pattern-function result-channel base-channel)
1285
(run-pattern-function pattern-function result-channel)))
1286
(setf (agp-state agp) :complete)
1287
(setf (agp-end-time agp) (rdfcache:time-in-thread))
1288
(trace-algebra run-agp-thread.complete result-channel :time (rdfcache:time-in-thread))
1289
;; return the aspect name for accounting
1292
(defun agp-generator (agp)
1293
"Given an AGP, return a generator which initiates the respective BGP match
1294
and streams the matched pages into a result channel. "
1296
(typecase *transaction*
1297
(rdfcache-decimated-matrix-transaction
1298
;; need to compute bindings and modifiers from task state
1299
(funcall (agp-pattern-function agp)))
1301
(ecase *query-execution-method*
1302
(:reduce (funcall (agp-pattern-function agp)))
1304
(destructuring-bind (&optional start end) (rest (assoc 'spocq.a:|slice| (agp-statements agp)))
1305
(let* ((path-count (count-if #'property-path-p (agp-statements agp) :key #'third))
1306
(base-dimensions (agp-propagated-base-dimensions agp))
1307
(result-channel (make-channel :name `(spocq.a:|bgp| ,(agp-id agp) ,(task-id *query*))
1308
:dimensions (agp-projection-dimensions agp)
1309
;; set the page size/count to be propagated upwards
1310
:size (if (plusp path-count)
1311
*channel-sliced-size-limit*
1312
(effective-channel-size :start start :end end))
1313
:page-length (if (plusp path-count)
1314
*field-sliced-page-length*
1315
(effective-page-length :start start :end end))))
1316
(base-channel (when base-dimensions
1317
(setf (agp-base-channel agp)
1318
(make-channel :name `(:base spocq.a:|bgp| ,(agp-id agp) ,(task-id *query*))
1319
:dimensions base-dimensions
1320
:size (effective-channel-size :start start :end end)
1321
:page-length (effective-page-length :start start :end end)))))
1322
(projection-dimensions (agp-projection-dimensions agp))
1323
(pattern-function (agp-pattern-function agp)))
1324
;; (print result-channel)
1325
(trace-algebra agp-generator base-dimensions base-channel projection-dimensions result-channel)
1326
(when base-channel (trace-algebra agp-generator.base-channel base-channel))
1327
(make-bgp-generator :operator 'spocq.a:|bgp|
1328
:dimensions projection-dimensions
1329
:sort-dimensions projection-dimensions ;; (agp-sort-dimensions agp)
1330
:expression (list #'run-agp-thread agp pattern-function result-channel base-channel)
1331
;; just the first channel contributes to reduction
1332
:channel result-channel
1334
:pattern-function pattern-function
1335
:constituents ()))))))))
1337
;;; run a bgp directly emitting the pages to a continuation
1339
(defun funcall-bgp-operator (op arg)
1343
(defmethod run-bgp (continuation (revision repository-revision) (body list) &rest args)
1344
"Compile and run the given bgp patterns. They are processed as-is, with out reordering.
1345
Cache and re-user results. This is done relative to the revision's reference
1346
repository as the implementation is independent of revision."
1347
(let ((cache (repository-bgp-cache (repository-revision-reference revision))))
1348
(with-open-transaction (revision :revision-id (repository-revision-id revision)
1349
:read-only-p t :operation 'spocq.a:|select|)
1350
(flet ((compile-operator ()
1351
(spocq-compile (apply #'compute-bgp-lambda revision body args))))
1352
(let ((bgp-operator (or (gethash body cache)
1353
(setf (gethash body cache) (compile-operator))))
1354
(*repository* revision))
1355
(funcall-bgp-operator bgp-operator continuation))))))
1358
(defgeneric find-bgp-expansion (repository body library)
1359
(:documentation "Manage bgp expansions relative to the active revision
1360
cache by keying with the library name as well and the original bgp patterns")
1361
(:argument-precedence-order repository library body)
1362
(:method ((repository t) (body t) (library rule-library))
1363
(find-bgp-expansion repository body (rule-library-name library)))
1364
(:method ((repository string) (body t) (library t))
1365
(find-bgp-expansion body (repository repository) library))
1366
(:method ((repository repository) (body t) (library string))
1367
(get-aspect-cache (list :inferences library body) :repository repository)))
1369
(defgeneric (setf find-bgp-expansion) (new-body repository body library)
1370
(:documentation "Manage bgp expansions relative to the active revision
1371
cache by keying with the library name as well and the original bgp patterns")
1372
(:argument-precedence-order repository library body new-body)
1373
(:method ((new-body t) (repository t) (body t) (library rule-library))
1374
(setf (find-bgp-expansion repository body (rule-library-name library)) new-body))
1375
(:method ((new-body t) (repository string) (body t) (library t))
1376
(setf (find-bgp-expansion body (repository repository) library) new-body))
1377
(:method ((new-body t) (repository repository) (body t) (library string))
1378
(setf (get-aspect-cache (list :inferences library body) :repository repository) new-body)))
1381
(defgeneric coalesce-bgp-functions (function-predicates body)
1382
(:documentation "replace each predicate list with a single statement which
1383
encapsulates the arguments from the subject list, the results from the
1384
object list and the function predicate iri.
1385
the precedents for the role assignment:
1386
- https://www.w3.org/2004/12/rules-ws/paper/94/
1387
- https://www.w3.org/TR/swbp-n-aryRelations/")
1388
(:method ((predicates list) (body list))
1389
(loop for predicate in predicates
1390
do (multiple-value-bind (remaining reference parameters results) (coalesce-bgp-predicate predicate body)
1391
(destructuring-bind (op subject predicate object . rest) reference
1392
(setf body (substitute `(,op ,subject
1393
,(make-function-verb :function predicate :parameters parameters :results results)
1400
(defgeneric coalesce-bgp-views (view-predicates body)
1401
(:documentation "Replace statements which identify a view as verb together with the related lists.
1402
Just rearrange and return the BGP body without exercising any control over dimensions, repository reference,
1403
authorization, etc.")
1404
(:method ((predicates t) (body t))
1405
(loop for predicate in predicates
1406
do (multiple-value-bind (remaining reference parameters results) (coalesce-bgp-predicate predicate body)
1407
(destructuring-bind (op subject predicate object . rest) reference
1408
(setf body (substitute `(,op ,subject
1409
,(make-view-verb :url predicate :parameters parameters :results results)
1416
(defun coalesce-bgp-predicate (predicate body)
1417
"collect all statements which contain the predicate.
1418
values: the unrelated body (net of reference, argument and result lists)
1419
the argument variables (the object list)
1420
the result variables (the subject list)"
1421
(let* ((reference (loop for statement in body
1422
when (and (triple-form-p statement)
1423
(eq predicate (statement-predicate statement)))
1426
(multiple-value-bind (arguments argument-statements)
1427
(let ((object (statement-object reference)))
1428
(if (spocq:blank-node-p object)
1429
(extract-bgp-term-list object body)
1430
(values (list object) nil)))
1431
(when argument-statements
1432
(setf body (remove-if #'(lambda (stmt) (member stmt argument-statements)) body)))
1433
(multiple-value-bind (results result-statements)
1434
(let ((subject (statement-subject reference)))
1435
(if (spocq:blank-node-p subject)
1436
(extract-bgp-term-list subject body)
1437
(values (list subject) nil)))
1438
(when result-statements
1439
(setf body (remove-if #'(lambda (stmt) (member stmt result-statements)) body)))
1440
(values body reference arguments results)))
1443
(defun extract-bgp-term-list (head statements)
1444
"given a list of statement patterns and the term for the list head, compute the list of objects.
1445
return that list and the list of the involved statements"
1446
(let ((list-statements ())
1448
(flet ((is-first (statement) (and (eq (statement-subject statement) head) (eq (statement-predicate statement) |rdf|:|first|)))
1449
(is-rest (statement) (and (eq (statement-subject statement) head) (eq (statement-predicate statement) |rdf|:|rest|))))
1450
(declare (dynamic-extent #'is-first #'is-rest))
1451
(loop until (eq head |rdf|:|nil|)
1452
do (let* ((first (find-if #'is-first statements))
1453
(rest (find-if #'is-rest statements)))
1454
(cond ((or (null first) (null rest)) ;; broken
1456
((or (member first list-statements) (member rest list-statements)) ;; duplicate
1459
(push (statement-object first) list-terms)
1460
(setf head (statement-object rest))
1461
(push first list-statements)
1462
(push rest list-statements)))))
1463
(values (reverse list-terms) list-statements))))
1467
(extract-bgp-term-list '?::?1
1468
(rest '(spocq.a:|bgp| (spocq.a:|triple| |rdf|:|nil| ?::someFunction ?::?1)
1469
(spocq.a:|triple| ?::?1 |rdf|:|first| ?::a)
1470
(spocq.a:|triple| ?::?1 |rdf|:|rest| |rdf|:|nil|))))
1471
(extract-bgp-term-list '|rdf|:|nil|
1472
(rest '(spocq.a:|bgp| (spocq.a:|triple| |rdf|:|nil| ?::someFunction ?::?1)
1473
(spocq.a:|triple| ?::?1 |rdf|:|first| ?::a)
1474
(spocq.a:|triple| ?::?1 |rdf|:|rest| |rdf|:|nil|))))
1475
(coalesce-bgp-predicate '?::someFunction
1476
(rest '(spocq.a:|bgp| (spocq.a:|triple| |rdf|:|nil| ?::someFunction ?::?1)
1477
(spocq.a:|triple| ?::?1 |rdf|:|first| ?::a)
1478
(spocq.a:|triple| ?::?1 |rdf|:|rest| |rdf|:|nil|))))
1480
;;; (spocq.a:|bgp| (spocq.a:|triple| "aaa" ?::x (get-time)))
1485
(defgeneric compute-formula-identifier (graph)
1486
(:documentation "Given a triple graph, canonicalize it, compute its hash
1487
and return the respective <urn:n-triples:...> iri.
1488
The graph must be constant terms only, sich that the ntriples representation is available.
1489
Any nested grapns are replaced with their respective identifiers.")
1490
(:method ((graph cons))
1491
(assert (bgp-form-p graph) ()
1492
"Invalid formula ~s" graph)
1493
(flet ((ensure-term (term)
1494
(if (bgp-form-p term)
1495
(compute-formula-identifier term)
1497
(let* ((statements (loop for (nil subject predicate object) in (rest graph)
1498
collect (list (ensure-term subject) predicate (ensure-term object))))
1499
(hash (hash-graph statements)))
1500
(intern-iri (concatenate 'string "urn:n-triples:" hash)))))
1502
(error "Invalid formula ~s" term)))