Coverage report: /development/source/library/org/datagraph/spocq-shard/src/store/rdfcache/property-paths-as-multigraph-join.lisp

KindCoveredAll%
expression5122353 21.8
branch36192 18.8
Key
Not instrumented
Conditionalized out
Executed
Not executed
 
Both branches taken
One branch taken
Neither branch taken
1
 ;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
2
 
3
 (in-package :org.datagraph.spocq.implementation)
4
 ;; (load "/development/source/library/org/datagraph/spocq/src/store/property-paths-as-join.lisp")
5
 ;;;
6
 ;;; as patch
7
 ;;; (load "patches/property-paths-as-join.lisp")
8
 
9
 ;;; !!!
10
 ;;; implement page-wise sip for paths similar to service and revision mechanisms.
11
 ;;; rather than starting the kleene start join with just what is present as the
12
 ;;; pattern subject and/or object, use the internediate results for one of them
13
 ;;; as the initial join for the respective term to reduce the cardinality of the first step.
14
 
15
 (:documentation "property paths (including bounded path navigation as joins)"
16
   "The SPARQL standard[1] defines property path semantics in terms of the operators
17
 
18
  - `ZeroLengthPath`
19
  - `ZeroOrMorePath`
20
  - `OneOrMorePath`
21
  - `NegatedPropertySet`
22
 
23
  The remaining aspects of property paths, inversion, sequences and fixed bounds, are described as syntactic
24
  sugar for subject - object role reversal and joins or unions of enumerated triple patterns to be rewritten
25
  when the paths are parsed. Rather than rewrite all paths to disjoint bgps, this implementation interprets
26
  those paths which require it as consolidated operations in-line as it evaluates the original bgp and
27
  allows for splitting the others to optimization their interpretation as joins.
28
 
29
  The functions are refactored so as to make each aspect an operation, of which the fixed bounds
30
  and *-or-more constraints are handled similarly, and inversion is also autonomous.
31
  - match-property-path : the interface operator
32
    -> match-property-path-type
33
       -> match-zero-length-path
34
       -> match-bounded-path (n to m)
35
                             (0 to m) == match-zero-length-path, match-bounded-path(1,m)
36
       -> match-inverted-path
37
       -> match-negated-path
38
       -> match-property-path-verb : the universal base operator for elementary paths
39
 
40
  Calls to the interface operator are compiled into the bgp matching code at the point where the respective
41
  statement pattern appears. The interface accepts the repository handle combined with the current subject and
42
  object state, and a continuation to supply result bindings to the subsequent matching process.
43
  If the respective term is already bound, the argument is a non-zero integer term number which has been
44
  propagated between statement sin the BGP. Otherwise, it is still unbound, the value is zero.
45
  The predicate term is always passed as a path object, which is deconstructed and resolved to a term number
46
  on-the-fly as the path is interpreted.
47
 
48
  There are several details
49
 
50
  - The cycle and de-duplication rules apply to the operator for matching bounded paths at the point
51
    where a node would initiate a successive match step. That is, the constrain applies neither
52
    the a root node at the point of introduction, nor to a desitnation node.
53
    This means, the a node is registered when it first appears as a subject-or-object in
54
    the path - this is, when it shifts from match result to match argument.
55
    This node deduplication happens in match-bounded-path, respective the traversal direction.
56
 
57
  - Negation is defined as the bindings for which any path other than that given matches, _not_
58
    the bindings for which the given path does not match.[2] As it is restricted to a single verb
59
    (or step) it can be implemented as a filter on matches with a wild-card predicate.
60
 
61
  - The  default/named/all graph configuration interacts with zero-length paths.
62
 
63
    The essential feature of zero-length paths is that they match one statement for each possible
64
    (subject x object) combination. this includes both variable bindings to statements in the
65
    store and autonomous bindings for constant subject and object. where the graph term is constant,
66
    this yields a single solution. where the graph term is a variable, this yields one solution
67
    for each binding of the graph variable. this by analogy to the match arity where the subject and or
68
    object is variable and the result must enumerate the respective value domain. where the graph is one
69
    of the distinguished graphs, the solutions should reflect the nature of the designated graph.
70
 
71
  <urn:dydra:default> is just the default graph, which has no name
72
  <urn:dydra:named> is a wildcard which designates all named graphs individually
73
  <urn:dydra:all> designates the merge of he default graph and all named graphs
74
  <urn:dydra:none> designates a null graph
75
 
76
    the denotation is not static. when <urn:rdfcache:all> is used as the default graph, then
77
    no name is available. when it is used in from named, then the default graph would be named
78
    <urn:rdfcache:default>, which would be a first class term.
79
 
80
    this means that the graph term cannot be interpreted without knowing whether it is a from or
81
    from named term... ?
82
 
83
  - The spec defines de-duplication for ZeroOrMorePath and OneOrMorePath types only.
84
    At the point when the spec also included general bounded paths, they were deduplicated as well.
85
    This implementation still supports arbitrary bounds, which then not deduplicated to correspond
86
    to the semantics of explicit paths when unrolled.
87
 
88
  - simple walk v/s regular path and counted v/s existential result sets[2,3].
89
    This implementation is existential, simple walk.
90
    The in-memory join follows from the arguments in Gubichev and Nueman's
91
    'Path Query Processing on Very Large RDF Graphs'[4],
92
 
93
  ---
94
  [1] http://www.w3.org/TR/2013/REC-sparql11-query-20130321/#propertypaths
95
  [2] http://www.w3.org/TR/sparql11-query/#PropertyPathPatterns
96
  [3] http://lists.w3.org/Archives/Public/public-rdf-dawg-comments/2012Feb/0029.html
97
  [4] webdb2011.rutgers.edu/papers/Paper%2021/pathwebdb.pdf
98
  ")
99
 
100
 #+(or)
101
 (progn
102
   (trace match-property-path)
103
   ;; (trace match-property-path-type :break t)
104
   (trace match-property-path-type)
105
   (trace match-sequence-path match-zero-length-path
106
          repository-load-graph run-sparql
107
          match-bounded-path)
108
   (trace call-with-path-cache )
109
   ;; (trace call-path-continuation :break t)
110
   (trace call-path-continuation)
111
            
112
   (defmethod call-path-continuation :before ((continuation t) (context t) (subject integer) predicate (object integer))
113
     (trace-paths "~%cpc:                    ~a(~a) ~a ~a(~a) -> ~a~%"
114
             (format-term-number-object subject) subject
115
             predicate
116
             (format-term-number-object object) object
117
             continuation))
118
   (defmethod match-property-path-type :before ((repository-handle t) (context t)
119
                                                subject predicate object &optional continuation)
120
     (declare (ignore continuation))
121
     (trace-paths "~&mppt                  : ~a(~a) ~a ~a(~a)~%"
122
             (format-term-number-object subject) subject
123
             predicate
124
             (format-term-number-object object) object))
125
  
126
   )
127
 
128
 (defvar *paths-trace-output* nil)
129
 
130
 
131
 (defstruct (path-field (:include solution-field))
132
   (by-object-cache nil)
133
   (by-subject-cache nil))
134
 
135
 (defun format-term-number-object (tn &optional (default tn))
136
   (if (integerp tn)
137
       (if (<= tn 0)
138
           default
139
           (term-number-object tn))
140
       tn))
141
 
142
 (defmacro trace-paths (control &rest arguments)
143
   (if (stringp control)
144
       `(when *paths-trace-output*
145
          (let ((*print-pretty* nil))
146
            (fresh-line *paths-trace-output*)
147
            (format *paths-trace-output* ,control ,@arguments)))
148
       `(when *paths-trace-output*
149
          (let ((*print-pretty* nil))
150
            (format *paths-trace-output* "~&~a[~a] : ~@{~a~^ ~}~%"
151
                    ',control (bt:thread-name (bt:current-thread)) ,@arguments)))))
152
 
153
 (defvar *match-property-path-level* 0
154
   "Indicates the recursion level for match-property-path in order to distinguish top-level terminal nodes
155
  from internal nodes between sub-paths. The former are always included in results while latter may be
156
  suppressed as internal cycles.")
157
 
158
 (defvar *match-property-path-length* 0
159
   "Indicates the matched length of a bounded path for comparison with the bounds constraints.")
160
 
161
 (defvar *match-property-path-nodes* ()
162
   "Binds a dynamically extended list of navigated nodes to track cycles.")
163
 
164
 
165
 (defun clear-node-cache ()
166
   (when *match-property-path-nodes*
167
     (clrhash *match-property-path-nodes*)))
168
 
169
 (defgeneric test-and-set-path-node (node)
170
   (:documentation "return true iff the link from the node through the given predicate has already been traversed.
171
    otherwise return nil and mark the link visited.")
172
   (:method ((node t))
173
     (when *match-property-path-nodes*
174
       (cond ((gethash node *match-property-path-nodes*)
175
              (trace-paths :pp.taspn.was node (format-term-number-object node) *match-property-path-nodes*)
176
              t)
177
             (t
178
              (setf (gethash node *match-property-path-nodes*) t)
179
              (trace-paths :pp.taspn.new node (format-term-number-object node) *match-property-path-nodes*)
180
              nil)))))
181
 
182
 
183
 (defmacro with-node-cache (&body body)
184
   "execute the body in a context which enforces a 'simple walk' constraint across literal predicates"
185
   `(flet ((.path-op. () ,@body))
186
      (declare (dynamic-extent #'.path-op.))
187
      (call-with-node-cache #'.path-op.)))
188
 
189
 (defun call-with-node-cache (op)
190
   "Invoke the operator in a context which enforces a 'simple walk' constraint on the next step in
191
  the generated path. It binds the node cache which is used to mark a link."
192
   (declare (dynamic-extent op))
193
   ;; (let ((*match-property-path-nodes* (or *match-property-path-nodes* (make-hash-table :test 'equalp))))
194
   (let ((*match-property-path-nodes* (make-hash-table :test 'equalp)))
195
     (funcall op)))
196
 
197
 (defmacro with-marked-node ((node) &body body)
198
   "execute the body in a context which constrains the s-p-o link to be part of  a 'simple walk'."
199
   `(flet ((.link-op. () ,@body))
200
      (declare (dynamic-extent #'.link-op.))
201
      (call-with-marked-node #'.link-op. ,node)))
202
 
203
 (defun call-with-marked-node (op node)
204
  "constrain steps which include would repeat a path link which as already been traversed.
205
  the constant node determines the direction."
206
   (declare (dynamic-extent op))
207
   (if *match-property-path-nodes*
208
     (if (wildcard-term-p node)
209
         (funcall op)
210
         (cond ((test-and-set-path-node node)
211
                (trace-paths :pp.cwmn.stopped node (format-term-number-object node)))
212
               (t
213
                (trace-paths :pp.cwmn.called node (format-term-number-object node) op)
214
                (funcall op))))
215
     (funcall op)))
216
 
217
 (defgeneric call-path-continuation (continuation context subject predicate object)
218
   (:documentation "take the next step along a path subject to circularity constraints specific to navigation
219
    and applicable to term number nodes only")
220
   (:method (continuation context-term-number
221
                          (subject-term-number integer) (predicate t) (object-term-number integer))
222
     ;; allow abstract as well as concrete paths - necessary for nested bounds
223
     (trace-paths "~&cpc                   : ~a(~a) ~a ~a(~a)~%"
224
             (format-term-number-object subject-term-number) subject-term-number
225
             predicate
226
             (format-term-number-object object-term-number) object-term-number)
227
     (funcall continuation context-term-number subject-term-number predicate object-term-number))
228
   (:method (continuation context (subject-term-number integer) (predicate property-path-verb) (object-term-number integer))
229
     ;; given a verb, resolve it to the term number to equate it with the cases, where the number is supplied directly
230
     (call-path-continuation continuation context
231
                             subject-term-number
232
                             (repository-intern-property-path *transaction* predicate)
233
                             object-term-number)))
234
        
235
 
236
 ;;; 2013-02-03 reimplemented
237
 ;;;  match-property-path
238
 ;;;  match-property-path-type (sequence-property-path)
239
 ;;;  match-property-path-type (property-path-verb)
240
 ;;;  match-sequence-path
241
 ;;; to work with path fields
242
 
243
 (defparameter *property-path-mode* :traverse)
244
 ;;; (defparameter *property-path-mode* :join)
245
 
246
 (defgeneric rdfcache-map-context-numbers (op transaction)
247
   (:method (op (transaction rdfcache-transaction))
248
     (rdfcache-map-context-numbers op (transaction-record transaction)))
249
   #+sbcl
250
   (:method (op (x-record SB-SYS:SYSTEM-AREA-POINTER))
251
     (rdfcache:map-context-numbers op x-record)))
252
 
253
 
254
 ;;; first version is temporary for testing agains older code
255
 
256
 (defparameter *match-property-path-context* nil)
257
 
258
 #+(or)
259
 (defgeneric match-property-path (repository-handle context subject property-path object &optional continuation)
260
   (:documentation "The primary interface operator distinguishes the direction of the path according
261
    to the presence of wild or constant terms among
262
      subject -> object : when (constant x wild)
263
      subject <- object : when (wild x constant)
264
      path existence : when (constant x constant)
265
      endpoint existence : when (wild x wild)
266
    in order to invoke the correct operator for cycle-suppression criteria and eventual continued
267
    evaluation through match-property-path-type, which then distinguishes the method according to path type.
268
    The respective directional operators are each available for call sites which have already determined
269
    the direction.
270
    The continuation applies to each verb-path match.")
271
   (:argument-precedence-order subject object property-path repository-handle context)
272
 
273
   (:method (repository-handle context subject property-path object &optional continuation)
274
     "if the context is wild, or if it designates all named contexts
275
      iterate over them to bind it for all internal matching"
276
     (if (or (wildcard-term-p context)
277
             (= context (repository-named-contexts-term-number repository-handle)))
278
       ;; map the per-graph operation over the results of a match
279
       ;; which is constrained by all the given subject, predicate, and object.
280
       (if continuation
281
           (match-property-path-type repository-handle context subject property-path object continuation)
282
           #+(or)
283
           (flet ((do-each-context-once (constant-context)
284
                    (when (case context
285
                            (-2 (case constant-context
286
                                  (-1 nil)
287
                                  (t t)))
288
                            (0 t))
289
                      (match-property-path repository-handle constant-context subject property-path object continuation))))
290
             (declare (dynamic-extent #'do-each-context-once))
291
             (rdfcache-map-context-numbers #'do-each-context-once *transaction*))
292
         (error "A wildcard context for a property path requires a continuation."))
293
         (if (or (eq *property-path-mode* :traverse)
294
                 (property-path-verb-p property-path)
295
                 (and (response-limit)
296
                      (> (/ (repository-pattern-count repository-handle nil nil nil context)
297
                            (repository-scan-rate repository-handle))
298
                         (/ (response-limit)
299
                            (repository-match-rate repository-handle)))))
300
             ;; if the part is an elementary verb or the results are sliced and a continuation is provided,
301
             ;; walk it incrementally
302
             (match-property-path-type repository-handle context subject property-path object continuation)
303
             ;; otherwise, implement it as as a join over the segments
304
             (if continuation
305
                 (loop for (matched-subject matched-object)
306
                   across (path-field-solutions-by-subject (match-property-path-type repository-handle context subject property-path object))
307
                   do (funcall continuation context matched-subject property-path matched-object))
308
                 ;; without a continuation, return the path field
309
                 (match-property-path-type repository-handle context subject property-path object))))))
310
 
311
 (defgeneric match-property-path (repository-handle context subject property-path object &optional continuation)
312
   (:documentation "The primary property path interface operator accepts the repository handle, the pattern, and an optional continuation.
313
    it distinguishes among context argument and control flow only and delegates the remaining logic to match-property-path-type,
314
    which will distinguish the implementation by path type.
315
 
316
    The context argument is distinguished according to whether it should be provided to the store, for it to
317
    perform any wild-card or enumerated matching in order to coerce the result into a single result context,
318
    or the argument should be used here, to enumerate the designated extension, in order to constrain
319
    the resulting paths to reside in a single context.")
320
   (:argument-precedence-order subject object property-path repository-handle context)
321
 
322
   (:method :before ((repository-handle t) context subject property-path object &optional continuation)
323
     (declare (ignore continuation))
324
     (trace-paths "~@{~s~^ ~}" :mpp
325
                  :context (format-term-number-object context) context 
326
                  :subject (format-term-number-object subject) subject
327
                  :predicate property-path
328
                  :object (format-term-number-object object) object
329
                  :target *match-target-graph*))
330
 
331
   (:method (repository-handle context subject property-path object &optional continuation)
332
     "if the context argument is wild, it is a variable in a graph clause, given which the paths should be
333
      generated independently for each context.
334
      If the context is either the <urn:dydra:all> or <urn:dydra:named> term, then the store should perform the
335
      match, and the results should be coerced into that graph.
336
      If the argument is a graph set, then the store should perform the
337
      match, and the results should be coerced into that graph."
338
     (flet ((result-continuation  (context subject predicate object)
339
              (trace-paths "mpp result            : ~a(~a) ~a(~a) ~a ~a(~a)~%"
340
                      (format-term-number-object context) context
341
                      (format-term-number-object subject) subject
342
                      predicate
343
                      (format-term-number-object object) object)
344
              (funcall continuation context subject predicate object)))
345
       (if (wildcard-term-p context)
346
           ;; for a wildcard/variable only, map the per-graph operation over the results of a match which
347
           ;; enumerates the concrete contexts. distinguished graphs such as <urn:dydra:all> or
348
           ;; <urn:dydra:named>, which correspond to #'repository-named-contexts-term-number, are
349
           ;; treated just as single concrete graphs or sets, by passing them to the store.
350
           (if continuation
351
               (match-property-path-type repository-handle context subject property-path object continuation)
352
               (error "A wildcard context for a property path requires a continuation."))
353
           (if continuation
354
               (if (or (eq *property-path-mode* :traverse)
355
                       (property-path-verb-p property-path)
356
                       (bounded-property-path-p property-path)
357
                       (and (response-limit)
358
                            (> (/ (repository-pattern-count repository-handle nil nil nil context)
359
                                  (repository-scan-rate repository-handle))
360
                               (/ (response-limit)
361
                                  (repository-match-rate repository-handle)))))
362
                   ;; if the part is an elementary verb or the results are sliced and a continuation is provided,
363
                   ;; walk it incrementally
364
                   (match-property-path-type repository-handle context subject property-path object #'result-continuation)
365
                   ;; otherwise, implement it as a a join over the segments
366
                   (loop for (matched-subject matched-object)
367
                     across (path-field-solutions-by-subject (match-property-path-type repository-handle context subject property-path object))
368
                     do (funcall #'result-continuation context matched-subject property-path matched-object)
369
                     count t
370
                     ))
371
               ;; without a continuation, return the path field
372
               (match-property-path-type repository-handle context subject property-path object))))))
373
         
374
 
375
 
376
 ;;;
377
 ;;; specialized methods for concrete property path classes
378
 ;;; (dolist (sym (apropos-list  "property-path")) (when (and (find-class sym nil) (subtypep sym 'property-path)) (print sym)))
379
 
380
 (defgeneric match-property-path-type (repository-handle context subject property-path object &optional continuation)
381
   (:documentation "The internal operator specializes the implementation by path class to walk sequences,
382
    invert paths, negate elementary paths, or constrain paths by bounds. It expects the path to be distinct
383
    and any cycles to be suppressed by the calling function and uses match-property-path to effect that for
384
    its continued path interpretation.")
385
   (:argument-precedence-order property-path subject object repository-handle context)
386
 
387
   (:method (repository-handle context subject (predicate integer) object &optional continuation)
388
     (repository-query-by-verb continuation repository-handle context subject predicate object))
389
   )
390
 
391
 
392
 (defmethod match-property-path-type (repository-handle context subject (predicate property-path-verb) object &optional continuation)
393
   "the method for path terminals (verbs) uses the literal predicate - an iri, to perform a query/count against
394
    the connected repository - depending on whether any end node is wild. if that operation succeeds, 
395
    continue with the results."
396
   
397
   (if continuation
398
     (repository-query-by-verb continuation repository-handle context subject predicate object)
399
     (if (or (wildcard-term-p object) (wildcard-term-p subject))
400
       (let ((solutions ()))
401
         (flet ((accumulate-solutions (context subject predicate object)
402
                  (declare (ignore context predicate))
403
                  (push (list subject object) solutions)))
404
           (declare (dynamic-extent #'accumulate-solutions))
405
           (repository-query-by-verb #'accumulate-solutions repository-handle context subject predicate object))
406
         (make-path-field :solutions (coerce (nreverse solutions) 'vector)))
407
       (when (plusp (repository-count-by-verb repository-handle context subject predicate object))
408
         (make-path-field :solutions (vector `(,subject ,object)))))))
409
 
410
 
411
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (predicate property-path-verb) (object t) (context t))
412
   (repository-pattern-count repository-handle subject (property-path-verb-iri predicate) object context))
413
 
414
 
415
 (defmethod match-property-path-type (repository-handle context subject (path and-property-path) object &optional continuation)
416
   "A path conjunction yields a solution when _every_ match yields the same constituents
417
  If a continuation is provided, successively generate solutions for each path element and
418
  pass those through the successive"
419
 
420
   (if continuation
421
     (destructuring-bind (first-predicate . rest-predicates) (property-path-elements path)
422
       (flet ((match-first-continue (first-context first-subject first-predicate first-object)
423
                "For each node pair which matches the first predicate, each successive predicate must also be present."
424
                (declare (ignore first-predicate))
425
                (labels ((match-step (next-context predicates)
426
                           (cond (predicates
427
                                  (destructuring-bind (next-first-predicate . next-rest-predicates) predicates
428
                                    (flet ((match-step-continue (matched-context matched-subject matched-predicate matched-object)
429
                                             (declare (ignore matched-subject matched-predicate matched-object))
430
                                             ;; if a match succeeded, continue with the next predicate
431
                                             (match-step matched-context next-rest-predicates)))
432
                                      (match-property-path-type repository-handle next-context first-subject next-first-predicate first-object #'match-step-continue))))
433
                                 (t
434
                                  (call-path-continuation continuation next-context first-subject path first-object)))))
435
                  (match-step first-context rest-predicates))))
436
         (declare (dynamic-extent #'match-first-continue))
437
         (match-property-path-type repository-handle context subject first-predicate object #'match-first-continue)
438
         t))
439
     (let* ((path-fields (loop for path-element in (property-path-elements path)
440
                               for path-field = (match-property-path-type repository-handle context subject path-element object)
441
                               if (zerop (path-field-length path-field))
442
                               do (return nil)
443
                               else collect path-field )))
444
       (reduce #'intersect-path-fields path-fields))))
445
 
446
 
447
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (predicate and-property-path) (object t) (context t))
448
   (let* ((count most-positive-fixnum))
449
     (dolist (element (nary-property-path-elements predicate))
450
       (setf count (min count (repository-pattern-count repository-handle nil element nil context))))
451
     count))
452
 
453
 
454
 (defmethod match-property-path-type (repository-handle context subject (path bounded-property-path) object &optional continuation)
455
   (let ((min (property-path-min path))
456
         (max (property-path-max path)))
457
     (match-bounded-path repository-handle context subject (property-path-element path) object
458
                         min max continuation)))
459
 
460
 
461
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (predicate bounded-property-path) (object t) (context t))
462
   (let ((min (property-path-min predicate))
463
         (max (property-path-min predicate)))
464
     (max (if max max (repository-pattern-count repository-handle subject (unary-property-path-element predicate) object context))
465
          (if (eql min 0) 1 0))))
466
 
467
 
468
 (defmethod match-property-path-type (repository-handle context subject (path inverted-property-path) object &optional continuation)
469
   "An inverted path navigates in the reverse order.
470
    Skip the cycle suppression as that would mark both end nodes."
471
   (match-inverted-path repository-handle context subject (property-path-element path) object continuation))
472
 
473
 
474
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (path inverted-property-path) (object t) (context t))
475
   "an inverted path always reverses the order"
476
   (repository-pattern-count repository-handle object (unary-property-path-element path) subject context))
477
 
478
 
479
 (defmethod match-property-path-type (repository-handle context subject (path negated-property-path) object &optional continuation)
480
   (match-negated-path repository-handle context subject (property-path-element path) object continuation))
481
 
482
 
483
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (path negated-property-path) (object t) (context t))
484
   (repository-statement-count repository-handle)
485
   #+(or)  ; is not correct, as the negation returns all others rather than failing if true
486
   (if (zerop (repository-pattern-count repository-handle subject (unary-property-path-element path) object context))
487
     1 0))
488
 
489
 
490
 (defmethod match-property-path-type (repository-handle context subject (path or-property-path) object &optional continuation)
491
   "A path disjunction yields the sum of the solutions for _every_ matched constituent."
492
   (if continuation
493
     (loop for alternative in (property-path-elements path)
494
       count alternative
495
       do (match-property-path-type repository-handle context subject alternative object continuation))
496
     (let* ((path-fields (loop for path-element in (property-path-elements path)
497
                               for path-field = (match-property-path-type repository-handle context subject path-element object)
498
                               unless (zerop (path-field-length path-field))
499
                               collect path-field )))
500
       (reduce #'sum-path-fields path-fields))))
501
 
502
 
503
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (predicate or-property-path) (object t) (context t))
504
   (let* ((count 0))
505
     (dolist (element (nary-property-path-elements predicate))
506
       (setf count (max count (repository-pattern-count repository-handle nil element nil context))))
507
     count))
508
 
509
 
510
 (defmethod match-property-path-type (repository-handle context subject (property-path sequence-property-path) object &optional continuation)
511
   "A path sequence yields solutions for extreme endpoints of matched constituent segments.
512
  In the special case of a binary path with an initial kleen star constituent, interpret that as an alternative
513
  of the second element alone, or a sequence of the first element and the second, where the inital minimum bound is 1."
514
   (let ((elements (sequence-property-path-elements property-path)))
515
       (if (= (length elements) 2)
516
           (destructuring-bind (first second) elements
517
             (if (and (bounded-property-path-p first) (eql 0 (bounded-property-path-min first)))
518
                 (match-property-path-type repository-handle context subject
519
                                           (make-or-property-path :elements (list second (make-sequence-property-path
520
                                                                                          :elements (list (make-bounded-property-path
521
                                                                                                           :element (bounded-property-path-element first)
522
                                                                                                           :min 1 :max (bounded-property-path-max first))
523
                                                                                                          second))))
524
                                           object continuation)
525
                 (match-sequence-path repository-handle context subject (property-path-elements property-path) object continuation)))
526
           (match-sequence-path repository-handle context subject (property-path-elements property-path) object continuation))))
527
 
528
 
529
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (predicate sequence-property-path) (object t) (context t))
530
   ;;; this is just an estimate
531
   (loop for element in (nary-property-path-elements predicate)
532
     for element-pattern-count = (repository-pattern-count repository-handle nil element nil context)
533
     if (zerop element-pattern-count)
534
     return 0
535
     else minimize element-pattern-count ))
536
 
537
 
538
 (defmethod match-property-path-type (repository-handle context subject (path zero-length-property-path) object &optional continuation)
539
   (match-zero-length-path repository-handle context subject (property-path-element path) object continuation))
540
 
541
 (defmethod repository-pattern-count ((repository-handle t) (subject t) (path zero-length-property-path) (object t) (context t))
542
   "a zero-length path always yields results."
543
   (max (repository-pattern-count repository-handle subject (property-path-element path) object context) 1))
544
 
545
 
546
 
547
 ;;; implement each navigation form as a specific operator
548
 ;;; each respective path-type operator applies to the qualified path element with requisite additional arguments.
549
 ;;; - bounded path
550
 ;;; - inverted path
551
 ;;; - negated path
552
 ;;; - sequence path
553
 ;;; - zero length path
554
 
555
 (defparameter *property-path-bounded-mode* :materialize)
556
 ;; (defparameter *property-path-bounded-mode* :traverse)
557
 (defparameter *property-path-enumeration-maximum* 10000000)
558
 (defparameter *property-path-enumeration-minimum* 1)
559
 (defun match-bounded-path (repository-handle context subject predicate object min max &optional continuation)
560
   "Interpret a property path with optional bounds. Given the path predicate and the associated context/subject/object
561
  terms, traverse the graph from whichver subject/object is constant towards the one which is a wildcard and invoke the
562
  continuation for each intermediate and terminal node which is within the bounds.
563
 
564
  repository-handle : (or repository transaction)
565
  context : (or wild-term term)
566
  subject : (or wild-term term)
567
  predicate : term
568
  object : (or wild-term term)
569
  min : (or null (integer 0)) : the minimal predicate count; (or null (eql 0)) divides the process between
570
    match-zero-length-path and a reinvocation with minimum bound 1
571
  max : (or null (integer 1)) : the maximum predicate count; when null, the path is unlimited
572
  continuation : (function (term term term term) t) : the continuation is invoked with matched context, subject, and object
573
    terms and the original path predicate for each path within the bounds.
574
 
575
  Loops and duplicates are handled (see section 9.3 of sparql 1.1) for the case when the upper bound is not
576
  supplied. That is, for a, so called, 'arbitrary length path', a node cache serves to recognize traversed nodes
577
  and terminate the traversal. For paths with constant bounds, no de-duplication occurs."
578
 
579
   (trace-paths "~&mbp                   : ~a(~a) ~a(~a) ~a ~a(~a) (~a ~a) -> ~a (~a)~%"
580
           (format-term-number-object context) context
581
           (format-term-number-object subject) subject
582
           predicate
583
           (format-term-number-object object) object
584
           min max
585
           (format-term-number-object *match-target-graph*) *match-target-graph*)
586
   (if continuation
587
       (ecase  *property-path-bounded-mode*
588
         (:materialize
589
          (if (and (property-path-verb-p predicate)
590
                   (<= *property-path-enumeration-minimum*
591
                       (repository-count-by-verb repository-handle context subject predicate object)
592
                       *property-path-enumeration-maximum*))
593
              (stream-match-bounded-path-materialized repository-handle context subject predicate object min max continuation)
594
              (stream-match-bounded-path repository-handle context subject predicate object min max continuation)))
595
         (:traverse
596
          (stream-match-bounded-path repository-handle context subject predicate object min max continuation)))
597
     (collect-match-bounded-path repository-handle context subject predicate object min max)))
598
 
599
 
600
 (defun stream-match-bounded-path (repository-handle context subject predicate object min max continuation)
601
   (let ((path-cache (make-hash-table :test 'equal)))
602
     (flet ((bounded-path-continuation (context subject matched-predicate object)
603
              (declare (ignore context))
604
              ;;; just cache the endpoints for later delivery
605
              (trace-paths "~&pp.smbp.bpc: ~a(~a) ~a ~a(~a) in ~a~%"
606
                           (format-term-number-object subject) subject ; 
607
                           matched-predicate
608
                           (format-term-number-object object) object
609
                           path-cache)
610
              (incf (gethash (cons subject object) path-cache 0))))
611
       (declare (dynamic-extent #'bounded-path-continuation))
612
       (with-node-cache
613
           ;; the wild subject causes the problem, that the node cace is shared among the
614
           ;; individual start paths, with the effect the a paoth of the form rest?/first
615
           ;; collected the first segments only.
616
           (labels ((bound-path (min max)
617
                      (trace-paths "~&pp.smbp.bpmm: ~a ~a ~a ~a ~a~%" subject predicate object min max)
618
                      (when (integerp max) (setq *match-property-path-nodes* nil))  ;; no deduplication
619
                      (cond ((or (null min) (zerop min))
620
                             (trace-paths :pp.mbp.zerop context subject predicate object)
621
                             (match-zero-length-path repository-handle context subject predicate object #'bounded-path-continuation)
622
                             (bound-path 1 max))
623
                            ((and (eql min 1(eql max 1))
624
                             (match-property-path-type repository-handle context subject predicate object #'bounded-path-continuation))
625
                            (t
626
                           (flet ((test-last-bounded-segment (match-context match-subject match-predicate match-object)
627
                                    (trace-paths "~&pp.smbp tlbs: ~a(~a) ~a ~a(~a)~%"
628
                                                 (format-term-number-object match-subject) match-subject
629
                                                 match-predicate
630
                                                 (format-term-number-object match-object) match-object)
631
                                    (when (eql object match-object)
632
                                      (bounded-path-continuation match-context match-subject match-predicate match-object)))
633
                                  (first-match-from-subject (first-context first-subject first-predicate first-object)
634
                                    (trace-paths :pp.mbp.fmfs first-context first-subject first-predicate first-object)
635
                                    (labels ((match-subject-to-object-step (next-subject)
636
                                               (let ((*match-property-path-length* (1+ *match-property-path-length*)))
637
                                                 (trace-paths "pp.smbp.mstos: ~a(~a) ~a~%"
638
                                                              (format-term-number-object next-subject) next-subject
639
                                                              first-predicate)
640
                                                 (when (or (null min) (>= *match-property-path-length* min))
641
                                                   (trace-paths :pp.smbp.mstos.continue first-context first-subject predicate next-subject)
642
                                                   (bounded-path-continuation first-context first-subject predicate next-subject))
643
                                                 (if (and max (>= *match-property-path-length* max))
644
                                                     (trace-paths :pp.smbp.mstos.stopped@max max)
645
                                                     (flet ((match-object-step-continue (matched-context matched-subject predicate matched-object)
646
                                                              (trace-paths :pp.smpb.mosc matched-context matched-subject predicate matched-object)
647
                                                              (match-subject-to-object-step matched-object)))
648
                                                       (declare (dynamic-extent #'match-object-step-continue))
649
                                                       (with-marked-node (next-subject)
650
                                                         (trace-paths :pp.smbp.mstos.next next-subject)
651
                                                         ;; must use the original predicate in order to ensure the type is recognized
652
                                                         (match-property-path-type repository-handle first-context next-subject predicate *wildcard-identifier*
653
                                                                                   #'match-object-step-continue)))))))
654
                                      (match-subject-to-object-step first-object)))
655
                                  (first-match-from-object (first-context first-subject first-predicate first-object)
656
                                    (trace-paths :pp.mbp.fmfo first-context first-subject first-predicate first-object)
657
                                    (labels ((match-object-to-subject-step (next-object)
658
                                               (let ((*match-property-path-length* (1+ *match-property-path-length*)))
659
                                                 (trace-paths "pp.smbp.motss: ~a(~a) ~a~%"
660
                                                              (format-term-number-object next-object) next-object
661
                                                              first-predicate)
662
                                                 (when (or (null min) (>= *match-property-path-length* min))
663
                                                   (trace-paths :pp.smbp.motss.continue first-context next-object predicate first-object)
664
                                                   (bounded-path-continuation first-context next-object predicate first-object))
665
                                                 (if (and max (>= *match-property-path-length* max))
666
                                                     (trace-paths :pp.smbp.motss.stopped@max max)
667
                                                     (flet ((match-subject-step-continue (matched-context matched-subject predicate matched-object)
668
                                                              (trace-paths :pp.smpd.mssc matched-context matched-subject predicate matched-object)
669
                                                              (match-object-to-subject-step matched-subject)))
670
                                                       (declare (dynamic-extent #'match-subject-step-continue))
671
                                                       (with-marked-node (next-object)
672
                                                         (trace-paths :pp.smbp.motss.next subject next-object)
673
                                                         ;; must use the original predicate in order to ensure the type is recognized
674
                                                         (match-property-path-type repository-handle first-context *wildcard-identifier* predicate next-object
675
                                                                                   #'match-subject-step-continue)))))))
676
                                      (match-object-to-subject-step first-subject)))                                 )
677
                             (declare (dynamic-extent #'first-match-from-object #'first-match-from-subject #'test-last-bounded-segment))
678
                             (trace-paths :pp.mbp context subject predicate object)
679
                             (if (wildcard-term-p subject)
680
                                 (if (wildcard-term-p object)
681
                                     ;; if both are wild, match from subject
682
                                     (let ((per-subject-caches (make-hash-table :test 'eql)))
683
                                       (labels ((per-subject-cache (term-number)
684
                                                  (or (gethash term-number per-subject-caches)
685
                                                      (setf (gethash term-number per-subject-caches) (make-hash-table :test 'equalp))))
686
                                                (first-match-wild-subject (first-context first-subject first-predicate first-object)
687
                                                  ;; start a new traversal from the given subject
688
                                                  (setf *match-property-path-nodes* (per-subject-cache first-subject))
689
                                                  (first-match-from-subject first-context first-subject first-predicate first-object)))
690
                                         (declare (dynamic-extent #'first-match-wild-subject))
691
                                         (match-property-path-type repository-handle context subject predicate object #'first-match-wild-subject)))
692
                                     ;; if just the subject is wild
693
                                     (match-property-path-type repository-handle context subject predicate object #'first-match-from-object))
694
                                 (if (wildcard-term-p object)
695
                                     ;; if just the object is wild
696
                                     (match-property-path-type repository-handle context subject predicate object #'first-match-from-subject)
697
                                     ;; if both end terms are constant:
698
                                     ;; - mark the subject node to constrain outgoing links from the constant subject
699
                                     ;; - split the path and search for the intermediate predicates from the subject
700
                                     ;; as a combination of a single step and a bounded path for the remainder
701
                                     (stream-match-bounded-path repository-handle context subject predicate *wildcard-identifier* 
702
                                                                min max #'test-last-bounded-segment))))))))
703
             (bound-path min max)))
704
       (trace-paths "~&pp.smbp.complete: ~a: ~a(~a) ~a ~a(~a)~%" path-cache
705
                    (format-term-number-object subject) subject predicate
706
                    (format-term-number-object object) object)
707
       (if (and (integerp min(integerp max))
708
           (loop with path-count = 0
709
             for (subject . object) being each hash-key of path-cache using (hash-value count)
710
             do (progn (incf path-count count)
711
                  (trace-paths "~&pp.smbp.complete.cont : ~xa: ~a(~a) ~a(~a)~%"
712
                               count (format-term-number-object subject) subject
713
                               (format-term-number-object object) object)
714
                  (dotimes (i count) (call-path-continuation continuation context subject predicate object)))
715
             finally (return path-count))
716
           (loop for (subject . object) being each hash-key of path-cache
717
             count t
718
             do (progn (trace-paths "~&pp.smbp.complete.cont : ~a(~a) ~a(~a)~%"
719
                               (format-term-number-object subject) subject
720
                               (format-term-number-object object) object)
721
                  (call-path-continuation continuation context subject predicate object)))))))
722
 
723
 
724
 (defun stream-match-bounded-path-materialized (repository-handle context subject predicate object min max continuation)
725
   "given a bounded path with a single term as the predicate, enumerate the matched paths by  combining the
726
  results from two scans:
727
    - first a materialized enumeration of the matches for the predicate term keyed on the root term position
728
    - then, iteratively, walk the hash-join in the respective direction.
729
    - limit traversal from a given node (see continue-fron-*) to once only.
730
    - emit solutions to the continuation with the actual or the target from
731
 
732
   the invocation variations involve combinations of whether the graph is implicit or explicit,
733
   and enumerated or abstract. each variation accounts for one combination of values for
734
   the context argument and the *match-target-graph*
735
 
736
  | variation                    |   context        |  *match-target-graph*  |    example                             |
737
  |------------------------------|------------------|------------------------|----------------------------------------|
738
  | the implicit default graph   |    -1            |  -1                    | where {?s :p* ?o}                      |
739
  | enumerated merged graphs     |  (<g1> <g2>)     |  -1                    | from <g1> from <g2> where {?s :p* ?o}  |
740
  | all graphs merged            |     0            |  -1                    | from <urn:dydra:all> where {?s :p* ?o} |
741
  | named graphs merged          |    -2            |  -1                    | from <urn:dydra:named> where {?s :p* ?o} |
742
  | implicit named graphs        |    -2            |  ()                    | where { graph ?g {?s :p* ?o} }         |
743
  | enumerated named graphs      |  (<g1> <g2>)     |  ()                    | from named <g1> from named <g2> { graph ?g where {?s :p* ?o} } |
744
 
745
   which covers all meaningful combinations. 
746
   (-1, ()) and ( -1, -1 ) are equivalent
747
   (* , -2) is not valid as the <urn:dydra:all> is not a purposeful target
748
 
749
   the consequence for navigation is that, where the target is unspecified, the navigation is
750
   local to the respective graph, the graph combines with the subject, respective object to
751
   identify the navigation node, and the actual graph is passed in the result solution.
752
 "
753
   (let ((scan-cache (make-hash-table :test (if *match-target-graph* 'eql 'equal)))
754
         (*match-property-path-length* 0)
755
         (materialized-count 0)
756
         (navigation-count 0)
757
         (solution-count 0))
758
     (labels ((bounded-path-continuation (context subject object)
759
                ;;; just cache the endpoints for later delivery
760
                (incf solution-count)
761
               (trace-paths "~&pp.smbp.bpc: ~a(~a)  ~a(~a) ~a ~a(~a)~%"
762
                             (format-term-number-object context) context
763
                             (format-term-number-object subject) subject 
764
                             predicate
765
                             (format-term-number-object object) object)
766
                (funcall continuation (or *match-target-graph* context) subject predicate object))
767
              (bounded-path-continuation-aux (context subject predicate object)
768
                ;;(declare (ignore predicate))
769
                (trace-paths "~&pp.smbp.bpca: ~a(~a)  ~a(~a) ~a ~a(~a)~%"
770
                             (format-term-number-object context) context
771
                             (format-term-number-object subject) subject 
772
                             predicate
773
                             (format-term-number-object object) object)
774
                (bounded-path-continuation context subject object))
775
              ;; if the target graph is fixed, then the cache is across all graphs
776
              (cache-from-subject (matched-context subject matched-predicate object)
777
                (declare (ignore matched-predicate))
778
                (trace-paths "~&pp.smbp.cfs: ~a(~a)  ~a(~a) ~a(~a)~%"
779
                             (format-term-number-object matched-context) matched-context
780
                             (format-term-number-object subject) subject 
781
                             (format-term-number-object object) object)
782
                (incf materialized-count)
783
                (push object (gethash subject scan-cache)))
784
              (cache-from-object (matched-context subject matched-predicate object)
785
                (declare (ignore matched-predicate))
786
                (incf materialized-count)
787
                (trace-paths "~&pp.smbp.cfo: ~a(~a)  ~a(~a) ~a(~a)~%"
788
                             (format-term-number-object matched-context) matched-context
789
                             (format-term-number-object subject) subject 
790
                             (format-term-number-object object) object)
791
                (push subject (gethash object scan-cache)))
792
              ;; otherwise it is specific to the context of the given match
793
              (cache-from-graph-subject (matched-context subject matched-predicate object)
794
                (declare (ignore matched-predicate))
795
                (trace-paths "~&pp.smbp.cfgs: ~a(~a)  ~a(~a) ~a(~a)~%"
796
                             (format-term-number-object matched-context) matched-context
797
                             (format-term-number-object subject) subject 
798
                             (format-term-number-object object) object)
799
                (incf materialized-count)
800
                (push object (gethash (cons subject matched-context) scan-cache)))
801
              (cache-from-graph-object (matched-context subject matched-predicate object)
802
                (declare (ignore matched-predicate))
803
                (incf materialized-count)
804
                (trace-paths "~&pp.smbp.cfgo: ~a(~a)  ~a(~a) ~a(~a)~%"
805
                             (format-term-number-object matched-context) matched-context
806
                             (format-term-number-object subject) subject 
807
                             (format-term-number-object object) object)
808
                (push subject (gethash (cons object matched-context) scan-cache)))
809
              ;; the navigation is through the materialized solution field which is keyed by
810
              ;; node or node+graph pair depending on the target graph.
811
              (get-cache-node (context term)
812
                (if *match-target-graph*
813
                    (gethash term scan-cache)
814
                    (let ((key (cons term context)))
815
                      (declare (dynamic-extent key))
816
                      (gethash key scan-cache))))
817
              (continue-from-subject (context subject emit recurse) ;; continuation)
818
                (declare (dynamic-extent emit recurse))
819
                (let ((subject-to-objects (get-cache-node context subject))
820
                      (*match-property-path-length* (1+ *match-property-path-length*)))
821
                  (trace-paths "~&pp.smbp.contfs: @~a ~a(~a) ~a~%"
822
                               *match-property-path-length*
823
                               (format-term-number-object subject) subject
824
                               (loop for object in subject-to-objects
825
                                 collect (list (format-term-number-object object) object)))
826
                  ;; emit breadth-first
827
                  (loop for object in subject-to-objects
828
                    do (progn (incf navigation-count)
829
                         (funcall emit context subject object)))
830
                  (loop for object in subject-to-objects
831
                    do (progn (incf navigation-count)
832
                         (funcall recurse context object)))))
833
              (continue-from-object (context object emit recurse) ;; continuation)
834
                (declare (dynamic-extent emit recurse))
835
                (let ((object-to-subjects (get-cache-node context object))
836
                      (*match-property-path-length* (1+ *match-property-path-length*)))
837
                  (trace-paths "~&pp.smbp.contfo: @~a ~a(~a) ~a~%"
838
                               *match-property-path-length*
839
                               (format-term-number-object object) object
840
                               (loop for subject in object-to-subjects
841
                                 collect (list (format-term-number-object subject) subject)))
842
                  ;; emit breadth-first
843
                  (loop for subject in object-to-subjects
844
                    do (progn (incf navigation-count)
845
                         (funcall emit context subject object)))
846
                  (loop for subject in object-to-subjects
847
                    do (progn (incf navigation-count)
848
                         (funcall recurse context subject))))))
849
       (declare (dynamic-extent #'cache-from-subject #'cache-from-object
850
                                #'cache-from-graph-subject #'cache-from-graph-object)
851
                (dynamic-extent #'bounded-path-continuation-aux))
852
       (with-node-cache
853
           (labels ((bound-path (min max)
854
                      (when (integerp max) (setq *match-property-path-nodes* nil))
855
                      (cond ((or (null min) (zerop min))
856
                           (trace-paths :pp.mbp.zerop context subject predicate object)
857
                           (match-zero-length-path repository-handle context subject predicate object #'bounded-path-continuation-aux)
858
                           (bound-path 1 max))
859
                          ((and (eql min 1) (eql max 1))
860
                           (match-property-path-type repository-handle context subject predicate object #'bounded-path-continuation-aux))
861
                          ((wildcard-term-p object)
862
                           ;; traverse from subject
863
                           ;; first, materialize the paths across the predicate using wild subject and object
864
                           (match-property-path-type repository-handle context *wildcard-identifier* predicate *wildcard-identifier*
865
                                                     (if *match-target-graph* #'cache-from-subject #'cache-from-graph-subject))
866
                           (trace-paths *trace-output*"~&pp.smbpm.from-subject-cache-complete: ~a/~a: ~a(~a) ~a ~a(~a)~%"
867
                                    scan-cache materialized-count
868
                                        (format-term-number-object subject) subject
869
                                        predicate
870
                                        (format-term-number-object object) object)
871
                           (let ((first-subject subject))
872
                             (labels ((emit-subject-to-object-step (next-graph next-subject next-object)
873
                                        (trace-paths "~&pp.smbpm.estos: (>= ~a ~a) ~a ~a~%"
874
                                                     *match-property-path-length* min next-subject next-object)
875
                                        (when (or (null min) (>= *match-property-path-length* min))
876
                                          (trace-paths :pp.smbpm.estos.bpc next-subject predicate next-object)
877
                                          (bounded-path-continuation next-graph first-subject next-object)))
878
                                      (recurse-subject-to-object-step (next-graph next-object)
879
                                        (if (and max (>= *match-property-path-length* max))
880
                                            (trace-paths :pp.smbpm.rstos.stopped@max max)
881
                                            ;;(with-marked-node (next-object)
882
                                            (progn
883
                                              (trace-paths "~&pp.smbpm.rstos.mstos: ~a(~a)~%"
884
                                                           (format-term-number-object next-object) next-object)
885
                                              (match-subject-to-object-step next-graph next-object))))
886
                                      (match-subject-to-object-step (context subject)
887
                                        (trace-paths "~&pp.smbp.msto: ~a(~a)~a(~a)~%"
888
                                                (format-term-number-object context) context 
889
                                                (format-term-number-object subject) subject )
890
                                        (with-marked-node ((if *match-target-graph* subject (cons subject context)))
891
                                          (continue-from-subject context subject ;; #'continue-subject-to-object-step))))
892
                                                                 #'emit-subject-to-object-step
893
                                                                 #'recurse-subject-to-object-step))))
894
                               (declare (dynamic-extent #'emit-subject-to-object-step
895
                                                        #'recurse-subject-to-object-step))
896
                               (if (wildcard-term-p subject) ;; enumerate subjects
897
                                   ;; the wild subject causes the problem, that the node cache is shared among the
898
                                   ;; individual start paths, with the effect the a path of the form rest?/first
899
                                   ;; collected the first segments only.
900
                                   (loop for key being each hash-key of scan-cache
901
                                     for key-context = (if *match-target-graph* context (rest key))
902
                                     for key-subject = (if *match-target-graph* key (first key))
903
                                     do (progn (clear-node-cache)
904
                                          (trace-paths "~&pp.smbp.map-scan-cache: ~a(~a)~%"
905
                                                  (format-term-number-object key-subject) key-subject)
906
                                          (setf first-subject key-subject)
907
                                          (match-subject-to-object-step key-context key-subject)))
908
                                   (match-subject-to-object-step context subject))
909
                               (trace-paths "~&pp.smbpm.from-subject-navigation-complete: ~a/~a~%"
910
                                       scan-cache navigation-count))))
911
                           ((wildcard-term-p subject)
912
                            ;; traverse from object
913
                            (match-property-path-type repository-handle context *wildcard-identifier* predicate *wildcard-identifier*
914
                                                      (if *match-target-graph* #'cache-from-object #'cache-from-graph-object))
915
                            (trace-paths "~&pp.smbpm.from-object-cache-complete: ~a/~a: ~a(~a) ~a ~a(~a)~%"
916
                                         scan-cache materialized-count
917
                                         (format-term-number-object subject) subject
918
                                         predicate
919
                                         (format-term-number-object object) object)
920
                            (let ((first-object object))
921
                              (labels ((emit-object-to-subject-step (next-graph next-subject next-object)
922
                                         (trace-paths "~&pp.smbpm.eotss: (>= ~a ~a) ~a ~a~%"
923
                                                      *match-property-path-length* min next-subject next-object)
924
                                         (when (or (null min) (>= *match-property-path-length* min))
925
                                           (trace-paths :pp.smbpm.eotss.bpc next-subject predicate next-object)
926
                                           (bounded-path-continuation next-graph next-subject first-object)))
927
                                       (recurse-object-to-subject-step (next-graph next-subject)
928
                                         (if (and max (>= *match-property-path-length* max))
929
                                             (trace-paths :pp.smbpm.rotss.stopped@max max)
930
                                             ;;(with-marked-node (next-object)
931
                                             (progn
932
                                               (trace-paths "~&pp.smbpm.rotss.motss: ~a(~a)~%"
933
                                                            (format-term-number-object next-subject) next-subject)
934
                                               (match-object-to-subject-step next-graph next-subject))))
935
                                       (match-object-to-subject-step (context object)
936
                                         (trace-paths "~&pp.smbpm.motss: ~a(~a) ~a(~a)~%"
937
                                                      (format-term-number-object subject) subject
938
                                                      (format-term-number-object object) object)
939
                                         (with-marked-node ((if *match-target-graph* object (cons object context)))
940
                                           (continue-from-object context object
941
                                                                 #'emit-object-to-subject-step
942
                                                                 #'recurse-object-to-subject-step))))
943
                                (declare (dynamic-extent #'emit-object-to-subject-step
944
                                                         #'recurse-object-to-subject-step))
945
                                (match-object-to-subject-step context object)
946
                                (trace-paths "~&pp.smbpm.from-object-navigation-complete: ~a/~a~%"
947
                                       scan-cache navigation-count))))
948
                           (t
949
                            ;; constrain the endpoints
950
                            (flet ((test-last-bounded-segment (match-context match-subject match-predicate match-object)
951
                                     (trace-paths "~&pp.smbp tlbs: ~a(~a) ~a ~a(~a)~%"
952
                                                  (format-term-number-object match-subject) match-subject
953
                                                  match-predicate
954
                                                  (format-term-number-object match-object) match-object)
955
                                     (when (eql object match-object)
956
                                       (bounded-path-continuation match-context match-subject match-object))))
957
                              (stream-match-bounded-path-materialized repository-handle
958
                                                                      context subject predicate *wildcard-identifier* 
959
                                                                      min max #'test-last-bounded-segment))))))
960
             (bound-path min max)
961
             (trace-paths "~&pp.smbpm.complete: ~a / ~a / ~a~%"
962
                          materialized-count navigation-count solution-count)
963
             (values materialized-count navigation-count solution-count))))))
964
 
965
 
966
 (defun collect-match-bounded-path (repository-handle context subject predicate object min max)
967
   "compute the path field denoted by a given path by matching it once against the repository and then
968
  reducing the combinations by cross-joining under the bounds constraints.
969
  If the subject is wild, iterate across all nodes as subject and union the results."
970
 
971
   (cond ((or (null min) (zerop min))
972
          (trace-paths :pp.mbp.zerop context subject predicate object)
973
          (union-path-fields (match-zero-length-path repository-handle context subject predicate object)
974
                             (match-bounded-path repository-handle context subject predicate object 1 max)))
975
         ((and (eql min 1(eql max 1))
976
          (match-property-path-type repository-handle context subject predicate object))
977
         ((and (wildcard-term-p subject) nil)
978
          (unless max (setf max *solution-count-limit*))
979
          (when (> min max) (rotatef min max))
980
          (let* ((distinct-path-field (let* ((full-field (match-property-path-type repository-handle context subject predicate *wildcard-identifier*))
981
                                             ;; for a predicate term, the result was already distinct, otherwise de-duplicate it
982
                                             (distinct-solutions (if (property-path-verb-p predicate)
983
                                                                   (path-field-solutions full-field)
984
                                                                   (remove-duplicates (path-field-solutions full-field) :test #'equal))))
985
                                        (make-path-field :solutions distinct-solutions)))
986
                (subject-cache (make-hash-table :test 'eql))
987
                (results #()))
988
            (loop for (subject nil) across (path-field-solutions distinct-path-field)
989
                  unless (gethash subject subject-cache)
990
                  do (let* ((solution-filter (if (wildcard-term-p object)
991
                                              #'(lambda (solution) (= (first solution) subject))
992
                                              #'(lambda (solution) (and (= (first solution) subject) (= (second solution) object)))))
993
                           (base-path-field (make-path-field :solutions (remove-if-not solution-filter (path-field-solutions distinct-path-field))))
994
                           (cache (make-hash-table :test 'eql)))
995
                       (setf (gethash subject subject-cache) t)
996
                       (loop for (nil  object) across (path-field-solutions base-path-field)
997
                             do (setf (gethash object cache) t))
998
                       (loop for iteration from 1 to max
999
                             for next-path-field = base-path-field
1000
                             then (merge-join-path-fields next-path-field distinct-path-field cache)
1001
                             until (zerop (path-field-length next-path-field))
1002
                             when (>= iteration min)
1003
                             do (progn
1004
                                  (trace-paths :pp.mbp.next min max iteration (path-field-solutions next-path-field))
1005
                                  (setf results (concatenate 'vector results (path-field-solutions next-path-field)))))))
1006
            (make-path-field :solutions results)))
1007
         (t
1008
          (unless max (setf max *solution-count-limit*))
1009
          (when (> min max) (rotatef min max))
1010
          (let* ((distinct-path-field (let ((full-field (match-property-path-type repository-handle context *wildcard-identifier* predicate *wildcard-identifier*)))
1011
                                        (if (property-path-verb-p predicate)
1012
                                          full-field
1013
                                          (make-path-field :solutions (remove-duplicates (path-field-solutions full-field) :test #'equal)))))
1014
                 (solution-filter (if (wildcard-term-p object)
1015
                                    #'(lambda (solution) (= (first solution) subject))
1016
                                    #'(lambda (solution) (and (= (first solution) subject) (= (second solution) object)))))
1017
                 (base-path-field (if (wildcard-term-p subject)
1018
                                    distinct-path-field
1019
                                    (make-path-field :solutions  (remove-if-not solution-filter (path-field-solutions distinct-path-field)))))
1020
                 (cache (make-hash-table :test 'eql))
1021
                 (results #()))
1022
            (declare (dynamic-extent solution-filter))
1023
            (loop for (nil  object) across (path-field-solutions base-path-field)
1024
                  do (setf (gethash object cache) t))
1025
            (loop for iteration from 1 to max
1026
                  for next-path-field = base-path-field
1027
                  then (merge-join-path-fields next-path-field distinct-path-field cache)
1028
                  until (zerop (path-field-length next-path-field))
1029
                  when (>= iteration min)
1030
                  do (progn
1031
                       (trace-paths :pp.mbp.next min max iteration (path-field-solutions next-path-field))
1032
                       (setf results (concatenate 'vector results (path-field-solutions next-path-field)))))
1033
            (make-path-field :solutions results)))))
1034
 
1035
 
1036
 (defun match-inverted-path (repository-handle context subject path-to-invert object &optional continuation)
1037
   "Match the path specified by exchanging the subject and object.
1038
    Indicate the direction reversal for cycle recognition."
1039
 
1040
   (if continuation
1041
       (flet ((continue-match-inverted (matched-context matched-subject predicate matched-object)
1042
                (call-path-continuation continuation matched-context matched-object predicate matched-subject)))
1043
         (declare (dynamic-extent #'continue-match-inverted))
1044
         (match-property-path-type repository-handle context object path-to-invert subject #'continue-match-inverted))
1045
       (let ((path-field (match-property-path-type repository-handle context object path-to-invert subject)))
1046
         (make-path-field :solutions (map 'vector #'reverse (path-field-solutions path-field))))))
1047
 
1048
 
1049
 (defgeneric match-negated-path (repository-handle context subject negated-path object &optional continuation)
1050
   (:documentation "Apply the continuation in the event that no NEGATED-PATH does yield any match.
1051
     - the path argument can be either a verb, an inverted verb, or a dusjunction of those
1052
     - the continued values are those matches for which some property other than the negated set
1053
       is present; inversion means subject-object roles are swapped; disjunction means
1054
       the conjunction of the filtered matches. that is !(iri_1 | iri_2) == !iri_1 & !iri_2.
1055
       thus the control structure is the same as for a conjunction and each element is all other than the matching
1056
       with inversion flipping the respective roles.")
1057
 
1058
   (:method (repository-handle context subject (path or-property-path) object &optional continuation)
1059
     ;; succeed for those predicates which every disjunction member succeeds
1060
     ;; !!! needs to be rewritten in a form which is linear in solution count rather than this,
1061
     ;;; which is exponential in the disjunction term count
1062
     (if continuation
1063
       (destructuring-bind (first-predicate . rest-predicates) (property-path-elements path)
1064
         (flet ((match-first-continue (first-context first-subject first-predicate first-object)
1065
                  (declare (ignore first-predicate))
1066
                  (labels ((match-step (predicates)
1067
                             (cond (predicates
1068
                                    (destructuring-bind (next-first-predicate . next-rest-predicates) predicates
1069
                                      (flet ((match-step-continue (matched-context matched-subject predicate matched-object)
1070
                                               (declare (ignore matched-context predicate))
1071
                                               (when (and (spocq.e:same-term first-subject matched-subject)
1072
                                                          (spocq.e:same-term first-object matched-object))
1073
                                                 (match-step next-rest-predicates))))
1074
                                        (match-negated-path repository-handle first-context subject next-first-predicate object
1075
                                                            #'match-step-continue))))
1076
                                   (t
1077
                                    (call-path-continuation continuation first-context first-subject path first-object)))))
1078
                    (match-step rest-predicates))))
1079
           (declare (dynamic-extent #'match-first-continue))
1080
           (match-negated-path repository-handle context subject first-predicate object #'match-first-continue)))
1081
       (let* ((path-fields (loop for path-element in (property-path-elements path)
1082
                                 for path-field = (match-negated-path repository-handle context subject path-element object)
1083
                                 if (zerop (path-field-length path-field))
1084
                                 do (return nil)
1085
                                 else collect path-field )))
1086
         (reduce #'intersect-path-fields path-fields))))
1087
   
1088
   (:method (repository-handle context subject (path inverted-property-path) object &optional continuation)
1089
     (match-negated-path repository-handle context object (property-path-element path) subject continuation))
1090
 
1091
   (:method (repository-handle context subject (negated-path-verb property-path-verb) object &optional continuation)
1092
     "match the statements for which the predicate is not that given by iterating over all statements
1093
      respective the wild/constant nature of the given subject and object and filtering out
1094
      the given predicate."
1095
 
1096
     (let ((negated-predicate (repository-intern-property-path repository-handle negated-path-verb)))
1097
       (if continuation
1098
         (flet ((continue-if-not-matched (context matched-subject matched-predicate matched-object)
1099
                  (unless (= matched-predicate negated-predicate)
1100
                    (call-path-continuation continuation context matched-subject matched-predicate matched-object))))
1101
           (declare (dynamic-extent #'continue-if-not-matched))
1102
           (repository-query-by-verb #'continue-if-not-matched repository-handle context subject *wildcard-identifier* object))
1103
         (let ((solutions ()))
1104
           (flet ((collect-if-not-matched (context matched-subject matched-predicate matched-object)
1105
                    (declare (ignore context))
1106
                    (unless (= matched-predicate negated-predicate)
1107
                      (push (list matched-subject matched-object) solutions))))
1108
             (declare (dynamic-extent #'collect-if-not-matched))
1109
             (repository-query-by-verb #'collect-if-not-matched repository-handle context subject *wildcard-identifier* object)
1110
             (make-path-field :solutions (coerce (nreverse solutions) 'vector))))))))
1111
   
1112
 
1113
 
1114
 (defun expand-path-sequence (subject path-sequence object)
1115
   (loop for paths on path-sequence
1116
         for path = (first paths)
1117
         collect (list (if (eq paths path-sequence) subject *wildcard-identifier*)
1118
                       path
1119
                       (if (rest paths) *wildcard-identifier* object))))
1120
 
1121
 ;;; (expand-path-sequence 2 '(<http://test1> <http://test2>) 3)
1122
 ;;; (expand-path-sequence 2 (make-sequence-property-path :elements '(<http://test1> <http://test2> <http://test3>)) 3)
1123
 
1124
 (defun match-sequence-path (repository-handle context subject path-sequence object &optional continuation)
1125
   "Match a path sequence by joining the results of matching the endpoints of the path's constituent elements."
1126
   (labels ((match-from-initial-path-element (initial-subject path-elements)
1127
              (let ((cache (make-hash-table :test #'eql)))
1128
                (flet ((collect-first (matched-context matched-subject matched-predicate matched-object)
1129
                         (declare (ignorable matched-context matched-predicate))
1130
                         ;; if initial is wild, it is not eql to matched
1131
                         ;; (print (list :tinitial=? matched-subject initial-subject))
1132
                         (push matched-subject (gethash matched-object cache))))
1133
                  (declare (dynamic-extent #'collect-first))
1134
                  (destructuring-bind (next-element . rest-elements) path-elements
1135
                    (match-property-path-type repository-handle context initial-subject next-element *wildcard-identifier*
1136
                                              #'collect-first)
1137
                    (match-next-path-element cache rest-elements)))))
1138
            (match-next-path-element (previous-cache path-elements)
1139
              (destructuring-bind (this-element . rest-elements) path-elements
1140
                ;; join matched subject with cached objects to build a new cache and continue
1141
                (let ((this-cache (make-hash-table :test #'eql)))
1142
                  (flet ((collect-next (matched-context matched-subject matched-predicate matched-object)
1143
                           (declare (ignorable matched-context matched-predicate))
1144
                           (setf (gethash matched-object this-cache)
1145
                                 (append (gethash matched-subject previous-cache) (gethash matched-object this-cache)))))
1146
                    (declare (dynamic-extent #'collect-next))
1147
                    (match-property-path-type repository-handle context *wildcard-identifier* this-element *wildcard-identifier*
1148
                                              #'collect-next)
1149
                    (if rest-elements
1150
                        (match-next-path-element this-cache rest-elements)
1151
                        (loop for terminal-object being each hash-key of this-cache
1152
                          using (hash-value initials)
1153
                          ;; do (print (list :initials terminal-object initials))
1154
                          do (loop for initial-subject in (remove-duplicates initials)
1155
                               do (call-path-continuation continuation context initial-subject path-sequence terminal-object))))))))
1156
 
1157
            (match-from-terminal-path-element (terminal-object path-elements)
1158
              ;; join from constant object back to the pattern subject
1159
              (let ((cache (make-hash-table :test #'eql)))
1160
                (flet ((collect-terminal (matched-context matched-subject matched-predicate matched-object)
1161
                         (declare (ignorable matched-context matched-predicate matched-object))
1162
                         ;; (print (list :terminal=? matched-object terminal-object))
1163
                         (push terminal-object (gethash matched-subject cache))
1164
                         #+(or)
1165
                         (print (list :collect-terminal matched-context matched-subject matched-predicate matched-object
1166
                                      (loop for key being each hash-key of cache collect key)))))
1167
                  (declare (dynamic-extent #'collect-terminal))
1168
                  (destructuring-bind (terminal-element . previous-elements) path-elements
1169
                    (match-property-path-type repository-handle context *wildcard-identifier* terminal-element terminal-object
1170
                                              #'collect-terminal)
1171
                    (match-previous-path-element cache previous-elements)))))
1172
            (match-previous-path-element (successor-cache path-elements)
1173
              (destructuring-bind (this-element . previous-elements) path-elements
1174
                ;; join matched subject with cached objects to build a new cache and continue
1175
                (let ((this-cache (make-hash-table :test #'eql)))
1176
                  (flet ((collect-previous (matched-context matched-subject matched-predicate matched-object)
1177
                           (declare (ignorable matched-context matched-predicate))
1178
                           (setf (gethash matched-subject this-cache)
1179
                                 (append (gethash matched-object successor-cache) (gethash matched-subject this-cache)))
1180
                           #+(or)
1181
                           (print (list :collect-previous matched-context matched-subject matched-predicate matched-object
1182
                                        (loop for key being each hash-key of this-cache collect key)))))
1183
                    (declare (dynamic-extent #'collect-previous))
1184
                    (match-property-path-type repository-handle context *wildcard-identifier* this-element *wildcard-identifier*
1185
                                              #'collect-previous)
1186
                    ;; (print (list :collected-previous (loop for key being each hash-key of this-cache collect key)))
1187
                    (if previous-elements
1188
                        (match-previous-path-element this-cache previous-elements)
1189
                        (loop for initial-subject being each hash-key of this-cache
1190
                          using (hash-value terminals)
1191
                          ;; do (print (list :terminals initial-subject terminals))
1192
                          do (loop for terminal-object in (remove-duplicates terminals)
1193
                               do (call-path-continuation continuation context initial-subject path-sequence terminal-object)
1194
                    ))))))))
1195
     ;; metch from path subject or object, depending on which may be wild/constant
1196
     (if (and (wildcard-term-p subject(not (wildcard-term-p object)))
1197
         ;; match from the object end
1198
         (match-from-terminal-path-element object (reverse path-sequence))
1199
         (match-from-initial-path-element subject path-sequence))))
1200
 
1201
                                     
1202
 
1203
 
1204
 #+(or) ;; superseded by hash-based interpretation
1205
 (defun match-sequence-path (repository-handle context subject path-sequence object &optional continuation)
1206
   "Match the endpoints of the path's constituent elements."
1207
   (if continuation
1208
     (if (wildcard-term-p object)
1209
         (if (wildcard-term-p subject)
1210
             (match-sequence-path-from-wild-subject repository-handle context path-sequence continuation)
1211
             (match-sequence-path-from-constant-subject repository-handle context subject path-sequence continuation))
1212
         (if (wildcard-term-p subject)
1213
             (match-sequence-path-from-constant-object repository-handle context object path-sequence continuation)
1214
             ;; if both are constant, match from subject and constrain the result
1215
             (flet ((test-last-segment (match-context match-subject match-predicate match-object)
1216
                       (trace-paths "~&pp.msp.tls: ~a(~a) ~a ~a(~a)~%"
1217
                                    (format-term-number-object match-subject) match-subject
1218
                                    match-predicate
1219
                                    (format-term-number-object match-object) match-object)
1220
                       (when (eql object match-object)
1221
                         (call-path-continuation continuation match-context match-subject path-sequence match-object))))
1222
               (declare (dynamic-extent #'test-last-segment))
1223
               (match-sequence-path-from-constant-subject repository-handle context subject path-sequence #'test-last-segment))))
1224
     (if (rest path-sequence)
1225
       (let* ((patterns (expand-path-sequence subject path-sequence object))
1226
              (fields (loop for (subject path object) in patterns
1227
                            collect (match-property-path-type repository-handle context subject path object))))
1228
         (reduce #'merge-join-path-fields fields))
1229
       (match-property-path-type repository-handle context subject (first path-sequence) object))))
1230
 
1231
 
1232
 (defun match-sequence-path-from-constant-subject (repository-handle context subject path-sequence continuation)
1233
   "propagate the constraint from the subject to navigate the path sequence to a wild object"
1234
   (trace-paths :pp.mspfcs context subject path-sequence)
1235
   (labels ((match-subject-to-object-step (next-context next-subject predicates)
1236
              (trace-paths :pp.mspfcs.mstos next-subject predicates)
1237
              (if predicates
1238
                  (destructuring-bind (next-predicate . rest-predicates) predicates
1239
                    (flet ((match-subject-to-object-step-continue (matched-context matched-subject predicate matched-object)
1240
                             (declare (ignore matched-subject predicate))
1241
                             (match-subject-to-object-step matched-context matched-object rest-predicates)))
1242
                      (declare (dynamic-extent #'match-subject-to-object-step-continue))
1243
                      (match-property-path-type repository-handle next-context next-subject next-predicate *wildcard-identifier*
1244
                                                #'match-subject-to-object-step-continue)))
1245
                  (call-path-continuation continuation context subject path-sequence next-subject))))
1246
     (match-subject-to-object-step context subject path-sequence)
1247
     t))
1248
 
1249
 (defun match-sequence-path-from-constant-object (repository-handle context object path-sequence continuation)
1250
   "propagate the constraint from the object to navigate a path to a wild subject"
1251
   (trace-paths :pp.mspfco context object path-sequence)
1252
   (labels ((match-object-to-subject-step (next-context next-object predicates)
1253
              (trace-paths :pp.mspfco.mstos next-object predicates)
1254
              (if predicates
1255
                  (destructuring-bind (next-predicate . rest-predicates) predicates
1256
                    (flet ((match-object-to-subject-step-continue (matched-context matched-subject predicate matched-object)
1257
                             (declare (ignore matched-object predicate))
1258
                             (match-object-to-subject-step matched-context matched-subject rest-predicates)))
1259
                      (declare (dynamic-extent #'match-object-to-subject-step-continue))
1260
                      (match-property-path-type repository-handle next-context *wildcard-identifier* next-predicate next-object
1261
                                                #'match-object-to-subject-step-continue)))
1262
                  (call-path-continuation continuation context next-object path-sequence object))))
1263
     (match-object-to-subject-step context object (reverse path-sequence))
1264
     t))
1265
 
1266
 #+(or)
1267
 (defun match-sequence-path-from-wild-subject (repository-handle context path-sequence continuation)
1268
   "bind the subject and propagate the constraint from there to a wild object"
1269
   (trace-paths :pp.mspfws context path-sequence)
1270
   (destructuring-bind (first-predicate . rest-predicates) path-sequence
1271
     (flet ((match-subject-to-object-step (first-context first-subject first-predicate next-subject)
1272
              (declare (ignore first-predicate))
1273
              (trace-paths :pp.mspfws.mstos next-subject rest-predicates next-subject)
1274
              (if rest-predicates
1275
                  (flet ((continue-rest-predicates (match-context match-subject match-predicate match-object)
1276
                           (declare (ignore match-context))
1277
                           (trace-paths "~&:pp.mspfws.mstos.crp: ~a(~a) ~a ~a(~a)~%"
1278
                                        (format-term-number-object match-subject) match-subject
1279
                                        match-predicate
1280
                                        (format-term-number-object match-object) match-object)
1281
                           (call-path-continuation continuation context first-subject path-sequence match-object)))
1282
                    (match-sequence-path-from-constant-subject repository-handle context next-subject rest-predicates
1283
                                                               #'continue-rest-predicates))
1284
                  (call-path-continuation continuation first-context first-subject path-sequence next-subject))))
1285
       (declare (dynamic-extent #'match-subject-to-object-step))
1286
       (match-property-path-type repository-handle context *wildcard-identifier* first-predicate *wildcard-identifier* #'match-subject-to-object-step))))
1287
 
1288
 (defun match-sequence-path-from-wild-subject (repository-handle context path-sequence continuation)
1289
   "bind the subject and propagate the constraint from there to a wild object"
1290
   (trace-paths :pp.mspfws context path-sequence)
1291
   (destructuring-bind (first-predicate . rest-predicates) path-sequence
1292
     (flet ((match-subject-to-object-step (first-context first-subject matched-predicate next-subject)
1293
              (declare (ignore matched-predicate))
1294
              (trace-paths :pp.mspfws.mstos first-subject rest-predicates next-subject)
1295
              (if rest-predicates
1296
                  (flet ((continue-with-first-subject (match-context match-subject match-predicate last-object)
1297
                           (declare (ignore match-context match-subject match-predicate))
1298
                           (call-path-continuation continuation first-context first-subject path-sequence last-object)))
1299
                    (declare (dynamic-extent #'continue-with-first-subject))
1300
                    (match-sequence-path-from-constant-subject repository-handle first-context next-subject rest-predicates #'continue-with-first-subject))
1301
                  (call-path-continuation continuation first-context first-subject path-sequence next-subject))))
1302
       (declare (dynamic-extent #'match-subject-to-object-step))
1303
       (match-property-path-type repository-handle context *wildcard-identifier* first-predicate *wildcard-identifier* #'match-subject-to-object-step))))
1304
 
1305
 
1306
 (defun call-with-path-field-collector (op &rest args)
1307
   (let ((solutions ()))
1308
     (flet ((collect (context subject predicate object)
1309
              (declare (ignore context predicate))
1310
              (push (list subject object) solutions)))
1311
       (apply op (append args (list #'collect)))
1312
       (make-path-field :solutions (coerce (nreverse solutions) 'vector)))))
1313
 
1314
 (defun path-field-solutions-by-object (path-field)
1315
   (or (path-field-by-object-cache path-field)
1316
       (setf (path-field-by-object-cache path-field)
1317
             (sort (copy-seq (path-field-solutions path-field)) #'< :key #'second))))
1318
 
1319
 (defun path-field-solutions-by-subject (path-field)
1320
   (or (path-field-by-subject-cache path-field)
1321
       (setf (path-field-by-subject-cache path-field)
1322
             (sort (copy-seq (path-field-solutions path-field)) #'< :key #'first))))
1323
 
1324
 
1325
 (defun path-field-solution-list (path-field)
1326
   (map 'list #'identity (path-field-solutions path-field)))
1327
 
1328
 (defun path-field-solution-objects (path-field)
1329
   (map 'list #'(lambda (s) (mapcar #'term-number-object s)) (path-field-solutions path-field)))
1330
 
1331
 #|
1332
 #+(or) the slot accessor already exists
1333
 (defun path-field-length (path-field)
1334
   (length (path-field-solutions path-field)))
1335
 |#
1336
 
1337
 ;;; operators intended to be used with reduce. as such either argument may be missing.
1338
 ;;; if just the first is supplied, then the result is indentity.
1339
 ;;; if neither is supplied, then the null field.
1340
 (defun intersect-path-fields (&optional field1 field2)
1341
   (if field1
1342
     (if field2
1343
       (make-path-field :solutions (coerce (intersection (path-field-solution-list field1)
1344
                                                         (path-field-solution-list field2)
1345
                                                         :test #'equal)
1346
                                            'vector))
1347
       field1)
1348
     (make-path-field :solutions #())))
1349
 
1350
 (defun union-path-fields (&optional field1 field2)
1351
   (if field1
1352
     (if field2
1353
       (make-path-field :solutions (coerce (let ((s1 (path-field-solution-list field1))
1354
                                                 (s2 (path-field-solution-list field2)))
1355
                                             (append s1 (set-difference s2 s1 :test #'equal)))
1356
                                           'vector))
1357
       field1)
1358
     (make-path-field :solutions #())))
1359
 
1360
 (defun sum-path-fields (&optional field1 field2)
1361
   (if field1
1362
     (if field2
1363
       (make-path-field :solutions (coerce (let ((s1 (path-field-solution-list field1))
1364
                                                         (s2 (path-field-solution-list field2)))
1365
                                                     (append s1 s2))
1366
                                           'vector))
1367
       field1)
1368
     (make-path-field :solutions #())))
1369
 
1370
 (defun difference-path-fields (&optional field1 field2)
1371
   (if field1
1372
     (if field2
1373
       (make-path-field :solutions (coerce (let ((s1 (path-field-solution-list field1))
1374
                                                 (s2 (path-field-solution-list field2)))
1375
                                             (set-difference s1 s2 :test #'equal))
1376
                                           'vector))
1377
       field1)
1378
     (make-path-field :solutions #())))
1379
 
1380
 
1381
 (defun merge-join-path-fields (object-field subject-field &optional cache)
1382
   "Join two solution fields, each of which had been generated from a step in a path match.
1383
  Each field has just two dimensions 's' and 'o'. The 'object-field' isses from the
1384
  preceeding path element in a sequence and the 'subject-field' issues from the successor.
1385
  The join of (of.s of.o) x (sf.s sf.o) takes the form of cross joins over the matching (of.o == sf.s)
1386
  sub-fields. The result is the (of.s , sf.o) projection.
1387
  If a cache is provided, add only those solutions for which the sf.o is not present and 
1388
  update the cache respectively for each new solution."
1389
 
1390
   (let* ((object-solutions (path-field-solutions-by-object object-field))
1391
          (subject-solutions (path-field-solutions-by-subject subject-field))
1392
          (o-end (length object-solutions))
1393
          (s-end (length subject-solutions))
1394
          (o-index 0)
1395
          (s-index 0)
1396
          (count 0)
1397
          (increment (min o-end s-end))
1398
          (result (make-array increment))
1399
          (result-index 0)
1400
          (result-end increment)
1401
          (o-solution nil)
1402
          (object nil)
1403
          (s-solution nil)
1404
          (subject nil))
1405
     (unless (or (zerop o-end) (zerop s-end))
1406
       (block :cross-join
1407
         (flet ((put-solution (subject object)
1408
                  (when (or (null cache)
1409
                            (when (null (gethash object cache)) (setf (gethash object cache) t)))
1410
                    (when (>= result-index result-end)
1411
                      (setf result-end (+ result-end increment))
1412
                      (setf result (adjust-array result result-end)))
1413
                    (setf (aref result result-index) (list subject object))
1414
                    (incf result-index)
1415
                    (when (and *solution-count-limit* (> result-index *solution-count-limit*))
1416
                      (log-warn "merge-join-path-fields: terminated @~a solutions."
1417
                                result-index)
1418
                      (terminate-task *query*))))
1419
                (update-object ()
1420
                  (when (>= o-index o-end)
1421
                    (return-from :cross-join))
1422
                  (setf o-solution (aref object-solutions o-index)
1423
                        object (second o-solution)))
1424
                (update-subject ()
1425
                  (when (>= s-index s-end)
1426
                    (return-from :cross-join))
1427
                  (setf s-solution (aref subject-solutions s-index)
1428
                        subject (first s-solution))))
1429
           (update-object)
1430
           (update-subject)
1431
           (loop
1432
             (cond ((> object subject)        ; subject sides needs to catch up
1433
                    (incf s-index)
1434
                    (update-subject))
1435
                   ((> subject object)        ; object side needs to catch up
1436
                    (incf o-index)
1437
                    (update-object))
1438
                   (t
1439
                  ;; keys are equal, nest iterations across the repective constant segments
1440
                  ;; and cross-join the solutions.
1441
                  (incf count)
1442
                  (loop with next-s-index = s-index
1443
                        for o-cross-index from o-index below o-end
1444
                        for o-cross-solution = (aref object-solutions o-cross-index)
1445
                        for o-cross-value = (second o-cross-solution)
1446
                        while (= object o-cross-value)
1447
                        do (loop for s-cross-index from s-index below s-end
1448
                                 for s-cross-solution = (aref subject-solutions s-cross-index)
1449
                                 for s-cross-value = (first s-cross-solution)
1450
                                 while (= subject s-cross-value)
1451
                                 do (put-solution (first o-cross-solution) (second s-cross-solution))
1452
                                 finally (setf next-s-index s-cross-index))
1453
                        finally (setf o-index o-cross-index
1454
                                      s-index next-s-index))
1455
                  (update-subject)
1456
                  (update-object)))))))
1457
     (when (< result-index (length result))
1458
       (setf result (adjust-array result result-index)))
1459
     (make-path-field :solutions result)))
1460
 
1461
 
1462
 (defgeneric map-repository-subject-and-objects (continuation repository-handle context &key distinct)
1463
   )
1464
 
1465
 (defun match-zero-length-path (repository-handle context subject predicate object &optional continuation)
1466
   "Yield all self-referential statements over the given predicate, respective the initial
1467
  subject/object boundary conditions. each may be either a variable or a term. In the latter case,
1468
  it must be either a term pointer or a term number."
1469
   (trace-paths "~&mzlp                  : ~a(~a) ~a ~a(~a)~%"
1470
           (format-term-number-object subject) subject
1471
           predicate
1472
           (format-term-number-object object) object)
1473
   (if continuation
1474
       (let ((count 0)
1475
             (zero-node-cache (make-hash-table :test 'eql)))
1476
         (labels ((reflexive-continuation (c term-id predicate-id)
1477
                    (trace-paths "~&mzlp.rc?              : ~a(~a) ~a~%"
1478
                                 (format-term-number-object term-id) term-id
1479
                                 predicate-id)
1480
                    (incf count)
1481
                    (call-path-continuation continuation c term-id predicate-id term-id))
1482
                  (map-continuation-caching (context term-id)
1483
                    (let ((key (if *match-target-graph* term-id (cons context term-id))))
1484
                      (unless (gethash key zero-node-cache)
1485
                        (setf (gethash key zero-node-cache) t)
1486
                        (reflexive-continuation (or *match-target-graph* context) term-id predicate)))
1487
                    t))
1488
           ;; the active graph is always a constant
1489
           ;; examine just that context for the respective subject/object combinations
1490
           (declare (dynamic-extent #'map-continuation-caching))
1491
           (if (wildcard-term-p subject)
1492
               (if (wildcard-term-p object)
1493
                   ;; given two variables, match all identical terms _once_; 
1494
                   ;; this will not include the respective graphs neither to distinguish node instances
1495
                   ;; nor to provide contect for further navigation. -- in all cases the original graph
1496
                   ;; is used - even when it is yurn:dydra:all>
1497
                   (progn
1498
                     (map-repository-subject-and-objects #'map-continuation-caching repository-handle context :distinct t)
1499
                     #+(or)
1500
                     (do-repository-subjects (subject :repository-handle repository-handle :context context :distinct t)
1501
                                             (reflexive-continuation context subject predicate))
1502
                     #+(or)
1503
                     (do-repository-objects (object :repository-handle repository-handle :context context :distinct t)
1504
                                            (reflexive-continuation context object predicate)))
1505
                   ;; for subject-variable / object-term, yield a self-reference from the object
1506
                   (reflexive-continuation context object predicate))
1507
               (if (wildcard-term-p object)
1508
                   ;; for subject-term / object-variable, yield a self-reference from the subject
1509
                   (reflexive-continuation context subject predicate)
1510
                   ;; test just the zero length component -- the subject and term must be identical
1511
                   (when (eql subject object)
1512
                     (reflexive-continuation context subject predicate)))))
1513
         count)
1514
     (let ((solutions ()))
1515
       (flet ((collect (context subject predicate object)
1516
                (declare (ignore context predicate))
1517
                (push (list subject object) solutions)))
1518
         (declare (dynamic-extent #'collect))
1519
         (match-zero-length-path repository-handle context subject predicate object #'collect)
1520
         (make-path-field :solutions (coerce (nreverse solutions) 'vector))))))
1521
     
1522
 
1523
 ;;;
1524
 ;;; ultimate query/count operators by predicate
1525
 
1526
 (defparameter *cursor-level* 0)
1527
 
1528
 (defgeneric transaction-object-term-number (transaction term)
1529
   (:method ((transaction shard-transaction) (term t))
1530
     (rlmdb:value-term-number term)))
1531
 
1532
 
1533
 (defgeneric repository-query-by-verb (continuation repository-handle context subject predicate object)
1534
   (:documentation "Given a repository handle, apply an operator
1535
     to the successive context, subject, predicate and object literal combinations which match the given term
1536
     pattern
1537
     OP : (function  (integer integer integer integer) t) : to be applied in succession to the
1538
       matched term combinations.
1539
     REPOSITORY-HANDLE ;a repository-cache instance or a transaction as a handle on a remote store
1540
     PREDICATE : (or integer cffi:foreign-pointer iri property-path-verb) : designates the predicate term
1541
     CONTEXT, SUBJECT, OBJECT : (or <wild> term-identifier) : either a wild card or
1542
       a designator for each term respective the repository.")
1543
 
1544
   (:argument-precedence-order repository-handle predicate continuation context subject object)
1545
 
1546
   (:method ((continuation t) (transaction transaction) context subject (predicate property-path-verb) object)
1547
     "if the predicate is a verb, check wheter there is a related expansion.
1548
      if so, use that. otherwise determine the predicate term identifier and delegate to that method"
1549
     (if (property-path-verb-query predicate)
1550
         (delegate-to-path-query continuation transaction context subject predicate object)
1551
         (repository-query-by-verb continuation transaction context subject (repository-intern-property-path transaction predicate) object)))
1552
 
1553
   (:method ((continuation t) (transaction transaction) context subject (predicate spocq:iri) object)
1554
     "if the predicate is an iri, use it as a term identifier."
1555
     (repository-query-by-verb continuation transaction context subject (transaction-object-term-number transaction predicate) object))
1556
 
1557
   (:method ((continuation t) (transaction rdfcache-transaction) context subject (predicate integer) object)
1558
     "if the predicate is an integer, it as a term number - perform the actual match and upon success
1559
      invoke the continuation with the concrete terms under cycle constraints"
1560
     (cond ((null (transaction-parent-p transaction))
1561
            (trace-paths "pp.rqbv.suppress    : query-by-verb for empty repository: ~a" transaction)
1562
            nil)
1563
           (t
1564
            (trace-paths "pp.rqbv             : ~a(~a) ~a(~a) ~a(~a)~%"
1565
                         (format-term-number-object subject) subject
1566
                         (format-term-number-object predicate) predicate
1567
                         (format-term-number-object object) object)
1568
            (let ((match-cursor nil)
1569
                  (*cursor-level* (1+ *cursor-level*)))
1570
              (when *match-target-graph* (setf context *match-property-path-context*))
1571
              (unwind-protect
1572
                  ;; *match-target-graph* is bound by the bgp procesing function to indicate whether that
1573
                  ;; the graph should be coerced (to the default graph), because it is outside of a GRAPH
1574
                  ;; clause, but with a FROM clause.
1575
                  #+(or)
1576
                  (format *trace-output* "~%~vt~a: ~a(~a) ~a(~a) ~a(~a) ~a(~a) graph  ~a(~a)"
1577
                          (* 2 *cursor-level*) *cursor-level*
1578
                          (format-term-number-object context) context
1579
                          (format-term-number-object subject) subject
1580
                          (format-term-number-object predicate) predicate
1581
                          (format-term-number-object object) object
1582
                          (format-term-number-object *match-target-graph*) *match-target-graph*)
1583
                  (when (setf match-cursor (rdfcache::make-quad-cursor (transaction-record transaction) context subject predicate object
1584
                                                                       :graph *match-target-graph*))
1585
                    (unwind-protect
1586
                        (block :cursor-loop
1587
                          (loop while (dydra-ndk::quad-cursor-next match-cursor)
1588
                            count t
1589
                            do (let ((context-id (dydra-ndk::quad-cursor-graph-id match-cursor))
1590
                                     (subject-id (dydra-ndk::quad-cursor-subject-id match-cursor))
1591
                                     (predicate-id (dydra-ndk::quad-cursor-predicate-id match-cursor))
1592
                                     (object-id (dydra-ndk::quad-cursor-object-id match-cursor)))
1593
                                 #+(or)
1594
                                 (format *trace-output* "~&~vt~a:@~a(~a) ~a(~a) ~a(~a) ~a(~a) graph  ~a(~a)~%"
1595
                                         (* 2 *cursor-level*) *cursor-level*
1596
                                         (format-term-number-object context) context
1597
                                         (format-term-number-object subject) subject
1598
                                         (format-term-number-object predicate) predicate
1599
                                         (format-term-number-object object) object
1600
                                         (format-term-number-object *match-target-graph*) *match-target-graph*)
1601
                                 #+(or)
1602
                                 (format *trace-output* "~&~vt~a:=~a(~a) ~a(~a) ~a(~a) ~a(~a)~%"
1603
                                         (* 2 *cursor-level*) *cursor-level*
1604
                                         (format-term-number-object context-id) context-id
1605
                                         (format-term-number-object subject-id) subject-id
1606
                                         (format-term-number-object predicate-id) predicate-id
1607
                                         (format-term-number-object object-id) object-id)
1608
                                 (trace-paths "pp.rqbv.next        : ~a(~a) ~a(~a) ~a(~a) ~a(~a) ->~%"
1609
                                              (format-term-number-object context) context
1610
                                              (format-term-number-object subject) subject
1611
                                              (format-term-number-object predicate) predicate
1612
                                              (format-term-number-object object) object)
1613
                                 (trace-paths "pp.rqbv.next        : ~a(~a) ~a(~a) ~a(~a) ~a(~a)~%"
1614
                                              (format-term-number-object context-id) context-id
1615
                                              (format-term-number-object subject-id) subject-id
1616
                                              (format-term-number-object predicate-id) predicate-id
1617
                                              (format-term-number-object object-id) object-id)
1618
                                 (call-path-continuation continuation context-id subject-id predicate-id object-id))))
1619
                      (when match-cursor
1620
                        (dydra-ndk::free-quad-cursor match-cursor)))))))))
1621
 
1622
   (:method ((op t) (transaction transaction) context subject (predicate symbol) object)
1623
     "Iff a given symbol designates a term, then use the term's number as the predicate."
1624
     (let ((term-number (symbol-term-id predicate)))
1625
       (when (and term-number (not (zerop term-number)))
1626
         (repository-query-by-verb op transaction context subject term-number object))))
1627
 
1628
   (:method ((op t) (repository repository-cache) context subject predicate object)
1629
     (flet ((repository-query-continue (stmt)
1630
              (funcall op (quad-graph stmt) (triple-subject stmt) (triple-predicate stmt) (triple-object stmt))))
1631
       (declare (dynamic-extent #'repository-query-continue))
1632
       (de.setf.resource:map-statements* #'repository-query-continue repository subject predicate object context))))
1633
 
1634
 
1635
 (defgeneric repository-count-by-verb (repository-handle context subject predicate object)
1636
   (:documentation "Given a repository handle, determine
1637
     the statement count for a given context, subject, predicate and object term pattern.
1638
     REPOSITORY-HANDLE ; a repository-cache instance or a transaction as a handle on a remote store
1639
     CONTEXT, SUBJECT, PREDICATE, OBJECT : (or <wild> term-identifier) : either a wild card or
1640
       a designator for each term respective the repository
1641
     VALUES : integer : the count of matched statements")
1642
 
1643
   (:method ((transaction transaction) context subject (predicate property-path-verb) object)
1644
     "if the predicate is an iri, use it as term identifier."
1645
     (repository-count-by-verb transaction context subject (repository-intern-property-path transaction predicate) object))
1646
 
1647
   (:method ((transaction transaction) context subject (predicate spocq:iri) object)
1648
     "if the predicate is an iri, use it as term identifier."
1649
     (repository-count-by-verb transaction context subject (rlmdb:value-term-number predicate) object))
1650
 
1651
   (:method ((transaction rdfcache-transaction) (context integer) subject (predicate integer) object)
1652
     "if the predicate is an integer, it as a term identifier."
1653
     (rdfcache-count (transaction-record transaction) context subject predicate object))
1654
 
1655
   (:method ((transaction rdfcache-transaction) (context list) subject (predicate integer) object)
1656
     "if the predicate is an integer, it as a term identifier."
1657
     (loop with %transaction = (transaction-record transaction)
1658
       for context in context
1659
       sum (rdfcache-count %transaction context subject predicate object)))
1660
 
1661
   (:method ((transaction shard-transaction) context subject (predicate symbol) object)
1662
     "Iff a given symbol designates a term, then use the term's number as the predicate. Otherwise return zero."
1663
     (let ((term-number (symbol-term-id predicate)))
1664
       (if term-number
1665
         (repository-count-by-verb transaction context subject term-number object)
1666
         0)))
1667
 
1668
   (:method ((repository repository-cache) context subject predicate object)
1669
     (let ((count 0))
1670
       (flet ((repository-count-continue (stmt)
1671
                (declare (ignore stmt))
1672
                (incf count)))
1673
         (declare (dynamic-extent #'repository-count-continue))
1674
         (de.setf.resource:map-statements* #'repository-count-continue repository subject predicate object context))
1675
       count)))