Coverage report: /development/source/library/org/datagraph/spocq-shard/src/algebra/operators/aggregate.lisp

KindCoveredAll%
expression587789 74.4
branch3556 62.5
Key
Not instrumented
Conditionalized out
Executed
Not executed
 
Both branches taken
One branch taken
Neither branch taken
1
 ;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
2
 
3
 (in-package :org.datagraph.spocq.implementation)
4
 
5
 (:documentation "This file defines the AGGREGATE operator for the 'org.datagraph.spocq' RDF SPARQL engine."
6
 
7
  (copyright
8
   "Copyright 2010 [james anderson](mailto:james.anderson@setf.de) All Rights Reserved.")
9
 
10
  (:long-description
11
   "The select query form may include a group aggregation clause. This is implemented in two phases, first
12
  the solutions are segmented into keyed groups, second each group is aggregated to generate a solution.
13
  This file implements the aggregation step. see select.lisp and group.lisp for the other apsects.
14
 
15
  The rules for handling errors and unbound values lead to unexpected results.
16
  MAX and MIN are defined in terms of ORDER BY, which means they apply that order predicate to the
17
  arguments, which permits unbound indicators and places them as extreme results.
18
  In addition, the requirement, that a SELECT assignment produce the same results as an explicit BIND
19
  means that assignment errors yield unbound indicators as well.
20
  The implementation does not follow this requirement: it suppressed intermediate errors and ignores them."))
21
 
22
 ;;; in order to suppress the solution for empty aggregates, treat them as if there
23
 ;;; reduction caused an error. see
24
 ;;;    https://github.com/afs/sparql-group-agg
25
 (defparameter *empty-aggregation-set-operator* 'log-warn!)
26
 ;;; (defparameter *empty-aggregation-set-operator* 'error)
27
 
28
 (defmacro spocq.e::incf (location &optional (amount 1))
29
   "Provides a limited in-place increment for sparql aggregation operations which delegates to
30
  the extended '+' operator in order to support non-arithmetic data."
31
   `(setf ,location (spocq.e:+ ,location ,amount)))
32
 
33
 (defmacro spocq.a:|aggregate| (solution-field bindings key-bindings &rest args &key count end limit offset start)
34
   (declare (ignore count end limit offset start)
35
            (dynamic-extent args))
36
   (apply #'macroexpand-aggregate solution-field bindings key-bindings args))
37
 
38
 
39
 (defun macroexpand-aggregate (solution-field bindings key-bindings &rest args &key count end limit offset start)
40
   (declare (ignore count end limit offset start))
41
   (setf args (apply #'canonicalize-algebra-arguments args))
42
   (let ((reference-dimensions (union (bindings-value-variables bindings) (bindings-value-variables key-bindings))))
43
     #+(or) ;; any slice applies to the aggregation result
44
     (let ((sliced-expression (when args (apply #'compute-expression-slice solution-field args))))
45
       (if sliced-expression
46
           `(spocq.e:aggregate (spocq.e::with-reference-dimensions ,reference-dimensions ,sliced-expression) ',bindings ',key-bindings)
47
           `(spocq.e:aggregate (spocq.e::with-reference-dimensions ,reference-dimensions ,solution-field) ',bindings ',key-bindings ,@args)))
48
     `(spocq.e:aggregate (spocq.e::with-reference-dimensions ,reference-dimensions ,solution-field) ',bindings ',key-bindings ,@args)))
49
 
50
 (defgeneric spocq.e:aggregate (solution-field projection key-bindings &key end start)
51
   (:documentation "Aggregate a solution field as specified by the given aggregation and gruping
52
  specifications:
53
  SOLUTION-FIELD : a field with either the structure (key . (list solutions)). if it is grouped, or just
54
  (list solutions, if it is not. The distinction is indicated by the field dimensions, which is either a
55
  dimension list or a pair of lists. If grouped, each group sequnce is aggregated. If not, then
56
  an unsegmented stream is aggregated.
57
  BINDINGS : a list of (variable form) bindings, each form of which is applied iteratively across each group
58
    to yield a binding for the given variable in the result field.")
59
 
60
   (:method :before ((field t) projection key-bindings &key end start)
61
     (assert-argument-types spocq.e:aggregate
62
       (start (or null (integer 0)))
63
       (end (or null (integer 0))))
64
     (incf-stat *algebra-operations*)
65
     (trace-algebra spocq.e:aggregate field projection key-bindings
66
                    :start start :end end))
67
 
68
   (:method ((field solution-generator) projection key-bindings &rest args)
69
     (declare (dynamic-extent args))
70
     (apply #'spocq.e:stream-aggregate field projection key-bindings args)))
71
 
72
 
73
 (defun run-aggregate-thread (result-channel field-generator base-dimensions projection key-bindings
74
                                             args)
75
   (let ((base-channel (solution-generator-channel field-generator))
76
         (base-expression (solution-generator-expression field-generator)))
77
     (push 'spocq.a:|aggregate| (channel-name base-channel))
78
     (let ((*thread-operations* (cons (list 'spocq.a:|aggregate| base-dimensions args)
79
                                      *thread-operations*)))
80
       (query-run-in-thread *query* base-expression)
81
       (setf (channel-size result-channel) (channel-size base-channel)
82
             (channel-page-length result-channel) (channel-page-length base-channel))
83
       (apply #'process-aggregate result-channel base-channel
84
              base-dimensions
85
              projection
86
              key-bindings
87
              args)
88
       'spocq.a:|aggregate|)))
89
 
90
 (defun spocq.e:stream-aggregate (field-generator projection key-bindings &rest args &key end start)
91
   (let* ((base-dimensions (solution-generator-dimensions field-generator))
92
          (result-dimensions (bindings-variables projection))
93
          (result-channel (make-channel :name (list 'spocq.a:|aggregate| (task-id *query*))
94
                                        :dimensions result-dimensions
95
                                        :size (effective-channel-size :start start :end end)
96
                                        :page-length (effective-page-length :start start :end end))))
97
     (make-solution-generator :operator 'spocq.a:|aggregate|
98
                              :dimensions result-dimensions
99
                              :expression (list #'run-aggregate-thread result-channel field-generator
100
                                                base-dimensions projection key-bindings
101
                                                args)
102
                              :channel result-channel
103
                              :constituents (list field-generator))))
104
 
105
 (defgeneric process-aggregate (result-field base-field base-dimensions projection key-bindings &key start end)
106
   )
107
 
108
 (defparameter *PROCESS-AGGREGATE.TRACE* nil)
109
 (defparameter *PROCESS-AGGREGATE.key-predicate* #'equalp)
110
 ;;; (defparameter *PROCESS-AGGREGATE.key-predicate* #'equal)
111
 
112
 (defmethod process-aggregate ((destination array-page-channel)
113
                               (source array-page-channel)
114
                               base-dimensions
115
                               projection
116
                               key-bindings
117
                               &key (start 0) end)
118
   (assert-argument-types process-aggregate
119
     (base-dimensions list)
120
     (projection list)
121
     (key-bindings list))
122
   (incf-stat *algebra-operations*)
123
 
124
   (multiple-value-bind (result-dimensions make-group-operators key-operator)
125
                        (compute-aggregate-operators base-dimensions projection key-bindings)
126
     (declare (type function make-group-operators key-operator))
127
     (let* ((result-page-width (channel-page-width destination))
128
            (result-page-length (channel-page-length destination))
129
            (result-page nil)
130
            (result-index result-page-length)
131
            (result-count 0)
132
            (solution-count 0)
133
            (group-operators (make-hash-table :test *PROCESS-AGGREGATE.key-predicate*)))
134
       (assert (= (length result-dimensions) result-page-width) ()
135
               "Channel and operation dimensions do not match: ~a: ~a." destination result-dimensions)
136
       (labels ((base-processor (base-page)
137
                  (dotimes (base-index (array-dimension base-page 0))
138
                    (let* ((group-key (funcall key-operator base-page base-index))
139
                           (step-operator (group-step-operator group-key)))
140
                      (funcall step-operator base-page base-index group-key))))
141
                (collect-aggregate (group-reduce group-key)
142
                  (when (> (incf result-count) start)
143
                    (next-solution-location)
144
                    ;; count the result only if the reduction succeeds
145
                    (trace-data process-aggregate-collect destination result-dimensions group-key result-index)
146
                    (unless (funcall group-reduce result-page result-index group-key)
147
                      (decf result-count)
148
                      (decf result-index))
149
                    (when (and end (>= result-count end)) (complete-solutions))))
150
                (next-solution-location ()
151
                  ;; return a page (possible newly created) and the next free location in that page
152
                  (when (>= (incf result-index) result-page-length)
153
                    (when result-page (put-page result-page))
154
                    (setf result-page (new-field-page destination result-page-length result-page-width)
155
                          result-index 0)))
156
                (complete-solutions ()
157
                  (when result-page
158
                    (let ((page-result-count (1+ result-index)))
159
                      (when (< page-result-count result-page-length)
160
                        (setf result-page
161
                              (adjust-page result-page (list page-result-count result-page-width)))))
162
                    (put-page result-page))
163
                  (incf-stat *solutions-processed* solution-count)
164
                  (incf-stat *solutions-constructed* result-count)
165
                  (put-page nil)
166
                  (return-from process-aggregate (values solution-count result-count)))
167
                (put-page (page)
168
                  (trace-data process-aggregate-put destination result-dimensions page)
169
                  (if page
170
                    (put-field-page destination page)
171
                    (complete-field destination)))
172
                (group-step-operator (key)
173
                  ;; for the given key, return the respective step operator for that key's group
174
                  (or (first (gethash key group-operators))
175
                      ;; if the group does not yet exists, create its step and aggregation operators operator anew
176
                      ;; and return the step operator
177
                      (first (setf (gethash (if (consp key) (copy-list key) key)
178
                                            group-operators)
179
                                   (funcall make-group-operators))))))
180
         (unless (and (plusp result-page-length) (or (null end) (> end start))) (complete-solutions))
181
         (loop for solutions = (get-field-page source)
182
           until (null solutions)
183
           do (progn (check-query-status *query*)
184
                (incf solution-count (array-dimension solutions 0))
185
                (trace-data process-aggregate.get source solutions)
186
                (base-processor solutions)
187
                (release-field-page source solutions)))
188
         (when *process-aggregate.trace*
189
           (format *trace-output* "~%group-operators: ~s" group-operators)
190
           (loop for key being each hash-key of group-operators
191
             for i from 1
192
             do (format *trace-output* "~%key(~d): ~s" i key)))
193
         (if (plusp (hash-table-count group-operators))
194
             (loop for (nil . group-reduce) being each hash-value of group-operators
195
               using (hash-key group-key)
196
               do (collect-aggregate group-reduce group-key))
197
             ;; otherwise, generate the null solution
198
             (let ((null-reduce (rest (funcall make-group-operators))))
199
               (collect-aggregate null-reduce nil)))
200
         (complete-solutions)))))
201
 
202
 
203
 #+(or)
204
 (progn
205
   (compute-aggregate-operators '(?::other ?::sale)
206
                                '((?::offer (spocq.a:|sample| ?::other)) (?::otherCount (spocq.a:|count| ?::other)))
207
                                '(?::other))
208
   )
209
 
210
 (defparameter *compute-aggregate-operators.lambda* nil)
211
 
212
 ;;; version for which the key is the extension of the field
213
 (defun compute-aggregate-operators (base-dimensions bindings key-bindings)
214
   "returns three operators which close over a lexical environment with accumulation state:
215
  - initialization, to reset the state
216
  - step (base-page base-index) to perform the iterative step
217
  - reduce (base-page base-index group-key-values), to produce the aggregate solution"
218
   
219
   (let* ((base-width (length base-dimensions))
220
          (aggregate-variables (bindings-variables bindings))
221
          (key-dimensions (bindings-variables key-bindings))
222
          (result-width (length aggregate-variables))
223
          (aggregate-forms (bindings-value-forms bindings))
224
          (temporary-bindings ())         ; ((<variable> <form>) ... ) to allow specific initial values
225
          #+(or) (aggregate-expression-variables (loop for (nil expression) in bindings
226
                                                       append (expression-variables expression)))
227
          (iteration-forms ())
228
          (result-map ()))
229
     (declare (ignorable base-width))    ; in case tracing code is eliminated
230
     (labels ((abstract-aggregate-form (expression)
231
                ;; for each expression,
232
                ;; - locate the aggregation forms,
233
                ;; - note for each the requisite temporary variables
234
                ;; - note an iteration form
235
                ;; - replace the form with a reduction form
236
                (typecase expression
237
                  (cons (cond ((aggregate-operator-p (first expression))
238
                               (assert (notany #'aggregate-expression-p (rest expression)) ()
239
                                       "Invalid aggregate term (nested): ~a" expression)
240
                               (let ((existing-result (rest (assoc expression result-map :test #'equalp))))
241
                                 (or existing-result
242
                                     (multiple-value-bind (t-bindings i-form r-form)
243
                                                          (apply #'aggregation-form-components expression)
244
                                       (setf temporary-bindings (append temporary-bindings t-bindings))
245
                                       (push i-form iteration-forms)
246
                                       (let ((r-variable (gensym "RESULT-")))
247
                                         (push `(,r-variable nil) temporary-bindings)
248
                                         (push (cons expression r-variable) result-map)
249
                                         `(setf ,r-variable ,r-form))))))
250
                              (t
251
                               (cons (first expression)
252
                                     (mapcar #'abstract-aggregate-form (rest expression))))))
253
                  (t expression)))
254
              (abstract-aggregate-condition (term)
255
                (cond ((or (member term key-dimensions)
256
                           (and (variable-p term) (not *strict-aggregation-sample*)))
257
                       ;; allow the key variable to appear as a sample
258
                       (let ((r-variable (gensym "SAMPLE-")))
259
                         (push (list r-variable nil) temporary-bindings)
260
                         (setf iteration-forms (append iteration-forms `((spocq.e::sample-step (,r-variable) ,term))))
261
                         (push (cons term r-variable) result-map)
262
                         r-variable))
263
                      ((or (aggregate-expression-p term) (null key-dimensions))
264
                       (abstract-aggregate-form term))
265
                      ((and (consp term(every #'(lambda (var) (member var key-dimensions)) (expression-variables term)))
266
                       ;; if it computes some result from key, then allow that
267
                       (let ((r-variable (gensym "KEY-")))
268
                         (push (list r-variable nil) temporary-bindings)
269
                         (setf iteration-forms (append iteration-forms `((spocq.e::sample-step (,r-variable) ,term))))
270
                         (push (cons term r-variable) result-map)
271
                         r-variable))
272
                      (t
273
                       (spocq.e:aggregate-projection-error :token term :expression key-bindings))))
274
              (handled-form (form) `(handler-case ,form (error () nil))))
275
       ;; iterate over the aggregation bindings and collect from each
276
       ;; - any initialization forms fir temporary variables
277
       ;; - any iteration forms, with the field reference replaced by a reference to a temporary interation state
278
       ;; - any reduction forms, with references to the temporary interation state
279
       (setf aggregate-forms (mapcar #'abstract-aggregate-condition aggregate-forms))
280
       
281
       (let* ((result-macros (loop for v in aggregate-variables
282
                                   for v-index from 0
283
                                   collect `(,v (field-object-aref result-page result-index ,v-index ,v))))
284
              (base-macros (cons
285
                            ;; make complete solution available as the list of term numbers
286
                            ;; for use in (distinct *) 
287
                            `(solution (list ,@(loop for v in base-dimensions
288
                                                     for v-index from 0
289
                                                     collect `(aref base-page base-index ,v-index))))
290
                            ;; and each binding available as the interned value
291
                            (loop for v in base-dimensions
292
                                  for v-index from 0
293
                                  collect `(,v (field-object-aref base-page base-index ,v-index ,v)))))
294
              (initialization-lambda
295
               `(lambda () ,@(mapcar #'(lambda (binding) `(setf ,@binding)) temporary-bindings)))
296
              (step-lambda
297
               (if (consp key-dimensions)
298
                 (if (rest key-dimensions)
299
                   `(lambda (base-page base-index group-key)
300
                      (declare (fixnum base-index)
301
                               (type (simple-array fixnum (* ,base-width)) base-page)
302
                               (optimize ,@*field-optimization*))
303
                      (destructuring-bind ,key-dimensions group-key
304
                        (declare (ignorable ,@key-dimensions)
305
                                 (optimize ,@*field-optimization*))
306
                        (trace-data step-aggregation base-index
307
                                       (make-array ,base-width :displaced-to base-page :element-type 'fixnum
308
                                                   :displaced-index-offset (* base-index ,base-width))
309
                                       group-key)
310
                        (symbol-macrolet ,base-macros
311
                          ,@(mapcar #'handled-form iteration-forms))))
312
                   `(lambda (base-page base-index ,(first key-dimensions))
313
                      (declare (fixnum base-index)
314
                               (type (simple-array fixnum (* ,base-width)) base-page)
315
                               (optimize ,@*field-optimization*)
316
                               (ignorable ,(first key-dimensions)))
317
                      (trace-data step-aggregation base-index
318
                                     (make-array ,base-width :displaced-to base-page :element-type 'fixnum
319
                                                 :displaced-index-offset (* base-index ,base-width))
320
                                     ,(first key-dimensions))
321
                      (symbol-macrolet ,base-macros
322
                        ,@(mapcar #'handled-form iteration-forms))))
323
                 `(lambda (base-page base-index &optional group-key)
324
                    (declare (fixnum base-index)
325
                             (type (simple-array fixnum (* ,base-width)) base-page)
326
                             (optimize ,@*field-optimization*)
327
                             (ignore group-key))
328
                    (trace-data step-aggregation base-index
329
                                   (make-array ,base-width :displaced-to base-page :element-type 'fixnum
330
                                               :displaced-index-offset (* base-index ,base-width)))
331
                    (symbol-macrolet ,base-macros
332
                      ,@(mapcar #'handled-form iteration-forms)))))
333
              (reduction-lambda
334
               (if (consp key-dimensions)
335
                 (let* ((aliased-key-dims (loop for kd in key-dimensions
336
                                               collect (gensym (string kd))))
337
                        (key-dim-map (mapcar #'cons key-dimensions aliased-key-dims))
338
                        (aliased-aggregate-forms (sublis key-dim-map aggregate-forms)))
339
                   (if (rest key-bindings)
340
                     `(lambda (result-page result-index group-key)
341
                        (declare (fixnum result-index)
342
                                 (type (simple-array fixnum (* ,result-width)) result-page)
343
                                 (optimize ,@*field-optimization*))
344
                        (handler-case (symbol-macrolet ,result-macros
345
                                        (destructuring-bind ,aliased-key-dims group-key
346
                                          (declare (ignorable ,@aliased-key-dims))
347
                                          ,@(loop for variable in aggregate-variables
348
                                                  for form in aliased-aggregate-forms
349
                                                  collect `(setf ,variable (solution-value ,form))))
350
                                        t)
351
                          (error () nil)))
352
                     `(lambda (result-page result-index ,(first aliased-key-dims))
353
                        (declare (fixnum result-index)
354
                                 (type (simple-array fixnum (* ,result-width)) result-page)
355
                                 (optimize ,@*field-optimization*)
356
                                 (ignorable ,(first aliased-key-dims)))
357
                        (symbol-macrolet ,result-macros
358
                          (handler-case (progn (trace-data reduce-aggregation-before ,(first aliased-key-dims))
359
                                               ,@(loop for variable in aggregate-variables
360
                                                       for form in aliased-aggregate-forms
361
                                                       append `((setf ,variable (solution-value ,form))))
362
                                               (trace-data reduce-aggregation-after result-index
363
                                                              (make-array ,result-width :displaced-to result-page :element-type 'fixnum
364
                                                                          :displaced-index-offset (* result-index ,result-width))
365
                                                              ,(first aliased-key-dims))
366
                                               t)
367
                            (error () nil))))))
368
                 `(lambda (result-page result-index group-key)
369
                    (declare (fixnum result-index)
370
                             (type (simple-array fixnum (* ,result-width)) result-page)
371
                             (optimize ,@*field-optimization*)
372
                             (ignore group-key))
373
                    (handler-case (symbol-macrolet ,result-macros
374
                                    ,@(loop for variable in aggregate-variables
375
                                            for form in aggregate-forms
376
                                            collect `(setf ,variable (solution-value ,form)))
377
                                    t)
378
                      (error () nil))))))
379
         (let* ((generator-lambda `(lambda ()
380
                                     (let ,temporary-bindings
381
                                       ,@(rest initialization-lambda)
382
                                       (flet ((step-aggregation ,@(rest step-lambda))
383
                                              (reduction-aggregation ,@(rest reduction-lambda)))
384
                                         (cons #'step-aggregation
385
                                                 #'reduction-aggregation)))))
386
                (generator (spocq-compile (setq *compute-aggregate-operators.lambda* generator-lambda))))
387
           (values aggregate-variables
388
                   generator
389
                   (compute-extended-key-op base-dimensions key-bindings)
390
                   generator-lambda))))))
391
 
392
 ;;; version for which the field is extended :
393
 ;;; - no need to extend with the key on-the-fly
394
 ;;; - no aliasing
395
 #+(or)
396
 (defun compute-aggregate-operators (base-dimensions bindings key-dimensions)
397
   "returns three operators which close over a lexical environment with accumulation state:
398
  - initialization, to reset the state
399
  - step (base-page base-index) to perform the iterative step
400
  - reduce (base-page base-index group-key-values), to produce the aggregate solution"
401
   
402
   (let* ((base-width (length base-dimensions))
403
          (aggregate-variables (bindings-variables bindings))
404
          (result-width (length aggregate-variables))
405
          (aggregate-forms (bindings-value-forms bindings))
406
          (temporary-bindings ())         ; ((<variable> <form>) ... ) to allow specific initial values
407
          #+(or) (aggregate-expression-variables (loop for (nil expression) in bindings
408
                                                       append (expression-variables expression)))
409
          (iteration-forms ())
410
          (result-map ()))
411
     (declare (ignorable base-width))    ; in case tracing code is eliminated
412
     (labels ((abstract-aggregate-form (expression)
413
                ;; for each expression,
414
                ;; - locate the aggregation forms,
415
                ;; - note for each the requisite temporary variables
416
                ;; - note an iteration form
417
                ;; - replace the form with a reduction form
418
                (typecase expression
419
                  (cons (cond ((aggregate-operator-p (first expression))
420
                               (assert (notany #'aggregate-expression-p (rest expression)) ()
421
                                       "Invalid aggregate term (nested): ~a" expression)
422
                               (let ((existing-result (rest (assoc expression result-map :test #'equalp))))
423
                                 (or existing-result
424
                                     (multiple-value-bind (t-bindings i-form r-form)
425
                                                          (apply #'aggregation-form-components expression)
426
                                       (setf temporary-bindings (append temporary-bindings t-bindings))
427
                                       (push i-form iteration-forms)
428
                                       (let ((r-variable (gensym "RESULT-")))
429
                                         (push `(,r-variable nil) temporary-bindings)
430
                                         (push (cons expression r-variable) result-map)
431
                                         `(setf ,r-variable ,r-form))))))
432
                              (t
433
                               (cons (first expression)
434
                                     (mapcar #'abstract-aggregate-form (rest expression))))))
435
                  (t expression)))
436
              (abstract-aggregate-condition (term)
437
                (cond ((or (member term key-dimensions)
438
                           (and (variable-p term) (not *strict-aggregation-sample*)))
439
                       ;; allow the key variable to appear as a sample
440
                       (let ((r-variable (gensym "SAMPLE-")))
441
                         (push (list r-variable nil) temporary-bindings)
442
                         (setf iteration-forms (append iteration-forms `((spocq.e::sample-step (,r-variable) ,term))))
443
                         (push (cons term r-variable) result-map)
444
                         r-variable))
445
                      ((or (aggregate-expression-p term) (null key-dimensions))
446
                       (abstract-aggregate-form term))
447
                      ((and (consp term) (every #'(lambda (var) (member var key-dimensions)) (expression-variables term)))
448
                       ;; if it computes some result from key, then allow that
449
                       (let ((r-variable (gensym "KEY-")))
450
                         (push (list r-variable nil) temporary-bindings)
451
                         (setf iteration-forms (append iteration-forms `((spocq.e::sample-step (,r-variable) ,term))))
452
                         (push (cons term r-variable) result-map)
453
                         r-variable))
454
                      (t
455
                       (spocq.e:aggregate-projection-error :token term :expression key-bindings))))
456
              (handled-form (form) `(handler-case ,form (error () nil))))
457
       ;; iterate over the aggregation bindings and collect from each
458
       ;; - any initialization forms fir temporary variables
459
       ;; - any iteration forms, with the field reference replaced by a reference to a temporary interation state
460
       ;; - any reduction forms, with references to the temporary interation state
461
       (setf aggregate-forms (mapcar #'abstract-aggregate-condition aggregate-forms))
462
       
463
       (let* ((result-macros (loop for v in aggregate-variables
464
                                   for v-index from 0
465
                                   collect `(,v (field-object-aref result-page result-index ,v-index ,v))))
466
              (base-macros (cons
467
                            ;; make complete solution available as the list of term numbers
468
                            ;; for use in (distinct *) 
469
                            `(solution (list ,@(loop for v in base-dimensions
470
                                                     for v-index from 0
471
                                                     collect `(aref base-page base-index ,v-index))))
472
                            ;; and each binding available as the interned value
473
                            (loop for v in base-dimensions
474
                                  for v-index from 0
475
                                  collect `(,v (field-object-aref base-page base-index ,v-index ,v)))))
476
              (initialization-lambda
477
               `(lambda () ,@(mapcar #'(lambda (binding) `(setf ,@binding)) temporary-bindings)))
478
              (step-lambda
479
               `(lambda (base-page base-index)
480
                  (declare (fixnum base-index)
481
                           (type (simple-array fixnum (* ,base-width)) base-page)
482
                           (optimize ,@*field-optimization*))
483
                  (trace-data step-aggregation base-index
484
                              (make-array ,base-width :displaced-to base-page :element-type 'fixnum
485
                                          :displaced-index-offset (* base-index ,base-width)))
486
                  (symbol-macrolet ,base-macros
487
                    ,@(mapcar #'handled-form iteration-forms))))
488
              (reduction-lambda
489
               `(lambda (result-page result-index)
490
                  (declare (fixnum result-index)
491
                           (type (simple-array fixnum (* ,result-width)) result-page)
492
                           (optimize ,@*field-optimization*))
493
                  (handler-case (symbol-macrolet ,result-macros
494
                                                 ,@(loop for variable in aggregate-variables
495
                                                     collect `(setf ,variable (solution-value ,form)))
496
                                                 t)
497
                          (error () nil)))))
498
         (let* ((generator-lambda `(lambda ()
499
                                     (let ,temporary-bindings
500
                                       ,@(rest initialization-lambda)
501
                                       (flet ((step-aggregation ,@(rest step-lambda))
502
                                              (reduction-aggregation ,@(rest reduction-lambda)))
503
                                         (cons #'step-aggregation
504
                                                 #'reduction-aggregation)))))
505
                (generator (spocq-compile (setq *compute-aggregate-operators.lambda* generator-lambda))))
506
           (values aggregate-variables
507
                   generator
508
                   (compute-simple-key-op base-dimensions key-dimensions)))))))
509
 ;;;
510
 ;;; aggregation components to be used by the compiled aggregation functions
511
 ;;;
512
 ;;; superseded by the aggregation-form-components, below
513
 #+(or)
514
 (defmacro with-aggregation-reducers ((solution-counter) &body body)
515
   (macroexpand-with-aggregation-reducers solution-counter body))
516
 #+(or)
517
 (defun macroexpand-with-aggregation-reducers (solution-counter body)
518
   `(macrolet ((spocq.a:|avg| (var form count-var &key distinct)
519
                 `(ignore-errors ((lambda (v)
520
                                    ,(if distinct
521
                                       `(unless (gethash v ,distinct)
522
                                          (setf (gethash v ,distinct) t)
523
                                          (spocq.e::incf ,var v)
524
                                          (incf ,count-var))
525
                                       `(progn (spocq.e::incf ,var v)
526
                                               (incf ,count-var))))
527
                                  ,form)))
528
               (spocq.a:|count| (var form &key distinct)
529
                 `(ignore-errors ((lambda (v)
530
                                    ,(if distinct
531
                                       `(unless (gethash v ,distinct)
532
                                          (setf (gethash v ,distinct) t)
533
                                          (incf ,var))
534
                                       `(incf ,var)))
535
                                  ,form)))
536
               (spocq.a:|group_concat| (var form &key distinct separator)
537
                 `(ignore-errors ((lambda (v)
538
                                    ,(if distinct
539
                                       `(unless (gethash v ,distinct)
540
                                          (setf (gethash v ,distinct) t)
541
                                          (setf ,var (spocq.e::concat ,var v ,@(when separator `(:separator ,separator)))))
542
                                       `(setf ,var (spocq.e::concat ,var v ,@(when separator `(:separator ,separator)))))))
543
                                 ,form))
544
               (spocq.a:|min| (var form &key distinct)
545
                 (declare (ignore distinct))
546
                 `(ignore-errors ((lambda (v)
547
                                    (let ((*enable-sort-precedence* t))
548
                                      (setf ,var (spocq.a::|min| ,var v))))
549
                                 ,form)))
550
               (spocq.a:|max| (var form &key distinct)
551
                 (declare (ignore distinct))
552
                 `(ignore-errors ((lambda (v)
553
                                    (let ((*enable-sort-precedence* t))
554
                                      (setf ,var (spocq.a::|max| ,var v))))
555
                                  ,form)))
556
               (spocq.a:|sample| (var form &key distinct)
557
                 (declare (ignore distinct))
558
                 `(ignore-errors ((lambda (v)
559
                                    (unless ,var (setf ,var v)))
560
                                  ,form)))
561
               (spocq.a:|sum| (var form &rest rest)
562
                 `(spocq.a:|avg| ,var ,form ,@rest)))
563
      (incf ,solution-counter)
564
      ,@body))
565
                                                 
566
 (defun aggregation-form-components (op &rest arguments)
567
   (let* ((keywords (member-if #'keywordp arguments))
568
          (argument-expressions (ldiff arguments keywords)))
569
     (apply #'aggregation-form-components-op op argument-expressions keywords)))
570
 
571
 (defgeneric aggregation-form-components-op (op argument-list &key)
572
   (:documentation "Return for the given aggregation operator the respective iteration and reduction components:
573
  - a list of temporary variable bindings
574
  - an iteration form
575
  - a reduction form")
576
 
577
   (:method ((op (eql 'spocq.a:|avg|)) expressions &key distinct)
578
     (let ((expression (first expressions))
579
           (count-var (gensym "AVG-COUNT-"))
580
           (sum-var (gensym "AVG-SUM-"))
581
           (cache-var (when distinct (gensym "AVG-CACHE-"))))
582
       (values `((,count-var 0) (,sum-var nil) ,@(when distinct `((,cache-var (make-term-id-cache)))))
583
               `(spocq.e::avg-step (,count-var ,sum-var ,@(when distinct `(:cache ,cache-var))) ,expression)
584
               `(spocq.e::avg-result ,count-var ,sum-var))))
585
 
586
   (:method ((op (eql 'spocq.a:|count|)) expressions &key distinct)
587
     (let ((expression (first expressions))
588
           (count-var (gensym "COUNT-COUNT"))
589
           (cache-var (when distinct (gensym "COUNT-CACHE-"))))
590
       (values `((,count-var 0) ,@(when distinct `((,cache-var (make-term-id-cache)))))
591
               `(spocq.e::count-step (,count-var ,@(when distinct `(:cache ,cache-var))) ,expression)
592
               `(spocq.e::count-result ,count-var))))
593
 
594
   (:method ((op (eql 'spocq.a:|corr|)) expressions &key distinct)
595
     (declare (ignore distinct))         ; distinct not applicable
596
     (let ((expression1 (first expressions))
597
           (expression2 (second expressions)))
598
       (let ((sequence-var (gensym "CORR-SEQ-")))
599
         (values `((,sequence-var (make-correlation-sequence)))
600
                 `(spocq.e::correlation-step (,sequence-var) ,expression1 ,expression2)
601
                 `(spocq.e::correlation-result ,sequence-var)))))
602
 
603
   (:method ((op (eql 'spocq.a:|group_concat|)) expressions &key distinct separator)
604
     (let ((expression (first expressions))
605
           (sequence-var (gensym "CONCAT-SEQUENCE-"))
606
           (cache-var (when distinct (gensym "CONCAT-CACHE-"))))
607
       (values `((,sequence-var (make-aggregate-sequence)) ,@(when distinct `((,cache-var (make-term-id-cache)))))
608
               `(spocq.e::concat-step (,sequence-var
609
                                       ,@(when distinct `(:cache ,cache-var))
610
                                       ,@(when separator `(:separator ,separator)))
611
                                      ,expression)
612
               `(spocq.e::concat-result ,sequence-var))))
613
 
614
   (:method ((op (eql 'spocq.a:|max|)) expressions &key distinct)
615
     (declare (ignore distinct))
616
     (let ((expression (first expressions))
617
           (max-var (gensym "MAX-")))
618
       (values `((,max-var nil))
619
               `(spocq.e::max-step (,max-var) ,expression)
620
               `(spocq.e::max-result ,max-var))))
621
 
622
   (:method ((op (eql 'spocq.a:|min|)) expressions &key distinct)
623
     (declare (ignore distinct))         ; distinct cannot have any effect
624
     (let ((expression (first expressions))
625
           (min-var (gensym "MIN-")))
626
       (values `((,min-var nil))
627
               `(spocq.e::min-step (,min-var) ,expression)
628
               `(spocq.e::min-result ,min-var))))
629
 
630
   (:method ((op (eql 'spocq.a:|sample|)) expressions &key distinct)
631
     (declare (ignore distinct))         ; distinct cannot have any effect
632
     (let ((expression (first expressions))
633
           (sample-var (gensym "SAMPLE-")))
634
       (values `((,sample-var nil))
635
               `(spocq.e::sample-step (,sample-var) ,expression)
636
               `(spocq.e::sample-result ,sample-var))))
637
 
638
    (:method ((op (eql 'spocq.a:|std|)) expressions &key distinct)
639
     (declare (ignore distinct))         ; distinct not applicable
640
     (let ((expression (first expressions))
641
           (sequence-var (gensym "STD-SEQ-")))
642
       (values `((,sequence-var (make-std-sequence)))
643
               `(spocq.e::std-step (,sequence-var) ,expression)
644
               `(spocq.e::std-result ,sequence-var))))
645
 
646
   (:method ((op (eql 'spocq.a:|sum|)) expressions &key distinct)
647
     (let ((expression (first expressions))
648
           (sum-var (gensym "SUM-SUM-"))
649
           (cache-var (when distinct (gensym "SUM-CACHE-"))))
650
       (values `((,sum-var nil) ,@(when distinct `((,cache-var (make-term-id-cache)))))
651
               `(spocq.e::sum-step (,sum-var ,@(when distinct `(:cache ,cache-var))) ,expression)
652
               `(spocq.e::sum-result ,sum-var)))))
653
 
654
 (defmacro spocq.e::avg-step ((count-var sum-var  &key cache) form)
655
  `((lambda (v)
656
      ,(if cache
657
           `(unless (gethash v ,cache)
658
              (setf (gethash v ,cache) t)
659
              (if ,sum-var
660
                  (spocq.e::incf ,sum-var v)
661
                  (setf ,sum-var v))
662
              (incf ,count-var))
663
           `(progn
664
              (if ,sum-var
665
                  (spocq.e::incf ,sum-var v)
666
                  (setf ,sum-var v))
667
              (incf ,count-var))))
668
    ,form))
669
 
670
 (defmacro spocq.e::avg-result (count-var sum-var)
671
   `(if (zerop ,count-var) 0 (spocq.e::/ ,sum-var ,count-var)))
672
 
673
 (defun object-aref-p (expression)
674
   (and (consp expression)
675
        (case (first expression)
676
          ((field-object-aref foreign-array-named-object-ref) t)
677
          (t nil))))
678
 
679
 (defun suppress-object-reference (form)
680
   (ecase (first form)
681
     (field-object-aref
682
      `(aref ,@(field-object-aref-aref form)))
683
     (foreign-array-object-named-ref
684
      (destructuring-bind (op variable name &rest subscripts) form
685
        (declare (ignore op name))
686
       `(foreign-array-ref ,variable ,@subscripts)))))
687
 
688
 (defmacro spocq.e::count-step ((count-var &key cache) form &environment env)
689
   "Expand a count step reference with attention to several special cases:
690
  - if the term is '*' just count each invocation;
691
  - if the term is a variable which expands to a field reference, also just count each invocation;
692
  - otherwise, there is something which must be evaluated;
693
  - in addition, if there is a cache, ensure uniqueness;"
694
   (if (eq form 'spocq.s::*)
695
     (if cache
696
       ;; solution is bound by the iteration operator
697
       `(unless (gethash solution ,cache)
698
          (setf (gethash solution ,cache) t)
699
          (incf ,count-var))
700
       `(incf ,count-var))
701
     (let ((expansion (macroexpand-1 form env)))
702
       (if (object-aref-p expansion)
703
         (if cache
704
           `((lambda (v)
705
               (unless (or (= +null-term-id+ v) (gethash v ,cache))
706
                 (setf (gethash v ,cache) t)
707
                 (incf ,count-var)))
708
             (aref ,@(subseq expansion 1 4)))
709
           `(unless (= +null-term-id+ ,(suppress-object-reference expansion)) (incf ,count-var)))
710
         `((lambda (v) ,(if cache
711
                          `(unless (gethash v ,cache)
712
                             (setf (gethash v ,cache) t)
713
                             (incf ,count-var))
714
                          `(progn v (incf ,count-var))))
715
           ,form)))))
716
 
717
 (defmacro spocq.e::count-result (count-var)
718
   count-var)
719
 
720
 (defmacro spocq.e::concat-step ((sequence-var &key cache (separator " ")) form)
721
   `((lambda (v)
722
       ,(if cache
723
          `(unless (gethash v ,cache)
724
             (setf (gethash v ,cache) t)
725
             (setf ,sequence-var (spocq.e::concat ,sequence-var v ,separator)))
726
          `(setf ,sequence-var (spocq.e::concat ,sequence-var v ,separator))))
727
     ,form))
728
 
729
 (defmacro spocq.e::concat-result (sequence-var)
730
   sequence-var)
731
 
732
 (defmacro spocq.e::correlation-step ((sequence-var) form1 form2)
733
   `((lambda (v1 v2)
734
       (vector-push-extend (list v1 v2) ,sequence-var))
735
     ,form1 ,form2))
736
 
737
 (defun spocq.e::correlation-coefficient (sequence)
738
   (if (plusp (length sequence))
739
       (statistics:correlation-coefficient sequence)
740
       1))
741
 
742
 (defmacro spocq.e::correlation-result (sequence-var)
743
   `(spocq.e::correlation-coefficient ,sequence-var))
744
 
745
 (defmacro spocq.e::max-step ((max-var) form)
746
   `((lambda (v) (setf ,max-var (if ,max-var (let ((*enable-sort-precedence* t)) (spocq.e::max ,max-var v)) v)))
747
     ,form))
748
 
749
 (defmacro spocq.e::max-result (max-var)
750
   `(cond (,max-var)
751
          (t
752
           (funcall *empty-aggregation-set-operator* "MAX set is empty.")
753
           (load-time-value (spocq:make-unbound-variable ',max-var)))))
754
 
755
 (defmacro spocq.e::min-step ((min-var) form)
756
   `((lambda (v) (setf ,min-var (if ,min-var (let ((*enable-sort-precedence* t)) (spocq.e::min ,min-var v)) v)))
757
     ,form))
758
 
759
 (defmacro spocq.e::min-result (min-var)
760
   `(cond (,min-var)
761
          (t
762
           (funcall *empty-aggregation-set-operator* "MIN set is empty.")
763
           (load-time-value (spocq:make-unbound-variable ',min-var)))))
764
 
765
 (defmacro spocq.e::sample-step ((sample-var) form)
766
   "Retain the value as the group sample if either it is the first one
767
  or whatever has been cached was an unbound indicator"
768
   `((lambda (v) (typecase ,sample-var
769
                   ((or null spocq:unbound-variable)
770
                    (setf ,sample-var v))))
771
     ,form))
772
 
773
 (defmacro spocq.e::sample-result (sample-var)
774
   `(cond (,sample-var)
775
          (t
776
           (funcall *empty-aggregation-set-operator* "SAMPLE set is empty.")
777
           (load-time-value (spocq:make-unbound-variable ',sample-var)))))
778
 
779
 
780
 (defmacro spocq.e::std-step ((sequence-var) form)
781
   `((lambda (v)
782
       (vector-push-extend v ,sequence-var))
783
     ,form))
784
 
785
 (defun spocq.e::standard-deviation (sequence)
786
   (if (plusp (length sequence))
787
       (let* ((total (reduce #'+ sequence :initial-value 0))
788
              (avg (/ total (length sequence)))
789
              (dev-sq (loop for value across sequence
790
                        for dev = (- value avg)
791
                        sum (* dev dev))))
792
         (sqrt (/ dev-sq (length sequence))))
793
       0))
794
 
795
 (defmacro spocq.e::std-result (sequence-var)
796
   `(spocq.e::standard-deviation ,sequence-var))
797
 
798
 (defmacro spocq.e::sum-step ((sum-var &key cache) form)
799
   `((lambda (v)
800
       ,(if cache
801
          `(unless (gethash v ,cache)
802
             (setf (gethash v ,cache) t)
803
             (if ,sum-var
804
                 (spocq.e::incf ,sum-var v)
805
                 (setf ,sum-var v)))
806
          `(if ,sum-var
807
                 (spocq.e::incf ,sum-var v)
808
                 (setf ,sum-var v))))
809
     ,form))
810
 
811
 (defmacro spocq.e::sum-result (sum-var)
812
   sum-var)
813
 
814
 
815
 (defun make-aggregate-sequence ()
816
   (make-array 128 :element-type 'character :adjustable t :fill-pointer 0))
817
 (defun make-std-sequence ()
818
   (make-array 32 :adjustable t :fill-pointer 0))
819
 (defun make-correlation-sequence ()
820
   (make-array 32 :adjustable t :fill-pointer 0))
821
 
822
 
823
 ;;; if there is a group clause, construct a set of sets and iterate over it.
824
 ;;; otherwise operated just on the single field
825
 ;;; apply the havin as filters to whichever result
826
 ;;; what is the scope? of the variable in the select clause itself - i suspect, just outside.
827
 
828
 
829
 (defgeneric spocq.e::concat (base value separator)
830
   (:method (base (value string) separator)
831
     (flet ((extend (base value)
832
              (unless (>= (array-dimension base 0) (+ (length base) (length value)))
833
                (setf base (adjust-array base (+ (array-dimension base 0) (ash (array-dimension base 0) -2) (length value)))))
834
              (setf (fill-pointer base) (+ (length base) (length value)))
835
              (replace base value :start1 (- (length base) (length value)))
836
              ;; do not (constrain-string-length base)
837
              ;; leave that to what a string is interned in the store
838
              base))
839
       (when (plusp (length base))
840
         (setf base (extend base separator)))
841
       (extend base value)))
842
   (:method (base (value t) separator)
843
     (spocq.e::concat base (spocq.e:string value) separator)))
844
 ;;; (spocq.e::concat (spocq.e::concat (make-aggregate-sequence) "a" :separator "..") "qwer" ",")
845
 
846
 
847
 #|
848
 (spocq.i::test-sparql "
849
   select (corr(strlen(str(?o1)), strlen(str(?o2))) as ?corr)
850
   where {?s ?pred ?o bind(?o as ?o1) bind(?o as ?o2)}"
851
    :repository-id "jhacker/foaf")
852
 (spocq.i::test-sparql "select (count(*) as ?corr) where {?s ?pred ?s1 . ?s ?pred ?s2}"
853
    :repository-id "jhacker/foaf")
854
 |#