Coverage report: /development/source/library/org/datagraph/spocq-shard/src/core/encoding/csv.lisp
| Kind | Covered | All | % |
| expression | 0 | 301 | 0.0 |
| branch | 0 | 36 | 0.0 |
Key
Not instrumented
Conditionalized out
Executed
Not executed
Both branches taken
One branch taken
Neither branch taken
1
;;; -*- Mode: lisp; Syntax: ansi-common-lisp; Base: 10; Package: org.datagraph.spocq.implementation; -*-
3
(in-package :org.datagraph.spocq.implementation)
8
;;; https://tools.ietf.org/html/rfc4180
10
(defparameter *csv-bnf* "
11
[[1]] file ::= (header EOL)? filerecords?
12
[[1a]] filerecords ::= record filerecords?
13
[[2]] header :== name (COMMA header)?
14
[[3]] record :== recordfields EOL
15
[[3a]] recordfields ::= field (COMMA recordfields)?
17
[[5]] field ::= escaped | nonescaped
18
[[6]] escaped ::= WS* DQUOTE1 escapedtextdata* DQUOTE2 WS*
19
//[[6a]] escapedtextdata ::= TEXTDATA | COMMA | CR | LF | DQUOTEDQUOTE
20
[[6a]] escapedtextdata ::= TEXTDATA | COMMA | WS | DQUOTEDQUOTE
21
[[6b]] DQUOTEDQUOTE ::= DQUOTE1 DQUOTE2
22
[[6c]] DQUOTE1 ::= DQUOTE
23
[[6d]] DQUOTE2 ::= DQUOTE
24
[[7]] nonescaped ::= TEXTDATA*
29
[[12]] EOL ::= (CR LF) | CR | LF
30
//[[13]] TEXTDATA = %x20-21 | %x23-2B | %x2D-7E
32
;;; nb. the rfc4180 bnf with the optional terminating EOL is ambiguous beyond repair.
33
;;; it allows a single zero-length field on the last line without an EOL.
36
(defpackage :org.datagraph.csv
38
:org.datagraph.spocq.implementation
41
(:shadow :input-reference :input-eof?)
45
"filerecords-Constructor"
48
"recordfields-Constructor"
52
"nonescaped-Constructor"
53
"TEXTDATA-Constructor"
56
(:import-from :spocq.i
59
;;; Current field separator for the CSV grammar predicates. is-comma and
;;; is-textdata compare against it, parse-csv's :separator keyword rebinds it,
;;; and read-csv-line assigns it after inspecting the first line it reads.
(defparameter odcsv:*comma* #\,)
61
(defun odcsv::|file-Constructor| (EOL filerecords header)
62
(declare (ignore EOL))
64
(cons header filerecords)
67
(defun odcsv::|filerecords-Constructor| (filerecords record)
  "Result for the filerecords rule: RECORD placed in front of the
FILERECORDS value that came from the optional recursive part of the rule."
  (list* record filerecords))
70
(defun odcsv::|header-Constructor| (COMMA header name)
71
(declare (ignore COMMA))
74
(defun odcsv::|record-Constructor| (EOL recordfields)
75
(declare (ignore EOL))
78
(defun odcsv::|recordfields-Constructor| (COMMA field record)
79
(declare (ignore COMMA))
82
(defun odcsv::|name-Constructor| (field)
85
(defun odcsv::|field-Constructor| (escaped nonescaped)
  "Result for the field rule: ESCAPED when it is non-NIL, otherwise
NONESCAPED."
  (cond (escaped)
        (t nonescaped)))
88
(defun odcsv::|escaped-Constructor| (DQUOTE1 DQUOTE2 escapedtextdata* &optional ws)
  "Result for the escaped rule: a string made from the elements of
ESCAPEDTEXTDATA* taken in reverse order. The quote and whitespace
arguments do not contribute to the result."
  (declare (ignore DQUOTE1 DQUOTE2 ws))
  (let ((in-order (reverse escapedtextdata*)))
    (coerce in-order 'string)))
92
(defun odcsv::|escapedtextdata-Constructor| (COMMA DQUOTEDQUOTE TEXTDATA WS)
  "Result for the escapedtextdata rule: the first non-NIL value among
COMMA, DQUOTEDQUOTE and TEXTDATA, falling back to WS."
  (cond (COMMA)
        (DQUOTEDQUOTE)
        (TEXTDATA)
        (t WS)))
95
(defun odcsv::|DQUOTEDQUOTE-Constructor| (DQUOTE1 DQUOTE2)
96
(declare (ignore DQUOTE1 DQUOTE2))
99
(defun odcsv::|EOL-Constructor| (CR LF)
100
(declare (ignore CR LF))
103
(defun odcsv::|EOF-Constructor| (EOL)
104
(declare (ignore EOL))
107
(defun odcsv::|nonescaped-Constructor| (textdata*)
  "Result for the nonescaped rule: the elements of TEXTDATA*, taken in
reverse order, gathered into a string and passed through
trim-string-whitespace."
  (let ((text (coerce (reverse textdata*) 'string)))
    (trim-string-whitespace text)))
110
;;; Constructors for the single-character terminals: each one is defined as a
;;; one-argument function that returns its argument unchanged.
(macrolet ((def-char-constructors (&rest names)
             (flet ((identity-defun (name)
                      `(defun ,name (c) c)))
               `(progn ,@(mapcar #'identity-defun names)))))
  (def-char-constructors
    ORG.DATAGRAPH.CSV::|COMMA-Constructor|
    ORG.DATAGRAPH.CSV::|CR-Constructor|
    ORG.DATAGRAPH.CSV::|DQUOTE1-Constructor|
    ORG.DATAGRAPH.CSV::|DQUOTE2-Constructor|
    ORG.DATAGRAPH.CSV::|LF-Constructor|
    ORG.DATAGRAPH.CSV:|TEXTDATA-Constructor|
    ORG.DATAGRAPH.CSV:|WS-Constructor|))
121
(defun odcsv::is-dquote (x)
124
(defun odcsv::is-cr (x)
127
(defun odcsv::is-lf (x)
130
(defun odcsv::is-comma (x)
  "True when X is eql to the current value of odcsv:*comma*."
  (let ((separator odcsv:*comma*))
    (eql separator x)))
133
;;; Character-class predicate for the TEXTDATA terminal of the CSV grammar.
;;; NOTE: original lines 134 and 142-143 are absent from this excerpt, so the
;;; form that combines the tests below and the end of the definition are not
;;; visible here.
(defun odcsv::is-textdata (x)
135
;; the active separator is rejected independently of the code ranges below
(not (eql x odcsv:*comma*))
136
(let ((code (char-code x)))
137
(or (<= #x20 code #x21) ;; space and !; codes below #x20 (control, incl. cr and lf) are outside every range
138
(<= #x23 code #x2B) ;; # through +; skips the double quote (#x22)
139
(<= #x2D code #x7E) ;; - through ~; skips the comma (#x2C) and delete (#x7F)
140
(<= #x80 code) ;; every character code at or above #x80 (non-ASCII)
141
;; a literal comma counts as text data when the active separator is not #\,
(and (= code #x2c) (not (eql #\, odcsv:*comma*)))
144
(defun odcsv::is-ws (x)
  "Return X when it is a space, tab, return or linefeed character,
otherwise NIL."
  (case x
    ((#\space #\tab #\return #\linefeed) x)
    (t nil)))
148
;;; Records the first line obtained by read-csv-line. While the value is NIL,
;;; read-csv-line stores the line it has just read here and uses that line to
;;; choose the separator. The default parse-csv method rebinds it to NIL.
(defparameter *parse-csv.initial-line* nil)
150
;;; parse-csv runs the grammar-generated CSV parser. Three methods are visible:
;;; the default one (first argument used as a string), one for pathnames and
;;; one for streams.
;;; NOTE: the original line numbers jump inside this form (156, 160, 166, 171,
;;; 176, 180 and 187-189 are not shown), so some of its forms are missing from
;;; this excerpt and the definition runs past the last visible line.
(defgeneric parse-csv (string &key start end start-name separator)
151
;; default method: s-s and e-s are the supplied-p flags for :start and :end
(:method (string &key (start 0 s-s) (end (length string) e-s) (start-name 'odcsv::|file|)
152
;; the :separator keyword binds odcsv:*comma* itself for the duration of the call
((:separator odcsv:*comma*) odcsv:*comma*))
153
;; fresh bindings for the parser state variables and the first-line marker
(let ((*max-input-index* 0)
154
(atnp:*atn-term* nil)
155
(*parse-csv.initial-line* nil))
157
;; narrows STRING to the range start..end; presumably guarded by s-s / e-s on
;; the line that is not shown -- TODO confirm
(setf string (subseq string start end)))
158
(multiple-value-bind (result index success)
159
;; the entry point is always odcsv::|file-Parser|; the rule to start from is
;; passed as :start-name
(funcall 'odcsv::|file-Parser| string :start-name start-name)
161
;; returns the parse result, the (possibly narrowed) input and the index
(values result string index)
162
;; _aref yields NIL unless INDEX is an integer below the array length
(flet ((_aref (array index)
163
(when (and (integerp index) (< index (length array))) (aref array index))))
164
;; reports the input and the element at INDEX; presumably reached when
;; SUCCESS is false -- the test itself is on a line that is not shown
(spocq.e::message-syntax-error :expression string
165
:token (_aref string index)
167
:line-offset nil))))))
168
;; pathname method: open the file for input and parse the resulting stream
;; with the same keyword arguments
(:method ((pathname pathname) &rest args)
169
(with-open-file (stream pathname :direction :input)
170
(apply #'parse-csv stream args)))
172
;; stream method: :start-name is accepted but ignored
(:method ((stream stream) &key (start 0) end (separator odcsv:*comma*) start-name)
173
(declare (ignore start-name))
174
;; local helpers: the header line is parsed with the odcsv::|header| rule,
;; every later line with the odcsv::|recordfields| rule
(labels ((parse-csv-header (stream)
175
(let ((line (read-csv-line stream :separator separator)))
177
(spocq.i::parse-csv line :start-name 'odcsv::|header| :separator separator))))
178
(parse-csv-line (stream)
179
;; NOTE(review): unlike parse-csv-header above, this call does not pass
;; :separator, so read-csv-line uses its default, the current value of
;; odcsv:*comma* -- confirm that this is intended
(let ((line (read-csv-line stream)))
181
(spocq.i::parse-csv line :start-name 'odcsv::|recordfields| :separator separator)))))
182
(let* ((header (parse-csv-header stream)))
183
;; COUNT starts at 0; the loop stops when a parsed line is NIL or COUNT has
;; reached END; START is the lower bound tested by the WHEN clause
(values (loop for count from 0
184
for line = (parse-csv-line stream)
185
until (or (null line) (and end (>= count end)))
186
when (>= count start)
190
;;; Global counterpart of the header helper: picks the separator for LINE and
;;; splits LINE on it. The stream method of parse-csv defines a local function
;;; of the same name with labels.
;;; NOTE: the end of this definition (original line 195 onward) is not shown in
;;; this excerpt.
(defun parse-csv-header (line &key (separator odcsv:*comma*))
191
"given a prospective separator, verify/replace it and use that value
192
to split the string. suppress double quotes at the same time"
193
;; SEPARATOR is replaced by whatever find-csv-separator returns for LINE
(setf separator (find-csv-separator line separator))
194
;; split-string receives a vector holding the separator and the double quote
(values (split-string line (vector separator #\"))
197
;;; Chooses a separator for LINE. SEPARATORS lists the candidates tried by the
;;; second clause: comma, semicolon, tab and vertical bar by default.
;;; NOTE: original lines 199 and 202-203 are absent from this excerpt, so the
;;; values produced by the clauses are not visible; presumably the first clause
;;; yields SEPARATOR itself -- TODO confirm.
(defun find-csv-separator (line separator &key (separators '(#\, #\; #\tab #\|)))
198
;; first clause: the proposed separator occurs in LINE
(cond ((position separator line)
200
;; second clause: scan the candidates; this loop variable shadows the
;; SEPARATOR parameter
((loop for separator in separators
201
when (find separator line)
205
;;; Reads lines from STREAM with read-line, accumulates them in LINES and
;;; returns the accumulated text with leading/trailing return, newline and
;;; space characters trimmed.
;;; NOTE: original lines 208 and 216 are absent from this excerpt; the binding
;;; of LINES and the form that adds LINE to it are therefore not visible.
(defun read-csv-line (stream &key (separator odcsv:*comma*))
206
"read and return a single - possibly continued, 'csv' line
207
if this is the first line and the separator is not found, try to pick a new one"
209
;; read-line is called with eof-error-p NIL, so LINE is NIL at end of file
(loop (let ((line (read-line stream nil nil)))
210
;; first line only: remember it, let find-csv-separator choose the separator
;; and store the choice in odcsv:*comma*. setq assigns whichever binding of
;; odcsv:*comma* is current. This runs before the end-of-file test below.
(unless *parse-csv.initial-line*
211
(setf *parse-csv.initial-line* line)
212
(setf separator (find-csv-separator line separator))
213
(setq odcsv:*comma* separator))
214
(unless line (return))
215
;; a one-character newline string is pushed between accumulated lines
(when lines (push (load-time-value (make-string 1 :initial-element #\newline)) lines))
217
(let ((comma (position separator line))
218
(dquote (position #\" line)))
219
;; NOTE(review): DQUOTE is a position (an integer or NIL), so
;; (count dquote line) counts elements of LINE that are eql to that value,
;; not double-quote characters; for a string this is presumably always 0,
;; which makes the evenp test always true. Confirm whether
;; (count #\" line) was intended.
(when (and (null (rest lines)) (evenp (count dquote line))) (return))
220
;; also stop when the line contains a double quote that is not preceded by
;; a separator
(when (and dquote (if comma (> comma dquote) t)) (return)))))
222
;; LINES is in reverse reading order; with more than one element the pieces
;; are concatenated in reading order, otherwise the single element is used
(string-trim #(#\return #\newline #\space)
223
(if (rest lines) (apply #'concatenate 'string (reverse lines)) (first lines))))))
225
(defun csv-variable-name (name)
  "Return NAME, after trim-string-whitespace, with every space, slash,
period and hyphen replaced by an underscore."
  (reduce (lambda (result unwanted)
            (substitute #\_ unwanted result))
          '(#\space #\/ #\. #\-)
          :initial-value (trim-string-whitespace name)))
232
((spocq.i::parse-csv #p"/opt/dydra/bin/datagraph_nexperia/nexperia/SPIDER-Datahub-views.csv")
233
(spocq.i::parse-csv #p"/root/imports/Contacts.csv")
238
(defun odcsv::input-reference (index)
  "Return the element of ATN-PARSER::*ATN-INPUT at INDEX, or NIL when
INDEX is not below the length of that input."
  (let ((input ATN-PARSER::*ATN-INPUT))
    (if (< index (length input))
        (aref input index)
        nil)))
241
(defun odcsv::input-eof? (index)
  "True when INDEX is at or beyond the length of ATN-PARSER::*ATN-INPUT."
  (let ((limit (length ATN-PARSER::*ATN-INPUT)))
    (not (< index limit))))
247
(bnfp:compile-atn-system spocq.i::*csv-bnf*
248
:execute t :compile nil
249
:token-package (find-package :spocq.s)
250
:source-package (find-package :odcsv)
251
:source-pathname #p"/development/source/library/org/datagraph/spocq-dev/src/core/encoding/csv-grammar.lisp"
252
:input-function 'odcsv::input-reference
253
:input-eof-function 'odcsv::input-eof?
256
;; Compile csv-grammar.lisp from the spocq-dev tree, writing the output to
;; "csv-grammar.fasl" (a relative name), then load what compile-file returns.
(load (compile-file #p"/development/source/library/org/datagraph/spocq-dev/src/core/encoding/csv-grammar.lisp"
257
:output-file "csv-grammar.fasl"))
260
(bnfp:compile-atn-system spocq.i::*csv-bnf*
261
:execute t :compile nil
262
:token-package (find-package :spocq.s)
263
:source-package (find-package :odcsv)
264
:source-pathname #p"/tmp/csv-grammar.lisp"
265
:input-function 'odcsv::input-reference
266
:input-eof-function 'odcsv::input-eof?
269
;; Compile /tmp/csv-grammar.lisp, writing the output to "csv-grammar.fasl"
;; (a relative name), then load what compile-file returns.
(load (compile-file #p"/tmp/csv-grammar.lisp"
270
:output-file "csv-grammar.fasl"))
272
(spocq.i::parse-csv "asdf,qwer
277
(spocq.i::parse-csv "asdf,qwer
283
(spocq.i::parse-csv "1,\"2,
285
" :start-name 'odcsv::|record|)
286
(spocq.i::parse-csv "1,\"2,3\"
287
" :start-name 'odcsv::|record|)
288
(spocq.i::parse-csv "\"2,3\"\"\",4
289
" :start-name 'odcsv::|file|)
290
(spocq.i::parse-csv "\"2,3\"\"\",4
291
" :start-name 'odcsv::|record|)
292
(spocq.i::parse-csv "\"2,3\"" :start-name 'odcsv::|field|)
295
"47212.5,ASHLAND,KS,67831,112111,Partnership,Unanswered,Unanswered,Unanswered,,4,04/15/2020,\" Farm Credit of Western Oklahoma, ACA\",KS - 01"
296
:start-name 'odcsv::|recordfields|)
298
"47212.5,ASHLAND,KS,67831,112111,Partnership,Unanswered,Unanswered,Unanswered,,4,04/15/2020,\"Farm Credit of Western Oklahoma ACA\",KS - 01"
299
:start-name 'odcsv::|recordfields|)
302
"2020,\" Farm, 1 2 3\",KS - 01"
303
:start-name 'odcsv::|recordfields|)