diff options
| author | EuAndreh <eu@euandre.org> | 2023-07-19 13:29:44 -0300 |
|---|---|---|
| committer | EuAndreh <eu@euandre.org> | 2023-07-19 19:19:28 -0300 |
| commit | faac76d7199d12fffda9d5bf141defbe962b81aa (patch) | |
| tree | 33c2f5c8afd7c07c4ff81589ac4c7b045c6d8d98 /src/org | |
| parent | Initial empty commit (diff) | |
| download | resyn-faac76d7199d12fffda9d5bf141defbe962b81aa.tar.gz resyn-faac76d7199d12fffda9d5bf141defbe962b81aa.tar.xz | |
Initial reader macro syntax
With syntax support for a variety of regex types (BRE, ERE and PCRE) and
for "ignore case" and "global" options, but with only PCRE implemented
underneath via cl-ppcre [0].
[0]: https://edicl.github.io/cl-ppcre/
Diffstat (limited to 'src/org')
| -rw-r--r-- | src/org/euandre/resyn.lisp | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/src/org/euandre/resyn.lisp b/src/org/euandre/resyn.lisp new file mode 100644 index 0000000..9be2d35 --- /dev/null +++ b/src/org/euandre/resyn.lisp @@ -0,0 +1,198 @@ +(defpackage :org.euandre.resyn + (:use :cl) + (:shadow :match + :substitute) + (:export :match + :substitute + :reader)) +(in-package :org.euandre.resyn) + +#| +TODO: +- capture group +- (#~m/a(b+)c/i "abc ABbC") => [["abc" "ABbC"] ["b" "Bb"]], or ["abc" "ABbC"] + [["b"] ["Bb"]] +- robust ignore-case? (instead of #'STRING-DOWNCASE) +- global? +- BRE, ERE e PCRE +- glob? +- lambda expression vs funcallabla instance: simplicity vs read/print support +- turn the "if" inside #'read-regex into a compiler macro +- better error message than ecase? +- gensyms +- ack let over lambda +- split: (split #~/(,| and) ?/ "a, b, c and d") => ["a" "b" "c" "d"] + + + +regcomp: BRE, ERE +cl-ppcre: PCRE + + + + +(#~m/a(b+)(c)/ "123 456") +[] + + +(#~m/a(b+)(c)/ "abbc") +[["abbc", "bb", "c"]] + +(#~m/a(b+)(c)/ "abbc abc") +[["abbc", "bb", "c"] + ["abc", "b", "c"]] + + + + +(#~m/a(b+)(c)/ "abbc abc") +"abbc" +["bb" "c"] + +(#~m/a(b+)(c)/ "ac ac") +nil +nil + +(#~m/a(b+)(c)/ "abbc abc") +["abbc" "abc"] +[["bb" "c"] ["b" "c"]] + +(let [matches (#~m/a(b+)(c)/ "abbc abc")] + (when (seq matches)) + ...) + +(let [(matches captures) (#~m/a(b+)(c)/ "abbc abc")] + (map (fn [[b c]] + ...) + captures)) + +(loop + for (match [word spaces]) = (#~/([a-z]+)( +)/ "a b c d e f") + when (...)) +|# + + +(defun segment-reader (stream char n) + "FIXME: docstring" + (if (<= n 0) + (list '() '()) + (let ((chars '()) + (fmts '()) + (escaping? nil)) + (do ((curr (read-char stream) (read-char stream))) + ((and (char= char curr) (not escaping?))) + (case curr + (#\% + (if escaping? + (push #\% chars) + (progn + (push #\~ chars) + (push #\a chars) + (read-char stream) ;; discard #\( + (push `(progn ,@(read-delimited-list #\) stream)) + fmts))) + (setf escaping? nil)) + (#\~ + (when escaping? + (push #\\ chars)) + (push #\~ chars) + (push #\~ chars) + (setf escaping? nil)) + (#\\ + (when escaping? + (push #\\ chars) + (push #\\ chars)) + (setf escaping? (not escaping?))) + (t + (when escaping? + (push #\\ chars)) + (push curr chars) + (setf escaping? nil)))) + (destructuring-bind (segments formats) + (segment-reader stream char (1- n)) + (list + (cons (coerce (nreverse chars) 'string) + segments) + (cons (nreverse fmts) formats)))))) + +(defun read-regex (stream char n) + "FIXME: docstring" + (destructuring-bind (segments formats) + (segment-reader stream + char + n) + (mapcar (lambda (seg fmt) + (if fmt + `(format nil ,seg ,@fmt) + (format nil seg))) + segments + formats))) + +(defun match (regex str type &key ignore-case? global?) + "FIXME: docstring" + (declare (ignore global?)) ;; FIXME: implement global matching + (ecase type + ((:ERE :PCRE) ;; FIXME: implement separate rules + (cl-ppcre:scan-to-strings + (if ignore-case? (string-downcase regex) regex) ; FIXME: more robust + (if ignore-case? (string-downcase str) str))))) ; handling of case + +(defun substitute (regex str replacement type &key ignore-case? global?) + "FIXME: docstring" + (declare (ignore global?)) ;; FIXME: implement non-global replacing + (ecase type + ((:BRE :ERE :PCRE) ;; FIXME: implement separate rules + (cl-ppcre:regex-replace-all + (if ignore-case? (string-downcase regex) regex) ; FIXME: better + (if ignore-case? (string-downcase str) str) ; case handling + (if ignore-case? (string-downcase replacement) replacement))))) + +(defun reader (stream sub-char numarg) + "FIXME: docstring" + (declare (ignore sub-char numarg)) + (let* ((ignore-case? nil) + (global? nil) + (type nil) + (seen-modifiers '()) + (mode-char (read-char stream)) + (parts (read-regex stream + (ecase mode-char + ((#\m #\s) (read-char stream)) + (#\/ #\/)) + (ecase mode-char + ((#\m #\/) 1) + (#\s 2)))) + (regex (first parts)) + (replacement (second parts))) + (loop while t do + (let ((modifier (peek-char nil stream))) + (when (member modifier seen-modifiers :test #'char=) + (return)) + (case modifier + (#\i (setf ignore-case? t)) + (#\g (setf global? t)) + (#\B (if type (return) (setf type :BRE))) + (#\E (if type (return) (setf type :ERE))) + (#\P (if type (return) (setf type :PCRE))) + (t (return))) + (push modifier seen-modifiers) + (read-char stream))) + (when (not type) + (setf type :ERE)) + (ecase mode-char + ((#\m #\/) + `(lambda (str) + (match + ,regex + str + ,type + :ignore-case? ,ignore-case? + :global? ,global?))) + (#\s + `(lambda (str) + (substitute + ,regex + str + ,replacement + ,type + :ignore-case? ,ignore-case? + :global? ,global?)))))) |
