aboutsummaryrefslogtreecommitdiff
path: root/src/org
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2023-07-19 13:29:44 -0300
committerEuAndreh <eu@euandre.org>2023-07-19 19:19:28 -0300
commitfaac76d7199d12fffda9d5bf141defbe962b81aa (patch)
tree33c2f5c8afd7c07c4ff81589ac4c7b045c6d8d98 /src/org
parentInitial empty commit (diff)
downloadresyn-faac76d7199d12fffda9d5bf141defbe962b81aa.tar.gz
resyn-faac76d7199d12fffda9d5bf141defbe962b81aa.tar.xz
Initial reader macro syntax
With syntax support for a variety of regex types (BRE, ERE and PCRE) and for "ignore case" and "global" options, but with only PCRE implemented underneath via cl-ppcre [0]. [0]: https://edicl.github.io/cl-ppcre/
Diffstat (limited to 'src/org')
-rw-r--r--src/org/euandre/resyn.lisp198
1 files changed, 198 insertions, 0 deletions
diff --git a/src/org/euandre/resyn.lisp b/src/org/euandre/resyn.lisp
new file mode 100644
index 0000000..9be2d35
--- /dev/null
+++ b/src/org/euandre/resyn.lisp
@@ -0,0 +1,198 @@
+(defpackage :org.euandre.resyn
+ (:use :cl)
+ (:shadow :match
+ :substitute)
+ (:export :match
+ :substitute
+ :reader))
+(in-package :org.euandre.resyn)
+
+#|
+TODO:
+- capture group
+- (#~m/a(b+)c/i "abc ABbC") => [["abc" "ABbC"] ["b" "Bb"]], or ["abc" "ABbC"] + [["b"] ["Bb"]]
+- robust ignore-case? (instead of #'STRING-DOWNCASE)
+- global?
+- BRE, ERE e PCRE
+- glob?
+- lambda expression vs funcallabla instance: simplicity vs read/print support
+- turn the "if" inside #'read-regex into a compiler macro
+- better error message than ecase?
+- gensyms
+- ack let over lambda
+- split: (split #~/(,| and) ?/ "a, b, c and d") => ["a" "b" "c" "d"]
+
+
+
+regcomp: BRE, ERE
+cl-ppcre: PCRE
+
+
+
+
+(#~m/a(b+)(c)/ "123 456")
+[]
+
+
+(#~m/a(b+)(c)/ "abbc")
+[["abbc", "bb", "c"]]
+
+(#~m/a(b+)(c)/ "abbc abc")
+[["abbc", "bb", "c"]
+ ["abc", "b", "c"]]
+
+
+
+
+(#~m/a(b+)(c)/ "abbc abc")
+"abbc"
+["bb" "c"]
+
+(#~m/a(b+)(c)/ "ac ac")
+nil
+nil
+
+(#~m/a(b+)(c)/ "abbc abc")
+["abbc" "abc"]
+[["bb" "c"] ["b" "c"]]
+
+(let [matches (#~m/a(b+)(c)/ "abbc abc")]
+ (when (seq matches))
+ ...)
+
+(let [(matches captures) (#~m/a(b+)(c)/ "abbc abc")]
+ (map (fn [[b c]]
+ ...)
+ captures))
+
+(loop
+ for (match [word spaces]) = (#~/([a-z]+)( +)/ "a b c d e f")
+ when (...))
+|#
+
+
+(defun segment-reader (stream char n)
+ "FIXME: docstring"
+ (if (<= n 0)
+ (list '() '())
+ (let ((chars '())
+ (fmts '())
+ (escaping? nil))
+ (do ((curr (read-char stream) (read-char stream)))
+ ((and (char= char curr) (not escaping?)))
+ (case curr
+ (#\%
+ (if escaping?
+ (push #\% chars)
+ (progn
+ (push #\~ chars)
+ (push #\a chars)
+ (read-char stream) ;; discard #\(
+ (push `(progn ,@(read-delimited-list #\) stream))
+ fmts)))
+ (setf escaping? nil))
+ (#\~
+ (when escaping?
+ (push #\\ chars))
+ (push #\~ chars)
+ (push #\~ chars)
+ (setf escaping? nil))
+ (#\\
+ (when escaping?
+ (push #\\ chars)
+ (push #\\ chars))
+ (setf escaping? (not escaping?)))
+ (t
+ (when escaping?
+ (push #\\ chars))
+ (push curr chars)
+ (setf escaping? nil))))
+ (destructuring-bind (segments formats)
+ (segment-reader stream char (1- n))
+ (list
+ (cons (coerce (nreverse chars) 'string)
+ segments)
+ (cons (nreverse fmts) formats))))))
+
+(defun read-regex (stream char n)
+ "FIXME: docstring"
+ (destructuring-bind (segments formats)
+ (segment-reader stream
+ char
+ n)
+ (mapcar (lambda (seg fmt)
+ (if fmt
+ `(format nil ,seg ,@fmt)
+ (format nil seg)))
+ segments
+ formats)))
+
+(defun match (regex str type &key ignore-case? global?)
+ "FIXME: docstring"
+ (declare (ignore global?)) ;; FIXME: implement global matching
+ (ecase type
+ ((:ERE :PCRE) ;; FIXME: implement separate rules
+ (cl-ppcre:scan-to-strings
+ (if ignore-case? (string-downcase regex) regex) ; FIXME: more robust
+ (if ignore-case? (string-downcase str) str))))) ; handling of case
+
+(defun substitute (regex str replacement type &key ignore-case? global?)
+ "FIXME: docstring"
+ (declare (ignore global?)) ;; FIXME: implement non-global replacing
+ (ecase type
+ ((:BRE :ERE :PCRE) ;; FIXME: implement separate rules
+ (cl-ppcre:regex-replace-all
+ (if ignore-case? (string-downcase regex) regex) ; FIXME: better
+ (if ignore-case? (string-downcase str) str) ; case handling
+ (if ignore-case? (string-downcase replacement) replacement)))))
+
+(defun reader (stream sub-char numarg)
+ "FIXME: docstring"
+ (declare (ignore sub-char numarg))
+ (let* ((ignore-case? nil)
+ (global? nil)
+ (type nil)
+ (seen-modifiers '())
+ (mode-char (read-char stream))
+ (parts (read-regex stream
+ (ecase mode-char
+ ((#\m #\s) (read-char stream))
+ (#\/ #\/))
+ (ecase mode-char
+ ((#\m #\/) 1)
+ (#\s 2))))
+ (regex (first parts))
+ (replacement (second parts)))
+ (loop while t do
+ (let ((modifier (peek-char nil stream)))
+ (when (member modifier seen-modifiers :test #'char=)
+ (return))
+ (case modifier
+ (#\i (setf ignore-case? t))
+ (#\g (setf global? t))
+ (#\B (if type (return) (setf type :BRE)))
+ (#\E (if type (return) (setf type :ERE)))
+ (#\P (if type (return) (setf type :PCRE)))
+ (t (return)))
+ (push modifier seen-modifiers)
+ (read-char stream)))
+ (when (not type)
+ (setf type :ERE))
+ (ecase mode-char
+ ((#\m #\/)
+ `(lambda (str)
+ (match
+ ,regex
+ str
+ ,type
+ :ignore-case? ,ignore-case?
+ :global? ,global?)))
+ (#\s
+ `(lambda (str)
+ (substitute
+ ,regex
+ str
+ ,replacement
+ ,type
+ :ignore-case? ,ignore-case?
+ :global? ,global?))))))