(defpackage :org.euandre.resyn (:use :cl) (:shadow :match :substitute) (:export :match :substitute :reader)) (in-package :org.euandre.resyn) #| TODO: - capture group - (#~m/a(b+)c/i "abc ABbC") => [["abc" "ABbC"] ["b" "Bb"]], or ["abc" "ABbC"] + [["b"] ["Bb"]] - robust ignore-case? (instead of #'STRING-DOWNCASE) - global? - BRE, ERE e PCRE - glob? - lambda expression vs funcallabla instance: simplicity vs read/print support - turn the "if" inside #'read-regex into a compiler macro - better error message than ecase? - gensyms - ack let over lambda - split: (split #~/(,| and) ?/ "a, b, c and d") => ["a" "b" "c" "d"] regcomp: BRE, ERE cl-ppcre: PCRE (#~m/a(b+)(c)/ "123 456") [] (#~m/a(b+)(c)/ "abbc") [["abbc", "bb", "c"]] (#~m/a(b+)(c)/ "abbc abc") [["abbc", "bb", "c"] ["abc", "b", "c"]] (#~m/a(b+)(c)/ "abbc abc") "abbc" ["bb" "c"] (#~m/a(b+)(c)/ "ac ac") nil nil (#~m/a(b+)(c)/ "abbc abc") ["abbc" "abc"] [["bb" "c"] ["b" "c"]] (let [matches (#~m/a(b+)(c)/ "abbc abc")] (when (seq matches)) ...) (let [(matches captures) (#~m/a(b+)(c)/ "abbc abc")] (map (fn [[b c]] ...) captures)) (loop for (match [word spaces]) = (#~/([a-z]+)( +)/ "a b c d e f") when (...)) |# (defun segment-reader (stream char n) "FIXME: docstring" (if (<= n 0) (list '() '()) (let ((chars '()) (fmts '()) (escaping? nil)) (do ((curr (read-char stream) (read-char stream))) ((and (char= char curr) (not escaping?))) (case curr (#\% (if escaping? (push #\% chars) (progn (push #\~ chars) (push #\a chars) (read-char stream) ;; discard #\( (push `(progn ,@(read-delimited-list #\) stream)) fmts))) (setf escaping? nil)) (#\~ (when escaping? (push #\\ chars)) (push #\~ chars) (push #\~ chars) (setf escaping? nil)) (#\\ (when escaping? (push #\\ chars) (push #\\ chars)) (setf escaping? (not escaping?))) (t (when escaping? (push #\\ chars)) (push curr chars) (setf escaping? nil)))) (destructuring-bind (segments formats) (segment-reader stream char (1- n)) (list (cons (coerce (nreverse chars) 'string) segments) (cons (nreverse fmts) formats)))))) (defun read-regex (stream char n) "FIXME: docstring" (destructuring-bind (segments formats) (segment-reader stream char n) (mapcar (lambda (seg fmt) (if fmt `(format nil ,seg ,@fmt) (format nil seg))) segments formats))) (defun match (regex str type &key ignore-case? global?) "FIXME: docstring" (declare (ignore global?)) ;; FIXME: implement global matching (ecase type ((:ERE :PCRE) ;; FIXME: implement separate rules (cl-ppcre:scan-to-strings (if ignore-case? (string-downcase regex) regex) ; FIXME: more robust (if ignore-case? (string-downcase str) str))))) ; handling of case (defun substitute (regex str replacement type &key ignore-case? global?) "FIXME: docstring" (declare (ignore global?)) ;; FIXME: implement non-global replacing (ecase type ((:BRE :ERE :PCRE) ;; FIXME: implement separate rules (cl-ppcre:regex-replace-all (if ignore-case? (string-downcase regex) regex) ; FIXME: better (if ignore-case? (string-downcase str) str) ; case handling (if ignore-case? (string-downcase replacement) replacement))))) (defun reader (stream sub-char numarg) "FIXME: docstring" (declare (ignore sub-char numarg)) (let* ((ignore-case? nil) (global? nil) (type nil) (seen-modifiers '()) (mode-char (read-char stream)) (parts (read-regex stream (ecase mode-char ((#\m #\s) (read-char stream)) (#\/ #\/)) (ecase mode-char ((#\m #\/) 1) (#\s 2)))) (regex (first parts)) (replacement (second parts))) (loop while t do (let ((modifier (peek-char nil stream))) (when (member modifier seen-modifiers :test #'char=) (return)) (case modifier (#\i (setf ignore-case? t)) (#\g (setf global? t)) (#\B (if type (return) (setf type :BRE))) (#\E (if type (return) (setf type :ERE))) (#\P (if type (return) (setf type :PCRE))) (t (return))) (push modifier seen-modifiers) (read-char stream))) (when (not type) (setf type :ERE)) (ecase mode-char ((#\m #\/) `(lambda (str) (match ,regex str ,type :ignore-case? ,ignore-case? :global? ,global?))) (#\s `(lambda (str) (substitute ,regex str ,replacement ,type :ignore-case? ,ignore-case? :global? ,global?))))))