From ea40c5dbc2b47d6fd2a23236828dc9e4ab1f77dc Mon Sep 17 00:00:00 2001 From: Ivar Refsdal Date: Sat, 4 Sep 2021 13:23:07 +0200 Subject: Initial commit Add release script Release 0.1.3 Use com.github.ivarref.yoltq namespace Use com.github.ivarref.yoltq namespace --- test/com/github/ivarref/yoltq/log_init.clj | 61 ++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 test/com/github/ivarref/yoltq/log_init.clj (limited to 'test/com/github/ivarref/yoltq/log_init.clj') diff --git a/test/com/github/ivarref/yoltq/log_init.clj b/test/com/github/ivarref/yoltq/log_init.clj new file mode 100644 index 0000000..cf69e55 --- /dev/null +++ b/test/com/github/ivarref/yoltq/log_init.clj @@ -0,0 +1,61 @@ +(ns com.github.ivarref.yoltq.log-init + (:require [clojure.term.colors :as colors] + [taoensso.timbre :as timbre] + [clojure.string :as str])) + +(def level-colors + {;:warn colors/red + :error colors/red}) + +(def ^:dynamic *override-color* nil) + +(defn min-length [n s] + (loop [s s] + (if (>= (count s) n) + s + (recur (str s " "))))) + +(defn local-console-format-fn + "A simpler log format, suitable for readable logs during development. colorized stacktraces" + [data] + (try + (let [{:keys [level ?err msg_ ?ns-str ?file hostname_ + timestamp_ ?line context]} data + ts (force timestamp_)] + (let [color-f (if (nil? *override-color*) + (get level-colors level str) + colors/green) + maybe-stacktrace (when ?err + (str "\n" (timbre/stacktrace ?err)))] + (cond-> (str #_(str ?ns-str ":" ?line) + #_(min-length (count "[yoltq:326] ") + (str + "[" + (some-> ?ns-str + (str/split #"\.") + (last)) + ":" ?line)) + ts + " " + (color-f (min-length 5 (str/upper-case (name level)))) + " " + #_(.getName ^Thread (Thread/currentThread)) + + (color-f (force msg_)) + + #_maybe-stacktrace)))) + + + (catch Throwable t + (println "error in local-console-format-fn:" (ex-message t)) + nil))) + + +(defn init-logging! [min-levels] + (timbre/merge-config! + {:min-level min-levels + :timestamp-opts {:pattern "HH:mm:ss.SSS" + :timezone :jvm-default} + :output-fn (fn [data] (local-console-format-fn data)) + :appenders {:println (timbre/println-appender {:stream :std-out})}})) + -- cgit v1.2.3 From d13b0cb0b72a9cef9f8e9bd82616899796a4853f Mon Sep 17 00:00:00 2001 From: Ivar Refsdal Date: Fri, 17 Sep 2021 14:51:01 +0200 Subject: Use [#'taoensso.timbre/*context*] as default :capture-bindings if present --- README.md | 10 +++++++--- deps.edn | 3 ++- src/com/github/ivarref/yoltq.clj | 7 ++++--- test/com/github/ivarref/yoltq/log_init.clj | 3 +++ test/com/github/ivarref/yoltq/virtual_test.clj | 24 ++++++++++++++++++------ 5 files changed, 34 insertions(+), 13 deletions(-) (limited to 'test/com/github/ivarref/yoltq/log_init.clj') diff --git a/README.md b/README.md index 514c4a3..902be2f 100644 --- a/README.md +++ b/README.md @@ -115,7 +115,7 @@ Inspecting `(yq/put :q {:work 123})]` you will see something like this: :queue-name :q, ; Destination queue :status :init, ; Status :payload "{:work 123}", ; Payload persisted to the database with pr-str - :bindings "{}", + :bindings "{}", ; Bindings that will be applied before executing consumer function :lock #uuid"037d7da1-5158-4243-8f72-feb1e47e15ca", ; Lock to protect from multiple consumers :tries 0, ; How many times the job has been executed :init-time 4305758012289 ; Time of initialization (System/nanoTime) @@ -153,7 +153,7 @@ The `payload` will be deserialized from the database using `clojure.edn/read-str you will get back what you put into `yq/put`. The yoltq system treats a queue consumer function invocation as successful if it does not throw an exception. -Thus any regular return value, be it `nil`, `false`, `true`, etc. is considered a success. +Any return value, be it `nil`, `false`, `true`, etc. is considered a success. ### Listening for queue jobs @@ -166,7 +166,7 @@ and process newly created queue jobs fairly quickly. This also means that queue jobs in status `:init` will almost always be processed without any type of backoff*. -This pool also schedules polling jobs that will regularly check for various statuses: +The threadpool also schedules polling jobs that will check for various statuses regularly: * Jobs in status `:error` that have waited for at least `:error-backoff-time` (default: 5 seconds) will be retried. * Jobs that have been in `:processing` for at least `:hung-backoff-time` (default: 30 minutes) will be considered hung and retried. @@ -212,6 +212,10 @@ A queue job will remain in status `:error` once `:max-retries` (default: 100) ha Ideally this will not happen. +### Logging + + + ### Total health and system sanity diff --git a/deps.edn b/deps.edn index cf8297c..a457628 100644 --- a/deps.edn +++ b/deps.edn @@ -3,7 +3,8 @@ :paths ["src"] - :aliases {:test {:extra-paths ["test"] + :aliases {:datomic {:extra-deps {com.datomic/datomic-pro {:mvn/version "1.0.6316" :exclusions [org.slf4j/slf4j-nop]}}} + :test {:extra-paths ["test"] :extra-deps {ivarref/datomic-schema {:mvn/version "0.2.0"} com.taoensso/timbre {:mvn/version "5.1.2"} com.fzakaria/slf4j-timbre {:mvn/version "0.3.21"} diff --git a/src/com/github/ivarref/yoltq.clj b/src/com/github/ivarref/yoltq.clj index 6341e41..58efca1 100644 --- a/src/com/github/ivarref/yoltq.clj +++ b/src/com/github/ivarref/yoltq.clj @@ -1,6 +1,5 @@ (ns com.github.ivarref.yoltq - (:require [datomic-schema.core] - [datomic.api :as d] + (:require [datomic.api :as d] [clojure.tools.logging :as log] [com.github.ivarref.yoltq.impl :as i] [com.github.ivarref.yoltq.report-queue :as rq] @@ -58,7 +57,9 @@ ; contain the stacktrace of the stuck threads. :pool-size 4 - :capture-bindings [] + :capture-bindings (if-let [s (resolve (symbol "taoensso.timbre/*context*"))] + [s] + []) ; How often should the system be polled for failed queue jobs :system-error-poll-delay (Duration/ofMinutes 1) diff --git a/test/com/github/ivarref/yoltq/log_init.clj b/test/com/github/ivarref/yoltq/log_init.clj index cf69e55..1aa6c02 100644 --- a/test/com/github/ivarref/yoltq/log_init.clj +++ b/test/com/github/ivarref/yoltq/log_init.clj @@ -39,6 +39,9 @@ " " (color-f (min-length 5 (str/upper-case (name level)))) " " + + (when-let [x-req-id (:x-request-id context)] + (str "[" x-req-id "] ")) #_(.getName ^Thread (Thread/currentThread)) (color-f (force msg_)) diff --git a/test/com/github/ivarref/yoltq/virtual_test.clj b/test/com/github/ivarref/yoltq/virtual_test.clj index e2ea19b..3c7c5b4 100644 --- a/test/com/github/ivarref/yoltq/virtual_test.clj +++ b/test/com/github/ivarref/yoltq/virtual_test.clj @@ -9,8 +9,7 @@ [clojure.tools.logging :as log] [com.github.ivarref.yoltq.impl :as i] [com.github.ivarref.yoltq :as yq] - [clojure.pprint :as pprint] - [clojure.edn :as edn])) + [taoensso.timbre :as timbre])) (use-fixtures :each vq/call-with-virtual-queue!) @@ -254,11 +253,24 @@ (deftest binding-test (let [conn (u/empty-conn)] (dq/init! {:conn conn - :capture-bindings [#'*some-binding*]}) + :capture-bindings [#'*some-binding* #'timbre/*context*]}) (dq/add-consumer! :q (fn [_] *some-binding*)) - (binding [*some-binding* 1] @(d/transact conn [(dq/put :q nil)])) - (binding [*some-binding* 2] @(d/transact conn [(dq/put :q nil)])) - @(d/transact conn [(dq/put :q nil)]) + (binding [timbre/*context* {:x-request-id "wooho"}] + (binding [*some-binding* 1] + @(d/transact conn [(dq/put :q nil)])) + (binding [*some-binding* 2] + @(d/transact conn [(dq/put :q nil)])) + @(d/transact conn [(dq/put :q nil)])) + (is (= 1 (vq/consume-expect! :q :done))) (is (= 2 (vq/consume-expect! :q :done))) (is (nil? (vq/consume-expect! :q :done))))) + + +(deftest default-binding-test + (let [conn (u/empty-conn)] + (dq/init! {:conn conn}) + (dq/add-consumer! :q (fn [_] (:x-request-id timbre/*context*))) + (binding [timbre/*context* {:x-request-id "123"}] + @(d/transact conn [(dq/put :q nil)])) + (is (= "123" (vq/consume-expect! :q :done))))) -- cgit v1.2.3 From ae49a7ec82ecd3988e0f7825b0adead1dc77c911 Mon Sep 17 00:00:00 2001 From: ire Date: Tue, 13 May 2025 21:39:07 +0200 Subject: Fix tx-report-queue sharing #7 --- README.md | 34 +++++++ deps.edn | 54 +++++------ src/com/github/ivarref/yoltq.clj | 86 ++++++++++++++--- src/com/github/ivarref/yoltq/report_queue.clj | 133 ++++++++++++++++++++++++-- test/com/github/ivarref/yoltq/log_init.clj | 2 + 5 files changed, 258 insertions(+), 51 deletions(-) (limited to 'test/com/github/ivarref/yoltq/log_init.clj') diff --git a/README.md b/README.md index c5f2bdb..f84a336 100644 --- a/README.md +++ b/README.md @@ -434,6 +434,40 @@ If you liked this library, you may also like: ## Change log +#### 2025-05-13 v0.2.?? [diff](https://github.com/ivarref/yoltq/compare/v0.2.64...HEAD) +Added support for specifying `tx-report-queue` as a keyword in `init!`. Yoltq will +then not grab the datomic report queue, but use the one provided: + +```clojure +(require '[com.github.ivarref.yoltq :as yq]) +(yq/init! {:conn conn + :tx-report-queue (yq/get-tx-report-queue-multicast! my-conn :yoltq) + ; ^^ can be any `java.util.concurrent.BlockingQueue` value + }) + +(another-tx-report-consumer! (yq/get-tx-report-queue-multicast! my-conn :another-consumer-id)) + +``` + +Added multicast support for `datomic.api/tx-report-queue`: +```clojure +(def my-q1 (yq/get-tx-report-queue-multicast! my-conn :q-id-1)) +; ^^ consume my-q1 just like you would do `datomic.api/tx-report-queue` + +(def my-q2 (yq/get-tx-report-queue-multicast! my-conn :q-id-2)) +; Both my-q1 and my-q2 will receive everything from `datomic.api/tx-report-queue` +``` + +`yq/get-tx-report-queue-multicast!` returns, like +`datomic.api/tx-report-queue`, +`java.util.concurrent.BlockingQueue` and starts a background thread that does +the multicasting as needed. Identical calls to `yq/get-tx-report-queue-multicast!` +returns the same `BlockingQueue`. + +Changed the default for `max-retries` from `10000` to `9223372036854775807`. + +Fixed reflection warnings. + #### 2023-03-20 v0.2.64 [diff](https://github.com/ivarref/yoltq/compare/v0.2.63...v0.2.64) Added support for `max-retries` being `0`, meaning the job should be retried forever (or at least 9223372036854775807 times). diff --git a/deps.edn b/deps.edn index e36885e..1e3fa9d 100644 --- a/deps.edn +++ b/deps.edn @@ -1,33 +1,31 @@ -{:deps {com.github.ivarref/double-trouble {:mvn/version "0.1.102"} - org.clojure/tools.logging {:mvn/version "1.2.4"} - org.clojure/clojure {:mvn/version "1.11.1"}} +{:deps {com.github.ivarref/double-trouble {:mvn/version "0.1.102"} + org.clojure/tools.logging {:mvn/version "1.2.4"} + org.clojure/clojure {:mvn/version "1.11.1"} + com.datomic/peer {:mvn/version "1.0.7364"}} - :paths ["src"] + :paths ["src"] - :aliases {:datomic {:extra-deps {com.datomic/datomic-pro {:mvn/version "1.0.6316" :exclusions [org.slf4j/slf4j-nop]}}} - :test {:extra-paths ["test"] - :extra-deps {ivarref/datomic-schema {:mvn/version "0.2.0"} - com.taoensso/timbre {:mvn/version "5.2.1"} - com.fzakaria/slf4j-timbre {:mvn/version "0.3.21"} - clojure-term-colors/clojure-term-colors {:mvn/version "0.1.0"} - com.datomic/datomic-pro {:mvn/version "1.0.6316" :exclusions [org.slf4j/slf4j-nop]} - org.postgresql/postgresql {:mvn/version "9.3-1102-jdbc41"} - com.taoensso/nippy {:mvn/version "3.2.0"} - io.github.cognitect-labs/test-runner {:git/tag "v0.5.0" :git/sha "b3fd0d2"}} - :jvm-opts ["-DDISABLE_SPY=true" - "-DTAOENSSO_TIMBRE_MIN_LEVEL_EDN=:error"] - :main-opts ["--report" "stderr" "-m" "cognitect.test-runner"]} + :aliases {:test {:extra-paths ["test"] + :extra-deps {ivarref/datomic-schema {:mvn/version "0.2.0"} + com.taoensso/timbre {:mvn/version "5.2.1"} + com.fzakaria/slf4j-timbre {:mvn/version "0.3.21"} + clojure-term-colors/clojure-term-colors {:mvn/version "0.1.0"} + org.postgresql/postgresql {:mvn/version "9.3-1102-jdbc41"} + com.taoensso/nippy {:mvn/version "3.2.0"} + io.github.cognitect-labs/test-runner {:git/tag "v0.5.0" :git/sha "b3fd0d2"}} + :exec-fn cognitect.test-runner.api/test + :jvm-opts ["-DDISABLE_SPY=true" + "-DTAOENSSO_TIMBRE_MIN_LEVEL_EDN=:error"] + :main-opts ["--report" "stderr" "-m" "cognitect.test-runner"]} - :jar {:extra-deps {pack/pack.alpha {:git/url "https://github.com/juxt/pack.alpha.git" - :sha "0e8731e0f24db05b74769e219051b0e92b50624a"}} - :main-opts ["-m" "mach.pack.alpha.skinny" "--no-libs" "--project-path" "target/out.jar"]} + :jar {:extra-deps {pack/pack.alpha {:git/url "https://github.com/juxt/pack.alpha.git" + :sha "0e8731e0f24db05b74769e219051b0e92b50624a"}} + :main-opts ["-m" "mach.pack.alpha.skinny" "--no-libs" "--project-path" "target/out.jar"]} - :release {:extra-deps {ivarref/pom-patch {:mvn/version "0.1.16"}}} + :release {:extra-deps {ivarref/pom-patch {:mvn/version "0.1.16"}}} - :deploy {:extra-deps {slipset/deps-deploy {:mvn/version "0.2.0"}} - :exec-fn deps-deploy.deps-deploy/deploy - :exec-args {:installer :remote - :sign-releases? false - :artifact "target/out.jar"}}} - - :mvn/repos {"my.datomic.com" {:url "https://my.datomic.com/repo"}}} + :deploy {:extra-deps {slipset/deps-deploy {:mvn/version "0.2.0"}} + :exec-fn deps-deploy.deps-deploy/deploy + :exec-args {:installer :remote + :sign-releases? false + :artifact "target/out.jar"}}}} \ No newline at end of file diff --git a/src/com/github/ivarref/yoltq.clj b/src/com/github/ivarref/yoltq.clj index a7dcddf..32298b7 100644 --- a/src/com/github/ivarref/yoltq.clj +++ b/src/com/github/ivarref/yoltq.clj @@ -12,7 +12,7 @@ (:import (datomic Connection) (java.lang.management ManagementFactory) (java.time Duration Instant ZoneOffset ZonedDateTime) - (java.util.concurrent ExecutorService Executors ScheduledExecutorService TimeUnit))) + (java.util.concurrent BlockingQueue ExecutorService Executors ScheduledExecutorService TimeUnit))) (defonce ^:dynamic *config* (atom nil)) (defonce threadpool (atom nil)) @@ -26,7 +26,7 @@ ; If you want no limit on the number of retries, specify ; the value `0`. That will set the effective retry limit to ; 9223372036854775807 times. - :max-retries 10000 + :max-retries 9223372036854775807 ; Minimum amount of time to wait before a failed queue job is retried :error-backoff-time (Duration/ofSeconds 5) @@ -86,6 +86,9 @@ (defn init! [{:keys [conn tx-report-queue] :as cfg}] (assert (instance? Connection conn) (str "Expected :conn to be of type datomic Connection. Was: " (or (some-> conn class str) "nil"))) + (when (some? tx-report-queue) + (assert (instance? BlockingQueue tx-report-queue) + (str "Expected :tx-report-queue to be of type java.util.concurrent.BlockingQueue"))) (locking threadpool @(d/transact conn i/schema) (let [new-cfg (swap! *config* @@ -96,9 +99,6 @@ :system-error (atom {}) :healthy? (atom nil) :slow? (atom nil) - :get-tx-report-queue (fn [] - (or tx-report-queue - (d/tx-report-queue conn))) :slow-thread-watcher-done? (promise)} default-opts (if *test-mode* old-conf (select-keys old-conf [:handlers])) @@ -144,12 +144,37 @@ (reset! *running?* true) (.scheduleAtFixedRate ^ScheduledExecutorService pool (fn [] (poller/poll-all-queues! *running?* *config* pool)) 0 poll-delay TimeUnit/MILLISECONDS) (.scheduleAtFixedRate ^ScheduledExecutorService pool (fn [] (errpoller/poll-errors *running?* *config*)) 0 system-error-poll-delay TimeUnit/MILLISECONDS) - (.execute ^ScheduledExecutorService pool (fn [] (rq/report-queue-listener *running?* queue-listener-ready pool *config*))) + (.execute ^ScheduledExecutorService pool + (fn [] + (try + (log/debug "report-queue-listener starting") + (rq/report-queue-listener *running?* queue-listener-ready pool *config*) + (finally + (log/debug "report-queue-listener exiting") + (deliver queue-listener-ready :finally))))) (future (try (slow-executor/show-slow-threads pool *config*) (finally (deliver slow-thread-watcher-done? :done)))) - @queue-listener-ready))) + (let [q-listener-retval (deref queue-listener-ready 30000 :timeout)] + (cond (= :timeout q-listener-retval) + (do + (log/error "Timed out waiting for report-queue-listener to start") + (throw (IllegalStateException. "Timed out waiting for report-queue-listener to start"))) + + (= :finally q-listener-retval) + (do + (log/error "report-queue-listener did not start") + (throw (IllegalStateException. "report-queue-listener did not start"))) + + (= :ready q-listener-retval) + (do + (log/debug "report-queue-listener is ready")) + + :else + (do + (log/error (str "Unexpected queue-listener-retval: " (pr-str q-listener-retval))) + (throw (IllegalStateException. (str "Unexpected queue-listener-retval: " (pr-str q-listener-retval)))))))))) (defn start! [] @@ -359,14 +384,13 @@ :min (apply min values))}))) (into (sorted-map))))) +(defn get-tx-report-queue-multicast! + "Multicast the datomic.api/tx-report-queue to different consumers. + The multicaster is started on demand. `conn` and `id` identifies the consumer. - -(defn add-tx-report-queue! - ([conn] - (add-tx-report-queue! conn :default)) - ([conn id] - (if @*config* - :...))) + Returns a `java.util.concurrent.BlockingQueue` like `datomic.api/tx-report-queue`." + [conn id] + (rq/get-tx-report-queue-multicast! conn id)) (comment (do @@ -401,3 +425,37 @@ @started-consuming? (stop!) nil))))) + +(comment + (do + (require 'com.github.ivarref.yoltq.log-init) + (com.github.ivarref.yoltq.log-init/init-logging! + [[#{"datomic.*" "com.datomic.*" "org.apache.*"} :warn] + [#{"com.github.ivarref.yoltq.report-queue"} :debug] + [#{"com.github.ivarref.yoltq.poller"} :info] + [#{"com.github.ivarref.yoltq.migrate"} :warn] + [#{"com.github.ivarref.yoltq"} :debug] + ;[#{"ivarref.yoltq*"} :info] + [#{"*"} :info]]) + (stop!) + (let [received (atom []) + uri (str "datomic:mem://demo")] + (d/delete-database uri) + (d/create-database uri) + (let [conn (d/connect uri) + started-consuming? (promise) + n 1] + (init! {:conn conn + :tx-report-queue (get-tx-report-queue-multicast! conn :yoltq) + :slow-thread-show-stacktrace? false}) + (add-consumer! :q (fn [_] + (deliver started-consuming? true))) + (log/info "begin start! ...") + (start!) + (log/info "begin start! ... Done") + (Thread/sleep 2000) + @(d/transact conn [(put :q {:work 123})]) + @started-consuming? + (stop!) + (log/info "stop! done") + nil)))) \ No newline at end of file diff --git a/src/com/github/ivarref/yoltq/report_queue.clj b/src/com/github/ivarref/yoltq/report_queue.clj index 20e0a93..9cddc93 100644 --- a/src/com/github/ivarref/yoltq/report_queue.clj +++ b/src/com/github/ivarref/yoltq/report_queue.clj @@ -3,8 +3,8 @@ [com.github.ivarref.yoltq.impl :as i] [datomic.api :as d] [clojure.tools.logging :as log]) - (:import (datomic Datom) - (java.util.concurrent ScheduledExecutorService BlockingQueue TimeUnit))) + (:import (datomic Connection Datom) + (java.util.concurrent LinkedBlockingQueue ScheduledExecutorService BlockingQueue TimeUnit))) (defn process-poll-result! [cfg id-ident poll-result consumer] @@ -28,18 +28,24 @@ (i/take! cfg) (i/execute! cfg))))) (catch Throwable t - (log/error t "unexpected error in process-poll-result!"))))))))) + (log/error t "Unexpected error in process-poll-result!"))))))))) (defn report-queue-listener [running? ready? ^ScheduledExecutorService pool config-atom] - (let [conn (:conn @config-atom) - ^BlockingQueue q (d/tx-report-queue conn) + (let [cfg @config-atom + conn (:conn cfg) + tx-report-queue-given (contains? cfg :tx-report-queue) + ^BlockingQueue q (if tx-report-queue-given + (get cfg :tx-report-queue) + (d/tx-report-queue conn)) id-ident (d/q '[:find ?e . :where [?e :db/ident :com.github.ivarref.yoltq/id]] (d/db conn))] + (assert (instance? BlockingQueue q)) + (log/debug "tx-report-queue-given:" tx-report-queue-given) (try (while @running? (when-let [poll-result (.poll ^BlockingQueue q 1 TimeUnit/SECONDS)] @@ -49,9 +55,118 @@ (fn [f] (when @running? (.execute ^ScheduledExecutorService pool f))))) - (deliver ready? true)) + (deliver ready? :ready)) (catch Throwable t - (log/error t "unexpected error in report-queue-listener")) + (log/error t "Unexpected error in report-queue-listener:" (.getMessage t))) (finally - (log/debug "remove tx-report-queue") - (d/remove-tx-report-queue conn))))) \ No newline at end of file + (if tx-report-queue-given + (log/debug "Remove tx-report-queue handled elsewhere") + (do + (log/debug "Remove tx-report-queue") + (d/remove-tx-report-queue conn))))))) + +(defonce ^:private multicast-state-lock (Object.)) + +(defonce ^:private multicast-state (atom {})) + +(defn- start-multicaster! [conn] + (let [multicaster-ready? (promise)] + (future + (log/debug "Multicaster starting for conn" conn) + (try + (let [input-queue (d/tx-report-queue conn)] + (loop [] + (when-let [mcast-state (get @multicast-state conn)] + (when-let [dest-queues (vals mcast-state)] + (let [element (.poll ^BlockingQueue input-queue 1 TimeUnit/SECONDS)] + (deliver multicaster-ready? :ready) + (when (some? element) + (doseq [q dest-queues] + (let [ok-offer (.offer ^BlockingQueue q element 30 TimeUnit/MINUTES)] + (when (false? ok-offer) + (log/error "Failed to offer item in multicaster for connection" conn)))))) + (recur))))) + (catch Throwable t + (deliver multicaster-ready? :error) + (log/error t "Unexpected error in multicaster:" (.getMessage t))) + (finally + (d/remove-tx-report-queue conn) + (log/debug "Multicaster exiting for conn" conn)))) + multicaster-ready?)) + +(defn get-tx-report-queue-multicast! + "Multicast the datomic.api/tx-report-queue to different consumers. + The multicaster is started on demand. `conn` and `id` identifies the consumer. + + Returns a `java.util.concurrent.BlockingQueue` like `datomic.api/tx-report-queue`." + [conn id] + (assert (instance? Connection conn)) + (assert (keyword? id)) + (locking multicast-state-lock + (assert (map? @multicast-state)) + (if-let [existing-q (get-in @multicast-state [conn id])] + (do + (log/debug "returning existing queue for id" id) + (assert (instance? BlockingQueue existing-q)) + existing-q) + (let [needs-multicaster? (not (contains? @multicast-state conn)) + new-state (swap! multicast-state (fn [old-state] (assoc-in old-state [conn id] (LinkedBlockingQueue.))))] + (when needs-multicaster? + (let [multicaster-promise (start-multicaster! conn) + multicaster-result (deref multicaster-promise (* 30 60000) :timeout)] + (cond (= multicaster-result :timeout) + (do + (log/error "Timeout waiting for multicaster to start") + (throw (RuntimeException. "Timeout waiting for multicaster to start"))) + (= multicaster-result :error) + (do + (log/error "Multicaster failed to start") + (throw (RuntimeException. "Multicaster failed to start"))) + (= multicaster-result :ready) + (log/debug "Multicaster is ready") + + :else + (do + (log/error "Unexpected state from multicaster:" multicaster-result) + (throw (RuntimeException. (str "Unexpected state from multicaster: " multicaster-result))))))) + (let [new-q (get-in new-state [conn id])] + (assert (instance? BlockingQueue new-q)) + new-q))))) + +(defn stop-all-multicasters! [] + (reset! multicast-state {})) + +(comment + (do + (require 'com.github.ivarref.yoltq.log-init) + (com.github.ivarref.yoltq.log-init/init-logging! + [[#{"datomic.*" "com.datomic.*" "org.apache.*"} :warn] + [#{"com.github.ivarref.yoltq.report-queue"} :debug] + [#{"com.github.ivarref.yoltq.poller"} :info] + [#{"com.github.ivarref.yoltq"} :debug] + ;[#{"ivarref.yoltq*"} :info] + [#{"*"} :info]]) + (defonce conn (let [uri (str "datomic:mem://demo") + _ (d/delete-database uri) + _ (d/create-database uri) + conn (d/connect uri)] + conn)))) + +(comment + (defn drain! [^BlockingQueue q] + (loop [cnt 0] + (if (nil? (.poll q 1 TimeUnit/SECONDS)) + cnt + (recur (inc cnt)))))) + +(comment + (let [q-1 (get-tx-report-queue-multicast! conn :q1) + q-2 (get-tx-report-queue-multicast! conn :q2)])) + +(comment + (drain! (get-tx-report-queue-multicast! conn :q1))) + +(comment + (do + @(d/transact conn [{:db/doc "demo"}]) + :yay)) \ No newline at end of file diff --git a/test/com/github/ivarref/yoltq/log_init.clj b/test/com/github/ivarref/yoltq/log_init.clj index 1aa6c02..f3fb6dc 100644 --- a/test/com/github/ivarref/yoltq/log_init.clj +++ b/test/com/github/ivarref/yoltq/log_init.clj @@ -3,6 +3,8 @@ [taoensso.timbre :as timbre] [clojure.string :as str])) +(set! *warn-on-reflection* true) + (def level-colors {;:warn colors/red :error colors/red}) -- cgit v1.2.3 From 4797e559410bce644c40b05fa9a321171a781e78 Mon Sep 17 00:00:00 2001 From: ire Date: Tue, 20 May 2025 22:43:39 +0200 Subject: Improve tx-report-queue sharing #7 --- src/com/github/ivarref/yoltq/report_queue.clj | 342 +++++++++++++++++++++----- test/com/github/ivarref/yoltq/log_init.clj | 2 +- 2 files changed, 283 insertions(+), 61 deletions(-) (limited to 'test/com/github/ivarref/yoltq/log_init.clj') diff --git a/src/com/github/ivarref/yoltq/report_queue.clj b/src/com/github/ivarref/yoltq/report_queue.clj index 9cddc93..239de12 100644 --- a/src/com/github/ivarref/yoltq/report_queue.clj +++ b/src/com/github/ivarref/yoltq/report_queue.clj @@ -6,7 +6,6 @@ (:import (datomic Connection Datom) (java.util.concurrent LinkedBlockingQueue ScheduledExecutorService BlockingQueue TimeUnit))) - (defn process-poll-result! [cfg id-ident poll-result consumer] (let [{:keys [tx-data db-after]} poll-result] (when-let [new-ids (->> tx-data @@ -30,7 +29,6 @@ (catch Throwable t (log/error t "Unexpected error in process-poll-result!"))))))))) - (defn report-queue-listener [running? ready? ^ScheduledExecutorService pool @@ -65,80 +63,253 @@ (log/debug "Remove tx-report-queue") (d/remove-tx-report-queue conn))))))) -(defonce ^:private multicast-state-lock (Object.)) +; https://stackoverflow.com/a/14488425 +(defn- dissoc-in + "Dissociates an entry from a nested associative structure returning a new + nested structure. keys is a sequence of keys. Any empty maps that result + will not be present in the new structure." + [m [k & ks :as keys]] + (if ks + (if-let [nextmap (get m k)] + (let [newmap (dissoc-in nextmap ks)] + (if (seq newmap) + (assoc m k newmap) + (dissoc m k))) + m) + (dissoc m k))) + +(defn- queues-to-shutdown [old-state new-state] + (assert (map? old-state)) + (assert (map? new-state)) + (doseq [x (vals new-state)] + (assert (vector? x))) + (doseq [x (vals old-state)] + (assert (vector? x))) + (let [new-qs (into #{} (mapv second (vals new-state)))] + (reduce + (fn [o [send-end-token? old-q]] + ;(assert (boolean? send-end-token?)) + ;(assert (instance? BlockingQueue old-q)) + (if (contains? new-qs old-q) + o + (conj o [send-end-token? old-q]))) + [] + (vals old-state)))) + +(comment + (queues-to-shutdown {:a [true 999] :b [false 777]} + {:a [true 123] :b [true 777]})) +(defn- multicast-once [conn work-item old-state new-state] + (assert (map? old-state)) + (assert (map? new-state)) + (doseq [[send-end-token? q-to-shutdown] (queues-to-shutdown old-state new-state)] + (if send-end-token? + (do + #_(log/debug "offering :end token") + (.offer ^BlockingQueue q-to-shutdown :end 1 TimeUnit/MICROSECONDS)) + (do + #_(log/debug "not offering :end token")))) + (when (seq new-state) + (if (some? work-item) + (reduce-kv + (fn [m id [send-end-token? q]] + (let [ok-offer (.offer ^BlockingQueue q work-item 1 TimeUnit/MICROSECONDS)] + (if (true? ok-offer) + (assoc m id [send-end-token? q]) + (log/warn "Failed to offer item in multicaster for connection" conn "and queue id" id)))) + {} + new-state) + new-state))) + +(defonce ^:private multicast-state-lock (Object.)) +(defonce ^:private consumer-state-lock (Object.)) (defonce ^:private multicast-state (atom {})) +(defonce ^:private thread-count (atom 0)) + +(defn- multicaster-loop [init-state conn ready?] + (let [input-queue (d/tx-report-queue conn)] + (deliver ready? true) + (loop [old-state init-state] + (let [work-item (.poll ^BlockingQueue input-queue 16 TimeUnit/MILLISECONDS) + new-state (locking multicast-state-lock + ; writer to `multicast-state` must be protected by `multicast-state-lock` + ; it should block minimally / spend minimum amount of time + (swap! multicast-state (fn [old-state] (update-in old-state [:iter-count conn] (fnil inc 0)))) + (if-let [new-state (multicast-once conn work-item old-state (get-in @multicast-state [:queues conn] {}))] + new-state + (do (swap! multicast-state (fn [old-state] (dissoc-in old-state [:queues conn]))) + (swap! multicast-state (fn [old-state] (update-in old-state [:thread-count conn] dec))) + (d/remove-tx-report-queue conn) + nil)))] + (if new-state + (recur new-state) + nil))))) (defn- start-multicaster! [conn] - (let [multicaster-ready? (promise)] + (let [ready? (promise)] (future (log/debug "Multicaster starting for conn" conn) (try - (let [input-queue (d/tx-report-queue conn)] - (loop [] - (when-let [mcast-state (get @multicast-state conn)] - (when-let [dest-queues (vals mcast-state)] - (let [element (.poll ^BlockingQueue input-queue 1 TimeUnit/SECONDS)] - (deliver multicaster-ready? :ready) - (when (some? element) - (doseq [q dest-queues] - (let [ok-offer (.offer ^BlockingQueue q element 30 TimeUnit/MINUTES)] - (when (false? ok-offer) - (log/error "Failed to offer item in multicaster for connection" conn)))))) - (recur))))) + (swap! thread-count inc) + (let [new-state (swap! multicast-state (fn [old-state] (update-in old-state [:thread-count conn] (fnil inc 0))))] + (assert (= 1 (get-in new-state [:thread-count conn]))) + ; "parent" thread holds `multicast-state-lock` and + ; waits for `ready?` promise, so effectively this new thread also holds + ; the lock until `ready?` is delivered. That is: it is safe + ; for this thread to modify multicast-state regardless of what other threads are doing + (multicaster-loop (get-in new-state [:queues conn]) conn ready?)) (catch Throwable t - (deliver multicaster-ready? :error) - (log/error t "Unexpected error in multicaster:" (.getMessage t))) + (log/error t "Unexpected error in multicaster:" (.getMessage t)) + (log/error "Multicaster exiting for conn")) (finally - (d/remove-tx-report-queue conn) + (swap! thread-count dec) (log/debug "Multicaster exiting for conn" conn)))) - multicaster-ready?)) + @ready?)) + +(defn- wait-multicast-thread-step [conn] + ; `get-tx-report-queue-multicast!` should return only when the multicaster thread + ; has picked up the new queue. + ; + ; Otherwise the following could happen: + ; 1. multicast thread is sleeping + ; 2: user-thread calls get-tx-report-queue-multicast! with `send-end-token?` `true` + ; 3: user-thread (or somebody else) calls `stop-multicaster`. + ; The multicast-state atom is now identical as it was in 1 + ; 4: multicast thread is scheduled and does _not_ detect any state change. + ; And therefore the multicast thread does _not_ send out an :end token as one would expect. + ; + ; Once [:iter-count conn] has changed, we know that the multicaster thread + ; will see the new queue. + ; We are still holding the consumer-state-lock, so no other thread + ; can do any stop-multicasting that would/could corrupt the state. + ; We can then be sure that the queue will receive the `:end` token when/if + ; the queue is stopped. + (let [start-ms (System/currentTimeMillis) + iter-count (get-in @multicast-state [:iter-count conn] -1)] + (loop [spin-count 0] + (if (not= iter-count (get-in @multicast-state [:iter-count conn])) + nil + (do + (let [spent-ms (- (System/currentTimeMillis) start-ms)] + (if (> spent-ms 30000) + (throw (RuntimeException. "Timed out waiting for multicaster thread")) + (do + (Thread/sleep 16) + (recur (inc spin-count)))))))))) (defn get-tx-report-queue-multicast! "Multicast the datomic.api/tx-report-queue to different consumers. The multicaster is started on demand. `conn` and `id` identifies the consumer. Returns a `java.util.concurrent.BlockingQueue` like `datomic.api/tx-report-queue`." - [conn id] - (assert (instance? Connection conn)) - (assert (keyword? id)) - (locking multicast-state-lock - (assert (map? @multicast-state)) - (if-let [existing-q (get-in @multicast-state [conn id])] - (do - (log/debug "returning existing queue for id" id) - (assert (instance? BlockingQueue existing-q)) - existing-q) - (let [needs-multicaster? (not (contains? @multicast-state conn)) - new-state (swap! multicast-state (fn [old-state] (assoc-in old-state [conn id] (LinkedBlockingQueue.))))] - (when needs-multicaster? - (let [multicaster-promise (start-multicaster! conn) - multicaster-result (deref multicaster-promise (* 30 60000) :timeout)] - (cond (= multicaster-result :timeout) - (do - (log/error "Timeout waiting for multicaster to start") - (throw (RuntimeException. "Timeout waiting for multicaster to start"))) - (= multicaster-result :error) - (do - (log/error "Multicaster failed to start") - (throw (RuntimeException. "Multicaster failed to start"))) - (= multicaster-result :ready) - (log/debug "Multicaster is ready") + ([conn id] + (get-tx-report-queue-multicast! conn id false)) + ([conn id send-end-token?] + (assert (instance? Connection conn)) + (assert (keyword? id)) + (locking consumer-state-lock + (let [the-q + (locking multicast-state-lock + (assert (map? @multicast-state)) + (if-let [existing-q (get-in @multicast-state [:queues conn id])] + (do + (swap! multicast-state + (fn [old-state] + (update-in old-state [:queues conn id] (fn [[end-token? q]] + (if (not= end-token? send-end-token?) + (log/debug "flipped `send-end-token?`") + (log/debug "identical `send-end-token?`")) + [send-end-token? q])))) + (log/debug "Returning existing queue for id" id) + (assert (instance? BlockingQueue (second existing-q))) + (second existing-q)) + (let [needs-multicaster? (nil? (get-in @multicast-state [:queues conn])) + new-q (LinkedBlockingQueue.) + new-state (swap! multicast-state (fn [old-state] (assoc-in old-state [:queues conn id] [send-end-token? new-q])))] + (if needs-multicaster? + (do + (start-multicaster! conn) + (log/debug "Multicaster thread started. Returning new queue for id" id) + new-q) + (do + (log/debug "Multicaster thread already exists. Returning new queue for id" id) + new-q)))))] + ; wait for multicaster thread to pick up current Queue + (wait-multicast-thread-step conn) + the-q)))) - :else +(defn wait-multicast-threads-exit [[old-state new-state]] + (assert (map? old-state)) + (assert (map? new-state)) + (assert (map? (get old-state :queues {}))) + (assert (map? (get new-state :queues {}))) + (assert (map? (get old-state :thread-count {}))) + (assert (map? (get new-state :thread-count {}))) + (locking consumer-state-lock + ; No new multicast threads will be launched inside this block. + ; The lock is already held by parent function. + ; + ; Why do we need to _wait_ for multicaster thread(s) to exit after + ; removing all queue ids for a given connection? + ; Otherwise the following could happen: + ; 1. multicaster thread is sleeping + ; 2. user calls stop-multicaster! + ; One would expect that multicaster thread would exit, but it is still sleeping + ; 3. user calls get-tx-report-queue-multicast! with the same conn + ; The state is now empty, so a new multicaster thread is spawned. + ; 4. Now there is two multicaster threads for the same connection! + ; ... and since the datomic report queue can be shared between threads + ; it will seemingly work, but when the end event is sent, it will be + ; sent by multiple threads. + (let [old-conns (into #{} (keys (get old-state :queues {}))) + new-conns (into #{} (keys (get new-state :queues {})))] + (doseq [old-conn old-conns] + (when-not (contains? new-conns old-conn) + (let [old-threadcount (get-in old-state [:thread-count old-conn] nil)] + (assert (= 1 old-threadcount)) + (let [start-ms (System/currentTimeMillis)] + (loop [] + (if (= 0 (get-in @multicast-state [:thread-count old-conn])) + :ok (do - (log/error "Unexpected state from multicaster:" multicaster-result) - (throw (RuntimeException. (str "Unexpected state from multicaster: " multicaster-result))))))) - (let [new-q (get-in new-state [conn id])] - (assert (instance? BlockingQueue new-q)) - new-q))))) + (let [spent-ms (- (System/currentTimeMillis) start-ms)] + (if (> spent-ms 30000) + (throw (RuntimeException. "Timed out waiting for multicaster thread to exit")) + (do + (Thread/sleep 16) + (recur)))))))))))))) + +(defn stop-multicaster-id! [conn id] + (locking consumer-state-lock + (wait-multicast-threads-exit + (locking multicast-state-lock + (swap-vals! multicast-state (fn [old-state] + (let [new-state (dissoc-in old-state [:queues conn id])] + (if (= {} (get-in new-state [:queues conn])) + (dissoc-in old-state [:queues conn]) + new-state)))))))) + +(defn stop-multicaster! [conn] + (locking consumer-state-lock + (wait-multicast-threads-exit + (locking multicast-state-lock + (swap-vals! multicast-state (fn [old-state] (dissoc-in old-state [:queues conn]))))))) (defn stop-all-multicasters! [] - (reset! multicast-state {})) + (locking consumer-state-lock + (wait-multicast-threads-exit + (locking multicast-state-lock + (swap-vals! multicast-state (fn [old-state] (assoc old-state :queues {}))))))) (comment (do (require 'com.github.ivarref.yoltq.log-init) + (defn drain! [^BlockingQueue q] + (loop [items []] + (if-let [elem (.poll q 100 TimeUnit/MILLISECONDS)] + (recur (conj items elem)) + items))) (com.github.ivarref.yoltq.log-init/init-logging! [[#{"datomic.*" "com.datomic.*" "org.apache.*"} :warn] [#{"com.github.ivarref.yoltq.report-queue"} :debug] @@ -153,20 +324,71 @@ conn)))) (comment - (defn drain! [^BlockingQueue q] - (loop [cnt 0] - (if (nil? (.poll q 1 TimeUnit/SECONDS)) - cnt - (recur (inc cnt)))))) + (do + (require 'com.github.ivarref.yoltq.log-init) + (defn drain! [^BlockingQueue q] + (loop [items []] + (if-let [elem (.poll q 100 TimeUnit/MILLISECONDS)] + (recur (conj items elem)) + items))) + (com.github.ivarref.yoltq.log-init/init-logging! + [[#{"datomic.*" "com.datomic.*" "org.apache.*"} :warn] + [#{"com.github.ivarref.yoltq.report-queue"} :debug] + [#{"com.github.ivarref.yoltq.poller"} :info] + [#{"com.github.ivarref.yoltq"} :debug] + ;[#{"ivarref.yoltq*"} :info] + [#{"*"} :info]]) + (log/info "********************************") + (defonce conn (let [uri (str "datomic:mem://demo") + _ (d/delete-database uri) + _ (d/create-database uri) + conn (d/connect uri)] + conn)) + (log/info "stop-all!") + (stop-all-multicasters!) + (assert (= 0 @thread-count)) + (let [q1 (get-tx-report-queue-multicast! conn :q1 false) + q2 (get-tx-report-queue-multicast! conn :q2 false) + _ (get-tx-report-queue-multicast! conn :q1 true)] + @(d/transact conn [{:db/doc "demo"}]) + @(d/transact conn [{:db/doc "demo"}]) + @(d/transact conn [{:db/doc "demo"}]) + (log/info "begin drain q1") + (stop-multicaster-id! conn :q1) + (println "thread count" @thread-count) + (let [qitems-2 (drain! q2) + qitems-1 (drain! q1)] + (assert (= :end (last qitems-1))) + (println "drain count q1:" (count qitems-1)) + (println "drain count q2:" (count qitems-2)))))) + +(comment + (do + (let [q (get-tx-report-queue-multicast! conn :q1 true)] + (log/debug "stopping id :q1") + (stop-multicaster-id! conn :q1) + (let [drained (drain! q)] + (println "drained:" drained) + (assert (= [:end] drained))) + @multicast-state))) (comment - (let [q-1 (get-tx-report-queue-multicast! conn :q1) - q-2 (get-tx-report-queue-multicast! conn :q2)])) + (stop-all-multicasters!)) (comment - (drain! (get-tx-report-queue-multicast! conn :q1))) + (do + (let [q (get-tx-report-queue-multicast! conn :q2 false)] + (println "drain count:" (count (drain! q))) + @multicast-state + nil))) + +(comment + (get-tx-report-queue-multicast! conn :q1 false) + (get-tx-report-queue-multicast! conn :q1 true)) (comment (do + @(d/transact conn [{:db/doc "demo"}]) + @(d/transact conn [{:db/doc "demo"}]) @(d/transact conn [{:db/doc "demo"}]) :yay)) \ No newline at end of file diff --git a/test/com/github/ivarref/yoltq/log_init.clj b/test/com/github/ivarref/yoltq/log_init.clj index f3fb6dc..7eae557 100644 --- a/test/com/github/ivarref/yoltq/log_init.clj +++ b/test/com/github/ivarref/yoltq/log_init.clj @@ -48,7 +48,7 @@ (color-f (force msg_)) - #_maybe-stacktrace)))) + maybe-stacktrace)))) (catch Throwable t -- cgit v1.2.3