cat

added
1.5

ns
clojure.core.reducers

type
function

(cat) (cat ctor) (cat left right)

A high-performance combining fn that yields the catenation of the
reduced values. The result is reducible, foldable, seqable and
counted, providing the identity collections are reducible, seqable
and counted. The single argument version will build a combining fn
with the supplied identity constructor. Tests for identity
with (zero? (count x)). See also foldcat.

;; This example showcases the use of r/cat to build HashSets (instead
;; of the default ArrayList) of distinct words in parallel, and then
;; merges them all together by walking the binary tree produced by r/fold.
(require '[clojure.core.reducers :as r])
(require '[clojure.string :refer [lower-case blank? split split-lines]])
(import  'java.util.HashSet)

;; The text fetched from the URL below, as a vector with one string per line.
;; Evaluating this form performs a network request.
(def book
  (split-lines
    (slurp "http://www.gutenberg.org/files/2600/2600-0.txt")))

;; Reducer pipeline turning a collection of lines into lower-cased words.
;; `comp` applies its arguments right-to-left, so the steps run bottom-up:
;;   1. split every line on runs of whitespace,
;;   2. keep only the first run of word characters in each token
;;      (strips punctuation; yields nil when a token has none),
;;   3. drop nil/blank results,
;;   4. lower-case what is left.
;; Regex literals are not string-escaped in Clojure: a single backslash is
;; required (#"\w+"), whereas #"\\w+" would match a literal backslash
;; followed by the letter w and never match ordinary text.
(def r-word (comp
  (r/map lower-case)
  (r/remove blank?)
  (r/map #(re-find #"\w+" %))
  (r/mapcat #(split % #"\s+"))))

;; Folds the words of `book` using HashSet accumulators: r/append! adds each
;; word to an accumulator created by the supplied constructor, and the
;; combining fn returned by r/cat joins the partial results.
(def btree
  (let [combine-sets (r/cat (fn [] (HashSet.)))
        words        (r-word book)]
    (r/fold combine-sets r/append! words)))

(defn merge-tree
  "Walks `root` and adds the contents of every HashSet found to `res`.

  A `clojure.core.reducers.Cat` node is descended into, left child first and
  then right child. A HashSet is added to `res` with `.addAll`. Any other
  value is ignored. `res` is mutated in place and is also the return value."
  [root res]
  (condp instance? root
    clojure.core.reducers.Cat
    ;; Every branch returns `res`, so threading it through the two recursive
    ;; calls visits left before right and still yields `res`.
    (->> res
         (merge-tree (.left root))
         (merge-tree (.right root)))

    HashSet
    (doto res (.addAll root))

    ;; Neither a Cat node nor a HashSet: nothing to merge.
    res))

;; Collect every partial HashSet held in `btree` into one fresh HashSet.
(def distinct-words
  (let [all-words (HashSet.)]
    (merge-tree btree all-words)))

;; Number of distinct words collected.
(count distinct-words)
;; 17200