|
1 | | -(ns |
2 | | - ^{:doc "Utilities for declarative creation of Excel (.xlsx) spreadsheets, |
| 1 | +(ns excel-clj.core |
| 2 | + "Utilities for declarative creation of Excel (.xlsx) spreadsheets, |
3 | 3 | with higher-level abstractions over Apache POI (https://poi.apache.org/). |
4 | 4 |
|
5 | 5 | The highest-level data abstraction used to create Excel spreadsheets is a |
|
9 | 9 | grid of [[cell]]. |
10 | 10 |
|
11 | 11 | Run the (example) function at the bottom of this namespace to see more." |
12 | | - :author "Matthew Downey"} excel-clj.core |
| 12 | + {:author "Matthew Downey"} |
13 | 13 | (:require [excel-clj.tree :as tree] |
14 | 14 | [excel-clj.style :as style] |
| 15 | + [excel-clj.prototype :as pt] |
15 | 16 | [clojure.string :as string] |
16 | | - [clojure.java.io :as io] |
17 | | - [taoensso.tufte :as tufte :refer (defnp p profiled profile)]) |
18 | | - (:import (org.apache.poi.ss.usermodel Cell RichTextString) |
19 | | - (org.apache.poi.xssf.usermodel XSSFWorkbook XSSFSheet XSSFRow XSSFCell) |
20 | | - (java.io File) |
| 17 | + [clojure.java.io :as io]) |
| 18 | + (:import (java.io File) |
21 | 19 | (java.awt Desktop HeadlessException) |
22 | | - (java.util Calendar Date) |
23 | | - (org.apache.poi.ss.util CellRangeAddress) |
24 | 20 | (org.jodconverter.office DefaultOfficeManagerBuilder) |
25 | 21 | (org.jodconverter OfficeDocumentConverter))) |
26 | 22 |
|
27 | 23 | (set! *warn-on-reflection* true) |
28 | 24 |
|
29 | | -;;; Low level code to write to & style sheets; you probably shouldn't have to |
30 | | -;;; touch this to make use of the API, but might choose to when adding or |
31 | | -;;; extending functionality |
32 | | - |
33 | | -(defmacro ^:private if-type |
34 | | - "For situations where there are overloads of a Java method that accept |
35 | | - multiple types and you want to either call the method with a correct type |
36 | | - hint (avoiding reflection) or do something else. |
37 | | -
|
38 | | - In the `if-true` form, the given `sym` becomes type hinted with the type in |
39 | | - `types` where (instance? type sym). Otherwise the `if-false` form is run." |
40 | | - [[sym types] if-true if-false] |
41 | | - (let [typed-sym (gensym)] |
42 | | - (letfn [(with-hint [type] |
43 | | - (let [using-hinted |
44 | | - ;; Replace uses of the un-hinted symbol if-true form with |
45 | | - ;; the generated symbol, to which we're about to add a hint |
46 | | - (clojure.walk/postwalk-replace {sym typed-sym} if-true)] |
47 | | - ;; Let the generated sym with a hint, e.g. (let [^Float x ...]) |
48 | | - `(let [~(with-meta typed-sym {:tag type}) ~sym] |
49 | | - ~using-hinted))) |
50 | | - (condition [type] (list `(instance? ~type ~sym) (with-hint type)))] |
51 | | - `(cond |
52 | | - ~@(mapcat condition types) |
53 | | - :else ~if-false)))) |
54 | | - |
55 | | -;; Example of the use of if-type |
56 | | -(comment |
57 | | - (let [test-fn #(time (reduce + (map % (repeat 1000000 "asdf")))) |
58 | | - reflection (fn [x] (.length x)) |
59 | | - len-hinted (fn [^String x] (.length x)) |
60 | | - if-type' (fn [x] (if-type [x [String]] |
61 | | - (.length x) |
62 | | - ;; So we know it executes the if-true path |
63 | | - (throw (RuntimeException.))))] |
64 | | - (println "Running...") |
65 | | - (print "With manual type hinting =>" (with-out-str (test-fn len-hinted))) |
66 | | - (print "With if-type hinting =>" (with-out-str (test-fn if-type'))) |
67 | | - (print "With reflection => ") |
68 | | - (flush) |
69 | | - (print (with-out-str (test-fn reflection))))) |
70 | | - |
71 | | -(defn- write-cell! |
72 | | - "Write the given data to the mutable cell object, coercing its type if |
73 | | - necessary." |
74 | | - [^Cell cell data] |
75 | | - ;; These types are allowed natively |
76 | | - (if-type [data [Boolean Calendar String Date Double RichTextString]] |
77 | | - (doto cell (.setCellValue data)) |
78 | | - |
79 | | - ;; Apache POI requires that numbers be doubles |
80 | | - (if (number? data) |
81 | | - (doto cell (.setCellValue (double data))) |
82 | | - |
83 | | - ;; Otherwise stringify it |
84 | | - (let [to-write (or (some-> data pr-str) "")] |
85 | | - (doto cell (.setCellValue ^String to-write)))))) |
86 | | - |
87 | | -(def ^:dynamic *max-col-width* |
88 | | - "Sometimes POI's auto sizing isn't super intelligent, so set a sanity-max on |
89 | | - the column width." |
| 25 | +(def ^{:dynamic true :deprecated true} *max-col-width* |
| 26 | + "Deprecated -- no longer has any effect." |
90 | 27 | 15000) |
91 | 28 |
|
92 | | -(def ^:dynamic *n-threads* |
93 | | - "Allow a custom number of threads used during writing." |
| 29 | +(def ^{:dynamic true :deprecated true} *n-threads* |
| 30 | + "Deprecated -- no longer has any effect." |
94 | 31 | (+ 2 (.. Runtime getRuntime availableProcessors))) |
95 | 32 |
|
96 | | -(defmacro ^:private doparallel [[sym coll] & body] |
97 | | - "Performance hack for writing the POI cells. |
98 | | - Like (dotimes [x xs] ...) but parallel." |
99 | | - `(let [n# *n-threads* |
100 | | - equal-chunks# (loop [num# n#, parts# [], coll# ~coll, c# (count ~coll)] |
101 | | - (if (<= num# 0) |
102 | | - parts# |
103 | | - (let [t# (quot (+ c# num# -1) num#)] |
104 | | - (recur (dec num#) (conj parts# (take t# coll#)) |
105 | | - (drop t# coll#) (- c# t#))))) |
106 | | - workers# |
107 | | - (doall |
108 | | - (for [chunk# equal-chunks#] |
109 | | - (future |
110 | | - (doseq [~sym chunk#] |
111 | | - ~@body))))] |
112 | | - (doseq [w# workers#] |
113 | | - (deref w#)))) |
114 | | - |
115 | | -(defn- ^XSSFSheet write-grid! |
116 | | - "Modify the given workbook by adding a sheet with the given name built from |
117 | | - the provided grid. |
118 | | -
|
119 | | - The grid is a collection of rows, where each cell is either a plain, non-map |
120 | | - value or a map of {:value ..., :style ..., :width ...}, with :value being the |
121 | | - contents of the cell, :style being an optional map of style data, and :width |
122 | | - being an optional cell width dictating how many horizontal slots the cell |
123 | | - takes up (creates merged cells). |
124 | | -
|
125 | | - Returns the sheet object." |
126 | | - [^XSSFWorkbook workbook ^String sheet-name grid] |
127 | | - (let [^XSSFSheet sh (.createSheet workbook sheet-name) |
128 | | - build-style' (memoize ;; Immutable styles can share mutable objects :) |
129 | | - (fn [style-map] |
130 | | - (->> (style/merge-all style/default-style (or style-map {})) |
131 | | - (style/build-style workbook)))) |
132 | | - layout (volatile! {})] |
133 | | - (try |
134 | | - |
135 | | - ;; N.B. So this code got uglier due to performance. Writing the cells |
136 | | - ;; takes many seconds for a large sheet (~50,000 rows) and we can improve |
137 | | - ;; the process a bit by doing the cell creation sequentially and the cell |
138 | | - ;; writing in parallel (on test data set reduced from ~19s to ~14s). |
139 | | - |
140 | | - ;; Unfortunately much of the time is spent writing to disk (~8s). |
141 | | - |
142 | | - ;; We have to do this part sequentially because POI doesn't use |
143 | | - ;; thread-safe data structures |
144 | | - (doseq [[row-idx row-data] (map-indexed vector grid)] |
145 | | - (let [row (p :create-row (.createRow sh (int row-idx)))] |
146 | | - (loop [col-idx 0 cells row-data] |
147 | | - (when-let [cell-data (first cells)] |
148 | | - ;; (1) Build the cell |
149 | | - (let [cell (p :create-cell (.createCell ^XSSFRow row col-idx)) |
150 | | - width (if (map? cell-data) (get cell-data :width 1) 1)] |
151 | | - |
152 | | - ;; (2) Merge if necessary into adjacent cells |
153 | | - (when (> width 1) |
154 | | - (.addMergedRegion |
155 | | - sh (CellRangeAddress. |
156 | | - row-idx row-idx col-idx (dec (+ col-idx width))))) |
157 | | - |
158 | | - ;; (3) Save the cell |
159 | | - (vswap! layout assoc-in [row-idx col-idx] cell) |
160 | | - (recur (+ col-idx ^long width) (rest cells))))))) |
161 | | - |
162 | | - ;; We can do this part in parallel at least, since the cells are all |
163 | | - ;; different objects |
164 | | - (let [layout @layout] |
165 | | - (doparallel [row (map-indexed vector grid)] |
166 | | - (let [[row-idx row-data] row] |
167 | | - (loop [col-idx 0, cells row-data] |
168 | | - (when-let [cell-data (first cells)] |
169 | | - ;; (1) Find the cell |
170 | | - (let [width (if (map? cell-data) (get cell-data :width 1) 1) |
171 | | - ^XSSFCell cell (get (get layout row-idx) col-idx)] |
172 | | - |
173 | | - ;; (2) Write the cell data |
174 | | - (p :write-cell |
175 | | - (write-cell! cell (cond-> cell-data (map? cell-data) :value))) |
176 | | - |
177 | | - ;; (3) Set the cell style |
178 | | - (let [style (build-style' |
179 | | - (if (map? cell-data) (:style cell-data) {}))] |
180 | | - (p :set-cell-style |
181 | | - (.setCellStyle cell style))) |
182 | | - |
183 | | - (recur (+ col-idx ^long width) (rest cells)))))))) |
184 | | - (catch Exception e |
185 | | - (-> "Failed to write grid!" |
186 | | - (ex-info {:sheet-name sheet-name :grid grid} e) |
187 | | - (throw)))) |
188 | | - |
189 | | - (dotimes [i (transduce (map count) (completing max) 0 grid)] |
190 | | - |
191 | | - ;; Only auto-size small tables because it takes forever (~10s on a large |
192 | | - ;; grid) |
193 | | - (when (< (count grid) 2000) |
194 | | - (p :auto-size (.autoSizeColumn sh i))) |
195 | | - |
196 | | - (when (> (.getColumnWidth sh i) *max-col-width*) |
197 | | - (.setColumnWidth sh i *max-col-width*))) |
198 | | - |
199 | | - (p :set-print-settings |
200 | | - (.setFitToPage sh true) |
201 | | - (.setFitWidth (.getPrintSetup sh) 1)) |
202 | | - sh)) |
203 | | - |
204 | | -(defn- workbook! |
205 | | - "Create a new Apache POI XSSFWorkbook workbook object." |
206 | | - [] |
207 | | - (XSSFWorkbook.)) |
208 | | - |
209 | 33 | ;;; Higher-level code to specify grids in terms of Clojure data structures,
210 | 34 | ;;; organized as either a table or a tree
211 | 35 |
|
|
242 | 66 | {:value (get row col-name) |
243 | 67 | :style style})) |
244 | 68 | getters (map (fn [col-name] #(data-cell col-name %)) headers) |
245 | | - rows (mapv (apply juxt getters) tabular-data) |
246 | 69 | header-style (or header-style |
247 | 70 | ;; Add right alignment if it's an accounting column |
248 | 71 | (fn [name] |
249 | 72 | (cond-> (style/default-header-style name) |
250 | 73 | (@numeric? name) |
251 | 74 | (assoc :alignment :right))))] |
252 | | - (into |
253 | | - [(mapv #(->{:value % :style (header-style %)}) headers)] |
254 | | - rows))) |
| 75 | + (cons |
| 76 | + (map (fn [x] {:value x :style (header-style x)}) headers) |
| 77 | + (map (apply juxt getters) tabular-data)))) |
255 | 78 |
|
256 | 79 | (defn tree |
257 | 80 | "Build a sheet grid from the provided tree of data |
|
340 | 163 | The workbook is a key-value collection of (sheet-name grid), either as a map or
341 | 164 | an association list (if ordering is important)."
342 | 165 | [workbook path] |
343 | | - (let [path' (force-extension path "xlsx") |
344 | | - ;; Create the mutable, POI workbook object |
345 | | - ^XSSFWorkbook wb |
346 | | - (reduce |
347 | | - (fn [wb [sheet-name grid]] (doto wb (write-grid! sheet-name grid))) |
348 | | - (workbook!) |
349 | | - (seq workbook))] |
350 | | - (p :write-to-disk |
351 | | - (with-open [fos (io/output-stream (io/file (str path')))] |
352 | | - (.write wb fos))) |
353 | | - (io/file path'))) |
| 166 | + (let [convert-cell (fn [{:keys [value style width height] |
| 167 | + :or {width 1 height 1} |
| 168 | + :as cell-data}] |
| 169 | + (if-not (map? cell-data) |
| 170 | + (pt/wrapped cell-data) |
| 171 | + (-> (pt/wrapped value) |
| 172 | + (pt/style style) |
| 173 | + (pt/dims {:width width :height height})))) |
| 174 | + convert-row (fn [row] (map convert-cell row))] |
| 175 | + (pt/write! |
| 176 | + (map (fn [[sheet grid]] [sheet (map convert-row grid)]) workbook) |
| 177 | + path))) |
354 | 178 |
|
355 | 179 | (defn convert-pdf! |
356 | 180 | "Convert the `from-document`, either a File or a path to any office document, |
|
424 | 248 | ["This" "Row" "Has" "Its" "Own" |
425 | 249 | {:value "Format" :style {:font {:bold true}}}]]})) |
426 | 250 |
|
| 251 | + |
427 | 252 | (comment |
| 253 | + ;; This should open an Excel workbook |
| 254 | + (example) |
| 255 | + |
428 | 256 | ;; This will both open an example Excel sheet and write & open a test PDF file
429 | 257 | ;; with the same contents. On platforms without OpenOffice the convert-pdf!
430 | 258 | ;; call will most likely fail.
431 | | - (open (convert-pdf! (example) (temp ".pdf")))) |
| 259 | + (open (convert-pdf! (example) (temp ".pdf"))) |
| 260 | + |
| 261 | + ;; Expose ordering / styling issues in v1.2.X |
| 262 | + (quick-open |
| 263 | + [["Test" |
| 264 | + (table |
| 265 | + (for [x (range 10000)] |
| 266 | + {"N" x "N^2" (* x x) "N^3" (* x x x)}))]]) |
| 267 | + |
| 268 | + ;; Ballpark performance test |
| 269 | + (dotimes [_ 5] |
| 270 | + (time |
| 271 | + (write! |
| 272 | + [["Test" |
| 273 | + (table |
| 274 | + (for [x (range 100000)] |
| 275 | + {"N" x "N^2" (* x x) "N^3" (* x x x)}))]] |
| 276 | + "test.xlsx"))) |
| 277 | + |
| 278 | + ) |
0 commit comments