Skip to content

Commit dc52356

Browse files
committed
[new] Improve data compatibility when updating Nippy versions
When support is added for a new type in Nippy version X, it necessarily means that data containing that new type and frozen with Nippy version X is unthawable with Nippy versions < X.

Earlier versions of Nippy will throw an exception on thawing affected data: "Unrecognized type id (<n>). Data frozen with newer Nippy version?"

This can present a challenge when updating to new versions of Nippy, e.g.:

- Rolling updates could lead to old and new versions of Nippy temporarily co-existing.
- Data written with new types could limit your ability to revert a Nippy update.

There's no easy solution to this in GENERAL, but we CAN at least help reduce the burden related to CHANGES in core data types by introducing changes over 2 phases:

1. Nippy vX reads new (changed) type, writes old type
2. Nippy vX+1 writes new (changed) type

When relevant, we can then warn users in the CHANGELOG to not leapfrog (e.g. Nippy vX -> Nippy vX+2) when doing rolling updates.

This commit bootstraps the new compatibility feature by initially targeting core type compatibility with Nippy v3.2.0 (2022-07-18). A future Nippy version (e.g. v3.5.0) will then target v3.4.0, with an appropriate CHANGELOG instruction to update in phases for environments that involve rolling updates.
1 parent bd4d520 commit dc52356

File tree

3 files changed

+147
-26
lines changed

3 files changed

+147
-26
lines changed

src/taoensso/nippy.clj

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -662,9 +662,10 @@
662662
(let [ba (.getBytes s StandardCharsets/UTF_8)
663663
len (alength ba)]
664664
(enc/cond
665-
(sm-count?* len) (do (write-id out id-str-sm*) (write-sm-count* out len))
666-
(md-count? len) (do (write-id out id-str-md) (write-md-count out len))
667-
:else (do (write-id out id-str-lg) (write-lg-count out len)))
665+
(and (impl/target-release>= 330) (sm-count?* len)) (do (write-id out id-str-sm*) (write-sm-count* out len))
666+
(and (impl/target-release< 330) (sm-count? len)) (do (write-id out id-str-sm_) (write-sm-count out len))
667+
(md-count? len) (do (write-id out id-str-md) (write-md-count out len))
668+
:else (do (write-id out id-str-lg) (write-lg-count out len)))
668669

669670
(.write out ba 0 len))))
670671

@@ -692,8 +693,26 @@
692693

693694
(.write out ba 0 len)))
694695

696+
;; Writes long `n` to `out` using the older long encoding. `write-long`
;; delegates here when `(impl/target-release< 330)` holds, so that the
;; output avoids the long-pos-*/long-neg-* type ids introduced in v3.3.0.
;;
;; Encoding: zero is written as the bare type id `id-long-0` (no payload).
;; Any other value is written as a type id followed by the narrowest signed
;; payload that can hold it: byte (`id-long-sm_`), short (`id-long-md_`),
;; int (`id-long-lg_`), else a full long (`id-long-xl`).
(defn- write-long-legacy [^DataOutput out ^long n]
697+
(enc/cond
698+
(zero? n) (write-id out id-long-0)
699+
(pos? n)
700+
(enc/cond
701+
(<= n Byte/MAX_VALUE) (do (write-id out id-long-sm_) (.writeByte out n))
702+
(<= n Short/MAX_VALUE) (do (write-id out id-long-md_) (.writeShort out n))
703+
(<= n Integer/MAX_VALUE) (do (write-id out id-long-lg_) (.writeInt out n))
704+
:else (do (write-id out id-long-xl) (.writeLong out n)))
705+
706+
;; Negative values reuse the same type ids as the positive branch; only the
;; range checks differ (lower bounds instead of upper bounds).
:else
707+
(enc/cond
708+
(>= n Byte/MIN_VALUE) (do (write-id out id-long-sm_) (.writeByte out n))
709+
(>= n Short/MIN_VALUE) (do (write-id out id-long-md_) (.writeShort out n))
710+
(>= n Integer/MIN_VALUE) (do (write-id out id-long-lg_) (.writeInt out n))
711+
:else (do (write-id out id-long-xl) (.writeLong out n)))))
712+
695713
(defn- write-long [^DataOutput out ^long n]
696714
(enc/cond
715+
(impl/target-release< 330) (write-long-legacy out n)
697716
(zero? n) (write-id out id-long-0)
698717
(pos? n)
699718
(enc/cond
@@ -719,14 +738,10 @@
719738
(write-id out id-vec-0)
720739
(do
721740
(enc/cond
722-
(sm-count?* cnt)
723-
(enc/cond
724-
(== cnt 2) (write-id out id-vec-2)
725-
(== cnt 3) (write-id out id-vec-3)
726-
:else (do (write-id out id-vec-sm*) (write-sm-count* out cnt)))
727-
728-
(md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt))
729-
:else (do (write-id out id-vec-lg) (write-lg-count out cnt)))
741+
(and (impl/target-release>= 330) (sm-count?* cnt)) (do (write-id out id-vec-sm*) (write-sm-count* out cnt))
742+
(and (impl/target-release< 330) (sm-count? cnt)) (do (write-id out id-vec-sm_) (write-sm-count out cnt))
743+
(md-count? cnt) (do (write-id out id-vec-md) (write-md-count out cnt))
744+
:else (do (write-id out id-vec-lg) (write-lg-count out cnt)))
730745

731746
(-run! (fn [in] (-freeze-with-meta! in out)) v)))))
732747

@@ -817,6 +832,8 @@
817832
(write-counted-coll out id-empty id-sm id-md id-lg coll)
818833
(write-uncounted-coll out id-empty id-sm id-md id-lg coll))))
819834

835+
;; Placeholder keyword that stands in for a stripped metadata protocol
;; extension key. `write-map` freezes it (followed by `id-nil`) in place of such
;; an entry when the target release is < 340, `id-meta-protocol-key` thaws to
;; it, and the `id-meta` thaw branch dissocs it from metadata before use.
(def ^:private ^:const meta-protocol-key ::meta-protocol-key)
836+
820837
;; Micro-optimization:
821838
;; As (write-kvs out id-map-0 id-map-sm id-map-md id-map-lg x)
822839
(defn- write-map [^DataOutput out m is-metadata?]
@@ -825,17 +842,20 @@
825842
(write-id out id-map-0)
826843
(do
827844
(enc/cond
828-
(sm-count?* cnt) (do (write-id out id-map-sm*) (write-sm-count* out cnt))
829-
(md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt))
830-
:else (do (write-id out id-map-lg) (write-lg-count out cnt)))
845+
(and (impl/target-release>= 330) (sm-count?* cnt)) (do (write-id out id-map-sm*) (write-sm-count* out cnt))
846+
(and (impl/target-release< 330) (sm-count? cnt)) (do (write-id out id-map-sm_) (write-sm-count out cnt))
847+
(md-count? cnt) (do (write-id out id-map-md) (write-md-count out cnt))
848+
:else (do (write-id out id-map-lg) (write-lg-count out cnt)))
831849

832850
(-run-kv!
833851
(fn [k v]
834852
(if (and is-metadata? (fn? v) (qualified-symbol? k))
835853
(do
836854
;; Strip Clojure v1.10+ metadata protocol extensions
837855
;; (used by defprotocol `:extend-via-metadata`)
838-
(write-id out id-meta-protocol-key)
856+
(if (impl/target-release>= 340)
857+
(write-id out id-meta-protocol-key)
858+
(-freeze-without-meta! meta-protocol-key out))
839859
(write-id out id-nil))
840860
(do
841861
(-freeze-with-meta! k out)
@@ -852,9 +872,10 @@
852872
(write-id out id-set-0)
853873
(do
854874
(enc/cond
855-
(sm-count?* cnt) (do (write-id out id-set-sm*) (write-sm-count* out cnt))
856-
(md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt))
857-
:else (do (write-id out id-set-lg) (write-lg-count out cnt)))
875+
(and (impl/target-release>= 330) (sm-count?* cnt)) (do (write-id out id-set-sm*) (write-sm-count* out cnt))
876+
(and (impl/target-release< 330) (sm-count? cnt)) (do (write-id out id-set-sm_) (write-sm-count out cnt))
877+
(md-count? cnt) (do (write-id out id-set-md) (write-md-count out cnt))
878+
:else (do (write-id out id-set-lg) (write-lg-count out cnt)))
858879

859880
(-run! (fn [in] (-freeze-with-meta! in out)) s)))))
860881

@@ -1533,11 +1554,11 @@
15331554
id-false false
15341555
id-char (.readChar in)
15351556

1536-
id-meta-protocol-key ::meta-protocol-key
1557+
id-meta-protocol-key meta-protocol-key
15371558
id-meta
15381559
(let [m (thaw-from-in! in) ; Always consume from stream
15391560
x (thaw-from-in! in)]
1540-
(if-let [m (when *incl-metadata?* (not-empty (dissoc m ::meta-protocol-key)))]
1561+
(if-let [m (when *incl-metadata?* (not-empty (dissoc m meta-protocol-key)))]
15411562
(with-meta x m)
15421563
(do x)))
15431564

src/taoensso/nippy/impl.clj

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,96 @@
151151
See that function's docstring for more info."
152152
[] (trim nmax (state_))))
153153

154+
;;;; Release targeting
155+
156+
(comment
157+
(set! *print-length* nil)
158+
(vec (sort (keys taoensso.nippy/public-types-spec)))
159+
160+
;; To help support release targeting, we keep track of when new type ids are added
161+
(let [id-history ; {<release> #{type-ids}}
162+
{340 ; v3.4.0 (2024-04-30), added 2
163+
;; New: map-entry meta-protocol-key
164+
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
165+
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
166+
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
167+
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
168+
105 106 110 111 112 113 114 115}
169+
170+
330 ; v3.3.0 (2023-10-11), added 11
171+
;; New: long-pos-sm long-pos-md long-pos-lg long-neg-sm long-neg-md long-neg-lg
172+
;; str-sm* vec-sm* set-sm* map-sm* sql-date
173+
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
174+
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
175+
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
176+
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 105 106
177+
110 111 112 113 114 115}
178+
179+
320 ; v3.2.0 (2022-07-18), added none
180+
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
181+
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
182+
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
183+
81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115}
184+
185+
313 ; v3.1.3 (2022-06-23), added 5
186+
;; New: time-instant time-duration time-period kw-md sym-md
187+
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
188+
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
189+
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
190+
81 82 83 84 85 86 90 91 100 101 102 105 106 110 111 112 113 114 115}
191+
192+
300 ; v3.0.0 (2020-09-20), baseline
193+
#{0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
194+
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
195+
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 80
196+
81 82 90 91 100 101 102 105 106 110 111 112 113 114 115}}]
197+
198+
(defn diff [new-release old-release]
199+
(vec (sort (clojure.set/difference (id-history new-release) (id-history old-release))))))
200+
201+
(diff 340 330))
202+
203+
(let [;; Initially target compatibility with v3.2.0 (2022-07-18)
204+
;; Next release will target v3.4.0 (2024-04-30), etc.
205+
target-release
206+
(enc/get-env {:as :edn, :default 320}
207+
:taoensso.nippy.target-release)
208+
209+
target>=
210+
(fn [min-release]
211+
(if target-release
212+
(>= (long target-release) (long min-release))
213+
true))]
214+
215+
;; Logical complement of `target-release>=`: expands to true iff the configured
;; `target-release` is non-nil and < the given `min-release`, else false.
;; The check runs at macro-expansion time, and `min-release` is passed to `long`
;; unevaluated, so it must be a literal number (e.g. 330), not an expression.
(defmacro target-release< [min-release] (not (target>= min-release)))
216+
(defmacro target-release>=
217+
"Returns true iff `target-release` is nil or >= given `min-release`.
218+
Used to help ease data migration for changes to core data types.
219+
220+
When support is added for a new type in Nippy version X, it necessarily means
221+
that data containing that new type and frozen with Nippy version X is unthawable
222+
with Nippy versions < X.
223+
224+
Earlier versions of Nippy will throw an exception on thawing affected data:
225+
\"Unrecognized type id (<n>). Data frozen with newer Nippy version?\"
226+
227+
This can present a challenge when updating to new versions of Nippy, e.g.:
228+
229+
- Rolling updates could lead to old and new versions of Nippy temporarily co-existing.
230+
- Data written with new types could limit your ability to revert a Nippy update.
231+
232+
There's no easy solution to this in GENERAL, but we CAN at least help reduce the
233+
burden related to CHANGES in core data types by introducing changes over 2 phases:
234+
235+
1. Nippy vX reads new (changed) type, writes old type
236+
2. Nippy vX+1 writes new (changed) type
237+
238+
When relevant, we can then warn users in the CHANGELOG to not leapfrog
239+
(e.g. Nippy vX -> Nippy vX+2) when doing rolling updates."
240+
[min-release] (target>= min-release)))
241+
242+
(comment (macroexpand '(target-release>= 340)))
243+
154244
;;;
155245

156246
(comment

test/taoensso/nippy_tests.clj

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,19 @@
192192

193193
(defn ba-hash [^bytes ba] (hash (seq ba)))
194194

195+
;; Builds a fresh hash map from `test-data` by applying `ba-hash` to the frozen
;; form of each value (presumably keyed like `test-data`, via `enc/map-vals`).
;; Intended for regenerating reference hash maps such as `ref-hashes-v341`.
(defn gen-hashes [] (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data))
196+
;; Returns a sorted vector of the keys in `new` whose value differs from the
;; value under the same key in `old` (a key missing from `old` counts as a
;; difference unless its value in `new` is nil). The comparison is
;; one-directional: keys present only in `old` are not reported.
(defn cmp-hashes [new old] (vec (sort (reduce-kv (fn [s k v] (if (= (get old k) v) s (conj s k))) #{} new))))
197+
198+
(def ref-hashes-v341
199+
{:deftype -148586793, :lazy-seq-empty 1277437598, :true -1809580601, :long 598276629, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta -858252893, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 2005004017, :many-strings 1738215727, :nested -1350538572, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 813550992, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -921330463, :subvec 709331681, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1207486853, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord -553848560, :sorted-map -1160380145, :sql-date 80018667, :map-entry 1219306839, :false-boxed 1506926383, :uri 870148616, :period -2043530540, :many-longs -1109794519, :uuid -338331115, :set 1649942133, :kw-ns 1050084331, :map 1989337680, :many-doubles -827569787, :char 858269588})
200+
201+
(def ref-hashes-v340
202+
{:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588})
203+
204+
(comment
205+
(cmp-hashes ref-hashes-v341 ref-hashes-v340)
206+
[:defrecord :deftype :list :long :many-longs :many-strings :map :map-entry :meta :nested :set :sorted-map :sorted-set :str-short :subvec :uri :vector])
207+
195208
(deftest _stable-serialized-output
196209
(testing "Stable serialized output"
197210

@@ -204,12 +217,9 @@
204217
(is (ba= (freeze (sorted-map :a 1 :b 1))
205218
(freeze (sorted-map :b 1 :a 1))) "Sorted structures are generally safe")
206219

207-
;; Track serialized output of stress data so that we can at least be aware of
208-
;; (and warn about) unintended changes for common/elementary types, etc. Note that
209-
;; reference hashes will need to be recalculated on changes to stress data.
210-
(let [reference-hashes ; (enc/map-vals (fn [v] (ba-hash (freeze v))) test-data)
211-
{:deftype 1529147805, :lazy-seq-empty 1277437598, :true -1809580601, :long 219451189, :double -454270428, :lazy-seq -1039619789, :short 1152993378, :meta 352218350, :str-long -1970041891, :instant -1401948864, :many-keywords 665654816, :bigint 2033662230, :sym-ns 769802402, :queue 447747779, :float 603100813, :sorted-set 1443292905, :many-strings 1777678883, :nested -1590473924, :queue-empty 1760934486, :duration -775528642, :false 1506926383, :vector 89425525, :util-date 1326218051, :kw 389651898, :sym -1742024487, :str-short -1097575232, :subvec -2047667173, :kw-long 852232872, :integer 624865727, :sym-long -1535730190, :list -1113199651, :ratio 1186850097, :byte -1041979678, :bigdec -1846988137, :nil 2005042235, :defrecord 287634761, :sorted-map 1464032648, :sql-date 80018667, :map-entry -1353323498, :false-boxed 1506926383, :uri -1374752165, :period -2043530540, :many-longs 759118414, :uuid -338331115, :set -1515144175, :kw-ns 1050084331, :map 358912619, :many-doubles -827569787, :char 858269588}
212-
220+
;; Track serialized output of stress data so that we can detect unintentional changes,
221+
;; and warn about intended ones. Hashes will need to be recalculated on changes to stress data.
222+
(let [reference-hashes ref-hashes-v341
213223
failures ; #{{:keys [k v]}}
214224
(reduce-kv
215225
(fn [failures k v]

0 commit comments

Comments
 (0)