@@ -6,6 +6,7 @@ package dev.whyoleg.cryptography.serialization.pem
66
77import kotlinx.io.*
88import kotlinx.io.bytestring.*
9+ import kotlinx.io.bytestring.unsafe.*
910import kotlin.io.encoding.*
1011
1112public class PemDocument (
@@ -17,27 +18,14 @@ public class PemDocument(
1718 content: ByteArray ,
1819 ) : this (label, ByteString (content))
1920
20- public fun encodeToString (): String = buildString {
21- encodedLines().forEach(::appendLine)
22- }
/**
 * Encodes this document to its PEM text form.
 *
 * The PEM bytes are produced by [encodeToByteArrayImpl] and then decoded into a [String].
 */
public fun encodeToString(): String {
    val pemBytes = encodeToByteArrayImpl()
    return pemBytes.decodeToString()
}
2322
24- public fun encodeToByteArray (): ByteArray = encodeToString().encodeToByteArray()
25- public fun encodeToByteString (): ByteString = encodeToString().encodeToByteString()
/**
 * Encodes this document to PEM and returns the encoded bytes.
 *
 * Delegates directly to [encodeToByteArrayImpl].
 */
public fun encodeToByteArray(): ByteArray {
    return encodeToByteArrayImpl()
}
2624
27- public fun encodeToSink (sink : Sink ) {
28- encodedLines().forEach { line ->
29- sink.writeString(line)
30- sink.writeCodePointValue(' \n ' .code)
31- }
32- }
/**
 * Encodes this document to PEM and returns the result as a [ByteString].
 *
 * The array returned by [encodeToByteArrayImpl] is handed to
 * [UnsafeByteStringOperations.wrapUnsafe]; no other reference to that array is kept here.
 */
@OptIn(UnsafeByteStringApi::class)
public fun encodeToByteString(): ByteString {
    val pemBytes = encodeToByteArrayImpl()
    return UnsafeByteStringOperations.wrapUnsafe(pemBytes)
}
3327
34- // TODO: let's change implementation to use Base64.encodeToByteArray for Sink - there is no need to go through String
35- // same for encodeToByteString/encodeToByteArray
36- private fun encodedLines (): Sequence <String > = sequence {
37- yield (BEGIN_PREFIX + label.value + SUFFIX )
38- yieldAll(Base64 .encode(content).chunkedSequence(64 ))
39- yield (END_PREFIX + label.value + SUFFIX )
40- }
/**
 * Encodes this document to PEM and writes all encoded bytes to [sink].
 *
 * @param sink destination that receives the bytes produced by [encodeToByteArrayImpl]
 */
public fun encodeToSink(sink: Sink) {
    val pemBytes = encodeToByteArrayImpl()
    sink.write(pemBytes)
}
4129
4230 override fun equals (other : Any? ): Boolean {
4331 if (this == = other) return true
@@ -60,71 +48,258 @@ public class PemDocument(
6048 }
6149
6250 public companion object {
63- private const val BEGIN_PREFIX = " -----BEGIN "
64- private const val END_PREFIX = " -----END "
65- private const val SUFFIX = " -----"
66-
6751 // decode will skip comments and everything else which is not label or content
6852
6953 // will decode only the first one, even if there is something else after it
70- public fun decode (text : String ): PemDocument = decode(text.lineSequence())
71- public fun decode (bytes : ByteArray ): PemDocument = decode(bytes.decodeToString().lineSequence())
72- public fun decode (bytes : ByteString ): PemDocument = decode(bytes.decodeToString().lineSequence())
73- public fun decode (source : Source ): PemDocument = decode(generateSequence(source::readLine))
/**
 * Decodes the first PEM document found in [text]; anything after it is not inspected.
 *
 * @throws IllegalStateException if [text] contains no BEGIN label; failures raised by
 * [tryDecodeFromString] for malformed documents are propagated as well
 */
public fun decode(text: String): PemDocument =
    tryDecodeFromString(text, startIndex = 0, saveEndIndex = {})
        ?: error("Invalid PEM format: missing BEGIN label")
57+
/**
 * Lazily decodes every PEM document found in [text], in order of appearance.
 *
 * Decoding resumes after the end of the previously decoded document and stops when no further
 * BEGIN label is found. The "missing BEGIN label" failure is raised during iteration, once the
 * scan finishes without the position ever moving past index 0.
 */
public fun decodeToSequence(text: String): Sequence<PemDocument> = sequence {
    var nextIndex = 0
    while (nextIndex < text.length) {
        // the inline callback stores where the following search has to start
        val document = tryDecodeFromString(text, nextIndex) { nextIndex = it } ?: break
        yield(document)
    }
    check(nextIndex != 0) { "Invalid PEM format: missing BEGIN label" }
}
65+
/**
 * Decodes the first PEM document found in [bytes].
 *
 * The array is wrapped via [UnsafeByteStringOperations.wrapUnsafe] and passed to the
 * [ByteString] overload of `decode`.
 */
@OptIn(UnsafeByteStringApi::class)
public fun decode(bytes: ByteArray): PemDocument {
    val wrapped = UnsafeByteStringOperations.wrapUnsafe(bytes)
    return decode(wrapped)
}
7470
75- public fun decodeToSequence (text : String ): Sequence <PemDocument > = decodeToSequence(text.lineSequence())
76- public fun decodeToSequence (bytes : ByteArray ): Sequence <PemDocument > = decodeToSequence(bytes.decodeToString().lineSequence())
77- public fun decodeToSequence (bytes : ByteString ): Sequence <PemDocument > = decodeToSequence(bytes.decodeToString().lineSequence())
78- public fun decodeToSequence (source : Source ): Sequence <PemDocument > = decodeToSequence(generateSequence(source::readLine))
/**
 * Lazily decodes every PEM document found in [bytes].
 *
 * The array is wrapped via [UnsafeByteStringOperations.wrapUnsafe] and passed to the
 * [ByteString] overload of `decodeToSequence`; the returned sequence keeps reading from
 * that wrapper while it is iterated.
 */
@OptIn(UnsafeByteStringApi::class)
public fun decodeToSequence(bytes: ByteArray): Sequence<PemDocument> {
    val wrapped = UnsafeByteStringOperations.wrapUnsafe(bytes)
    return decodeToSequence(wrapped)
}
75+
/**
 * Decodes the first PEM document found in [bytes]; anything after it is not inspected.
 *
 * @throws IllegalStateException if [bytes] contains no BEGIN label; failures raised by
 * [tryDecodeFromByteString] for malformed documents are propagated as well
 */
public fun decode(bytes: ByteString): PemDocument =
    tryDecodeFromByteString(bytes, startIndex = 0, saveEndIndex = {})
        ?: error("Invalid PEM format: missing BEGIN label")
7979
80- // implementation
/**
 * Lazily decodes every PEM document found in [bytes], in order of appearance.
 *
 * Decoding resumes after the end of the previously decoded document and stops when no further
 * BEGIN label is found. The "missing BEGIN label" failure is raised during iteration, once the
 * scan finishes without the position ever moving past index 0.
 */
public fun decodeToSequence(bytes: ByteString): Sequence<PemDocument> = sequence {
    var nextIndex = 0
    while (nextIndex < bytes.size) {
        // the inline callback stores where the following search has to start
        val document = tryDecodeFromByteString(bytes, nextIndex) { nextIndex = it } ?: break
        yield(document)
    }
    check(nextIndex != 0) { "Invalid PEM format: missing BEGIN label" }
}
8187
82- // it will never be empty
83- private fun decode (lines : Sequence <String >): PemDocument = decodeToSequence(lines).first()
/**
 * Decodes the first PEM document read from [source].
 *
 * @throws IllegalStateException if no BEGIN label is found; failures raised by
 * [tryDecodeFromSource] for malformed documents are propagated as well
 */
public fun decode(source: Source): PemDocument =
    tryDecodeFromSource(source) ?: error("Invalid PEM format: missing BEGIN label")
8491
85- // it will never be empty, or will throw an error - TBD
86- private fun decodeToSequence (lines : Sequence <String >): Sequence <PemDocument > = sequence {
/**
 * Lazily decodes every PEM document read from [source], in order of appearance.
 *
 * Iteration continues until the source is exhausted or [tryDecodeFromSource] finds no further
 * BEGIN label. The "missing BEGIN label" failure is raised during iteration when not a single
 * document was decoded.
 */
public fun decodeToSequence(source: Source): Sequence<PemDocument> = sequence {
    var hasAtLeastOneBeginLabel = false
    while (!source.exhausted()) {
        val document = tryDecodeFromSource(source) ?: break
        yield(document)
        hasAtLeastOneBeginLabel = true
    }
    check(hasAtLeastOneBeginLabel) { "Invalid PEM format: missing BEGIN label" }
}
100+ }
101+ }
102+
// Text form of the PEM framing markers (used when scanning a String).
private const val NEW_LINE: Char = '\n'
private const val BEGIN_PREFIX: String = "-----BEGIN "
private const val END_PREFIX: String = "-----END "
private const val SUFFIX: String = "-----"

// Byte form of the same markers (used when scanning/encoding bytes).
private const val NEW_LINE_BYTE: Byte = NEW_LINE.code.toByte()
private val BEGIN_BYTES: ByteArray = BEGIN_PREFIX.encodeToByteArray()
private val END_BYTES: ByteArray = END_PREFIX.encodeToByteArray()
private val SUFFIX_BYTES: ByteArray = SUFFIX.encodeToByteArray()
112+
// Overall, performance depends significantly on the target:
// some targets (e.g. wasmJs) may work faster with byte arrays than with strings,
// e.g. tryDecodeFromByteString(text.encodeToByteArray()) is about 50% faster than tryDecodeFromString(text),
// but hopefully this will improve in the future.
// On JVM, operations on byte arrays are always faster :)
118+
// Author's measurements:
// 1.5 times faster than naive encodeToString()
// 2 times faster than naive encodeToString().encodeToByteArray()
// naive encodeToString impl:
//  return buildString {
//      append(BEGIN_PREFIX).append(label.value).appendLine(SUFFIX)
//      Base64.Pem.encodeToAppendable(content, this).appendLine()
//      append(END_PREFIX).append(label.value).appendLine(SUFFIX)
//  }
/**
 * Encodes this document into a single, exactly-sized byte array laid out as:
 *
 * ```
 * -----BEGIN <label>-----\n
 * <Base64.Pem encoding of content>\n
 * -----END <label>-----\n
 * ```
 *
 * @throws IllegalArgumentException ("Input is too big") if the Base64 size computation overflows [Int]
 */
private fun PemDocument.encodeToByteArrayImpl(): ByteArray {
    // Size of the Base64.Pem output for `sourceSize` input bytes; based on kotlin.Base64 implementation:
    // 4 symbols per (possibly partial) group of 3 bytes, plus 2 separator bytes per 64-symbol line break.
    fun base64EncodedSize(sourceSize: Int): Int {
        val fullGroups = sourceSize / 3
        val hasTrailingBytes = sourceSize % 3 != 0
        var symbols = fullGroups * 4
        if (hasTrailingBytes) symbols += 4
        require(symbols >= 0) { "Input is too big" } // Int overflow
        symbols += ((symbols - 1) / 64) * 2
        require(symbols >= 0) { "Input is too big" } // Int overflow
        return symbols
    }

    val labelBytes = label.value.encodeToByteArray()
    val base64Size = base64EncodedSize(content.size)

    // each framing line is: prefix + label + suffix + '\n'
    val beginLineSize = BEGIN_BYTES.size + labelBytes.size + SUFFIX_BYTES.size + 1
    val endLineSize = END_BYTES.size + labelBytes.size + SUFFIX_BYTES.size + 1
    val output = ByteArray(beginLineSize + base64Size + 1 + endLineSize)

    var offset = 0

    // `-----BEGIN LABEL-----\n`
    BEGIN_BYTES.copyInto(output, offset)
    offset += BEGIN_BYTES.size
    labelBytes.copyInto(output, offset)
    offset += labelBytes.size
    SUFFIX_BYTES.copyInto(output, offset)
    offset += SUFFIX_BYTES.size
    output[offset++] = NEW_LINE_BYTE

    // `base64\n`
    Base64.Pem.encodeIntoByteArray(content, output, offset)
    offset += base64Size
    output[offset++] = NEW_LINE_BYTE

    // `-----END LABEL-----\n`
    END_BYTES.copyInto(output, offset)
    offset += END_BYTES.size
    labelBytes.copyInto(output, offset)
    offset += labelBytes.size
    SUFFIX_BYTES.copyInto(output, offset)
    offset += SUFFIX_BYTES.size
    output[offset] = NEW_LINE_BYTE // offset is now the last index of the array

    return output
}
173+
// Author's measurement: 1.5 times faster than using lineSequence()
/**
 * Tries to decode one PEM document from [text], searching from [startIndex].
 *
 * @param text the text to scan
 * @param startIndex index at which the search for the BEGIN prefix starts
 * @param saveEndIndex receives the index at which a following search should resume: right after the
 * END line's new line, or right after the END suffix when the END line has no new line.
 * It is invoked before the Base64 content is decoded.
 * @return the decoded document, or `null` when no BEGIN prefix is found at or after [startIndex]
 * @throws IllegalStateException if a BEGIN prefix is found but the framing is malformed
 */
private inline fun tryDecodeFromString(
    text: String,
    startIndex: Int,
    saveEndIndex: (endIndex: Int) -> Unit,
): PemDocument? {
    val beginAt = text.indexOf(BEGIN_PREFIX, startIndex)
    if (beginAt < 0) return null

    // BEGIN line: the suffix must appear before the line ends
    val beginLabelStart = beginAt + BEGIN_PREFIX.length
    val beginLineEnd = text.indexOf(NEW_LINE, beginLabelStart)
    check(beginLineEnd >= 0) { "Invalid PEM format: missing new line after BEGIN label" }
    val beginSuffixAt = text.indexOf(SUFFIX, beginLabelStart)
    if (beginSuffixAt !in 0..beginLineEnd) error("Invalid PEM format: missing BEGIN label suffix")

    val beginLabel = text.substring(beginLabelStart, beginSuffixAt)

    // END line: the trailing new line is optional, but the suffix must be on the same line
    val endAt = text.indexOf(END_PREFIX, beginLineEnd)
    check(endAt >= 0) { "Invalid PEM format: missing END label" }
    val endLabelStart = endAt + END_PREFIX.length
    val endLineEnd = text.indexOf(NEW_LINE, endLabelStart)
    val endSuffixAt = text.indexOf(SUFFIX, endLabelStart)
    val hasEndLineBreak = endLineEnd >= 0
    if (endSuffixAt < 0 || (hasEndLineBreak && endSuffixAt > endLineEnd)) {
        error("Invalid PEM format: missing END label suffix")
    }

    val endLabel = text.substring(endLabelStart, endSuffixAt)
    check(endLabel == beginLabel) { "Invalid PEM format: BEGIN=`$beginLabel`, END=`$endLabel`" }

    val resumeAt = if (hasEndLineBreak) endLineEnd + 1 else endSuffixAt + SUFFIX.length
    saveEndIndex(resumeAt)

    return PemDocument(
        label = PemLabel(beginLabel),
        content = Base64.Pem.decodeToByteString(
            source = text,
            startIndex = beginLineEnd + 1, // skip the new line that ends the BEGIN line
            endIndex = endAt,
        ),
    )
}
214+
// Author's measurements:
// 1.5 times faster than decode(bytes.decodeToString())
// 2 times faster than using lineSequence()
/**
 * Tries to decode one PEM document from [bytes], searching from [startIndex].
 *
 * @param bytes the bytes to scan
 * @param startIndex index at which the search for the BEGIN prefix starts
 * @param saveEndIndex receives the index at which a following search should resume: right after the
 * END line's new line, or right after the END suffix when the END line has no new line.
 * It is invoked before the Base64 content is decoded.
 * @return the decoded document, or `null` when no BEGIN prefix is found at or after [startIndex]
 * @throws IllegalStateException if a BEGIN prefix is found but the framing is malformed
 */
private inline fun tryDecodeFromByteString(
    bytes: ByteString,
    startIndex: Int,
    saveEndIndex: (endIndex: Int) -> Unit,
): PemDocument? {
    val beginAt = bytes.indexOf(BEGIN_BYTES, startIndex)
    if (beginAt < 0) return null

    // BEGIN line: the suffix must appear before the line ends
    val beginLabelStart = beginAt + BEGIN_BYTES.size
    val beginLineEnd = bytes.indexOf(NEW_LINE_BYTE, beginLabelStart)
    check(beginLineEnd >= 0) { "Invalid PEM format: missing new line after BEGIN label" }
    val beginSuffixAt = bytes.indexOf(SUFFIX_BYTES, beginLabelStart)
    if (beginSuffixAt !in 0..beginLineEnd) error("Invalid PEM format: missing BEGIN label suffix")

    val beginLabel = bytes.substring(beginLabelStart, beginSuffixAt)

    // END line: the trailing new line is optional, but the suffix must be on the same line
    val endAt = bytes.indexOf(END_BYTES, beginLineEnd)
    check(endAt >= 0) { "Invalid PEM format: missing END label" }
    val endLabelStart = endAt + END_BYTES.size
    val endLineEnd = bytes.indexOf(NEW_LINE_BYTE, endLabelStart)
    val endSuffixAt = bytes.indexOf(SUFFIX_BYTES, endLabelStart)
    val hasEndLineBreak = endLineEnd >= 0
    if (endSuffixAt < 0 || (hasEndLineBreak && endSuffixAt > endLineEnd)) {
        error("Invalid PEM format: missing END label suffix")
    }

    val endLabel = bytes.substring(endLabelStart, endSuffixAt)
    check(endLabel == beginLabel) {
        "Invalid PEM format: BEGIN=`${beginLabel.decodeToString()}`, END=`${endLabel.decodeToString()}`"
    }

    val resumeAt = if (hasEndLineBreak) endLineEnd + 1 else endSuffixAt + SUFFIX_BYTES.size
    saveEndIndex(resumeAt)

    return PemDocument(
        label = PemLabel(beginLabel.decodeToString()),
        content = Base64.Pem.decodeToByteString(
            source = bytes,
            startIndex = beginLineEnd + 1, // skip the new line that ends the BEGIN line
            endIndex = endAt,
        ),
    )
}
258+
// Author's measurement: 2 times faster than using lineSequence()
/**
 * Tries to read one PEM document from [source], consuming the bytes it processes.
 *
 * All indices below are relative to the current read position of [source], which moves forward
 * with every `skip`/`readByteString` call.
 *
 * @return the decoded document, or `null` when no BEGIN prefix is found; in that case the
 * remaining content of [source] is discarded
 * @throws IllegalStateException if a BEGIN prefix is found but the framing is malformed
 */
@OptIn(UnsafeByteStringApi::class)
private fun tryDecodeFromSource(source: Source): PemDocument? {
    // Source.indexOf searches for a ByteString, so wrap the marker arrays once up front
    val beginMarker = UnsafeByteStringOperations.wrapUnsafe(BEGIN_BYTES)
    val endMarker = UnsafeByteStringOperations.wrapUnsafe(END_BYTES)
    val suffixMarker = UnsafeByteStringOperations.wrapUnsafe(SUFFIX_BYTES)

    val beginAt = source.indexOf(beginMarker)
    if (beginAt == -1L) {
        // we haven't found BEGIN label, but we already read everything - discard it
        source.transferTo(discardingSink())
        return null
    }
    // drop everything up to and including the BEGIN prefix
    source.skip(beginAt + BEGIN_BYTES.size)

    // BEGIN line: the suffix must appear before the line ends
    val beginLineEnd = source.indexOf(NEW_LINE_BYTE)
    check(beginLineEnd != -1L) { "Invalid PEM format: missing new line after BEGIN label" }
    val beginSuffixAt = source.indexOf(suffixMarker)
    if (beginSuffixAt == -1L || beginSuffixAt > beginLineEnd) {
        error("Invalid PEM format: missing BEGIN label suffix")
    }

    val beginLabel = source.readByteString(beginSuffixAt.toInt())
    source.skip(beginLineEnd + 1 - beginSuffixAt) // skip suffix & new line

    // everything up to the END prefix is the Base64 body
    val endAt = source.indexOf(endMarker)
    check(endAt != -1L) { "Invalid PEM format: missing END label" }

    val base64Content = source.readByteString(endAt.toInt())
    source.skip(END_BYTES.size.toLong())

    // END line: the trailing new line is optional, but the suffix must be on the same line
    val endLineEnd = source.indexOf(NEW_LINE_BYTE)
    val endSuffixAt = source.indexOf(suffixMarker)
    val hasEndLineBreak = endLineEnd != -1L
    if (endSuffixAt == -1L || (hasEndLineBreak && endSuffixAt > endLineEnd)) {
        error("Invalid PEM format: missing END label suffix")
    }

    val endLabel = source.readByteString(endSuffixAt.toInt())
    val remainingOnEndLine = if (hasEndLineBreak) endLineEnd + 1 - endSuffixAt else SUFFIX_BYTES.size.toLong()
    source.skip(remainingOnEndLine)

    check(endLabel == beginLabel) {
        "Invalid PEM format: BEGIN=`${beginLabel.decodeToString()}`, END=`${endLabel.decodeToString()}`"
    }

    return PemDocument(
        label = PemLabel(beginLabel.decodeToString()),
        content = Base64.Pem.decodeToByteString(base64Content),
    )
}
0 commit comments