@@ -136,17 +136,122 @@ function linuxkit_build() {
136136 " ${linuxkit_bin} " build " --format" " kernel+initrd" " ${lk_debug_args[@]} " " ${lk_args[@]} "
137137
138138 declare initramfs_path=" ${lk_output_dir} /hook-initrd.img"
139+
139140 # initramfs_path is a gzipped file. obtain the uncompressed byte size, without decompressing it
140- declare -i initramfs_size_bytes=0
141- initramfs_size_bytes=$( gzip -l " ${initramfs_path} " | tail -n 1 | awk ' {print $2}' )
142- log info " Uncompressed initramfs size in bytes: ${initramfs_size_bytes} "
143- # If the size is larger than 900mb, it is unlikely to boot on a 2gb RAM machine. Warn.
144- if [[ " ${initramfs_size_bytes} " -gt 943718400 ]]; then
145- log warn " ${inventory_id} : Uncompressed initramfs size (${initramfs_size_bytes} bytes) is larger than 900mb; it may not boot on a 2gb RAM machine."
146- else
147- log notice " ${inventory_id} : Uncompressed initramfs size (${initramfs_size_bytes} bytes) is smaller than 900mb."
141+ declare -i initramfs_size_bytes_initial=0 initramfs_size_bytes_gzip=0 initramfs_size_bytes_zstd=0
142+ initramfs_size_bytes_gzip=$( stat -c%s " ${initramfs_path} " )
143+ initramfs_size_bytes_initial=$( gzip -l " ${initramfs_path} " | tail -n 1 | awk ' {print $2}' )
144+ log info " Compressed-gzip (initial) initramfs size in bytes: ${initramfs_size_bytes_gzip} "
145+ log info " Uncompressed initial initramfs size in bytes: ${initramfs_size_bytes_initial} "
146+
147+ # Brief detour to:
148+ # 1) Decompress the initramfs (`gunzip`) and extract it to a directory (`cpio`)
149+ # This de-duplicates some cpio-duplicates leftover by linuxkit (some kb's)
150+ # 2) Produce reports on the initramfs contents:
151+ # - disk usage (by size) of the initramfs contents (du -h -d 10 -x | sort -h | tail -n 20)
152+ # - aggregated basename-identical files in the initramfs, with their size and hash
153+ # This will help us find things to optimize in the lkcontainers:
154+ # - use same base image for all containers (deduplicate musl + others)
155+ # - avoid different versions of stuff (containerd in hook-containerd but also in hook-docker)
156+ # - avoid large files that are not needed in the initramfs (docs)
157+ # 3) Use `rdfind` to replace exact duplicates with hardlinks (many mb's!)
158+ # 4) Repack the initramfs into `cpio` and compress it with `zstd` level 9 (about 30% better, many mb's!)
159+ # All the Hook kernels already support zstd initramfs decompression, so this is safe to do. Performance might be better too.
160+ #
161+ # Since we need tools and do-it-as-root for this, it's best done using a Docker container
162+ declare -a compressor_deps=(" bash" " gawk" " cpio" " zstd" " rdfind" " gzip" " pigz" " coreutils" " findutils" " file" " du-dust" )
163+ declare initramfs_compressor_dockerfile=" ${lk_output_dir} /Dockerfile.initramfs_compressor"
164+ declare -r output_compressed_initramfs_name=" initramfs-compressed.img" output_report_name=" report.md"
165+
166+ # I *really* don't want to escape this; bear with me
167+ find_same_name_files_command=" $(
168+ cat << - 'FIND_SAME_NAME_FILES_COMMAND '
169+ find . -type f -size +512k -printf "%f %p\n" | sort | awk '{files[$1]=files[$1] ? files[$1] "\n"$2 : $2; count[$1]++} END {for (f in count) if (count[f]>1) print f "\n" files[f]}' | while read -r line; do if [[ -f "$line" ]]; then stat --printf="%s bytes " "$line"; md5sum "$line"; else echo "### duplicate: '$line'"; fi; done
170+ FIND_SAME_NAME_FILES_COMMAND
171+ ) "
172+
173+ log info " Creating Dockerfile '${initramfs_compressor_dockerfile} '... "
174+ cat << - INITRAMFS_COMPRESSOR_DOCKERFILE > "${initramfs_compressor_dockerfile} "
175+ FROM debian:stable AS builder
176+ RUN mkdir -p /output
177+ ENV DEBIAN_FRONTEND=noninteractive
178+ RUN apt-get -qq -o "Dpkg::Use-Pty=0" update || apt-get -o "Dpkg::Use-Pty=0" update
179+ RUN apt-get -qq install -o "Dpkg::Use-Pty=0" -q -y ${compressor_deps[*]} || apt-get install -o "Dpkg::Use-Pty=0" -q -y ${compressor_deps[*]}
180+ SHELL ["/bin/bash", "-c"]
181+
182+ ADD hook-initrd.img /input/initramfs.img
183+ WORKDIR /work/dir
184+ RUN echo "# Tinkerbell Hook LinuxKit initramfs compressor report" > /output/${output_report_name}
185+ RUN { echo -n "## input magic: " && file /input/initramfs.img; }>> /output/${output_report_name}
186+
187+ RUN pigz -d -c /input/initramfs.img > /input/initramfs_decompress.cpio
188+ #RUN zcat /input/initramfs.img > /input/initramfs_decompress.cpio
189+
190+ RUN { echo -n "## ungzipped input magic: " && file /input/initramfs_decompress.cpio; }>> /output/${output_report_name}
191+
192+ RUN cat /input/initramfs_decompress.cpio | cpio -idm
193+
194+ # Reporting on original...
195+ RUN { echo "## original: dust report: " && dust -x --no-colors --no-percent-bars ; }>> /output/${output_report_name}
196+ RUN { echo "## original: top-40 dirs usage 5-deep (du): " && du -h -d 5 -x . | sort -h | tail -40 ; }>> /output/${output_report_name}
197+ RUN { echo "## original: same-name files, larger than 512kb: " && $find_same_name_files_command ; }>> /output/${output_report_name}
198+ RUN { echo -n "## original: hardlinked files: " && find . -type f -links +1 | wc -l ; }>> /output/${output_report_name}
199+
200+ # -> Deduplicate exact files into hardlinks with rdfind
201+ RUN { echo "## rdfind run: " && rdfind -makehardlinks true -deleteduplicates true -makeresultsfile false . ; }>> /output/${output_report_name}
202+
203+ # Reporting after deduplication
204+ RUN { echo "## deduped: dust report: " && dust -x --no-colors --no-percent-bars ; }>> /output/${output_report_name}
205+ RUN { echo -n "## deduped: hardlinked files: " && find . -type f -links +1 | wc -l ; }>> /output/${output_report_name}
206+
207+ RUN find . | cpio -o -H newc > /output/repacked.cpio
208+ RUN { echo -n "## output, pre compression magic: " && file /output/repacked.cpio; }>> /output/${output_report_name}
209+
210+ RUN zstdmt -9 -o /output/${output_compressed_initramfs_name} /output/repacked.cpio
211+ RUN { echo -n "## output magic: " && file /output/${output_compressed_initramfs_name} ; }>> /output/${output_report_name}
212+ FROM scratch
213+ COPY --from=builder /output/* /
214+ INITRAMFS_COMPRESSOR_DOCKERFILE
215+
216+ declare docker_compressor_output_dir=" ${lk_output_dir} /initramfs_compressor_output"
217+ mkdir -p " ${docker_compressor_output_dir} "
218+
219+ # Now, build the Dockerfile and output the compressed initramfs and report directly to a local directory
220+ log info " Building Dockerfile for initramfs compressor and outputting directly to '${docker_compressor_output_dir} '..."
221+ declare -a compressor_docker_buildx_args=(
222+ --output " type=local,dest=${docker_compressor_output_dir} " # output directly to a local dir, not an image
223+ " --progress=${DOCKER_BUILDX_PROGRESS_TYPE} " # show progress
224+ -f " ${initramfs_compressor_dockerfile} " # Dockerfile path
225+ " ${lk_output_dir} " ) # build context, for easy access to the input initramfs file
226+ docker buildx build " ${compressor_docker_buildx_args[@]} "
227+
228+ # If output not in place, something went wrong
229+ if [[ ! -f " ${docker_compressor_output_dir} /${output_compressed_initramfs_name} " ]]; then
230+ log error " Failed to produce compressed initramfs at expected location '${docker_compressor_output_dir} /${output_compressed_initramfs_name} '"
231+ exit 8
148232 fi
149233
234+ # If report not in place, something went wrong
235+ if [[ ! -f " ${docker_compressor_output_dir} /${output_report_name} " ]]; then
236+ log error " Failed to produce compressed initramfs at expected location '${docker_compressor_output_dir} /${output_report_name} '"
237+ exit 9
238+ fi
239+
240+ # Output the report (use DEBUG=yes to see it)
241+ log_file_bat " ${docker_compressor_output_dir} /${output_report_name} " " info" " Compression report for initramfs ${inventory_id} :"
242+
243+ # Move the outputted compressed initramfs into the original location
244+ mv " ${debug_dash_v[@]} " " ${docker_compressor_output_dir} /${output_compressed_initramfs_name} " " ${initramfs_path} "
245+
246+ # Clean up the temporary Dockerfile and output dir - not if debugging
247+ if [[ " ${DEBUG} " != " yes" ]]; then
248+ rm -rf " ${initramfs_compressor_dockerfile} " " ${docker_compressor_output_dir} "
249+ fi
250+
251+ # Calculate the final initramfs zstd-compressed size, then brag about zstd's prowess
252+ initramfs_size_bytes_zstd=$( stat -c%s " ${initramfs_path} " )
253+ log notice " ${inventory_id} : Final zstd+deduped initramfs size (${initramfs_size_bytes_zstd} bytes) vs initial gzip-compressed size (${initramfs_size_bytes_gzip} bytes): size reduced by $(( 100 - (initramfs_size_bytes_zstd * 100 / initramfs_size_bytes_gzip)) )%"
254+
150255 if [[ " ${LK_RUN} " == " qemu" ]]; then
151256 linuxkit_run_qemu
152257 return 0
0 commit comments