Skip to content

Commit 6219234

Browse files
committed
hook: introduce post-linuxkit initramfs compressor/optimizer/reporter
- turns out LinuxKit ends up producing initramfs's that are very large - multiple copies of exact same files in different fs paths - gzip compression - multiple cpio layers causing multiple instances of same filepath stored - add a Docker-based postprocess step that does multiple tricks to fix it - extract/repack cpio to flatten it - rdfind to replace duplicates with hardlinks - zstd (-9, multithread) compression - reports usage and large duplicate files in different paths - so we can optimize our lk containers for better dedupe - gains are at - 25% for zstd compression (meson64) - 10Mb for rdfind, without any lk optimization yet Signed-off-by: Ricardo Pardini <ricardo@pardini.net>
1 parent fe9a98b commit 6219234

File tree

1 file changed

+113
-8
lines changed

1 file changed

+113
-8
lines changed

bash/linuxkit.sh

Lines changed: 113 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -136,17 +136,122 @@ function linuxkit_build() {
136136
"${linuxkit_bin}" build "--format" "kernel+initrd" "${lk_debug_args[@]}" "${lk_args[@]}"
137137

138138
declare initramfs_path="${lk_output_dir}/hook-initrd.img"
139+
139140
# initramfs_path is a gzipped file. Obtain its compressed size on disk, and its uncompressed byte size (without decompressing it)
140-
declare -i initramfs_size_bytes=0
141-
initramfs_size_bytes=$(gzip -l "${initramfs_path}" | tail -n 1 | awk '{print $2}')
142-
log info "Uncompressed initramfs size in bytes: ${initramfs_size_bytes}"
143-
# If the size is larger than 900mb, it is unlikely to boot on a 2gb RAM machine. Warn.
144-
if [[ "${initramfs_size_bytes}" -gt 943718400 ]]; then
145-
log warn "${inventory_id}: Uncompressed initramfs size (${initramfs_size_bytes} bytes) is larger than 900mb; it may not boot on a 2gb RAM machine."
146-
else
147-
log notice "${inventory_id}: Uncompressed initramfs size (${initramfs_size_bytes} bytes) is smaller than 900mb."
141+
declare -i initramfs_size_bytes_initial=0 initramfs_size_bytes_gzip=0 initramfs_size_bytes_zstd=0
142+
initramfs_size_bytes_gzip=$(stat -c%s "${initramfs_path}")
143+
initramfs_size_bytes_initial=$(gzip -l "${initramfs_path}" | tail -n 1 | awk '{print $2}')
144+
log info "Compressed-gzip (initial) initramfs size in bytes: ${initramfs_size_bytes_gzip}"
145+
log info "Uncompressed initial initramfs size in bytes: ${initramfs_size_bytes_initial}"
146+
147+
# Brief detour to:
148+
# 1) Decompress the initramfs (`gunzip`) and extract it to a directory (`cpio`)
149+
# This de-duplicates some cpio-duplicates leftover by linuxkit (some kb's)
150+
# 2) Produce reports on the initramfs contents:
151+
# - disk usage (by size) of the initramfs contents (du -h -d 10 -x | sort -h | tail -n 20)
152+
# - aggregated basename-identical files in the initramfs, with their size and hash
153+
# This will help us find things to optimize in the lkcontainers:
154+
# - use same base image for all containers (deduplicate musl + others)
155+
# - avoid different versions of stuff (containerd in hook-containerd but also in hook-docker)
156+
# - avoid large files that are not needed in the initramfs (docs)
157+
# 3) Use `rdfind` to replace exact duplicates with hardlinks (many mb's!)
158+
# 4) Repack the initramfs into `cpio` and compress it with `zstd` level 9 (about 30% better, many mb's!)
159+
# All the Hook kernels already support zstd initramfs decompression, so this is safe to do. Performance might be better too.
160+
#
161+
# Since we need tools and do-it-as-root for this, it's best done using a Docker container
162+
declare -a compressor_deps=("bash" "gawk" "cpio" "zstd" "rdfind" "gzip" "pigz" "coreutils" "findutils" "file" "du-dust")
163+
declare initramfs_compressor_dockerfile="${lk_output_dir}/Dockerfile.initramfs_compressor"
164+
declare -r output_compressed_initramfs_name="initramfs-compressed.img" output_report_name="report.md"
165+
166+
# I *really* don't want to escape this; bear with me
167+
find_same_name_files_command="$(
168+
cat <<- 'FIND_SAME_NAME_FILES_COMMAND'
169+
find . -type f -size +512k -printf "%f %p\n" | sort | awk '{files[$1]=files[$1] ? files[$1] "\n"$2 : $2; count[$1]++} END {for (f in count) if (count[f]>1) print f "\n" files[f]}' | while read -r line; do if [[ -f "$line" ]]; then stat --printf="%s bytes " "$line"; md5sum "$line"; else echo "### duplicate: '$line'"; fi; done
170+
FIND_SAME_NAME_FILES_COMMAND
171+
)"
172+
173+
log info "Creating Dockerfile '${initramfs_compressor_dockerfile}'... "
174+
cat <<- INITRAMFS_COMPRESSOR_DOCKERFILE > "${initramfs_compressor_dockerfile}"
175+
FROM debian:stable AS builder
176+
RUN mkdir -p /output
177+
ENV DEBIAN_FRONTEND=noninteractive
178+
RUN apt-get -qq -o "Dpkg::Use-Pty=0" update || apt-get -o "Dpkg::Use-Pty=0" update
179+
RUN apt-get -qq install -o "Dpkg::Use-Pty=0" -q -y ${compressor_deps[*]} || apt-get install -o "Dpkg::Use-Pty=0" -q -y ${compressor_deps[*]}
180+
SHELL ["/bin/bash", "-c"]
181+
182+
ADD hook-initrd.img /input/initramfs.img
183+
WORKDIR /work/dir
184+
RUN echo "# Tinkerbell Hook LinuxKit initramfs compressor report" > /output/${output_report_name}
185+
RUN { echo -n "## input magic: " && file /input/initramfs.img; }>> /output/${output_report_name}
186+
187+
RUN pigz -d -c /input/initramfs.img > /input/initramfs_decompress.cpio
188+
#RUN zcat /input/initramfs.img > /input/initramfs_decompress.cpio
189+
190+
RUN { echo -n "## ungzipped input magic: " && file /input/initramfs_decompress.cpio; }>> /output/${output_report_name}
191+
192+
RUN cat /input/initramfs_decompress.cpio | cpio -idm
193+
194+
# Reporting on original...
195+
RUN { echo "## original: dust report: " && dust -x --no-colors --no-percent-bars ; }>> /output/${output_report_name}
196+
RUN { echo "## original: top-40 dirs usage 5-deep (du): " && du -h -d 5 -x . | sort -h | tail -40 ; }>> /output/${output_report_name}
197+
RUN { echo "## original: same-name files, larger than 512kb: " && $find_same_name_files_command ; }>> /output/${output_report_name}
198+
RUN { echo -n "## original: hardlinked files: " && find . -type f -links +1 | wc -l ; }>> /output/${output_report_name}
199+
200+
# -> Deduplicate exact files into hardlinks with rdfind
201+
RUN { echo "## rdfind run: " && rdfind -makehardlinks true -deleteduplicates true -makeresultsfile false . ; }>> /output/${output_report_name}
202+
203+
# Reporting after deduplication
204+
RUN { echo "## deduped: dust report: " && dust -x --no-colors --no-percent-bars ; }>> /output/${output_report_name}
205+
RUN { echo -n "## deduped: hardlinked files: " && find . -type f -links +1 | wc -l ; }>> /output/${output_report_name}
206+
207+
RUN find . | cpio -o -H newc > /output/repacked.cpio
208+
RUN { echo -n "## output, pre compression magic: " && file /output/repacked.cpio; }>> /output/${output_report_name}
209+
210+
RUN zstdmt -9 -o /output/${output_compressed_initramfs_name} /output/repacked.cpio
211+
RUN { echo -n "## output magic: " && file /output/${output_compressed_initramfs_name}; }>> /output/${output_report_name}
212+
FROM scratch
213+
COPY --from=builder /output/* /
214+
INITRAMFS_COMPRESSOR_DOCKERFILE
215+
216+
declare docker_compressor_output_dir="${lk_output_dir}/initramfs_compressor_output"
217+
mkdir -p "${docker_compressor_output_dir}"
218+
219+
# Now, build the Dockerfile and output the compressed initramfs and the report directly to a local directory
220+
log info "Building Dockerfile for initramfs compressor and outputting directly to '${docker_compressor_output_dir}'..."
221+
declare -a compressor_docker_buildx_args=(
222+
--output "type=local,dest=${docker_compressor_output_dir}" # output directly to a local dir, not an image
223+
"--progress=${DOCKER_BUILDX_PROGRESS_TYPE}" # show progress
224+
-f "${initramfs_compressor_dockerfile}" # Dockerfile path
225+
"${lk_output_dir}") # build context, for easy access to the input initramfs file
226+
docker buildx build "${compressor_docker_buildx_args[@]}"
227+
228+
# If output not in place, something went wrong
229+
if [[ ! -f "${docker_compressor_output_dir}/${output_compressed_initramfs_name}" ]]; then
230+
log error "Failed to produce compressed initramfs at expected location '${docker_compressor_output_dir}/${output_compressed_initramfs_name}'"
231+
exit 8
148232
fi
149233

234+
# If report not in place, something went wrong
235+
if [[ ! -f "${docker_compressor_output_dir}/${output_report_name}" ]]; then
236+
log error "Failed to produce compressed initramfs at expected location '${docker_compressor_output_dir}/${output_report_name}'"
237+
exit 9
238+
fi
239+
240+
# Output the report (use DEBUG=yes to see it)
241+
log_file_bat "${docker_compressor_output_dir}/${output_report_name}" "info" "Compression report for initramfs ${inventory_id}:"
242+
243+
# Move the outputted compressed initramfs into the original location
244+
mv "${debug_dash_v[@]}" "${docker_compressor_output_dir}/${output_compressed_initramfs_name}" "${initramfs_path}"
245+
246+
# Clean up the temporary Dockerfile and output dir - not if debugging
247+
if [[ "${DEBUG}" != "yes" ]]; then
248+
rm -rf "${initramfs_compressor_dockerfile}" "${docker_compressor_output_dir}"
249+
fi
250+
251+
# Calculate the final initramfs zstd-compressed size, then brag about zstd's prowess
252+
initramfs_size_bytes_zstd=$(stat -c%s "${initramfs_path}")
253+
log notice "${inventory_id}: Final zstd+deduped initramfs size (${initramfs_size_bytes_zstd} bytes) vs initial gzip-compressed size (${initramfs_size_bytes_gzip} bytes): size reduced by $((100 - (initramfs_size_bytes_zstd * 100 / initramfs_size_bytes_gzip)))%"
254+
150255
if [[ "${LK_RUN}" == "qemu" ]]; then
151256
linuxkit_run_qemu
152257
return 0

0 commit comments

Comments
 (0)