From b5638a3cac2d177cd6fc65a23559e1a8847e8ae0 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 28 May 2025 18:15:41 +0000 Subject: [PATCH 1/3] Remove unneeded C symbols These are now provided by `compiler-builtins`, so there is no need to also build the C versions. This was detected by checking for duplicate symbols and not excluding weak symbols (like CI currently does). --- compiler-builtins/build.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/compiler-builtins/build.rs b/compiler-builtins/build.rs index 90d98ec7c..d37fdc5df 100644 --- a/compiler-builtins/build.rs +++ b/compiler-builtins/build.rs @@ -555,7 +555,6 @@ mod c { if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics { sources.extend(&[ - ("__comparetf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c"), ("__fe_raise_inexact", "fp_mode.c"), ]); @@ -570,11 +569,11 @@ mod c { } if target.arch == "mips64" { - sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); + sources.extend(&[("__fe_getround", "fp_mode.c")]); } if target.arch == "loongarch64" { - sources.extend(&[("__netf2", "comparetf2.c"), ("__fe_getround", "fp_mode.c")]); + sources.extend(&[("__fe_getround", "fp_mode.c")]); } // Remove the assembly implementations that won't compile for the target From 9f0cfc24de919d3b1e6b58bb11994f08db3116f5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Sat, 19 Apr 2025 07:38:43 +0000 Subject: [PATCH 2/3] Replace the `nm` symbol check with a Rust implementation This should be less error-prone and adaptable than the `nm` version, and have better cross-platform support without needing LLVM `nm` installed. --- Cargo.toml | 1 + ci/run.sh | 125 +++-------------- crates/symbol-check/Cargo.toml | 13 ++ crates/symbol-check/src/main.rs | 231 ++++++++++++++++++++++++++++++++ 4 files changed, 262 insertions(+), 108 deletions(-) create mode 100644 crates/symbol-check/Cargo.toml create mode 100644 crates/symbol-check/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index b39ec8a25..bc6b4bd29 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,6 +6,7 @@ members = [ "crates/libm-macros", "crates/musl-math-sys", "crates/panic-handler", + "crates/symbol-check", "crates/util", "libm", "libm-test", diff --git a/ci/run.sh b/ci/run.sh index 68d13c130..cf3f7dfda 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -47,87 +47,25 @@ else fi fi - -declare -a rlib_paths - -# Set the `rlib_paths` global array to a list of all compiler-builtins rlibs -update_rlib_paths() { - if [ -d /builtins-target ]; then - rlib_paths=( /builtins-target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) - else - rlib_paths=( target/"${target}"/debug/deps/libcompiler_builtins-*.rlib ) - fi -} - -# Remove any existing artifacts from previous tests that don't set #![compiler_builtins] -update_rlib_paths -rm -f "${rlib_paths[@]}" - -cargo build -p compiler_builtins --target "$target" -cargo build -p compiler_builtins --target "$target" --release -cargo build -p compiler_builtins --target "$target" --features c -cargo build -p compiler_builtins --target "$target" --features c --release -cargo build -p compiler_builtins --target "$target" --features no-asm -cargo build -p compiler_builtins --target "$target" --features no-asm --release -cargo build -p compiler_builtins --target "$target" --features no-f16-f128 -cargo build -p compiler_builtins --target "$target" --features no-f16-f128 --release - -PREFIX=${target//unknown-/}- -case "$target" in - armv7-*) - PREFIX=arm-linux-gnueabihf- - ;; - thumb*) - PREFIX=arm-none-eabi- - ;; - *86*-*) - PREFIX= - ;; -esac - -NM=$(find "$(rustc --print sysroot)" \( -name llvm-nm -o -name llvm-nm.exe \) ) -if [ "$NM" = "" ]; then - NM="${PREFIX}nm" -fi - -# i686-pc-windows-gnu tools have a dependency on some DLLs, so run it with -# rustup run to ensure that those are in PATH. -TOOLCHAIN="$(rustup show active-toolchain | sed 's/ (default)//')" -if [[ "$TOOLCHAIN" == *i686-pc-windows-gnu ]]; then - NM="rustup run $TOOLCHAIN $NM" -fi - -# Look out for duplicated symbols when we include the compiler-rt (C) implementation -update_rlib_paths -for rlib in "${rlib_paths[@]}"; do - set +x - echo "================================================================" - echo "checking $rlib for duplicate symbols" - echo "================================================================" - set -x - - duplicates_found=0 - - # NOTE On i586, It's normal that the get_pc_thunk symbol appears several - # times so ignore it - $NM -g --defined-only "$rlib" 2>&1 | - sort | - uniq -d | - grep -v __x86.get_pc_thunk --quiet | - grep 'T __' && duplicates_found=1 - - if [ "$duplicates_found" != 0 ]; then - echo "error: found duplicate symbols" - exit 1 - else - echo "success; no duplicate symbols found" - fi -done - -rm -f "${rlib_paths[@]}" +# Ensure there are no duplicate symbols or references to `core` when +# `compiler-builtins` is built with various features. Symcheck invokes Cargo to +# build with the arguments we provide it, then validates the built artifacts. +symcheck=(cargo run -p symbol-check --release) +[[ "$target" = "wasm"* ]] && symcheck+=(--features wasm) +symcheck+=(-- build-and-check) + +"${symcheck[@]}" -p compiler_builtins --target "$target" +"${symcheck[@]}" -p compiler_builtins --target "$target" --release +"${symcheck[@]}" -p compiler_builtins --target "$target" --features c +"${symcheck[@]}" -p compiler_builtins --target "$target" --features c --release +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-asm --release +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 +"${symcheck[@]}" -p compiler_builtins --target "$target" --features no-f16-f128 --release build_intrinsics_test() { - cargo build \ + # symcheck also checks the results of builtins-test-intrinsics + "${symcheck[@]}" \ --target "$target" --verbose \ --manifest-path builtins-test-intrinsics/Cargo.toml "$@" } @@ -143,35 +81,6 @@ build_intrinsics_test --features c --release CARGO_PROFILE_DEV_LTO=true build_intrinsics_test CARGO_PROFILE_RELEASE_LTO=true build_intrinsics_test --release -# Ensure no references to any symbols from core -update_rlib_paths -for rlib in "${rlib_paths[@]}"; do - set +x - echo "================================================================" - echo "checking $rlib for references to core" - echo "================================================================" - set -x - - tmpdir="${CARGO_TARGET_DIR:-target}/tmp" - test -d "$tmpdir" || mkdir "$tmpdir" - defined="$tmpdir/defined_symbols.txt" - undefined="$tmpdir/defined_symbols.txt" - - $NM --quiet -U "$rlib" | grep 'T _ZN4core' | awk '{print $3}' | sort | uniq > "$defined" - $NM --quiet -u "$rlib" | grep 'U _ZN4core' | awk '{print $2}' | sort | uniq > "$undefined" - grep_has_results=0 - grep -v -F -x -f "$defined" "$undefined" && grep_has_results=1 - - if [ "$target" = "powerpc64-unknown-linux-gnu" ]; then - echo "FIXME: powerpc64 fails these tests" - elif [ "$grep_has_results" != 0 ]; then - echo "error: found unexpected references to core" - exit 1 - else - echo "success; no references to core found" - fi -done - # Test libm # Make sure a simple build works diff --git a/crates/symbol-check/Cargo.toml b/crates/symbol-check/Cargo.toml new file mode 100644 index 000000000..30969ee40 --- /dev/null +++ b/crates/symbol-check/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "symbol-check" +version = "0.1.0" +edition = "2024" +publish = false + +[dependencies] +# FIXME: used as a git dependency since the latest release does not support wasm +object = { git = "https://github.com/gimli-rs/object.git", rev = "013fac75da56a684377af4151b8164b78c1790e0" } +serde_json = "1.0.140" + +[features] +wasm = ["object/wasm"] diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs new file mode 100644 index 000000000..104505438 --- /dev/null +++ b/crates/symbol-check/src/main.rs @@ -0,0 +1,231 @@ +//! Tool used by CI to inspect compiler-builtins archives and help ensure we won't run into any +//! linking errors. + +use std::collections::{BTreeMap, BTreeSet}; +use std::fs; +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + +use object::read::archive::{ArchiveFile, ArchiveMember}; +use object::{Object, ObjectSymbol, Symbol, SymbolKind, SymbolScope, SymbolSection}; +use serde_json::Value; + +const CHECK_LIBRARIES: &[&str] = &["compiler_builtins", "builtins_test_intrinsics"]; +const CHECK_EXTENSIONS: &[Option<&str>] = &[Some("rlib"), Some("a"), Some("exe"), None]; + +const USAGE: &str = "Usage: + + symbol-check build-and-check CARGO_ARGS ... + +Cargo will get invoked with `CARGO_ARGS` and all output +`compiler_builtins*.rlib` files will be checked. +"; + +fn main() { + // Create a `&str` vec so we can match on it. + let args = std::env::args().collect::>(); + let args_ref = args.iter().map(String::as_str).collect::>(); + + match &args_ref[1..] { + ["build-and-check", rest @ ..] if !rest.is_empty() => { + let paths = exec_cargo_with_args(rest); + for path in paths { + println!("Checking {}", path.display()); + verify_no_duplicates(&path); + verify_core_symbols(&path); + } + } + _ => { + println!("{USAGE}"); + std::process::exit(1); + } + } +} + +/// Run `cargo build` with the provided additional arguments, collecting the list of created +/// libraries. +fn exec_cargo_with_args(args: &[&str]) -> Vec { + let mut cmd = Command::new("cargo") + .arg("build") + .arg("--message-format=json") + .args(args) + .stdout(Stdio::piped()) + .spawn() + .expect("failed to launch Cargo"); + + let stdout = cmd.stdout.take().unwrap(); + let reader = BufReader::new(stdout); + let mut check_files = Vec::new(); + + for line in reader.lines() { + let line = line.expect("failed to read line"); + println!("{line}"); // tee to stdout + + // Select only steps that create files + let j: Value = serde_json::from_str(&line).expect("failed to deserialize"); + if j["reason"] != "compiler-artifact" { + continue; + } + + // Find rlibs in the created file list that match our expected library names and + // extensions. + for fpath in j["filenames"].as_array().expect("filenames not an array") { + let path = fpath.as_str().expect("file name not a string"); + let path = PathBuf::from(path); + + if CHECK_EXTENSIONS.contains(&path.extension().map(|ex| ex.to_str().unwrap())) { + let fname = path.file_name().unwrap().to_str().unwrap(); + + if CHECK_LIBRARIES.iter().any(|lib| fname.contains(lib)) { + check_files.push(path); + } + } + } + } + + cmd.wait().expect("failed to wait on Cargo"); + + assert!(!check_files.is_empty(), "no compiler_builtins rlibs found"); + println!("Collected the following rlibs to check: {check_files:#?}"); + + check_files +} + +/// Information collected from `object`, for convenience. +#[expect(unused)] // only for printing +#[derive(Clone, Debug)] +struct SymInfo { + name: String, + kind: SymbolKind, + scope: SymbolScope, + section: SymbolSection, + is_undefined: bool, + is_global: bool, + is_local: bool, + is_weak: bool, + is_common: bool, + address: u64, + object: String, +} + +impl SymInfo { + fn new(sym: &Symbol, member: &ArchiveMember) -> Self { + Self { + name: sym.name().expect("missing name").to_owned(), + kind: sym.kind(), + scope: sym.scope(), + section: sym.section(), + is_undefined: sym.is_undefined(), + is_global: sym.is_global(), + is_local: sym.is_local(), + is_weak: sym.is_weak(), + is_common: sym.is_common(), + address: sym.address(), + object: String::from_utf8_lossy(member.name()).into_owned(), + } + } +} + +/// Ensure that the same global symbol isn't defined in multiple object files within an archive. +/// +/// Note that this will also locate cases where a symbol is weakly defined in more than one place. +/// Technically there are no linker errors that will come from this, but it keeps our binary more +/// straightforward and saves some distribution size. +fn verify_no_duplicates(path: &Path) { + let mut syms = BTreeMap::::new(); + let mut dups = Vec::new(); + let mut found_any = false; + + for_each_symbol(path, |symbol, member| { + // Only check defined globals + if !symbol.is_global() || symbol.is_undefined() { + return; + } + + let sym = SymInfo::new(&symbol, member); + + // x86-32 includes multiple copies of thunk symbols + if sym.name.starts_with("__x86.get_pc_thunk") { + return; + } + + // Windows has symbols for literal numeric constants, string literals, and MinGW pseudo- + // relocations. These are allowed to have repeated definitions. + let win_allowed_dup_pfx = ["__real@", "__xmm@", "??_C@_", ".refptr"]; + if win_allowed_dup_pfx + .iter() + .any(|pfx| sym.name.starts_with(pfx)) + { + return; + } + + match syms.get(&sym.name) { + Some(existing) => { + dups.push(sym); + dups.push(existing.clone()); + } + None => { + syms.insert(sym.name.clone(), sym); + } + } + + found_any = true; + }); + + assert!(found_any, "no symbols found"); + + if !dups.is_empty() { + dups.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + panic!("found duplicate symbols: {dups:#?}"); + } + + println!(" success: no duplicate symbols found"); +} + +/// Ensure that there are no references to symbols from `core` that aren't also (somehow) defined. +fn verify_core_symbols(path: &Path) { + let mut defined = BTreeSet::new(); + let mut undefined = Vec::new(); + let mut has_symbols = false; + + for_each_symbol(path, |symbol, member| { + has_symbols = true; + + // Find only symbols from `core` + if !symbol.name().unwrap().contains("_ZN4core") { + return; + } + + let sym = SymInfo::new(&symbol, member); + if sym.is_undefined { + undefined.push(sym); + } else { + defined.insert(sym.name); + } + }); + + assert!(has_symbols, "no symbols found"); + + // Discard any symbols that are defined somewhere in the archive + undefined.retain(|sym| !defined.contains(&sym.name)); + + if !undefined.is_empty() { + undefined.sort_unstable_by(|a, b| a.name.cmp(&b.name)); + panic!("found undefined symbols from core: {undefined:#?}"); + } + + println!(" success: no undefined references to core found"); +} + +/// For a given archive path, do something with each symbol. +fn for_each_symbol(path: &Path, mut f: impl FnMut(Symbol, &ArchiveMember)) { + let data = fs::read(path).expect("reading file failed"); + let archive = ArchiveFile::parse(data.as_slice()).expect("archive parse failed"); + for member in archive.members() { + let member = member.expect("failed to access member"); + let obj_data = member.data(&*data).expect("failed to access object"); + let obj = object::File::parse(obj_data).expect("failed to parse object"); + obj.symbols().for_each(|sym| f(sym, &member)); + } +} From bfd4058825e0002b0369f105467a93e8290969c5 Mon Sep 17 00:00:00 2001 From: Trevor Gross Date: Wed, 28 May 2025 19:59:16 +0000 Subject: [PATCH 3/3] Remove the now-unneeded llvm-tools-preview Since a working `nm` is no longer needed as part of CI, the rustup component can be removed. --- .github/workflows/main.yaml | 1 - crates/symbol-check/src/main.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index d13dd6b0f..567ad1205 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -119,7 +119,6 @@ jobs: rustup update "$channel" --no-self-update rustup default "$channel" rustup target add "${{ matrix.target }}" - rustup component add llvm-tools-preview - uses: taiki-e/install-action@nextest - uses: Swatinem/rust-cache@v2 with: diff --git a/crates/symbol-check/src/main.rs b/crates/symbol-check/src/main.rs index 104505438..4e6417fdf 100644 --- a/crates/symbol-check/src/main.rs +++ b/crates/symbol-check/src/main.rs @@ -84,7 +84,7 @@ fn exec_cargo_with_args(args: &[&str]) -> Vec { } } - cmd.wait().expect("failed to wait on Cargo"); + assert!(cmd.wait().expect("failed to wait on Cargo").success()); assert!(!check_files.is_empty(), "no compiler_builtins rlibs found"); println!("Collected the following rlibs to check: {check_files:#?}");