diff --git a/src/tools/miri/ci.sh b/src/tools/miri/ci.sh
index eda1ceb408484..5078e9eaab382 100755
--- a/src/tools/miri/ci.sh
+++ b/src/tools/miri/ci.sh
@@ -108,7 +108,7 @@ case $HOST_TARGET in
     MIRI_TEST_TARGET=aarch64-unknown-linux-gnu run_tests
     MIRI_TEST_TARGET=aarch64-apple-darwin run_tests
     MIRI_TEST_TARGET=i686-pc-windows-gnu run_tests
-    MIRI_TEST_TARGET=x86_64-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple atomic data_race env/var
+    MIRI_TEST_TARGET=x86_64-unknown-freebsd run_tests_minimal hello integer vec panic/panic concurrency/simple atomic env/var
     MIRI_TEST_TARGET=aarch64-linux-android run_tests_minimal hello integer vec panic/panic
     MIRI_TEST_TARGET=wasm32-wasi run_tests_minimal no_std integer strings wasm
     MIRI_TEST_TARGET=wasm32-unknown-unknown run_tests_minimal no_std integer strings wasm
diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version
index 0335ad3c1d87e..60ae5d12598a6 100644
--- a/src/tools/miri/rust-version
+++ b/src/tools/miri/rust-version
@@ -1 +1 @@
-9e3f784eb2c7c847b6c3578b373c0e0bc9233ca3
+2e4e2a8f288f642cafcc41fff211955ceddc453d
diff --git a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
index b63b0bdff1217..4232cd396c95e 100644
--- a/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
+++ b/src/tools/miri/src/borrow_tracker/tree_borrows/tree.rs
@@ -661,13 +661,11 @@ impl<'tcx> Tree {
             for (perms_range, perms) in self.rperms.iter_mut_all() {
                 let idx = self.tag_mapping.get(&tag).unwrap();
                 // Only visit initialized permissions
-                if let Some(p) = perms.get(idx) && p.initialized {
-                    TreeVisitor {
-                        nodes: &mut self.nodes,
-                        tag_mapping: &self.tag_mapping,
-                        perms,
-                    }
-                    .traverse_nonchildren(
+                if let Some(p) = perms.get(idx)
+                    && p.initialized
+                {
+                    TreeVisitor { nodes: &mut self.nodes, tag_mapping: &self.tag_mapping, perms }
+                        .traverse_nonchildren(
                         tag,
                         |args| node_app(perms_range.clone(), args),
                         |args| err_handler(perms_range.clone(), args),
diff --git a/src/tools/miri/src/concurrency/data_race.rs b/src/tools/miri/src/concurrency/data_race.rs
index bec2972c50d6d..4cab86af8861f 100644
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@@ -41,6 +41,7 @@
 //! on the data-race detection code.
 
 use std::{
+    borrow::Cow,
     cell::{Cell, Ref, RefCell, RefMut},
     fmt::Debug,
     mem,
@@ -167,7 +168,7 @@ pub struct DataRace;
 /// explicitly to reduce memory usage for the
 /// common case where no atomic operations
 /// exists on the memory cell.
-#[derive(Clone, PartialEq, Eq, Default, Debug)]
+#[derive(Clone, PartialEq, Eq, Debug)]
 struct AtomicMemoryCellClocks {
     /// The clock-vector of the timestamp of the last atomic
     /// read operation performed by each thread.
@@ -186,6 +187,11 @@ struct AtomicMemoryCellClocks {
     /// happen-before a thread if an acquire-load is
     /// performed on the data.
     sync_vector: VClock,
+
+    /// The size of accesses to this atomic location.
+    /// We use this to detect non-synchronized mixed-size accesses. Since all accesses must be
+    /// aligned to their size, this is sufficient to detect imperfectly overlapping accesses.
+    size: Size,
 }
 
 /// Type of write operation: allocating memory
@@ -220,54 +226,101 @@ impl WriteType {
 /// for data-race detection.
 #[derive(Clone, PartialEq, Eq, Debug)]
 struct MemoryCellClocks {
-    /// The vector-clock timestamp of the last write
-    /// corresponding to the writing threads timestamp.
-    write: VTimestamp,
-
-    /// The identifier of the vector index, corresponding to a thread
-    /// that performed the last write operation.
-    write_index: VectorIdx,
+    /// The vector-clock timestamp and the thread that did the last non-atomic write. We don't need
+    /// a full `VClock` here; it's always a single thread and nothing synchronizes, so the effective
+    /// clock is all-0 except for the thread that did the write.
+    write: (VectorIdx, VTimestamp),
 
     /// The type of operation that the write index represents,
     /// either newly allocated memory, a non-atomic write or
     /// a deallocation of memory.
     write_type: WriteType,
 
-    /// The vector-clock of the timestamp of the last read operation
-    /// performed by a thread since the last write operation occurred.
-    /// It is reset to zero on each write operation.
+    /// The vector-clock of all non-atomic reads that happened since the last non-atomic write
+    /// (i.e., we join together the "singleton" clocks corresponding to each read). It is reset to
+    /// zero on each write operation.
     read: VClock,
 
-    /// Atomic acquire & release sequence tracking clocks.
+    /// Atomic access, acquire, release sequence tracking clocks.
     /// For non-atomic memory in the common case this
     /// value is set to None.
     atomic_ops: Option<Box<AtomicMemoryCellClocks>>,
 }
 
+impl AtomicMemoryCellClocks {
+    fn new(size: Size) -> Self {
+        AtomicMemoryCellClocks {
+            read_vector: Default::default(),
+            write_vector: Default::default(),
+            sync_vector: Default::default(),
+            size,
+        }
+    }
+}
+
 impl MemoryCellClocks {
     /// Create a new set of clocks representing memory allocated
     ///  at a given vector timestamp and index.
     fn new(alloc: VTimestamp, alloc_index: VectorIdx) -> Self {
         MemoryCellClocks {
             read: VClock::default(),
-            write: alloc,
-            write_index: alloc_index,
+            write: (alloc_index, alloc),
             write_type: WriteType::Allocate,
             atomic_ops: None,
         }
     }
 
+    #[inline]
+    fn write_was_before(&self, other: &VClock) -> bool {
+        // This is the same as `self.write() <= other` but
+        // without actually manifesting a clock for `self.write`.
+        self.write.1 <= other[self.write.0]
+    }
+
+    #[inline]
+    fn write(&self) -> VClock {
+        VClock::new_with_index(self.write.0, self.write.1)
+    }
+
     /// Load the internal atomic memory cells if they exist.
     #[inline]
     fn atomic(&self) -> Option<&AtomicMemoryCellClocks> {
         self.atomic_ops.as_deref()
     }
 
-    /// Load or create the internal atomic memory metadata
-    /// if it does not exist.
+    /// Load the internal atomic memory cells; panics if they do not exist.
     #[inline]
-    fn atomic_mut(&mut self) -> &mut AtomicMemoryCellClocks {
-        self.atomic_ops.get_or_insert_with(Default::default)
+    fn atomic_mut_unwrap(&mut self) -> &mut AtomicMemoryCellClocks {
+        self.atomic_ops.as_deref_mut().unwrap()
+    }
+
+    /// Load or create the internal atomic memory metadata if it does not exist. Also ensures we do
+    /// not do mixed-size atomic accesses, and updates the recorded atomic access size.
+    fn atomic_access(
+        &mut self,
+        thread_clocks: &ThreadClockSet,
+        size: Size,
+    ) -> Result<&mut AtomicMemoryCellClocks, DataRace> {
+        match self.atomic_ops {
+            Some(ref mut atomic) => {
+                // We are good if the size is the same or all atomic accesses are before our current time.
+                if atomic.size == size {
+                    Ok(atomic)
+                } else if atomic.read_vector <= thread_clocks.clock
+                    && atomic.write_vector <= thread_clocks.clock
+                {
+                    // This is now the new size that must be used for accesses here.
+                    atomic.size = size;
+                    Ok(atomic)
+                } else {
+                    Err(DataRace)
+                }
+            }
+            None => {
+                self.atomic_ops = Some(Box::new(AtomicMemoryCellClocks::new(size)));
+                Ok(self.atomic_ops.as_mut().unwrap())
+            }
+        }
     }
 
     /// Update memory cell data-race tracking for atomic
@@ -277,23 +330,15 @@ impl MemoryCellClocks {
         &mut self,
         thread_clocks: &mut ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
-        self.atomic_read_detect(thread_clocks, index)?;
+        self.atomic_read_detect(thread_clocks, index, access_size)?;
         if let Some(atomic) = self.atomic() {
             thread_clocks.clock.join(&atomic.sync_vector);
         }
         Ok(())
     }
 
-    /// Checks if the memory cell access is ordered with all prior atomic reads and writes
-    fn race_free_with_atomic(&self, thread_clocks: &ThreadClockSet) -> bool {
-        if let Some(atomic) = self.atomic() {
-            atomic.read_vector <= thread_clocks.clock && atomic.write_vector <= thread_clocks.clock
-        } else {
-            true
-        }
-    }
-
     /// Update memory cell data-race tracking for atomic
     /// load relaxed semantics, is a no-op if this memory was
     /// not used previously as atomic memory.
@@ -301,8 +346,9 @@ impl MemoryCellClocks {
         &mut self,
         thread_clocks: &mut ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
-        self.atomic_read_detect(thread_clocks, index)?;
+        self.atomic_read_detect(thread_clocks, index, access_size)?;
         if let Some(atomic) = self.atomic() {
             thread_clocks.fence_acquire.join(&atomic.sync_vector);
         }
@@ -315,9 +361,10 @@ impl MemoryCellClocks {
         &mut self,
         thread_clocks: &ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
-        self.atomic_write_detect(thread_clocks, index)?;
-        let atomic = self.atomic_mut();
+        self.atomic_write_detect(thread_clocks, index, access_size)?;
+        let atomic = self.atomic_mut_unwrap(); // initialized by `atomic_write_detect`
         atomic.sync_vector.clone_from(&thread_clocks.clock);
         Ok(())
     }
@@ -328,14 +375,15 @@ impl MemoryCellClocks {
         &mut self,
         thread_clocks: &ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
-        self.atomic_write_detect(thread_clocks, index)?;
+        self.atomic_write_detect(thread_clocks, index, access_size)?;
 
         // The handling of release sequences was changed in C++20 and so
         // the code here is different to the paper since now all relaxed
         // stores block release sequences. The exception for same-thread
         // relaxed stores has been removed.
-        let atomic = self.atomic_mut();
+        let atomic = self.atomic_mut_unwrap();
         atomic.sync_vector.clone_from(&thread_clocks.fence_release);
         Ok(())
     }
@@ -346,9 +394,10 @@ impl MemoryCellClocks {
         &mut self,
         thread_clocks: &ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
-        self.atomic_write_detect(thread_clocks, index)?;
-        let atomic = self.atomic_mut();
+        self.atomic_write_detect(thread_clocks, index, access_size)?;
+        let atomic = self.atomic_mut_unwrap();
         atomic.sync_vector.join(&thread_clocks.clock);
         Ok(())
     }
@@ -359,24 +408,31 @@ impl MemoryCellClocks {
         &mut self,
         thread_clocks: &ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
-        self.atomic_write_detect(thread_clocks, index)?;
-        let atomic = self.atomic_mut();
+        self.atomic_write_detect(thread_clocks, index, access_size)?;
+        let atomic = self.atomic_mut_unwrap();
         atomic.sync_vector.join(&thread_clocks.fence_release);
         Ok(())
     }
 
-    /// Detect data-races with an atomic read, caused by a non-atomic write that does
+    /// Detect data-races with an atomic read, caused by a non-atomic access that does
     /// not happen-before the atomic-read.
     fn atomic_read_detect(
         &mut self,
         thread_clocks: &ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
         log::trace!("Atomic read with vectors: {:#?} :: {:#?}", self, thread_clocks);
-        let atomic = self.atomic_mut();
+        let atomic = self.atomic_access(thread_clocks, access_size)?;
         atomic.read_vector.set_at_index(&thread_clocks.clock, index);
-        if self.write <= thread_clocks.clock[self.write_index] { Ok(()) } else { Err(DataRace) }
+        // Make sure the last non-atomic write and all non-atomic reads were before this access.
+        if self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock {
+            Ok(())
+        } else {
+            Err(DataRace)
+        }
     }
 
     /// Detect data-races with an atomic write, either with a non-atomic read or with
@@ -385,11 +441,13 @@ impl MemoryCellClocks {
         &mut self,
         thread_clocks: &ThreadClockSet,
         index: VectorIdx,
+        access_size: Size,
     ) -> Result<(), DataRace> {
         log::trace!("Atomic write with vectors: {:#?} :: {:#?}", self, thread_clocks);
-        let atomic = self.atomic_mut();
+        let atomic = self.atomic_access(thread_clocks, access_size)?;
         atomic.write_vector.set_at_index(&thread_clocks.clock, index);
-        if self.write <= thread_clocks.clock[self.write_index] && self.read <= thread_clocks.clock {
+        // Make sure the last non-atomic write and all non-atomic reads were before this access.
+        if self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock {
             Ok(())
         } else {
             Err(DataRace)
@@ -408,9 +466,12 @@ impl MemoryCellClocks {
         if !current_span.is_dummy() {
             thread_clocks.clock[index].span = current_span;
         }
-        if self.write <= thread_clocks.clock[self.write_index] {
+        if self.write_was_before(&thread_clocks.clock) {
             let race_free = if let Some(atomic) = self.atomic() {
+                // We must be ordered-after all atomic accesses, reads and writes.
+                // This ensures we don't mix atomic and non-atomic accesses.
                 atomic.write_vector <= thread_clocks.clock
+                    && atomic.read_vector <= thread_clocks.clock
             } else {
                 true
             };
@@ -434,15 +495,14 @@ impl MemoryCellClocks {
         if !current_span.is_dummy() {
             thread_clocks.clock[index].span = current_span;
         }
-        if self.write <= thread_clocks.clock[self.write_index] && self.read <= thread_clocks.clock {
+        if self.write_was_before(&thread_clocks.clock) && self.read <= thread_clocks.clock {
             let race_free = if let Some(atomic) = self.atomic() {
                 atomic.write_vector <= thread_clocks.clock
                     && atomic.read_vector <= thread_clocks.clock
             } else {
                 true
             };
-            self.write = thread_clocks.clock[index];
-            self.write_index = index;
+            self.write = (index, thread_clocks.clock[index]);
             self.write_type = write_type;
             if race_free {
                 self.read.set_zero_vector();
@@ -473,7 +533,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         // the *value* (including the associated provenance if this is an AtomicPtr) at this location.
         // Only metadata on the location itself is used.
         let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place))?;
-        this.validate_overlapping_atomic(place)?;
         this.buffered_atomic_read(place, atomic, scalar, || {
             this.validate_atomic_load(place, atomic)
         })
@@ -489,7 +548,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(dest)?;
 
-        this.validate_overlapping_atomic(dest)?;
         this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
         this.validate_atomic_store(dest, atomic)?;
         // FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause
@@ -512,7 +570,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(place)?;
 
-        this.validate_overlapping_atomic(place)?;
         let old = this.allow_data_races_mut(|this| this.read_immediate(place))?;
 
         // Atomics wrap around on overflow.
@@ -537,7 +594,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(place)?;
 
-        this.validate_overlapping_atomic(place)?;
         let old = this.allow_data_races_mut(|this| this.read_scalar(place))?;
         this.allow_data_races_mut(|this| this.write_scalar(new, place))?;
 
@@ -559,10 +615,10 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(place)?;
 
-        this.validate_overlapping_atomic(place)?;
         let old = this.allow_data_races_mut(|this| this.read_immediate(place))?;
         let lt = this.wrapping_binary_op(mir::BinOp::Lt, &old, &rhs)?.to_scalar().to_bool()?;
 
+        #[rustfmt::skip] // rustfmt makes this unreadable
         let new_val = if min {
             if lt { &old } else { &rhs }
         } else {
@@ -598,7 +654,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         let this = self.eval_context_mut();
         this.atomic_access_check(place)?;
 
-        this.validate_overlapping_atomic(place)?;
         // Failure ordering cannot be stronger than success ordering, therefore first attempt
         // to read with the failure ordering and if successful then try again with the success
         // read ordering and write in the success case.
@@ -785,41 +840,54 @@ impl VClockAlloc {
         mem_clocks: &MemoryCellClocks,
         action: &str,
         is_atomic: bool,
+        access_size: Size,
         ptr_dbg: Pointer<AllocId>,
     ) -> InterpResult<'tcx> {
         let (current_index, current_clocks) = global.current_thread_state(thread_mgr);
+        let mut action = Cow::Borrowed(action);
+        let mut involves_non_atomic = true;
         let write_clock;
-        let (other_action, other_thread, other_clock) = if mem_clocks.write
-            > current_clocks.clock[mem_clocks.write_index]
-        {
-            // Convert the write action into the vector clock it
-            // represents for diagnostic purposes.
-            write_clock = VClock::new_with_index(mem_clocks.write_index, mem_clocks.write);
-            (mem_clocks.write_type.get_descriptor(), mem_clocks.write_index, &write_clock)
-        } else if let Some(idx) = Self::find_gt_index(&mem_clocks.read, &current_clocks.clock) {
-            ("Read", idx, &mem_clocks.read)
-        } else if !is_atomic {
-            if let Some(atomic) = mem_clocks.atomic() {
+        let (other_action, other_thread, other_clock) =
+            // First check the atomic-nonatomic cases. If it looks like multiple
+            // cases apply, this one should take precedence, else it might look like
+            // we are reporting races between two non-atomic reads.
+            if !is_atomic &&
+                let Some(atomic) = mem_clocks.atomic() &&
+                let Some(idx) = Self::find_gt_index(&atomic.write_vector, &current_clocks.clock)
+            {
+                (format!("Atomic Store"), idx, &atomic.write_vector)
+            } else if !is_atomic &&
+                let Some(atomic) = mem_clocks.atomic() &&
+                let Some(idx) = Self::find_gt_index(&atomic.read_vector, &current_clocks.clock)
+            {
+                (format!("Atomic Load"), idx, &atomic.read_vector)
+            // Then check races with non-atomic writes/reads.
+            } else if mem_clocks.write.1 > current_clocks.clock[mem_clocks.write.0] {
+                write_clock = mem_clocks.write();
+                (mem_clocks.write_type.get_descriptor().to_owned(), mem_clocks.write.0, &write_clock)
+            } else if let Some(idx) = Self::find_gt_index(&mem_clocks.read, &current_clocks.clock) {
+                (format!("Read"), idx, &mem_clocks.read)
+            // Finally, mixed-size races.
+            } else if is_atomic && let Some(atomic) = mem_clocks.atomic() && atomic.size != access_size {
+                // This is only a race if we are not synchronized with all atomic accesses, so find
+                // the one we are not synchronized with.
+                involves_non_atomic = false;
+                action = format!("{}-byte (different-size) {action}", access_size.bytes()).into();
                 if let Some(idx) = Self::find_gt_index(&atomic.write_vector, &current_clocks.clock)
-                {
-                    ("Atomic Store", idx, &atomic.write_vector)
-                } else if let Some(idx) =
-                    Self::find_gt_index(&atomic.read_vector, &current_clocks.clock)
-                {
-                    ("Atomic Load", idx, &atomic.read_vector)
-                } else {
-                    unreachable!(
-                        "Failed to report data-race for non-atomic operation: no race found"
-                    )
-                }
+                    {
+                        (format!("{}-byte Atomic Store", atomic.size.bytes()), idx, &atomic.write_vector)
+                    } else if let Some(idx) =
+                        Self::find_gt_index(&atomic.read_vector, &current_clocks.clock)
+                    {
+                        (format!("{}-byte Atomic Load", atomic.size.bytes()), idx, &atomic.read_vector)
+                    } else {
+                        unreachable!(
+                            "Failed to report data-race for mixed-size access: no race found"
+                        )
+                    }
             } else {
-                unreachable!(
-                    "Failed to report data-race for non-atomic operation: no atomic component"
-                )
-            }
-        } else {
-            unreachable!("Failed to report data-race for atomic operation")
-        };
+                unreachable!("Failed to report data-race")
+            };
 
         // Load elaborated thread information about the racing thread actions.
         let current_thread_info = global.print_thread_metadata(thread_mgr, current_index);
@@ -827,6 +895,7 @@ impl VClockAlloc {
 
         // Throw the data-race detection.
         Err(err_machine_stop!(TerminationInfo::DataRace {
+            involves_non_atomic,
             ptr: ptr_dbg,
             op1: RacingOp {
                 action: other_action.to_string(),
@@ -841,26 +910,6 @@ impl VClockAlloc {
         }))?
     }
 
-    /// Detect racing atomic read and writes (not data races)
-    /// on every byte of the current access range
-    pub(super) fn race_free_with_atomic(
-        &self,
-        range: AllocRange,
-        global: &GlobalState,
-        thread_mgr: &ThreadManager<'_, '_>,
-    ) -> bool {
-        if global.race_detecting() {
-            let (_, thread_clocks) = global.current_thread_state(thread_mgr);
-            let alloc_ranges = self.alloc_ranges.borrow();
-            for (_, mem_clocks) in alloc_ranges.iter(range.start, range.size) {
-                if !mem_clocks.race_free_with_atomic(&thread_clocks) {
-                    return false;
-                }
-            }
-        }
-        true
-    }
-
     /// Detect data-races for an unsynchronized read operation, will not perform
     /// data-race detection if `race_detecting()` is false, either due to no threads
     /// being created or if it is temporarily disabled during a racy read or write
@@ -890,7 +939,8 @@ impl VClockAlloc {
                         &machine.threads,
                         mem_clocks,
                         "Read",
-                        false,
+                        /* is_atomic */ false,
+                        access_range.size,
                         Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
                     );
                 }
@@ -929,7 +979,8 @@ impl VClockAlloc {
                         &machine.threads,
                         mem_clocks,
                         write_type.get_descriptor(),
-                        false,
+                        /* is_atomic */ false,
+                        access_range.size,
                         Pointer::new(alloc_id, Size::from_bytes(mem_clocks_range.start)),
                     );
                 }
@@ -1050,16 +1101,15 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         atomic: AtomicReadOrd,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_ref();
-        this.validate_overlapping_atomic(place)?;
         this.validate_atomic_op(
             place,
             atomic,
             "Atomic Load",
             move |memory, clocks, index, atomic| {
                 if atomic == AtomicReadOrd::Relaxed {
-                    memory.load_relaxed(&mut *clocks, index)
+                    memory.load_relaxed(&mut *clocks, index, place.layout.size)
                 } else {
-                    memory.load_acquire(&mut *clocks, index)
+                    memory.load_acquire(&mut *clocks, index, place.layout.size)
                 }
             },
         )
@@ -1073,16 +1123,15 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         atomic: AtomicWriteOrd,
     ) -> InterpResult<'tcx> {
         let this = self.eval_context_mut();
-        this.validate_overlapping_atomic(place)?;
         this.validate_atomic_op(
             place,
             atomic,
             "Atomic Store",
             move |memory, clocks, index, atomic| {
                 if atomic == AtomicWriteOrd::Relaxed {
-                    memory.store_relaxed(clocks, index)
+                    memory.store_relaxed(clocks, index, place.layout.size)
                 } else {
-                    memory.store_release(clocks, index)
+                    memory.store_release(clocks, index, place.layout.size)
                 }
             },
         )
@@ -1099,17 +1148,16 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
         let acquire = matches!(atomic, Acquire | AcqRel | SeqCst);
         let release = matches!(atomic, Release | AcqRel | SeqCst);
         let this = self.eval_context_mut();
-        this.validate_overlapping_atomic(place)?;
         this.validate_atomic_op(place, atomic, "Atomic RMW", move |memory, clocks, index, _| {
             if acquire {
-                memory.load_acquire(clocks, index)?;
+                memory.load_acquire(clocks, index, place.layout.size)?;
             } else {
-                memory.load_relaxed(clocks, index)?;
+                memory.load_relaxed(clocks, index, place.layout.size)?;
             }
             if release {
-                memory.rmw_release(clocks, index)
+                memory.rmw_release(clocks, index, place.layout.size)
             } else {
-                memory.rmw_relaxed(clocks, index)
+                memory.rmw_relaxed(clocks, index, place.layout.size)
             }
         })
     }
@@ -1160,7 +1208,8 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: MiriInterpCxExt<'mir, 'tcx> {
                                     &this.machine.threads,
                                     mem_clocks,
                                     description,
-                                    true,
+                                    /* is_atomic */ true,
+                                    place.layout.size,
                                     Pointer::new(
                                         alloc_id,
                                         Size::from_bytes(mem_clocks_range.start),
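The data_race.rs changes above boil down to two ideas: each atomic location now remembers the `Size` of the accesses it has seen, and the last non-atomic write is stored as a single `(thread, timestamp)` pair rather than a full clock. Below is a minimal, self-contained sketch of both, with toy types standing in for Miri's `VClock` and `Size`; all names here are illustrative, not Miri's real API:

```rust
/// Toy vector clock: one timestamp per thread.
#[derive(Default, Clone)]
struct VClock(Vec<u64>);

impl VClock {
    /// `self` happens-before (or equals) `other` iff it is pointwise `<=`.
    fn le(&self, other: &VClock) -> bool {
        self.0.iter().enumerate().all(|(i, &t)| t <= other.0.get(i).copied().unwrap_or(0))
    }
}

struct DataRace;

struct AtomicClocks {
    read_vector: VClock,  // last atomic read per thread
    write_vector: VClock, // last atomic write per thread
    size: u64,            // size in bytes of the previous atomic accesses
}

/// The mixed-size rule from `MemoryCellClocks::atomic_access`: an atomic
/// access of a different size is only allowed if it happens-after *every*
/// earlier atomic access to this location.
fn atomic_access(atomic: &mut AtomicClocks, thread_clock: &VClock, size: u64) -> Result<(), DataRace> {
    if atomic.size == size {
        Ok(()) // same size: always fine
    } else if atomic.read_vector.le(thread_clock) && atomic.write_vector.le(thread_clock) {
        atomic.size = size; // fully synchronized: the location switches to the new size
        Ok(())
    } else {
        Err(DataRace) // racy mixed-size access
    }
}

/// The singleton-clock trick from `write_was_before`: compare the one relevant
/// `(thread index, timestamp)` pair against a full clock without materializing
/// a `VClock` whose other entries would all be 0 anyway.
fn write_was_before(write: (usize, u64), other: &VClock) -> bool {
    write.1 <= other.0.get(write.0).copied().unwrap_or(0)
}

fn main() {
    let mut atomic =
        AtomicClocks { read_vector: VClock::default(), write_vector: VClock::default(), size: 2 };
    // With an empty access history, even an all-zero clock synchronizes
    // with everything, so changing the access size is allowed.
    assert!(atomic_access(&mut atomic, &VClock::default(), 1).is_ok());
    assert!(write_was_before((0, 0), &VClock::default()));
}
```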
diff --git a/src/tools/miri/src/concurrency/weak_memory.rs b/src/tools/miri/src/concurrency/weak_memory.rs
index 6781b1f6dd382..2ff344bb1a35f 100644
--- a/src/tools/miri/src/concurrency/weak_memory.rs
+++ b/src/tools/miri/src/concurrency/weak_memory.rs
@@ -169,14 +169,6 @@ impl StoreBufferAlloc {
         Self { store_buffers: RefCell::new(RangeObjectMap::new()) }
     }
 
-    /// Checks if the range imperfectly overlaps with existing buffers
-    /// Used to determine if mixed-size atomic accesses
-    fn is_overlapping(&self, range: AllocRange) -> bool {
-        let buffers = self.store_buffers.borrow();
-        let access_type = buffers.access_type(range);
-        matches!(access_type, AccessType::ImperfectlyOverlapping(_))
-    }
-
     /// When a non-atomic access happens on a location that has been atomically accessed
     /// before without data race, we can determine that the non-atomic access fully happens
     /// after all the prior atomic accesses so the location no longer needs to exhibit
@@ -190,6 +182,8 @@ impl StoreBufferAlloc {
                     buffers.remove_from_pos(pos);
                 }
                 AccessType::ImperfectlyOverlapping(pos_range) => {
+                    // We rely on the data-race check making sure this is synchronized.
+                    // Therefore we can forget about the old data here.
                     buffers.remove_pos_range(pos_range);
                 }
                 AccessType::Empty(_) => {
@@ -215,7 +209,7 @@ impl StoreBufferAlloc {
                 pos
             }
             AccessType::ImperfectlyOverlapping(pos_range) => {
-                // Once we reach here we would've already checked that this access is not racy
+                // Once we reach here we would've already checked that this access is not racy.
                 let mut buffers = self.store_buffers.borrow_mut();
                 buffers.remove_pos_range(pos_range.clone());
                 buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
@@ -240,6 +234,7 @@ impl StoreBufferAlloc {
                 pos
             }
             AccessType::ImperfectlyOverlapping(pos_range) => {
+                // Once we reach here we would've already checked that this access is not racy.
                 buffers.remove_pos_range(pos_range.clone());
                 buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
                 pos_range.start
@@ -473,37 +468,6 @@ impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir,
 pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
     crate::MiriInterpCxExt<'mir, 'tcx>
 {
-    // If weak memory emulation is enabled, check if this atomic op imperfectly overlaps with a previous
-    // atomic read or write. If it does, then we require it to be ordered (non-racy) with all previous atomic
-    // accesses on all the bytes in range
-    fn validate_overlapping_atomic(
-        &self,
-        place: &MPlaceTy<'tcx, Provenance>,
-    ) -> InterpResult<'tcx> {
-        let this = self.eval_context_ref();
-        let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr())?;
-        if let crate::AllocExtra {
-            weak_memory: Some(alloc_buffers),
-            data_race: Some(alloc_clocks),
-            ..
-        } = this.get_alloc_extra(alloc_id)?
-        {
-            let range = alloc_range(base_offset, place.layout.size);
-            if alloc_buffers.is_overlapping(range)
-                && !alloc_clocks.race_free_with_atomic(
-                    range,
-                    this.machine.data_race.as_ref().unwrap(),
-                    &this.machine.threads,
-                )
-            {
-                throw_unsup_format!(
-                    "racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation"
-                );
-            }
-        }
-        Ok(())
-    }
-
     fn buffered_atomic_rmw(
         &mut self,
         new_val: Scalar<Provenance>,
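With the racy-overlap check gone from weak_memory.rs, the store-buffer code relies entirely on the data-race detector: by the time a non-atomic access reaches the buffer map, it is already known to happen-after all atomic accesses to those bytes, so overlapping buffers can simply be dropped. A hedged sketch of that invalidation, with a plain `Vec` of ranges standing in for Miri's `RangeObjectMap`:

```rust
use std::ops::Range;

struct StoreBuffer; // stand-in for the real per-location store buffer

/// Drop every buffer whose byte range overlaps `access`. This is sound only
/// because the data-race check has already proven the non-atomic access
/// happens-after all atomic accesses to these bytes, so the buffered
/// (weakly ordered) values can never be observed again.
fn destroy_overlapping(buffers: &mut Vec<(Range<u64>, StoreBuffer)>, access: Range<u64>) {
    // Keep only buffers entirely before or entirely after the access.
    buffers.retain(|(r, _)| r.end <= access.start || access.end <= r.start);
}

fn main() {
    let mut buffers = vec![(0..4, StoreBuffer), (4..8, StoreBuffer)];
    destroy_overlapping(&mut buffers, 2..6); // imperfectly overlaps both buffers
    assert!(buffers.is_empty());
}
```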
diff --git a/src/tools/miri/src/diagnostics.rs b/src/tools/miri/src/diagnostics.rs
index cb095f94f3598..9b8f263b7ce15 100644
--- a/src/tools/miri/src/diagnostics.rs
+++ b/src/tools/miri/src/diagnostics.rs
@@ -43,9 +43,10 @@ pub enum TerminationInfo {
         span: SpanData,
     },
     DataRace {
+        involves_non_atomic: bool,
+        ptr: Pointer,
         op1: RacingOp,
         op2: RacingOp,
-        ptr: Pointer,
     },
 }
 
@@ -74,11 +75,15 @@ impl fmt::Display for TerminationInfo {
                 write!(f, "multiple definitions of symbol `{link_name}`"),
             SymbolShimClashing { link_name, .. } =>
                 write!(f, "found `{link_name}` symbol definition that clashes with a built-in shim",),
-            DataRace { ptr, op1, op2 } =>
+            DataRace { involves_non_atomic, ptr, op1, op2 } =>
                 write!(
                     f,
-                    "Data race detected between (1) {} on {} and (2) {} on {} at {ptr:?}. (2) just happened here",
-                    op1.action, op1.thread_info, op2.action, op2.thread_info
+                    "{} detected between (1) {} on {} and (2) {} on {} at {ptr:?}. (2) just happened here",
+                    if *involves_non_atomic { "Data race" } else { "Race condition" },
+                    op1.action,
+                    op1.thread_info,
+                    op2.action,
+                    op2.thread_info
                 ),
         }
     }
diff --git a/src/tools/miri/src/intptrcast.rs b/src/tools/miri/src/intptrcast.rs
index ab6a256f71450..9966ee3fd919e 100644
--- a/src/tools/miri/src/intptrcast.rs
+++ b/src/tools/miri/src/intptrcast.rs
@@ -27,9 +27,9 @@ pub type GlobalState = RefCell<GlobalStateInner>;
 #[derive(Clone, Debug)]
 pub struct GlobalStateInner {
     /// This is used as a map between the address of each allocation and its `AllocId`. It is always
-    /// sorted. We cannot use a `HashMap` since we can be given an address that is offset from the
-    /// base address, and we need to find the `AllocId` it belongs to.
-    /// This is not the *full* inverse of `base_addr`; dead allocations have been removed.
+    /// sorted by address. We cannot use a `HashMap` since we can be given an address that is offset
+    /// from the base address, and we need to find the `AllocId` it belongs to. This is not the
+    /// *full* inverse of `base_addr`; dead allocations have been removed.
     int_to_ptr_map: Vec<(u64, AllocId)>,
     /// The base address for each allocation.  We cannot put that into
     /// `AllocExtra` because function pointers also have a base address, and
@@ -285,7 +285,12 @@ impl GlobalStateInner {
         // However, we *can* remove it from `int_to_ptr_map`, since any wildcard pointers that exist
         // can no longer actually be accessing that address. This ensures `alloc_id_from_addr` never
         // returns a dead allocation.
-        self.int_to_ptr_map.retain(|&(_, id)| id != dead_id);
+        // To avoid a linear scan we first look up the address in `base_addr`, and then find it in
+        // `int_to_ptr_map`.
+        let addr = *self.base_addr.get(&dead_id).unwrap();
+        let pos = self.int_to_ptr_map.binary_search_by_key(&addr, |(addr, _)| *addr).unwrap();
+        let removed = self.int_to_ptr_map.remove(pos);
+        assert_eq!(removed, (addr, dead_id)); // double-check that we removed the right thing
         // We can also remove it from `exposed`, since this allocation can anyway not be returned by
         // `alloc_id_from_addr` any more.
         self.exposed.remove(&dead_id);
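The `free_alloc_id` change above replaces a linear `retain` predicate scan with a hash lookup plus binary search: since `int_to_ptr_map` stays sorted by address, the entry is found in O(log n) (the `Vec::remove` still shifts the tail, but runs no per-element predicate). A self-contained sketch of the same pattern, with `u64` standing in for `AllocId`:

```rust
use std::collections::HashMap;

/// Remove a dead allocation's entry from an address-sorted vector.
/// `u64` stands in for Miri's `AllocId` here.
fn remove_dead(
    base_addr: &HashMap<u64, u64>,        // AllocId -> base address
    int_to_ptr_map: &mut Vec<(u64, u64)>, // (address, AllocId), sorted by address
    dead_id: u64,
) {
    // Find the allocation's base address, then binary-search for it.
    let addr = *base_addr.get(&dead_id).unwrap();
    let pos = int_to_ptr_map.binary_search_by_key(&addr, |&(addr, _)| addr).unwrap();
    let removed = int_to_ptr_map.remove(pos);
    assert_eq!(removed, (addr, dead_id)); // double-check we removed the right entry
}

fn main() {
    let base_addr = HashMap::from([(7, 0x1000), (8, 0x2000)]);
    let mut map = vec![(0x1000, 7), (0x2000, 8)];
    remove_dead(&base_addr, &mut map, 7);
    assert_eq!(map, vec![(0x2000, 8)]);
}
```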
diff --git a/src/tools/miri/src/shims/time.rs b/src/tools/miri/src/shims/time.rs
index d6d0483f5e3ac..4918698c6b277 100644
--- a/src/tools/miri/src/shims/time.rs
+++ b/src/tools/miri/src/shims/time.rs
@@ -51,13 +51,10 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
             "macos" => {
                 absolute_clocks = vec![this.eval_libc_i32("CLOCK_REALTIME")];
                 relative_clocks = vec![this.eval_libc_i32("CLOCK_MONOTONIC")];
-                // Some clocks only seem to exist in the aarch64 version of the target.
-                if this.tcx.sess.target.arch == "aarch64" {
-                    // `CLOCK_UPTIME_RAW` supposed to not increment while the system is asleep... but
-                    // that's not really something a program running inside Miri can tell, anyway.
-                    // We need to support it because std uses it.
-                    relative_clocks.push(this.eval_libc_i32("CLOCK_UPTIME_RAW"));
-                }
+                // `CLOCK_UPTIME_RAW` is supposed to not increment while the system is asleep... but
+                // that's not really something a program running inside Miri can tell, anyway.
+                // We need to support it because std uses it.
+                relative_clocks.push(this.eval_libc_i32("CLOCK_UPTIME_RAW"));
             }
             target => throw_unsup_format!("`clock_gettime` is not supported on target OS {target}"),
         }
@@ -68,7 +65,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
         } else if relative_clocks.contains(&clk_id) {
             this.machine.clock.now().duration_since(this.machine.clock.anchor())
         } else {
-            // Unsupported clock.
             let einval = this.eval_libc("EINVAL");
             this.set_last_error(einval)?;
             return Ok(Scalar::from_i32(-1));
diff --git a/src/tools/miri/src/shims/windows/foreign_items.rs b/src/tools/miri/src/shims/windows/foreign_items.rs
index c887e18e97e8d..bddc30b8379b2 100644
--- a/src/tools/miri/src/shims/windows/foreign_items.rs
+++ b/src/tools/miri/src/shims/windows/foreign_items.rs
@@ -363,7 +363,9 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                     this.check_shim(abi, Abi::System { unwind: false }, link_name, args)?;
                 this.read_target_isize(hModule)?;
                 let name = this.read_c_str(this.read_pointer(lpProcName)?)?;
-                if let Ok(name) = str::from_utf8(name) && is_dyn_sym(name) {
+                if let Ok(name) = str::from_utf8(name)
+                    && is_dyn_sym(name)
+                {
                     let ptr = this.fn_ptr(FnVal::Other(DynSym::from_str(name)));
                     this.write_pointer(ptr, dest)?;
                 } else {
diff --git a/src/tools/miri/tests/fail/data_race/mixed_size_read.rs b/src/tools/miri/tests/fail/data_race/mixed_size_read.rs
new file mode 100644
index 0000000000000..d530ed2f5a406
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/mixed_size_read.rs
@@ -0,0 +1,25 @@
+//@compile-flags: -Zmiri-preemption-rate=0.0 -Zmiri-disable-weak-memory-emulation
+use std::sync::atomic::{AtomicU16, AtomicU8, Ordering};
+use std::thread;
+
+fn convert(a: &AtomicU16) -> &[AtomicU8; 2] {
+    unsafe { std::mem::transmute(a) }
+}
+
+// We can't allow mixed-size accesses; they are not possible in C++ and even
+// Intel says you shouldn't do it.
+fn main() {
+    let a = AtomicU16::new(0);
+    let a16 = &a;
+    let a8 = convert(a16);
+
+    thread::scope(|s| {
+        s.spawn(|| {
+            a16.load(Ordering::SeqCst);
+        });
+        s.spawn(|| {
+            a8[0].load(Ordering::SeqCst);
+            //~^ ERROR: Race condition detected between (1) 2-byte Atomic Load on thread `<unnamed>` and (2) 1-byte (different-size) Atomic Load on thread `<unnamed>`
+        });
+    });
+}
diff --git a/src/tools/miri/tests/fail/data_race/mixed_size_read.stderr b/src/tools/miri/tests/fail/data_race/mixed_size_read.stderr
new file mode 100644
index 0000000000000..06944a11db859
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/mixed_size_read.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: Race condition detected between (1) 2-byte Atomic Load on thread `<unnamed>` and (2) 1-byte (different-size) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
+  --> $DIR/mixed_size_read.rs:LL:CC
+   |
+LL |             a8[0].load(Ordering::SeqCst);
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Race condition detected between (1) 2-byte Atomic Load on thread `<unnamed>` and (2) 1-byte (different-size) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
+   |
+help: and (1) occurred earlier here
+  --> $DIR/mixed_size_read.rs:LL:CC
+   |
+LL |             a16.load(Ordering::SeqCst);
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span):
+   = note: inside closure at $DIR/mixed_size_read.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to previous error
+
diff --git a/src/tools/miri/tests/fail/data_race/mixed_size_write.rs b/src/tools/miri/tests/fail/data_race/mixed_size_write.rs
new file mode 100644
index 0000000000000..df3551612c3e4
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/mixed_size_write.rs
@@ -0,0 +1,25 @@
+//@compile-flags: -Zmiri-preemption-rate=0.0 -Zmiri-disable-weak-memory-emulation
+use std::sync::atomic::{AtomicU16, AtomicU8, Ordering};
+use std::thread;
+
+fn convert(a: &AtomicU16) -> &[AtomicU8; 2] {
+    unsafe { std::mem::transmute(a) }
+}
+
+// We can't allow mixed-size accesses; they are not possible in C++ and even
+// Intel says you shouldn't do it.
+fn main() {
+    let a = AtomicU16::new(0);
+    let a16 = &a;
+    let a8 = convert(a16);
+
+    thread::scope(|s| {
+        s.spawn(|| {
+            a16.store(1, Ordering::SeqCst);
+        });
+        s.spawn(|| {
+            a8[0].store(1, Ordering::SeqCst);
+            //~^ ERROR: Race condition detected between (1) 2-byte Atomic Store on thread `<unnamed>` and (2) 1-byte (different-size) Atomic Store on thread `<unnamed>`
+        });
+    });
+}
diff --git a/src/tools/miri/tests/fail/data_race/mixed_size_write.stderr b/src/tools/miri/tests/fail/data_race/mixed_size_write.stderr
new file mode 100644
index 0000000000000..4bb949175bfac
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/mixed_size_write.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: Race condition detected between (1) 2-byte Atomic Store on thread `<unnamed>` and (2) 1-byte (different-size) Atomic Store on thread `<unnamed>` at ALLOC. (2) just happened here
+  --> $DIR/mixed_size_write.rs:LL:CC
+   |
+LL |             a8[0].store(1, Ordering::SeqCst);
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Race condition detected between (1) 2-byte Atomic Store on thread `<unnamed>` and (2) 1-byte (different-size) Atomic Store on thread `<unnamed>` at ALLOC. (2) just happened here
+   |
+help: and (1) occurred earlier here
+  --> $DIR/mixed_size_write.rs:LL:CC
+   |
+LL |             a16.store(1, Ordering::SeqCst);
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span):
+   = note: inside closure at $DIR/mixed_size_write.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to previous error
+
diff --git a/src/tools/miri/tests/fail/data_race/read_read_race1.rs b/src/tools/miri/tests/fail/data_race/read_read_race1.rs
new file mode 100644
index 0000000000000..eebfbc74d40af
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/read_read_race1.rs
@@ -0,0 +1,27 @@
+//@compile-flags: -Zmiri-preemption-rate=0.0
+use std::sync::atomic::{AtomicU16, Ordering};
+use std::thread;
+
+// Make sure races between atomic and non-atomic reads are detected.
+// This seems harmless but C++ does not allow them, so we can't allow them for now either.
+// This test covers the case where the non-atomic access comes first.
+fn main() {
+    let a = AtomicU16::new(0);
+
+    thread::scope(|s| {
+        s.spawn(|| {
+            let ptr = &a as *const AtomicU16 as *mut u16;
+            unsafe { ptr.read() };
+        });
+        s.spawn(|| {
+            thread::yield_now();
+
+            // We also put a non-atomic access here, but that should *not* be reported.
+            let ptr = &a as *const AtomicU16 as *mut u16;
+            unsafe { ptr.read() };
+            // Then do the atomic access.
+            a.load(Ordering::SeqCst);
+            //~^ ERROR: Data race detected between (1) Read on thread `<unnamed>` and (2) Atomic Load on thread `<unnamed>`
+        });
+    });
+}
diff --git a/src/tools/miri/tests/fail/data_race/read_read_race1.stderr b/src/tools/miri/tests/fail/data_race/read_read_race1.stderr
new file mode 100644
index 0000000000000..158b438bd0d3e
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/read_read_race1.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: Data race detected between (1) Read on thread `<unnamed>` and (2) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
+  --> $DIR/read_read_race1.rs:LL:CC
+   |
+LL |             a.load(Ordering::SeqCst);
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^ Data race detected between (1) Read on thread `<unnamed>` and (2) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
+   |
+help: and (1) occurred earlier here
+  --> $DIR/read_read_race1.rs:LL:CC
+   |
+LL |             unsafe { ptr.read() };
+   |                      ^^^^^^^^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span):
+   = note: inside closure at $DIR/read_read_race1.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to previous error
+
diff --git a/src/tools/miri/tests/fail/data_race/read_read_race2.rs b/src/tools/miri/tests/fail/data_race/read_read_race2.rs
new file mode 100644
index 0000000000000..230b429e2878e
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/read_read_race2.rs
@@ -0,0 +1,27 @@
+//@compile-flags: -Zmiri-preemption-rate=0.0
+use std::sync::atomic::{AtomicU16, Ordering};
+use std::thread;
+
+// Make sure races between atomic and non-atomic reads are detected.
+// This seems harmless but C++ does not allow them, so we can't allow them for now either.
+// This test covers the case where the atomic access comes first.
+fn main() {
+    let a = AtomicU16::new(0);
+
+    thread::scope(|s| {
+        s.spawn(|| {
+            // We also put a non-atomic access here, but that should *not* be reported.
+            let ptr = &a as *const AtomicU16 as *mut u16;
+            unsafe { ptr.read() };
+            // Then do the atomic access.
+            a.load(Ordering::SeqCst);
+        });
+        s.spawn(|| {
+            thread::yield_now();
+
+            let ptr = &a as *const AtomicU16 as *mut u16;
+            unsafe { ptr.read() };
+            //~^ ERROR: Data race detected between (1) Atomic Load on thread `<unnamed>` and (2) Read on thread `<unnamed>`
+        });
+    });
+}
diff --git a/src/tools/miri/tests/fail/data_race/read_read_race2.stderr b/src/tools/miri/tests/fail/data_race/read_read_race2.stderr
new file mode 100644
index 0000000000000..7f867b9edbb74
--- /dev/null
+++ b/src/tools/miri/tests/fail/data_race/read_read_race2.stderr
@@ -0,0 +1,20 @@
+error: Undefined Behavior: Data race detected between (1) Atomic Load on thread `<unnamed>` and (2) Read on thread `<unnamed>` at ALLOC. (2) just happened here
+  --> $DIR/read_read_race2.rs:LL:CC
+   |
+LL |             unsafe { ptr.read() };
+   |                      ^^^^^^^^^^ Data race detected between (1) Atomic Load on thread `<unnamed>` and (2) Read on thread `<unnamed>` at ALLOC. (2) just happened here
+   |
+help: and (1) occurred earlier here
+  --> $DIR/read_read_race2.rs:LL:CC
+   |
+LL |             a.load(Ordering::SeqCst);
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span):
+   = note: inside closure at $DIR/read_read_race2.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to previous error
+
diff --git a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.rs b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.rs
index 7bbb7f9fe7c2a..36dc0d5f3f7ed 100644
--- a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.rs
+++ b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.rs
@@ -19,7 +19,7 @@ fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] {
 
 // Wine's SRWLock implementation does this, which is definitely undefined in C++ memory model
 // https://github.com/wine-mirror/wine/blob/303f8042f9db508adaca02ef21f8de4992cb9c03/dlls/ntdll/sync.c#L543-L566
-// Though it probably works just fine on x86
+// It probably works just fine on x86, but Intel does document this as "don't do it!"
 pub fn main() {
     let x = static_atomic_u32(0);
     let j1 = spawn(move || {
@@ -31,7 +31,7 @@ pub fn main() {
         let x_split = split_u32_ptr(x_ptr);
         unsafe {
             let hi = ptr::addr_of!((*x_split)[0]);
-            std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: imperfectly overlapping
+            std::intrinsics::atomic_load_relaxed(hi); //~ ERROR: different-size
         }
     });
 
diff --git a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.stderr b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.stderr
index dda22ac9ce24c..055585ab96f5b 100644
--- a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.stderr
+++ b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size.stderr
@@ -1,11 +1,17 @@
-error: unsupported operation: racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation
+error: Undefined Behavior: Race condition detected between (1) 4-byte Atomic Store on thread `<unnamed>` and (2) 2-byte (different-size) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
   --> $DIR/racing_mixed_size.rs:LL:CC
    |
 LL |             std::intrinsics::atomic_load_relaxed(hi);
-   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation
+   |             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Race condition detected between (1) 4-byte Atomic Store on thread `<unnamed>` and (2) 2-byte (different-size) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
    |
-   = help: this is likely not a bug in the program; it indicates that the program performed an operation that the interpreter does not support
-   = note: BACKTRACE:
+help: and (1) occurred earlier here
+  --> $DIR/racing_mixed_size.rs:LL:CC
+   |
+LL |         x.store(1, Relaxed);
+   |         ^^^^^^^^^^^^^^^^^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span):
    = note: inside closure at $DIR/racing_mixed_size.rs:LL:CC
 
 note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
diff --git a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.rs b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.rs
index 73178980b7e5a..5cd14540ca3f4 100644
--- a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.rs
+++ b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.rs
@@ -16,7 +16,7 @@ fn split_u32_ptr(dword: *const u32) -> *const [u16; 2] {
 
 // Racing mixed size reads may cause two loads to read-from
 // the same store but observe different values, which doesn't make
-// sense under the formal model so we forbade this.
+// sense under the formal model so we forbid this.
 pub fn main() {
     let x = static_atomic(0);
 
@@ -29,7 +29,7 @@ pub fn main() {
         let x_split = split_u32_ptr(x_ptr);
         unsafe {
             let hi = x_split as *const u16 as *const AtomicU16;
-            (*hi).load(Relaxed); //~ ERROR: imperfectly overlapping
+            (*hi).load(Relaxed); //~ ERROR: different-size
         }
     });
 
diff --git a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.stderr b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.stderr
index 59fa5c7410237..2eefa0a87b49a 100644
--- a/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.stderr
+++ b/src/tools/miri/tests/fail/weak_memory/racing_mixed_size_read.stderr
@@ -1,11 +1,17 @@
-error: unsupported operation: racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation
+error: Undefined Behavior: Race condition detected between (1) 4-byte Atomic Load on thread `<unnamed>` and (2) 2-byte (different-size) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
   --> $DIR/racing_mixed_size_read.rs:LL:CC
    |
 LL |             (*hi).load(Relaxed);
-   |             ^^^^^^^^^^^^^^^^^^^ racy imperfectly overlapping atomic access is not possible in the C++20 memory model, and not supported by Miri's weak memory emulation
+   |             ^^^^^^^^^^^^^^^^^^^ Race condition detected between (1) 4-byte Atomic Load on thread `<unnamed>` and (2) 2-byte (different-size) Atomic Load on thread `<unnamed>` at ALLOC. (2) just happened here
    |
-   = help: this is likely not a bug in the program; it indicates that the program performed an operation that the interpreter does not support
-   = note: BACKTRACE:
+help: and (1) occurred earlier here
+  --> $DIR/racing_mixed_size_read.rs:LL:CC
+   |
+LL |         x.load(Relaxed);
+   |         ^^^^^^^^^^^^^^^
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE (of the first span):
    = note: inside closure at $DIR/racing_mixed_size_read.rs:LL:CC
 
 note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
diff --git a/src/tools/miri/tests/pass-dep/concurrency/linux-futex.rs b/src/tools/miri/tests/pass-dep/concurrency/linux-futex.rs
index a456528ec2074..648c004c97cc0 100644
--- a/src/tools/miri/tests/pass-dep/concurrency/linux-futex.rs
+++ b/src/tools/miri/tests/pass-dep/concurrency/linux-futex.rs
@@ -2,7 +2,7 @@
 //@compile-flags: -Zmiri-disable-isolation
 
 use std::mem::MaybeUninit;
-use std::ptr;
+use std::ptr::{self, addr_of};
 use std::sync::atomic::AtomicI32;
 use std::sync::atomic::Ordering;
 use std::thread;
@@ -13,7 +13,7 @@ fn wake_nobody() {
 
     // Wake 1 waiter. Expect zero waiters woken up, as nobody is waiting.
     unsafe {
-        assert_eq!(libc::syscall(libc::SYS_futex, &futex as *const i32, libc::FUTEX_WAKE, 1), 0);
+        assert_eq!(libc::syscall(libc::SYS_futex, addr_of!(futex), libc::FUTEX_WAKE, 1), 0);
     }
 
     // Same, but without omitting the unused arguments.
@@ -21,7 +21,7 @@ fn wake_nobody() {
         assert_eq!(
             libc::syscall(
                 libc::SYS_futex,
-                &futex as *const i32,
+                addr_of!(futex),
                 libc::FUTEX_WAKE,
                 1,
                 ptr::null::<libc::timespec>(),
@@ -52,7 +52,7 @@ fn wait_wrong_val() {
         assert_eq!(
             libc::syscall(
                 libc::SYS_futex,
-                &futex as *const i32,
+                addr_of!(futex),
                 libc::FUTEX_WAIT,
                 456,
                 ptr::null::<libc::timespec>(),
@@ -73,7 +73,7 @@ fn wait_timeout() {
         assert_eq!(
             libc::syscall(
                 libc::SYS_futex,
-                &futex as *const i32,
+                addr_of!(futex),
                 libc::FUTEX_WAIT,
                 123,
                 &libc::timespec { tv_sec: 0, tv_nsec: 200_000_000 },
@@ -110,7 +110,7 @@ fn wait_absolute_timeout() {
         assert_eq!(
             libc::syscall(
                 libc::SYS_futex,
-                &futex as *const i32,
+                addr_of!(futex),
                 libc::FUTEX_WAIT_BITSET,
                 123,
                 &timeout,
@@ -136,7 +136,7 @@ fn wait_wake() {
             assert_eq!(
                 libc::syscall(
                     libc::SYS_futex,
-                    &FUTEX as *const i32,
+                    addr_of!(FUTEX),
                     libc::FUTEX_WAKE,
                     10, // Wake up at most 10 threads.
                 ),
@@ -149,7 +149,7 @@ fn wait_wake() {
         assert_eq!(
             libc::syscall(
                 libc::SYS_futex,
-                &FUTEX as *const i32,
+                addr_of!(FUTEX),
                 libc::FUTEX_WAIT,
                 0,
                 ptr::null::<libc::timespec>(),
@@ -173,7 +173,7 @@ fn wait_wake_bitset() {
             assert_eq!(
                 libc::syscall(
                     libc::SYS_futex,
-                    &FUTEX as *const i32,
+                    addr_of!(FUTEX),
                     libc::FUTEX_WAKE_BITSET,
                     10, // Wake up at most 10 threads.
                     ptr::null::<libc::timespec>(),
@@ -188,7 +188,7 @@ fn wait_wake_bitset() {
             assert_eq!(
                 libc::syscall(
                     libc::SYS_futex,
-                    &FUTEX as *const i32,
+                    addr_of!(FUTEX),
                     libc::FUTEX_WAKE_BITSET,
                     10, // Wake up at most 10 threads.
                     ptr::null::<libc::timespec>(),
@@ -204,7 +204,7 @@ fn wait_wake_bitset() {
         assert_eq!(
             libc::syscall(
                 libc::SYS_futex,
-                &FUTEX as *const i32,
+                addr_of!(FUTEX),
                 libc::FUTEX_WAIT_BITSET,
                 0,
                 ptr::null::<libc::timespec>(),
@@ -244,7 +244,7 @@ fn concurrent_wait_wake() {
             unsafe {
                 let ret = libc::syscall(
                     libc::SYS_futex,
-                    &FUTEX as *const AtomicI32,
+                    addr_of!(FUTEX),
                     libc::FUTEX_WAIT,
                     HELD,
                     ptr::null::<libc::timespec>(),
@@ -267,7 +267,7 @@ fn concurrent_wait_wake() {
         FUTEX.store(FREE, Ordering::Relaxed);
         unsafe {
             DATA = 1;
-            libc::syscall(libc::SYS_futex, &FUTEX as *const AtomicI32, libc::FUTEX_WAKE, 1);
+            libc::syscall(libc::SYS_futex, addr_of!(FUTEX), libc::FUTEX_WAKE, 1);
         }
 
         t.join().unwrap();
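The futex test now takes addresses with `ptr::addr_of!` instead of `&x as *const _` casts. A minimal illustration of the difference: the two pointers compare equal, but `addr_of!` never materializes an intermediate reference, which matters under Miri's aliasing checks:

```rust
use std::ptr::addr_of;

fn main() {
    let x: i32 = 123;
    // Reference-then-cast: materializes an intermediate `&i32`.
    let p1 = &x as *const i32;
    // `addr_of!` produces the raw pointer directly, with no reference
    // (and thus no reference-level aliasing assumptions) in between.
    let p2 = addr_of!(x);
    assert_eq!(p1, p2);
}
```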
diff --git a/src/tools/miri/tests/pass-dep/shims/libc-misc.rs b/src/tools/miri/tests/pass-dep/shims/libc-misc.rs
index 82c49cfb17c5a..40d3fa19e5301 100644
--- a/src/tools/miri/tests/pass-dep/shims/libc-misc.rs
+++ b/src/tools/miri/tests/pass-dep/shims/libc-misc.rs
@@ -186,7 +186,7 @@ fn test_clocks() {
             unsafe { libc::clock_gettime(libc::CLOCK_MONOTONIC_COARSE, tp.as_mut_ptr()) };
         assert_eq!(is_error, 0);
     }
-    #[cfg(all(target_os = "macos", target_arch = "aarch64"))]
+    #[cfg(target_os = "macos")]
     {
         let is_error = unsafe { libc::clock_gettime(libc::CLOCK_UPTIME_RAW, tp.as_mut_ptr()) };
         assert_eq!(is_error, 0);
diff --git a/src/tools/miri/tests/pass/concurrency/simple.rs b/src/tools/miri/tests/pass/concurrency/simple.rs
index 556e0a24769d7..ec549a998bae7 100644
--- a/src/tools/miri/tests/pass/concurrency/simple.rs
+++ b/src/tools/miri/tests/pass/concurrency/simple.rs
@@ -62,6 +62,23 @@ fn panic_named() {
         .unwrap_err();
 }
 
+// This is not a data race!
+fn shared_readonly() {
+    use std::sync::Arc;
+
+    let x = Arc::new(42i32);
+    let h = thread::spawn({
+        let x = Arc::clone(&x);
+        move || {
+            assert_eq!(*x, 42);
+        }
+    });
+
+    assert_eq!(*x, 42);
+
+    h.join().unwrap();
+}
+
 fn main() {
     create_and_detach();
     create_and_join();
@@ -71,6 +88,7 @@ fn main() {
     create_nested_and_join();
     create_move_in();
     create_move_out();
+    shared_readonly();
     panic();
     panic_named();
 }
diff --git a/src/tools/miri/tests/pass/intrinsics-math.rs b/src/tools/miri/tests/pass/intrinsics-math.rs
index 5f7730a3e86af..589864f4f4ba7 100644
--- a/src/tools/miri/tests/pass/intrinsics-math.rs
+++ b/src/tools/miri/tests/pass/intrinsics-math.rs
@@ -125,9 +125,8 @@ pub fn main() {
 
     assert_approx_eq!(5.0f32.gamma(), 24.0);
     assert_approx_eq!(5.0f64.gamma(), 24.0);
-    // These fail even on the host, precision seems to be terrible.
-    //assert_approx_eq!(-0.5f32.gamma(), -2.0 * f32::consts::PI.sqrt());
-    //assert_approx_eq!(-0.5f64.gamma(), -2.0 * f64::consts::PI.sqrt());
+    assert_approx_eq!((-0.5f32).gamma(), (-2.0) * f32::consts::PI.sqrt());
+    assert_approx_eq!((-0.5f64).gamma(), (-2.0) * f64::consts::PI.sqrt());
 
     assert_eq!(2.0f32.ln_gamma(), (0.0, 1));
     assert_eq!(2.0f64.ln_gamma(), (0.0, 1));
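For the record, the old commented-out assertions evaluated `-(0.5f32.gamma())` (method calls bind tighter than unary minus), i.e. $-\Gamma(0.5) \approx -1.772$, which could never match; the parenthesized form evaluates the intended exact closed form, since by the recurrence $\Gamma(z+1) = z\,\Gamma(z)$ with $\Gamma(\tfrac12) = \sqrt{\pi}$,

$$\Gamma(-\tfrac12) = \frac{\Gamma(\tfrac12)}{-\tfrac12} = -2\sqrt{\pi} \approx -3.5449.$$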
diff --git a/src/tools/miri/tests/pass/weak_memory/extra_cpp_unsafe.rs b/src/tools/miri/tests/pass/weak_memory/extra_cpp_unsafe.rs
deleted file mode 100644
index 48b15191b38b0..0000000000000
--- a/src/tools/miri/tests/pass/weak_memory/extra_cpp_unsafe.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-//@compile-flags: -Zmiri-ignore-leaks
-
-// Tests operations not performable through C++'s atomic API
-// but doable in unsafe Rust which we think *should* be fine.
-// Nonetheless they may be determined as inconsistent with the
-// memory model in the future.
-
-#![feature(atomic_from_mut)]
-
-use std::sync::atomic::AtomicU32;
-use std::sync::atomic::Ordering::*;
-use std::thread::spawn;
-
-fn static_atomic(val: u32) -> &'static AtomicU32 {
-    let ret = Box::leak(Box::new(AtomicU32::new(val)));
-    ret
-}
-
-// We allow perfectly overlapping non-atomic and atomic reads to race
-fn racing_mixed_atomicity_read() {
-    let x = static_atomic(0);
-    x.store(42, Relaxed);
-
-    let j1 = spawn(move || x.load(Relaxed));
-
-    let j2 = spawn(move || {
-        let x_ptr = x as *const AtomicU32 as *const u32;
-        unsafe { x_ptr.read() }
-    });
-
-    let r1 = j1.join().unwrap();
-    let r2 = j2.join().unwrap();
-
-    assert_eq!(r1, 42);
-    assert_eq!(r2, 42);
-}
-
-pub fn main() {
-    racing_mixed_atomicity_read();
-}