Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions libr/include/r_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,22 @@ typedef struct r_io_cache_t {
bool enabled;
} RIOCache;

// Range-based validation cache for is_valid_offset optimization
//
// One entry of that cache: a remembered validity answer that applies to a
// whole address range. The lookup code treats an address as covered by the
// entry when start_addr <= addr <= end_addr, i.e. both bounds are inclusive.
typedef struct r_io_range_cache_t {
// First address covered by this entry
ut64 start_addr;
// Last address covered by this entry (inclusive)
ut64 end_addr;
// Validity result handed back for every address inside the range
bool is_valid;
ut32 map_id; // For cache invalidation tracking; NOTE(review): no visible code assigns this field yet (see the TODO in the add path)
} RIOVALIDRangeCache;

// Container for the cached validity ranges plus a one-entry memo of the
// most recent query. Embedded in RIO as the `valid_cache` member.
typedef struct r_io_valid_cache_t {
// Array of cached entries; allocated with calloc in r_io_valid_cache_init
// and released in r_io_valid_cache_fini. NULL when allocation failed.
RIOVALIDRangeCache *ranges;
// Number of entries of `ranges` currently in use
int count;
// Number of entries `ranges` has room for
int capacity;
ut64 last_query_addr; // Last queried address for optimization; UT64_MAX means "no previous query"
// Result that goes with last_query_addr; only meaningful when
// last_query_addr is not UT64_MAX
bool last_query_result;
} RIOVALIDCache;

// -io-cache-

typedef struct r_io_t {
Expand All @@ -151,6 +167,7 @@ typedef struct r_io_t {
RIDStorage maps; // RIOMaps accessible by their id
RIDStorage banks; // RIOBanks accessible by their id
RIOCache cache;
RIOVALIDCache valid_cache; // Range-based validation cache
ut8 *write_mask;
int write_mask_len;
ut64 mask;
Expand Down Expand Up @@ -632,6 +649,9 @@ R_API char *r_io_map_getattr(RIOMap *map);

/* io/ioutils.c */
R_API bool r_io_is_valid_offset(RIO *io, ut64 offset, int hasperm);
R_API void r_io_valid_cache_init(RIO *io);
R_API void r_io_valid_cache_fini(RIO *io);
R_API void r_io_valid_cache_invalidate(RIO *io);
R_API bool r_io_addr_is_mapped(RIO *io, ut64 vaddr);
R_API bool r_io_read_i(RIO* io, ut64 addr, ut64 *val, int size, bool endian);
R_API bool r_io_write_i(RIO* io, ut64 addr, ut64 *val, int size, bool endian);
Expand Down
2 changes: 2 additions & 0 deletions libr/io/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ R_API void r_io_init(RIO* io) {
r_io_cache_init (io);
r_io_plugin_init (io);
r_io_undo_init (io);
r_io_valid_cache_init (io); // Initialize validation cache
io->event = r_event_new (io);
RIOBank *bank = r_io_bank_new ("default");
if (bank) {
Expand Down Expand Up @@ -674,6 +675,7 @@ R_API void r_io_fini(RIO* io) {
r_io_desc_fini (io);
ls_free (io->plugins);
r_io_cache_fini (io);
r_io_valid_cache_fini (io); // Cleanup validation cache
r_list_free (io->undo.w_list);
R_FREE (io->runprofile);
r_event_free (io->event);
Expand Down
3 changes: 3 additions & 0 deletions libr/io/io_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ R_API RIOMap *r_io_map_add(RIO *io, int fd, int perm, ut64 delta, ut64 addr, ut6
free (map[1]);
return NULL;
}
r_io_valid_cache_invalidate (io); // Invalidate validation cache
return map[1];
}
return NULL;
Expand Down Expand Up @@ -216,6 +217,7 @@ R_API RIOMap *r_io_map_add_bottom(RIO *io, int fd, int perm, ut64 delta, ut64 ad
free (map[1]);
return NULL;
}
r_io_valid_cache_invalidate (io); // Invalidate validation cache
return map[1];
}
return NULL;
Expand Down Expand Up @@ -271,6 +273,7 @@ R_API void r_io_map_del(RIO *io, ut32 id) {
} while (r_id_storage_get_next (&io->banks, &bankid));
r_id_storage_delete (&io->maps, id);
_map_free_cb (NULL, map, id);
r_io_valid_cache_invalidate (io); // Invalidate validation cache
}

//delete all maps with specified fd
Expand Down
135 changes: 115 additions & 20 deletions libr/io/ioutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,83 @@

#include <r_io.h>

// Range-based validation cache implementation
#define VALID_CACHE_CAPACITY 64

R_API void r_io_valid_cache_init(RIO *io) {
R_RETURN_IF_FAIL (io);
io->valid_cache.ranges = calloc (VALID_CACHE_CAPACITY, sizeof (RIOVALIDRangeCache));
if (io->valid_cache.ranges) {
io->valid_cache.capacity = VALID_CACHE_CAPACITY;
io->valid_cache.count = 0;
io->valid_cache.last_query_addr = UT64_MAX;
Comment on lines +10 to +14
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Memory allocation failure is not properly handled. If calloc fails, the cache fields are not initialized, leaving them in an undefined state. The function should either set capacity to 0 on allocation failure, or set ranges to NULL explicitly to ensure consistent state.

Suggested change
io->valid_cache.ranges = calloc (VALID_CACHE_CAPACITY, sizeof (RIOVALIDRangeCache));
if (io->valid_cache.ranges) {
io->valid_cache.capacity = VALID_CACHE_CAPACITY;
io->valid_cache.count = 0;
io->valid_cache.last_query_addr = UT64_MAX;
/* Initialize cache to a known safe state first */
io->valid_cache.ranges = NULL;
io->valid_cache.capacity = 0;
io->valid_cache.count = 0;
io->valid_cache.last_query_addr = UT64_MAX;
io->valid_cache.ranges = calloc (VALID_CACHE_CAPACITY, sizeof (RIOVALIDRangeCache));
if (io->valid_cache.ranges) {
io->valid_cache.capacity = VALID_CACHE_CAPACITY;

Copilot uses AI. Check for mistakes.
}
}

// Release the validation cache embedded in `io` and leave it empty.
//
// Frees the ranges array and clears every bookkeeping field (count,
// capacity, last query). This is done unconditionally, so the cache also
// ends up in a consistent empty state when `ranges` is NULL, for example
// after calloc failed in r_io_valid_cache_init.
//
// last_query_addr is put back to UT64_MAX, the "no previous query" marker
// the lookup fast path tests for. Leaving it at 0 after the memset would
// make address 0 look like a remembered (invalid) answer.
//
// io: the RIO instance owning the cache; a NULL io is rejected by
//     R_RETURN_IF_FAIL.
R_API void r_io_valid_cache_fini(RIO *io) {
	R_RETURN_IF_FAIL (io);
	// free (NULL) is a no-op, so no guard is needed around it
	free (io->valid_cache.ranges);
	memset (&io->valid_cache, 0, sizeof (io->valid_cache));
	io->valid_cache.last_query_addr = UT64_MAX;
}

R_API void r_io_valid_cache_invalidate(RIO *io) {
R_RETURN_IF_FAIL (io);
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache invalidation doesn't check if the cache is initialized (ranges is NULL). If initialization failed or the cache was not initialized yet, this will access uninitialized memory when setting last_query_addr. Add a check for io->valid_cache.ranges != NULL before accessing cache fields.

Suggested change
R_RETURN_IF_FAIL (io);
R_RETURN_IF_FAIL (io);
if (!io->valid_cache.ranges) {
return;
}

Copilot uses AI. Check for mistakes.
io->valid_cache.count = 0;
io->valid_cache.last_query_addr = UT64_MAX;
}
Comment on lines +8 to +30
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing API documentation. The new public API functions (r_io_valid_cache_init, r_io_valid_cache_fini, r_io_valid_cache_invalidate) lack documentation comments explaining their purpose, parameters, and when they should be called. Add docstring comments following the project's documentation standards to explain these functions.

Copilot uses AI. Check for mistakes.

// Check if address is in cached range
static bool r_io_valid_cache_lookup(RIO *io, ut64 addr, bool *result) {
RIOVALIDCache *cache = &io->valid_cache;

// Fast path: same as last query (common for consecutive bytes)
if (cache->last_query_addr != UT64_MAX && addr == cache->last_query_addr) {
*result = cache->last_query_result;
return true;
}

// Search through cached ranges
for (int i = 0; i < cache->count; i++) {
RIOVALIDRangeCache *range = &cache->ranges[i];
if (addr >= range->start_addr && addr <= range->end_addr) {
*result = range->is_valid;
cache->last_query_addr = addr;
cache->last_query_result = range->is_valid;
return true;
}
}
Comment on lines +43 to +51
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache lookup uses a linear search through all cached ranges, which is O(n) where n is the number of cached entries (up to 64). For better performance with sequential reads, consider organizing ranges in a way that makes lookups more efficient, such as keeping them sorted and using binary search, or using the most recently used entry as a hint.

Copilot uses AI. Check for mistakes.
return false;
}
Comment on lines +33 to +53
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache lookup doesn't check if the cache is properly initialized (ranges is NULL). If initialization failed or cache was freed, this will result in dereferencing a NULL pointer when accessing cache->ranges. Add a check for cache->ranges != NULL before attempting to access the cache.

Copilot uses AI. Check for mistakes.

// Add a validation result to cache
static void r_io_valid_cache_add(RIO *io, ut64 addr, bool result) {
RIOVALIDCache *cache = &io->valid_cache;

// If cache is full, replace oldest entry (simple FIFO)
if (cache->count >= cache->capacity) {
// Shift all entries to make space at the end
if (cache->count > 0) {
memmove (&cache->ranges[0], &cache->ranges[1],
sizeof (RIOVALIDRangeCache) * (cache->count - 1));
}
cache->count--;
Comment on lines +59 to +66
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The FIFO eviction strategy is inefficient for the stated use case of sequential reads. The memmove operation shifts all entries on every eviction, which is O(n) complexity. For sequential access patterns, an LRU or circular buffer would be more appropriate. Consider using a circular buffer with a head pointer to avoid the memmove overhead.

Copilot uses AI. Check for mistakes.
}

// Add new entry - we cache a small range around the address since
// most accesses are consecutive
RIOVALIDRangeCache *range = &cache->ranges[cache->count];
range->start_addr = addr;
range->end_addr = addr + 1023; // Cache 1K range
Comment on lines +69 to +73
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cached range assumes a 1KB (1024-byte) range is uniformly valid/invalid starting from the queried address. However, this assumption may be incorrect when:

  1. The queried address is near a map boundary
  2. Multiple maps with different validity states exist within this range
  3. The address is near the end of a descriptor

This can lead to incorrect cache hits returning stale results. The cache should either validate range boundaries against actual map extents, or use a much smaller range (e.g., single byte) to avoid crossing boundaries.

Suggested change
// Add new entry - we cache a small range around the address since
// most accesses are consecutive
RIOVALIDRangeCache *range = &cache->ranges[cache->count];
range->start_addr = addr;
range->end_addr = addr + 1023; // Cache 1K range
// Add new entry - we cache the exact address
RIOVALIDRangeCache *range = &cache->ranges[cache->count];
range->start_addr = addr;
range->end_addr = addr; // Cache single-byte range to avoid crossing map boundaries

Copilot uses AI. Check for mistakes.
range->is_valid = result;
// TODO: Set proper map_id when we can track it
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment suggests that map_id will be used for cache invalidation tracking, but the field is never set in this implementation. This leaves the field uninitialized (potentially containing garbage values from memory). Either implement the map_id tracking or remove the field and comment if it's not needed yet.

Suggested change
// TODO: Set proper map_id when we can track it
// map_id tracking is not implemented yet; use 0 as the default/sentinel.
range->map_id = 0;

Copilot uses AI. Check for mistakes.

cache->count++;
cache->last_query_addr = addr;
cache->last_query_result = result;
Comment on lines +34 to +79
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache is not invalidated when permissions change (e.g., via r_io_map_remap or permission updates). The cache stores validity results that depend on the hasperm parameter, but different permission checks for the same address may have different validity results. Without tracking the hasperm parameter used for each cached entry, this can return incorrect results when the same address is checked with different permission requirements.

Suggested change
RIOVALIDCache *cache = &io->valid_cache;
// Fast path: same as last query (common for consecutive bytes)
if (cache->last_query_addr != UT64_MAX && addr == cache->last_query_addr) {
*result = cache->last_query_result;
return true;
}
// Search through cached ranges
for (int i = 0; i < cache->count; i++) {
RIOVALIDRangeCache *range = &cache->ranges[i];
if (addr >= range->start_addr && addr <= range->end_addr) {
*result = range->is_valid;
cache->last_query_addr = addr;
cache->last_query_result = range->is_valid;
return true;
}
}
return false;
}
// Add a validation result to cache
static void r_io_valid_cache_add(RIO *io, ut64 addr, bool result) {
RIOVALIDCache *cache = &io->valid_cache;
// If cache is full, replace oldest entry (simple FIFO)
if (cache->count >= cache->capacity) {
// Shift all entries to make space at the end
if (cache->count > 0) {
memmove (&cache->ranges[0], &cache->ranges[1],
sizeof (RIOVALIDRangeCache) * (cache->count - 1));
}
cache->count--;
}
// Add new entry - we cache a small range around the address since
// most accesses are consecutive
RIOVALIDRangeCache *range = &cache->ranges[cache->count];
range->start_addr = addr;
range->end_addr = addr + 1023; // Cache 1K range
range->is_valid = result;
// TODO: Set proper map_id when we can track it
cache->count++;
cache->last_query_addr = addr;
cache->last_query_result = result;
R_UNUSED (io);
R_UNUSED (addr);
R_UNUSED (result);
// Disabled cache: always indicate no cached result so callers recompute.
return false;
}
// Add a validation result to cache
static void r_io_valid_cache_add(RIO *io, ut64 addr, bool result) {
R_UNUSED (io);
R_UNUSED (addr);
R_UNUSED (result);
// Disabled cache: do not store any validation results.

Copilot uses AI. Check for mistakes.
}
Comment on lines +56 to +80
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache add function doesn't check if the cache is properly initialized (ranges is NULL). If initialization failed, this will attempt to write to NULL pointer. Add a check for cache->ranges != NULL before attempting to add entries.

Copilot uses AI. Check for mistakes.
Comment on lines +8 to +80
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache implementation is not thread-safe. Multiple threads can simultaneously read and modify the cache state (count, last_query_addr, last_query_result, and ranges array), leading to race conditions. If r_io_is_valid_offset can be called from multiple threads, add synchronization (e.g., mutex) to protect cache access.

Copilot uses AI. Check for mistakes.

// This helper function only checks whether the given vaddr is mapped; it does
// not account for map perms.
R_API bool r_io_addr_is_mapped(RIO *io, ut64 vaddr) {
Expand All @@ -15,39 +92,57 @@ R_API bool r_io_addr_is_mapped(RIO *io, ut64 vaddr) {
// when io.va is false it only checks for the desc
R_API bool r_io_is_valid_offset(RIO* io, ut64 offset, int hasperm) {
R_RETURN_VAL_IF_FAIL (io, false);

// Try cache lookup first for performance optimization
bool cached_result;
if (r_io_valid_cache_lookup (io, offset, &cached_result)) {
return cached_result;
}

// Compute actual result
bool result = false;
if ((io->cache.mode & R_PERM_X) == R_PERM_X) {
// io.cache must be set to true for this codeblock to be executed
ut8 word[4] = { 0xff, 0xff, 0xff, 0xff};
// TODO: check for (io->cache.mode & R_PERM_S) ?
(void)r_io_read_at (io, offset, (ut8*)&word, 4);
if (!r_io_cache_read_at (io, offset, (ut8*)&word, 4)) {
if (!r_io_read_at (io, offset, (ut8*)&word, 4)) {
return false;
result = false;
}
}
return memcmp (word, "\xff\xff\xff\xff", 4) != 0;
}
if (io->mask) {
result = memcmp (word, "\xff\xff\xff\xff", 4) != 0;
Comment on lines 104 to +114
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Control flow issue: when the cache lookup path is taken (lines 104-114), the variable 'result' is set but may be overwritten by the memcmp on line 114 regardless of the read success. If both cache reads fail (line 110-112), result is set to false, but then line 114 unconditionally overwrites it with the memcmp result, which checks uninitialized or 0xff-filled memory. The 'result' assignment on line 111 should be followed by else clause wrapping line 114, or line 114 should only execute if reads succeed.

Copilot uses AI. Check for mistakes.
} else if (io->mask) {
if (offset > io->mask && hasperm & R_PERM_X) {
return false;
result = false;
} else {
goto check_permissions;
}
}
if (io->va) {
if (!hasperm) {
// return r_io_map_is_mapped (io, offset);
RIOMap* map = r_io_map_get_at (io, offset);
return map? map->perm & R_PERM_R: false;
} else {
check_permissions:
if (io->va) {
if (!hasperm) {
// return r_io_map_is_mapped (io, offset);
RIOMap* map = r_io_map_get_at (io, offset);
result = map ? (map->perm & R_PERM_R) : false;
} else {
RIOMap* map = r_io_map_get_at (io, offset);
result = map ? ((map->perm & hasperm) == hasperm) : false;
}
} else {
if (!io->desc) {
result = false;
} else if (offset > r_io_desc_size (io->desc)) {
result = false;
} else {
result = ((io->desc->perm & hasperm) == hasperm);
}
}
RIOMap* map = r_io_map_get_at (io, offset);
return map? (map->perm & hasperm) == hasperm: false;
}
if (!io->desc) {
return false;
}
if (offset > r_io_desc_size (io->desc)) {
return false;
}
return ((io->desc->perm & hasperm) == hasperm);

// Cache the result for future use
r_io_valid_cache_add (io, offset, result);
return result;
}
Comment on lines 93 to 146
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The cache is not invalidated on write operations. If the cache mode uses R_PERM_X for validation (line 104), writes to memory could change the validation status (e.g., writing 0xffffffff to make an address invalid, or writing other values to make it valid). The cache should be invalidated after write operations to ensure correctness.

Copilot uses AI. Check for mistakes.

// this is wrong, there is more than big and little endian
Expand Down
Loading