Skip to content

Commit dd41956

Browse files
maharmstone authored and adam900710 committed
mkfs: use BTRFS_IOC_GET_CSUMS if available
Use the new BTRFS_IOC_GET_CSUMS ioctl when doing mkfs.btrfs --rootdir if it's available and the csum algorithms match, so we can use the values on disk rather than spending time recomputing them. For the case when we're also using --reflink, this means that we can now forgo reading the data entirely.

Signed-off-by: Mark Harmstone <mark@harmstone.com>
1 parent 00bc032 commit dd41956

4 files changed

Lines changed: 246 additions & 12 deletions

File tree

kernel-shared/file-item.c

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,9 @@ btrfs_lookup_csum(struct btrfs_trans_handle *trans,
182182
return ERR_PTR(ret);
183183
}
184184

185-
int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical,
186-
u64 csum_objectid, u32 csum_type, const char *data)
185+
int btrfs_insert_file_block_csum(struct btrfs_trans_handle *trans, u64 logical,
186+
u64 csum_objectid, u32 csum_type,
187+
const u8 *csum_result)
187188
{
188189
struct btrfs_root *root = btrfs_csum_root(trans->fs_info, logical);
189190
int ret = 0;
@@ -195,7 +196,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical,
195196
struct btrfs_csum_item *item;
196197
struct extent_buffer *leaf = NULL;
197198
u64 csum_offset;
198-
u8 csum_result[BTRFS_CSUM_SIZE];
199199
u32 sectorsize = root->fs_info->sectorsize;
200200
u32 nritems;
201201
u32 ins_size;
@@ -314,7 +314,6 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical,
314314
item = (struct btrfs_csum_item *)((unsigned char *)item +
315315
csum_offset * csum_size);
316316
found:
317-
btrfs_csum_data(csum_type, (u8 *)data, csum_result, sectorsize);
318317
write_extent_buffer(leaf, csum_result, (unsigned long)item,
319318
csum_size);
320319
btrfs_mark_buffer_dirty(path->nodes[0]);
@@ -323,6 +322,18 @@ int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical,
323322
return ret;
324323
}
325324

325+
/*
 * Checksum one sector of @data and record the result for @logical.
 *
 * The checksum is computed by btrfs_csum_data() with
 * trans->fs_info->sectorsize as the length, then handed to
 * btrfs_insert_file_block_csum(), which does the actual insertion.
 * Callers that already hold a checksum can call
 * btrfs_insert_file_block_csum() directly and skip the computation.
 *
 * @trans:         transaction handle; its fs_info supplies the sector size
 * @logical:       forwarded unchanged to btrfs_insert_file_block_csum()
 * @csum_objectid: forwarded unchanged to btrfs_insert_file_block_csum()
 * @csum_type:     passed to btrfs_csum_data() and forwarded unchanged
 * @data:          buffer handed to btrfs_csum_data() together with the
 *                 sector size
 *
 * Returns whatever btrfs_insert_file_block_csum() returns.
 */
int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical,
326+
u64 csum_objectid, u32 csum_type, const char *data)
327+
{
328+
u8 csum_result[BTRFS_CSUM_SIZE];
329+
u32 sectorsize = trans->fs_info->sectorsize;
330+
331+
btrfs_csum_data(csum_type, (u8 *)data, csum_result, sectorsize);
332+
333+
return btrfs_insert_file_block_csum(trans, logical, csum_objectid,
334+
csum_type, csum_result);
335+
}
336+
326337
/*
327338
* helper function for csum removal, this expects the
328339
* key to describe the csum pointed to by the path, and it expects

kernel-shared/file-item.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
9292
struct btrfs_root *root,
9393
u64 ino, u64 file_pos,
9494
struct btrfs_file_extent_item *stack_fi);
95+
int btrfs_insert_file_block_csum(struct btrfs_trans_handle *trans, u64 logical,
96+
u64 csum_objectid, u32 csum_type,
97+
const u8 *csum_result);
9598
int btrfs_csum_file_block(struct btrfs_trans_handle *trans, u64 logical,
9699
u64 csum_objectid, u32 csum_type, const char *data);
97100
struct btrfs_csum_item *

kernel-shared/uapi/btrfs.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1348,6 +1348,45 @@ enum btrfs_err_code {
13481348
#define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \
13491349
struct btrfs_ioctl_subvol_wait)
13501350

1351+
/*
 * Definitions for the BTRFS_IOC_GET_CSUMS ioctl. The whole section is
 * skipped when BTRFS_IOC_GET_CSUMS is already defined.
 */
#ifndef BTRFS_IOC_GET_CSUMS
1352+
1353+
/*
 * Values for struct btrfs_ioctl_get_csums_entry::type, defined as
 * distinct bits.
 * NOTE(review): the entry comment below says "one of", and mkfs/rootdir.c
 * compares type for exact equality with BTRFS_GET_CSUMS_HAS_CSUMS; confirm
 * whether several bits can ever be set together.
 */
1354+
#define BTRFS_GET_CSUMS_HAS_CSUMS (1U << 0)
1355+
#define BTRFS_GET_CSUMS_ZEROED (1U << 1)
1356+
#define BTRFS_GET_CSUMS_NODATASUM (1U << 2)
1357+
#define BTRFS_GET_CSUMS_COMPRESSED (1U << 3)
1358+
#define BTRFS_GET_CSUMS_ENCRYPTED (1U << 4)
1359+
#define BTRFS_GET_CSUMS_INLINE (1U << 5)
1360+
1361+
/*
 * Header describing one file range, found in
 * btrfs_ioctl_get_csums_args::buf. For a HAS_CSUMS entry, mkfs/rootdir.c
 * reads the checksum bytes starting immediately after this header.
 */
struct btrfs_ioctl_get_csums_entry {
1362+
/* File offset of this range. */
1363+
__u64 offset;
1364+
/* Length of this range in bytes. */
1365+
__u64 length;
1366+
/* BTRFS_GET_CSUMS_* value describing this range. */
1367+
__u32 type;
1368+
/* Padding, must be 0. */
1369+
__u32 reserved;
1370+
};
1371+
1372+
/*
 * Argument block for BTRFS_IOC_GET_CSUMS. The fixed header is followed by
 * a caller-allocated output buffer (flexible array member), so the
 * allocation must be sizeof(struct btrfs_ioctl_get_csums_args) plus the
 * buffer capacity given in buf_size.
 */
struct btrfs_ioctl_get_csums_args {
1373+
/* In/out: file offset in bytes. */
1374+
__u64 offset;
1375+
/*
 * In/out: range length in bytes. mkfs/rootdir.c treats a non-zero value
 * on return as "the requested range was not fully covered".
 */
1376+
__u64 length;
1377+
/* In/out: capacity of buf in bytes on input / bytes written on output. */
1378+
__u64 buf_size;
1379+
/* In: flags, must be 0 for now. */
1380+
__u64 flags;
1381+
/* Out: entries of type btrfs_ioctl_get_csums_entry + csum data */
1382+
__u8 buf[];
1383+
};
1384+
1385+
/* ioctl request number 66; the argument block is passed as _IOWR. */
#define BTRFS_IOC_GET_CSUMS _IOWR(BTRFS_IOCTL_MAGIC, 66, \
1386+
struct btrfs_ioctl_get_csums_args)
1387+
1388+
#endif /* BTRFS_IOC_GET_CSUMS */
1389+
13511390
#ifdef __cplusplus
13521391
}
13531392
#endif

mkfs/rootdir.c

Lines changed: 189 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#include "kernel-lib/sizes.h"
4040
#include "kernel-shared/accessors.h"
4141
#include "kernel-shared/uapi/btrfs_tree.h"
42+
#include "kernel-shared/uapi/btrfs.h"
4243
#include "kernel-shared/extent_io.h"
4344
#include "kernel-shared/ctree.h"
4445
#include "kernel-shared/volumes.h"
@@ -160,6 +161,9 @@ static u64 default_subvol_id;
160161
static enum btrfs_compression_type g_compression;
161162
static u64 g_compression_level;
162163
static bool g_do_reflink;
164+
static int g_get_csums_supported = -1; /* -1 = unknown, 0 = no, 1 = yes */
165+
static dev_t g_last_csums_dev;
166+
static bool g_last_csums_dev_ok;
163167

164168
static inline struct inode_entry *rootdir_path_last(struct rootdir_path *path)
165169
{
@@ -701,6 +705,7 @@ struct source_descriptor {
701705
const char *path_name;
702706
char *comp_buf;
703707
char *wrkmem;
708+
dev_t st_dev;
704709
};
705710

706711
static int do_reflink_write(struct btrfs_fs_info *info,
@@ -915,6 +920,10 @@ static int add_file_item_extent(struct btrfs_trans_handle *trans,
915920
bool datasum = true;
916921
u64 flags = btrfs_stack_inode_flags(btrfs_inode);
917922
off_t next;
923+
u8 *fetched_csums = NULL;
924+
void *fetched_csums_buf = NULL;
925+
u64 fetched_num_csums = 0;
926+
u16 fetched_csum_size = 0;
918927

919928
if (g_do_reflink || flags & BTRFS_INODE_NOCOMPRESS)
920929
do_comp = false;
@@ -973,6 +982,131 @@ static int add_file_item_extent(struct btrfs_trans_handle *trans,
973982

974983
buf_size = MAX_EXTENT_SIZE;
975984
to_read = min_t(u64, file_pos + buf_size, next) - file_pos;
985+
986+
/*
987+
* Try to get csums from the source filesystem via BTRFS_IOC_GET_CSUMS
988+
* instead of computing them from the data. This works when the source
989+
* is a btrfs filesystem using the same checksum algorithm, and the data
990+
* is not being compressed.
991+
*
992+
* For reflink, this is the fast path: we skip reading file data and
993+
* use the pre-computed csums directly. For non-reflink, the data is
994+
* still read normally but we skip the checksum computation.
995+
*/
996+
if (datasum && !do_comp && g_get_csums_supported != 0) {
997+
u16 csum_size = btrfs_csum_type_size(root->fs_info->csum_type);
998+
u64 ioctl_len = round_up(to_read, sectorsize);
999+
u64 num_csums = ioctl_len / sectorsize;
1000+
u64 csums_bytes = num_csums * csum_size;
1001+
size_t alloc_size = sizeof(struct btrfs_ioctl_get_csums_args) +
1002+
sizeof(struct btrfs_ioctl_get_csums_entry) +
1003+
csums_bytes;
1004+
struct btrfs_ioctl_get_csums_args *cargs;
1005+
struct btrfs_ioctl_get_csums_entry *entry;
1006+
1007+
/*
1008+
* Check that the source file's filesystem uses the same
1009+
* checksum algorithm as the destination. Cache the result
1010+
* per st_dev so we only call FS_INFO when crossing a mount
1011+
* boundary.
1012+
*/
1013+
if (source->st_dev != g_last_csums_dev) {
1014+
struct btrfs_ioctl_fs_info_args fi;
1015+
1016+
g_last_csums_dev = source->st_dev;
1017+
memset(&fi, 0, sizeof(fi));
1018+
fi.flags = BTRFS_FS_INFO_FLAG_CSUM_INFO;
1019+
1020+
if (ioctl(source->fd, BTRFS_IOC_FS_INFO, &fi) == 0 &&
1021+
fi.csum_type == root->fs_info->csum_type &&
1022+
fi.sectorsize == root->fs_info->sectorsize)
1023+
g_last_csums_dev_ok = true;
1024+
else
1025+
g_last_csums_dev_ok = false;
1026+
}
1027+
1028+
if (!g_last_csums_dev_ok)
1029+
goto read_data;
1030+
1031+
cargs = calloc(1, alloc_size);
1032+
if (!cargs)
1033+
return -ENOMEM;
1034+
1035+
cargs->offset = file_pos;
1036+
cargs->length = ioctl_len;
1037+
cargs->buf_size = sizeof(struct btrfs_ioctl_get_csums_entry) +
1038+
csums_bytes;
1039+
1040+
ret = ioctl(source->fd, BTRFS_IOC_GET_CSUMS, cargs);
1041+
if (ret < 0) {
1042+
free(cargs);
1043+
if (errno == ENOTTY) {
1044+
/* Kernel doesn't support the ioctl. */
1045+
g_get_csums_supported = 0;
1046+
goto read_data;
1047+
}
1048+
error("BTRFS_IOC_GET_CSUMS failed on %s: %m",
1049+
source->path_name);
1050+
return -errno;
1051+
}
1052+
1053+
g_get_csums_supported = 1;
1054+
1055+
/*
1056+
* Check that we got a single HAS_CSUMS entry covering the
1057+
* whole range. If not, fall back to read+compute.
1058+
*/
1059+
entry = (struct btrfs_ioctl_get_csums_entry *)cargs->buf;
1060+
1061+
if (cargs->length != 0 || cargs->buf_size < sizeof(*entry) ||
1062+
entry->type != BTRFS_GET_CSUMS_HAS_CSUMS ||
1063+
entry->length != ioctl_len) {
1064+
free(cargs);
1065+
goto read_data;
1066+
}
1067+
1068+
/*
1069+
* Save the pre-fetched csums. They will be used instead of
1070+
* computing checksums after the data is read and written.
1071+
*/
1072+
fetched_csums = cargs->buf + sizeof(*entry);
1073+
fetched_csums_buf = cargs;
1074+
fetched_num_csums = num_csums;
1075+
fetched_csum_size = csum_size;
1076+
1077+
if (g_do_reflink) {
1078+
/*
1079+
* Reflink fast path: skip reading data entirely.
1080+
* Only read the trailing partial sector if needed for
1081+
* do_reflink_write()'s zero-padding.
1082+
*/
1083+
to_write = ioctl_len;
1084+
write_buf = source->buf;
1085+
1086+
if (to_read % sectorsize) {
1087+
u64 partial_off = round_down(to_read, sectorsize);
1088+
ssize_t ret_read;
1089+
1090+
memset(source->buf + partial_off, 0, sectorsize);
1091+
ret_read = pread(source->fd,
1092+
source->buf + partial_off,
1093+
to_read - partial_off,
1094+
file_pos + partial_off);
1095+
if (ret_read < 0) {
1096+
error("cannot read %s at offset %llu: %m",
1097+
source->path_name,
1098+
file_pos + partial_off);
1099+
free(fetched_csums_buf);
1100+
fetched_csums_buf = NULL;
1101+
return -errno;
1102+
}
1103+
}
1104+
1105+
goto do_write;
1106+
}
1107+
}
1108+
1109+
read_data:
9761110
ret = read_from_source(root->fs_info, source->path_name, source->fd,
9771111
source->buf, file_pos, to_read);
9781112
if (ret < 0)
@@ -982,10 +1116,13 @@ static int add_file_item_extent(struct btrfs_trans_handle *trans,
9821116
write_buf = source->buf;
9831117
memset(write_buf + to_read, 0, to_write - to_read);
9841118

1119+
do_write:
9851120
ret = btrfs_reserve_extent(trans, root, to_write, 0, 0,
9861121
(u64)-1, &key, 1);
987-
if (ret)
1122+
if (ret) {
1123+
free(fetched_csums_buf);
9881124
return ret;
1125+
}
9891126

9901127
first_block = key.objectid;
9911128

@@ -998,20 +1135,63 @@ static int add_file_item_extent(struct btrfs_trans_handle *trans,
9981135

9991136
if (ret) {
10001137
error("failed to write %s", source->path_name);
1138+
free(fetched_csums_buf);
10011139
return ret;
10021140
}
10031141

10041142
if (datasum) {
1005-
for (unsigned int i = 0; i < to_write / sectorsize; i++) {
1006-
ret = btrfs_csum_file_block(trans, first_block + (i * sectorsize),
1007-
BTRFS_EXTENT_CSUM_OBJECTID,
1008-
root->fs_info->csum_type,
1009-
write_buf + (i * sectorsize));
1010-
if (ret)
1011-
return ret;
1143+
if (fetched_csums) {
1144+
unsigned int last_full = fetched_num_csums;
1145+
1146+
/*
1147+
* For the trailing partial sector, compute the csum
1148+
* from the data we actually wrote (zero-padded) rather
1149+
* than from the ioctl. The source extent may have
1150+
* different data after the file end.
1151+
*/
1152+
if (to_read % sectorsize)
1153+
last_full = fetched_num_csums - 1;
1154+
1155+
for (unsigned int i = 0; i < last_full; i++) {
1156+
ret = btrfs_insert_file_block_csum(trans,
1157+
first_block + (i * sectorsize),
1158+
BTRFS_EXTENT_CSUM_OBJECTID,
1159+
root->fs_info->csum_type,
1160+
fetched_csums + (i * fetched_csum_size));
1161+
if (ret) {
1162+
free(fetched_csums_buf);
1163+
return ret;
1164+
}
1165+
}
1166+
1167+
if (last_full < fetched_num_csums) {
1168+
u64 partial_off = round_down(to_read, sectorsize);
1169+
1170+
ret = btrfs_csum_file_block(trans,
1171+
first_block + partial_off,
1172+
BTRFS_EXTENT_CSUM_OBJECTID,
1173+
root->fs_info->csum_type,
1174+
source->buf + partial_off);
1175+
if (ret) {
1176+
free(fetched_csums_buf);
1177+
return ret;
1178+
}
1179+
}
1180+
} else {
1181+
for (unsigned int i = 0; i < to_write / sectorsize; i++) {
1182+
ret = btrfs_csum_file_block(trans,
1183+
first_block + (i * sectorsize),
1184+
BTRFS_EXTENT_CSUM_OBJECTID,
1185+
root->fs_info->csum_type,
1186+
write_buf + (i * sectorsize));
1187+
if (ret)
1188+
return ret;
1189+
}
10121190
}
10131191
}
10141192

1193+
free(fetched_csums_buf);
1194+
10151195
btrfs_set_stack_file_extent_type(&stack_fi, BTRFS_FILE_EXTENT_REG);
10161196
btrfs_set_stack_file_extent_disk_bytenr(&stack_fi, first_block);
10171197
btrfs_set_stack_file_extent_disk_num_bytes(&stack_fi, to_write);
@@ -1477,6 +1657,7 @@ static int add_file_items(struct btrfs_trans_handle *trans,
14771657
source.path_name = path_name;
14781658
source.comp_buf = comp_buf;
14791659
source.wrkmem = wrkmem;
1660+
source.st_dev = st->st_dev;
14801661

14811662
while (file_pos < st->st_size) {
14821663
ret = add_file_item_extent(trans, root, btrfs_inode, objectid,

0 commit comments

Comments
 (0)