@@ -290,12 +290,67 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB {
290
290
options.memtable_prefix_bloom_size_ratio = 0.05 ;
291
291
options.memtable_whole_key_filtering = true ;
292
292
options.max_background_jobs = num_threads;
293
+ // disable auto compactions during bulk init, re-enable once done
294
+ // maximum number of concurrent flush operations
295
+ options.max_background_flushes = num_threads;
296
+ options.disable_auto_compactions = true ;
293
297
options.env ->SetBackgroundThreads (4 , rocksdb::Env::HIGH);
294
298
options.env ->SetBackgroundThreads (1 , rocksdb::Env::LOW);
295
-
296
299
options.max_open_files = -1 ;
297
300
301
+ initialize_dbs (num_shards, path, options, use_passed_in_path);
302
+ initialize_initializers (
303
+ num_shards,
304
+ max_D,
305
+ uniform_init_lower,
306
+ uniform_init_upper,
307
+ row_storage_bitwidth);
308
+ executor_ = std::make_unique<folly::CPUThreadPoolExecutor>(num_shards);
309
+ ro_.verify_checksums = false ;
310
+ ro_.async_io = true ;
311
+ wo_.disableWAL = true ;
312
+ wo_.sync = false ;
313
+
314
+ // Setup staggered manual compaction data members
315
+ memtable_flush_period_ = memtable_flush_period;
316
+ if (memtable_flush_period_ > 0 ) {
317
+ done_staggered_flushes_ = false ;
318
+ memtable_flush_offset_ = memtable_flush_offset;
319
+ l0_files_per_compact_ = l0_files_per_compact;
320
+ compaction_period_ = memtable_flush_period_ * l0_files_per_compact *
321
+ options.min_write_buffer_number_to_merge ;
322
+ int64_t period_per_shard = memtable_flush_period_ / num_shards;
323
+ CHECK_GT (period_per_shard, 0 );
324
+ // We want to stagger memory flushes (and then later
325
+ // stagger all compactions)
326
+
327
+ for (int64_t i = 0 ; i < num_shards; i++) {
328
+ shard_flush_compaction_deadlines_.push_back (
329
+ memtable_flush_offset_ + (i * period_per_shard));
330
+ }
331
+ }
332
+ }
333
+
334
+ ~EmbeddingRocksDB () override {
335
+ // clear all the snapshots if not released
336
+ if (snapshots_.size () > 0 ) {
337
+ LOG (WARNING)
338
+ << snapshots_.size ()
339
+ << " snapshots have not been released when db is closing. Releasing them now." ;
340
+ }
341
+ snapshots_.clear ();
342
+ for (auto shard = 0 ; shard < dbs_.size (); ++shard) {
343
+ dbs_[shard]->Close ();
344
+ }
345
+ }
346
+
347
+ void initialize_dbs (
348
+ int64_t num_shards,
349
+ std::string path,
350
+ rocksdb::Options& options,
351
+ bool use_passed_in_path) {
298
352
#ifdef FBGEMM_FBCODE
353
+ std::string used_path = " " ;
299
354
auto serviceInfo = std::make_shared<facebook::fb_rocksdb::ServiceInfo>();
300
355
serviceInfo->oncall = " pyper_training" ;
301
356
serviceInfo->service_name = " ssd_offloading_rocksb" ;
@@ -307,7 +362,6 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB {
307
362
path = ssd_mount_point;
308
363
tbe_uuid = facebook::strings::generateUUID ();
309
364
}
310
- std::string used_path = " " ;
311
365
#endif
312
366
for (auto i = 0 ; i < num_shards; ++i) {
313
367
#ifdef FBGEMM_FBCODE
@@ -350,6 +404,19 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB {
350
404
}
351
405
CHECK (s.ok ()) << s.ToString ();
352
406
dbs_.emplace_back (db);
407
+ }
408
+ #ifdef FBGEMM_FBCODE
409
+ LOG (INFO) << " TBE actual used_path: " << used_path;
410
+ #endif
411
+ }
412
+
413
+ void initialize_initializers (
414
+ int64_t num_shards,
415
+ int64_t max_D,
416
+ float uniform_init_lower,
417
+ float uniform_init_upper,
418
+ int64_t row_storage_bitwidth) {
419
+ for (auto i = 0 ; i < num_shards; ++i) {
353
420
auto * gen = at::check_generator<at::CPUGeneratorImpl>(
354
421
at::detail::getDefaultCPUGenerator ());
355
422
{
@@ -362,46 +429,6 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB {
362
429
row_storage_bitwidth));
363
430
}
364
431
}
365
- #ifdef FBGEMM_FBCODE
366
- LOG (INFO) << " TBE actual used_path: " << used_path;
367
- #endif
368
- executor_ = std::make_unique<folly::CPUThreadPoolExecutor>(num_shards);
369
- ro_.verify_checksums = false ;
370
- ro_.async_io = true ;
371
- wo_.disableWAL = true ;
372
- wo_.sync = false ;
373
-
374
- // Setup staggered manual compaction data members
375
- memtable_flush_period_ = memtable_flush_period;
376
- if (memtable_flush_period_ > 0 ) {
377
- done_staggered_flushes_ = false ;
378
- memtable_flush_offset_ = memtable_flush_offset;
379
- l0_files_per_compact_ = l0_files_per_compact;
380
- compaction_period_ = memtable_flush_period_ * l0_files_per_compact *
381
- options.min_write_buffer_number_to_merge ;
382
- int64_t period_per_shard = memtable_flush_period_ / num_shards;
383
- CHECK_GT (period_per_shard, 0 );
384
- // We want to stagger memory flushes (and then later
385
- // stagger all compactions)
386
-
387
- for (int64_t i = 0 ; i < num_shards; i++) {
388
- shard_flush_compaction_deadlines_.push_back (
389
- memtable_flush_offset_ + (i * period_per_shard));
390
- }
391
- }
392
- }
393
-
394
- ~EmbeddingRocksDB () override {
395
- // clear all the snapshots if not released
396
- if (snapshots_.size () > 0 ) {
397
- LOG (WARNING)
398
- << snapshots_.size ()
399
- << " snapshots have not been released when db is closing. Releasing them now." ;
400
- }
401
- snapshots_.clear ();
402
- for (auto shard = 0 ; shard < dbs_.size (); ++shard) {
403
- dbs_[shard]->Close ();
404
- }
405
432
}
406
433
407
434
folly::SemiFuture<std::vector<folly::Unit>> get_kv_db_async (
@@ -549,6 +576,46 @@ class EmbeddingRocksDB : public kv_db::EmbeddingKVDB {
549
576
folly::coro::blockingWait (set_kv_db_async (seq_indices, weights, count));
550
577
}
551
578
579
+ virtual rocksdb::Status set_rocksdb_option (
580
+ int shard,
581
+ const std::string& key,
582
+ const std::string& value) {
583
+ return dbs_[shard]->SetOptions ({{key, value}});
584
+ }
585
+
586
+ void toggle_compaction (bool enable) {
587
+ int max_retries = 10 ;
588
+ std::vector<folly::Future<bool >> futures;
589
+ for (auto shard = 0 ; shard < dbs_.size (); ++shard) {
590
+ auto f = folly::via (executor_.get ()).thenValue ([=](folly::Unit) -> bool {
591
+ for (int attempt = 0 ; attempt < max_retries; ++attempt) {
592
+ auto val = enable ? " false" : " true" ;
593
+ auto s = set_rocksdb_option (shard, " disable_auto_compactions" , val);
594
+ if (s.ok ()) {
595
+ return true ;
596
+ }
597
+ LOG (WARNING) << " Failed to toggle compaction to " << enable
598
+ << " for shard " << shard << " , attempt=" << attempt
599
+ << " , max_retries=" << max_retries << std::endl;
600
+ std::this_thread::sleep_for (std::chrono::milliseconds (100 ));
601
+ }
602
+ return false ;
603
+ });
604
+ futures.push_back (std::move (f));
605
+ }
606
+ auto results = folly::coro::blockingWait (folly::collectAll (futures));
607
+ for (auto & result : results) {
608
+ if (result.hasValue ()) {
609
+ CHECK (result.value ())
610
+ << " Failed to toggle compaction to " << enable << std::endl;
611
+ } else {
612
+ CHECK (false ) << " Failed to toggle compaction to " << enable
613
+ << " with exception " << result.exception ().what ()
614
+ << std::endl;
615
+ }
616
+ }
617
+ }
618
+
552
619
int64_t get_max_D () {
553
620
return max_D_;
554
621
}
0 commit comments