diff --git a/Rakefile b/Rakefile index 49733ed4af..4a4458070d 100644 --- a/Rakefile +++ b/Rakefile @@ -131,165 +131,4 @@ namespace :docs do end end -require_relative "profile/benchmarking" - -# Some require data files, available from the drivers team. See the comments above each task for details." -namespace :benchmark do - desc "Run the bson benchmarking tests" - task :bson do - puts "BSON BENCHMARK" - Mongo::Benchmarking.report({ - bson: Mongo::Benchmarking::BSON.run_all({ - flat: %i[ encode decode ], - deep: %i[ encode decode ], - full: %i[ encode decode ], - }) - }) - end - - namespace :bson do - namespace :flat do - desc "Benchmarking for flat bson documents." - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. - task :encode do - puts "BSON BENCHMARK :: FLAT :: ENCODE" - Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } }) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. - task :decode do - puts "BSON BENCHMARK :: FLAT :: DECODE" - Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } }) - end - end - - namespace :deep do - desc "Benchmarking for deep bson documents." - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. - task :encode do - puts "BSON BENCHMARK :: DEEP :: ENCODE" - Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } }) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. - task :decode do - puts "BSON BENCHMARK :: DEEP :: DECODE" - Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } }) - end - end - - namespace :full do - desc "Benchmarking for full bson documents." - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. 
- task :encode do - puts "BSON BENCHMARK :: FULL :: ENCODE" - Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } }) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. - task :decode do - puts "BSON BENCHMARK :: FULL :: DECODE" - Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } }) - end - end - end - - namespace :single_doc do - desc "Run the common driver single-document benchmarking tests" - task :command do - puts "SINGLE DOC BENCHMARK:: COMMAND" - Mongo::Benchmarking::SingleDoc.run(:command) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. - task :find_one do - puts "SINGLE DOC BENCHMARK:: FIND ONE BY ID" - Mongo::Benchmarking::SingleDoc.run(:find_one) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. - task :insert_one_small do - puts "SINGLE DOC BENCHMARK:: INSERT ONE SMALL DOCUMENT" - Mongo::Benchmarking::SingleDoc.run(:insert_one_small) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. - task :insert_one_large do - puts "SINGLE DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT" - Mongo::Benchmarking::SingleDoc.run(:insert_one_large) - end - end - - namespace :multi_doc do - desc "Run the common driver multi-document benchmarking tests" - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. - task :find_many do - puts "MULTI DOCUMENT BENCHMARK:: FIND MANY" - Mongo::Benchmarking::MultiDoc.run(:find_many) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. - task :bulk_insert_small do - puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT SMALL" - Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. 
- task :bulk_insert_large do - puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT LARGE" - Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. - task :gridfs_upload do - puts "MULTI DOCUMENT BENCHMARK:: GRIDFS UPLOAD" - Mongo::Benchmarking::MultiDoc.run(:gridfs_upload) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. - task :gridfs_download do - puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD" - Mongo::Benchmarking::MultiDoc.run(:gridfs_download) - end - end - - namespace :parallel do - desc "Run the common driver paralell ETL benchmarking tests" - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, - # with the files used in this task. - task :import do - puts "PARALLEL ETL BENCHMARK:: IMPORT" - Mongo::Benchmarking::Parallel.run(:import) - end - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, - # with the files used in this task. - # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI" - # called 'output'. - task :export do - puts "PARALLEL ETL BENCHMARK:: EXPORT" - Mongo::Benchmarking::Parallel.run(:export) - end - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, - # with the files used in this task. - task :gridfs_upload do - puts "PARALLEL ETL BENCHMARK:: GRIDFS UPLOAD" - Mongo::Benchmarking::Parallel.run(:gridfs_upload) - end - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, - # with the files used in this task. - # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI" - # called 'output'. 
- task :gridfs_download do - puts "PARALLEL ETL BENCHMARK:: GRIDFS DOWNLOAD" - Mongo::Benchmarking::Parallel.run(:gridfs_download) - end - end -end +load 'profile/benchmarking/rake/tasks.rake' diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb index 88cb979ab2..415e58114e 100644 --- a/profile/benchmarking/bson.rb +++ b/profile/benchmarking/bson.rb @@ -71,7 +71,7 @@ def score_for(type, percentiles, scale: 10_000) # @return [ Hash<:timings,:percentiles,:score> ] The test results for # the requested benchmark. def run(type, action) - timings = Benchmarking.without_gc { send(action, file_for(type)) } + timings = send(action, file_for(type)) percentiles = Percentiles.new(timings) score = score_for(type, percentiles) diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 856cb7659b..70d4e34312 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -58,13 +58,21 @@ def parse_json(document) # iterating. # # @return [ Array ] the timings for each iteration - def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block) + def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, + min_time: 60, + max_time: 5 * 60, + progress: default_progress_callback, + &block) + progress ||= ->(state) {} # fallback to a no-op callback + progress[:start] + [].tap do |results| iteration_count = 0 cumulative_time = 0 loop do - timing = Benchmark.realtime(&block) + timing = without_gc { Benchmark.realtime(&block) } + progress[:step] iteration_count += 1 cumulative_time += timing @@ -78,6 +86,8 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ # number of iterations have been reached. 
break if cumulative_time >= min_time && iteration_count >= max_iterations end + + progress[:end] end end @@ -98,32 +108,6 @@ def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) end end - # A utility class for returning the list item at a given percentile - # value. - class Percentiles - # @return [ Array ] the sorted list of numbers to consider - attr_reader :list - - # Create a new Percentiles object that encapsulates the given list of - # numbers. - # - # @param [ Array ] list the list of numbers to considier - def initialize(list) - @list = list.sort - end - - # Finds and returns the element in the list that represents the given - # percentile value. - # - # @param [ Number ] percentile a number in the range [1,100] - # - # @return [ Number ] the element of the list for the given percentile. - def [](percentile) - i = (list.size * percentile / 100.0).ceil - 1 - list[i] - end - end - # Get the median of values in a list. # # @example Get the median. @@ -144,5 +128,37 @@ def without_gc ensure GC.enable end + + private + + # Returns the proc object (or nil) corresponding to the "PROGRESS" + # environment variable. + # + # @return [ Proc | nil ] the callback proc to use (or nil if none should + # be used) + def default_progress_callback + case ENV['PROGRESS'] + when '0', 'false', 'none' + nil + when nil, '1', 'true', 'minimal' + method(:minimal_progress_callback).to_proc + else + raise ArgumentError, "unsupported progress callback #{ENV['PROGRESS'].inspect}" + end + end + + # A minimal progress callback implementation, printing '|' when a benchmark + # starts and '.' for each iteration. + # + # @param [ :start | :step | :end ] state the current progress state + def minimal_progress_callback(state) + case state + when :start then print '|' + when :step then print '.' 
+ when :end then puts + end + + $stdout.flush + end end end diff --git a/profile/benchmarking/rake/bson.rake b/profile/benchmarking/rake/bson.rake new file mode 100644 index 0000000000..4d5bdb1c04 --- /dev/null +++ b/profile/benchmarking/rake/bson.rake @@ -0,0 +1,122 @@ +# frozen_string_literal: true + +# rubocop:disable Layout/FirstHashElementIndentation + +desc 'Run the full BSON benchmarking suite' +task :bson do + puts 'BSON BENCHMARK SUITE' + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all( + flat: %i[ encode decode ], + deep: %i[ encode decode ], + full: %i[ encode decode ] + ) + }) +end + +namespace :bson do # rubocop:disable Metrics/BlockLength + # a convenience task for running all of the bson benchmark tasks; this is + # only useful for testing that they all work. + task test: %w[ + bson + bson:flat bson:flat:encode bson:flat:decode + bson:deep bson:deep:encode bson:deep:decode + bson:full bson:full:encode bson:full:decode + ] + + desc 'Learn how to run the BSON benchmarks' + task :help do + puts <<~HELP + The BSON micro benchmarks require a set of data files that are stored in + the specifications repository, here: + + https://github.com/mongodb/specifications/tree/master/source/benchmarking/data + + Download the `extended_bson.tgz` file and extract its contents. It should + contain a single folder (`extended_bson`) with several files in it. 
Move + those files to: + + #{Mongo::Benchmarking::DATA_PATH} + + Once there, you may run any of the BSON benchmarking tasks: + + $ rake benchmark:bson:flat:encode + + Tasks may be run in aggregate, as well, by specifying the namespace + directly: + + $ rake benchmark:bson:flat # runs all flat BSON benchmarks + $ rake benchmark:bson:deep # runs all deep BSON benchmarks + $ rake benchmark:bson:full # runs all full BSON benchmarks + $ rake benchmark:bson # runs all BSON benchmarks + HELP + end + + desc 'Run the `flat` BSON benchmarking suite' + task :flat do + puts 'BSON BENCHMARK :: FLAT' + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all(flat: %i[ encode decode ]) + }) + end + + namespace :flat do + desc 'Run the `flat` encoding BSON benchmark' + task :encode do + puts 'BSON BENCHMARK :: FLAT :: ENCODE' + Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } }) + end + + desc 'Run the `flat` decoding BSON benchmark' + task :decode do + puts 'BSON BENCHMARK :: FLAT :: DECODE' + Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } }) + end + end + + desc 'Run the `deep` BSON benchmarking suite' + task :deep do + puts 'BSON BENCHMARK :: DEEP' + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all(deep: %i[ encode decode ]) + }) + end + + namespace :deep do + desc 'Run the `deep` encoding BSON benchmark' + task :encode do + puts 'BSON BENCHMARK :: DEEP :: ENCODE' + Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } }) + end + + desc 'Run the `deep` decoding BSON benchmark' + task :decode do + puts 'BSON BENCHMARK :: DEEP :: DECODE' + Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } }) + end + end + + desc 'Run the `full` BSON benchmarking suite' + task :full do + puts 'BSON BENCHMARK :: FULL' + Mongo::Benchmarking.report({ + 
bson: Mongo::Benchmarking::BSON.run_all({ full: %i[ encode decode ] }) + }) + end + + namespace :full do + desc 'Run the `full` encoding BSON benchmark' + task :encode do + puts 'BSON BENCHMARK :: FULL :: ENCODE' + Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } }) + end + + desc 'Run the `full` decoding BSON benchmark' + task :decode do + puts 'BSON BENCHMARK :: FULL :: DECODE' + Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } }) + end + end +end + +# rubocop:enable Layout/FirstHashElementIndentation diff --git a/profile/benchmarking/rake/multi_doc.rake b/profile/benchmarking/rake/multi_doc.rake new file mode 100644 index 0000000000..86c190ef1f --- /dev/null +++ b/profile/benchmarking/rake/multi_doc.rake @@ -0,0 +1,34 @@ +# frozen_string_literal: true +# rubocop:todo all + +namespace :multi_doc do + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. + task :find_many do + puts 'MULTI DOCUMENT BENCHMARK :: FIND MANY' + Mongo::Benchmarking::MultiDoc.run(:find_many) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. + task :bulk_insert_small do + puts 'MULTI DOCUMENT BENCHMARK :: BULK INSERT SMALL' + Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. + task :bulk_insert_large do + puts 'MULTI DOCUMENT BENCHMARK :: BULK INSERT LARGE' + Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. + task :gridfs_upload do + puts 'MULTI DOCUMENT BENCHMARK :: GRIDFS UPLOAD' + Mongo::Benchmarking::MultiDoc.run(:gridfs_upload) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. 
+ task :gridfs_download do + puts 'MULTI DOCUMENT BENCHMARK :: GRIDFS DOWNLOAD' + Mongo::Benchmarking::MultiDoc.run(:gridfs_download) + end +end diff --git a/profile/benchmarking/rake/parallel.rake b/profile/benchmarking/rake/parallel.rake new file mode 100644 index 0000000000..98752e231e --- /dev/null +++ b/profile/benchmarking/rake/parallel.rake @@ -0,0 +1,36 @@ +# frozen_string_literal: true +# rubocop:todo all + +namespace :parallel do + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, + # with the files used in this task. + task :import do + puts 'PARALLEL ETL BENCHMARK :: IMPORT' + Mongo::Benchmarking::Parallel.run(:import) + end + + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, + # with the files used in this task. + # Requirement: Another directory in '#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI' + # called 'output'. + task :export do + puts 'PARALLEL ETL BENCHMARK :: EXPORT' + Mongo::Benchmarking::Parallel.run(:export) + end + + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, + # with the files used in this task. + task :gridfs_upload do + puts 'PARALLEL ETL BENCHMARK :: GRIDFS UPLOAD' + Mongo::Benchmarking::Parallel.run(:gridfs_upload) + end + + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, + # with the files used in this task. + # Requirement: Another directory in '#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI' + # called 'output'. 
+ task :gridfs_download do + puts 'PARALLEL ETL BENCHMARK :: GRIDFS DOWNLOAD' + Mongo::Benchmarking::Parallel.run(:gridfs_download) + end +end diff --git a/profile/benchmarking/rake/single_doc.rake b/profile/benchmarking/rake/single_doc.rake new file mode 100644 index 0000000000..803e28d593 --- /dev/null +++ b/profile/benchmarking/rake/single_doc.rake @@ -0,0 +1,27 @@ +# frozen_string_literal: true +# rubocop:todo all + +namespace :single_doc do + task :command do + puts 'SINGLE DOC BENCHMARK :: COMMAND' + Mongo::Benchmarking::SingleDoc.run(:command) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. + task :find_one do + puts 'SINGLE DOC BENCHMARK :: FIND ONE BY ID' + Mongo::Benchmarking::SingleDoc.run(:find_one) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. + task :insert_one_small do + puts 'SINGLE DOC BENCHMARK :: INSERT ONE SMALL DOCUMENT' + Mongo::Benchmarking::SingleDoc.run(:insert_one_small) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. + task :insert_one_large do + puts 'SINGLE DOC BENCHMARK :: INSERT ONE LARGE DOCUMENT' + Mongo::Benchmarking::SingleDoc.run(:insert_one_large) + end +end diff --git a/profile/benchmarking/rake/tasks.rake b/profile/benchmarking/rake/tasks.rake new file mode 100644 index 0000000000..7feae8d2ab --- /dev/null +++ b/profile/benchmarking/rake/tasks.rake @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +require_relative '../../benchmarking' + +# Some tasks require data files, available from the drivers team. +# See the comments above each task for details. +namespace :benchmark do + %w[ bson single_doc multi_doc parallel ].each do |group| + load File.join(__dir__, "#{group}.rake") + end +end