diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c39331a..5e3b79b2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added +- `v2` boolean option for creating a `utube` tube (#228). It enables the + workaround for slow takes while working with busy tubes. + ### Fixed - Stuck in `INIT` state if an instance failed to enter the `running` mode in time (#226). This fix works only for Tarantool versions >= 2.10.0. +- Slow takes on busy `utube` tubes (#228). The workaround could be enabled by + passing the `v2 = true` option while creating the tube. ## [1.3.3] - 2023-09-13 diff --git a/queue/abstract.lua b/queue/abstract.lua index d8dcff88..01010174 100644 --- a/queue/abstract.lua +++ b/queue/abstract.lua @@ -297,7 +297,11 @@ function tube.drop(self) local space_name = tube[3] - box.space[space_name]:drop() + if self.raw.drop ~= nil then + self.raw:drop() + else + box.space[space_name]:drop() + end box.space._queue:delete{tube_name} -- drop queue queue.tube[tube_name] = nil diff --git a/queue/abstract/driver/utube.lua b/queue/abstract/driver/utube.lua index d3b7e006..cfdde216 100644 --- a/queue/abstract/driver/utube.lua +++ b/queue/abstract/driver/utube.lua @@ -53,6 +53,7 @@ function tube.create_space(space_name, opts) type = 'tree', parts = {2, str_type(), 3, str_type(), 1, num_type()} }) + space.v2 = opts.v2 return space end @@ -60,10 +61,29 @@ end function tube.new(space, on_task_change) validate_space(space) + local space_ready_name = space.name .. "_utube_ready" + local space_ready = box.space[space_ready_name] + if space.v2 and not space_ready then + -- Create a space for first ready tasks from each utube. + space_ready = box.schema.create_space(space_ready_name, space_opts) + space_ready:create_index('task_id', { + type = 'tree', + parts = {1, num_type()}, + unique = true + }) + space_ready:create_index('utube', { + type = 'tree', + parts = {2, str_type()}, + unique = true + }) + end + on_task_change = on_task_change or (function() end) local self = setmetatable({ space = space, + space_ready = space_ready, on_task_change = on_task_change, + v2 = space.v2 or false, }, { __index = method }) return self end @@ -73,37 +93,92 @@ function method.normalize_task(self, task) return task and task:transform(3, 1) end +local function put_ready(self, utube) + local taken = self.space.index.utube:min{state.TAKEN, utube} + if taken == nil or taken[2] ~= state.TAKEN then + local next_task = self.space.index.utube:min{state.READY, utube} + if next_task == nil or next_task[2] ~= state.READY then + return + end + pcall(self.space_ready.insert, self.space_ready, {next_task[1], utube}) + end +end + -- put task in space function method.put(self, data, opts) - local max - - -- Taking the maximum of the index is an implicit transactions, so it is - -- always done with 'read-confirmed' mvcc isolation level. - -- It can lead to errors when trying to make parallel 'put' calls with mvcc enabled. - -- It is hapenning because 'max' for several puts in parallel will be the same since - -- read confirmed isolation level makes visible all transactions that finished the commit. - -- To fix it we wrap it with box.begin/commit and set right isolation level. - -- Current fix does not resolve that bug in situations when we already are in transaction - -- since it will open nested transactions. - -- See https://github.com/tarantool/queue/issues/207 - -- See https://www.tarantool.io/ru/doc/latest/concepts/atomic/txn_mode_mvcc/ - - if box.cfg.memtx_use_mvcc_engine and (not box.is_in_txn()) then + local commit_requirements = box.cfg.memtx_use_mvcc_engine and + (not box.is_in_txn()) + if commit_requirements then box.begin({txn_isolation = 'read-committed'}) - max = self.space.index.task_id:max() - box.commit() - else - max = self.space.index.task_id:max() end + local max + + max = self.space.index.task_id:max() + local id = max and max[1] + 1 or 0 local task = self.space:insert{id, state.READY, tostring(opts.utube), data} + if self.v2 then + put_ready(self, task[3]) + end + + if commit_requirements then + box.commit() + end + self.on_task_change(task, 'put') return task end +local function take_ready(self) + local commit_requirements = box.cfg.memtx_use_mvcc_engine and + (not box.is_in_txn()) + + while true do + if commit_requirements then + box.begin({txn_isolation = 'read-committed'}) + end + + local task_ready = self.space_ready.index.task_id:min() + if task_ready == nil then + if commit_requirements then + box.commit() + end + return + end + + local id = task_ready[1] + local task = self.space:get(id) + + if task[2] == state.READY then + local taken = self.space.index.utube:min{state.TAKEN, task[3]} + + if taken == nil or taken[2] ~= state.TAKEN then + task = self.space:update(id, { { '=', 2, state.TAKEN } }) + + self.space_ready:delete(id) + + if commit_requirements then + box.commit() + end + + self.on_task_change(task, 'take') + return task + end + end + + if commit_requirements then + box.commit() + end + end +end + -- take task function method.take(self) + if self.v2 then + return take_ready(self) + end + for s, task in self.space.index.status:pairs(state.READY, { iterator = 'GE' }) do if task[2] ~= state.READY then @@ -141,41 +216,116 @@ function method.touch(self, id, ttr) error('utube queue does not support touch') end +local function delete_ready(self, id, utube) + self.space_ready:delete(id) + put_ready(self, utube) +end + -- delete task function method.delete(self, id) + local commit_requirements = box.cfg.memtx_use_mvcc_engine and + (not box.is_in_txn()) + if commit_requirements then + box.begin({txn_isolation = 'read-committed'}) + end + local task = self.space:get(id) self.space:delete(id) if task ~= nil then + if self.v2 then + if task[2] == state.TAKEN then + put_ready(self, task[3]) + elseif task[2] == state.READY then + delete_ready(self, id, task[3]) + end + end + task = task:transform(2, 1, state.DONE) local neighbour = self.space.index.utube:min{state.READY, task[3]} + + if commit_requirements then + box.commit() + end + self.on_task_change(task, 'delete') if neighbour then self.on_task_change(neighbour) end + return task + end + + if commit_requirements then + box.commit() end return task end -- release task function method.release(self, id, opts) + local commit_requirements = box.cfg.memtx_use_mvcc_engine and + (not box.is_in_txn()) + if commit_requirements then + box.begin({txn_isolation = 'read-committed'}) + end + local task = self.space:update(id, {{ '=', 2, state.READY }}) if task ~= nil then + if self.v2 then + -- We guarantee that release is called only on TAKEN tasks. + self.space_ready:insert{id, task[3]} + end + + if commit_requirements then + box.commit() + end + self.on_task_change(task, 'release') + return task + end + + if commit_requirements then + box.commit() end return task end -- bury task function method.bury(self, id) + local commit_requirements = box.cfg.memtx_use_mvcc_engine and + (not box.is_in_txn()) + if commit_requirements then + box.begin({txn_isolation = 'read-committed'}) + end + + local current_task = self.space:get{id} local task = self.space:update(id, {{ '=', 2, state.BURIED }}) if task ~= nil then + if self.v2 then + local status = current_task[2] + local ready_task = self.space_ready:get{task[1]} + if ready_task ~= nil then + delete_ready(self, id, task[3]) + elseif status == state.TAKEN then + put_ready(self, task[3]) + end + end + local neighbour = self.space.index.utube:min{state.READY, task[3]} + + if commit_requirements then + box.commit() + end + self.on_task_change(task, 'bury') if neighbour and neighbour[i_status] == state.READY then self.on_task_change(neighbour) end else + if commit_requirements then + box.commit() + end + self.on_task_change(task, 'bury') end return task @@ -183,7 +333,14 @@ end -- unbury several tasks function method.kick(self, count) + local commit_requirements = box.cfg.memtx_use_mvcc_engine and + (not box.is_in_txn()) + for i = 1, count do + if commit_requirements then + box.begin({txn_isolation = 'read-committed'}) + end + local task = self.space.index.status:min{ state.BURIED } if task == nil then return i - 1 @@ -193,6 +350,19 @@ function method.kick(self, count) end task = self.space:update(task[1], {{ '=', 2, state.READY }}) + if self.v2 then + local prev_task = self.space_ready.index.utube:get{task[3]} + if prev_task ~= nil then + delete_ready(self, prev_task[1], task[3]) + else + put_ready(self, task[3]) + end + end + + if commit_requirements then + box.commit() + end + self.on_task_change(task, 'kick') end return count @@ -210,6 +380,9 @@ end function method.truncate(self) self.space:truncate() + if self.v2 then + self.space_ready:truncate() + end end -- This driver has no background activity. @@ -222,4 +395,12 @@ function method.stop() return end +function method.drop(self) + self:stop() + box.space[self.space.name]:drop() + if self.v2 then + box.space[self.space_ready.name]:drop() + end +end + return tube diff --git a/t/030-utube.t b/t/030-utube.t index ffd92136..dd79cdd9 100755 --- a/t/030-utube.t +++ b/t/030-utube.t @@ -17,126 +17,142 @@ test:ok(rawget(box, 'space'), 'box started') test:ok(queue, 'queue is loaded') local tube = queue.create_tube('test', 'utube', { engine = engine }) +local tube_v2 = queue.create_tube('test_v2', 'utube', + { engine = engine, v2 = true }) local tube2 = queue.create_tube('test_stat', 'utube', { engine = engine }) +local tubev2 = queue.create_tube('test_stat_v2', 'utube', + { engine = engine, v2 = true }) test:ok(tube, 'test tube created') test:is(tube.name, 'test', 'tube.name') test:is(tube.type, 'utube', 'tube.type') test:test('Utube statistics', function(test) - test:plan(13) - tube2:put('stat_0') - tube2:put('stat_1') - tube2:put('stat_2') - tube2:put('stat_3') - tube2:put('stat_4') - tube2:delete(4) - tube2:take(.001) - tube2:release(0) - tube2:take(.001) - tube2:ack(0) - tube2:bury(1) - tube2:bury(2) - tube2:kick(1) - tube2:take(.001) - - local stats = queue.statistics('test_stat') - - -- check tasks statistics - test:is(stats.tasks.taken, 1, 'tasks.taken') - test:is(stats.tasks.buried, 1, 'tasks.buried') - test:is(stats.tasks.ready, 1, 'tasks.ready') - test:is(stats.tasks.done, 2, 'tasks.done') - test:is(stats.tasks.delayed, 0, 'tasks.delayed') - test:is(stats.tasks.total, 3, 'tasks.total') - - -- check function call statistics - test:is(stats.calls.delete, 1, 'calls.delete') - test:is(stats.calls.ack, 1, 'calls.ack') - test:is(stats.calls.take, 3, 'calls.take') - test:is(stats.calls.kick, 1, 'calls.kick') - test:is(stats.calls.bury, 2, 'calls.bury') - test:is(stats.calls.put, 5, 'calls.put') - test:is(stats.calls.release, 1, 'calls.release') + test:plan(26) + for _, tube_stat in ipairs({tube2, tubev2}) do + tube_stat:put('stat_0') + tube_stat:put('stat_1') + tube_stat:put('stat_2') + tube_stat:put('stat_3') + tube_stat:put('stat_4') + tube_stat:delete(4) + tube_stat:take(.001) + tube_stat:release(0) + tube_stat:take(.001) + tube_stat:ack(0) + tube_stat:bury(1) + tube_stat:bury(2) + tube_stat:kick(1) + tube_stat:take(.001) + + local stats = queue.statistics(tube_stat.name) + + -- check tasks statistics + test:is(stats.tasks.taken, 1, 'tasks.taken') + test:is(stats.tasks.buried, 1, 'tasks.buried') + test:is(stats.tasks.ready, 1, 'tasks.ready') + test:is(stats.tasks.done, 2, 'tasks.done') + test:is(stats.tasks.delayed, 0, 'tasks.delayed') + test:is(stats.tasks.total, 3, 'tasks.total') + + -- check function call statistics + test:is(stats.calls.delete, 1, 'calls.delete') + test:is(stats.calls.ack, 1, 'calls.ack') + test:is(stats.calls.take, 3, 'calls.take') + test:is(stats.calls.kick, 1, 'calls.kick') + test:is(stats.calls.bury, 2, 'calls.bury') + test:is(stats.calls.put, 5, 'calls.put') + test:is(stats.calls.release, 1, 'calls.release') + end end) test:test('Easy put/take/ack', function(test) - test:plan(12) - - test:ok(tube:put(123, {utube = 1}), 'task was put') - test:ok(tube:put(345, {utube = 1}), 'task was put') - local task = tube:take() - test:ok(task, 'task was taken') - test:is(task[2], state.TAKEN, 'task status') - test:is(task[3], 123, 'task.data') - test:ok(tube:take(.1) == nil, 'second task was not taken (the same tube)') - - task = tube:ack(task[1]) - test:ok(task, 'task was acked') - test:is(task[2], '-', 'task status') - test:is(task[3], 123, 'task.data') - - task = tube:take(.1) - test:ok(task, 'task2 was taken') - test:is(task[3], 345, 'task.data') - test:is(task[2], state.TAKEN, 'task.status') + test:plan(24) + + for _, test_tube in ipairs({tube, tube_v2}) do + test:ok(test_tube:put(123, {utube = 1}), 'task was put') + test:ok(test_tube:put(345, {utube = 1}), 'task was put') + local task = test_tube:take() + test:ok(task, 'task was taken') + test:is(task[2], state.TAKEN, 'task status') + test:is(task[3], 123, 'task.data') + test:ok(test_tube:take(.1) == nil, 'second task was not taken (the same tube)') + + task = test_tube:ack(task[1]) + test:ok(task, 'task was acked') + test:is(task[2], '-', 'task status') + test:is(task[3], 123, 'task.data') + + task = test_tube:take(.1) + test:ok(task, 'task2 was taken') + test:is(task[3], 345, 'task.data') + test:is(task[2], state.TAKEN, 'task.status') + end end) test:test('ack in utube', function(test) - test:plan(8) - - test:ok(tube:put(123, {utube = 'abc'}), 'task was put') - test:ok(tube:put(345, {utube = 'abc'}), 'task was put') - - local state = 0 - fiber.create(function() - fiber.sleep(0.1) - local taken = tube:take() - test:ok(taken, 'second task was taken') - test:is(taken[3], 345, 'task.data') - state = state + 1 - end) - - local taken = tube:take(.1) - state = 1 - test:ok(taken, 'task was taken') - test:is(taken[3], 123, 'task.data') - fiber.sleep(0.3) - test:is(state, 1, 'state was not changed') - tube:ack(taken[1]) - fiber.sleep(0.2) - test:is(state, 2, 'state was changed') + test:plan(16) + + for _, test_tube in ipairs({tube, tube_v2}) do + test:ok(test_tube:put(123, {utube = 'abc'}), 'task was put') + test:ok(test_tube:put(345, {utube = 'abc'}), 'task was put') + + local state = 0 + fiber.create(function() + fiber.sleep(0.1) + local taken = test_tube:take() + test:ok(taken, 'second task was taken') + test:is(taken[3], 345, 'task.data') + state = state + 1 + end) + + local taken = test_tube:take(.1) + state = 1 + test:ok(taken, 'task was taken') + test:is(taken[3], 123, 'task.data') + fiber.sleep(0.3) + test:is(state, 1, 'state was not changed') + test_tube:ack(taken[1]) + fiber.sleep(0.2) + test:is(state, 2, 'state was changed') + end end) test:test('bury in utube', function(test) - test:plan(8) - - test:ok(tube:put(567, {utube = 'cde'}), 'task was put') - test:ok(tube:put(789, {utube = 'cde'}), 'task was put') - - local state = 0 - fiber.create(function() - fiber.sleep(0.1) - local taken = tube:take() - test:ok(taken, 'second task was taken') - test:is(taken[3], 789, 'task.data') - state = state + 1 - end) - - local taken = tube:take(.1) - state = 1 - test:ok(taken, 'task was taken') - test:is(taken[3], 567, 'task.data') - fiber.sleep(0.3) - test:is(state, 1, 'state was not changed') - tube:bury(taken[1]) - fiber.sleep(0.2) - test:is(state, 2, 'state was changed') + test:plan(16) + + for _, test_tube in ipairs({tube, tube_v2}) do + test:ok(test_tube:put(567, {utube = 'cde'}), 'task was put') + test:ok(test_tube:put(789, {utube = 'cde'}), 'task was put') + + local state = 0 + fiber.create(function() + fiber.sleep(0.1) + local taken = test_tube:take() + test:ok(taken, 'second task was taken') + test:is(taken[3], 789, 'task.data') + state = state + 1 + end) + + local taken = test_tube:take(.1) + state = 1 + test:ok(taken, 'task was taken') + test:is(taken[3], 567, 'task.data') + fiber.sleep(0.3) + test:is(state, 1, 'state was not changed') + test_tube:bury(taken[1]) + fiber.sleep(0.2) + test:is(state, 2, 'state was changed') + end end) test:test('instant bury', function(test) - test:plan(1) + test:plan(2) tube:put(1, {ttr=60}) local taken = tube:take(.1) test:is(tube:bury(taken[1])[2], '!', 'task is buried') + + tube_v2:put(1, {ttr=60}) + local taken = tube_v2:take(.1) + test:is(tube_v2:bury(taken[1])[2], '!', 'task is buried') end) test:test('if_not_exists test', function(test) diff --git a/t/benchmark/busy_utubes.lua b/t/benchmark/busy_utubes.lua new file mode 100644 index 00000000..730367a0 --- /dev/null +++ b/t/benchmark/busy_utubes.lua @@ -0,0 +1,94 @@ +#!/usr/bin/env tarantool + +local clock = require('clock') +local os = require('os') +local fiber = require('fiber') +local queue = require('queue') + +-- Set the number of consumers. +local consumers_count = 10 +-- Set the number of tasks processed by one consumer per iteration. +local batch_size = 150000 + +local barrier = fiber.cond() +local wait_count = 0 + +box.cfg() + +local test_queue = queue.create_tube('test_queue', 'utube', + {temporary = true, v2 = true}) + +local function prepare_tasks() + local test_data = 'test data' + + for i = 1, consumers_count do + for _ = 1, batch_size do + test_queue:put(test_data, {utube = tostring(i)}) + end + end +end + +local function prepare_consumers() + local consumers = {} + + -- Make half the utubes busy. + for _ = 1, consumers_count / 2 do + test_queue:take() + end + + for i = 1, consumers_count / 2 do + consumers[i] = fiber.create(function() + wait_count = wait_count + 1 + -- Wait for all consumers to start. + barrier:wait() + + -- Ack the tasks. + for _ = 1, batch_size do + local task = test_queue:take() + test_queue:ack(task[1]) + end + + wait_count = wait_count + 1 + end) + end + + return consumers +end + +local function multi_consumer_bench() + --- Wait for all consumer fibers. + local wait_all = function() + while (wait_count ~= consumers_count / 2) do + fiber.yield() + end + wait_count = 0 + end + + fiber.set_max_slice(100) + + prepare_tasks() + + -- Wait for all consumers to start. + local consumers = prepare_consumers() + wait_all() + + -- Start timing of task confirmation. + local start_ack_time = clock.proc64() + barrier:broadcast() + -- Wait for all tasks to be acked. + wait_all() + -- Complete the timing of task confirmation. + local complete_time = clock.proc64() + + -- Print the result in milliseconds. + print(string.format("Time it takes to confirm the tasks: %i", + tonumber((complete_time - start_ack_time) / 10^6))) +end + +-- Start benchmark. +multi_consumer_bench() + +-- Cleanup. +test_queue:drop() + +os.exit(0) diff --git a/t/benchmark/many_utubes.lua b/t/benchmark/many_utubes.lua new file mode 100644 index 00000000..dbf78c69 --- /dev/null +++ b/t/benchmark/many_utubes.lua @@ -0,0 +1,86 @@ +#!/usr/bin/env tarantool + +local clock = require('clock') +local os = require('os') +local fiber = require('fiber') +local queue = require('queue') + +-- Set the number of consumers. +local consumers_count = 30000 + +local barrier = fiber.cond() +local wait_count = 0 + +box.cfg() + +local test_queue = queue.create_tube('test_queue', 'utube', + {temporary = true, v2 = true}) + +local function prepare_tasks() + local test_data = 'test data' + + for i = 1, consumers_count do + test_queue:put(test_data, {utube = tostring(i)}) + end +end + +local function prepare_consumers() + local consumers = {} + + for i = 1, consumers_count do + consumers[i] = fiber.create(function() + wait_count = wait_count + 1 + -- Wait for all consumers to start. + barrier:wait() + + -- Ack the task. + local task = test_queue:take() + test_queue:ack(task[1]) + + wait_count = wait_count + 1 + end) + end + + return consumers +end + +local function multi_consumer_bench() + --- Wait for all consumer fibers. + local wait_all = function() + while (wait_count ~= consumers_count) do + fiber.yield() + end + wait_count = 0 + end + + fiber.set_max_slice(100) + + -- Wait for all consumers to start. + local consumers = prepare_consumers() + wait_all() + + -- Start timing creation of tasks. + local start_put_time = clock.proc64() + prepare_tasks() + -- Start timing of task confirmation. + local start_ack_time = clock.proc64() + barrier:broadcast() + -- Wait for all tasks to be acked. + wait_all() + -- Complete the timing of task confirmation. + local complete_time = clock.proc64() + + -- Print results in milliseconds. + print(string.format("Time it takes to fill the queue: %i", + tonumber((start_ack_time - start_put_time) / 10^6))) + print(string.format("Time it takes to confirm the tasks: %i", + tonumber((complete_time - start_ack_time) / 10^6))) +end + +-- Start benchmark. +multi_consumer_bench() + +-- Cleanup. +test_queue:drop() + +os.exit(0)