Skip to content

Commit 0e86fbd

Browse files
nshylocker
authored andcommitted
misc: cleanup usage of pthread_cancel
At last we can drop usage of pthread_cancel and associated functions. And remove related leak suppressions. Let's keep memory protection disabling under ASAN. Otherwise leak sanitizer may misbehave on Tarantool panic as below. ``` # Tracer caught signal 11: addr=0x705236d1e000 pc=0x57b7605b10d0 sp=0x705232a00ca0\ # ==1022907==LeakSanitizer has encountered a fatal error.\ # ==1022907==HINT: For debugging, try setting environment variable LSAN_OPTIONS=verbosity=1:log_threads=1\ # ==1022907==HINT: LeakSanitizer does not work under ptrace (strace, gdb, etc)", ``` Let's also add missing pipe/endpoint destroy in wal while at it. Close tarantool#8423 NO_CHANGELOG=internal NO_DOC=internal
1 parent a83d5e3 commit 0e86fbd

File tree

13 files changed

+9
-325
lines changed

13 files changed

+9
-325
lines changed

asan/asan.supp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,3 @@
44
# File format:
55
#fun:*
66
#src:*
7-
# TODO(gh-8423) expected to be removed when the issue is solved
8-
fun:ev_async_stop

asan/lsan.supp

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,12 +61,6 @@ leak:libc.so*
6161
# source: src/lib/salad/mhash.h
6262
leak:mh_i32ptr_new
6363

64-
# test: replication/gh-3637-misc-error-on-replica-auth-fail.test.lua
65-
# source: src/lib/core/coio_task.c
66-
leak:coio_on_call
67-
# source: src/lib/salad/mhash.h
68-
leak:mh_i64ptr_new
69-
7064
# test: sql-tap/gh2250-trigger-chain-limit.test.lua
7165
# source: src/lib/core/exception.cc
7266
leak:Exception::operator new
@@ -75,10 +69,6 @@ leak:Exception::operator new
7569
# source: src/lib/core/fiber.h
7670
leak:fiber_cxx_invoke
7771

78-
# test: vinyl/recover.test.lua
79-
# source: src/lib/core/fiber.c
80-
leak:cord_costart_thread_func
81-
8272
# TODO(gh-9213): remove when the issue is fixed
8373
leak:luaT_push_key_def
8474

src/box/wal.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,7 @@ wal_free(void)
597597
struct wal_writer *writer = &wal_writer_singleton;
598598

599599
cbus_stop_loop(&writer->wal_pipe);
600+
cpipe_destroy(&writer->wal_pipe);
600601

601602
if (cord_join(&writer->cord)) {
602603
/* We can't recover from this in any reasonable way. */
@@ -1306,6 +1307,7 @@ wal_writer_f(va_list ap)
13061307
wal_xlog_close(&vy_log_writer.xlog);
13071308

13081309
cpipe_destroy(&writer->tx_prio_pipe);
1310+
cbus_endpoint_destroy(&endpoint, cbus_process);
13091311
return 0;
13101312
}
13111313

src/lib/core/cbus.c

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -132,13 +132,6 @@ cbus_endpoint_poison_f(struct cmsg *msg)
132132
void
133133
cpipe_destroy(struct cpipe *pipe)
134134
{
135-
/*
136-
* The thread should not be canceled while mutex is locked.
137-
* And everything else must be protected for consistency.
138-
*/
139-
int old_cancel_state;
140-
tt_pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old_cancel_state);
141-
142135
ev_async_stop(pipe->producer, &pipe->flush_input);
143136

144137
static const struct cmsg_hop route[1] = {
@@ -171,9 +164,6 @@ cpipe_destroy(struct cpipe *pipe)
171164
*/
172165
ev_async_send(endpoint->consumer, &endpoint->async);
173166
tt_pthread_mutex_unlock(&endpoint->mutex);
174-
175-
tt_pthread_setcancelstate(old_cancel_state, NULL);
176-
177167
TRASH(pipe);
178168
}
179169

@@ -291,17 +281,6 @@ cpipe_flush_cb(ev_loop *loop, struct ev_async *watcher, int events)
291281
/* Trigger task processing when the queue becomes non-empty. */
292282
bool output_was_empty;
293283

294-
/*
295-
* We need to set a thread cancellation guard, because
296-
* another thread may cancel the current thread
297-
* (write() is a cancellation point in ev_async_send)
298-
* and the activation of the ev_async watcher
299-
* through ev_async_send will fail.
300-
*/
301-
302-
int old_cancel_state;
303-
tt_pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old_cancel_state);
304-
305284
tt_pthread_mutex_lock(&endpoint->mutex);
306285
output_was_empty = stailq_empty(&endpoint->output);
307286
/** Flush input */
@@ -315,8 +294,6 @@ cpipe_flush_cb(ev_loop *loop, struct ev_async *watcher, int events)
315294

316295
ev_async_send(endpoint->consumer, &endpoint->async);
317296
}
318-
319-
tt_pthread_setcancelstate(old_cancel_state, NULL);
320297
}
321298

322299
void

src/lib/core/fiber.c

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,8 @@ fiber_mprotect(void *addr, size_t len, int prot)
209209
goto error;
210210
}
211211
/*
212-
* TODO(gh-8423) Disable mprotect temporarily. Leak sanitizer does not work
213-
* well if memory is protected. We fail to remove protection due to the use of
214-
* `cord_cancel_and_join` to cancel cords.
212+
* If we panic then fiber stacks remain protected which cause leak sanitizer
213+
* failures. Disable memory protection under ASAN.
215214
*/
216215
#ifndef ENABLE_ASAN
217216
if (mprotect(addr, len, prot) != 0)
@@ -2241,29 +2240,6 @@ cord_is_main(void)
22412240
return cord() == &main_cord;
22422241
}
22432242

2244-
void
2245-
cord_cancel_and_join(struct cord *cord)
2246-
{
2247-
assert(cord->id != 0);
2248-
tt_pthread_cancel(cord->id);
2249-
if (tt_pthread_join(cord->id, NULL) != 0)
2250-
panic("failed to join a canceled thread");
2251-
int old_cord_count = pm_atomic_fetch_sub(&cord_count, 1);
2252-
assert(old_cord_count > 0);
2253-
(void)old_cord_count;
2254-
/*
2255-
* Can't destroy the cord safely. The cancellation could even happen
2256-
* before the cord was properly initialized in its own thread. It might
2257-
* be fixed if cord would be initialized before its thread is started.
2258-
*
2259-
* Also obviously even if the creation would be fine, the destruction
2260-
* can't free everything. The cord could have some resources allocated
2261-
* on the heap with pointers not stored anywhere in struct cord - they
2262-
* can't be possibly located.
2263-
*/
2264-
memset(cord, 0, sizeof(*cord));
2265-
}
2266-
22672243
static NOINLINE int
22682244
check_stack_direction(void *prev_stack_frame)
22692245
{

src/lib/core/fiber.h

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -969,15 +969,6 @@ cord_is_main(void);
969969
void
970970
cord_collect_garbage(struct cord *cord);
971971

972-
/**
973-
* Pthread-cancel the thread and join it in a blocking way, without yielding.
974-
* That way is the only possible one if the event loop is already destroyed.
975-
* Should only be used as an emergency, because all the cord resources simply
976-
* leak.
977-
*/
978-
void
979-
cord_cancel_and_join(struct cord *cord);
980-
981972
/**
982973
* Return slab_cache suitable to use with tarantool/small library
983974
*/

src/tt_pthread.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -287,12 +287,6 @@
287287
tt_pthread_error(e__); \
288288
})
289289

290-
#define tt_pthread_cancel(thread) \
291-
({ int e__ = pthread_cancel(thread); \
292-
assert(e__ == 0 || e__ == ESRCH); \
293-
e__; \
294-
})
295-
296290
#define tt_pthread_key_create(key, dtor) \
297291
({ int e__ = pthread_key_create(key, dtor); \
298292
tt_pthread_error(e__); \
@@ -310,11 +304,6 @@
310304

311305
#define tt_pthread_getspecific(key) pthread_getspecific(key)
312306

313-
#define tt_pthread_setcancelstate(state, oldstate) \
314-
({ int e__ = pthread_setcancelstate(state, oldstate);\
315-
tt_pthread_error(e__); \
316-
})
317-
318307
/** Set the current thread's name
319308
*/
320309
static inline void

test/box-luatest/gh_3211_per_module_log_level_test.lua

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -42,16 +42,12 @@ g2.before_each(function(cg)
4242
end, {cg.params.cfg_type})
4343
end)
4444

45-
local function drop_server(cg)
46-
-- TODO(gh-8423): Remove this workaround.
47-
-- If log level is 'debug' during server:drop(), the test may hang on macOS
48-
-- (see gh-8420).
49-
cg.server:update_box_cfg({log_level = 'info',
50-
log_modules = {tarantool = 'info'}})
45+
g1.after_each(function(cg)
5146
cg.server:drop()
52-
end
53-
g1.after_each(drop_server)
54-
g2.after_each(drop_server)
47+
end)
48+
g2.after_each(function(cg)
49+
cg.server:drop()
50+
end)
5551

5652
-- Test log.new{...}
5753
g1.test_log_new = function(cg)

test/unit/CMakeLists.txt

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -261,15 +261,6 @@ create_unit_test(PREFIX cbus_call
261261
LIBRARIES core unit stat
262262
)
263263

264-
include(CheckSymbolExists)
265-
check_symbol_exists(__GLIBC__ features.h GLIBC_USED)
266-
if (GLIBC_USED)
267-
create_unit_test(PREFIX cbus_hang
268-
SOURCES cbus_hang.c core_test_utils.c
269-
LIBRARIES core unit stat
270-
)
271-
endif ()
272-
273264
create_unit_test(PREFIX coio
274265
SOURCES coio.cc core_test_utils.c
275266
LIBRARIES core eio bit uri unit

0 commit comments

Comments
 (0)