Implement correct RLIMIT_STACK handling

* miscfuncs.cc (struct pthread_wrapper_arg): Add member guardsize.
        (pthread_wrapper): Set thread stack guarantee according to guardsize.
        Tweak assembler code so that $rax/$eax is not required by GCC to
        prepare the wrapper_arg value.
        (CygwinCreateThread): Fix deadzone handling.  Drop setting a "POSIX"
        guardpage (aka page w/ PAGE_NOACCESS).  Always use Windows guard
        pages instead.  On post-XP systems (providing SetThreadStackGuarantee)
        always set up stack Windows-like with reserved/committed areas and
        movable guard pages.  Only on XP set up stack fully committed if the
        guardpage size is not the default system guardpage size.
        Fill out pthread_wrapper_arg::guardsize.  Improve comments.
        * resource.cc: Implement RLIMIT_STACK Linux-like.
        (DEFAULT_STACKSIZE): New macro.
        (DEFAULT_STACKGUARD): Ditto.
        (rlimit_stack_guard): New muto.
        (rlimit_stack): New global variable holding current RLIMIT_STACK values.
        (__set_rlimit_stack): Set rlimit_stack under lock.
        (__get_rlimit_stack): Initialize rlimit_stack from executable header
        and return rlimit_stack values under lock.
        (get_rlimit_stack): Filtering function to return useful default
        stacksize from rlimit_stack.rlim_cur value.
        (getrlimit): Call __get_rlimit_stack in RLIMIT_STACK case.
        (setrlimit): Call __set_rlimit_stack in RLIMIT_STACK case.
        * thread.cc (pthread::create): Fetch default stacksize calling
        get_rlimit_stack.
        (pthread_attr::pthread_attr): Fetch default guardsize calling
        wincap.def_guard_page_size.
        (pthread_attr_getstacksize): Fetch default stacksize calling
        get_rlimit_stack.
        * thread.h (PTHREAD_DEFAULT_STACKSIZE): Remove.
        (PTHREAD_DEFAULT_GUARDSIZE): Remove.
        (get_rlimit_stack): Declare.

Signed-off-by: Corinna Vinschen <corinna@vinschen.de>
This commit is contained in:
Corinna Vinschen 2015-07-05 15:51:37 +02:00
parent e426213a88
commit a54bc198b1
7 changed files with 172 additions and 71 deletions

View file

@ -1,3 +1,38 @@
2015-07-05 Corinna Vinschen <corinna@vinschen.de>
* miscfuncs.cc (struct pthread_wrapper_arg): Add member guardsize.
(pthread_wrapper): Set thread stack guarantee according to guardsize.
Tweak assembler code so that $rax/$eax is not required by GCC to
prepare the wrapper_arg value.
(CygwinCreateThread): Fix deadzone handling. Drop setting a "POSIX"
guardpage (aka page w/ PAGE_NOACCESS). Always use Windows guard
pages instead. On post-XP systems (providing SetThreadStackGuarantee)
always set up stack Windows-like with reserved/committed areas and
movable guard pages. Only on XP set up stack fully committed if the
guardpage size is not the default system guardpage size.
Fill out pthread_wrapper_arg::guardsize. Improve comments.
* resource.cc: Implement RLIMIT_STACK Linux-like.
(DEFAULT_STACKSIZE): New macro.
(DEFAULT_STACKGUARD): Ditto.
(rlimit_stack_guard): New muto.
(rlimit_stack): New global variable holding current RLIMIT_STACK values.
(__set_rlimit_stack): Set rlimit_stack under lock.
(__get_rlimit_stack): Initialize rlimit_stack from executable header
and return rlimit_stack values under lock.
(get_rlimit_stack): Filtering function to return useful default
stacksize from rlimit_stack.rlim_cur value.
(getrlimit): Call __get_rlimit_stack in RLIMIT_STACK case.
(setrlimit): Call __set_rlimit_stack in RLIMIT_STACK case.
* thread.cc (pthread::create): Fetch default stacksize calling
get_rlimit_stack.
(pthread_attr::pthread_attr): Fetch default guardsize calling
wincap.def_guard_page_size.
(pthread_attr_getstacksize): Fetch default stacksize calling
get_rlimit_stack.
* thread.h (PTHREAD_DEFAULT_STACKSIZE): Remove.
(PTHREAD_DEFAULT_GUARDSIZE): Remove.
(get_rlimit_stack): Declare.
2015-07-05 Corinna Vinschen <corinna@vinschen.de>
* fhandler_process.cc (heap_info::heap_info): Disable fetching heap info

View file

@ -560,6 +560,7 @@ struct pthread_wrapper_arg
PBYTE stackaddr;
PBYTE stackbase;
PBYTE stacklimit;
ULONG guardsize;
};
DWORD WINAPI
@ -592,7 +593,14 @@ pthread_wrapper (PVOID arg)
The below assembler code will release the OS stack after switching to our
new stack. */
wrapper_arg.stackaddr = dealloc_addr;
/* On post-XP systems, set thread stack guarantee matching the guardsize.
Note that the guardsize is one page bigger than the guarantee. */
if (wincap.has_set_thread_stack_guarantee ()
&& wrapper_arg.guardsize > wincap.def_guard_page_size ())
{
wrapper_arg.guardsize -= wincap.page_size ();
SetThreadStackGuarantee (&wrapper_arg.guardsize);
}
/* Initialize new _cygtls. */
_my_tls.init_thread (wrapper_arg.stackbase - CYGTLS_PADSIZE,
(DWORD (*)(void*, void*)) wrapper_arg.func);
@ -632,7 +640,7 @@ pthread_wrapper (PVOID arg)
#endif
#ifdef __x86_64__
__asm__ ("\n\
movq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\
leaq %[WRAPPER_ARG], %%rbx # Load &wrapper_arg into rbx \n\
movq (%%rbx), %%r12 # Load thread func into r12 \n\
movq 8(%%rbx), %%r13 # Load thread arg into r13 \n\
movq 16(%%rbx), %%rcx # Load stackaddr into rcx \n\
@ -652,11 +660,11 @@ pthread_wrapper (PVOID arg)
# register r13 and then just call the function. \n\
movq %%r13, %%rcx # Move thread arg to 1st arg reg\n\
call *%%r12 # Call thread func \n"
: : [WRAPPER_ARG] "r" (&wrapper_arg),
: : [WRAPPER_ARG] "o" (wrapper_arg),
[CYGTLS] "i" (CYGTLS_PADSIZE));
#else
__asm__ ("\n\
movl %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\
leal %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\
movl (%%ebx), %%eax # Load thread func into eax \n\
movl 4(%%ebx), %%ecx # Load thread arg into ecx \n\
movl 8(%%ebx), %%edx # Load stackaddr into edx \n\
@ -683,7 +691,7 @@ pthread_wrapper (PVOID arg)
# stack in the expected spot. \n\
popl %%eax # Pop thread_func address \n\
call *%%eax # Call thread func \n"
: : [WRAPPER_ARG] "r" (&wrapper_arg),
: : [WRAPPER_ARG] "o" (wrapper_arg),
[CYGTLS] "i" (CYGTLS_PADSIZE));
#endif
/* pthread::thread_init_wrapper calls pthread::exit, which
@ -777,7 +785,8 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg,
if (stackaddr)
{
/* If the application provided the stack, just use it. */
/* If the application provided the stack, just use it. There won't
be any stack overflow handling! */
wrapper_arg->stackaddr = (PBYTE) stackaddr;
wrapper_arg->stackbase = (PBYTE) stackaddr + stacksize;
}
@ -790,10 +799,8 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg,
real_guardsize = roundup2 (guardsize, wincap.page_size ());
/* Add the guardsize to the stacksize */
real_stacksize += real_guardsize;
/* If we use the default Windows guardpage method, we have to take
the 2 pages dead zone into account. */
if (real_guardsize == wincap.page_size ())
real_stacksize += 2 * wincap.page_size ();
/* Take dead zone page into account, which always stays uncommited. */
real_stacksize += wincap.page_size ();
/* Now roundup the result to the next allocation boundary. */
real_stacksize = roundup2 (real_stacksize,
wincap.allocation_granularity ());
@ -811,46 +818,63 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg,
#endif
if (!real_stackaddr)
return NULL;
/* Set up committed region. Two cases: */
if (real_guardsize != wincap.page_size ())
/* Set up committed region. We have two cases: */
if (!wincap.has_set_thread_stack_guarantee ()
&& real_guardsize != wincap.def_guard_page_size ())
{
/* If guardsize is set to something other than the page size, we
commit the entire stack and, if guardsize is > 0, we set up a
POSIX guardpage. We don't set up a Windows guardpage. */
if (!VirtualAlloc (real_stackaddr, real_guardsize, MEM_COMMIT,
PAGE_NOACCESS))
/* If guardsize is set to something other than the default guard page
size, and if we're running on Windows XP 32 bit, we commit the
entire stack, and, if guardsize is > 0, set up a guard page. */
real_stacklimit = (PBYTE) real_stackaddr + wincap.page_size ();
if (real_guardsize
&& !VirtualAlloc (real_stacklimit, real_guardsize, MEM_COMMIT,
PAGE_READWRITE | PAGE_GUARD))
goto err;
real_stacklimit = (PBYTE) real_stackaddr + real_guardsize;
if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize,
real_stacklimit += real_guardsize;
if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize
- wincap.page_size (),
MEM_COMMIT, PAGE_READWRITE))
goto err;
}
else
{
/* If guardsize is exactly the page_size, we can assume that the
application will behave Windows conformant in terms of stack usage.
We can especially assume that it never allocates more than one
page at a time (alloca/_chkstk). Therefore, this is the default
case which allows a Windows compatible stack setup with a
reserved region, a guard page, and a commited region. We don't
need to set up a POSIX guardpage since Windows already handles
stack overflow: Trying to extend the stack into the last three
pages of the stack results in a SEGV.
We always commit 64K here, starting with the guardpage. */
/* Otherwise we set up the stack like the OS does, with a reserved
region, the guard pages, and a commited region. We commit the
stack commit size from the executable header, but at least
PTHREAD_STACK_MIN (64K). */
static ULONG exe_commitsize;
if (!exe_commitsize)
{
PIMAGE_DOS_HEADER dosheader;
PIMAGE_NT_HEADERS ntheader;
dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
ntheader = (PIMAGE_NT_HEADERS)
((PBYTE) dosheader + dosheader->e_lfanew);
exe_commitsize = ntheader->OptionalHeader.SizeOfStackCommit;
exe_commitsize = roundup2 (exe_commitsize, wincap.page_size ());
}
ULONG commitsize = exe_commitsize;
if (commitsize > real_stacksize - real_guardsize
- wincap.page_size ())
commitsize = real_stacksize - real_guardsize - wincap.page_size ();
else if (commitsize < PTHREAD_STACK_MIN)
commitsize = PTHREAD_STACK_MIN;
real_stacklimit = (PBYTE) real_stackaddr + real_stacksize
- wincap.allocation_granularity ();
if (!VirtualAlloc (real_stacklimit, wincap.page_size (), MEM_COMMIT,
PAGE_READWRITE | PAGE_GUARD))
- commitsize - real_guardsize;
if (!VirtualAlloc (real_stacklimit, real_guardsize,
MEM_COMMIT, PAGE_READWRITE | PAGE_GUARD))
goto err;
real_stacklimit += wincap.page_size ();
if (!VirtualAlloc (real_stacklimit, wincap.allocation_granularity ()
- wincap.page_size (), MEM_COMMIT,
real_stacklimit += real_guardsize;
if (!VirtualAlloc (real_stacklimit, commitsize, MEM_COMMIT,
PAGE_READWRITE))
goto err;
}
wrapper_arg->stackaddr = (PBYTE) real_stackaddr;
wrapper_arg->stackbase = (PBYTE) real_stackaddr + real_stacksize;
wrapper_arg->stacklimit = real_stacklimit;
wrapper_arg->guardsize = real_guardsize;
}
/* Use the STACK_SIZE_PARAM_IS_A_RESERVATION parameter so only the
minimum size for a thread stack is reserved by the OS. Note that we

View file

@ -1,6 +1,12 @@
What's new:
-----------
- Handle pthread stacksizes as in GLibc: Default to RLIMIT_STACK resource.
Allow to set RLIMIT_STACK via setrlimit. Default RLIMIT_STACK to value
from executable header as described on
https://msdn.microsoft.com/en-us/library/windows/desktop/ms686774.aspx
Default stacksize to 2 Megs in case RLIMIT_STACK is set to RLIM_INFINITY.
- First cut of an implementation to allow signal handlers running on an
alternate signal stack.

View file

@ -111,6 +111,61 @@ getrusage (int intwho, struct rusage *rusage_in)
return res;
}
/* Default stacksize in case RLIMIT_STACK is RLIM_INFINITY is 2 Megs with
system-dependent number of guard pages. The pthread stacksize does not
include the guardpage size, so we have to subtract the default guardpage
size. Additionally the Windows stack handling disallows committing the
last page, so we subtract it, too. */
#define DEFAULT_STACKSIZE (2 * 1024 * 1024)
/* Number of bytes subtracted from a total stack size to get the usable
pthread stacksize: the default guard page size plus one more page. */
#define DEFAULT_STACKGUARD (wincap.def_guard_page_size() + wincap.page_size ())
/* Lock taken around every read and write of rlimit_stack below. */
muto NO_COPY rlimit_stack_guard;
/* Current RLIMIT_STACK values. A rlim_cur of 0 means "not yet initialized":
__get_rlimit_stack replaces it with a value derived from the executable
header the first time it is called. rlim_max starts out as RLIM_INFINITY. */
static struct rlimit rlimit_stack = { 0, RLIM_INFINITY };
/* Store *rlp as the new RLIMIT_STACK values in rlimit_stack. The copy is
made while holding rlimit_stack_guard. The values are stored exactly as
given; no validation is performed here.
NOTE(review): storing a rlim_cur of 0 makes the next __get_rlimit_stack call
re-read the default from the executable header, since 0 doubles as the
"uninitialized" marker -- confirm this is intended. */
static void
__set_rlimit_stack (const struct rlimit *rlp)
{
/* init is invoked on every call before acquiring the lock; presumably it is
safe to call repeatedly -- confirm against muto::init. */
rlimit_stack_guard.init ("rlimit_stack_guard")->acquire ();
rlimit_stack = *rlp;
rlimit_stack_guard.release ();
}
static void
__get_rlimit_stack (struct rlimit *rlp)
{
rlimit_stack_guard.init ("rlimit_stack_guard")->acquire ();
if (!rlimit_stack.rlim_cur)
{
/* Fetch the default stacksize from the executable header... */
PIMAGE_DOS_HEADER dosheader;
PIMAGE_NT_HEADERS ntheader;
dosheader = (PIMAGE_DOS_HEADER) GetModuleHandle (NULL);
ntheader = (PIMAGE_NT_HEADERS) ((PBYTE) dosheader + dosheader->e_lfanew);
rlimit_stack.rlim_cur = ntheader->OptionalHeader.SizeOfStackReserve;
/* ...and subtract the guardpages. */
rlimit_stack.rlim_cur -= DEFAULT_STACKGUARD;
}
*rlp = rlimit_stack;
rlimit_stack_guard.release ();
}
/* Return the default stacksize to use for new threads, derived from the
   current RLIMIT_STACK soft limit:
   - RLIM_INFINITY maps to DEFAULT_STACKSIZE - DEFAULT_STACKGUARD, since an
     infinite stack doesn't make much sense.  As in glibc, this is an
     "architecture-specific default".
   - Anything below PTHREAD_STACK_MIN is raised to PTHREAD_STACK_MIN.
   - Every other value is returned unchanged. */
size_t
get_rlimit_stack (void)
{
  struct rlimit limits;

  __get_rlimit_stack (&limits);

  if (limits.rlim_cur == RLIM_INFINITY)
    {
      rlim_t fallback = DEFAULT_STACKSIZE - DEFAULT_STACKGUARD;
      return (size_t) fallback;
    }
  if (limits.rlim_cur < PTHREAD_STACK_MIN)
    {
      /* Always return at least the minimum stacksize. */
      rlim_t minimum = PTHREAD_STACK_MIN;
      return (size_t) minimum;
    }
  return (size_t) limits.rlim_cur;
}
extern "C" int
getrlimit (int resource, struct rlimit *rlp)
{
@ -127,32 +182,7 @@ getrlimit (int resource, struct rlimit *rlp)
case RLIMIT_AS:
break;
case RLIMIT_STACK:
PTEB teb;
/* 2015-06-26: Originally rlim_cur returned the size of the still
available stack area on the current stack, rlim_max the total size
of the current stack. Two problems:
- Per POSIX, RLIMIT_STACK returns "the maximum size of the initial
thread's stack, in bytes. The implementation does not
automatically grow the stack beyond this limit".
- With the implementation of sigaltstack, the current stack is not
necessarily the "initial thread's stack" anymore. Rather, when
called from a signal handler running on the alternate stack,
RLIMIT_STACK should return the size of the original stack.
rlim_cur is now the size of the stack. For system-provided stacks
it's the size between DeallocationStack and StackBase. For
application-provided stacks (via pthread_attr_setstack),
DeallocationStack is NULL, but StackLimit points to the bottom
of the stack.
rlim_max is set to RLIM_INFINITY since there's no hard limit
for stack sizes on Windows. */
teb = NtCurrentTeb ();
rlp->rlim_cur = (rlim_t) teb->Tib.StackBase
- (rlim_t) (teb->DeallocationStack
?: teb->Tib.StackLimit);
__get_rlimit_stack (rlp);
break;
case RLIMIT_NOFILE:
rlp->rlim_cur = getdtablesize ();
@ -206,6 +236,9 @@ setrlimit (int resource, const struct rlimit *rlp)
if (rlp->rlim_cur != RLIM_INFINITY)
return setdtablesize (rlp->rlim_cur);
break;
case RLIMIT_STACK:
__set_rlimit_stack (rlp);
break;
default:
set_errno (EINVAL);
__leave;

View file

@ -475,7 +475,7 @@ pthread::create (void *(*func) (void *), pthread_attr *newattr,
mutex.lock ();
/* stackaddr holds the uppermost stack address. See the comments in
pthread_attr_setstack and pthread_attr_setstackaddr for a description. */
ULONG stacksize = attr.stacksize ?: PTHREAD_DEFAULT_STACKSIZE;
ULONG stacksize = attr.stacksize ?: get_rlimit_stack ();
PVOID stackaddr = attr.stackaddr ? ((caddr_t) attr.stackaddr - stacksize)
: NULL;
win32_obj_id = CygwinCreateThread (thread_init_wrapper, this, stackaddr,
@ -1093,7 +1093,7 @@ pthread::resume ()
pthread_attr::pthread_attr ():verifyable_object (PTHREAD_ATTR_MAGIC),
joinable (PTHREAD_CREATE_JOINABLE), contentionscope (PTHREAD_SCOPE_PROCESS),
inheritsched (PTHREAD_INHERIT_SCHED), stackaddr (NULL), stacksize (0),
guardsize (PTHREAD_DEFAULT_GUARDSIZE)
guardsize (wincap.def_guard_page_size ())
{
schedparam.sched_priority = 0;
}
@ -2330,7 +2330,7 @@ pthread_attr_getstacksize (const pthread_attr_t *attr, size_t *size)
/* If the stacksize has not been set by the application, return the
default stacksize. Note that this is different from what
pthread_attr_getstack returns. */
*size = (*attr)->stacksize ?: PTHREAD_DEFAULT_STACKSIZE;
*size = (*attr)->stacksize ?: get_rlimit_stack ();
return 0;
}

View file

@ -16,13 +16,8 @@ details. */
#define WRITE_LOCK 1
#define READ_LOCK 2
/* Default is a 1 Megs stack with a 4K guardpage. The pthread stacksize
does not include the guardpage size, so we subtract the default guardpage
size. Additionally, the Windows stack handling disallows to use the last
two pages as guard page (tested on XP and W7). That results in a zone of
three pages which have to be subtract to get the actual stack size. */
#define PTHREAD_DEFAULT_STACKSIZE (1024 * 1024 - 3 * wincap.page_size ())
#define PTHREAD_DEFAULT_GUARDSIZE (wincap.page_size ())
/* resource.cc */
extern size_t get_rlimit_stack (void);
#include <pthread.h>
#include <limits.h>

View file

@ -8,6 +8,14 @@
<itemizedlist mark="bullet">
<listitem><para>
Handle pthread stacksizes as in GLibc: Default to RLIMIT_STACK resource.
Allow to set RLIMIT_STACK via setrlimit. Default RLIMIT_STACK to value
from executable header as described on the MSDN website
<ulink url="https://msdn.microsoft.com/en-us/library/windows/desktop/ms686774.aspx">Thread Stack Size</ulink>.
Default stacksize to 2 Megs in case RLIMIT_STACK is set to RLIM_INFINITY.
</para></listitem>
<listitem><para>
First cut of an implementation to allow signal handlers running on an
alternate signal stack.