diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog index 94615cffb..53e527475 100644 --- a/winsup/cygwin/ChangeLog +++ b/winsup/cygwin/ChangeLog @@ -1,3 +1,11 @@ +2012-02-15 Corinna Vinschen + + * miscfuncs.cc: Revert change from 2012-02-13 which used the + Windows-provided stack rather than an own stack created in + CygwinCreateThread. + (struct thread_wrapper_arg): Rename commitsize to stacklimit. + (CygwinCreateThread): Rename commitsize to real_stacklimit. + 2012-02-15 Corinna Vinschen * dtable.cc (dtable::init_std_file_from_handle): Use tmp_pathbuf for diff --git a/winsup/cygwin/miscfuncs.cc b/winsup/cygwin/miscfuncs.cc index 66ed98bdd..4a4cd3ee5 100644 --- a/winsup/cygwin/miscfuncs.cc +++ b/winsup/cygwin/miscfuncs.cc @@ -444,7 +444,7 @@ struct thread_wrapper_arg PVOID arg; PBYTE stackaddr; PBYTE stackbase; - ULONG guardsize; + PBYTE stacklimit; }; DWORD WINAPI @@ -458,127 +458,93 @@ thread_wrapper (VOID *arg) thread_wrapper_arg wrapper_arg = *(thread_wrapper_arg *) arg; cfree (arg); + /* Remove _cygtls from this stack since it won't be used anymore. */ + _my_tls.remove (0); + + /* Set stack values in TEB */ PTEB teb = NtCurrentTeb (); + teb->Tib.StackBase = wrapper_arg.stackbase; + teb->Tib.StackLimit = wrapper_arg.stacklimit ?: wrapper_arg.stackaddr; + /* Set DeallocationStack value. If we have an application-provided stack, + we set DeallocationStack to NULL, so NtTerminateThread does not deallocate + any stack. If we created the stack in CygwinCreateThread, we set + DeallocationStack to the stackaddr of our own stack, so it's automatically + deallocated when the thread is terminated. */ + PBYTE dealloc_addr = (PBYTE) teb->DeallocationStack; + teb->DeallocationStack = wrapper_arg.stacklimit ? wrapper_arg.stackaddr + : NULL; + /* Store the OS-provided DeallocationStack address in wrapper_arg.stackaddr. + The below assembler code will release the OS stack after switching to our + new stack. */ + wrapper_arg.stackaddr = dealloc_addr; - if (!wrapper_arg.stackaddr) + /* Initialize new _cygtls. */ + _my_tls.init_thread (wrapper_arg.stackbase - CYGTLS_PADSIZE, + (DWORD (*)(void*, void*)) wrapper_arg.func); + + /* Copy exception list over to new stack. I'm not quite sure how the + exception list is extended by Windows itself. What's clear is that it + always grows downwards and that it starts right at the stackbase. + Therefore we first count the number of exception records and place + the copy at the stackbase, too, so there's still a lot of room to + extend the list up to where our _cygtls region starts. */ + _exception_list *old_start = (_exception_list *) teb->Tib.ExceptionList; + unsigned count = 0; + teb->Tib.ExceptionList = NULL; + for (_exception_list *e_ptr = old_start; + e_ptr && e_ptr != (_exception_list *) -1; + e_ptr = e_ptr->prev) + ++count; + if (count) { - /* The simple case: Windows-provided stack. */ - - /* If guardsize is exactly the page_size, we can assume that the - application will behave Windows conformant in terms of stack usage. - We can especially assume that it never allocates more than one - page at a time (alloca/_chkstk). Therefore, this is the default - case which allows a Windows compatible stack setup with a - reserved region, a guard page, and a commited region. We don't - need to set up a POSIX guardpage since Windows already handles - stack overflow: Trying to extend the stack into the last three - pages of the stack results in a SEGV. */ - if (wrapper_arg.guardsize != wincap.page_size ()) + _exception_list *new_start = (_exception_list *) wrapper_arg.stackbase + - count; + teb->Tib.ExceptionList = (struct _EXCEPTION_REGISTRATION_RECORD *) + new_start; + while (true) { - /* However, if guardsize is set to something other than the - page size, we commit the entire stack, remove the Windows - guardpage and, if guardsize is > 0, set up a POSIX guardpage. */ - DWORD old_prot; - ULONG stacksize = (uintptr_t) teb->Tib.StackBase - - (uintptr_t) teb->DeallocationStack; - - VirtualAlloc (teb->DeallocationStack, stacksize, - MEM_COMMIT, PAGE_READWRITE); - VirtualProtect (teb->DeallocationStack, stacksize, - PAGE_READWRITE, &old_prot); - VirtualProtect (teb->DeallocationStack, wrapper_arg.guardsize, - PAGE_NOACCESS, &old_prot); - teb->Tib.StackLimit = (PVOID) ((PBYTE) teb->DeallocationStack - + wrapper_arg.guardsize); - } - wrapper_arg.func (wrapper_arg.arg); - } - else - { - /* The tricky case: Application-provided stack. */ - - /* Remove _cygtls from this stack since it won't be used anymore. */ - _my_tls.remove (0); - - /* Set stack values in TEB */ - teb->Tib.StackBase = wrapper_arg.stackbase; - teb->Tib.StackLimit = wrapper_arg.stackaddr; - /* Set DeallocationStack value. If we have an application-provided - stack, we set DeallocationStack to NULL, so NtTerminateThread does - not deallocate any stack. Store the OS-provided DeallocationStack - address in wrapper_arg.stackaddr. The below assembler code will - release the OS stack after switching to our new stack. */ - wrapper_arg.stackaddr = (PBYTE) teb->DeallocationStack; - teb->DeallocationStack = NULL; - - /* Initialize new _cygtls. */ - _my_tls.init_thread (wrapper_arg.stackbase - CYGTLS_PADSIZE, - (DWORD (*)(void*, void*)) wrapper_arg.func); - - /* Copy exception list over to new stack. I'm not quite sure how the - exception list is extended by Windows itself. What's clear is that it - always grows downwards and that it starts right at the stackbase. - Therefore we first count the number of exception records and place - the copy at the stackbase, too, so there's still a lot of room to - extend the list up to where our _cygtls region starts. */ - _exception_list *old_start = (_exception_list *) teb->Tib.ExceptionList; - unsigned count = 0; - teb->Tib.ExceptionList = NULL; - for (_exception_list *e_ptr = old_start; - e_ptr && e_ptr != (_exception_list *) -1; - e_ptr = e_ptr->prev) - ++count; - if (count) - { - _exception_list *new_start = (_exception_list *) wrapper_arg.stackbase - - count; - teb->Tib.ExceptionList = (struct _EXCEPTION_REGISTRATION_RECORD *) - new_start; - while (true) + new_start->handler = old_start->handler; + if (old_start->prev == (_exception_list *) -1) { - new_start->handler = old_start->handler; - if (old_start->prev == (_exception_list *) -1) - { - new_start->prev = (_exception_list *) -1; - break; - } - new_start->prev = new_start + 1; - new_start = new_start->prev; - old_start = old_start->prev; + new_start->prev = (_exception_list *) -1; + break; } + new_start->prev = new_start + 1; + new_start = new_start->prev; + old_start = old_start->prev; } - - __asm__ ("\n\ - movl %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\ - movl (%%ebx), %%eax # Load thread func into eax \n\ - movl 4(%%ebx), %%ecx # Load thread arg into ecx \n\ - movl 8(%%ebx), %%edx # Load stackaddr into edx \n\ - movl 12(%%ebx), %%ebx # Load stackbase into ebx \n\ - subl %[CYGTLS], %%ebx # Subtract CYGTLS_PADSIZE \n\ - subl $4, %%ebx # Subtract another 4 bytes \n\ - movl %%ebx, %%esp # Set esp \n\ - xorl %%ebp, %%ebp # Set ebp to 0 \n\ - # Make gcc 3.x happy and align the stack so that it is \n\ - # 16 byte aligned right before the final call opcode. \n\ - andl $-16, %%esp # 16 byte align \n\ - addl $-12, %%esp # 12 bytes + 4 byte arg = 16 \n\ - # Now we moved to the new stack. Save thread func address\n\ - # and thread arg on new stack \n\ - pushl %%ecx # Push thread arg onto stack \n\ - pushl %%eax # Push thread func onto stack \n\ - # Now it's safe to release the OS stack. \n\ - pushl $0x8000 # dwFreeType: MEM_RELEASE \n\ - pushl $0x0 # dwSize: 0 \n\ - pushl %%edx # lpAddress: stackaddr \n\ - call _VirtualFree@12 # Shoot \n\ - # All set. We can pop the thread function address from \n\ - # the stack and call it. The thread arg is still on the \n\ - # stack in the expected spot. \n\ - popl %%eax # Pop thread_func address \n\ - call *%%eax # Call thread func \n" - : : [WRAPPER_ARG] "r" (&wrapper_arg), - [CYGTLS] "i" (CYGTLS_PADSIZE)); } + + __asm__ ("\n\ + movl %[WRAPPER_ARG], %%ebx # Load &wrapper_arg into ebx \n\ + movl (%%ebx), %%eax # Load thread func into eax \n\ + movl 4(%%ebx), %%ecx # Load thread arg into ecx \n\ + movl 8(%%ebx), %%edx # Load stackaddr into edx \n\ + movl 12(%%ebx), %%ebx # Load stackbase into ebx \n\ + subl %[CYGTLS], %%ebx # Subtract CYGTLS_PADSIZE \n\ + subl $4, %%ebx # Subtract another 4 bytes \n\ + movl %%ebx, %%esp # Set esp \n\ + xorl %%ebp, %%ebp # Set ebp to 0 \n\ + # Make gcc 3.x happy and align the stack so that it is \n\ + # 16 byte aligned right before the final call opcode. \n\ + andl $-16, %%esp # 16 byte align \n\ + addl $-12, %%esp # 12 bytes + 4 byte arg = 16 \n\ + # Now we moved to the new stack. Save thread func address\n\ + # and thread arg on new stack \n\ + pushl %%ecx # Push thread arg onto stack \n\ + pushl %%eax # Push thread func onto stack \n\ + # Now it's safe to release the OS stack. \n\ + pushl $0x8000 # dwFreeType: MEM_RELEASE \n\ + pushl $0x0 # dwSize: 0 \n\ + pushl %%edx # lpAddress: stackaddr \n\ + call _VirtualFree@12 # Shoot \n\ + # All set. We can pop the thread function address from \n\ + # the stack and call it. The thread arg is still on the \n\ + # stack in the expected spot. \n\ + popl %%eax # Pop thread_func address \n\ + call *%%eax # Call thread func \n" + : : [WRAPPER_ARG] "r" (&wrapper_arg), + [CYGTLS] "i" (CYGTLS_PADSIZE)); /* Never return from here. */ ExitThread (0); } @@ -588,9 +554,11 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg, PVOID stackaddr, ULONG stacksize, ULONG guardsize, DWORD creation_flags, LPDWORD thread_id) { + PVOID real_stackaddr = NULL; ULONG real_stacksize = 0; ULONG real_guardsize = 0; thread_wrapper_arg *wrapper_arg; + HANDLE thread = NULL; wrapper_arg = (thread_wrapper_arg *) ccalloc (HEAP_STR, 1, sizeof *wrapper_arg); @@ -610,7 +578,9 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg, } else { - /* If not, we let CreateThread reserve the stack. */ + PBYTE real_stacklimit; + + /* If not, we have to create the stack here. */ real_stacksize = roundup2 (stacksize, wincap.page_size ()); real_guardsize = roundup2 (guardsize, wincap.page_size ()); /* Add the guardsize to the stacksize */ @@ -622,17 +592,76 @@ CygwinCreateThread (LPTHREAD_START_ROUTINE thread_func, PVOID thread_arg, /* Now roundup the result to the next allocation boundary. */ real_stacksize = roundup2 (real_stacksize, wincap.allocation_granularity ()); - wrapper_arg->guardsize = real_guardsize; + /* Reserve stack. + FIXME? If the TOP_DOWN method tends to collide too much with + other stuff, we should provide our own mechanism to find a + suitable place for the stack. Top down from the start of + the Cygwin DLL comes to mind. */ + real_stackaddr = VirtualAlloc (NULL, real_stacksize, + MEM_RESERVE | MEM_TOP_DOWN, + PAGE_READWRITE); + if (!real_stackaddr) + return NULL; + /* Set up committed region. Two cases: */ + if (real_guardsize != wincap.page_size ()) + { + /* If guardsize is set to something other than the page size, we + commit the entire stack and, if guardsize is > 0, we set up a + POSIX guardpage. We don't set up a Windows guardpage. */ + if (!VirtualAlloc (real_stackaddr, real_guardsize, MEM_COMMIT, + PAGE_NOACCESS)) + goto err; + real_stacklimit = (PBYTE) real_stackaddr + real_guardsize; + if (!VirtualAlloc (real_stacklimit, real_stacksize - real_guardsize, + MEM_COMMIT, PAGE_READWRITE)) + goto err; + } + else + { + /* If guardsize is exactly the page_size, we can assume that the + application will behave Windows conformant in terms of stack usage. + We can especially assume that it never allocates more than one + page at a time (alloca/_chkstk). Therefore, this is the default + case which allows a Windows compatible stack setup with a + reserved region, a guard page, and a commited region. We don't + need to set up a POSIX guardpage since Windows already handles + stack overflow: Trying to extend the stack into the last three + pages of the stack results in a SEGV. + We always commit 64K here, starting with the guardpage. */ + real_stacklimit = (PBYTE) real_stackaddr + real_stacksize + - wincap.allocation_granularity (); + if (!VirtualAlloc (real_stacklimit, wincap.page_size (), MEM_COMMIT, + PAGE_READWRITE | PAGE_GUARD)) + goto err; + real_stacklimit += wincap.page_size (); + if (!VirtualAlloc (real_stacklimit, wincap.allocation_granularity () + - wincap.page_size (), MEM_COMMIT, + PAGE_READWRITE)) + goto err; + } + wrapper_arg->stackaddr = (PBYTE) real_stackaddr; + wrapper_arg->stackbase = (PBYTE) real_stackaddr + real_stacksize; + wrapper_arg->stacklimit = real_stacklimit; } - /* Use the STACK_SIZE_PARAM_IS_A_RESERVATION parameter. This doesn't - work on Windows 2000, but either we deallocate the OS stack in - thread_wrapper anyway, or the reservation is rounded to the next Meg. - Nothing to worry about. - Note that we reserve a 256K stack as minimum size, not 64K, otherwise - the thread creation might crash the process due to a stack overflow. */ - return CreateThread (&sec_none_nih, stackaddr ? 4 * PTHREAD_STACK_MIN - : real_stacksize, - thread_wrapper, wrapper_arg, - creation_flags | STACK_SIZE_PARAM_IS_A_RESERVATION, - thread_id); + /* Use the STACK_SIZE_PARAM_IS_A_RESERVATION parameter so only the + minimum size for a thread stack is reserved by the OS. This doesn't + work on Windows 2000, but we deallocate the OS stack in thread_wrapper + anyway, so this should be a problem only in a tight memory condition. + Note that we reserve a 256K stack, not 64K, otherwise the thread creation + might crash the process due to a stack overflow. */ + thread = CreateThread (&sec_none_nih, 4 * PTHREAD_STACK_MIN, + thread_wrapper, wrapper_arg, + creation_flags | STACK_SIZE_PARAM_IS_A_RESERVATION, + thread_id); + +err: + if (!thread && real_stackaddr) + { + /* Don't report the wrong error even though VirtualFree is very unlikely + to fail. */ + DWORD err = GetLastError (); + VirtualFree (real_stackaddr, 0, MEM_RELEASE); + SetLastError (err); + } + return thread; }