Support SSE float environment in fenv.h functions.

* cpu_features.c: New file.
	* cpu_features.h: New file.
	* crt1.c: Include "cpu_features.h".
	(__mingw_CRTStartup): Call __cpu_features_init ().
	* Makefile.in (MINGW_OBJS): Add cpu_features.o.
	(SRCDIST_FILES): Add cpu_features.c, cpu_features.h.
	* include/fenv.h (fenv_t): Append __mxcsr field.
	(__MXCSR_EXCEPT_FLAG_SHIFT): New define.
	(__MXCSR_EXCEPT_MASK_SHIFT): New define.
	(__MXCSR_ROUND_FLAG_SHIFT): New define.
	* mingwex/feclearexcept.c: Include "cpu_features.h".
	Handle SSE environment.
	* mingwex/fegetenv.c: Likewise.
	* mingwex/feholdexcept.c: Likewise.
	* mingwex/fesetenv.c: Likewise.
	* mingwex/fesetexceptflag.c: Likewise.
	* mingwex/fesetround.c: Likewise.
	* mingwex/fetestexcept.c: Likewise.
	* mingwex/feupdateenv.c: Likewise.
	* mingwex/fegetround.c: Add comment.
This commit is contained in:
Danny Smith 2006-07-03 10:32:58 +00:00
parent 69d5f3329f
commit f34428eb35
15 changed files with 252 additions and 17 deletions

View File

@ -1,3 +1,6 @@
2006-07-03 Danny Smith <dannysmith@users.sourceforge.net>
2006-06-25 Chris Sutcliffe <ir0nh34d@users.sourceforge.net>
* Include/_mingw.h: Increment version to 3.10.

View File

@ -156,7 +156,7 @@ CRT0S = crt1.o dllcrt1.o crt2.o dllcrt2.o CRT_noglob.o crtmt.o crtst.o \
CRT_fp8.o CRT_fp10.o txtmode.o binmode.o
MINGW_OBJS = CRTglob.o CRTfmode.o CRTinit.o dllmain.o gccmain.o \
main.o crtst.o mthr_stub.o CRT_fp10.o txtmode.o \
pseudo-reloc.o pseudo-reloc-list.o
pseudo-reloc.o pseudo-reloc-list.o cpu_features.o
MOLD_OBJS = isascii.o iscsym.o iscsymf.o toascii.o \
strcasecmp.o strncasecmp.o wcscmpi.o
@ -187,7 +187,7 @@ mthr.c mthr_init.c mthr_stub.c readme.txt \
isascii.c iscsym.c iscsymf.c toascii.c \
strcasecmp.c strncasecmp.c wcscmpi.c \
CRT_fp8.c CRT_fp10.c test_headers.c txtmode.c binmode.c pseudo-reloc.c \
pseudo-reloc-list.c \
pseudo-reloc-list.c cpu_features.c cpu_features.h\
DISCLAIMER CONTRIBUTORS

105
winsup/mingw/cpu_features.c Executable file
View File

@ -0,0 +1,105 @@
#include <stdbool.h>
#include "cpu_features.h"
/* CPUID feature bits tested by __cpu_features_init ().  The constants
   are unsigned so that bit 31 (EDX_3DNOW) does not shift into the sign
   bit of an int.  */

/* level 1 edx bits */
#define EDX_CX8 (1U << 8) /* CMPXCHG8B */
#define EDX_CMOV (1U << 15)
#define EDX_MMX (1U << 23)
#define EDX_FXSR (1U << 24) /* FXSAVE and FXRSTOR */
#define EDX_SSE (1U << 25)
#define EDX_SSE2 (1U << 26)
/* level 1 ecx bits */
#define ECX_SSE3 (1U << 0)
#define ECX_CX16 (1U << 13) /* CMPXCHG16B */
/* extended level 0x80000001 edx bits */
#define EDX_3DNOW (1U << 31)
#define EDX_3DNOWP (1U << 30)
#define EDX_LM (1U << 29) /* LONG MODE */
/* Execute the CPUID instruction for leaf LEVEL.  LEVEL is passed in EAX
   (the "0" constraint ties the input to the first output operand); the
   resulting EAX, EBX, ECX and EDX values are stored in A, B, C and D,
   which must therefore be lvalues.  No value is loaded into ECX before
   the instruction runs.  */
#define __cpuid(level,a,b,c,d) \
__asm__ __volatile__ ("cpuid;" \
: "=a" (a), "=b" (b), "=c" (c), "=d" (d)\
: "0" (level))
/* Bitmap of detected CPU features: the _CRT_* flags ORed together by
   __cpu_features_init ().  It is zero until that function has run, and
   stays zero if the CPUID instruction is not available.  */
unsigned int __cpu_features = 0;
/* Detect CPU features with CPUID and record them in __cpu_features.

   Returns without touching __cpu_features when the CPUID instruction is
   not available or when leaf 0 reports no further standard leaf.  Bits
   are only ever ORed into __cpu_features; nothing is cleared.  */
void __cpu_features_init (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* Try to change the value of CPUID bit (bit 21) in EFLAGS.
     If the bit can be toggled, CPUID is supported.  EAX receives the
     flags read back after the toggle attempt, EBX the flags before it;
     the final popfl restores the EFLAGS value saved by the first
     pushfl.  */
  __asm__ __volatile__ ("pushfl; pushfl; popl %0;"
                        "movl %0,%1; xorl %2,%0;"
                        "pushl %0; popfl; pushfl; popl %0; popfl"
                        : "=&r" (eax), "=&r" (ebx)
                        : "i" (0x00200000));

  if (((eax ^ ebx) & 0x00200000) == 0)
    return;

  /* Leaf 0: EAX is the highest supported standard leaf.  */
  __cpuid (0, eax, ebx, ecx, edx);
  if (eax == 0)
    return;

  /* Leaf 1: standard feature bits in EDX and ECX.  */
  __cpuid (1, eax, ebx, ecx, edx);

  if (edx & EDX_CX8)
    __cpu_features |= _CRT_CMPXCHG8B;
  if (edx & EDX_CMOV)
    __cpu_features |= _CRT_CMOV;
  if (edx & EDX_MMX)
    __cpu_features |= _CRT_MMX;
  if (edx & EDX_FXSR)
    __cpu_features |= _CRT_FXSR;
  if (edx & EDX_SSE)
    __cpu_features |= _CRT_SSE;
  if (edx & EDX_SSE2)
    __cpu_features |= _CRT_SSE2;

  if (ecx & ECX_SSE3)
    __cpu_features |= _CRT_SSE3;
  if (ecx & ECX_CX16)
    __cpu_features |= _CRT_CMPXCHG16B;

  /* Leaf 0x80000000: EAX is the highest supported extended leaf.  Only
     query 0x80000001 when the CPU reports it.  */
  __cpuid (0x80000000, eax, ebx, ecx, edx);
  if (eax < 0x80000001)
    return;

  __cpuid (0x80000001, eax, ebx, ecx, edx);

  /* Note: no ';' may follow the condition, otherwise the 3DNow! flag
     would be set regardless of what the CPU reports.  */
  if (edx & EDX_3DNOW)
    __cpu_features |= _CRT_3DNOW;
  if (edx & EDX_3DNOWP)
    __cpu_features |= _CRT_3DNOWP;
}
#ifdef TEST
/* Stand-alone self test, compiled only when TEST is defined: runs the
   detection and prints one line for every feature flag that ended up
   set in __cpu_features.  */
#include <stdio.h>

/* Print "<feature> found" if FEATURE is set in __cpu_features.  Wrapped
   in do/while (0) so that the macro expands to a single statement and
   cannot capture an `else' that follows it.  */
#define report(feature) \
  do \
    { \
      if ((feature) & __cpu_features) \
        printf (#feature " found\n"); \
    } \
  while (0)

int main (void)
{
  __cpu_features_init ();

  report (_CRT_CMPXCHG8B);
  report (_CRT_CMOV);
  report (_CRT_MMX);
  report (_CRT_FXSR);
  report (_CRT_SSE);
  report (_CRT_SSE2);
  report (_CRT_SSE3);
  report (_CRT_CMPXCHG16B);
  report (_CRT_3DNOW);
  report (_CRT_3DNOWP);

  return 0;
}
#endif

23
winsup/mingw/cpu_features.h Executable file
View File

@ -0,0 +1,23 @@
#ifndef _CPU_FEATURES_H
#define _CPU_FEATURES_H
#include <stdbool.h>

/* Feature flags stored in __cpu_features, one bit per feature.  */
#define _CRT_CMPXCHG8B 0x0001
#define _CRT_CMOV 0x0002
#define _CRT_MMX 0x0004
#define _CRT_FXSR 0x0008
#define _CRT_SSE 0x0010
#define _CRT_SSE2 0x0020
#define _CRT_SSE3 0x0040
#define _CRT_CMPXCHG16B 0x0080
#define _CRT_3DNOW 0x0100
#define _CRT_3DNOWP 0x0200

/* Bitmap of the _CRT_* flags above; zero until __cpu_features_init ()
   has been called.  */
extern unsigned int __cpu_features;

/* Probe the CPU and fill in __cpu_features.  Declared here so that the
   startup code that calls it sees a prototype.  */
void __cpu_features_init (void);

/* Currently we use this in fpenv functions.  Nonzero when the SSE flag
   is set in __cpu_features.  The expansion is parenthesized so that it
   behaves as a single operand, e.g. in `!__HAS_SSE'.  */
#define __HAS_SSE (__cpu_features & _CRT_SSE)
#endif

View File

@ -27,6 +27,7 @@
* be manually synchronized, but it does lead to this not-generally-
* a-good-idea use of include. */
#include "init.c"
#include "cpu_features.h"
extern void _pei386_runtime_relocator (void);
@ -195,6 +196,7 @@ __mingw_CRTStartup (void)
/*
* Initialize floating point unit.
*/
__cpu_features_init (); /* Do we have SSE, etc.*/
_fpreset (); /* Supplied by the runtime library. */
/*

View File

@ -1,7 +1,6 @@
#ifndef _FENV_H_
#define _FENV_H_
/* FPU status word exception flags */
#define FE_INVALID 0x01
#define FE_DENORMAL 0x02
@ -18,6 +17,18 @@
#define FE_UPWARD 0x0800
#define FE_TOWARDZERO 0x0c00
/* The MXCSR exception flags are the same as the
FE flags. */
#define __MXCSR_EXCEPT_FLAG_SHIFT 0
/* How much to shift FE status word exception flags
to get the MXCSR exception masks. */
#define __MXCSR_EXCEPT_MASK_SHIFT 7
/* How much to shift FE control word rounding flags
to get the MXCSR rounding flags. */
#define __MXCSR_ROUND_FLAG_SHIFT 3
#ifndef RC_INVOKED
/*
For now, support only for the basic abstraction of flags that are
@ -26,8 +37,10 @@
*/
typedef unsigned short fexcept_t;
/* This 28-byte struct represents the entire floating point
environment as stored by fnstenv or fstenv */
/* This 32-byte struct represents the entire floating point
environment as stored by fnstenv or fstenv, augmented by
the contents of the MXCSR register, as stored by stmxcsr
(if CPU supports it). */
typedef struct
{
unsigned short __control_word;
@ -40,8 +53,9 @@ typedef struct
unsigned short __ip_selector;
unsigned short __opcode;
unsigned int __data_offset;
unsigned short __data_selector;
unsigned short __unused3;
unsigned short __data_selector;
unsigned short __unused3;
unsigned int __mxcsr; /* contents of the MXCSR register */
} fenv_t;

View File

@ -1,4 +1,5 @@
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.2.1
The feclearexcept function clears the supported exceptions
@ -7,9 +8,17 @@
int feclearexcept (int excepts)
{
fenv_t _env;
excepts &= FE_ALL_EXCEPT;
__asm__ volatile ("fnstenv %0;" : "=m" (_env)); /* get the env */
_env.__status_word &= ~(excepts & FE_ALL_EXCEPT); /* clear the except */
_env.__status_word &= ~excepts; /* clear the except */
__asm__ volatile ("fldenv %0;" :: "m" (_env)); /*set the env */
if (__HAS_SSE)
{
unsigned _csr;
__asm__ volatile("stmxcsr %0" : "=m" (_csr)); /* get the register */
_csr &= ~excepts; /* clear the except */
__asm__ volatile("ldmxcsr %0" : : "m" (_csr)); /* set the register */
}
return 0;
}

View File

@ -1,4 +1,5 @@
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.4.1
The fegetenv function stores the current floating-point environment
@ -10,5 +11,10 @@ int fegetenv (fenv_t * envp)
/* fnstenv sets control word to non-stop for all exceptions, so we
need to reload our env to restore the original mask. */
__asm__ ("fldenv %0" : : "m" (*envp));
/* And the SSE environment. */
if (__HAS_SSE)
__asm__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
return 0;
}

View File

@ -1,4 +1,5 @@
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.3.1
The fegetround function returns the value of the rounding direction
@ -9,6 +10,10 @@ fegetround (void)
{
unsigned short _cw;
__asm__ ("fnstcw %0;" : "=m" (_cw));
/* If the MXCSR flag is different, there is no way to indicate, so just
report the FPU flag. */
return _cw
& (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO);
}

View File

@ -1,4 +1,5 @@
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.4.2
The feholdexcept function saves the current floating-point
@ -12,5 +13,18 @@ int feholdexcept (fenv_t * envp)
/* fnstenv sets control word to non-stop for all exceptions, so all we
need to do is clear the exception flags. */
__asm__ ("fnclex");
if (__HAS_SSE)
{
unsigned int _csr;
/* Save the SSE MXCSR register. */
__asm__ ("stmxcsr %0" : "=m" (envp->__mxcsr));
/* Clear the exception flags. */
_csr = envp->__mxcsr & ~FE_ALL_EXCEPT;
/* Set exception mask to non-stop */
_csr |= (FE_ALL_EXCEPT << __MXCSR_EXCEPT_MASK_SHIFT) /*= 0x1f80 */;
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
}
return 0;
}

View File

@ -1,5 +1,6 @@
#include <fenv.h>
#include <float.h>
#include "cpu_features.h"
/* 7.6.4.3
The fesetenv function establishes the floating-point environment
@ -15,6 +16,11 @@ extern void (*_imp___fpreset)( void ) ;
int fesetenv (const fenv_t * envp)
{
/* Default mxcsr status is to mask all exceptions. All other bits
are zero. */
unsigned int _csr = FE_ALL_EXCEPT << __MXCSR_EXCEPT_MASK_SHIFT /*= 0x1f80 */;
if (envp == FE_PC64_ENV)
/*
* fninit initializes the control register to 0x37f,
@ -37,7 +43,15 @@ int fesetenv (const fenv_t * envp)
_fpreset();
else
__asm__ ("fldenv %0;" : : "m" (*envp));
{
__asm__ ("fldenv %0;" : : "m" (*envp));
/* Setting the reserved high order bits of MXCSR causes a segfault */
_csr = envp ->__mxcsr & 0xffff;
}
/* Set MXCSR */
if (__HAS_SSE)
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
return 0;
}

View File

@ -1,4 +1,5 @@
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.2.4
The fesetexceptflag function sets the complete status for those
@ -18,5 +19,15 @@ int fesetexceptflag (const fexcept_t * flagp, int excepts)
_env.__status_word &= ~excepts;
_env.__status_word |= (*flagp & excepts);
__asm__ volatile ("fldenv %0;" : : "m" (_env));
if (__HAS_SSE)
{
unsigned int _csr;
__asm__ __volatile__("stmxcsr %0" : "=m" (_csr));
_csr &= ~excepts;
_csr |= *flagp & excepts;
__asm__ volatile ("ldmxcsr %0" : : "m" (_csr));
}
return 0;
}

View File

@ -1,4 +1,6 @@
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.3.2
The fesetround function establishes the rounding direction
represented by its argument round. If the argument is not equal
@ -15,5 +17,14 @@ int fesetround (int mode)
_cw &= ~(FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO);
_cw |= mode;
__asm__ volatile ("fldcw %0;" : : "m" (_cw));
if (__HAS_SSE)
{
__asm__ volatile ("stmxcsr %0" : "=m" (_cw));
_cw &= ~ 0x6000;
_cw |= (mode << __MXCSR_ROUND_FLAG_SHIFT);
__asm__ volatile ("ldmxcsr %0" : : "m" (_cw));
}
return 0;
}

View File

@ -1,4 +1,5 @@
#include <fenv.h>
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.2.5
The fetestexcept function determines which of a specified subset of
the exception flags are currently set. The excepts argument
@ -9,7 +10,18 @@
int fetestexcept (int excepts)
{
unsigned short _sw;
__asm__ ("fnstsw %%ax" : "=a" (_sw));
return _sw & excepts & FE_ALL_EXCEPT;
unsigned int _res;
__asm__ ("fnstsw %%ax" : "=a" (_res));
/* If SSE supported, return the union of the FPU and SSE flags. */
if (__HAS_SSE)
{
unsigned int _csr;
__asm__ volatile("stmxcsr %0" : "=m" (_csr));
_res |= _csr;
}
return (_res & excepts & FE_ALL_EXCEPT);
}

View File

@ -1,4 +1,5 @@
#include <fenv.h>
#include "cpu_features.h"
/* 7.6.4.4
The feupdateenv function saves the currently raised exceptions in
@ -8,13 +9,18 @@
set by a call to feholdexcept or fegetenv, or equal the macro
FE_DFL_ENV or an implementation-defined environment macro. */
/* FIXME: this works but surely there must be a better way. */
int feupdateenv (const fenv_t * envp)
{
unsigned int _fexcept = fetestexcept (FE_ALL_EXCEPT); /*save excepts */
unsigned int _fexcept;
__asm__ ("fnstsw %%ax" : "=a" (_fexcept)); /*save excepts */
if (__HAS_SSE)
{
unsigned int _csr;
__asm__ ("stmxcsr %0" : "=m" (_csr));
_fexcept |= _csr;
}
fesetenv (envp); /* install the env */
feraiseexcept (_fexcept); /* raise the execept */
feraiseexcept (_fexcept & FE_ALL_EXCEPT); /* raise the execeptions */
return 0;
}