2007-09-28  Patrick Mansfield  <patmans@us.ibm.com>

	* libm/machine/spu/Makefile.am: Add new C files.
	* libm/machine/spu/Makefile.in: Regenerate.
	* libm/machine/spu/headers/acos.h: New file.
	* libm/machine/spu/headers/acosd2.h: Ditto.
	* libm/machine/spu/headers/acosf.h: Ditto.
	* libm/machine/spu/headers/acosf4.h: Ditto.
	* libm/machine/spu/headers/acosh.h: Ditto.
	* libm/machine/spu/headers/acoshd2.h: Ditto.
	* libm/machine/spu/headers/acoshf.h: Ditto.
	* libm/machine/spu/headers/acoshf4.h: Ditto.
	* libm/machine/spu/headers/asin.h: Ditto.
	* libm/machine/spu/headers/asind2.h: Ditto.
	* libm/machine/spu/headers/asinf.h: Ditto.
	* libm/machine/spu/headers/asinf4.h: Ditto.
	* libm/machine/spu/headers/asinh.h: Ditto.
	* libm/machine/spu/headers/asinhd2.h: Ditto.
	* libm/machine/spu/headers/asinhf.h: Ditto.
	* libm/machine/spu/headers/asinhf4.h: Ditto.
	* libm/machine/spu/headers/atan.h: Ditto.
	* libm/machine/spu/headers/atan2.h: Ditto.
	* libm/machine/spu/headers/atan2d2.h: Ditto.
	* libm/machine/spu/headers/atan2f.h: Ditto.
	* libm/machine/spu/headers/atan2f4.h: Ditto.
	* libm/machine/spu/headers/atand2.h: Ditto.
	* libm/machine/spu/headers/atanf4.h: Ditto.
	* libm/machine/spu/headers/atanh.h: Ditto.
	* libm/machine/spu/headers/atanhd2.h: Ditto.
	* libm/machine/spu/headers/atanhf.h: Ditto.
	* libm/machine/spu/headers/atanhf4.h: Ditto.
	* libm/machine/spu/headers/cos.h: Ditto.
	* libm/machine/spu/headers/cos_sin.h: Ditto.
	* libm/machine/spu/headers/cosd2.h: Ditto.
	* libm/machine/spu/headers/cosf.h: Ditto.
	* libm/machine/spu/headers/cosf4.h: Ditto.
	* libm/machine/spu/headers/cosh.h: Ditto.
	* libm/machine/spu/headers/coshd2.h: Ditto.
	* libm/machine/spu/headers/coshf.h: Ditto.
	* libm/machine/spu/headers/coshf4.h: Ditto.
	* libm/machine/spu/headers/divd2.h: Ditto.
	* libm/machine/spu/headers/divf4.h: Ditto.
	* libm/machine/spu/headers/erf.h: Ditto.
	* libm/machine/spu/headers/erf_utils.h: Ditto.
	* libm/machine/spu/headers/erfc.h: Ditto.
	* libm/machine/spu/headers/erfcd2.h: Ditto.
	* libm/machine/spu/headers/erfcf.h: Ditto.
	* libm/machine/spu/headers/erfcf4.h: Ditto.
	* libm/machine/spu/headers/erfd2.h: Ditto.
	* libm/machine/spu/headers/erff.h: Ditto.
	* libm/machine/spu/headers/erff4.h: Ditto.
	* libm/machine/spu/headers/exp.h: Ditto.
	* libm/machine/spu/headers/exp2.h: Ditto.
	* libm/machine/spu/headers/exp2d2.h: Ditto.
	* libm/machine/spu/headers/exp2f4.h: Ditto.
	* libm/machine/spu/headers/expd2.h: Ditto.
	* libm/machine/spu/headers/expf4.h: Ditto.
	* libm/machine/spu/headers/expm1.h: Ditto.
	* libm/machine/spu/headers/expm1d2.h: Ditto.
	* libm/machine/spu/headers/expm1f.h: Ditto.
	* libm/machine/spu/headers/expm1f4.h: Ditto.
	* libm/machine/spu/headers/floord2.h: Ditto.
	* libm/machine/spu/headers/floorf4.h: Ditto.
	* libm/machine/spu/headers/hypot.h: Ditto.
	* libm/machine/spu/headers/hypotd2.h: Ditto.
	* libm/machine/spu/headers/hypotf.h: Ditto.
	* libm/machine/spu/headers/hypotf4.h: Ditto.
	* libm/machine/spu/headers/isnan.h: Ditto.
	* libm/machine/spu/headers/isnand2.h: Ditto.
	* libm/machine/spu/headers/isnanf.h: Ditto.
	* libm/machine/spu/headers/isnanf4.h: Ditto.
	* libm/machine/spu/headers/ldexpd2.h: Ditto.
	* libm/machine/spu/headers/ldexpf4.h: Ditto.
	* libm/machine/spu/headers/lgamma.h: Ditto.
	* libm/machine/spu/headers/lgammad2.h: Ditto.
	* libm/machine/spu/headers/lgammaf.h: Ditto.
	* libm/machine/spu/headers/lgammaf4.h: Ditto.
	* libm/machine/spu/headers/log.h: Ditto.
	* libm/machine/spu/headers/log10.h: Ditto.
	* libm/machine/spu/headers/log10d2.h: Ditto.
	* libm/machine/spu/headers/log1p.h: Ditto.
	* libm/machine/spu/headers/log1pd2.h: Ditto.
	* libm/machine/spu/headers/log1pf.h: Ditto.
	* libm/machine/spu/headers/log1pf4.h: Ditto.
	* libm/machine/spu/headers/log2.h: Ditto.
	* libm/machine/spu/headers/log2d2.h: Ditto.
	* libm/machine/spu/headers/log2f4.h: Ditto.
	* libm/machine/spu/headers/logbf.h: Ditto.
	* libm/machine/spu/headers/logbf4.h: Ditto.
	* libm/machine/spu/headers/logd2.h: Ditto.
	* libm/machine/spu/headers/logf4.h: Ditto.
	* libm/machine/spu/headers/nearbyintf.h: Ditto.
	* libm/machine/spu/headers/nearbyintf4.h: Ditto.
	* libm/machine/spu/headers/nextafter.h: Ditto.
	* libm/machine/spu/headers/nextafterd2.h: Ditto.
	* libm/machine/spu/headers/nextafterf.h: Ditto.
	* libm/machine/spu/headers/nextafterf4.h: Ditto.
	* libm/machine/spu/headers/pow.h: Ditto.
	* libm/machine/spu/headers/powd2.h: Ditto.
	* libm/machine/spu/headers/powf.h: Ditto.
	* libm/machine/spu/headers/powf4.h: Ditto.
	* libm/machine/spu/headers/recipd2.h: Ditto.
	* libm/machine/spu/headers/recipf4.h: Ditto.
	* libm/machine/spu/headers/rintf.h: Ditto.
	* libm/machine/spu/headers/rintf4.h: Ditto.
	* libm/machine/spu/headers/scalbnf4.h: Ditto.
	* libm/machine/spu/headers/signbit.h: Ditto.
	* libm/machine/spu/headers/signbitd2.h: Ditto.
	* libm/machine/spu/headers/simdmath.h: Ditto.
	* libm/machine/spu/headers/sin.h: Ditto.
	* libm/machine/spu/headers/sincos.h: Ditto.
	* libm/machine/spu/headers/sincosd2.h: Ditto.
	* libm/machine/spu/headers/sincosf.h: Ditto.
	* libm/machine/spu/headers/sincosf4.h: Ditto.
	* libm/machine/spu/headers/sind2.h: Ditto.
	* libm/machine/spu/headers/sinf.h: Ditto.
	* libm/machine/spu/headers/sinf4.h: Ditto.
	* libm/machine/spu/headers/sinh.h: Ditto.
	* libm/machine/spu/headers/sinhd2.h: Ditto.
	* libm/machine/spu/headers/sinhf.h: Ditto.
	* libm/machine/spu/headers/sinhf4.h: Ditto.
	* libm/machine/spu/headers/sqrtd2.h: Ditto.
	* libm/machine/spu/headers/sqrtf.h: Ditto.
	* libm/machine/spu/headers/sqrtf4.h: Ditto.
	* libm/machine/spu/headers/tan.h: Ditto.
	* libm/machine/spu/headers/tand2.h: Ditto.
	* libm/machine/spu/headers/tanf.h: Ditto.
	* libm/machine/spu/headers/tanf4.h: Ditto.
	* libm/machine/spu/headers/tanh.h: Ditto.
	* libm/machine/spu/headers/tanhd2.h: Ditto.
	* libm/machine/spu/headers/tanhf.h: Ditto.
	* libm/machine/spu/headers/tanhf4.h: Ditto.
	* libm/machine/spu/headers/tgamma.h: Ditto.
	* libm/machine/spu/headers/tgammad2.h: Ditto.
	* libm/machine/spu/headers/tgammaf.h: Ditto.
	* libm/machine/spu/headers/tgammaf4.h: Ditto.
	* libm/machine/spu/headers/truncd2.h: Ditto.
	* libm/machine/spu/headers/truncf4.h: Ditto.
	* libm/machine/spu/log2.c: Ditto.
	* libm/machine/spu/s_asinh.c: Ditto.
	* libm/machine/spu/s_atan.c: Ditto.
	* libm/machine/spu/s_cos.c: Ditto.
	* libm/machine/spu/s_erf.c: Ditto.
	* libm/machine/spu/s_expm1.c: Ditto.
	* libm/machine/spu/s_isnan.c: Ditto.
	* libm/machine/spu/s_log1p.c: Ditto.
	* libm/machine/spu/s_nextafter.c: Ditto.
	* libm/machine/spu/s_sin.c: Ditto.
	* libm/machine/spu/s_tan.c: Ditto.
	* libm/machine/spu/s_tanh.c: Ditto.
	* libm/machine/spu/sf_asinh.c: Ditto.
	* libm/machine/spu/sf_cos.c: Ditto.
	* libm/machine/spu/sf_erf.c: Ditto.
	* libm/machine/spu/sf_expm1.c: Ditto.
	* libm/machine/spu/sf_log1p.c: Ditto.
	* libm/machine/spu/sf_logb.c: Ditto.
	* libm/machine/spu/sf_nearbyint.c: Ditto.
	* libm/machine/spu/sf_nextafter.c: Ditto.
	* libm/machine/spu/sf_rint.c: Ditto.
	* libm/machine/spu/sf_sin.c: Ditto.
	* libm/machine/spu/sf_tan.c: Ditto.
	* libm/machine/spu/sf_tanh.c: Ditto.
	* libm/machine/spu/w_acos.c: Ditto.
	* libm/machine/spu/w_acosh.c: Ditto.
	* libm/machine/spu/w_asin.c: Ditto.
	* libm/machine/spu/w_atan2.c: Ditto.
	* libm/machine/spu/w_atanh.c: Ditto.
	* libm/machine/spu/w_cosh.c: Ditto.
	* libm/machine/spu/w_exp.c: Ditto.
	* libm/machine/spu/w_exp2.c: Ditto.
	* libm/machine/spu/w_hypot.c: Ditto.
	* libm/machine/spu/w_lgamma.c: Ditto.
	* libm/machine/spu/w_log.c: Ditto.
	* libm/machine/spu/w_log10.c: Ditto.
	* libm/machine/spu/w_pow.c: Ditto.
	* libm/machine/spu/w_sincos.c: Ditto.
	* libm/machine/spu/w_sinh.c: Ditto.
	* libm/machine/spu/w_tgamma.c: Ditto.
	* libm/machine/spu/wf_acos.c: Ditto.
	* libm/machine/spu/wf_acosh.c: Ditto.
	* libm/machine/spu/wf_asin.c: Ditto.
	* libm/machine/spu/wf_atan2.c: Ditto.
	* libm/machine/spu/wf_atanh.c: Ditto.
	* libm/machine/spu/wf_cosh.c: Ditto.
	* libm/machine/spu/wf_hypot.c: Ditto.
	* libm/machine/spu/wf_lgamma.c: Ditto.
	* libm/machine/spu/wf_pow.c: Ditto.
	* libm/machine/spu/wf_sincos.c: Ditto.
	* libm/machine/spu/wf_sinh.c: Ditto.
	* libm/machine/spu/wf_sqrt.c: Ditto.
	* libm/machine/spu/wf_tgamma.c: Ditto.
Commit e30a7b84aa (parent 505ea71432), committed by Jeff Johnston on 2007-09-28 18:44:24 +00:00.
190 changed files with 12452 additions and 37 deletions.
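Most of the new files follow one pattern: for each math routine there is a SIMD implementation header whose suffix gives the vector width (acosd2.h works on a vector of two doubles, acosf4.h on a vector of four floats), a scalar helper header (acos.h, acosf.h), and a thin w_*.c or wf_*.c source file that supplies the standard libm entry point. As a rough sketch only -- not the actual contents of any file in this commit -- one of the double-precision wrappers could be expected to look like the following, assuming the scalar header provides an inline _acos helper (the helper name appears in the header excerpt at the end of this page; the body shown here is an assumption):

/* Illustrative sketch only, not code from this commit: the w_*.c and
   wf_*.c files are assumed to be thin wrappers that forward to an
   inline helper defined in the matching header under headers/.  */
#include <math.h>
#include "headers/acos.h"

double acos(double x)
{
  return _acos(x);
}

The wf_*.c files would presumably do the same for the single-precision variants, forwarding to helpers built on the f4 headers.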


libm/machine/spu/Makefile.am

@@ -9,17 +9,25 @@ LIB_SOURCES = \
feclearexcept.c fe_dfl_env.c fegetenv.c fegetexceptflag.c \
fegetround.c feholdexcept.c feraiseexcept.c fesetenv.c \
fesetexceptflag.c fesetround.c fetestexcept.c feupdateenv.c \
llrint.c llrintf.c llround.c llroundf.c log2f.c s_cbrt.c s_ceil.c \
s_copysign.c s_fabs.c sf_atan.c sf_cbrt.c sf_ceil.c sf_copysign.c \
s_fdim.c sf_fabs.c sf_fdim.c sf_finite.c sf_floor.c sf_fma.c \
sf_fmax.c sf_fmin.c sf_fpclassify.c sf_frexp.c sf_ilogb.c \
sf_isinf.c sf_isinff.c sf_isnan.c sf_isnanf.c sf_ldexp.c s_floor.c \
llrint.c llrintf.c llround.c llroundf.c log2.c log2f.c s_asinh.c \
s_atan.c s_cbrt.c s_ceil.c s_copysign.c s_cos.c s_erf.c s_expm1.c \
s_fabs.c sf_asinh.c sf_atan.c sf_cbrt.c sf_ceil.c sf_copysign.c \
sf_cos.c s_fdim.c sf_erf.c sf_expm1.c sf_fabs.c sf_fdim.c \
sf_finite.c sf_floor.c sf_fma.c sf_fmax.c sf_fmin.c \
sf_fpclassify.c sf_frexp.c sf_ilogb.c sf_isinf.c sf_isinff.c \
sf_isnan.c sf_isnanf.c sf_ldexp.c sf_log1p.c sf_logb.c s_floor.c \
sf_lrint.c sf_lround.c s_fma.c s_fmax.c s_fmin.c sf_nan.c \
sf_remquo.c s_frexp.c sf_round.c sf_scalbn.c sf_trunc.c s_ilogb.c \
s_ldexp.c s_lrint.c s_lround.c s_nearbyint.c s_remquo.c s_rint.c \
s_round.c s_scalbn.c s_trunc.c wf_exp2.c wf_exp.c wf_fmod.c \
wf_log10.c wf_log.c w_fmod.c wf_remainder.c w_remainder.c \
w_sqrt.c
sf_nearbyint.c sf_nextafter.c sf_remquo.c s_frexp.c sf_rint.c \
sf_round.c sf_scalbn.c sf_sin.c sf_tan.c sf_tanh.c sf_trunc.c \
s_ilogb.c s_isnan.c s_ldexp.c s_log1p.c s_lrint.c s_lround.c \
s_nearbyint.c s_nextafter.c s_remquo.c s_rint.c s_round.c \
s_scalbn.c s_sin.c s_tan.c s_tanh.c s_trunc.c w_acos.c w_acosh.c \
w_asin.c w_atan2.c w_atanh.c w_cosh.c w_exp2.c w_exp.c wf_acos.c \
wf_acosh.c wf_asin.c wf_atan2.c wf_atanh.c wf_cosh.c wf_exp2.c \
wf_exp.c wf_fmod.c wf_hypot.c wf_lgamma.c wf_log10.c wf_log.c \
w_fmod.c wf_pow.c wf_remainder.c wf_sincos.c wf_sinh.c wf_sqrt.c \
wf_tgamma.c w_hypot.c w_lgamma.c w_log10.c w_log.c w_pow.c \
w_remainder.c w_sincos.c w_sinh.c w_sqrt.c w_tgamma.c
noinst_LIBRARIES = lib.a
lib_a_SOURCES = $(LIB_SOURCES)

libm/machine/spu/Makefile.in

@@ -77,7 +77,34 @@ DIST_COMMON = $(srcdir)/../../../Makefile.shared \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile $(srcdir)/../../../../compile \
$(srcdir)/../../../../compile
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../../../acinclude.m4 \
@@ -100,12 +127,17 @@ am__objects_1 = lib_a-feclearexcept.$(OBJEXT) \
lib_a-fesetround.$(OBJEXT) lib_a-fetestexcept.$(OBJEXT) \
lib_a-feupdateenv.$(OBJEXT) lib_a-llrint.$(OBJEXT) \
lib_a-llrintf.$(OBJEXT) lib_a-llround.$(OBJEXT) \
lib_a-llroundf.$(OBJEXT) lib_a-log2f.$(OBJEXT) \
lib_a-s_cbrt.$(OBJEXT) lib_a-s_ceil.$(OBJEXT) \
lib_a-s_copysign.$(OBJEXT) lib_a-s_fabs.$(OBJEXT) \
lib_a-sf_atan.$(OBJEXT) lib_a-sf_cbrt.$(OBJEXT) \
lib_a-sf_ceil.$(OBJEXT) lib_a-sf_copysign.$(OBJEXT) \
lib_a-s_fdim.$(OBJEXT) lib_a-sf_fabs.$(OBJEXT) \
lib_a-llroundf.$(OBJEXT) lib_a-log2.$(OBJEXT) \
lib_a-log2f.$(OBJEXT) lib_a-s_asinh.$(OBJEXT) \
lib_a-s_atan.$(OBJEXT) lib_a-s_cbrt.$(OBJEXT) \
lib_a-s_ceil.$(OBJEXT) lib_a-s_copysign.$(OBJEXT) \
lib_a-s_cos.$(OBJEXT) lib_a-s_erf.$(OBJEXT) \
lib_a-s_expm1.$(OBJEXT) lib_a-s_fabs.$(OBJEXT) \
lib_a-sf_asinh.$(OBJEXT) lib_a-sf_atan.$(OBJEXT) \
lib_a-sf_cbrt.$(OBJEXT) lib_a-sf_ceil.$(OBJEXT) \
lib_a-sf_copysign.$(OBJEXT) lib_a-sf_cos.$(OBJEXT) \
lib_a-s_fdim.$(OBJEXT) lib_a-sf_erf.$(OBJEXT) \
lib_a-sf_expm1.$(OBJEXT) lib_a-sf_fabs.$(OBJEXT) \
lib_a-sf_fdim.$(OBJEXT) lib_a-sf_finite.$(OBJEXT) \
lib_a-sf_floor.$(OBJEXT) lib_a-sf_fma.$(OBJEXT) \
lib_a-sf_fmax.$(OBJEXT) lib_a-sf_fmin.$(OBJEXT) \
@@ -113,22 +145,43 @@ am__objects_1 = lib_a-feclearexcept.$(OBJEXT) \
lib_a-sf_ilogb.$(OBJEXT) lib_a-sf_isinf.$(OBJEXT) \
lib_a-sf_isinff.$(OBJEXT) lib_a-sf_isnan.$(OBJEXT) \
lib_a-sf_isnanf.$(OBJEXT) lib_a-sf_ldexp.$(OBJEXT) \
lib_a-sf_log1p.$(OBJEXT) lib_a-sf_logb.$(OBJEXT) \
lib_a-s_floor.$(OBJEXT) lib_a-sf_lrint.$(OBJEXT) \
lib_a-sf_lround.$(OBJEXT) lib_a-s_fma.$(OBJEXT) \
lib_a-s_fmax.$(OBJEXT) lib_a-s_fmin.$(OBJEXT) \
lib_a-sf_nan.$(OBJEXT) lib_a-sf_remquo.$(OBJEXT) \
lib_a-s_frexp.$(OBJEXT) lib_a-sf_round.$(OBJEXT) \
lib_a-sf_scalbn.$(OBJEXT) lib_a-sf_trunc.$(OBJEXT) \
lib_a-s_ilogb.$(OBJEXT) lib_a-s_ldexp.$(OBJEXT) \
lib_a-sf_nan.$(OBJEXT) lib_a-sf_nearbyint.$(OBJEXT) \
lib_a-sf_nextafter.$(OBJEXT) lib_a-sf_remquo.$(OBJEXT) \
lib_a-s_frexp.$(OBJEXT) lib_a-sf_rint.$(OBJEXT) \
lib_a-sf_round.$(OBJEXT) lib_a-sf_scalbn.$(OBJEXT) \
lib_a-sf_sin.$(OBJEXT) lib_a-sf_tan.$(OBJEXT) \
lib_a-sf_tanh.$(OBJEXT) lib_a-sf_trunc.$(OBJEXT) \
lib_a-s_ilogb.$(OBJEXT) lib_a-s_isnan.$(OBJEXT) \
lib_a-s_ldexp.$(OBJEXT) lib_a-s_log1p.$(OBJEXT) \
lib_a-s_lrint.$(OBJEXT) lib_a-s_lround.$(OBJEXT) \
lib_a-s_nearbyint.$(OBJEXT) lib_a-s_remquo.$(OBJEXT) \
lib_a-s_rint.$(OBJEXT) lib_a-s_round.$(OBJEXT) \
lib_a-s_scalbn.$(OBJEXT) lib_a-s_trunc.$(OBJEXT) \
lib_a-s_nearbyint.$(OBJEXT) lib_a-s_nextafter.$(OBJEXT) \
lib_a-s_remquo.$(OBJEXT) lib_a-s_rint.$(OBJEXT) \
lib_a-s_round.$(OBJEXT) lib_a-s_scalbn.$(OBJEXT) \
lib_a-s_sin.$(OBJEXT) lib_a-s_tan.$(OBJEXT) \
lib_a-s_tanh.$(OBJEXT) lib_a-s_trunc.$(OBJEXT) \
lib_a-w_acos.$(OBJEXT) lib_a-w_acosh.$(OBJEXT) \
lib_a-w_asin.$(OBJEXT) lib_a-w_atan2.$(OBJEXT) \
lib_a-w_atanh.$(OBJEXT) lib_a-w_cosh.$(OBJEXT) \
lib_a-w_exp2.$(OBJEXT) lib_a-w_exp.$(OBJEXT) \
lib_a-wf_acos.$(OBJEXT) lib_a-wf_acosh.$(OBJEXT) \
lib_a-wf_asin.$(OBJEXT) lib_a-wf_atan2.$(OBJEXT) \
lib_a-wf_atanh.$(OBJEXT) lib_a-wf_cosh.$(OBJEXT) \
lib_a-wf_exp2.$(OBJEXT) lib_a-wf_exp.$(OBJEXT) \
lib_a-wf_fmod.$(OBJEXT) lib_a-wf_log10.$(OBJEXT) \
lib_a-wf_fmod.$(OBJEXT) lib_a-wf_hypot.$(OBJEXT) \
lib_a-wf_lgamma.$(OBJEXT) lib_a-wf_log10.$(OBJEXT) \
lib_a-wf_log.$(OBJEXT) lib_a-w_fmod.$(OBJEXT) \
lib_a-wf_remainder.$(OBJEXT) lib_a-w_remainder.$(OBJEXT) \
lib_a-w_sqrt.$(OBJEXT)
lib_a-wf_pow.$(OBJEXT) lib_a-wf_remainder.$(OBJEXT) \
lib_a-wf_sincos.$(OBJEXT) lib_a-wf_sinh.$(OBJEXT) \
lib_a-wf_sqrt.$(OBJEXT) lib_a-wf_tgamma.$(OBJEXT) \
lib_a-w_hypot.$(OBJEXT) lib_a-w_lgamma.$(OBJEXT) \
lib_a-w_log10.$(OBJEXT) lib_a-w_log.$(OBJEXT) \
lib_a-w_pow.$(OBJEXT) lib_a-w_remainder.$(OBJEXT) \
lib_a-w_sincos.$(OBJEXT) lib_a-w_sinh.$(OBJEXT) \
lib_a-w_sqrt.$(OBJEXT) lib_a-w_tgamma.$(OBJEXT)
am_lib_a_OBJECTS = $(am__objects_1)
lib_a_OBJECTS = $(am_lib_a_OBJECTS)
DEFAULT_INCLUDES = -I. -I$(srcdir)
@@ -260,17 +313,25 @@ LIB_SOURCES = \
feclearexcept.c fe_dfl_env.c fegetenv.c fegetexceptflag.c \
fegetround.c feholdexcept.c feraiseexcept.c fesetenv.c \
fesetexceptflag.c fesetround.c fetestexcept.c feupdateenv.c \
llrint.c llrintf.c llround.c llroundf.c log2f.c s_cbrt.c s_ceil.c \
s_copysign.c s_fabs.c sf_atan.c sf_cbrt.c sf_ceil.c sf_copysign.c \
s_fdim.c sf_fabs.c sf_fdim.c sf_finite.c sf_floor.c sf_fma.c \
sf_fmax.c sf_fmin.c sf_fpclassify.c sf_frexp.c sf_ilogb.c \
sf_isinf.c sf_isinff.c sf_isnan.c sf_isnanf.c sf_ldexp.c s_floor.c \
llrint.c llrintf.c llround.c llroundf.c log2.c log2f.c s_asinh.c \
s_atan.c s_cbrt.c s_ceil.c s_copysign.c s_cos.c s_erf.c s_expm1.c \
s_fabs.c sf_asinh.c sf_atan.c sf_cbrt.c sf_ceil.c sf_copysign.c \
sf_cos.c s_fdim.c sf_erf.c sf_expm1.c sf_fabs.c sf_fdim.c \
sf_finite.c sf_floor.c sf_fma.c sf_fmax.c sf_fmin.c \
sf_fpclassify.c sf_frexp.c sf_ilogb.c sf_isinf.c sf_isinff.c \
sf_isnan.c sf_isnanf.c sf_ldexp.c sf_log1p.c sf_logb.c s_floor.c \
sf_lrint.c sf_lround.c s_fma.c s_fmax.c s_fmin.c sf_nan.c \
sf_remquo.c s_frexp.c sf_round.c sf_scalbn.c sf_trunc.c s_ilogb.c \
s_ldexp.c s_lrint.c s_lround.c s_nearbyint.c s_remquo.c s_rint.c \
s_round.c s_scalbn.c s_trunc.c wf_exp2.c wf_exp.c wf_fmod.c \
wf_log10.c wf_log.c w_fmod.c wf_remainder.c w_remainder.c \
w_sqrt.c
sf_nearbyint.c sf_nextafter.c sf_remquo.c s_frexp.c sf_rint.c \
sf_round.c sf_scalbn.c sf_sin.c sf_tan.c sf_tanh.c sf_trunc.c \
s_ilogb.c s_isnan.c s_ldexp.c s_log1p.c s_lrint.c s_lround.c \
s_nearbyint.c s_nextafter.c s_remquo.c s_rint.c s_round.c \
s_scalbn.c s_sin.c s_tan.c s_tanh.c s_trunc.c w_acos.c w_acosh.c \
w_asin.c w_atan2.c w_atanh.c w_cosh.c w_exp2.c w_exp.c wf_acos.c \
wf_acosh.c wf_asin.c wf_atan2.c wf_atanh.c wf_cosh.c wf_exp2.c \
wf_exp.c wf_fmod.c wf_hypot.c wf_lgamma.c wf_log10.c wf_log.c \
w_fmod.c wf_pow.c wf_remainder.c wf_sincos.c wf_sinh.c wf_sqrt.c \
wf_tgamma.c w_hypot.c w_lgamma.c w_log10.c w_log.c w_pow.c \
w_remainder.c w_sincos.c w_sinh.c w_sqrt.c w_tgamma.c
noinst_LIBRARIES = lib.a
lib_a_SOURCES = $(LIB_SOURCES)
@@ -432,12 +493,30 @@ lib_a-llroundf.o: llroundf.c
lib_a-llroundf.obj: llroundf.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-llroundf.obj `if test -f 'llroundf.c'; then $(CYGPATH_W) 'llroundf.c'; else $(CYGPATH_W) '$(srcdir)/llroundf.c'; fi`
lib_a-log2.o: log2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-log2.o `test -f 'log2.c' || echo '$(srcdir)/'`log2.c
lib_a-log2.obj: log2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-log2.obj `if test -f 'log2.c'; then $(CYGPATH_W) 'log2.c'; else $(CYGPATH_W) '$(srcdir)/log2.c'; fi`
lib_a-log2f.o: log2f.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-log2f.o `test -f 'log2f.c' || echo '$(srcdir)/'`log2f.c
lib_a-log2f.obj: log2f.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-log2f.obj `if test -f 'log2f.c'; then $(CYGPATH_W) 'log2f.c'; else $(CYGPATH_W) '$(srcdir)/log2f.c'; fi`
lib_a-s_asinh.o: s_asinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_asinh.o `test -f 's_asinh.c' || echo '$(srcdir)/'`s_asinh.c
lib_a-s_asinh.obj: s_asinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_asinh.obj `if test -f 's_asinh.c'; then $(CYGPATH_W) 's_asinh.c'; else $(CYGPATH_W) '$(srcdir)/s_asinh.c'; fi`
lib_a-s_atan.o: s_atan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_atan.o `test -f 's_atan.c' || echo '$(srcdir)/'`s_atan.c
lib_a-s_atan.obj: s_atan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_atan.obj `if test -f 's_atan.c'; then $(CYGPATH_W) 's_atan.c'; else $(CYGPATH_W) '$(srcdir)/s_atan.c'; fi`
lib_a-s_cbrt.o: s_cbrt.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_cbrt.o `test -f 's_cbrt.c' || echo '$(srcdir)/'`s_cbrt.c
@@ -456,12 +535,36 @@ lib_a-s_copysign.o: s_copysign.c
lib_a-s_copysign.obj: s_copysign.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_copysign.obj `if test -f 's_copysign.c'; then $(CYGPATH_W) 's_copysign.c'; else $(CYGPATH_W) '$(srcdir)/s_copysign.c'; fi`
lib_a-s_cos.o: s_cos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_cos.o `test -f 's_cos.c' || echo '$(srcdir)/'`s_cos.c
lib_a-s_cos.obj: s_cos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_cos.obj `if test -f 's_cos.c'; then $(CYGPATH_W) 's_cos.c'; else $(CYGPATH_W) '$(srcdir)/s_cos.c'; fi`
lib_a-s_erf.o: s_erf.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_erf.o `test -f 's_erf.c' || echo '$(srcdir)/'`s_erf.c
lib_a-s_erf.obj: s_erf.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_erf.obj `if test -f 's_erf.c'; then $(CYGPATH_W) 's_erf.c'; else $(CYGPATH_W) '$(srcdir)/s_erf.c'; fi`
lib_a-s_expm1.o: s_expm1.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_expm1.o `test -f 's_expm1.c' || echo '$(srcdir)/'`s_expm1.c
lib_a-s_expm1.obj: s_expm1.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_expm1.obj `if test -f 's_expm1.c'; then $(CYGPATH_W) 's_expm1.c'; else $(CYGPATH_W) '$(srcdir)/s_expm1.c'; fi`
lib_a-s_fabs.o: s_fabs.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fabs.o `test -f 's_fabs.c' || echo '$(srcdir)/'`s_fabs.c
lib_a-s_fabs.obj: s_fabs.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fabs.obj `if test -f 's_fabs.c'; then $(CYGPATH_W) 's_fabs.c'; else $(CYGPATH_W) '$(srcdir)/s_fabs.c'; fi`
lib_a-sf_asinh.o: sf_asinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_asinh.o `test -f 'sf_asinh.c' || echo '$(srcdir)/'`sf_asinh.c
lib_a-sf_asinh.obj: sf_asinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_asinh.obj `if test -f 'sf_asinh.c'; then $(CYGPATH_W) 'sf_asinh.c'; else $(CYGPATH_W) '$(srcdir)/sf_asinh.c'; fi`
lib_a-sf_atan.o: sf_atan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_atan.o `test -f 'sf_atan.c' || echo '$(srcdir)/'`sf_atan.c
@@ -486,12 +589,30 @@ lib_a-sf_copysign.o: sf_copysign.c
lib_a-sf_copysign.obj: sf_copysign.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_copysign.obj `if test -f 'sf_copysign.c'; then $(CYGPATH_W) 'sf_copysign.c'; else $(CYGPATH_W) '$(srcdir)/sf_copysign.c'; fi`
lib_a-sf_cos.o: sf_cos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_cos.o `test -f 'sf_cos.c' || echo '$(srcdir)/'`sf_cos.c
lib_a-sf_cos.obj: sf_cos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_cos.obj `if test -f 'sf_cos.c'; then $(CYGPATH_W) 'sf_cos.c'; else $(CYGPATH_W) '$(srcdir)/sf_cos.c'; fi`
lib_a-s_fdim.o: s_fdim.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fdim.o `test -f 's_fdim.c' || echo '$(srcdir)/'`s_fdim.c
lib_a-s_fdim.obj: s_fdim.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_fdim.obj `if test -f 's_fdim.c'; then $(CYGPATH_W) 's_fdim.c'; else $(CYGPATH_W) '$(srcdir)/s_fdim.c'; fi`
lib_a-sf_erf.o: sf_erf.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_erf.o `test -f 'sf_erf.c' || echo '$(srcdir)/'`sf_erf.c
lib_a-sf_erf.obj: sf_erf.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_erf.obj `if test -f 'sf_erf.c'; then $(CYGPATH_W) 'sf_erf.c'; else $(CYGPATH_W) '$(srcdir)/sf_erf.c'; fi`
lib_a-sf_expm1.o: sf_expm1.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_expm1.o `test -f 'sf_expm1.c' || echo '$(srcdir)/'`sf_expm1.c
lib_a-sf_expm1.obj: sf_expm1.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_expm1.obj `if test -f 'sf_expm1.c'; then $(CYGPATH_W) 'sf_expm1.c'; else $(CYGPATH_W) '$(srcdir)/sf_expm1.c'; fi`
lib_a-sf_fabs.o: sf_fabs.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_fabs.o `test -f 'sf_fabs.c' || echo '$(srcdir)/'`sf_fabs.c
@@ -582,6 +703,18 @@ lib_a-sf_ldexp.o: sf_ldexp.c
lib_a-sf_ldexp.obj: sf_ldexp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_ldexp.obj `if test -f 'sf_ldexp.c'; then $(CYGPATH_W) 'sf_ldexp.c'; else $(CYGPATH_W) '$(srcdir)/sf_ldexp.c'; fi`
lib_a-sf_log1p.o: sf_log1p.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_log1p.o `test -f 'sf_log1p.c' || echo '$(srcdir)/'`sf_log1p.c
lib_a-sf_log1p.obj: sf_log1p.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_log1p.obj `if test -f 'sf_log1p.c'; then $(CYGPATH_W) 'sf_log1p.c'; else $(CYGPATH_W) '$(srcdir)/sf_log1p.c'; fi`
lib_a-sf_logb.o: sf_logb.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_logb.o `test -f 'sf_logb.c' || echo '$(srcdir)/'`sf_logb.c
lib_a-sf_logb.obj: sf_logb.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_logb.obj `if test -f 'sf_logb.c'; then $(CYGPATH_W) 'sf_logb.c'; else $(CYGPATH_W) '$(srcdir)/sf_logb.c'; fi`
lib_a-s_floor.o: s_floor.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_floor.o `test -f 's_floor.c' || echo '$(srcdir)/'`s_floor.c
@@ -624,6 +757,18 @@ lib_a-sf_nan.o: sf_nan.c
lib_a-sf_nan.obj: sf_nan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nan.obj `if test -f 'sf_nan.c'; then $(CYGPATH_W) 'sf_nan.c'; else $(CYGPATH_W) '$(srcdir)/sf_nan.c'; fi`
lib_a-sf_nearbyint.o: sf_nearbyint.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nearbyint.o `test -f 'sf_nearbyint.c' || echo '$(srcdir)/'`sf_nearbyint.c
lib_a-sf_nearbyint.obj: sf_nearbyint.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nearbyint.obj `if test -f 'sf_nearbyint.c'; then $(CYGPATH_W) 'sf_nearbyint.c'; else $(CYGPATH_W) '$(srcdir)/sf_nearbyint.c'; fi`
lib_a-sf_nextafter.o: sf_nextafter.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nextafter.o `test -f 'sf_nextafter.c' || echo '$(srcdir)/'`sf_nextafter.c
lib_a-sf_nextafter.obj: sf_nextafter.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_nextafter.obj `if test -f 'sf_nextafter.c'; then $(CYGPATH_W) 'sf_nextafter.c'; else $(CYGPATH_W) '$(srcdir)/sf_nextafter.c'; fi`
lib_a-sf_remquo.o: sf_remquo.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_remquo.o `test -f 'sf_remquo.c' || echo '$(srcdir)/'`sf_remquo.c
@@ -636,6 +781,12 @@ lib_a-s_frexp.o: s_frexp.c
lib_a-s_frexp.obj: s_frexp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_frexp.obj `if test -f 's_frexp.c'; then $(CYGPATH_W) 's_frexp.c'; else $(CYGPATH_W) '$(srcdir)/s_frexp.c'; fi`
lib_a-sf_rint.o: sf_rint.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_rint.o `test -f 'sf_rint.c' || echo '$(srcdir)/'`sf_rint.c
lib_a-sf_rint.obj: sf_rint.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_rint.obj `if test -f 'sf_rint.c'; then $(CYGPATH_W) 'sf_rint.c'; else $(CYGPATH_W) '$(srcdir)/sf_rint.c'; fi`
lib_a-sf_round.o: sf_round.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_round.o `test -f 'sf_round.c' || echo '$(srcdir)/'`sf_round.c
@@ -648,6 +799,24 @@ lib_a-sf_scalbn.o: sf_scalbn.c
lib_a-sf_scalbn.obj: sf_scalbn.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_scalbn.obj `if test -f 'sf_scalbn.c'; then $(CYGPATH_W) 'sf_scalbn.c'; else $(CYGPATH_W) '$(srcdir)/sf_scalbn.c'; fi`
lib_a-sf_sin.o: sf_sin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_sin.o `test -f 'sf_sin.c' || echo '$(srcdir)/'`sf_sin.c
lib_a-sf_sin.obj: sf_sin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_sin.obj `if test -f 'sf_sin.c'; then $(CYGPATH_W) 'sf_sin.c'; else $(CYGPATH_W) '$(srcdir)/sf_sin.c'; fi`
lib_a-sf_tan.o: sf_tan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_tan.o `test -f 'sf_tan.c' || echo '$(srcdir)/'`sf_tan.c
lib_a-sf_tan.obj: sf_tan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_tan.obj `if test -f 'sf_tan.c'; then $(CYGPATH_W) 'sf_tan.c'; else $(CYGPATH_W) '$(srcdir)/sf_tan.c'; fi`
lib_a-sf_tanh.o: sf_tanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_tanh.o `test -f 'sf_tanh.c' || echo '$(srcdir)/'`sf_tanh.c
lib_a-sf_tanh.obj: sf_tanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_tanh.obj `if test -f 'sf_tanh.c'; then $(CYGPATH_W) 'sf_tanh.c'; else $(CYGPATH_W) '$(srcdir)/sf_tanh.c'; fi`
lib_a-sf_trunc.o: sf_trunc.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-sf_trunc.o `test -f 'sf_trunc.c' || echo '$(srcdir)/'`sf_trunc.c
@@ -660,12 +829,24 @@ lib_a-s_ilogb.o: s_ilogb.c
lib_a-s_ilogb.obj: s_ilogb.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_ilogb.obj `if test -f 's_ilogb.c'; then $(CYGPATH_W) 's_ilogb.c'; else $(CYGPATH_W) '$(srcdir)/s_ilogb.c'; fi`
lib_a-s_isnan.o: s_isnan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_isnan.o `test -f 's_isnan.c' || echo '$(srcdir)/'`s_isnan.c
lib_a-s_isnan.obj: s_isnan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_isnan.obj `if test -f 's_isnan.c'; then $(CYGPATH_W) 's_isnan.c'; else $(CYGPATH_W) '$(srcdir)/s_isnan.c'; fi`
lib_a-s_ldexp.o: s_ldexp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_ldexp.o `test -f 's_ldexp.c' || echo '$(srcdir)/'`s_ldexp.c
lib_a-s_ldexp.obj: s_ldexp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_ldexp.obj `if test -f 's_ldexp.c'; then $(CYGPATH_W) 's_ldexp.c'; else $(CYGPATH_W) '$(srcdir)/s_ldexp.c'; fi`
lib_a-s_log1p.o: s_log1p.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_log1p.o `test -f 's_log1p.c' || echo '$(srcdir)/'`s_log1p.c
lib_a-s_log1p.obj: s_log1p.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_log1p.obj `if test -f 's_log1p.c'; then $(CYGPATH_W) 's_log1p.c'; else $(CYGPATH_W) '$(srcdir)/s_log1p.c'; fi`
lib_a-s_lrint.o: s_lrint.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_lrint.o `test -f 's_lrint.c' || echo '$(srcdir)/'`s_lrint.c
@@ -684,6 +865,12 @@ lib_a-s_nearbyint.o: s_nearbyint.c
lib_a-s_nearbyint.obj: s_nearbyint.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_nearbyint.obj `if test -f 's_nearbyint.c'; then $(CYGPATH_W) 's_nearbyint.c'; else $(CYGPATH_W) '$(srcdir)/s_nearbyint.c'; fi`
lib_a-s_nextafter.o: s_nextafter.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_nextafter.o `test -f 's_nextafter.c' || echo '$(srcdir)/'`s_nextafter.c
lib_a-s_nextafter.obj: s_nextafter.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_nextafter.obj `if test -f 's_nextafter.c'; then $(CYGPATH_W) 's_nextafter.c'; else $(CYGPATH_W) '$(srcdir)/s_nextafter.c'; fi`
lib_a-s_remquo.o: s_remquo.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_remquo.o `test -f 's_remquo.c' || echo '$(srcdir)/'`s_remquo.c
@@ -708,12 +895,114 @@ lib_a-s_scalbn.o: s_scalbn.c
lib_a-s_scalbn.obj: s_scalbn.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_scalbn.obj `if test -f 's_scalbn.c'; then $(CYGPATH_W) 's_scalbn.c'; else $(CYGPATH_W) '$(srcdir)/s_scalbn.c'; fi`
lib_a-s_sin.o: s_sin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_sin.o `test -f 's_sin.c' || echo '$(srcdir)/'`s_sin.c
lib_a-s_sin.obj: s_sin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_sin.obj `if test -f 's_sin.c'; then $(CYGPATH_W) 's_sin.c'; else $(CYGPATH_W) '$(srcdir)/s_sin.c'; fi`
lib_a-s_tan.o: s_tan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_tan.o `test -f 's_tan.c' || echo '$(srcdir)/'`s_tan.c
lib_a-s_tan.obj: s_tan.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_tan.obj `if test -f 's_tan.c'; then $(CYGPATH_W) 's_tan.c'; else $(CYGPATH_W) '$(srcdir)/s_tan.c'; fi`
lib_a-s_tanh.o: s_tanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_tanh.o `test -f 's_tanh.c' || echo '$(srcdir)/'`s_tanh.c
lib_a-s_tanh.obj: s_tanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_tanh.obj `if test -f 's_tanh.c'; then $(CYGPATH_W) 's_tanh.c'; else $(CYGPATH_W) '$(srcdir)/s_tanh.c'; fi`
lib_a-s_trunc.o: s_trunc.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_trunc.o `test -f 's_trunc.c' || echo '$(srcdir)/'`s_trunc.c
lib_a-s_trunc.obj: s_trunc.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-s_trunc.obj `if test -f 's_trunc.c'; then $(CYGPATH_W) 's_trunc.c'; else $(CYGPATH_W) '$(srcdir)/s_trunc.c'; fi`
lib_a-w_acos.o: w_acos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_acos.o `test -f 'w_acos.c' || echo '$(srcdir)/'`w_acos.c
lib_a-w_acos.obj: w_acos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_acos.obj `if test -f 'w_acos.c'; then $(CYGPATH_W) 'w_acos.c'; else $(CYGPATH_W) '$(srcdir)/w_acos.c'; fi`
lib_a-w_acosh.o: w_acosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_acosh.o `test -f 'w_acosh.c' || echo '$(srcdir)/'`w_acosh.c
lib_a-w_acosh.obj: w_acosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_acosh.obj `if test -f 'w_acosh.c'; then $(CYGPATH_W) 'w_acosh.c'; else $(CYGPATH_W) '$(srcdir)/w_acosh.c'; fi`
lib_a-w_asin.o: w_asin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_asin.o `test -f 'w_asin.c' || echo '$(srcdir)/'`w_asin.c
lib_a-w_asin.obj: w_asin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_asin.obj `if test -f 'w_asin.c'; then $(CYGPATH_W) 'w_asin.c'; else $(CYGPATH_W) '$(srcdir)/w_asin.c'; fi`
lib_a-w_atan2.o: w_atan2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_atan2.o `test -f 'w_atan2.c' || echo '$(srcdir)/'`w_atan2.c
lib_a-w_atan2.obj: w_atan2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_atan2.obj `if test -f 'w_atan2.c'; then $(CYGPATH_W) 'w_atan2.c'; else $(CYGPATH_W) '$(srcdir)/w_atan2.c'; fi`
lib_a-w_atanh.o: w_atanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_atanh.o `test -f 'w_atanh.c' || echo '$(srcdir)/'`w_atanh.c
lib_a-w_atanh.obj: w_atanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_atanh.obj `if test -f 'w_atanh.c'; then $(CYGPATH_W) 'w_atanh.c'; else $(CYGPATH_W) '$(srcdir)/w_atanh.c'; fi`
lib_a-w_cosh.o: w_cosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_cosh.o `test -f 'w_cosh.c' || echo '$(srcdir)/'`w_cosh.c
lib_a-w_cosh.obj: w_cosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_cosh.obj `if test -f 'w_cosh.c'; then $(CYGPATH_W) 'w_cosh.c'; else $(CYGPATH_W) '$(srcdir)/w_cosh.c'; fi`
lib_a-w_exp2.o: w_exp2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_exp2.o `test -f 'w_exp2.c' || echo '$(srcdir)/'`w_exp2.c
lib_a-w_exp2.obj: w_exp2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_exp2.obj `if test -f 'w_exp2.c'; then $(CYGPATH_W) 'w_exp2.c'; else $(CYGPATH_W) '$(srcdir)/w_exp2.c'; fi`
lib_a-w_exp.o: w_exp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_exp.o `test -f 'w_exp.c' || echo '$(srcdir)/'`w_exp.c
lib_a-w_exp.obj: w_exp.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_exp.obj `if test -f 'w_exp.c'; then $(CYGPATH_W) 'w_exp.c'; else $(CYGPATH_W) '$(srcdir)/w_exp.c'; fi`
lib_a-wf_acos.o: wf_acos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_acos.o `test -f 'wf_acos.c' || echo '$(srcdir)/'`wf_acos.c
lib_a-wf_acos.obj: wf_acos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_acos.obj `if test -f 'wf_acos.c'; then $(CYGPATH_W) 'wf_acos.c'; else $(CYGPATH_W) '$(srcdir)/wf_acos.c'; fi`
lib_a-wf_acosh.o: wf_acosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_acosh.o `test -f 'wf_acosh.c' || echo '$(srcdir)/'`wf_acosh.c
lib_a-wf_acosh.obj: wf_acosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_acosh.obj `if test -f 'wf_acosh.c'; then $(CYGPATH_W) 'wf_acosh.c'; else $(CYGPATH_W) '$(srcdir)/wf_acosh.c'; fi`
lib_a-wf_asin.o: wf_asin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_asin.o `test -f 'wf_asin.c' || echo '$(srcdir)/'`wf_asin.c
lib_a-wf_asin.obj: wf_asin.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_asin.obj `if test -f 'wf_asin.c'; then $(CYGPATH_W) 'wf_asin.c'; else $(CYGPATH_W) '$(srcdir)/wf_asin.c'; fi`
lib_a-wf_atan2.o: wf_atan2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_atan2.o `test -f 'wf_atan2.c' || echo '$(srcdir)/'`wf_atan2.c
lib_a-wf_atan2.obj: wf_atan2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_atan2.obj `if test -f 'wf_atan2.c'; then $(CYGPATH_W) 'wf_atan2.c'; else $(CYGPATH_W) '$(srcdir)/wf_atan2.c'; fi`
lib_a-wf_atanh.o: wf_atanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_atanh.o `test -f 'wf_atanh.c' || echo '$(srcdir)/'`wf_atanh.c
lib_a-wf_atanh.obj: wf_atanh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_atanh.obj `if test -f 'wf_atanh.c'; then $(CYGPATH_W) 'wf_atanh.c'; else $(CYGPATH_W) '$(srcdir)/wf_atanh.c'; fi`
lib_a-wf_cosh.o: wf_cosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_cosh.o `test -f 'wf_cosh.c' || echo '$(srcdir)/'`wf_cosh.c
lib_a-wf_cosh.obj: wf_cosh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_cosh.obj `if test -f 'wf_cosh.c'; then $(CYGPATH_W) 'wf_cosh.c'; else $(CYGPATH_W) '$(srcdir)/wf_cosh.c'; fi`
lib_a-wf_exp2.o: wf_exp2.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_exp2.o `test -f 'wf_exp2.c' || echo '$(srcdir)/'`wf_exp2.c
@@ -732,6 +1021,18 @@ lib_a-wf_fmod.o: wf_fmod.c
lib_a-wf_fmod.obj: wf_fmod.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_fmod.obj `if test -f 'wf_fmod.c'; then $(CYGPATH_W) 'wf_fmod.c'; else $(CYGPATH_W) '$(srcdir)/wf_fmod.c'; fi`
lib_a-wf_hypot.o: wf_hypot.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_hypot.o `test -f 'wf_hypot.c' || echo '$(srcdir)/'`wf_hypot.c
lib_a-wf_hypot.obj: wf_hypot.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_hypot.obj `if test -f 'wf_hypot.c'; then $(CYGPATH_W) 'wf_hypot.c'; else $(CYGPATH_W) '$(srcdir)/wf_hypot.c'; fi`
lib_a-wf_lgamma.o: wf_lgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_lgamma.o `test -f 'wf_lgamma.c' || echo '$(srcdir)/'`wf_lgamma.c
lib_a-wf_lgamma.obj: wf_lgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_lgamma.obj `if test -f 'wf_lgamma.c'; then $(CYGPATH_W) 'wf_lgamma.c'; else $(CYGPATH_W) '$(srcdir)/wf_lgamma.c'; fi`
lib_a-wf_log10.o: wf_log10.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_log10.o `test -f 'wf_log10.c' || echo '$(srcdir)/'`wf_log10.c
@@ -750,23 +1051,101 @@ lib_a-w_fmod.o: w_fmod.c
lib_a-w_fmod.obj: w_fmod.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_fmod.obj `if test -f 'w_fmod.c'; then $(CYGPATH_W) 'w_fmod.c'; else $(CYGPATH_W) '$(srcdir)/w_fmod.c'; fi`
lib_a-wf_pow.o: wf_pow.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_pow.o `test -f 'wf_pow.c' || echo '$(srcdir)/'`wf_pow.c
lib_a-wf_pow.obj: wf_pow.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_pow.obj `if test -f 'wf_pow.c'; then $(CYGPATH_W) 'wf_pow.c'; else $(CYGPATH_W) '$(srcdir)/wf_pow.c'; fi`
lib_a-wf_remainder.o: wf_remainder.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_remainder.o `test -f 'wf_remainder.c' || echo '$(srcdir)/'`wf_remainder.c
lib_a-wf_remainder.obj: wf_remainder.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_remainder.obj `if test -f 'wf_remainder.c'; then $(CYGPATH_W) 'wf_remainder.c'; else $(CYGPATH_W) '$(srcdir)/wf_remainder.c'; fi`
lib_a-wf_sincos.o: wf_sincos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_sincos.o `test -f 'wf_sincos.c' || echo '$(srcdir)/'`wf_sincos.c
lib_a-wf_sincos.obj: wf_sincos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_sincos.obj `if test -f 'wf_sincos.c'; then $(CYGPATH_W) 'wf_sincos.c'; else $(CYGPATH_W) '$(srcdir)/wf_sincos.c'; fi`
lib_a-wf_sinh.o: wf_sinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_sinh.o `test -f 'wf_sinh.c' || echo '$(srcdir)/'`wf_sinh.c
lib_a-wf_sinh.obj: wf_sinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_sinh.obj `if test -f 'wf_sinh.c'; then $(CYGPATH_W) 'wf_sinh.c'; else $(CYGPATH_W) '$(srcdir)/wf_sinh.c'; fi`
lib_a-wf_sqrt.o: wf_sqrt.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_sqrt.o `test -f 'wf_sqrt.c' || echo '$(srcdir)/'`wf_sqrt.c
lib_a-wf_sqrt.obj: wf_sqrt.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_sqrt.obj `if test -f 'wf_sqrt.c'; then $(CYGPATH_W) 'wf_sqrt.c'; else $(CYGPATH_W) '$(srcdir)/wf_sqrt.c'; fi`
lib_a-wf_tgamma.o: wf_tgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_tgamma.o `test -f 'wf_tgamma.c' || echo '$(srcdir)/'`wf_tgamma.c
lib_a-wf_tgamma.obj: wf_tgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-wf_tgamma.obj `if test -f 'wf_tgamma.c'; then $(CYGPATH_W) 'wf_tgamma.c'; else $(CYGPATH_W) '$(srcdir)/wf_tgamma.c'; fi`
lib_a-w_hypot.o: w_hypot.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_hypot.o `test -f 'w_hypot.c' || echo '$(srcdir)/'`w_hypot.c
lib_a-w_hypot.obj: w_hypot.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_hypot.obj `if test -f 'w_hypot.c'; then $(CYGPATH_W) 'w_hypot.c'; else $(CYGPATH_W) '$(srcdir)/w_hypot.c'; fi`
lib_a-w_lgamma.o: w_lgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_lgamma.o `test -f 'w_lgamma.c' || echo '$(srcdir)/'`w_lgamma.c
lib_a-w_lgamma.obj: w_lgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_lgamma.obj `if test -f 'w_lgamma.c'; then $(CYGPATH_W) 'w_lgamma.c'; else $(CYGPATH_W) '$(srcdir)/w_lgamma.c'; fi`
lib_a-w_log10.o: w_log10.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_log10.o `test -f 'w_log10.c' || echo '$(srcdir)/'`w_log10.c
lib_a-w_log10.obj: w_log10.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_log10.obj `if test -f 'w_log10.c'; then $(CYGPATH_W) 'w_log10.c'; else $(CYGPATH_W) '$(srcdir)/w_log10.c'; fi`
lib_a-w_log.o: w_log.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_log.o `test -f 'w_log.c' || echo '$(srcdir)/'`w_log.c
lib_a-w_log.obj: w_log.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_log.obj `if test -f 'w_log.c'; then $(CYGPATH_W) 'w_log.c'; else $(CYGPATH_W) '$(srcdir)/w_log.c'; fi`
lib_a-w_pow.o: w_pow.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_pow.o `test -f 'w_pow.c' || echo '$(srcdir)/'`w_pow.c
lib_a-w_pow.obj: w_pow.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_pow.obj `if test -f 'w_pow.c'; then $(CYGPATH_W) 'w_pow.c'; else $(CYGPATH_W) '$(srcdir)/w_pow.c'; fi`
lib_a-w_remainder.o: w_remainder.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_remainder.o `test -f 'w_remainder.c' || echo '$(srcdir)/'`w_remainder.c
lib_a-w_remainder.obj: w_remainder.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_remainder.obj `if test -f 'w_remainder.c'; then $(CYGPATH_W) 'w_remainder.c'; else $(CYGPATH_W) '$(srcdir)/w_remainder.c'; fi`
lib_a-w_sincos.o: w_sincos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_sincos.o `test -f 'w_sincos.c' || echo '$(srcdir)/'`w_sincos.c
lib_a-w_sincos.obj: w_sincos.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_sincos.obj `if test -f 'w_sincos.c'; then $(CYGPATH_W) 'w_sincos.c'; else $(CYGPATH_W) '$(srcdir)/w_sincos.c'; fi`
lib_a-w_sinh.o: w_sinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_sinh.o `test -f 'w_sinh.c' || echo '$(srcdir)/'`w_sinh.c
lib_a-w_sinh.obj: w_sinh.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_sinh.obj `if test -f 'w_sinh.c'; then $(CYGPATH_W) 'w_sinh.c'; else $(CYGPATH_W) '$(srcdir)/w_sinh.c'; fi`
lib_a-w_sqrt.o: w_sqrt.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_sqrt.o `test -f 'w_sqrt.c' || echo '$(srcdir)/'`w_sqrt.c
lib_a-w_sqrt.obj: w_sqrt.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_sqrt.obj `if test -f 'w_sqrt.c'; then $(CYGPATH_W) 'w_sqrt.c'; else $(CYGPATH_W) '$(srcdir)/w_sqrt.c'; fi`
lib_a-w_tgamma.o: w_tgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_tgamma.o `test -f 'w_tgamma.c' || echo '$(srcdir)/'`w_tgamma.c
lib_a-w_tgamma.obj: w_tgamma.c
$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(lib_a_CFLAGS) $(CFLAGS) -c -o lib_a-w_tgamma.obj `if test -f 'w_tgamma.c'; then $(CYGPATH_W) 'w_tgamma.c'; else $(CYGPATH_W) '$(srcdir)/w_tgamma.c'; fi`
uninstall-info-am:
ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)

@@ -0,0 +1,6 @@
#include "headers/acosd2.h"
static __inline double _acos(double x)
{
return spu_extract(_acosd2(spu_promote(x, 0)), 0);
}
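
Each of the small wrappers added by this patch follows the same pattern as the one above: promote the scalar into element 0 of a vector, run the SIMD routine on every lane, and extract element 0 of the result. As an illustration only (hypothetical caller code, not part of the patch; it assumes the SPU GCC toolchain and the headers added here), a caller with two independent values can feed both lanes of _acosd2 directly instead of paying for the wrapper twice:

#include <spu_intrinsics.h>
#include "headers/acosd2.h"

/* Hypothetical helper: one vector call services two scalars.
   spu_promote, spu_insert and spu_extract are standard SPU intrinsics. */
static void acos_pair(double a, double b, double *ra, double *rb)
{
  vector double v = spu_promote(a, 0);   /* a into element 0 */
  v = spu_insert(b, v, 1);               /* b into element 1 */
  vector double r = _acosd2(v);          /* both lanes computed at once */
  *ra = spu_extract(r, 0);
  *rb = spu_extract(r, 1);
}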

@@ -0,0 +1,176 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ACOSD2_H_
#define _ACOSD2_H_ 1
#include "simdmath.h"
#include <spu_intrinsics.h>
#include "sqrtd2.h"
#include "divd2.h"
/*
* FUNCTION
* vector double _acosd2(vector double x)
*
* DESCRIPTION
* Compute the arc cosine of the vector of double precision elements
* specified by x, returning the resulting angles in radians. The input
* elements are to be in the closed interval [-1, 1]. Values outside
* this range result in an invalid operation exception being latched in
* the FPSCR register and a NAN is returned.
*
* The basic algorithm computes the arc cosine using PI/2 - asind2(x).
* However, as |x| approaches 1, there is a cancellation error in
* subtracting asind2(x) from PI/2, so we simplify the evaluation
* instead of layering acosd2 on top of asind2.
*
* This yields the basic algorithm of:
*
* absx = (x < 0.0) ? -x : x;
*
* if (absx > 0.5) {
* if (x < 0) {
* addend = SM_PI;
* multiplier = -2.0;
* } else {
* addend = 0.0;
* multiplier = 2.0;
* }
*
* x = sqrt(-0.5 * absx + 0.5);
* } else {
* addend = SM_PI_2;
* multiplier = -1.0;
* }
*
* x2 = x * x;
* x3 = x2 * x;
*
* p = ((((P5 * x2 + P4)*x2 + P3)*x2 + P2)*x2 + P1)*x2 + P0;
*
* q = ((((Q5 * x2 + Q4)*x2 + Q3)*x2 + Q2)*x2 + Q1)*x2 + Q0;
*
* pq = p / q;
*
* result = (x3*pq + x)*multiplier + addend;
*
* Where P5-P0 and Q5-Q0 are the polynomial coefficients. See asind2
* for additional details.
*/
static __inline vector double _acosd2(vector double x)
{
vec_uint4 x_gt_half, x_eq_half;
vec_double2 x_neg; // input x is negative
vec_double2 x_abs; // absolute value of x
vec_double2 x_trans; // transformed x when |x| > 0.5
vec_double2 x2, x3; // x squared and x cubed, respectively.
vec_double2 result;
vec_double2 multiplier, addend;
vec_double2 p, q, pq;
vec_double2 half = spu_splats(0.5);
vec_double2 sign = (vec_double2)spu_splats(0x8000000000000000ULL);
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11});
// Compute the absolute value of x
x_abs = spu_andc(x, sign);
// Perform transformation for the case where |x| > 0.5. We rely on
// sqrtd2 producing a NAN if |x| > 1.0.
x_trans = _sqrtd2(spu_nmsub(x_abs, half, half));
// Determine the correct addend and multiplier.
x_neg = (vec_double2)spu_rlmaska((vec_int4)spu_shuffle(x, x, splat_hi), -31);
x_gt_half = spu_cmpgt((vec_uint4)x_abs, (vec_uint4)half);
x_eq_half = spu_cmpeq((vec_uint4)x_abs, (vec_uint4)half);
x_gt_half = spu_or(x_gt_half, spu_and(x_eq_half, spu_rlqwbyte(x_gt_half, 4)));
x_gt_half = spu_shuffle(x_gt_half, x_gt_half, splat_hi);
addend = spu_sel(spu_splats(SM_PI_2), spu_and(spu_splats(SM_PI), x_neg), (vec_ullong2)x_gt_half);
multiplier = spu_sel(spu_splats(-1.0), spu_sel(spu_splats(2.0), x, (vec_ullong2)sign), (vec_ullong2)x_gt_half);
// Select whether to use the x or the transformed x for the polynomial evaluation.
// if |x| > 0.5 use x_trans
// else use x
x = spu_sel(x, x_trans, (vec_ullong2)x_gt_half);
// Compute the polynomials.
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
p = spu_madd(spu_splats(0.004253011369004428248960), x2, spu_splats(-0.6019598008014123785661));
p = spu_madd(p, x2, spu_splats(5.444622390564711410273));
p = spu_madd(p, x2, spu_splats(-16.26247967210700244449));
p = spu_madd(p, x2, spu_splats(19.56261983317594739197));
p = spu_madd(p, x2, spu_splats(-8.198089802484824371615));
q = spu_add(x2, spu_splats(-14.74091372988853791896));
q = spu_madd(q, x2, spu_splats(70.49610280856842141659));
q = spu_madd(q, x2, spu_splats(-147.1791292232726029859));
q = spu_madd(q, x2, spu_splats(139.5105614657485689735));
q = spu_madd(q, x2, spu_splats(-49.18853881490881290097));
// Compute the rational solution p/q and final multiplication and addend
// correction.
pq = _divd2(p, q);
result = spu_madd(spu_madd(x3, pq, x), multiplier, addend);
return (result);
}
#endif /* _ACOSD2_H_ */
#endif /* __SPU__ */
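
The pseudocode in the prolog above maps one-to-one onto the SIMD code that follows it. As a plain-C cross-check of just the range reduction (an illustrative sketch, not part of the patch; libm's asin stands in for the x + x^3*P/Q rational approximation, so the P0-P5/Q0-Q5 constants are not repeated here):

#include <math.h>

/* Sketch: the same addend/multiplier reduction used by _acosd2. */
static double acos_by_reduction(double x)
{
  double absx = fabs(x);
  double addend, multiplier, t;

  if (absx > 0.5) {
    addend     = (x < 0.0) ? M_PI : 0.0;
    multiplier = (x < 0.0) ? -2.0 : 2.0;
    t = sqrt(-0.5 * absx + 0.5);          /* sqrt((1 - |x|)/2) */
  } else {
    addend     = M_PI_2;
    multiplier = -1.0;
    t = x;
  }
  return asin(t) * multiplier + addend;   /* equals acos(x) on [-1, 1] */
}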

@@ -0,0 +1,6 @@
#include "headers/acosf4.h"
static __inline float _acosf(float x)
{
return spu_extract(_acosf4(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,180 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ACOSF4_H_
#define _ACOSF4_H_ 1
#include <spu_intrinsics.h>
#include "divf4.h"
#include "sqrtf4.h"
/*
* FUNCTION
* vector float _acosf4(vector float x)
*
* DESCRIPTION
* The _acosf4 function computes the arc cosine for a vector of values x;
* that is the values whose cosine is x. Results are undefined if x is
* outside the range [-1, 1].
*
* RETURNS
* The _acosf4 function returns the arc cosine in radians and the value is
* mathematically defined to be in the range [0, pi].
*
*/
static __inline vector float _acosf4(vector float x)
{
vec_float4 zero = spu_splats(0.0f);
vec_float4 half = spu_splats(0.5f);
vec_float4 one = spu_splats(1.0f);
vec_float4 two = spu_splats(2.0f);
vec_float4 pi = spu_splats(3.1415925026e+00f);
vec_float4 pio2_hi = spu_splats(1.5707962513e+00f);
vec_float4 pio2_lo = spu_splats(7.5497894159e-08f);
vec_float4 snan = (vec_float4)spu_splats((unsigned int)0x7FC00000);
vec_uint4 denorm_threshold = spu_splats((unsigned int)0x23000000);
vec_uint4 sign_mask = spu_splats((unsigned int)0x80000000);
vec_float4 p0 = (vec_float4)spu_splats((unsigned int)0x3E2AAAAB);
vec_float4 p1 = (vec_float4)spu_splats((unsigned int)0xBEA6B090);
vec_float4 p2 = (vec_float4)spu_splats((unsigned int)0x3E4E0AA8);
vec_float4 p3 = (vec_float4)spu_splats((unsigned int)0xBD241146);
vec_float4 p4 = (vec_float4)spu_splats((unsigned int)0x3A4F7F04);
vec_float4 p5 = (vec_float4)spu_splats((unsigned int)0x3811EF08);
vec_float4 q1 = (vec_float4)spu_splats((unsigned int)0xC019D139);
vec_float4 q2 = (vec_float4)spu_splats((unsigned int)0x4001572D);
vec_float4 q3 = (vec_float4)spu_splats((unsigned int)0xBF303361);
vec_float4 q4 = (vec_float4)spu_splats((unsigned int)0x3D9DC62E);
vec_uint4 x_abs = spu_andc((vec_uint4)x,sign_mask);
vec_uint4 x_pos = spu_cmpgt(sign_mask,(vec_uint4)x);
vec_uint4 almost_half = spu_splats((unsigned int)0x3EFFFFFF);
vec_uint4 sel0 = spu_nand(spu_splats((unsigned int)0xFFFFFFFF),spu_cmpgt(x_abs,almost_half));
vec_uint4 sel1 = spu_andc(x_pos,sel0); // pos
vec_float4 za = spu_sel(spu_sel(spu_add(one,x),spu_sub(one,x),sel1) ,x,sel0);
vec_float4 zb = spu_sel(half,x,sel0);
vec_float4 z = spu_mul(za,zb);
vec_float4 p;
p = spu_madd(z,p5,p4);
p = spu_madd(p,z,p3);
p = spu_madd(p,z,p2);
p = spu_madd(p,z,p1);
p = spu_madd(p,z,p0);
p = spu_mul(p,z);
vec_float4 q;
q = spu_madd(z,q4,q3);
q = spu_madd(q,z,q2);
q = spu_madd(q,z,q1);
q = spu_madd(q,z,one);
// Only used by secondaries
vec_float4 s = _sqrtf4(z);
vec_float4 r = _divf4(p,q);
vec_float4 w1 = spu_msub(r,s,pio2_lo);
vec_float4 df = (vec_float4)spu_and((vec_uint4)s,0xFFFFF000);
vec_float4 c = _divf4(spu_nmsub(df,df,z),spu_add(s,df));
vec_float4 w2 = spu_madd(r,s,c);
vec_float4 result0 = spu_sub(pio2_hi,spu_sub(x,spu_nmsub(x,r,pio2_lo)));
vec_float4 result1 = spu_mul(two,spu_add(df,w2));
vec_float4 result2 = spu_nmsub(two,spu_add(s,w1),pi);
vec_float4 result;
result = spu_sel(result2,result1,sel1);
result = spu_sel(result,result0,sel0);
// If |x|==1 then:
// if x == 1, return 0
// else return pi
vec_uint4 abs_one = spu_cmpeq(x_abs,(vec_uint4)one);
vec_uint4 out_of_bounds = spu_cmpgt(x_abs,(vec_uint4)one);
vec_uint4 underflow = spu_cmpgt(denorm_threshold,x_abs);
result = spu_sel(result,spu_sel(pi,zero,x_pos),abs_one);
// If 1 < |x| then return sNaN
result = spu_sel(result,snan,out_of_bounds);
// If |x| < 2**-57, then return pi/2 (OFF BY 1 ULP)
result = spu_sel(result,spu_add(pio2_hi,pio2_lo),underflow);
return result;
}
#endif /* _ACOSF4_H_ */
#endif /* __SPU__ */
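
A short usage sketch for the four-lane form (hypothetical test code, assuming spu-gcc and newlib's stdio on the SPU): pack four inputs into one vector, call _acosf4 once, and extract each lane of the result.

#include <stdio.h>
#include <spu_intrinsics.h>
#include "headers/acosf4.h"

int main(void)
{
  vector float x = ((vector float){ -1.0f, -0.5f, 0.5f, 1.0f });
  vector float r = _acosf4(x);   /* all four arc cosines in one call */

  printf("%f %f %f %f\n",
         spu_extract(r, 0), spu_extract(r, 1),
         spu_extract(r, 2), spu_extract(r, 3));
  return 0;
}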

@@ -0,0 +1,6 @@
#include "headers/acoshd2.h"
static __inline double _acosh(double x)
{
return spu_extract(_acoshd2(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,172 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ACOSHD2_H_
#define _ACOSHD2_H_ 1
#include <spu_intrinsics.h>
#include "logd2.h"
#include "sqrtd2.h"
/*
* FUNCTION
* vector double _acoshd2(vector double x)
*
* DESCRIPTION
* The acoshd2 function returns a vector containing the hyperbolic
* arccosines of the corresponding elements of the input vector.
*
* We are using the formula:
* acosh = ln(x + sqrt(x^2 - 1))
*
* For x near one, we use the Taylor series:
*
* infinity
* ------
* - '
* - k
* acosh x = - C (x - 1)
* - k
* - ,
* ------
* k = 0
*
*
* Special Cases:
* - acosh(1) = +0
* - acosh(NaN) = NaN
* - acosh(Infinity) = Infinity
* - acosh(x < 1) = NaN
*
*/
/*
* Taylor Series Coefficients
* for x around 1.
*/
#define ACOSH_TAY01 1.0000000000000000000000000000000000000000000000000000000000000000000000E0 /* 1 / 1 */
#define ACOSH_TAY02 -8.3333333333333333333333333333333333333333333333333333333333333333333333E-2 /* 1 / 12 */
#define ACOSH_TAY03 1.8750000000000000000000000000000000000000000000000000000000000000000000E-2 /* 3 / 160 */
#define ACOSH_TAY04 -5.5803571428571428571428571428571428571428571428571428571428571428571429E-3 /* 5 / 896 */
#define ACOSH_TAY05 1.8988715277777777777777777777777777777777777777777777777777777777777778E-3 /* 35 / 18432 */
#define ACOSH_TAY06 -6.9912997159090909090909090909090909090909090909090909090909090909090909E-4 /* 63 / 90112 */
#define ACOSH_TAY07 2.7113694411057692307692307692307692307692307692307692307692307692307692E-4 /* 231 / 851968 */
#define ACOSH_TAY08 -1.0910034179687500000000000000000000000000000000000000000000000000000000E-4 /* 143 / 1310720 */
#define ACOSH_TAY09 4.5124222250545726102941176470588235294117647058823529411764705882352941E-5 /* 6435 / 142606336 */
#define ACOSH_TAY10 -1.9065643611707185444078947368421052631578947368421052631578947368421053E-5 /* 12155 / 637534208 */
#define ACOSH_TAY11 8.1936873140789213634672619047619047619047619047619047619047619047619048E-6 /* 46189 / 5637144576 */
#define ACOSH_TAY12 -3.5705692742181860882302989130434782608695652173913043478260869565217391E-6 /* 88179 / 24696061952 */
#define ACOSH_TAY13 1.5740259550511837005615234375000000000000000000000000000000000000000000E-6 /* 676039 / 429496729600 */
#define ACOSH_TAY14 -7.0068819224144573564882631655092592592592592592592592592592592592592593E-7 /* 1300075 / 1855425871872 */
#define ACOSH_TAY15 3.1453306166503321507881427633351293103448275862068965517241379310344828E-7 /* 5014575 / 15942918602752 */
#if 0
#define ACOSH_TAY16 -1.4221629293564136230176494967552923387096774193548387096774193548387097E-7 /* 9694845 / 68169720922112 */
#define ACOSH_TAY17 6.4711106776113328206437555226412686434659090909090909090909090909090909E-8 /* 100180065 / 1548112371908608 */
#define ACOSH_TAY18 -2.9609409781171182528071637664522443498883928571428571428571428571428571E-8 /* 116680311 / 3940649673949184 */
#define ACOSH_TAY19 1.3615438056281793767600509061201198680980785472972972972972972972972973E-8 /* 2268783825 / 166633186212708352 */
#endif
static __inline vector double _acoshd2(vector double x)
{
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 });
vec_double2 minus_oned = spu_splats(-1.0);
vec_double2 twod = spu_splats(2.0);
vec_double2 xminus1;
vec_float4 xf;
/* Where we switch from Taylor series to formula */
vec_float4 switch_approx = spu_splats(1.15f);
vec_uint4 use_form;
vec_double2 result, fresult, mresult;
xf = spu_roundtf(x);
xf = spu_shuffle(xf, xf, dup_even);
/*
* Formula:
* acosh = ln(x + sqrt(x^2 - 1))
*/
fresult = _sqrtd2(spu_madd(x, x, minus_oned));
fresult = spu_add(x, fresult);
fresult = _logd2(fresult);
/*
* Taylor Series
*/
xminus1 = spu_add(x, minus_oned);
mresult = spu_madd(xminus1, spu_splats(ACOSH_TAY15), spu_splats(ACOSH_TAY14));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY13));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY12));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY11));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY10));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY09));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY08));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY07));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY06));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY05));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY04));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY03));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY02));
mresult = spu_madd(xminus1, mresult, spu_splats(ACOSH_TAY01));
mresult = spu_mul(mresult, _sqrtd2(spu_mul(xminus1, twod)));
/*
* Select series or formula
*/
use_form = spu_cmpgt(xf, switch_approx);
result = spu_sel(mresult, fresult, (vec_ullong2)use_form);
return result;
}
#endif /* _ACOSHD2_H_ */
#endif /* __SPU__ */
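
The Taylor branch exists because, for x just above 1, x*x - 1.0 and then ln(x + sqrt(x^2 - 1)) are both computed with heavy cancellation. A scalar sketch of the same idea (illustrative only; it keeps just the first three of the fifteen coefficients used above, and the SIMD code switches branches near x = 1.15):

#include <math.h>

/* Sketch: acosh(1 + u) ~= sqrt(2u) * (1 - u/12 + 3u^2/160 - ...),
   i.e. ACOSH_TAY01..TAY03 above, versus the closed form. */
static double acosh_series3(double x)
{
  double u = x - 1.0;
  double poly = 1.0 + u * (-1.0 / 12.0 + u * (3.0 / 160.0));
  return sqrt(2.0 * u) * poly;
}

static double acosh_formula(double x)
{
  return log(x + sqrt(x * x - 1.0));   /* ill-conditioned as x -> 1 */
}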

@@ -0,0 +1,6 @@
#include "headers/acoshf4.h"
static __inline float _acoshf(float x)
{
return spu_extract(_acoshf4(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,167 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ACOSHF4_H_
#define _ACOSHF4_H_ 1
#include <spu_intrinsics.h>
#include "logf4.h"
#include "sqrtf4.h"
/*
* FUNCTION
* vector float _acoshf4(vector float x)
*
* DESCRIPTION
* The acoshf4 function returns a vector containing the hyperbolic
* arccosines of the corresponding elements of the input vector.
*
* We are using the formula:
* acosh = ln(x + sqrt(x^2 - 1))
*
* For x near one, we use the Taylor series:
*
* infinity
* ------
* - '
* - k
* acosh x = - C (x - 1)
* - k
* - ,
* ------
* k = 0
*
*
* Special Cases:
* - acosh(1) = +0
* - NaNs and Infinity aren't supported for single-precision on SPU.
*
*/
/*
* Taylor Series Coefficients
* for x around 1.
*/
#define ACOSH_TAY01 1.0000000000000000000000000000000000000000000000000000000000000000000000E0 /* 1 / 1 */
#define ACOSH_TAY02 -8.3333333333333333333333333333333333333333333333333333333333333333333333E-2 /* 1 / 12 */
#define ACOSH_TAY03 1.8750000000000000000000000000000000000000000000000000000000000000000000E-2 /* 3 / 160 */
#define ACOSH_TAY04 -5.5803571428571428571428571428571428571428571428571428571428571428571429E-3 /* 5 / 896 */
#define ACOSH_TAY05 1.8988715277777777777777777777777777777777777777777777777777777777777778E-3 /* 35 / 18432 */
#define ACOSH_TAY06 -6.9912997159090909090909090909090909090909090909090909090909090909090909E-4 /* 63 / 90112 */
#define ACOSH_TAY07 2.7113694411057692307692307692307692307692307692307692307692307692307692E-4 /* 231 / 851968 */
#define ACOSH_TAY08 -1.0910034179687500000000000000000000000000000000000000000000000000000000E-4 /* 143 / 1310720 */
#define ACOSH_TAY09 4.5124222250545726102941176470588235294117647058823529411764705882352941E-5 /* 6435 / 142606336 */
#define ACOSH_TAY10 -1.9065643611707185444078947368421052631578947368421052631578947368421053E-5 /* 12155 / 637534208 */
#define ACOSH_TAY11 8.1936873140789213634672619047619047619047619047619047619047619047619048E-6 /* 46189 / 5637144576 */
#define ACOSH_TAY12 -3.5705692742181860882302989130434782608695652173913043478260869565217391E-6 /* 88179 / 24696061952 */
#define ACOSH_TAY13 1.5740259550511837005615234375000000000000000000000000000000000000000000E-6 /* 676039 / 429496729600 */
#define ACOSH_TAY14 -7.0068819224144573564882631655092592592592592592592592592592592592592593E-7 /* 1300075 / 1855425871872 */
#define ACOSH_TAY15 3.1453306166503321507881427633351293103448275862068965517241379310344828E-7 /* 5014575 / 15942918602752 */
#if 0
#define ACOSH_TAY16 -1.4221629293564136230176494967552923387096774193548387096774193548387097E-7 /* 9694845 / 68169720922112 */
#define ACOSH_TAY17 6.4711106776113328206437555226412686434659090909090909090909090909090909E-8 /* 100180065 / 1548112371908608 */
#define ACOSH_TAY18 -2.9609409781171182528071637664522443498883928571428571428571428571428571E-8 /* 116680311 / 3940649673949184 */
#define ACOSH_TAY19 1.3615438056281793767600509061201198680980785472972972972972972972972973E-8 /* 2268783825 / 166633186212708352 */
#endif
static __inline vector float _acoshf4(vector float x)
{
vec_float4 minus_onef = spu_splats(-1.0f);
vec_float4 twof = spu_splats(2.0f);
vec_float4 xminus1;
/* Where we switch from Taylor series to formula */
vec_float4 switch_approx = spu_splats(2.0f);
vec_uint4 use_form;
vec_float4 result, fresult, mresult;
/*
* Formula:
* acosh = ln(x + sqrt(x^2 - 1))
*/
fresult = _sqrtf4(spu_madd(x, x, minus_onef));
fresult = spu_add(x, fresult);
fresult = _logf4(fresult);
/*
* Taylor Series
*/
xminus1 = spu_add(x, minus_onef);
mresult = spu_madd(xminus1, spu_splats((float)ACOSH_TAY15), spu_splats((float)ACOSH_TAY14));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY13));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY12));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY11));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY10));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY09));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY08));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY07));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY06));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY05));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY04));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY03));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY02));
mresult = spu_madd(xminus1, mresult, spu_splats((float)ACOSH_TAY01));
mresult = spu_mul(mresult, _sqrtf4(spu_mul(xminus1, twof)));
/*
* Select series or formula
*/
use_form = spu_cmpgt(x, switch_approx);
result = spu_sel(mresult, fresult, use_form);
return result;
}
#endif /* _ACOSHF4_H_ */
#endif /* __SPU__ */

@@ -0,0 +1,6 @@
#include "headers/asind2.h"
static __inline double _asin(double x)
{
return spu_extract(_asind2(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,177 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ASIND2_H_
#define _ASIND2_H_ 1
#include "simdmath.h"
#include <spu_intrinsics.h>
#include "sqrtd2.h"
#include "divd2.h"
/*
* FUNCTION
* vector double _asind2(vector double x)
*
* DESCRIPTION
* Compute the arc sine of the vector of double precision elements
* specified by x, returning the resulting angles in radians. The input
* elements are to be in the closed interval [-1, 1]. Values outside
* this range result in an invalid operation exception being latched in
* the FPSCR register and a NAN is returned.
*
* The basic algorithm computes the arc sine using a rational polynomial
* of the form x + x^3 * P(x^2) / Q(x^2) for inputs |x| in the interval
* [0, 0.5]. Values outside this range are transformed as follows:
*
* asin(x) = PI/2 - 2*asin(sqrt((1-x)/2)) for x in the range (0.5, 1.0]
*
* asin(x) = -PI/2 + 2*asin(sqrt((1+x)/2)) for x in the range [-1.0, -0.5)
*
* This yields the basic algorithm of:
*
* absx = (x < 0.0) ? -x : x;
*
* if (absx > 0.5) {
* if (x < 0) {
* addend = -SM_PI_2;
* multiplier = -2.0;
* } else {
* addend = SM_PI_2;
* multiplier = 2.0;
* }
*
* x = sqrt(-0.5 * absx + 0.5);
* } else {
* addend = 0.0;
* multiplier = -1.0;
* }
*
* x2 = x * x;
* x3 = x2 * x;
*
* p = ((((P5 * x2 + P4)*x2 + P3)*x2 + P2)*x2 + P1)*x2 + P0;
*
* q = ((((Q5 * x2 + Q4)*x2 + Q3)*x2 + Q2)*x2 + Q1)*x2 + Q0;
*
* pq = p / q;
*
* result = addend - (x3*pq + x)*multiplier;
*
* Where P5-P0 and Q5-Q0 are the polynomial coefficients.
*/
static __inline vector double _asind2(vector double x)
{
vec_uint4 x_gt_half, x_eq_half;
vec_double2 x_abs; // absolute value of x
vec_double2 x_trans; // transformed x when |x| > 0.5
vec_double2 x2, x3; // x squared and x cubed, respectively.
vec_double2 result;
vec_double2 multiplier, addend;
vec_double2 p, q, pq;
vec_double2 half = spu_splats(0.5);
vec_double2 sign = (vec_double2)spu_splats(0x8000000000000000ULL);
vec_uchar16 splat_hi = ((vec_uchar16){0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11});
// Compute the absolute value of x
x_abs = spu_andc(x, sign);
// Perform transformation for the case where |x| > 0.5. We rely on
// sqrtd2 producing a NAN if |x| > 1.0.
x_trans = _sqrtd2(spu_nmsub(x_abs, half, half));
// Determine the correct addend and multiplier.
x_gt_half = spu_cmpgt((vec_uint4)x_abs, (vec_uint4)half);
x_eq_half = spu_cmpeq((vec_uint4)x_abs, (vec_uint4)half);
x_gt_half = spu_or(x_gt_half, spu_and(x_eq_half, spu_rlqwbyte(x_gt_half, 4)));
x_gt_half = spu_shuffle(x_gt_half, x_gt_half, splat_hi);
addend = spu_and(spu_sel(spu_splats((double)SM_PI_2), x, (vec_ullong2)sign), (vec_double2)x_gt_half);
multiplier = spu_sel(spu_splats(-1.0), spu_sel(spu_splats(2.0), x, (vec_ullong2)sign), (vec_ullong2)x_gt_half);
// Select whether to use the x or the transformed x for the polynomial evaluation.
// if |x| > 0.5 use x_trans
// else use x
x = spu_sel(x, x_trans, (vec_ullong2)x_gt_half);
// Compute the polynomials.
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
p = spu_madd(spu_splats(0.004253011369004428248960), x2, spu_splats(-0.6019598008014123785661));
p = spu_madd(p, x2, spu_splats(5.444622390564711410273));
p = spu_madd(p, x2, spu_splats(-16.26247967210700244449));
p = spu_madd(p, x2, spu_splats(19.56261983317594739197));
p = spu_madd(p, x2, spu_splats(-8.198089802484824371615));
q = spu_add(x2, spu_splats(-14.74091372988853791896));
q = spu_madd(q, x2, spu_splats(70.49610280856842141659));
q = spu_madd(q, x2, spu_splats(-147.1791292232726029859));
q = spu_madd(q, x2, spu_splats(139.5105614657485689735));
q = spu_madd(q, x2, spu_splats(-49.18853881490881290097));
// Compute the rational solution p/q and final multiplication and addend
// correction.
pq = _divd2(p, q);
result = spu_nmsub(spu_madd(x3, pq, x), multiplier, addend);
return (result);
}
#endif /* _ASIND2_H_ */
#endif /* __SPU__ */
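
The two transformations quoted in the prolog are ordinary arc sine identities; a small scalar check (illustrative only, using libm) makes the reduction concrete for one value:

#include <stdio.h>
#include <math.h>

int main(void)
{
  double x = 0.8;
  /* asin(x)  =  PI/2 - 2*asin(sqrt((1 - x)/2))   for x in (0.5, 1.0]   */
  double hi = M_PI_2 - 2.0 * asin(sqrt((1.0 - x) / 2.0));
  /* asin(-x) = -PI/2 + 2*asin(sqrt((1 - x)/2))   for -x in [-1.0,-0.5) */
  double lo = -M_PI_2 + 2.0 * asin(sqrt((1.0 - x) / 2.0));

  printf("%e %e\n", hi - asin(x), lo - asin(-x));   /* both roughly 0 */
  return 0;
}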

@@ -0,0 +1,6 @@
#include "headers/asinf4.h"
static __inline float _asinf(float x)
{
return spu_extract(_asinf4(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,156 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ASINF4_H_
#define _ASINF4_H_ 1
#include <spu_intrinsics.h>
#include "divf4.h"
#include "sqrtf4.h"
/*
* FUNCTION
* vector float _asinf4(vector float x)
*
* DESCRIPTION
* The _asinf4 function computes the arc sine for a vector of values x;
* that is, the values whose sine is x. Results are undefined if x is
* outside the range [-1, 1].
*
*/
static __inline vector float _asinf4(vector float x)
{
/* The arcsin is computed using two different algorithms, depending
* upon the absolute value of the input. For inputs in the range
* [0, PI/4], it is computed as the ratio of two polynomials.
*
* asin(x) = p/q;
*
* where p = P11*x^11 + P09*x^9 + P07*x^7 + P05*x^5 + P03*x^3 + x
* q = Q08*x^8 + Q06*x^6 + Q04*x^4 + Q02*x^2 + Q00
*
* For the range of value [PI/4, 1], the arcsin is computed using:
*
* asin = PI/2 - sqrt(1 - x) * r;
*
* where r = C07*x^7 + C06*x^6 + C05*x^5 + C04*x^4 + C03*x^3 + C02*x^2 +
* C01*x + C00;
*/
vector float r, r1, r2, r_hi, r_lo;
vector float xabs, x2, x4, x6;
vector float p, p_hi, p_lo;
vector float q, q_hi, q_lo;
vector float pi_over_2 = spu_splats(1.5707963267949f);
vector float pi_over_4 = spu_splats(0.7853981633974f);
vector unsigned int msb = spu_splats(0x80000000);
x2 = spu_mul(x, x);
x4 = spu_mul(x2, x2);
x6 = spu_mul(x4, x2);
xabs = spu_andc(x, (vector float)msb);
/* Compute arc-sin for values in the range [0, PI/4]
*/
p_hi = spu_madd(spu_splats(0.0000347933107596021167570f), x2,
spu_splats(0.000791534994289814532176f));
p_hi = spu_madd(p_hi, x2, spu_splats(-0.0400555345006794114027f));
p_lo = spu_madd(spu_splats(0.201212532134862925881f), x2,
spu_splats(-0.325565818622400915405f));
p_lo = spu_madd(p_lo, x2, spu_splats(0.166666666666666657415f));
p = spu_madd(p_hi, x6, p_lo);
q_hi = spu_madd(spu_splats(0.0770381505559019352791f), x2,
spu_splats(-0.688283971605453293030f));
q_hi = spu_madd(q_hi, x2, spu_splats(2.02094576023350569471f));
q_lo = spu_madd(spu_splats(-2.40339491173441421878f), x2,
spu_splats(1.0f));
q = spu_madd(q_hi, x4, q_lo);
r1 = spu_madd(_divf4(p, q), spu_mul(xabs, x2), xabs);
/* Compute arc-sin for values in the range [PI/4, 1]
*/
r_hi = spu_madd(spu_splats(-0.0012624911f), xabs,
spu_splats(0.0066700901f));
r_hi = spu_madd(r_hi, xabs, spu_splats(-0.0170881256f));
r_hi = spu_madd(r_hi, xabs, spu_splats(0.0308918810f));
r_lo = spu_madd(spu_splats(-0.0501743046f), xabs,
spu_splats(0.0889789874f));
r_lo = spu_madd(r_lo, xabs, spu_splats(-0.2145988016f));
r_lo = spu_madd(r_lo, xabs, pi_over_2);
r = spu_madd(r_hi, x4, r_lo);
r2 = spu_nmsub(r, _sqrtf4(spu_sub(spu_splats(1.0f), xabs)),
pi_over_2);
/* Select the result depending upon the input value. Correct the
* sign of the result.
*/
return (spu_sel(spu_sel(r1, r2, spu_cmpgt(xabs, pi_over_4)),
x, msb));
}
#endif /* _ASINF4_H_ */
#endif /* __SPU__ */
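
For the [PI/4, 1] branch described above, asin(x) is written as PI/2 - sqrt(1 - x) * r(x); the factor r(x) that the C00-C07 polynomial approximates stays finite and tends to sqrt(2) as x approaches 1, which is why this form remains well conditioned at the endpoint. A scalar demonstration (sketch only, using libm):

#include <stdio.h>
#include <math.h>

int main(void)
{
  int i;
  for (i = 75; i <= 95; i += 5) {
    double x = i / 100.0;
    double r = (M_PI_2 - asin(x)) / sqrt(1.0 - x);
    printf("x = %.2f   r(x) = %.7f\n", x, r);   /* heads toward 1.4142136 */
  }
  return 0;
}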

@@ -0,0 +1,6 @@
#include "headers/asinhd2.h"
static __inline double _asinh(double x)
{
return spu_extract(_asinhd2(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,189 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ASINHD2_H_
#define _ASINHD2_H_ 1
#include <spu_intrinsics.h>
#include "logd2.h"
#include "sqrtd2.h"
/*
* FUNCTION
* vector double _asinhd2(vector double x)
*
* DESCRIPTION
* The asinhd2 function returns a vector containing the hyperbolic
* arcsines of the corresponding elements of the input vector.
*
* We are using the formula:
* asinh = ln(|x| + sqrt(x^2 + 1))
* and the anti-symmetry of asinh.
*
* For x near zero, we use the Taylor series:
*
* infinity
* ------
* - ' P (0)
* - k-1 k
* asinh x = - ----- x
* - k
* - ,
* ------
* k = 1
*
* Special Cases:
* asinh(+0) returns +0
* asinh(-0) returns -0
* asinh(+infinity) returns +infinity
* asinh(-infinity) returns -infinity
* asinh(NaN) returns NaN
*
*/
/*
* Maclaurin Series Coefficients
* for x near 0.
*/
#define ASINH_MAC01 1.0000000000000000000000000000000000000000000000000000000000000000000000E0
#define ASINH_MAC03 -1.6666666666666666666666666666666666666666666666666666666666666666666667E-1
#define ASINH_MAC05 7.5000000000000000000000000000000000000000000000000000000000000000000000E-2
#define ASINH_MAC07 -4.4642857142857142857142857142857142857142857142857142857142857142857143E-2
#define ASINH_MAC09 3.0381944444444444444444444444444444444444444444444444444444444444444444E-2
#define ASINH_MAC11 -2.2372159090909090909090909090909090909090909090909090909090909090909091E-2
#define ASINH_MAC13 1.7352764423076923076923076923076923076923076923076923076923076923076923E-2
#define ASINH_MAC15 -1.3964843750000000000000000000000000000000000000000000000000000000000000E-2
#define ASINH_MAC17 1.1551800896139705882352941176470588235294117647058823529411764705882353E-2
#if 0
#define ASINH_MAC19 -9.7616095291940789473684210526315789473684210526315789473684210526315789E-3
#define ASINH_MAC21 8.3903358096168154761904761904761904761904761904761904761904761904761905E-3
#define ASINH_MAC23 -7.3125258735988451086956521739130434782608695652173913043478260869565217E-3
#define ASINH_MAC25 6.4472103118896484375000000000000000000000000000000000000000000000000000E-3
#define ASINH_MAC27 -5.7400376708419234664351851851851851851851851851851851851851851851851852E-3
#define ASINH_MAC29 5.1533096823199041958512931034482758620689655172413793103448275862068966E-3
#define ASINH_MAC31 -4.6601434869150961599042338709677419354838709677419354838709677419354839E-3
#define ASINH_MAC33 4.2409070936793630773370916193181818181818181818181818181818181818181818E-3
#define ASINH_MAC35 -3.8809645588376692363194056919642857142857142857142857142857142857142857E-3
#define ASINH_MAC37 3.5692053938259345454138678473395270270270270270270270270270270270270270E-3
#define ASINH_MAC39 -3.2970595034734847453924325796274038461538461538461538461538461538461538E-3
#define ASINH_MAC41 3.0578216492580306693548109473251714939024390243902439024390243902439024E-3
#define ASINH_MAC43 -2.8461784011089421678767647854117460029069767441860465116279069767441860E-3
#endif
static __inline vector double _asinhd2(vector double x)
{
vec_double2 sign_mask = spu_splats(-0.0);
vec_double2 oned = spu_splats(1.0);
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 });
vec_uint4 infminus1 = spu_splats(0x7FEFFFFFU);
vec_uint4 isinfnan;
vec_double2 xabs, xsqu;
vec_uint4 xabshigh;
/* Where we switch from Maclaurin series to formula */
vec_float4 switch_approx = spu_splats(0.165f);
vec_uint4 use_form;
vec_float4 xf;
vec_double2 result, fresult, mresult;
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
xf = spu_roundtf(xabs);
xf = spu_shuffle(xf, xf, dup_even);
/*
* Formula:
* asinh = ln(|x| + sqrt(x^2 + 1))
*/
fresult = _sqrtd2(spu_add(xsqu, oned));
fresult = spu_add(xabs, fresult);
fresult = _logd2(fresult);
/*
* Maclaurin Series approximation
*/
mresult = spu_madd(xsqu, spu_splats(ASINH_MAC17), spu_splats(ASINH_MAC15));
mresult = spu_madd(xsqu, mresult, spu_splats(ASINH_MAC13));
mresult = spu_madd(xsqu, mresult, spu_splats(ASINH_MAC11));
mresult = spu_madd(xsqu, mresult, spu_splats(ASINH_MAC09));
mresult = spu_madd(xsqu, mresult, spu_splats(ASINH_MAC07));
mresult = spu_madd(xsqu, mresult, spu_splats(ASINH_MAC05));
mresult = spu_madd(xsqu, mresult, spu_splats(ASINH_MAC03));
mresult = spu_madd(xsqu, mresult, spu_splats(ASINH_MAC01));
mresult = spu_mul(xabs, mresult);
/*
* Choose between series and formula
*/
use_form = spu_cmpgt(xf, switch_approx);
result = spu_sel(mresult, fresult, (vec_ullong2)use_form);
/* Special Cases */
/* Infinity and NaN */
xabshigh = (vec_uint4)spu_shuffle(xabs, xabs, dup_even);
isinfnan = spu_cmpgt(xabshigh, infminus1);
result = spu_sel(result, x, (vec_ullong2)isinfnan);
/* Restore the sign - asinh is anti-symmetric */
result = spu_sel(result, x, (vec_ullong2)sign_mask);
return result;
}
#endif /* _ASINHD2_H_ */
#endif /* __SPU__ */
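
The Maclaurin branch is there for the same reason as in acoshd2: for small |x|, sqrt(x^2 + 1) is barely above 1, so the sum |x| + sqrt(x^2 + 1) discards low-order bits of x before the log is taken. A scalar sketch keeping only the first three series terms above (illustrative only):

#include <math.h>

/* Sketch: asinh(x) ~= x - x^3/6 + 3x^5/40 for small x,
   matching ASINH_MAC01, MAC03 and MAC05, versus the closed form. */
static double asinh_series3(double x)
{
  double x2 = x * x;
  return x * (1.0 + x2 * (-1.0 / 6.0 + x2 * (3.0 / 40.0)));
}

static double asinh_formula(double x)
{
  return log(fabs(x) + sqrt(x * x + 1.0));   /* sign restored separately */
}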

@@ -0,0 +1,6 @@
#include "headers/asinhf4.h"
static __inline float _asinhf(float x)
{
return spu_extract(_asinhf4(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,180 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ASINHF4_H_
#define _ASINHF4_H_ 1
#include <spu_intrinsics.h>
#include "logf4.h"
#include "sqrtf4.h"
/*
* FUNCTION
* vector float _asinhf4(vector float x)
*
* DESCRIPTION
* The asinhf4 function returns a vector containing the hyperbolic
* arcsines of the corresponding elements of the input vector.
*
* We are using the formula:
* asinh = ln(|x| + sqrt(x^2 + 1))
* and the anti-symmetry of asinh.
*
* For x near zero, we use the Taylor series:
*
* infinity
* ------
* - ' P (0)
* - k-1 k
* asinh x = - ----- x
* - k
* - ,
* ------
* k = 1
*
* Special Cases:
* - asinh(+0) returns +0
* - asinh(-0) returns -0
* - Normally, asinh(+/- infinity) returns +/- infinity,
* but on the SPU, single-precision infinity is not supported,
* so it is treated as a normal number here.
*
*/
/*
* Maclaurin Series Coefficients
* for x near 0.
*/
#define ASINH_MAC01 1.0000000000000000000000000000000000000000000000000000000000000000000000E0
#define ASINH_MAC03 -1.6666666666666666666666666666666666666666666666666666666666666666666667E-1
#define ASINH_MAC05 7.5000000000000000000000000000000000000000000000000000000000000000000000E-2
#define ASINH_MAC07 -4.4642857142857142857142857142857142857142857142857142857142857142857143E-2
#define ASINH_MAC09 3.0381944444444444444444444444444444444444444444444444444444444444444444E-2
#define ASINH_MAC11 -2.2372159090909090909090909090909090909090909090909090909090909090909091E-2
#define ASINH_MAC13 1.7352764423076923076923076923076923076923076923076923076923076923076923E-2
#define ASINH_MAC15 -1.3964843750000000000000000000000000000000000000000000000000000000000000E-2
#define ASINH_MAC17 1.1551800896139705882352941176470588235294117647058823529411764705882353E-2
#define ASINH_MAC19 -9.7616095291940789473684210526315789473684210526315789473684210526315789E-3
#define ASINH_MAC21 8.3903358096168154761904761904761904761904761904761904761904761904761905E-3
#define ASINH_MAC23 -7.3125258735988451086956521739130434782608695652173913043478260869565217E-3
#define ASINH_MAC25 6.4472103118896484375000000000000000000000000000000000000000000000000000E-3
#define ASINH_MAC27 -5.7400376708419234664351851851851851851851851851851851851851851851851852E-3
#define ASINH_MAC29 5.1533096823199041958512931034482758620689655172413793103448275862068966E-3
#if 0
#define ASINH_MAC31 -4.6601434869150961599042338709677419354838709677419354838709677419354839E-3
#define ASINH_MAC33 4.2409070936793630773370916193181818181818181818181818181818181818181818E-3
#define ASINH_MAC35 -3.8809645588376692363194056919642857142857142857142857142857142857142857E-3
#define ASINH_MAC37 3.5692053938259345454138678473395270270270270270270270270270270270270270E-3
#define ASINH_MAC39 -3.2970595034734847453924325796274038461538461538461538461538461538461538E-3
#define ASINH_MAC41 3.0578216492580306693548109473251714939024390243902439024390243902439024E-3
#define ASINH_MAC43 -2.8461784011089421678767647854117460029069767441860465116279069767441860E-3
#endif
static __inline vector float _asinhf4(vector float x)
{
vec_float4 sign_mask = spu_splats(-0.0f);
vec_float4 onef = spu_splats(1.0f);
vec_float4 result, fresult, mresult;
vec_float4 xabs, xsqu;
/* Where we switch from Maclaurin series to formula */
vec_float4 switch_approx = spu_splats(0.685f);
vec_uint4 use_form;
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
/*
* Formula:
* asinh = ln(|x| + sqrt(x^2 + 1))
*/
fresult = _sqrtf4(spu_madd(x, x, onef));
fresult = spu_add(xabs, fresult);
fresult = _logf4(fresult);
/*
* Maclaurin Series
*/
mresult = spu_madd(xsqu, spu_splats((float)ASINH_MAC29), spu_splats((float)ASINH_MAC27));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC25));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC23));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC21));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC19));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC17));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC15));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC13));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC11));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC09));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC07));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC05));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC03));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ASINH_MAC01));
mresult = spu_mul(xabs, mresult);
/*
* Choose between series and formula
*/
use_form = spu_cmpgt(xabs, switch_approx);
result = spu_sel(mresult, fresult, use_form);
/* Preserve sign - asinh is anti-symmetric */
result = spu_sel(result, x, (vec_uint4)sign_mask);
return result;
}
#endif /* _ASINHF4_H_ */
#endif /* __SPU__ */
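
[Editorial note, not part of the file above: the split between the Maclaurin series and the logarithmic identity is easier to see in scalar form. The sketch below is a hypothetical scalar analogue that keeps the 0.685 switch point and the first few ASINH_MAC coefficients from the SIMD code; it is illustrative only and truncates the series.]

#include <math.h>

/* Scalar sketch of the _asinhf4 strategy: a short odd polynomial near zero,
 * the identity asinh(x) = ln(|x| + sqrt(x^2 + 1)) elsewhere, and the sign
 * of x restored at the end (asinh is odd). */
static float asinhf_sketch(float x)
{
    float ax = fabsf(x);
    float r;
    if (ax > 0.685f) {                   /* same switch point as the SIMD code */
        r = logf(ax + sqrtf(ax * ax + 1.0f));
    } else {                             /* truncated Maclaurin series */
        float x2 = ax * ax;
        r = ax * (1.0f + x2 * (-1.0f/6.0f
                  + x2 * (3.0f/40.0f
                  + x2 * (-15.0f/336.0f))));
    }
    return copysignf(r, x);
}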


@ -0,0 +1,6 @@
#include "headers/atand2.h"
static __inline double _atan(double x)
{
return spu_extract(_atand2(spu_promote(x, 0)), 0);
}


@ -0,0 +1,6 @@
#include "headers/atan2d2.h"
static __inline double _atan2(double y, double x)
{
return spu_extract(_atan2d2(spu_promote(y, 0), spu_promote(x, 0)), 0);
}


@ -0,0 +1,108 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ATAN2D2_H_
#define _ATAN2D2_H_ 1
#include <spu_intrinsics.h>
#include "divd2.h"
#include "atand2.h"
/*
* FUNCTION
* vector double _atan2d2(vector double y, vector double x)
*
* DESCRIPTION
* The atan2d2 function returns a vector containing the angles
 * whose tangents are y/x for the corresponding elements of the
* input vectors.
*
* The reason this function exists is to use the signs of the
* arguments to determine the quadrant of the result. Consider
* sin(x)/cos(x) on the domain (-pi, pi]. Four quadrants are
* defined by the signs of sin and cos on this domain.
*
* Special Cases:
* - If the corresponding elements of x and y are zero, the
* resulting element is undefined.
*
*/
static __inline vector double _atan2d2(vector double y, vector double x)
{
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 });
vector double pi = spu_splats(SM_PI);
vector unsigned long long ones = spu_splats(0xFFFFFFFFFFFFFFFFull);
vector unsigned long long quad1;
vector unsigned long long quad4;
vector double result;
vector unsigned long long xlt0;
vector unsigned long long yge0;
vector unsigned long long ylt0;
xlt0 = (vec_ullong2)spu_rlmaska((vec_int4)spu_shuffle(x,x,dup_even), 31);
ylt0 = (vec_ullong2)spu_rlmaska((vec_int4)spu_shuffle(y,y,dup_even), 31);
yge0 = spu_xor(ylt0, ones);
quad1 = spu_and(ylt0, xlt0);
quad4 = spu_and(yge0, xlt0);
result = _atand2(_divd2(y,x));
result = spu_sel(result, spu_sub(result, pi), quad1);
result = spu_sel(result, spu_add(result, pi), quad4);
return result;
}
#endif /* _ATAN2D2_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/atan2f4.h"
static __inline float _atan2f(float y, float x)
{
return spu_extract(_atan2f4(spu_promote(y, 0), spu_promote(x, 0)), 0);
}


@ -0,0 +1,106 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ATAN2F4_H_
#define _ATAN2F4_H_ 1
#include <spu_intrinsics.h>
#include "divf4.h"
#include "atanf4.h"
/*
* FUNCTION
* vector float _atan2f4(vector float y, vector float x)
*
* DESCRIPTION
* The atan2f4 function returns a vector containing the angles
 * whose tangents are y/x for the corresponding elements of the
* input vectors.
*
* The reason this function exists is to use the signs of the
* arguments to determine the quadrant of the result. Consider
* sin(x)/cos(x) on the domain (-pi, pi]. Four quadrants are
* defined by the signs of sin and cos on this domain.
*
* Special Cases:
* - If the corresponding elements of x and y are zero, the
* resulting element is undefined.
*
*/
static __inline vector float _atan2f4(vector float y, vector float x)
{
vector float pi = spu_splats((float)SM_PI);
vector float zero = spu_splats(0.0f);
vector unsigned int quad1;
vector unsigned int quad4;
vector float result;
vector unsigned int xlt0;
vector unsigned int yge0;
vector unsigned int ylt0;
xlt0 = (vec_uint4)spu_rlmaska((vec_int4)x, 31);
ylt0 = (vec_uint4)spu_rlmaska((vec_int4)y, 31);
yge0 = spu_cmpeq(ylt0, (vec_uint4)zero);
quad1 = spu_and(ylt0, xlt0);
quad4 = spu_and(yge0, xlt0);
result = _atanf4(_divf4(y,x));
result = spu_sel(result, spu_sub(result, pi), quad1);
result = spu_sel(result, spu_add(result, pi), quad4);
return result;
}
#endif /* _ATAN2F4_H_ */
#endif /* __SPU__ */


@ -0,0 +1,115 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ATAND2_H_
#define _ATAND2_H_ 1
#include <spu_intrinsics.h>
#include "simdmath.h"
#include "recipd2.h"
#include "logd2.h"
#include "acosd2.h"
#include "asind2.h"
#include "sqrtd2.h"
/*
* FUNCTION
* vector double _atand2(vector double x)
*
* DESCRIPTION
* The _atand2 function computes the arc tangent of a vector of values x.
*
* The arc tangent function is computed using the following relations:
 *      [0, 1]         : atan(x) = arcsin(x / sqrt(x^2 + 1))
 *      (1, infinity]  : atan(x) = PI/2 - atan(1/x)
 *      [-infinity, 0) : atan(x) = -atan(|x|)
*
*/
static __inline vector double _atand2(vector double x)
{
vector double signbit = spu_splats(-0.0);
vector double oned = spu_splats(1.0);
vector double pi2 = spu_splats(SM_PI_2);
vector double xabs, x1;
vector double result;
vector unsigned long long gt1;
xabs = spu_andc(x, signbit);
gt1 = spu_cmpgt(xabs, oned);
/*
* For x > 1, use the relation:
* atan(x) = pi/2 - atan(1/x), x>1
*/
x1 = spu_sel(xabs, _recipd2(xabs), gt1);
vector double x1squ = spu_mul(x1, x1);
result = _asind2(_divd2(x1, _sqrtd2(spu_add(x1squ, oned))));
/*
* For x > 1, use the relation: atan(x) = pi/2 - atan(1/x), x>1
*/
result = spu_sel(result, spu_sub(pi2, result), gt1);
/*
* Antisymmetric function - preserve sign of x in result.
*/
result = spu_sel(result, x, (vec_ullong2)signbit);
return (result);
}
#endif /* _ATAND2_H_ */
#endif /* __SPU__ */
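
[Editorial note: the reduction used by _atand2 can be restated in scalar terms: fold |x| into [0, 1] with the reciprocal identity, evaluate atan through asin, then undo the folding and the sign. The function below is a hypothetical scalar sketch of that flow, not part of the file.]

#include <math.h>

#ifndef M_PI_2
#define M_PI_2 1.57079632679489661923
#endif

/* Scalar sketch of the _atand2 reduction. */
static double atan_via_asin(double x)
{
    double ax = fabs(x);
    double t  = (ax > 1.0) ? 1.0 / ax : ax;    /* t is in [0, 1]             */
    double r  = asin(t / sqrt(t * t + 1.0));   /* atan(t)                    */
    if (ax > 1.0)
        r = M_PI_2 - r;                        /* atan(x) = pi/2 - atan(1/x) */
    return copysign(r, x);                     /* atan is odd                */
}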


@ -0,0 +1,136 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ATANF4_H_
#define _ATANF4_H_ 1
#include <spu_intrinsics.h>
#include "simdmath.h"
#include "recipf4.h"
/*
* FUNCTION
* vector float _atanf4(vector float x)
*
* DESCRIPTION
* The _atanf4 function computes the arc tangent of a vector of values x;
 * that is, the values whose tangents are x.
*
* The _atanf4 function returns the arc tangents in radians and the value
* is mathematically defined to be in the range -PI/2 to PI/2.
*
* The arc tangent function is computed using a polynomial approximation
* (B. Carlson, M. Goldstein, Los Alamos Scientific Laboratory, 1955).
* __8__
* \
* \
* atan(x) = / Ci*x^(2*i+1)
* /____
* i=0
*
* for x in the range -1 to 1. The remaining regions are defined to be:
*
* [1, infinity] : PI/2 + atanf(-1/x)
* [-infinity, -1] : -PI/2 + atanf(-1/x)
*
*/
static __inline vector float _atanf4(vector float x)
{
vector float bias;
vector float x2, x3, x4, x8, x9;
vector float hi, lo;
vector float result;
vector float inv_x;
vector unsigned int sign;
vector unsigned int select;
sign = spu_sl(spu_rlmask((vector unsigned int)x, -31), 31);
inv_x = _recipf4(x);
inv_x = (vector float)spu_xor((vector unsigned int)inv_x, spu_splats(0x80000000));
select = (vector unsigned int)spu_cmpabsgt(x, spu_splats(1.0f));
bias = (vector float)spu_or(sign, (vector unsigned int)(spu_splats((float)SM_PI_2)));
bias = (vector float)spu_and((vector unsigned int)bias, select);
x = spu_sel(x, inv_x, select);
/* The instruction count could be reduced if the polynomial were
 * computed entirely from nested (dependent) fma's. However, to
 * reduce the number of pipeline stalls, the polynomial is evaluated
 * in two halves (hi and lo).
*/
bias = spu_add(bias, x);
x2 = spu_mul(x, x);
x3 = spu_mul(x2, x);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x9 = spu_mul(x8, x);
hi = spu_madd(spu_splats(0.0028662257f), x2, spu_splats(-0.0161657367f));
hi = spu_madd(hi, x2, spu_splats(0.0429096138f));
hi = spu_madd(hi, x2, spu_splats(-0.0752896400f));
hi = spu_madd(hi, x2, spu_splats(0.1065626393f));
lo = spu_madd(spu_splats(-0.1420889944f), x2, spu_splats(0.1999355085f));
lo = spu_madd(lo, x2, spu_splats(-0.3333314528f));
lo = spu_madd(lo, x3, bias);
result = spu_madd(hi, x9, lo);
return (result);
}
#endif /* _ATANF4_H_ */
#endif /* __SPU__ */
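
[Editorial note: the polynomial above can also be evaluated with a plain scalar Horner loop, which may make the coefficient ordering clearer. The sketch below reuses the eight coefficients from the SIMD code, applies only for |x| <= 1, and is a hypothetical illustration, not part of the file.]

/* Scalar sketch of the _atanf4 polynomial on [-1, 1]:
 * atan(x) ~ x + c3*x^3 + c5*x^5 + ... + c17*x^17. */
static float atan_poly_sketch(float x)
{
    static const float c[8] = {
        -0.3333314528f,  0.1999355085f, -0.1420889944f,  0.1065626393f,
        -0.0752896400f,  0.0429096138f, -0.0161657367f,  0.0028662257f
    };
    float x2 = x * x;
    float p = c[7];
    int i;
    for (i = 6; i >= 0; i--)     /* Horner's method in powers of x^2 */
        p = p * x2 + c[i];
    return x + x * x2 * p;
}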


@ -0,0 +1,6 @@
#include "headers/atanhd2.h"
static __inline double _atanh(double x)
{
return spu_extract(_atanhd2(spu_promote(x, 0)), 0);
}


@ -0,0 +1,177 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ATANHD2_H_
#define _ATANHD2_H_ 1
#include <spu_intrinsics.h>
#include "logd2.h"
/*
* FUNCTION
* vector double _atanhd2(vector double x)
*
* DESCRIPTION
* The atanhd2 function returns a vector containing the hyperbolic
* arctangents of the corresponding elements of the input vector.
*
* We are using the formula:
* atanh x = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)]
* and the anti-symmetry of atanh.
*
* For x near 0, we use the Taylor series:
* atanh x = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + ...
*
* Special Cases:
* - atanh(1) = Infinity
* - atanh(-1) = -Infinity
* - atanh(x) for |x| > 1 = NaN
*
*/
/*
* Maclaurin Series Coefficients
* for x near 0.
*/
#define ATANH_MAC01 1.0000000000000000000000000000000000000000000000000000000000000000000000E0
#define ATANH_MAC03 3.3333333333333333333333333333333333333333333333333333333333333333333333E-1
#define ATANH_MAC05 2.0000000000000000000000000000000000000000000000000000000000000000000000E-1
#define ATANH_MAC07 1.4285714285714285714285714285714285714285714285714285714285714285714286E-1
#define ATANH_MAC09 1.1111111111111111111111111111111111111111111111111111111111111111111111E-1
#define ATANH_MAC11 9.0909090909090909090909090909090909090909090909090909090909090909090909E-2
#define ATANH_MAC13 7.6923076923076923076923076923076923076923076923076923076923076923076923E-2
#if 0
#define ATANH_MAC15 6.6666666666666666666666666666666666666666666666666666666666666666666667E-2
#define ATANH_MAC17 5.8823529411764705882352941176470588235294117647058823529411764705882353E-2
#define ATANH_MAC19 5.2631578947368421052631578947368421052631578947368421052631578947368421E-2
#define ATANH_MAC21 4.7619047619047619047619047619047619047619047619047619047619047619047619E-2
#define ATANH_MAC23 4.3478260869565217391304347826086956521739130434782608695652173913043478E-2
#define ATANH_MAC25 4.0000000000000000000000000000000000000000000000000000000000000000000000E-2
#define ATANH_MAC27 3.7037037037037037037037037037037037037037037037037037037037037037037037E-2
#define ATANH_MAC29 3.4482758620689655172413793103448275862068965517241379310344827586206897E-2
#define ATANH_MAC31 3.2258064516129032258064516129032258064516129032258064516129032258064516E-2
#define ATANH_MAC33 3.0303030303030303030303030303030303030303030303030303030303030303030303E-2
#define ATANH_MAC35 2.8571428571428571428571428571428571428571428571428571428571428571428571E-2
#define ATANH_MAC37 2.7027027027027027027027027027027027027027027027027027027027027027027027E-2
#define ATANH_MAC39 2.5641025641025641025641025641025641025641025641025641025641025641025641E-2
#define ATANH_MAC41 2.4390243902439024390243902439024390243902439024390243902439024390243902E-2
#define ATANH_MAC43 2.3255813953488372093023255813953488372093023255813953488372093023255814E-2
#define ATANH_MAC45 2.2222222222222222222222222222222222222222222222222222222222222222222222E-2
#define ATANH_MAC47 2.1276595744680851063829787234042553191489361702127659574468085106382979E-2
#define ATANH_MAC49 2.0408163265306122448979591836734693877551020408163265306122448979591837E-2
#define ATANH_MAC51 1.9607843137254901960784313725490196078431372549019607843137254901960784E-2
#define ATANH_MAC53 1.8867924528301886792452830188679245283018867924528301886792452830188679E-2
#define ATANH_MAC55 1.8181818181818181818181818181818181818181818181818181818181818181818182E-2
#define ATANH_MAC57 1.7543859649122807017543859649122807017543859649122807017543859649122807E-2
#define ATANH_MAC59 1.6949152542372881355932203389830508474576271186440677966101694915254237E-2
#endif
static __inline vector double _atanhd2(vector double x)
{
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 });
vec_double2 sign_mask = spu_splats(-0.0);
vec_double2 oned = spu_splats(1.0);
vec_double2 onehalfd = spu_splats(0.5);
vec_uint4 infminus1 = spu_splats(0x7FEFFFFFU);
vec_uint4 isinfnan;
vec_uint4 xabshigh;
vec_double2 xabs, xsqu;
/* Where we switch from maclaurin to formula */
vec_float4 switch_approx = spu_splats(0.08f);
vec_uint4 use_form;
vec_float4 xf;
vec_double2 result, fresult, mresult;
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
xf = spu_roundtf(xabs);
xf = spu_shuffle(xf, xf, dup_even);
/*
* Formula:
* atanh = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)]
*/
fresult = spu_sub(_logd2(spu_add(oned, xabs)), _logd2(spu_sub(oned, xabs)));
fresult = spu_mul(fresult, onehalfd);
/*
* Taylor Series
*/
mresult = spu_madd(xsqu, spu_splats(ATANH_MAC13), spu_splats(ATANH_MAC11));
mresult = spu_madd(xsqu, mresult, spu_splats(ATANH_MAC09));
mresult = spu_madd(xsqu, mresult, spu_splats(ATANH_MAC07));
mresult = spu_madd(xsqu, mresult, spu_splats(ATANH_MAC05));
mresult = spu_madd(xsqu, mresult, spu_splats(ATANH_MAC03));
mresult = spu_madd(xsqu, mresult, spu_splats(ATANH_MAC01));
mresult = spu_mul(xabs, mresult);
/*
* Choose between series and formula
*/
use_form = spu_cmpgt(xf, switch_approx);
result = spu_sel(mresult, fresult, (vec_ullong2)use_form);
/* Infinity and NaN */
xabshigh = (vec_uint4)spu_shuffle(xabs, xabs, dup_even);
isinfnan = spu_cmpgt(xabshigh, infminus1);
result = spu_sel(result, x, (vec_ullong2)isinfnan);
/* Restore sign - atanh is anti-symmetric */
result = spu_sel(result, x, (vec_ullong2)sign_mask);
return result;
}
#endif /* _ATANHD2_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/atanhf4.h"
static __inline float _atanhf(float x)
{
return spu_extract(_atanhf4(spu_promote(x, 0)), 0);
}


@ -0,0 +1,149 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ATANHF4_H_
#define _ATANHF4_H_ 1
#include <spu_intrinsics.h>
#include "logf4.h"
/*
* FUNCTION
* vector float _atanhf4(vector float x)
*
* DESCRIPTION
* The atanhf4 function returns a vector containing the hyperbolic
* arctangents of the corresponding elements of the input vector.
*
* We are using the formula:
* atanh x = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)]
* and the anti-symmetry of atanh.
*
* For x near 0, we use the Taylor series:
* atanh x = x + x^3/3 + x^5/5 + x^7/7 + x^9/9 + ...
*
* Special Cases:
* - atanh(1) = HUGE_VALF
* - atanh(-1) = -HUGE_VALF
* - The result is undefined for x outside of the domain [-1,1],
* since single-precision NaN is not supported on the SPU.
*
*/
/*
* Maclaurin Series Coefficients
* for x near 0.
*/
#define ATANH_MAC01 1.0000000000000000000000000000000000000000000000000000000000000000000000E0
#define ATANH_MAC03 3.3333333333333333333333333333333333333333333333333333333333333333333333E-1
#define ATANH_MAC05 2.0000000000000000000000000000000000000000000000000000000000000000000000E-1
#define ATANH_MAC07 1.4285714285714285714285714285714285714285714285714285714285714285714286E-1
#if 0
#define ATANH_MAC09 1.1111111111111111111111111111111111111111111111111111111111111111111111E-1
#define ATANH_MAC11 9.0909090909090909090909090909090909090909090909090909090909090909090909E-2
#define ATANH_MAC13 7.6923076923076923076923076923076923076923076923076923076923076923076923E-2
#define ATANH_MAC15 6.6666666666666666666666666666666666666666666666666666666666666666666667E-2
#define ATANH_MAC17 5.8823529411764705882352941176470588235294117647058823529411764705882353E-2
#define ATANH_MAC19 5.2631578947368421052631578947368421052631578947368421052631578947368421E-2
#define ATANH_MAC21 4.7619047619047619047619047619047619047619047619047619047619047619047619E-2
#define ATANH_MAC23 4.3478260869565217391304347826086956521739130434782608695652173913043478E-2
#define ATANH_MAC25 4.0000000000000000000000000000000000000000000000000000000000000000000000E-2
#define ATANH_MAC27 3.7037037037037037037037037037037037037037037037037037037037037037037037E-2
#define ATANH_MAC29 3.4482758620689655172413793103448275862068965517241379310344827586206897E-2
#endif
static __inline vector float _atanhf4(vector float x)
{
vec_float4 sign_mask = spu_splats(-0.0f);
vec_float4 onef = spu_splats(1.0f);
vec_float4 onehalff = spu_splats(0.5f);
vec_float4 result, fresult, mresult;
vec_float4 xabs, xsqu;
/* Where we switch from maclaurin to formula */
//vec_float4 switch_approx = spu_splats(0.4661f);
vec_float4 switch_approx = spu_splats(0.165f);
vec_uint4 use_form;
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
/*
* Formula:
* atanh = 1/2 * ln((1 + x)/(1 - x)) = 1/2 * [ln(1+x) - ln(1-x)]
*/
fresult = spu_sub(_logf4(spu_add(onef, xabs)), _logf4(spu_sub(onef, xabs)));
fresult = spu_mul(fresult, onehalff);
/*
* Taylor Series
*/
mresult = spu_madd(xsqu, spu_splats((float)ATANH_MAC07), spu_splats((float)ATANH_MAC05));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ATANH_MAC03));
mresult = spu_madd(xsqu, mresult, spu_splats((float)ATANH_MAC01));
mresult = spu_mul(xabs, mresult);
/*
* Choose between series and formula
*/
use_form = spu_cmpgt(xabs, switch_approx);
result = spu_sel(mresult, fresult, use_form);
/* Preserve sign - atanh is anti-symmetric */
result = spu_sel(result, x, (vec_uint4)sign_mask);
return result;
}
#endif /* _ATANHF4_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/cosd2.h"
static __inline double _cos(double angle)
{
return spu_extract(_cosd2(spu_promote(angle, 0)), 0);
}


@ -0,0 +1,216 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _COS_SIN_H_
#define _COS_SIN_H_ 1
#define M_PI_OVER_4_HI_32 0x3fe921fb
#define M_PI_OVER_4 0.78539816339744827900
#define M_FOUR_OVER_PI 1.27323954478442180616
#define M_PI_OVER_2 1.57079632679489655800
#define M_PI_OVER_2_HI 1.57079632673412561417
#define M_PI_OVER_2_LO 0.0000000000607710050650619224932
#define M_PI_OVER_2F_HI 1.570312500000000000
#define M_PI_OVER_2F_LO 0.000483826794896558
/* The following coefficients correspond to the Taylor series
* coefficients for cos and sin.
*/
#define COS_14 -0.00000000001138218794258068723867
#define COS_12 0.000000002087614008917893178252
#define COS_10 -0.0000002755731724204127572108
#define COS_08 0.00002480158729870839541888
#define COS_06 -0.001388888888888735934799
#define COS_04 0.04166666666666666534980
#define COS_02 -0.5000000000000000000000
#define COS_00 1.0
#define SIN_15 -0.00000000000076471637318198164759
#define SIN_13 0.00000000016059043836821614599
#define SIN_11 -0.000000025052108385441718775
#define SIN_09 0.0000027557319223985890653
#define SIN_07 -0.0001984126984126984127
#define SIN_05 0.008333333333333333333
#define SIN_03 -0.16666666666666666666
#define SIN_01 1.0
/* Compute the following for each floating point element of x.
* x = fmod(x, PI/4);
 * ix = (int)(x / (PI/4));
* This allows one to compute cos / sin over the limited range
* and select the sign and correct result based upon the octant
* of the original angle (as defined by the ix result).
*
* Expected Inputs Types:
* x = vec_float4
* ix = vec_int4
*/
#define MOD_PI_OVER_FOUR_F(_x, _ix) { \
vec_float4 fx; \
\
_ix = spu_convts(spu_mul(_x, spu_splats((float)M_FOUR_OVER_PI)), 0); \
_ix = spu_add(_ix, spu_add(spu_rlmaska((vec_int4)_x, -31), 1)); \
\
fx = spu_convtf(spu_rlmaska(_ix, -1), 0); \
_x = spu_nmsub(fx, spu_splats((float)M_PI_OVER_2F_HI), _x); \
_x = spu_nmsub(fx, spu_splats((float)M_PI_OVER_2F_LO), _x); \
}
/* Double precision MOD_PI_OVER_FOUR
*
* Expected Inputs Types:
* x = vec_double2
* ix = vec_int4
*/
#define MOD_PI_OVER_FOUR(_x, _ix) { \
vec_float4 fx; \
vec_double2 dix; \
\
fx = spu_roundtf(spu_mul(_x, spu_splats(M_FOUR_OVER_PI))); \
_ix = spu_convts(fx, 0); \
_ix = spu_add(_ix, spu_add(spu_rlmaska((vec_int4)fx, -31), 1)); \
\
dix = spu_extend(spu_convtf(spu_rlmaska(_ix, -1), 0)); \
_x = spu_nmsub(spu_splats(M_PI_OVER_2_HI), dix, _x); \
_x = spu_nmsub(spu_splats(M_PI_OVER_2_LO), dix, _x); \
}
/* Compute the cos(x) and sin(x) for the range reduced angle x.
* In order to compute these trig functions to full single precision
* accuracy, we solve the Taylor series.
*
* c = cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! - x^10/10!
 * s = sin(x) = x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - x^11/11!
*
* Expected Inputs Types:
* x = vec_float4
* c = vec_float4
* s = vec_float4
*/
#define COMPUTE_COS_SIN_F(_x, _c, _s) { \
vec_float4 x2, x4, x6; \
vec_float4 cos_hi, cos_lo; \
vec_float4 sin_hi, sin_lo; \
\
x2 = spu_mul(_x, _x); \
x4 = spu_mul(x2, x2); \
x6 = spu_mul(x2, x4); \
\
cos_hi = spu_madd(spu_splats((float)COS_10), x2, spu_splats((float)COS_08)); \
cos_lo = spu_madd(spu_splats((float)COS_04), x2, spu_splats((float)COS_02)); \
cos_hi = spu_madd(cos_hi, x2, spu_splats((float)COS_06)); \
cos_lo = spu_madd(cos_lo, x2, spu_splats((float)COS_00)); \
_c = spu_madd(cos_hi, x6, cos_lo); \
\
sin_hi = spu_madd(spu_splats((float)SIN_11), x2, spu_splats((float)SIN_09)); \
sin_lo = spu_madd(spu_splats((float)SIN_05), x2, spu_splats((float)SIN_03)); \
sin_hi = spu_madd(sin_hi, x2, spu_splats((float)SIN_07)); \
sin_lo = spu_madd(sin_lo, x2, spu_splats((float)SIN_01)); \
_s = spu_madd(sin_hi, x6, sin_lo); \
_s = spu_mul(_s, _x); \
}
/* Compute the cos(x) and sin(x) for the range reduced angle x.
* This version computes the cosine and sine to double precision
* accuracy using the Taylor series:
*
* c = cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + x^8/8! - x^10/10! + x^12/12! - x^14/14!
 * s = sin(x) = x - x^3/3! + x^5/5! - x^7/7! + x^9/9! - x^11/11! + x^13/13! - x^15/15!
*
* Expected Inputs Types:
* x = vec_double2
* c = vec_double2
* s = vec_double2
*/
#define COMPUTE_COS_SIN(_x, _c, _s) { \
vec_double2 x2, x4, x8; \
vec_double2 cos_hi, cos_lo; \
vec_double2 sin_hi, sin_lo; \
\
x2 = spu_mul(_x, _x); \
x4 = spu_mul(x2, x2); \
x8 = spu_mul(x4, x4); \
\
cos_hi = spu_madd(spu_splats(COS_14), x2, spu_splats(COS_12)); \
cos_lo = spu_madd(spu_splats(COS_06), x2, spu_splats(COS_04)); \
cos_hi = spu_madd(cos_hi, x2, spu_splats(COS_10)); \
cos_lo = spu_madd(cos_lo, x2, spu_splats(COS_02)); \
cos_hi = spu_madd(cos_hi, x2, spu_splats(COS_08)); \
cos_lo = spu_madd(cos_lo, x2, spu_splats(COS_00)); \
_c = spu_madd(cos_hi, x8, cos_lo); \
\
sin_hi = spu_madd(spu_splats(SIN_15), x2, spu_splats(SIN_13)); \
sin_lo = spu_madd(spu_splats(SIN_07), x2, spu_splats(SIN_05)); \
sin_hi = spu_madd(sin_hi, x2, spu_splats(SIN_11)); \
sin_lo = spu_madd(sin_lo, x2, spu_splats(SIN_03)); \
sin_hi = spu_madd(sin_hi, x2, spu_splats(SIN_09)); \
sin_lo = spu_madd(sin_lo, x2, spu_splats(SIN_01)); \
_s = spu_madd(sin_hi, x8, sin_lo); \
_s = spu_mul(_s, _x); \
}
#endif /* _COS_SIN_H_ */
#endif /* __SPU__ */


@ -0,0 +1,109 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _COSD2_H_
#define _COSD2_H_ 1
#include <spu_intrinsics.h>
#include "cos_sin.h"
/*
* FUNCTION
* vector double _cosd2(vector double angle)
*
* DESCRIPTION
* _cosd2 computes the cosine of a vector of angles (expressed
 * in radians) to double-precision floating-point accuracy.
*/
static __inline vector double _cosd2(vector double angle)
{
vec_int4 octant;
vec_ullong2 select;
vec_double2 cos, sin;
vec_double2 toggle_sign, answer;
/* Range reduce the input angle x into the range -PI/4 to PI/4
* by performing simple modulus.
*/
MOD_PI_OVER_FOUR(angle, octant);
/* Compute the cosine and sine of the range reduced input.
*/
COMPUTE_COS_SIN(angle, cos, sin);
/* For each SIMD element, select which result (cos or sin) to use
* with a sign correction depending upon the octant of the original
 * angle.
*
* octants angles select sign toggle
* ------- ------------ ------ -----------
* 0 0 to 45 cos no
* 1,2 45 to 135 sin yes
* 3,4 135 to 225 cos yes
* 5,6 225 to 315 sin no
* 7 315 to 360 cos no
*/
octant = spu_shuffle(octant, octant, ((vec_uchar16) { 0,1, 2, 3, 0,1, 2, 3, 8,9,10,11, 8,9,10,11 }));
toggle_sign = (vec_double2)spu_sl(spu_and(spu_add(octant, 2), 4), ((vec_uint4) { 29,32,29,32 }));
select = (vec_ullong2)spu_cmpeq(spu_and(octant, 2), 0);
answer = spu_xor(spu_sel(sin, cos, select), toggle_sign);
return (answer);
}
#endif /* _COSD2_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/cosf4.h"
static __inline float _cosf(float angle)
{
return spu_extract(_cosf4(spu_promote(angle, 0)), 0);
}


@ -0,0 +1,111 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _COSF4_H_
#define _COSF4_H_ 1
#include <spu_intrinsics.h>
#include "cos_sin.h"
/*
* FUNCTION
* vector float _cosf4(vector float angle)
*
* DESCRIPTION
* The _cosf4 function computes the cosine of a vector of angles
* (expressed in radians) to an accuracy of single precision floating
* point.
*
*/
static __inline vector float _cosf4(vector float angle)
{
vec_int4 octant;
vec_uint4 select;
vec_float4 cos, sin;
vec_float4 toggle_sign, answer;
/* Range reduce the input angle x into the range -PI/4 to PI/4
* by performing simple modulus.
*/
MOD_PI_OVER_FOUR_F(angle, octant);
/* Compute the cosine and sine of the range reduced input.
*/
COMPUTE_COS_SIN_F(angle, cos, sin);
/* For each SIMD element, select which result (cos or sin) to use
* with a sign correction depending upon the octant of the original
 * angle.
*
* octants angles select sign toggle
* ------- ------------ ------ -----------
* 0 0 to 45 cos no
* 1,2 45 to 135 sin yes
* 3,4 135 to 225 cos yes
* 5,6 225 to 315 sin no
* 7 315 to 360 cos no
*/
toggle_sign = (vec_float4)spu_sl(spu_and(spu_add(octant, 2), 4), 29);
select = spu_cmpeq(spu_and(octant, 2), 0);
answer = spu_xor(spu_sel(sin, cos, select), toggle_sign);
return (answer);
}
#endif /* _COSF4_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/coshd2.h"
static __inline double _cosh(double x)
{
return spu_extract(_coshd2(spu_promote(x, 0)), 0);
}


@ -0,0 +1,127 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _COSHD2_H_
#define _COSHD2_H_ 1
#include <spu_intrinsics.h>
#include "expd2.h"
#include "recipd2.h"
/*
* FUNCTION
 * vector double _coshd2(vector double x)
*
* DESCRIPTION
 * _coshd2 computes the hyperbolic cosine of each element of the
 * input vector to double-precision floating-point accuracy.
*/
static __inline vector double _coshd2(vector double x)
{
// Coefficients for the power series
vec_double2 f02 = spu_splats(5.00000000000000000000E-1); // 1/(2!)
vec_double2 f04 = spu_splats(4.16666666666666666667E-2); // 1/(4!)
vec_double2 f06 = spu_splats(1.38888888888888888889E-3); // 1/(6!)
vec_double2 f08 = spu_splats(2.48015873015873015873E-5); // 1/(8!)
vec_double2 f10 = spu_splats(2.75573192239858906526E-7); // 1/(10!)
vec_double2 f12 = spu_splats(2.08767569878680989792E-9); // 1/(12!)
vec_double2 f14 = spu_splats(1.14707455977297247139E-11); // 1/(14!)
vec_double2 f16 = spu_splats(4.77947733238738529744E-14); // 1/(16!)
vec_double2 f18 = spu_splats(1.56192069685862264622E-16); // 1/(18!)
vec_double2 f20 = spu_splats(4.11031762331216485848E-19); // 1/(20!)
vec_double2 f22 = spu_splats(8.89679139245057328675E-22); // 1/(22!)
// Check if the input is within the range [ -1.0 ... 1.0 ]
// If it is, we want to use the power series, otherwise
// we want to use the 0.5 * (e^x + e^-x)
// round to float, check if within range. Results will be in
// slots 0 and 2, so we rotate right 4 bytes and OR with the original
// to produce 64 bits of all 1's or 0's.
vec_uint4 use_exp = spu_cmpabsgt(spu_roundtf(x),spu_splats(1.0f));
use_exp = spu_or(use_exp,spu_rlmaskqwbyte(use_exp,-4));
// Perform the calculation of the power series using Horner's method
vec_double2 result;
vec_double2 x2 = spu_mul(x,x);
result = spu_madd(x2,f22,f20);
result = spu_madd(x2,result,f18);
result = spu_madd(x2,result,f16);
result = spu_madd(x2,result,f14);
result = spu_madd(x2,result,f12);
result = spu_madd(x2,result,f10);
result = spu_madd(x2,result,f08);
result = spu_madd(x2,result,f06);
result = spu_madd(x2,result,f04);
result = spu_madd(x2,result,f02);
result = spu_madd(x2,result,spu_splats(1.0));
// Perform calculation as a function of 0.5 * (e^x + e^-x)
vec_double2 ex = _expd2(x);
vec_double2 ex_inv = _recipd2(ex);
vec_double2 r2= spu_add(ex,ex_inv);
r2 = spu_mul(r2,f02); // we can reuse f02 here
// Select either the power series or exp version
result = spu_sel(result,r2,(vec_ullong2)use_exp);
return result;
}
#endif /* _COSHD2_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/coshf4.h"
static __inline float _coshf(float x)
{
return spu_extract(_coshf4(spu_promote(x, 0)), 0);
}


@ -0,0 +1,112 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _COSHF4_H_
#define _COSHF4_H_ 1
#include <spu_intrinsics.h>
#include "expf4.h"
#include "recipf4.h"
/*
* FUNCTION
* vector float _coshf4(vector float x)
*
* DESCRIPTION
 * The _coshf4 function computes the hyperbolic cosine of each element
 * of the input vector to single-precision floating-point accuracy.
*
*/
static __inline vector float _coshf4(vector float x)
{
// 1.0000 (above this number, use cosh(x) = 0.5 * (e^x + e^-x))
vec_uint4 threshold = (vec_uint4)spu_splats(0x3F800000);
vec_uint4 sign_mask = (vec_uint4)spu_splats(0x80000000);
// Coefficients for the Taylor series
vec_float4 f02 = spu_splats(5.0000000000000000E-1f); // 1/2!
vec_float4 f04 = spu_splats(4.1666666666666667E-2f); // 1/4!
vec_float4 f06 = spu_splats(1.3888888888888889E-3f); // 1/6!
vec_float4 f08 = spu_splats(2.4801587301587302E-5f); // 1/8!
vec_float4 f10 = spu_splats(2.7557319223985891E-7f); // 1/10!
vec_float4 f12 = spu_splats(2.0876756987868099E-9f); // 1/12!
// Perform the calculation as a Taylor series
vec_float4 result;
vector float x2 = spu_mul(x,x);
result = spu_madd(x2,f12,f10);
result = spu_madd(x2,result,f08);
result = spu_madd(x2,result,f06);
result = spu_madd(x2,result,f04);
result = spu_madd(x2,result,f02);
result = spu_madd(x2,result,spu_splats(1.0f));
// Perform the calculation as 0.5 * (e^x + e^-x)
vec_float4 ex = _expf4(x);
vec_float4 ex_inv = _recipf4(ex);
vec_float4 r2= spu_add(ex,ex_inv);
r2 = spu_mul(r2,f02); // we can reuse f02 (1/2!) here
vec_uint4 xabs = spu_andc((vec_uint4)x,sign_mask);
vec_uint4 use_exp = spu_cmpgt(xabs,threshold);
// Select either the Taylor or exp version
result = spu_sel(result,r2,use_exp);
return result;
}
#endif /* _COSHF4_H_ */
#endif /* __SPU__ */
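
An illustrative scalar rendering of the two-regime strategy above (a sketch only, not part of the patch): it keeps the same Taylor coefficients and the same 1.0 crossover, with plain <math.h> calls standing in for the SPU intrinsics.

#include <math.h>

/* Scalar sketch of the _coshf4 strategy: an even Taylor polynomial through
 * the x^12 term for |x| <= 1.0, and 0.5*(e^x + e^-x) above the threshold.
 * Illustration only, not part of the patch. */
static float cosh_sketch(float x)
{
  float xabs = fabsf(x);
  if (xabs <= 1.0f) {
    float x2 = x * x;
    float r;
    r = 2.0876756987868099e-9f;           /* 1/12! */
    r = r * x2 + 2.7557319223985891e-7f;  /* 1/10! */
    r = r * x2 + 2.4801587301587302e-5f;  /* 1/8!  */
    r = r * x2 + 1.3888888888888889e-3f;  /* 1/6!  */
    r = r * x2 + 4.1666666666666667e-2f;  /* 1/4!  */
    r = r * x2 + 5.0e-1f;                 /* 1/2!  */
    return r * x2 + 1.0f;
  } else {
    float ex = expf(xabs);                /* expf and 1/ex stand in for _expf4 and _recipf4 */
    return 0.5f * (ex + 1.0f / ex);
  }
}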


@@ -0,0 +1,244 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _DIVD2_H_
#define _DIVD2_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector double _divd2(vector double a, vector double b)
*
* DESCRIPTION
* _divd2 divides the vector dividend a by the vector divisor b and
* returns the resulting vector quotient. The maximum error is 0.5 ulp
* for normalized results and 1 ulp for denormal results, over the entire
* double range including denormals, compared to the true result in
* round-to-nearest rounding mode. Inf and NaN operands and results are
* handled correctly.
*/
static __inline vector double _divd2(vector double a, vector double b)
{
/* Variables
*/
vec_float4 inv_bf, mant_bf;
vec_double2 mant_a, mant_b, inv_b, q0, q1, q2, mult;
vec_int4 exp, tmp;
vec_uint4 exp_a, exp_b, exp_q1, overflow, nounderflow, normal, utmp,
sign_a, sign_b, a_frac, b_frac, a_frac_0, b_frac_0, a_exp_0, b_exp_0,
a_exp_ones, b_exp_ones, a_nan, b_nan, a_inf, b_inf, a_zero, b_zero,
res_nan, sign_res;
/* Constants
*/
vec_float4 onef = spu_splats(1.0f);
vec_double2 one = spu_splats(1.0);
vec_uint4 exp_mask = (vec_uint4) { 0x7FF00000, 0, 0x7FF00000, 0 };
vec_uint4 sign_mask = (vec_uint4) { 0x80000000, 0, 0x80000000, 0};
vec_uint4 sign_exp_mask = (vec_uint4) { 0xFFF00000, 0, 0xFFF00000,0};
vec_uint4 frac_mask =(vec_uint4) { 0x000FFFFF, 0xFFFFFFFF, 0x000FFFFF, 0xFFFFFFFF };
vec_uchar16 swap32 = (vec_uchar16) ((vec_uint4) { 0x04050607, 0x00010203, 0x0C0D0E0F, 0x08090A0B} );
vec_uint4 zero = (vec_uint4) { 0, 0, 0, 0 };
vec_int4 e1022 = (vec_int4) { 0x000003FE, 0, 0x000003FE, 0 };
vec_int4 emax = (vec_int4) { 0x000007FE, 0, 0x000007FE, 0 };
vec_int4 e1 = (vec_int4) { 0x00000001, 0, 0x00000001, 0 };
vec_uint4 nan = (vec_uint4) { 0x7FF80000, 0, 0x7FF80000, 0};
/* Extract the exponents and flush denormal arguments to signed zero.
*/
exp_a = spu_and((vec_uint4)a, exp_mask);
exp_b = spu_and((vec_uint4)b, exp_mask);
sign_a = spu_and((vec_uint4)a, sign_mask);
sign_b = spu_and((vec_uint4)b, sign_mask);
a_exp_0 = spu_cmpeq (exp_a, 0);
utmp = spu_shuffle (a_exp_0, a_exp_0, swap32);
a_exp_0 = spu_and (a_exp_0, utmp);
b_exp_0 = spu_cmpeq (exp_b, 0);
utmp = spu_shuffle (b_exp_0, b_exp_0, swap32);
b_exp_0 = spu_and (b_exp_0, utmp);
a = spu_sel(a, (vec_double2)sign_a, (vec_ullong2)a_exp_0);
b = spu_sel(b, (vec_double2)sign_b, (vec_ullong2)b_exp_0);
/* Force the divisor and dividend into the range [1.0,2.0).
(Unless they're zero.)
*/
mant_a = spu_sel(a, one, (vec_ullong2)sign_exp_mask);
mant_b = spu_sel(b, one, (vec_ullong2)sign_exp_mask);
/* Approximate the reciprocal of b using the single-precision
* reciprocal estimate followed by one single-precision iteration
* of Newton-Raphson.
*/
mant_bf = spu_roundtf(mant_b);
inv_bf = spu_re(mant_bf);
inv_bf = spu_madd(spu_nmsub(mant_bf, inv_bf, onef), inv_bf, inv_bf);
/* Perform 2 more Newton-Raphson iterations in double precision.
*/
inv_b = spu_extend(inv_bf);
inv_b = spu_madd(spu_nmsub(mant_b, inv_b, one), inv_b, inv_b);
q0 = spu_mul(mant_a, inv_b);
q1 = spu_madd(spu_nmsub(mant_b, q0, mant_a), inv_b, q0);
/* Compute the quotient's expected exponent. If the exponent
* is out of range, then force the resulting exponent to 0.
* (1023 with the bias). We correct for the out of range
* values by computing a multiplier (mult) that will force the
* result to the correct out of range value and set the
* correct exception flag (UNF, OVF, or neither).
*/
exp_q1 = spu_and((vec_uint4)q1, exp_mask);
exp = spu_sub((vec_int4)exp_a, (vec_int4)exp_b);
exp = spu_rlmaska(exp, -20); // shift right to allow enough bits for working
tmp = spu_rlmaska((vec_int4)exp_q1, -20);
exp = spu_add(exp, tmp); // biased exponent of result (right justified)
/* The default multiplier is 1.0. If an underflow is detected (the computed
* exponent is less than or equal to a biased 0), force the multiplier to 0.0.
* If exp<=0 set mult = 2**(unbiased exp + 1022) and unbiased exp = -1022
* = biased 1, the smallest normalized exponent. If exp<-51 set
* mult = 2**(-1074) to ensure underflowing result. Otherwise mult=1.
*/
normal = spu_cmpgt(exp, 0);
nounderflow = spu_cmpgt(exp, -52);
tmp = spu_add(exp, e1022);
mult = (vec_double2)spu_sl(tmp, 20);
mult = spu_sel(mult, one, (vec_ullong2)normal);
mult = spu_sel((vec_double2)e1, mult, (vec_ullong2)nounderflow);
exp = spu_sel(e1, exp, normal); // unbiased -1022 is biased 1
/* Force the multiplier to positive infinity (exp_mask) and the biased
* exponent to 1022, if the computed biased exponent is > emax.
*/
overflow = spu_cmpgt(exp, (vec_int4)emax);
exp = spu_sel(exp, (vec_int4)e1022, overflow);
mult = spu_sel(mult, (vec_double2)exp_mask, (vec_ullong2)overflow);
/* Determine if a, b are Inf, NaN, or zero.
* Since these are rare, it would improve speed if these could be detected
* quickly and a branch used to avoid slowing down the main path. However
* most of the work seems to be in the detection.
*/
a_exp_ones = spu_cmpeq (exp_a, exp_mask);
utmp = spu_shuffle (a_exp_ones, a_exp_ones, swap32);
a_exp_ones = spu_and (a_exp_ones, utmp);
a_frac = spu_and ((vec_uint4)a, frac_mask);
a_frac_0 = spu_cmpeq (a_frac, 0);
utmp = spu_shuffle (a_frac_0, a_frac_0, swap32);
a_frac_0 = spu_and (a_frac_0, utmp);
a_zero = spu_and (a_exp_0, a_frac_0);
a_inf = spu_and (a_exp_ones, a_frac_0);
a_nan = spu_andc (a_exp_ones, a_frac_0);
b_exp_ones = spu_cmpeq (exp_b, exp_mask);
utmp = spu_shuffle (b_exp_ones, b_exp_ones, swap32);
b_exp_ones = spu_and (b_exp_ones, utmp);
b_frac = spu_and ((vec_uint4)b, frac_mask);
b_frac_0 = spu_cmpeq (b_frac, 0);
utmp = spu_shuffle (b_frac_0, b_frac_0, swap32);
b_frac_0 = spu_and (b_frac_0, utmp);
b_zero = spu_and (b_exp_0, b_frac_0);
b_inf = spu_and (b_exp_ones, b_frac_0);
b_nan = spu_andc (b_exp_ones, b_frac_0);
/* Handle exception cases */
/* Result is 0 for 0/x, x!=0, or x/Inf, x!=Inf.
* Set mult=0 for 0/0 or Inf/Inf now, since it will be replaced
* with NaN later.
*/
utmp = spu_or (a_zero, b_inf);
mult = spu_sel(mult, (vec_double2)zero, (vec_ullong2)utmp);
/* Result is Inf for x/0, x!=0. Set mult=Inf for 0/0 now, since it
* will be replaced with NaN later.
*/
mult = spu_sel(mult, (vec_double2)exp_mask, (vec_ullong2)b_zero);
/* Result is NaN if either operand is, or Inf/Inf, or 0/0.
*/
res_nan = spu_or (a_nan, b_nan);
utmp = spu_and (a_inf, b_inf);
res_nan = spu_or (res_nan, utmp);
utmp = spu_and (a_zero, b_zero);
res_nan = spu_or (res_nan, utmp);
mult = spu_sel(mult, (vec_double2)nan, (vec_ullong2)res_nan);
/* Insert sign of result into mult.
*/
sign_res = spu_xor (sign_a, sign_b);
mult = spu_or (mult, (vec_double2)sign_res);
/* Insert the sign and exponent into the result and perform the
* final multiplication.
*/
exp = spu_sl(exp, 20);
q2 = spu_sel(q1, (vec_double2)exp, (vec_ullong2)exp_mask);
q2 = spu_mul(q2, mult);
return (q2);
}
#endif /* _DIVD2_H_ */
#endif /* __SPU__ */
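
The heart of the routine above is the reciprocal refinement: spu_re supplies a rough single-precision estimate of 1/b, one single-precision and one double-precision Newton-Raphson step sharpen it, and the quotient then gets a residual correction. A scalar sketch of just that refinement (illustration only, not part of the patch; an ordinary float divide stands in for spu_re, and the exponent scaling and Inf/NaN/zero/denormal handling of the vector code are omitted):

#include <math.h>

/* Each Newton-Raphson step  y' = y + y*(1 - b*y)  roughly doubles the
 * number of correct bits in the reciprocal; the quotient is then fixed
 * up once with  q' = q + y*(a - b*q). */
static double div_refine_sketch(double a, double b)
{
  float yf = 1.0f / (float)b;               /* stands in for spu_re(b)     */
  yf = yf + yf * (1.0f - (float)b * yf);    /* single-precision N-R step   */

  double y = (double)yf;
  y = y + y * (1.0 - b * y);                /* double-precision N-R step   */

  double q = a * y;                         /* initial quotient            */
  q = q + y * (a - b * q);                  /* residual correction         */
  return q;
}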


@@ -0,0 +1,172 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _DIVF4_H_
#define _DIVF4_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector float _divf4(vector float dividend, vector float divisor)
*
* DESCRIPTION
* The _divf4 function divides the vector dividend by the vector divisor
* and returns the resulting vector quotient.
*
*/
static __inline vector float _divf4(vector float a, vector float b)
{
/* This function has been designed to provide a
* full-function operation that precisely computes
* the quotient for the entire range of extended
* single precision inputs <a> and <b>. This includes:
*
* 1) Computing the quotient to full single precision
* floating point accuracy.
* 2) Rounding the result consistently with the rounding
* mode of the processor - truncation toward zero.
* 3) Underflow and overflow results are clamped to
* Smin and Smax and flagged with the appropriate
* UNF or OVF exception in the FPSCR.
* 4) Divide By Zero (DBZ) exception is produced when
* the divisor <b> has a zero exponent. A quotient
* of correctly signed Smax is produced.
* 5) Denorm/zero divided by a denorm/zero generates
* a DBZ with the results undefined.
* 6) Resulting denorm quotients will be coerced to +0.
* 7) If a non-compliant IEEE result is produced, a
* DIFF exception is generated.
*/
vector float inv_b, err, q0, q1, q2;
vector float mult;
vector float mant_a, mant_b;
vector float one = spu_splats(1.0f);
vector unsigned int exp, exp_a, exp_b, overflow;
vector unsigned int exp_mask = (vec_uint4)spu_splats(0x7F800000);
/* If b has a zero exponent, then set the divide by zero
* (DBZ) exception flag. The estimate result is discarded.
* Note: This must be implemented as inline assembly. Otherwise
* the optimizer removes it.
*/
(void)si_frest((qword)(b));
/* For computing the quotient, force the divisor and
* dividend into the range [1.0, 2.0).
*/
mant_a = spu_sel(a, one, exp_mask);
mant_b = spu_sel(b, one, exp_mask);
/* Compute the quotient using reciprocal estimate
* followed by one iteration of the Newton-Raphson.
*/
inv_b = spu_re(mant_b);
q0 = spu_mul(mant_a, inv_b);
q1 = spu_nmsub(mant_b, q0, mant_a);
q1 = spu_madd(inv_b, q1, q0);
/* Due to truncation error, the quotient result
* may be low by 1 ulp (unit in the last place).
* Conditionally add one if the estimate is too
* small.
*/
q2 = (vector float)spu_add((vector unsigned int)(q1), 1);
err = spu_nmsub(mant_b, q2, mant_a);
q2 = spu_sel(q1, q2, spu_cmpgt((vector signed int)err, -1));
/* Compute the quotient's expected exponent. If the exponent
* is out of range, then force the resulting exponent to 0.
* (127 with the bias). We correct for the out of range
* values by computing a multiplier (mult) that will force the
* result to the correct out of range value and set the
* correct exception flag (UNF, OVF, or neither). The multiplier
* is also conditioned to generate correctly signed Smax if the
* divisor b is a denorm or zero.
*/
exp_a = spu_and((vector unsigned int)a, exp_mask);
exp_b = spu_and((vector unsigned int)b, exp_mask);
exp = spu_add(spu_sub(spu_add(exp_a, (vector unsigned int)one), exp_b), spu_cmpabsgt(mant_b, mant_a));
/* The default multiplier is 1.0. If an underflow is detected (i.e.,
* either the dividend <a> is a denorm/zero, or the computed exponent is
* less than or equal to a biased 0), force the multiplier to 0.0.
*/
mult = spu_and(one, (vector float)spu_cmpgt((vector signed int)exp, 0));
/* Force the multiplier to positive Smax (0x7FFFFFFF) and the biased exponent
* to 127, if the divisor is denorm/zero or the computed biased exponent is
* greater than 255.
*/
overflow = spu_or(spu_cmpeq(exp_b, 0), spu_cmpeq(spu_rlmask(exp, -30), 2));
exp = spu_sel(exp, (vector unsigned int)one, overflow);
mult = spu_or(mult, (vector float)spu_rlmask(overflow, -1));
mult = spu_andc(mult, (vector float)spu_cmpeq(exp_a, 0));
/* Insert the exponent into the result and perform the
* final multiplication.
*/
q2 = spu_sel(q2, (vector float)exp, exp_mask);
q2 = spu_mul(q2, mult);
return (q2);
}
#endif /* _DIVF4_H_ */
#endif /* __SPU__ */
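
The 1-ulp fix-up above is worth spelling out: the SPU truncates toward zero, so the refined quotient can be one unit in the last place too small; the code tentatively bumps it by one ulp and keeps the bump only while the residual a - b*q stays non-negative. A scalar sketch of that step for positive a and b (illustration only, not part of the patch; nextafterf stands in for the integer increment of the float's bit pattern):

#include <math.h>

/* Truncated (round-toward-zero) single-precision quotient with the same
 * conditional 1-ulp bump used by _divf4.  Positive inputs only. */
static float div_ulp_fixup_sketch(float a, float b)
{
  float inv_b = 1.0f / b;                  /* stands in for spu_re(b)      */
  float q = a * inv_b;
  q = q + inv_b * (a - b * q);             /* one Newton-Raphson step      */

  float q_up = nextafterf(q, INFINITY);    /* q plus one ulp               */
  if (a - b * q_up >= 0.0f)
    q = q_up;                              /* bumped value still <= a/b    */
  return q;
}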


@@ -0,0 +1,6 @@
#include "headers/erfd2.h"
static __inline double _erf(double x)
{
return spu_extract(_erfd2(spu_promote(x, 0)), 0);
}


@@ -0,0 +1,381 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ERF_UTILS_H_
#define _ERF_UTILS_H_ 1
#include <spu_intrinsics.h>
/*
* This file contains approximation methods for the erf and erfc functions.
*/
#define SQRT_PI 1.7724538509055160272981674833411451827975494561223871282138077898529113E0
#define INV_SQRT_PI 5.6418958354775628694807945156077258584405062932899885684408572171064247E-1
#define TWO_OVER_SQRT_PI 1.1283791670955125738961589031215451716881012586579977136881714434212849E0
/*
* Coefficients of Taylor Series Expansion of Error Function
*/
#define TAYLOR_ERF_00 1.0000000000000000000000000000000000000000000000000000000000000000000000E0
#define TAYLOR_ERF_01 -3.3333333333333333333333333333333333333333333333333333333333333333333333E-1
#define TAYLOR_ERF_02 1.0000000000000000000000000000000000000000000000000000000000000000000000E-1
#define TAYLOR_ERF_03 -2.3809523809523809523809523809523809523809523809523809523809523809523810E-2
#define TAYLOR_ERF_04 4.6296296296296296296296296296296296296296296296296296296296296296296296E-3
#define TAYLOR_ERF_05 -7.5757575757575757575757575757575757575757575757575757575757575757575758E-4
#define TAYLOR_ERF_06 1.0683760683760683760683760683760683760683760683760683760683760683760684E-4
#define TAYLOR_ERF_07 -1.3227513227513227513227513227513227513227513227513227513227513227513228E-5
#define TAYLOR_ERF_08 1.4589169000933706816059757236227824463118580765639589169000933706816060E-6
#define TAYLOR_ERF_09 -1.4503852223150468764503852223150468764503852223150468764503852223150469E-7
#define TAYLOR_ERF_10 1.3122532963802805072646342487612328882170152011421852691693961535231377E-8
#define TAYLOR_ERF_11 -1.0892221037148573380457438428452921206544394950192051641327003645844226E-9
#define TAYLOR_ERF_12 8.3507027951472395916840361284805729250173694618139062583507027951472396E-11
#define TAYLOR_ERF_13 -5.9477940136376350368119915445018325676761890753660300985403866062302276E-12
#define TAYLOR_ERF_14 3.9554295164585257633971372340283122987009139171153402133150354277885750E-13
#define TAYLOR_ERF_15 -2.4668270102644569277100425760606678852113226579859111007771188689434124E-14
#define TAYLOR_ERF_16 1.4483264643598137264964265124598618265445265605599099265926266086599580E-15
#define TAYLOR_ERF_17 -8.0327350124157736091398445228866286178099792434415172399254921152569101E-17
#define TAYLOR_ERF_18 4.2214072888070882330314498243398198441944335363431396906515348954052831E-18
#define TAYLOR_ERF_19 -2.1078551914421358248605080094544309613386510235451574703658136454790212E-19
#define TAYLOR_ERF_20 1.0025164934907719167019489313258878962464315843690383090764235630936808E-20
#define TAYLOR_ERF_21 -4.5518467589282002862436219473268442686715055325725991884976042178118399E-22
#define TAYLOR_ERF_22 1.9770647538779051748330883205561040762916640191981996475292624380394860E-23
#define TAYLOR_ERF_23 -8.2301492992142213568444934713251326025092396728879726307878639881384709E-25
#define TAYLOR_ERF_24 3.2892603491757517327524761322472893904586246991984244357740612877764297E-26
#define TAYLOR_ERF_25 -1.2641078988989163521950692586675857265291969432213552733563059066748632E-27
#define TAYLOR_ERF_26 4.6784835155184857737263085770716162592880293254201102279514950101899871E-29
#define TAYLOR_ERF_27 -1.6697617934173720269864939702679842541566703989714871520634965356233624E-30
#define TAYLOR_ERF_28 5.7541916439821717721965644338808981189609568886862025916975131240153466E-32
#define TAYLOR_ERF_29 -1.9169428621097825307726719621929350834644917747230482041306735714136456E-33
#define TAYLOR_ERF_30 6.1803075882227961374638057797477142035193997108557291827163792739565622E-35
#define TAYLOR_ERF_31 -1.9303572088151078565555153741147494440075954038003045578376811864380455E-36
#define TAYLOR_ERF_32 5.8467550074688362962979552196744814890614668480489993819122074396921572E-38
#define TAYLOR_ERF_33 -1.7188560628017836239681912676564509126594090688520350964463748691994130E-39
#define TAYLOR_ERF_34 4.9089239645234229670020807729318930583197104694410209489303971115243253E-41
#define TAYLOR_ERF_35 -1.3630412617791395763506783635102640685072837923196396196225247512884444E-42
#define TAYLOR_ERF_36 3.6824935154611457351939940566677606112639706717920248475342183158858278E-44
#define TAYLOR_ERF_37 -9.6872802388707617538436600409638387251268417672366779772972229571050606E-46
#define TAYLOR_ERF_38 2.4830690974549115910398991902675594818336060579041382375163763560590552E-47
#define TAYLOR_ERF_39 -6.2056579196373967059419746072899084745598074150801247740591035188752759E-49
#define TAYLOR_ERF_40 1.5131079495412170980537530678268603996611876104670674603415715370097123E-50
#define TAYLOR_ERF_41 -3.6015793098101259166133998969725445892611283117200253978156713046660799E-52
#define TAYLOR_ERF_42 8.3734196838722815428266720293759440030440798283686864991232694198118944E-54
#define TAYLOR_ERF_43 -1.9025412272898795272394202686366085010926137006451172211319911806576077E-55
#define TAYLOR_ERF_44 4.2267897541935525758383443148974703675959497435169866761614717241371774E-57
#define TAYLOR_ERF_45 -9.1864295023986856959612367283485924961181813717463202485560679718732304E-59
/*
* Taylor Series Expansion of Erf
*
*                            infinite
*                             -----        n    2n
*                   2 * x      \        (-1)  * x
*       erf(x)  =  --------  *  >     ----------------
*                  sqrt(pi)    /       (2n + 1) * n!
*                             -----
*                             n = 0
*
* 45 terms give us accurate results for 0 <= x < 2.5
*/
#define TAYLOR_ERF(_xabs, _xsqu, _tresult) { \
_tresult = spu_madd(_xsqu, spu_splats(TAYLOR_ERF_45), spu_splats(TAYLOR_ERF_44)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_43)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_42)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_41)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_40)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_39)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_38)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_37)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_36)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_35)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_34)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_33)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_32)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_31)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_30)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_29)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_28)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_27)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_26)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_25)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_24)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_23)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_22)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_21)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_20)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_19)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_18)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_17)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_16)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_15)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_14)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_13)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_12)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_11)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_10)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_09)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_08)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_07)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_06)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_05)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_04)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_03)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_02)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_01)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats(TAYLOR_ERF_00)); \
_tresult = spu_mul(_tresult, _xabs); \
_tresult = spu_mul(_tresult, spu_splats(TWO_OVER_SQRT_PI)); \
}
#define TAYLOR_ERFF4(_xabs, _xsqu, _tresult) { \
_tresult = spu_madd(_xsqu, spu_splats((float)TAYLOR_ERF_45), spu_splats((float)TAYLOR_ERF_44)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_43)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_42)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_41)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_40)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_39)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_38)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_37)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_36)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_35)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_34)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_33)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_32)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_31)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_30)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_29)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_28)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_27)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_26)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_25)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_24)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_23)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_22)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_21)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_20)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_19)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_18)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_17)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_16)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_15)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_14)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_13)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_12)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_11)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_10)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_09)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_08)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_07)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_06)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_05)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_04)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_03)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_02)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_01)); \
_tresult = spu_madd(_tresult, _xsqu, spu_splats((float)TAYLOR_ERF_00)); \
_tresult = spu_mul(_tresult, _xabs); \
_tresult = spu_mul(_tresult, spu_splats((float)TWO_OVER_SQRT_PI)); \
}
/*
* Continued Fractions Approximation of Erfc()
*                              1                  ( 1   v    2v   3v      )
*     erfc(x)  =  --------------------------  *   ( ---  --   --   --  ... )
*                  sqrt(pi) * x * exp(x^2)        ( 1+   1+   1+   1+      )
*
*     where  v = 1/(2*x^2).
*
* We are using a backward recurrence calculation to estimate the continued fraction:
*
*     p[m,n] = a[m] * p[m+1,n] + b[m] * q[m+1,n]
*
*     q[m,n] = p[m+1,n]
*
* with
*
*     p[n,n] = a[n],   q[n,n] = 1
*
*     a[0] = 0,   b[0] = 1
*
*     a[n] = 1,   b[n] = n/(2*x^2)    for n >= 1
*
* and the value of the continued fraction is
*
*     F[0,n] = p[0,n] / q[0,n]
*
* Ref: "Computing the Incomplete Gamma Function to Arbitrary Precision",
* by Serge Winitzki, Department of Physics, Ludwig-Maximilians University, Munich, Germany.
*
*/
#define CONTFRAC_ERFCF4(_xabs, _xsqu, _presult) { \
vec_float4 v; \
vec_float4 p, q, plast, qlast; \
vec_float4 factor; \
vec_float4 inv_xsqu; \
inv_xsqu = _recipf4(_xsqu); \
v = spu_mul(inv_xsqu, onehalff); \
p = spu_splats(3.025f); q = onef; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(40.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(39.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(38.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(37.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(36.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(35.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(34.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(33.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(32.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(31.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(30.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(29.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(28.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(27.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(26.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(25.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(24.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(23.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(22.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(21.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(20.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(19.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(18.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(17.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(16.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(15.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(14.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(13.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(12.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(11.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(10.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 9.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 8.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 7.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 6.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 5.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0f)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0f)), plast); q = plast; plast = p; qlast = q; \
p = qlast; q = plast; \
factor = spu_mul(spu_splats((float)SQRT_PI), spu_mul(_xabs, _expf4(_xsqu))); \
_presult = _divf4(p, spu_mul(factor, q)); \
}
#define CONTFRAC_ERFC(_xabs, _xsqu, _presult) { \
vec_double2 v; \
vec_double2 p, q, plast, qlast; \
vec_double2 factor; \
vec_double2 inv_xsqu; \
inv_xsqu = _recipd2(_xsqu); \
v = spu_mul(inv_xsqu, onehalfd); \
p = spu_splats(3.025); q = oned; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(40.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(39.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(38.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(37.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(36.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(35.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(34.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(33.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(32.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(31.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(30.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(29.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(28.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(27.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(26.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(25.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(24.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(23.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(22.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(21.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(20.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(19.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(18.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(17.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(16.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(15.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(14.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(13.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(12.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(11.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats(10.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 9.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 8.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 7.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 6.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 5.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 4.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 3.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 2.0)), plast); q = plast; plast = p; qlast = q; \
p = spu_madd(qlast, spu_mul(v, spu_splats( 1.0)), plast); q = plast; plast = p; qlast = q; \
p = qlast; q = plast; \
factor = spu_mul(spu_splats(SQRT_PI), spu_mul(_xabs, _expd2(_xsqu))); \
_presult = _divd2(p, spu_mul(factor, q)); \
}
#endif /* _ERF_UTILS_H_ */
#endif /* __SPU__ */
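
To make the backward recurrence in CONTFRAC_ERFC concrete, here is a scalar double-precision transcription (illustration only, not part of the patch). It keeps the same 40 terms, the same 3.025 seed, and the same final swap, which is just the m = 0 step with a = 0, b = 1:

#include <math.h>

/* erfc(x) ~ F / (sqrt(pi) * x * exp(x^2)),  v = 1/(2*x^2), where F is the
 * continued fraction 1/(1+ v/(1+ 2v/(1+ 3v/(1+ ...)))) evaluated by the
 * backward recurrence p[m] = p[m+1] + (m*v)*q[m+1], q[m] = p[m+1].
 * Only meaningful for x comfortably away from zero. */
static double erfc_contfrac_sketch(double x)
{
  const double sqrt_pi = 1.7724538509055160273;
  double v = 1.0 / (2.0 * x * x);
  double p = 3.025, q = 1.0;               /* seed values, as in the macro */
  int m;
  for (m = 40; m >= 1; m--) {
    double pnew = p + q * ((double)m * v); /* a[m] = 1, b[m] = m*v         */
    q = p;
    p = pnew;
  }
  /* The m = 0 step (a = 0, b = 1) just swaps p and q, so F = q/p. */
  double F = q / p;
  return F / (sqrt_pi * x * exp(x * x));
}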


@@ -0,0 +1,6 @@
#include "headers/erfcd2.h"
static __inline double _erfc(double x)
{
return spu_extract(_erfcd2(spu_promote(x, 0)), 0);
}


@@ -0,0 +1,150 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ERFCD2_H_
#define _ERFCD2_H_ 1
#include <spu_intrinsics.h>
#include "expd2.h"
#include "recipd2.h"
#include "divd2.h"
#include "erf_utils.h"
/*
* FUNCTION
* vector double _erfcd2(vector double x)
*
* DESCRIPTION
* The erfcd2 function computes the complementary error function of each element of x.
*
* Accuracy Note: We would benefit from a rational approximation in the domain
* 1.2 < x < 2.0 and also around x = 2.5.
*
* C99 Special Cases:
* - erfc(+0) returns +1
* - erfc(-0) returns +1
* - erfc(+infinity) returns +0
* - erfc(-infinity) returns +2
*
* Other Cases:
* - erfc(NaN) returns NaN
*
*/
static __inline vector double _erfcd2(vector double x)
{
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 });
vec_double2 onehalfd = spu_splats(0.5);
vec_double2 zerod = spu_splats(0.0);
vec_double2 oned = spu_splats(1.0);
vec_double2 twod = spu_splats(2.0);
vec_double2 sign_mask = spu_splats(-0.0);
/* This is where we switch from the Taylor Series to the Continued Fraction approximation */
vec_float4 approx_point = spu_splats(1.71f);
vec_double2 xabs, xsqu, xsign;
vec_uint4 xhigh, xabshigh;
vec_uint4 isnan, isneg;
vec_double2 tresult, presult, result;
xsign = spu_and(x, sign_mask);
/* Force Denorms to 0 */
x = spu_add(x, zerod);
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
/*
* Use Taylor Series for x near 0
* Preserve sign of x in result, since erf(-x) = -erf(x)
* This approximation is for erf, so adjust for erfc.
*/
TAYLOR_ERF(xabs, xsqu, tresult);
tresult = spu_or(tresult, xsign);
tresult = spu_sub(oned, tresult);
/*
* Now, use the Continued Fractions approximation away
* from 0. If x < 0, use erfc(-x) = 2 - erfc(x)
*/
CONTFRAC_ERFC(xabs, xsqu, presult);
isneg = (vec_uint4)spu_shuffle(x, x, dup_even);
isneg = spu_rlmaska(isneg, -32);
presult = spu_sel(presult, spu_sub(twod, presult), (vec_ullong2)isneg);
/*
* Select the appropriate approximation.
*/
vec_float4 xf = spu_roundtf(xabs);
xf = spu_shuffle(xf, xf, dup_even);
result = spu_sel(tresult, presult, (vec_ullong2)spu_cmpgt(xf, approx_point));
/*
* Special cases/errors.
*/
xhigh = (vec_uint4)spu_shuffle(x, x, dup_even);
xabshigh = (vec_uint4)spu_shuffle(xabs, xabs, dup_even);
/* x = +/- infinite */
result = spu_sel(result, zerod, (vec_ullong2)spu_cmpeq(xhigh, 0x7FF00000));
result = spu_sel(result, twod, (vec_ullong2)spu_cmpeq(xhigh, 0xFFF00000));
/* x = nan, return x */
isnan = spu_cmpgt(xabshigh, 0x7FF00000);
result = spu_sel(result, x, (vec_ullong2)isnan);
return result;
}
#endif /* _ERFCD2_H_ */
#endif /* __SPU__ */
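
The special-case handling above leans on one trick: shuffle the high 32 bits of each double into both word slots (dup_even) and classify the value from that word alone. A scalar sketch of the same high-word tests (illustration only, not part of the patch; high_word is a hypothetical helper):

#include <stdint.h>
#include <string.h>

/* Return the high 32 bits (sign, exponent, top of the mantissa) of a
 * double's IEEE-754 bit pattern. */
static uint32_t high_word(double x)
{
  uint64_t bits;
  memcpy(&bits, &x, sizeof bits);
  return (uint32_t)(bits >> 32);
}

/* These mirror the high-word-only comparisons in _erfcd2: +Inf has high
 * word 0x7FF00000, -Inf has 0xFFF00000, and a NaN is flagged when the
 * high word of |x| exceeds 0x7FF00000. */
static int is_pos_inf_hi(double x) { return high_word(x) == 0x7FF00000u; }
static int is_neg_inf_hi(double x) { return high_word(x) == 0xFFF00000u; }
static int is_nan_hi(double x)     { return (high_word(x) & 0x7FFFFFFFu) > 0x7FF00000u; }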


@@ -0,0 +1,6 @@
#include "headers/erfcf4.h"
static __inline float _erfcf(float x)
{
return spu_extract(_erfcf4(spu_promote(x, 0)), 0);
}


@@ -0,0 +1,133 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ERFCF4_H_
#define _ERFCF4_H_ 1
#include <spu_intrinsics.h>
#include "expf4.h"
#include "recipf4.h"
#include "divf4.h"
#include "erf_utils.h"
/*
* FUNCTION
* vector float _erfcf4(vector float x)
*
* DESCRIPTION
* The erfcf4 function computes the complementary error function of each element of x.
*
* C99 Special Cases:
* - erfc(+0) returns +1
* - erfc(-0) returns +1
* - erfc(+infinity) returns +0
* - erfc(-infinity) returns +2
*
*/
static __inline vector float _erfcf4(vector float x)
{
vec_float4 onehalff = spu_splats(0.5f);
vec_float4 zerof = spu_splats(0.0f);
vec_float4 onef = spu_splats(1.0f);
vec_float4 twof = spu_splats(2.0f);
vec_float4 sign_mask = spu_splats(-0.0f);
/* This is where we switch from the Taylor Series to the Continued Fraction approximation */
vec_float4 approx_point = spu_splats(0.89f);
vec_float4 xabs, xsqu, xsign;
vec_uint4 isneg;
vec_float4 tresult, presult, result;
xsign = spu_and(x, sign_mask);
/* Force Denorms to 0 */
x = spu_add(x, zerof);
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
/*
* Use Taylor Series for x near 0
* Preserve sign of x in result, since erf(-x) = -erf(x)
* This approximation is for erf, so adjust for erfc.
*/
TAYLOR_ERFF4(xabs, xsqu, tresult);
tresult = spu_or(tresult, xsign);
tresult = spu_sub(onef, tresult);
/*
* Now, use the Continued Fractions approximation away
* from 0. If x < 0, use erfc(-x) = 2 - erfc(x)
*/
CONTFRAC_ERFCF4(xabs, xsqu, presult);
isneg = spu_rlmaska((vec_uint4)x, -32);
presult = spu_sel(presult, spu_sub(twof, presult), isneg);
/*
* Select the appropriate approximation.
*/
result = spu_sel(tresult, presult, spu_cmpgt(xabs, approx_point));
/*
* Special cases/errors.
*/
/* x = +/- infinite */
result = spu_sel(result, zerof, spu_cmpeq((vec_uint4)xabs, 0x7F800000));
result = spu_sel(result, twof, spu_cmpeq((vec_uint4)xabs, 0xFF800000));
return result;
}
#endif /* _ERFCF4_H_ */
#endif /* __SPU__ */


@@ -0,0 +1,146 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ERFD2_H_
#define _ERFD2_H_ 1
#include <spu_intrinsics.h>
#include "expd2.h"
#include "recipd2.h"
#include "divd2.h"
#include "erf_utils.h"
/*
* FUNCTION
* vector double _erfd2(vector double x)
*
* DESCRIPTION
* The erfd2 function computes the error function of each element of x.
*
* C99 Special Cases:
* - erf(+0) returns +0
* - erf(-0) returns -0
* - erf(+infinity) returns +1
* - erf(-infinity) returns -1
*
* Other Cases:
* - erf(NaN) returns NaN
*
*/
static __inline vector double _erfd2(vector double x)
{
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8, 9,10,11, 8, 9,10,11 });
vec_double2 onehalfd = spu_splats(0.5);
vec_double2 zerod = spu_splats(0.0);
vec_double2 oned = spu_splats(1.0);
vec_double2 sign_mask = spu_splats(-0.0);
/* This is where we switch from Taylor Series to Continued Fraction approximation */
vec_float4 approx_point = spu_splats(1.77f);
vec_double2 xabs, xsqu, xsign;
vec_uint4 xabshigh;
vec_uint4 isinf, isnan;
vec_double2 tresult, presult, result;
xsign = spu_and(x, sign_mask);
/* Force Denorms to 0 */
x = spu_add(x, zerod);
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
/*
* Taylor Series Expansion near Zero
*/
TAYLOR_ERF(xabs, xsqu, tresult);
/*
* Continued Fraction Approximation of Erfc().
* erf = 1 - erfc
*/
CONTFRAC_ERFC(xabs, xsqu, presult);
presult = spu_sub(oned, presult);
/*
* Select the appropriate approximation.
*/
vec_float4 xf = spu_roundtf(xabs);
xf = spu_shuffle(xf, xf, dup_even);
result = spu_sel(tresult, presult, (vec_ullong2)spu_cmpgt(xf, approx_point));
/*
* Special cases/errors.
*/
xabshigh = (vec_uint4)spu_shuffle(xabs, xabs, dup_even);
/* x = +/- infinite, return +/-1 */
isinf = spu_cmpeq(xabshigh, 0x7FF00000);
result = spu_sel(result, oned, (vec_ullong2)isinf);
/* x = nan, return x */
isnan = spu_cmpgt(xabshigh, 0x7FF00000);
result = spu_sel(result, x, (vec_ullong2)isnan);
/*
* Preserve sign in result, since erf(-x) = -erf(x)
*/
result = spu_or(result, xsign);
return result;
}
#endif /* _ERFD2_H_ */
#endif /* __SPU__ */


@@ -0,0 +1,6 @@
#include "headers/erff4.h"
static __inline float _erff(float x)
{
return spu_extract(_erff4(spu_promote(x, 0)), 0);
}


@@ -0,0 +1,132 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ERFF4_H_
#define _ERFF4_H_ 1
#include <spu_intrinsics.h>
#include "expf4.h"
#include "recipf4.h"
#include "divf4.h"
#include "erf_utils.h"
/*
* FUNCTION
* vector float _erff4(vector float x)
*
* DESCRIPTION
* The _erff4 function computes the error function of each element of x.
*
* C99 Special Cases:
* - erf(+0) returns +0
* - erf(-0) returns -0
* - erf(+infinity) returns +1
* - erf(-infinity) returns -1
*
*/
static __inline vector float _erff4(vector float x)
{
vector float onehalff = spu_splats(0.5f);
vector float zerof = spu_splats(0.0f);
vector float onef = spu_splats(1.0f);
vector float sign_mask = spu_splats(-0.0f);
/* This is where we switch from Taylor Series to Continued Fraction approximation */
vec_float4 approx_point = spu_splats(0.89f);
vec_float4 xabs, xsqu, xsign;
vec_uint4 isinf;
vec_float4 tresult, presult, result;
xsign = spu_and(x, sign_mask);
/* Force Denorms to 0 */
x = spu_add(x, zerof);
xabs = spu_andc(x, sign_mask);
xsqu = spu_mul(x, x);
/*
* Taylor Series Expansion near Zero
*/
TAYLOR_ERFF4(xabs, xsqu, tresult);
/*
* Continued Fraction Approximation of Erfc().
* erf = 1 - erfc
*/
CONTFRAC_ERFCF4(xabs, xsqu, presult);
presult = spu_sub(onef, presult);
/*
* Select the appropriate approximation.
*/
result = spu_sel(tresult, presult, spu_cmpgt(xabs, approx_point));
/*
* Special cases/errors.
*/
/* x = +/- infinite, return +/-1 */
isinf = spu_cmpeq((vec_uint4)xabs, 0x7F800000);
result = spu_sel(result, onef, isinf);
/*
* Preserve sign in result, since erf(-x) = -erf(x)
*/
result = spu_or(result, xsign);
return result;
}
#endif /* _ERFF4_H_ */
#endif /* __SPU__ */
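
The structure of _erff4 is easier to see on a single float: evaluate a Taylor sum for small arguments, fall back to erf = 1 - erfc for large ones, switch at 0.89, and restore the sign at the end. A hedged scalar sketch of that selection logic; the short Taylor sum and the C library's erfcf() stand in here for the TAYLOR_ERFF4 and CONTFRAC_ERFCF4 macros:

#include <math.h>

/* Scalar restatement of the selection logic in _erff4 above. */
static float erf_sketch(float x)
{
    float ax = fabsf(x);
    float r;
    if (ax > 0.89f) {
        r = 1.0f - erfcf(ax);            /* erf = 1 - erfc for larger |x| */
    } else {
        /* erf(x) ~= 2/sqrt(pi) * (x - x^3/3 + x^5/10 - x^7/42) near zero */
        float x2 = ax * ax;
        r = 1.1283791671f * ax
          * (1.0f - x2 / 3.0f + x2 * x2 / 10.0f - x2 * x2 * x2 / 42.0f);
    }
    return copysignf(r, x);              /* erf(-x) = -erf(x) */
}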

@@ -0,0 +1,6 @@
#include "headers/expd2.h"
static __inline double _exp(double x)
{
return spu_extract(_expd2(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,6 @@
#include "headers/exp2d2.h"
static __inline double _exp2(double vx)
{
return spu_extract(_exp2d2(spu_promote(vx, 0)), 0);
}

@@ -0,0 +1,164 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _EXP2D2_H_
#define _EXP2D2_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector double _exp2d2(vector double x)
*
* DESCRIPTION
* _exp2d2 computes 2 raised to the input x for each
* of the double word elements of x. Computation is
* performed by observing that 2^(a+b) = 2^a * 2^b.
* We decompose x into a and b (above) by letting:
* a = ceil(x), b = x - a;
*
* 2^a is easily computed by placing a into the exponent
* of a floating point number whose mantissa is all zeros.
*
* 2^b is computed using the following polynomial approximation:
*
* __13_
* \
* \
* 2^x = / Ci*x^i
* /____
* i=0
*
* for x in the range 0.0 to 1.0.
*
*/
#define EXP_C00 1.0
#define EXP_C01 6.93147180559945286227e-01
#define EXP_C02 2.40226506959100694072e-01
#define EXP_C03 5.55041086648215761801e-02
#define EXP_C04 9.61812910762847687873e-03
#define EXP_C05 1.33335581464284411157e-03
#define EXP_C06 1.54035303933816060656e-04
#define EXP_C07 1.52527338040598376946e-05
#define EXP_C08 1.32154867901443052734e-06
#define EXP_C09 1.01780860092396959520e-07
#define EXP_C10 7.05491162080112087744e-09
#define EXP_C11 4.44553827187081007394e-10
#define EXP_C12 2.56784359934881958182e-11
#define EXP_C13 1.36914888539041240648e-12
static __inline vector double _exp2d2(vector double vx)
{
vec_int4 ix, exp;
vec_uint4 overflow, underflow;
vec_float4 vxf;
vec_double2 p1, p2, x2, x4, x8;
vec_double2 vy, vxw, out_of_range;
/* Compute: vxw = x - ceil(x)
*/
vxw = spu_add(vx, spu_splats(0.5));
vxf = spu_roundtf(vxw);
ix = spu_convts(vxf, 0);
ix = spu_add(ix, (vec_int4)spu_andc(spu_cmpgt(spu_splats(0.0f), vxf), spu_cmpeq(ix, spu_splats((int)0x80000000))));
vxf = spu_convtf(ix, 0);
vxw = spu_sub(vx, spu_extend(vxf));
/* Detect overflow and underflow. If overflow, force the result
* to infinity (at the end).
*/
exp = spu_shuffle(ix, ix, ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 }));
overflow = spu_cmpgt(exp, 1023);
underflow = spu_cmpgt(exp, -1023);
out_of_range = (vec_double2)spu_and(overflow, ((vec_uint4) { 0x7FF00000, 0, 0x7FF00000, 0 }));
/* Calculate the result by evaluating the 13th order polynomial.
* For efficiency, the polynomial is broken into two parts and
* evaluated using nested multiply-adds:
*
* result = (((((c13*x + c12)*x + c11)*x + c10)*x + c9)*x + c8)*x^8 +
* ((((((c7*x + c6)*x + c5)*x + c4)*x + c3)*x + c2)*x + c1)*x + c0
*/
p2 = spu_madd(spu_splats(EXP_C07), vxw, spu_splats(EXP_C06));
p1 = spu_madd(spu_splats(EXP_C13), vxw, spu_splats(EXP_C12));
x2 = spu_mul(vxw, vxw);
p2 = spu_madd(vxw, p2, spu_splats(EXP_C05));
p1 = spu_madd(vxw, p1, spu_splats(EXP_C11));
x4 = spu_mul(x2, x2);
p2 = spu_madd(vxw, p2, spu_splats(EXP_C04));
p1 = spu_madd(vxw, p1, spu_splats(EXP_C10));
p2 = spu_madd(vxw, p2, spu_splats(EXP_C03));
p1 = spu_madd(vxw, p1, spu_splats(EXP_C09));
x8 = spu_mul(x4, x4);
p2 = spu_madd(vxw, p2, spu_splats(EXP_C02));
p1 = spu_madd(vxw, p1, spu_splats(EXP_C08));
p2 = spu_madd(vxw, p2, spu_splats(EXP_C01));
p2 = spu_madd(vxw, p2, spu_splats(EXP_C00));
vy = spu_madd(x8, p1, p2);
/* Align the integer portion of x with the exponent.
*/
ix = spu_sl(ix, ((vec_uint4) { 20, 32, 20, 32 }));
vy = (vec_double2)spu_add((vec_int4)vy, ix);
/* Select the result if not overflow or underflow. Otherwise select
* the out of range value.
*/
return (spu_sel(vy, out_of_range, (vec_ullong2)spu_orc(overflow, underflow)));
}
#endif /* _EXP2D2_H_ */
#endif /* __SPU__ */
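
The identity behind _exp2d2 can be checked with a scalar model: pick an integer n close to x, let r = x - n, compute 2^r = e^(r*ln 2) with a short series, and scale by 2^n through the exponent field. A hedged sketch in plain C; exp() and ldexp() stand in for the 13-term polynomial and the exponent insertion done above:

#include <math.h>
#include <stdio.h>

/* Scalar model of 2^x = 2^n * 2^r with n an integer near x. */
static double exp2_sketch(double x)
{
    double n = floor(x + 0.5);              /* a nearby integer; the exact rounding choice only shifts r */
    double r = x - n;                       /* small remainder */
    return ldexp(exp(r * M_LN2), (int)n);   /* 2^r, then scale by 2^n via the exponent */
}

int main(void)
{
    printf("exp2_sketch(10.3) = %.15g\nexp2(10.3)        = %.15g\n",
           exp2_sketch(10.3), exp2(10.3));
    return 0;
}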

@@ -0,0 +1,146 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _EXP2F4_H_
#define _EXP2F4_H_ 1
#include <spu_intrinsics.h>
#include "simdmath.h"
/*
* FUNCTION
* vector float _exp2f4(vector float x)
*
* DESCRIPTION
* The _exp2f4 function computes 2 raised to the input vector x.
* Computation is performed by observing that 2^(a+b) = 2^a * 2^b.
* We decompose x into a and b (above) by letting:
* a = ceil(x), b = x - a;
*
* 2^a is easily computed by placing a into the exponent
* of a floating point number whose mantissa is all zeros.
*
* 2^b is computed using the following polynomial approximation.
* (C. Hastings, Jr, 1955).
*
* __7__
* \
* \
* 2^(-x) = / Ci*x^i
* /____
* i=1
*
* for x in the range 0.0 to 1.0
*
* C0 = 1.0
* C1 = -0.9999999995
* C2 = 0.4999999206
* C3 = -0.1666653019
* C4 = 0.0416573475
* C5 = -0.0083013598
* C6 = 0.0013298820
* C7 = -0.0001413161
*
*/
static __inline vector float _exp2f4(vector float x)
{
vector signed int ix;
vector unsigned int overflow, underflow;
vector float frac, frac2, frac4;
vector float exp_int, exp_frac;
vector float result;
vector float hi, lo;
vector float bias;
/* Break the input x into two parts: ceil(x) and x - ceil(x).
*/
bias = (vector float)(spu_rlmaska((vector signed int)(x), -31));
bias = (vector float)(spu_andc(spu_splats((unsigned int)0x3F7FFFFF), (vector unsigned int)bias));
ix = spu_convts(spu_add(x, bias), 0);
frac = spu_sub(spu_convtf(ix, 0), x);
frac = spu_mul(frac, spu_splats((float)SM_LN2));
overflow = spu_rlmask(spu_cmpgt(ix, 128), -1);
underflow = spu_cmpgt(ix, -128);
exp_int = (vector float)spu_and((vector unsigned int)spu_sl(spu_add(ix, 127), 23), underflow);
/* Instruction counts could be reduced if the polynomial were
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
frac2 = spu_mul(frac, frac);
frac4 = spu_mul(frac2, frac2);
hi = spu_madd(frac, spu_splats(-0.0001413161f), spu_splats(0.0013298820f));
hi = spu_madd(frac, hi, spu_splats(-0.0083013598f));
hi = spu_madd(frac, hi, spu_splats(0.0416573475f));
lo = spu_madd(frac, spu_splats(-0.1666653019f), spu_splats(0.4999999206f));
lo = spu_madd(frac, lo, spu_splats(-0.9999999995f));
lo = spu_madd(frac, lo, spu_splats(1.0f));
exp_frac = spu_madd(frac4, hi, lo);
ix = spu_add(ix, spu_rlmask((vector signed int)(exp_frac), -23));
result = spu_mul(exp_frac, exp_int);
/* Handle overflow */
result = spu_or(result, (vector float)overflow);
return (result);
}
#endif /* _EXP2F4_H_ */
#endif /* __SPU__ */
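
With c = ceil(x) and f = c - x in [0,1), the float path above multiplies f by ln 2 and evaluates a polynomial, which amounts to the identity 2^x = 2^c * e^(-(c - x) * ln 2). A quick scalar check of that identity; standard C library calls stand in for the Hastings polynomial and the exponent construction:

#include <math.h>
#include <stdio.h>

/* Scalar check of 2^x = 2^c * e^(-(c - x) * ln 2) with c = ceil(x). */
static float exp2f_sketch(float x)
{
    float c = ceilf(x);
    float f = c - x;                          /* in [0, 1) */
    return ldexpf(expf(-f * (float)M_LN2), (int)c);
}

int main(void)
{
    printf("exp2f_sketch(3.7f) = %g\nexp2f(3.7f)        = %g\n",
           exp2f_sketch(3.7f), exp2f(3.7f));
    return 0;
}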

@@ -0,0 +1,165 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _EXPD2_H_
#define _EXPD2_H_ 1
#include <spu_intrinsics.h>
#include "floord2.h"
#include "ldexpd2.h"
#define LOG2E 1.4426950408889634073599 // 1/log(2)
/*
* FUNCTION
* vector double _expd2(vector double x)
*
* DESCRIPTION
* _expd2 computes e raised to the input x for
* each element of the double word vector.
*
* Calculation is performed by reducing the input argument
* to within a manageable range, and then computing the power
* series to the 11th degree.
*
* Range reduction is performed using the property:
*
* exp(x) = 2^n * exp(r)
*
* Values for "n" and "r" are determined such that:
*
* x = n * ln(2) + r, |r| <= ln(2)/2
*
* n = floor( (x/ln(2)) + 1/2 )
* r = x - (n * ln(2))
*
* To enhance the precision for "r", computation is performed
* using a two part representation of ln(2).
*
* Once the input is reduced, the power series is computed:
*
* __12_
* \
* exp(x) = 1 + \ (x^i)/i!
* /
* /____
* i=2
*
* The resulting value is scaled by 2^n and returned.
*
*/
static __inline vector double _expd2(vector double x)
{
vec_uchar16 even2odd = ((vec_uchar16){0x80, 0x80, 0x80, 0x80, 0, 1, 2, 3,
0x80, 0x80, 0x80, 0x80, 8, 9, 10, 11});
// log(2) in extended machine representable precision
vec_double2 ln2_hi = spu_splats(6.9314575195312500E-1); // 3FE62E4000000000
vec_double2 ln2_lo = spu_splats(1.4286068203094172E-6); // 3EB7F7D1CF79ABCA
// coefficients for the power series
vec_double2 f02 = spu_splats(5.00000000000000000000E-1); // 1/(2!)
vec_double2 f03 = spu_splats(1.66666666666666666667E-1); // 1/(3!)
vec_double2 f04 = spu_splats(4.16666666666666666667E-2); // 1/(4!)
vec_double2 f05 = spu_splats(8.33333333333333333333E-3); // 1/(5!)
vec_double2 f06 = spu_splats(1.38888888888888888889E-3); // 1/(6!)
vec_double2 f07 = spu_splats(1.98412698412698412698E-4); // 1/(7!)
vec_double2 f08 = spu_splats(2.48015873015873015873E-5); // 1/(8!)
vec_double2 f09 = spu_splats(2.75573192239858906526E-6); // 1/(9!)
vec_double2 f10 = spu_splats(2.75573192239858906526E-7); // 1/(10!)
vec_double2 f11 = spu_splats(2.50521083854417187751E-8); // 1/(11!)
vec_double2 f12 = spu_splats(2.08767569878680989792E-9); // 1/(12!)
// rx = floor(1/2 + x/log(2))
vec_double2 rx = _floord2(spu_madd(x,spu_splats(LOG2E),spu_splats(0.5)));
// extract the exponent of reduction
vec_int4 nint = spu_convts(spu_roundtf(rx),0);
vec_llong2 n = spu_extend(spu_shuffle(nint, nint, even2odd));
// reduce the input to within [ -ln(2)/2 ... ln(2)/2 ]
vec_double2 r;
r = spu_nmsub(rx,ln2_hi,x);
r = spu_nmsub(rx,ln2_lo,r);
vec_double2 result;
vec_double2 r2 = spu_mul(r,r);
// Use Horner's method on the power series
result = spu_madd(r,f12,f11);
result = spu_madd(result,r,f10);
result = spu_madd(result,r,f09);
result = spu_madd(result,r,f08);
result = spu_madd(result,r,f07);
result = spu_madd(result,r,f06);
result = spu_madd(result,r,f05);
result = spu_madd(result,r,f04);
result = spu_madd(result,r,f03);
result = spu_madd(result,r,f02);
result = spu_madd(result,r2,r);
result = spu_add(result,spu_splats(1.0));
// Scale the result
result = _ldexpd2(result, n);
return result;
}
#endif /* _EXPD2_H_ */
#endif /* __SPU__ */
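
The two-part ln(2) above is the usual Cody-and-Waite style reduction: ln2_hi carries only a few mantissa bits, so the product n * ln2_hi and its subtraction from x introduce essentially no rounding error, and ln2_lo supplies the remaining bits. A hedged scalar sketch of the whole _expd2 recipe, with a short truncated series standing in for the full polynomial:

#include <math.h>
#include <stdio.h>

/* Scalar model of _expd2's reduction: exp(x) = 2^n * exp(r),
 * n = floor(x/ln(2) + 1/2), r formed against the split ln(2) above. */
static double exp_sketch(double x)
{
    const double ln2_hi = 6.9314575195312500e-1;    /* same split as the vector code */
    const double ln2_lo = 1.4286068203094172e-6;
    double n = floor(x * 1.4426950408889634074 + 0.5);    /* x / ln(2), then round */
    double r = (x - n * ln2_hi) - n * ln2_lo;             /* |r| <= ln(2)/2 */
    /* A few series terms stand in for the full polynomial of the vector code. */
    double p = 1.0 + r * (1.0 + r * (0.5 + r * (1.0 / 6 + r * (1.0 / 24 + r / 120))));
    return ldexp(p, (int)n);
}

int main(void)
{
    printf("exp_sketch(1.0) = %.12g\nexp(1.0)        = %.12g\n", exp_sketch(1.0), exp(1.0));
    return 0;
}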

@@ -0,0 +1,124 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _EXPF4_H_
#define _EXPF4_H_ 1
#include "floorf4.h"
#include "ldexpf4.h"
/*
* FUNCTION
* vector float _expf4(vector float x)
*
* DESCRIPTION
* The _expf4 function computes e raised to the input x for
* each element of the float vector.
*
*/
static __inline vector float _expf4(vector float x)
{
// log2(e)
vec_float4 log2e = spu_splats(1.4426950408889634074f);
// Extra precision for the ln2 multiply
vec_float4 ln2_hi = spu_splats(0.693359375f);
vec_float4 ln2_lo = spu_splats(-2.12194440E-4f);
// Coefficients for the Taylor series
vec_float4 f02 = spu_splats(5.0000000000000000E-1f); // 1/2!
vec_float4 f03 = spu_splats(1.6666666666666667E-1f); // 1/3!
vec_float4 f04 = spu_splats(4.1666666666666667E-2f); // 1/4!
vec_float4 f05 = spu_splats(8.3333333333333333E-3f); // 1/5!
vec_float4 f06 = spu_splats(1.3888888888888889E-3f); // 1/6!
vec_float4 f07 = spu_splats(1.9841269841269841E-4f); // 1/7!
// Range reduce input, so that:
// e^x = e^z * 2^n
// e^x = e^z * e^(n * ln(2))
// e^x = e^(z + (n * ln(2)))
vec_int4 n; // exponent of reduction
vec_float4 q; // range reduced result
vec_float4 z;
vec_float4 r;
z = spu_madd(x,log2e,spu_splats(0.5f));
z = _floorf4(z);
r = spu_nmsub(z,ln2_hi,x);
r = spu_nmsub(z,ln2_lo,r);
n = spu_convts(z,0);
z = spu_mul(r,r);
// Use Horner's method on the Taylor series
q = spu_madd(r,f07,f06);
q = spu_madd(q,r,f05);
q = spu_madd(q,r,f04);
q = spu_madd(q,r,f03);
q = spu_madd(q,r,f02);
q = spu_madd(q,z,r);
q = spu_add(q,spu_splats(1.0f));
// Adjust the result by the range reduction
r = _ldexpf4( q, n );
return(r);
}
#endif /* _EXPF4_H_ */
#endif /* __SPU__ */
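
_expf4 regroups the series as exp(r) ~= 1 + r + r^2 * P(r), which is what the q = spu_madd(q,z,r) and final spu_add(q, 1.0f) steps implement. A scalar illustration with the same factorial coefficients, using plain float arithmetic in place of the fused multiply-adds:

#include <math.h>
#include <stdio.h>

/* exp(r) for small |r| as 1 + r + r^2 * (1/2! + r/3! + ... + r^5/7!). */
static float exp_poly_sketch(float r)
{
    float p = 1.9841269841e-4f;          /* 1/7! */
    p = p * r + 1.3888888889e-3f;        /* 1/6! */
    p = p * r + 8.3333333333e-3f;        /* 1/5! */
    p = p * r + 4.1666666667e-2f;        /* 1/4! */
    p = p * r + 1.6666666667e-1f;        /* 1/3! */
    p = p * r + 0.5f;                    /* 1/2! */
    return 1.0f + r + r * r * p;
}

int main(void)
{
    printf("exp_poly_sketch(0.25f) = %g\nexpf(0.25f)            = %g\n",
           exp_poly_sketch(0.25f), expf(0.25f));
    return 0;
}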

@@ -0,0 +1,6 @@
#include "headers/expm1d2.h"
static __inline double _expm1(double x)
{
return spu_extract(_expm1d2(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,147 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _EXPM1D2_H_
#define _EXPM1D2_H_ 1
#include <spu_intrinsics.h>
#include "expd2.h"
#include "divd2.h"
#define EXPM1_P0 0.0000000000000000000000000e+00
#define EXPM1_P1 1.0000000000000000000000000e+00
#define EXPM1_P2 9.7234232565378004697204117e-04
#define EXPM1_P3 3.3328278237299953368211192e-02
#define EXPM1_P4 3.1156225044634678993365345e-05
#define EXPM1_P5 2.1352206553343212164751408e-04
#define EXPM1_P6 1.6975135794626144795757452e-07
#define EXPM1_P7 2.7686287801334994383131629e-07
#define EXPM1_P8 1.1186114936216450015354379e-10
#define EXPM1_Q0 1.0000000000000000000000000e+00
#define EXPM1_Q1 -4.9902765767434620336473472e-01
#define EXPM1_Q2 1.1617544040780639069687652e-01
#define EXPM1_Q3 -1.6551954366467523660499950e-02
#define EXPM1_Q4 1.5864115838972218334307351e-03
#define EXPM1_Q5 -1.0534540477401370666288988e-04
#define EXPM1_Q6 4.7650003993592160620959043e-06
#define EXPM1_Q7 -1.3529198871087017840776265e-07
#define EXPM1_Q8 1.8635779407675460757658020e-09
/*
* FUNCTION
* vector double _expm1d2(vector double x)
*
* DESCRIPTION
* _expm1d2 computes the exponential - 1 for each element
* of the input vector x.
*
* This function is intended to return accurate values, even
* where exp(x) - 1 would normally produce bad results due to
* floating-point cancellation errors.
*
*/
static __inline vector double _expm1d2(vector double x)
{
vector double oned = spu_splats(1.0);
vector double range = spu_splats(1.0625);
vector unsigned long long use_exp;
vector double pr, qr;
vector double eresult;
vector double rresult;
vector double result;
/* Compiler Bug. Replace xbug with x when spu_cmp*() doesn't
* modify its arguments! */
volatile vector double xbug = x;
use_exp = spu_cmpabsgt(xbug, range);
/*
* Calculate directly using exp(x) - 1
*/
eresult = spu_sub(_expd2(x), oned);
/*
* For x in [-1.0625,1.0625], use a rational approximation.
* The madd's are interleaved to reduce dependency stalls. Looks
* like gcc is smart enough to do this on its own... but why
* take the chance.
*/
pr = spu_madd(x, spu_splats(EXPM1_P8), spu_splats(EXPM1_P7));
qr = spu_madd(x, spu_splats(EXPM1_Q8), spu_splats(EXPM1_Q7));
pr = spu_madd(pr, x, spu_splats(EXPM1_P6));
qr = spu_madd(qr, x, spu_splats(EXPM1_Q6));
pr = spu_madd(pr, x, spu_splats(EXPM1_P5));
qr = spu_madd(qr, x, spu_splats(EXPM1_Q5));
pr = spu_madd(pr, x, spu_splats(EXPM1_P4));
qr = spu_madd(qr, x, spu_splats(EXPM1_Q4));
pr = spu_madd(pr, x, spu_splats(EXPM1_P3));
qr = spu_madd(qr, x, spu_splats(EXPM1_Q3));
pr = spu_madd(pr, x, spu_splats(EXPM1_P2));
qr = spu_madd(qr, x, spu_splats(EXPM1_Q2));
pr = spu_madd(pr, x, spu_splats(EXPM1_P1));
qr = spu_madd(qr, x, spu_splats(EXPM1_Q1));
pr = spu_madd(pr, x, spu_splats(EXPM1_P0));
qr = spu_madd(qr, x, spu_splats(EXPM1_Q0));
rresult = _divd2(pr, qr);
/*
* Select either direct calculation or rational approximation.
*/
result = spu_sel(rresult, eresult, use_exp);
return result;
}
#endif /* _EXPM1D2_H_ */
#endif /* __SPU__ */
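
The motivation for the rational path is cancellation: for small x, exp(x) sits just above 1, and subtracting 1 discards most of the significant digits, while a direct expm1-style evaluation keeps them. A small standard-C demonstration of the effect (the library expm1() plays the role of the rational approximation above):

#include <math.h>
#include <stdio.h>

int main(void)
{
    double x = 1.0e-12;
    printf("exp(x) - 1 = %.17g\n", exp(x) - 1.0);  /* cancellation: only a few digits correct */
    printf("expm1(x)   = %.17g\n", expm1(x));      /* essentially x, as expected for tiny x */
    return 0;
}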

@@ -0,0 +1,6 @@
#include "headers/expm1f4.h"
static __inline float _expm1f(float vx)
{
return spu_extract(_expm1f4(spu_promote(vx, 0)), 0);
}

@@ -0,0 +1,130 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _EXPM1F4_H_
#define _EXPM1F4_H_ 1
#include <spu_intrinsics.h>
#include "expf4.h"
#include "divf4.h"
#define EXPM1F4_P0 0.0000000000000000000000000e-00
#define EXPM1F4_P1 9.9999999999999988897769754e-01
#define EXPM1F4_P2 -6.5597409827762467697531701e-04
#define EXPM1F4_P3 2.3800889637330315679042414e-02
#define EXPM1F4_P4 -1.0914929910143700584950963e-05
#define EXPM1F4_Q0 1.0000000000000000000000000e-00
#define EXPM1F4_Q1 -5.0065597410018825019761834e-01
#define EXPM1F4_Q2 1.0746220997195164714721471e-01
#define EXPM1F4_Q3 -1.1966024153043854083566799e-02
#define EXPM1F4_Q4 5.9997727954467768105711878e-04
/*
* FUNCTION
* vector float _expm1f4(vector float x)
*
* DESCRIPTION
* _expm1f4 computes the exponential - 1 for each element
* of the input vector x.
*
* This function is intended to return accurate values, even
* where exp(x) - 1 would normally produce bad results due to
* floating-point cancellation errors.
*
*/
static __inline vector float _expm1f4(vector float x)
{
vector float onef = spu_splats(1.0f);
vector float rangelo = spu_splats(-0.4f);
vector float rangehi = spu_splats(0.35f);
vector unsigned int use_exp;
vector float pr, qr;
vector float eresult;
vector float rresult;
vector float result;
use_exp = spu_or(spu_cmpgt(x, rangehi), spu_cmpgt(rangelo, x));
/*
* Calculate directly using exp(x) - 1
*/
eresult = spu_sub(_expf4(x), onef);
/*
* For x in [-0.4,0.35], use a rational approximation.
* The madd's are interleaved to reduce dependency stalls. Looks
* like gcc is smart enough to do this on its own... but why
* take the chance.
*/
pr = spu_madd(x, spu_splats((float)EXPM1F4_P4), spu_splats((float)EXPM1F4_P3));
qr = spu_madd(x, spu_splats((float)EXPM1F4_Q4), spu_splats((float)EXPM1F4_Q3));
pr = spu_madd(pr, x, spu_splats((float)EXPM1F4_P2));
qr = spu_madd(qr, x, spu_splats((float)EXPM1F4_Q2));
pr = spu_madd(pr, x, spu_splats((float)EXPM1F4_P1));
qr = spu_madd(qr, x, spu_splats((float)EXPM1F4_Q1));
pr = spu_madd(pr, x, spu_splats((float)EXPM1F4_P0));
qr = spu_madd(qr, x, spu_splats((float)EXPM1F4_Q0));
rresult = _divf4(pr, qr);
/*
* Select either direct calculation or rational approximation.
*/
result = spu_sel(rresult, eresult, use_exp);
return result;
}
#endif /* _EXPM1F4_H_ */
#endif /* __SPU__ */

@@ -0,0 +1,122 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _FLOORD2_H_
#define _FLOORD2_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector double _floord2(vector double x)
*
* DESCRIPTION
* The _floord2 function rounds the elements of a vector double
* input downwards to their nearest integer representable
* as a double.
*
*/
static __inline vector double _floord2(vector double in)
{
vec_uchar16 swap_words = (vec_uchar16) { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
vec_uint4 one = (vec_uint4) { 0, 1, 0, 1 };
vec_int4 exp, shift;
vec_uint4 mask, mask_1, frac_mask, addend, insert, pos, equal0;
vec_ullong2 sign = spu_splats(0x8000000000000000ULL);
vec_double2 in_hi, out;
vec_double2 minus_one = spu_splats(-1.0);
/* This function generates the following components
* based upon the inputs.
*
* mask = bits of the input that need to be replaced.
* insert = value of the bits that need to be replaced
* addend = value to be added to perform function.
*
* These are applied as follows:
*
* out = ((in & mask) | insert) + addend
*/
in_hi = spu_shuffle(in, in, splat_hi);
pos = spu_cmpgt((vec_int4)in_hi, -1);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
shift = spu_sub(((vec_int4) { 1023, 1043, 1023, 1043 } ), exp);
/* clamp shift to the range 0 to -31.
*/
shift = spu_sel(spu_splats(-32), spu_andc(shift, (vec_int4)spu_cmpgt(shift, 0)), spu_cmpgt(shift, -32));
frac_mask = spu_rlmask(((vec_uint4) { 0xFFFFF, -1, 0xFFFFF, -1 } ), shift);
mask = spu_orc(frac_mask, spu_cmpgt(exp, 0x3FE));
/* addend = ((in & mask) && (in >= 0)) ? mask+1 : 0
*/
mask_1 = spu_addx(mask, one, spu_rlqwbyte(spu_genc(mask, one), 4));
equal0 = spu_cmpeq(spu_and((vec_uint4)in, mask), 0);
addend = spu_andc(spu_andc(mask_1, pos), spu_and(equal0, spu_shuffle(equal0, equal0, swap_words)));
insert = spu_andc(spu_andc((vec_uint4)minus_one, pos),
spu_cmpgt((vec_uint4)spu_add(exp, -1), 1022));
in = spu_sel(in, (vec_double2)insert, spu_andc((vec_ullong2)mask, sign));
out = (vec_double2)spu_addx((vec_uint4)in, addend, spu_rlqwbyte(spu_genc((vec_uint4)in, addend), 4));
return (out);
}
#endif /* _FLOORD2_H_ */
#endif /* __SPU__ */
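
The mask/insert/addend scheme above is easier to follow one double at a time: locate the exponent, build a mask of the fraction bits that lie below the binary point, clear them, and step down by one when the value was negative and bits were actually discarded. A hedged scalar sketch on the raw bit pattern (my own helper, written with ordinary 64-bit integer operations; values whose exponent already makes them integral, including infinities and NaNs, pass through unchanged):

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the "((in & mask) | insert) + addend" idea, using
 * ordinary 64-bit integer operations on the bit pattern of one double. */
static double floor_sketch(double x)
{
    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    int exp = (int)((bits >> 52) & 0x7FF) - 1023;        /* unbiased exponent */
    if (exp < 0)                                         /* |x| < 1 */
        return (x < 0.0) ? -1.0 : 0.0 * x;               /* 0.0 * x keeps the sign of +/-0 */
    if (exp >= 52)                                       /* already integral (or Inf/NaN) */
        return x;
    uint64_t frac_mask = 0x000FFFFFFFFFFFFFULL >> exp;   /* fraction bits below the binary point */
    int discarded = (bits & frac_mask) != 0;
    bits &= ~frac_mask;                                  /* truncate toward zero */
    memcpy(&x, &bits, sizeof x);
    if (discarded && x < 0.0)                            /* negative and inexact: */
        x -= 1.0;                                        /* step down to the floor */
    return x;
}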

@@ -0,0 +1,117 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _FLOORF4_H_
#define _FLOORF4_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector float _floorf4(vector float value)
*
* DESCRIPTION
* The _floorf4 routine rounds a vector of input values "value" downwards
* to their nearest integer, returning the result as a vector of floats.
*
* The full range form (default) provides floor computation on
* all IEEE floating point values. The floor of NaNs remains NaN.
* The floor of denorms results in zero.
*
*/
static __inline vector float _floorf4(vector float value)
{
/* FULL FLOATING-POINT RANGE
*/
vec_int4 exp, shift;
vec_uint4 mask, frac_mask, addend, insert, pos;
vec_float4 out;
/* This function generates the following components
* based upon the inputs.
*
* mask = bits of the input that need to be replaced.
* insert = value of the bits that need to be replaced
* addend = value to be added to perform function.
*
* These are applied as follows:
*
* out = ((in & mask) | insert) + addend
*/
pos = spu_cmpgt((vec_int4)value, -1);
exp = spu_and(spu_rlmask((vec_int4)value, -23), 0xFF);
shift = spu_sub(127, exp);
frac_mask = spu_and(spu_rlmask(spu_splats((unsigned int)0x7FFFFF), shift),
spu_cmpgt((vec_int4)shift, -31));
mask = spu_orc(frac_mask, spu_cmpgt(exp, 126));
addend = spu_andc(spu_andc(spu_add(mask, 1), pos), spu_cmpeq(spu_and((vec_uint4)value, mask), 0));
insert = spu_andc(spu_andc(spu_splats((unsigned int)0xBF800000), pos),
spu_cmpgt((vec_uint4)spu_add(exp, -1), 126));
out = (vec_float4)spu_add(spu_sel((vec_uint4)value, insert, mask), addend);
/* Preserve original sign bit (for -0 case)
*/
out = spu_sel(out, value, spu_splats((unsigned int)0x80000000));
return (out);
}
#endif /* _FLOORF4_H_ */
#endif /* __SPU__ */

@@ -0,0 +1,6 @@
#include "headers/hypotd2.h"
static __inline double _hypot(double x, double y)
{
return spu_extract(_hypotd2(spu_promote(x, 0), spu_promote(y, 0)), 0);
}

@@ -0,0 +1,134 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _HYPOTD2_H_
#define _HYPOTD2_H_ 1
#include <spu_intrinsics.h>
#include "sqrtd2.h"
/*
* FUNCTION
* vector double _hypotd2(vector double x, vector double y)
*
* DESCRIPTION
* The function _hypotd2 returns a double vector in which each element is
* the square root of the sum of the squares of the corresponding
* elements of x and y.
*
* The purpose of this function is to avoid overflow during
* intermediate calculations, and therefore it is slower than
* simply calculating sqrt(x^2 + y^2).
*
* This is done by factoring out the larger of the two
* input exponents and moving this factor outside of the sqrt calculation.
* This minimizes the possibility of over/underflow when the squares
* of the values are calculated. Think of it as normalizing the larger
* input to the range [1,2).
*
* Special Cases:
* - hypot(x, +/-0) returns |x|
* - hypot(+/- infinity, y) returns +infinity
* - hypot(+/- infinity, NaN) returns +infinity
*
*/
static __inline vector double _hypotd2(vector double x, vector double y)
{
vector unsigned long long emask = spu_splats(0x7FF0000000000000ull);
vector unsigned long long mmask = spu_splats(0x000FFFFFFFFFFFFFull);
vector signed long long bias = spu_splats(0x3FF0000000000000ll);
vector double oned = spu_splats(1.0);
vector double sbit = spu_splats(-0.0);
vector double inf = (vector double)spu_splats(0x7FF0000000000000ull);
vector double max, max_e, max_m;
vector double min, min_e, min_m;
vector unsigned long long xgty;
vector double sum;
vector double result;
/* Only need absolute values for this function */
x = spu_andc(x, sbit);
y = spu_andc(y, sbit);
xgty = spu_cmpgt(x,y);
max = spu_sel(y,x,xgty);
min = spu_sel(x,y,xgty);
/* Extract the exponents and mantissas */
max_e = (vec_double2)spu_and((vec_ullong2)max, emask);
max_m = (vec_double2)spu_and((vec_ullong2)max, mmask);
min_e = (vec_double2)spu_and((vec_ullong2)min, emask);
min_m = (vec_double2)spu_and((vec_ullong2)min, mmask);
/* Factor-out max exponent here by subtracting from min exponent */
vec_llong2 min_e_int = (vec_llong2)spu_sub((vec_int4)min_e, (vec_int4)max_e);
min_e = (vec_double2)spu_add((vec_int4)min_e_int, (vec_int4)bias);
/* If the new min exponent is too small, just set it to 0. It
* wouldn't contribute to the final result in either case.
*/
min_e = spu_sel(min_e, sbit, spu_cmpgt(sbit, min_e));
/* Combine new exponents with original mantissas */
max = spu_or(oned, max_m);
min = spu_or(min_e, min_m);
sum = _sqrtd2(spu_madd(max, max, spu_mul(min, min)));
sum = spu_mul(max_e, sum);
/* Special case: x = +/- infinity */
result = spu_sel(sum, inf, spu_cmpeq(x, inf));
return result;
}
#endif /* _HYPOTD2_H_ */
#endif /* __SPU__ */
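
The exponent-factoring above serves the same purpose as the textbook scalar formulation: divide both magnitudes by the larger one so the squares cannot overflow, then multiply the square root back by that factor. A hedged sketch of that classical form (the same idea, not a transcription of the vector code, and it does not reproduce the infinity/NaN special cases listed above):

#include <math.h>

/* Overflow-safe hypot in the classical scalar style. */
static double hypot_sketch(double x, double y)
{
    double ax = fabs(x), ay = fabs(y);
    double mx = ax > ay ? ax : ay;        /* larger magnitude */
    double mn = ax > ay ? ay : ax;        /* smaller magnitude */
    if (mx == 0.0)
        return 0.0;                       /* both inputs are zero */
    double r = mn / mx;                   /* at most 1, so r*r cannot overflow */
    return mx * sqrt(1.0 + r * r);
}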

@@ -0,0 +1,6 @@
#include "headers/hypotf4.h"
static __inline float _hypotf(float x, float y)
{
return spu_extract(_hypotf4(spu_promote(x, 0), spu_promote(y, 0)), 0);
}

@@ -0,0 +1,139 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _HYPOTF4_H_
#define _HYPOTF4_H_ 1
#include <spu_intrinsics.h>
#include "sqrtf4.h"
/*
* FUNCTION
* vector float _hypotf4(vector float x, vector float y)
*
* DESCRIPTION
* The function _hypotf4 returns a float vector in which each element is
* the square root of the sum of the squares of the corresponding
* elements of x and y. In other words, each element is sqrt(x^2 + y^2).
*
* The purpose of this function is to avoid overflow during
* intermediate calculations, and therefore it is slower than
* simply calculating sqrt(x^2 + y^2).
*
* This is done by factoring out the larger of the two
* input exponents and moving this factor outside of the sqrt calculation.
* This minimizes the possibility of over/underflow when the squares
* of the values are calculated. Think of it as normalizing the larger
* input to the range [1,2).
*
*
* Special Cases:
* - hypot(x, +/-0) returns |x|
* - hypot(+/- infinity, y) returns +infinity
* - hypot(+/- infinity, NaN) returns +infinity
*
*/
static __inline vector float _hypotf4(vector float x, vector float y)
{
vector unsigned int emask = spu_splats(0x7F800000u);
vector unsigned int mmask = spu_splats(0x007FFFFFu);
vector signed int bias = spu_splats(0x3F800000);
vector float inf = (vec_float4)spu_splats(0x7F800000);
vector float onef = spu_splats(1.0f);
vector float sbit = spu_splats(-0.0f);
vector float max, max_e, max_m;
vector float min, min_e, min_m;
vector unsigned int xgty;
vector float sum;
vector float result;
/* Only need absolute values for this function */
x = spu_andc(x, sbit);
y = spu_andc(y, sbit);
xgty = spu_cmpgt(x,y);
max = spu_sel(y,x,xgty);
min = spu_sel(x,y,xgty);
/* Extract exponents and mantissas */
max_e = (vec_float4)spu_and((vec_uint4)max, emask);
max_m = (vec_float4)spu_and((vec_uint4)max, mmask);
min_e = (vec_float4)spu_and((vec_uint4)min, emask);
min_m = (vec_float4)spu_and((vec_uint4)min, mmask);
/* Adjust the exponent of the smaller of the 2 input values by
* subtracting max_exp from min_exp.
*/
vec_int4 min_e_int = spu_sub((vec_int4)min_e, (vec_int4)max_e);
min_e = (vec_float4)spu_add(min_e_int, bias);
/* If the new min exponent is too small, just set it to 0. It
* wouldn't contribute to the final result in either case.
*/
min_e = spu_sel(min_e, sbit, spu_cmpgt(sbit, min_e));
/* Combine new exponents with original mantissas */
max = spu_or(onef, max_m);
min = spu_or(min_e, min_m);
sum = _sqrtf4(spu_madd(max, max, spu_mul(min, min)));
sum = spu_mul(max_e, sum);
/* Special case: x = +/- infinity */
result = spu_sel(sum, inf, spu_cmpeq(x, inf));
return result;
}
#endif /* _HYPOTF4_H_ */
#endif /* __SPU__ */

@@ -0,0 +1,6 @@
#include "headers/isnand2.h"
static __inline int _isnan(double x)
{
return spu_extract(_isnand2(spu_promote(x, 0)), 0);
}

@@ -0,0 +1,114 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* Redistributions of source code must retain the above copyright */
/* notice, this list of conditions and the following disclaimer. */
/* */
/* Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ISNAND2_H_
#define _ISNAND2_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector unsigned long long _isnand2(vector double x)
*
* DESCRIPTION
* The _isnand2 function returns a vector in which each element indicates
* if the corresponding element of x is not a number (NaN).
*
* RETURNS
* The function _isnand2 returns an unsigned long long vector in which
* each element is defined as:
*
* - ULLONG_MAX if the element of x is NaN
* - 0 otherwise
*
*/
static __inline vector unsigned long long _isnand2(vector double x)
{
#ifndef __SPU_EDP__
vec_uint4 sign_mask = (vec_uint4) { 0x7FFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF };
vec_uint4 test_mask = (vec_uint4) { 0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000 };
vec_uchar16 hi_promote = (vec_uchar16) { 0, 1, 2, 3, 0, 1, 2, 3, 8, 9, 10, 11, 8, 9, 10, 11 };
// Remove the sign bits
vec_uint4 signless = spu_and((vec_uint4)x,sign_mask);
// Check if the high word is equal to the max_exp
vec_uint4 x2 = spu_cmpeq(signless,test_mask);
// This checks two things:
// 1) If the high word is greater than max_exp (indicates a NaN)
// 2) If the low word is non-zero (indicates a NaN in conjunction with an
// exp equal to max_exp)
vec_uint4 x1 = spu_cmpgt(signless,test_mask);
// rotate the low word test of x1 into the high word slot, then and it
// with the high word of x2 (checking for #2 above)
vec_uint4 exp_and_lw = spu_and(spu_rlqwbyte(x1,4),x2);
// All the goodies are in the high words, so if the high word of either x1
// or exp_and_lw is set, then we have a NaN, so we "or" them together
vec_uint4 result = spu_or(x1,exp_and_lw);
// And then promote the resulting high word to 64 bit length
result = spu_shuffle(result,result,hi_promote);
return (vec_ullong2) result;
#else
return spu_testsv(x, SPU_SV_NAN);
#endif /* __SPU_EDP__ */
}
#endif // _ISNAND2_H_
#endif /* __SPU__ */
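
The non-EDP path above is the standard IEEE-754 double-precision NaN test done on 32-bit halves: with the sign bit cleared, a value is NaN exactly when its bit pattern is greater than 0x7FF0000000000000 (exponent all ones, mantissa non-zero). A scalar sketch of the same test in portable C (illustration only; scalar_isnand is a hypothetical helper, not part of this patch):

#include <stdint.h>
#include <string.h>

/* Hypothetical scalar version of the bit test the vector code applies
 * to each double-word element of x.                                   */
static int scalar_isnand(double d)
{
  uint64_t bits;
  memcpy(&bits, &d, sizeof bits);      /* reinterpret the double bits   */
  bits &= 0x7FFFFFFFFFFFFFFFULL;       /* clear the sign bit            */
  /* NaN: exponent field all ones and mantissa non-zero, i.e. the
   * signless pattern is strictly greater than +infinity's pattern.     */
  return bits > 0x7FF0000000000000ULL;
}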


@ -0,0 +1,6 @@
#include "headers/isnanf4.h"
static __inline unsigned int _isnanf(float x)
{
return spu_extract(_isnanf4(spu_promote(x, 0)), 0);
}


@ -0,0 +1,78 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _ISNANF4_H_
#define _ISNANF4_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector unsigned int _isnanf4(vector float x)
*
* DESCRIPTION
* The _isnanf4 function returns a vector in which each element indicates
* if the corresponding element of x is NaN.
*
* On the SPU, this function always returns 0, since NaNs are not
* supported.
*
* RETURNS
* UINT_MAX (0xFFFFFFFF) if the element of x is a NaN
* 0 (0x00000000) otherwise (always on the SPU)
*
*/
static __inline vector unsigned int _isnanf4(vector float __attribute__((__unused__))x)
{
return spu_splats((unsigned int)0);
}
#endif // _ISNANF4_H_
#endif /* __SPU__ */


@ -0,0 +1,114 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LDEXPD2_H_
#define _LDEXPD2_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector double _ldexpd2(vector double x, vector signed long long exp)
*
* DESCRIPTION
* The _ldexpd2 function computes x * 2^exp for each of the two elements
* of x using the corresponding elements of exp.
*
*/
static __inline vector double _ldexpd2(vector double x, vector signed long long llexp)
{
vec_uchar16 odd_to_even = ((vec_uchar16) { 4,5,6,7, 0x80,0x80,0x80,0x80,
12,13,14,15, 0x80,0x80,0x80,0x80 });
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3,
8,9,10,11, 8,9,10,11});
vec_int4 exp;
vec_uint4 exphi;
vec_int4 e1, e2;
vec_int4 min = spu_splats(-2044);
vec_int4 max = spu_splats(2046);
vec_uint4 cmp_min, cmp_max;
vec_uint4 shift = (vec_uint4) { 20, 32, 20, 32 };
vec_double2 f1, f2;
vec_double2 out;
exp = (vec_int4)spu_shuffle(llexp, llexp, odd_to_even);
exphi = (vec_uint4)spu_shuffle(llexp, llexp, dup_even);
/* Clamp the specified exponent to the range -2044 to 2046.
*/
cmp_min = spu_cmpgt(exp, min);
cmp_max = spu_cmpgt(exp, max);
exp = spu_sel(min, exp, cmp_min);
exp = spu_sel(exp, max, cmp_max);
/* Generate the factors f1 = 2^e1 and f2 = 2^e2
*/
e1 = spu_rlmaska(exp, -1);
e2 = spu_sub(exp, e1);
f1 = (vec_double2)spu_sl(spu_add(e1, 1023), shift);
f2 = (vec_double2)spu_sl(spu_add(e2, 1023), shift);
/* Compute the product x * 2^e1 * 2^e2
*/
out = spu_mul(spu_mul(x, f1), f2);
return (out);
}
#endif /* _LDEXPD2_H_ */
#endif /* __SPU__ */
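
The two-factor product above is needed because the clamped exponent can reach a magnitude of 2044 to 2046, which no single normal double of the form 2^exp can represent (the double exponent tops out at 1023); splitting the scale as 2^e1 * 2^e2 keeps both factors representable. A scalar sketch of the same idea built on the standard C library (scalar_ldexp2 is a hypothetical name; the e >> 1 assumes an arithmetic right shift, mirroring spu_rlmaska):

#include <math.h>
#include <stdio.h>

/* Hypothetical scalar analogue of the split-exponent scaling above. */
static double scalar_ldexp2(double x, int e)
{
  int e1 = e >> 1;        /* arithmetic halving, like spu_rlmaska(exp, -1) */
  int e2 = e - e1;
  return x * ldexp(1.0, e1) * ldexp(1.0, e2);
}

int main(void)
{
  /* 2^-1000 scaled by 2^1500 is 2^500; each partial factor 2^750 is
   * still a normal double, so no intermediate overflow occurs.        */
  printf("%g\n", scalar_ldexp2(0x1p-1000, 1500));   /* about 3.27e+150 */
  return 0;
}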


@ -0,0 +1,84 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LDEXPF4_H_
#define _LDEXPF4_H_ 1
#include <spu_intrinsics.h>
#include "scalbnf4.h"
/*
* FUNCTION
* vector float _ldexpf4(vector float x, vector signed int exp)
*
* DESCRIPTION
* The _ldexpf4 function returns a vector containing each element of x
* multiplied by 2^exp computed efficiently. This function is computed
* without the assistance of any floating point operations and as such
* does not set any floating point exceptions.
*
* RETURNS
* - if the exponent of x is 0, then x is either 0 or a subnormal,
* and the result will be returned as 0.
* - if the result underflows, it will be returned as 0.
* - if the result overflows, it will be returned as FLT_MAX.
*
*/
static __inline vector float _ldexpf4(vector float x, vector signed int exp)
{
return _scalbnf4(x, exp);
}
#endif /* _LDEXPF4_H_ */
#endif /* __SPU__ */
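
Forwarding to _scalbnf4 is correct because FLT_RADIX is 2 on the SPU, so ldexpf(x, n) and scalbnf(x, n) both compute x * 2^n. A scalar illustration of that equivalence using the standard C library (assumes a hosted C99 environment):

#include <assert.h>
#include <math.h>

int main(void)
{
  /* With FLT_RADIX == 2, scalbnf(x, n) is x * 2^n, exactly ldexpf(x, n). */
  float x = 1.5f;
  assert(ldexpf(x, 10) == scalbnf(x, 10));   /* both are 1536.0f */
  return 0;
}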


@ -0,0 +1,6 @@
#include "headers/lgammad2.h"
static __inline double _lgamma(double x)
{
return spu_extract(_lgammad2(spu_promote(x, 0)), 0);
}


@ -0,0 +1,330 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LGAMMAD2_H_
#define _LGAMMAD2_H_ 1
#include <spu_intrinsics.h>
#include "divd2.h"
#include "recipd2.h"
#include "logd2.h"
#include "sind2.h"
#include "truncd2.h"
/*
* FUNCTION
* vector double _lgammad2(vector double x) - Natural Log of Gamma Function
*
* DESCRIPTION
* _lgammad2 calculates the natural logarithm of the absolute value of the gamma
* function for the corresponding elements of the input vector.
*
* C99 Special Cases:
* lgamma(0) returns +infinity
* lgamma(1) returns +0
* lgamma(2) returns +0
* lgamma(negative integer) returns +infinity
* lgamma(+infinity) returns +infinity
* lgamma(-infinity) returns +infinity
*
* Other Cases:
* lgamma(NaN) returns NaN
* lgamma(denormal) is treated as lgamma(0) and returns +infinity
*
*/
#define PI 3.1415926535897932384626433832795028841971693993751058209749445923078164
#define HALFLOG2PI 9.1893853320467274178032973640561763986139747363778341281715154048276570E-1
#define EULER_MASCHERONI 0.5772156649015328606065
/*
* Zeta constants for Maclaurin approx. near zero
*/
#define ZETA_02_DIV_02 8.2246703342411321823620758332301E-1
#define ZETA_03_DIV_03 -4.0068563438653142846657938717048E-1
#define ZETA_04_DIV_04 2.7058080842778454787900092413529E-1
#define ZETA_05_DIV_05 -2.0738555102867398526627309729141E-1
#define ZETA_06_DIV_06 1.6955717699740818995241965496515E-1
/*
* More Maclaurin coefficients
*/
/*
#define ZETA_07_DIV_07 -1.4404989676884611811997107854997E-1
#define ZETA_08_DIV_08 1.2550966952474304242233565481358E-1
#define ZETA_09_DIV_09 -1.1133426586956469049087252991471E-1
#define ZETA_10_DIV_10 1.0009945751278180853371459589003E-1
#define ZETA_11_DIV_11 -9.0954017145829042232609298411497E-2
#define ZETA_12_DIV_12 8.3353840546109004024886499837312E-2
#define ZETA_13_DIV_13 -7.6932516411352191472827064348181E-2
#define ZETA_14_DIV_14 7.1432946295361336059232753221795E-2
#define ZETA_15_DIV_15 -6.6668705882420468032903448567376E-2
#define ZETA_16_DIV_16 6.2500955141213040741983285717977E-2
#define ZETA_17_DIV_17 -5.8823978658684582338957270605504E-2
#define ZETA_18_DIV_18 5.5555767627403611102214247869146E-2
#define ZETA_19_DIV_19 -5.2631679379616660733627666155673E-2
#define ZETA_20_DIV_20 5.0000047698101693639805657601934E-2
*/
/*
* Coefficients for Stirling's Series for Lgamma()
*/
#define STIRLING_01 8.3333333333333333333333333333333333333333333333333333333333333333333333E-2
#define STIRLING_02 -2.7777777777777777777777777777777777777777777777777777777777777777777778E-3
#define STIRLING_03 7.9365079365079365079365079365079365079365079365079365079365079365079365E-4
#define STIRLING_04 -5.9523809523809523809523809523809523809523809523809523809523809523809524E-4
#define STIRLING_05 8.4175084175084175084175084175084175084175084175084175084175084175084175E-4
#define STIRLING_06 -1.9175269175269175269175269175269175269175269175269175269175269175269175E-3
#define STIRLING_07 6.4102564102564102564102564102564102564102564102564102564102564102564103E-3
#define STIRLING_08 -2.9550653594771241830065359477124183006535947712418300653594771241830065E-2
#define STIRLING_09 1.7964437236883057316493849001588939669435025472177174963552672531000704E-1
#define STIRLING_10 -1.3924322169059011164274322169059011164274322169059011164274322169059011E0
#define STIRLING_11 1.3402864044168391994478951000690131124913733609385783298826777087646653E1
#define STIRLING_12 -1.5684828462600201730636513245208897382810426288687158252375643679991506E2
#define STIRLING_13 2.1931033333333333333333333333333333333333333333333333333333333333333333E3
#define STIRLING_14 -3.6108771253724989357173265219242230736483610046828437633035334184759472E4
#define STIRLING_15 6.9147226885131306710839525077567346755333407168779805042318946657100161E5
/*
* More Stirling's coefficients
*/
/*
#define STIRLING_16 -1.5238221539407416192283364958886780518659076533839342188488298545224541E7
#define STIRLING_17 3.8290075139141414141414141414141414141414141414141414141414141414141414E8
#define STIRLING_18 -1.0882266035784391089015149165525105374729434879810819660443720594096534E10
#define STIRLING_19 3.4732028376500225225225225225225225225225225225225225225225225225225225E11
#define STIRLING_20 -1.2369602142269274454251710349271324881080978641954251710349271324881081E13
#define STIRLING_21 4.8878806479307933507581516251802290210847053890567382180703629532735764E14
*/
static __inline vector double _lgammad2(vector double x)
{
vec_uchar16 dup_even = ((vec_uchar16) { 0,1,2,3, 0,1,2,3, 8, 9,10,11, 8, 9,10,11 });
vec_uchar16 dup_odd = ((vec_uchar16) { 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 });
vec_uchar16 swap_word = ((vec_uchar16) { 4,5,6,7, 0,1,2,3, 12,13,14,15, 8, 9,10,11 });
vec_double2 infinited = (vec_double2)spu_splats(0x7FF0000000000000ull);
vec_double2 zerod = spu_splats(0.0);
vec_double2 oned = spu_splats(1.0);
vec_double2 twod = spu_splats(2.0);
vec_double2 pi = spu_splats(PI);
vec_double2 sign_maskd = spu_splats(-0.0);
/* This is where we switch from near zero approx. */
vec_float4 zero_switch = spu_splats(0.001f);
vec_float4 shift_switch = spu_splats(6.0f);
vec_float4 xf;
vec_double2 inv_x, inv_xsqu;
vec_double2 xtrunc, xstirling;
vec_double2 sum, xabs;
vec_uint4 xhigh, xlow, xthigh, xtlow;
vec_uint4 x1, isnaninf, isnposint, iszero, isint, isneg, isshifted, is1, is2;
vec_double2 result, stresult, shresult, mresult, nresult;
/* Force Denorms to 0 */
x = spu_add(x, zerod);
xabs = spu_andc(x, sign_maskd);
xf = spu_roundtf(xabs);
xf = spu_shuffle(xf, xf, dup_even);
/*
* For 0 < x <= 0.001.
* Approximation Near Zero
*
* Use Maclaurin Expansion of lgamma()
*
* lgamma(z) = -ln(z) - z * EulerMascheroni + Sum[(-1)^n * z^n * Zeta(n)/n]
*/
mresult = spu_madd(xabs, spu_splats(ZETA_06_DIV_06), spu_splats(ZETA_05_DIV_05));
mresult = spu_madd(xabs, mresult, spu_splats(ZETA_04_DIV_04));
mresult = spu_madd(xabs, mresult, spu_splats(ZETA_03_DIV_03));
mresult = spu_madd(xabs, mresult, spu_splats(ZETA_02_DIV_02));
mresult = spu_mul(xabs, spu_mul(xabs, mresult));
mresult = spu_sub(mresult, spu_add(_logd2(xabs), spu_mul(xabs, spu_splats(EULER_MASCHERONI))));
/*
* For 0.001 < x <= 6.0, we push the value out to a range where
* Stirling's approximation is accurate, using a shift constant of 6.
*
* Use the recurrence relation:
* lgamma(x + 1) = ln(x) + lgamma(x)
*
* Note that we shift x here, before Stirling's calculation,
* then after Stirling's, we adjust the result.
*
*/
isshifted = spu_cmpgt(shift_switch, xf);
xstirling = spu_sel(xabs, spu_add(xabs, spu_splats(6.0)), (vec_ullong2)isshifted);
inv_x = _recipd2(xstirling);
inv_xsqu = spu_mul(inv_x, inv_x);
/*
* For 6.0 < x < infinity
*
* Use Stirling's Series.
*
* lgamma(x) = (1/2)ln(2*pi) + (x - 1/2)ln(x) - x
*             + 1/(12x) - 1/(360x^3) + 1/(1260x^5) - ...
*
* Taking 15 terms of the sum gives good results for x > 6.0
*
*/
sum = spu_madd(inv_xsqu, spu_splats(STIRLING_15), spu_splats(STIRLING_14));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_13));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_12));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_11));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_10));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_09));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_08));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_07));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_06));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_05));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_04));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_03));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_02));
sum = spu_madd(sum, inv_xsqu, spu_splats(STIRLING_01));
sum = spu_mul(sum, inv_x);
stresult = spu_madd(spu_sub(xstirling, spu_splats(0.5)), _logd2(xstirling), spu_splats(HALFLOG2PI));
stresult = spu_sub(stresult, xstirling);
stresult = spu_add(stresult, sum);
/*
* Adjust result if we shifted x into Stirling range.
*
* lgamma(x) = lgamma(x + n) - ln(x(x+1)(x+2)...(x+n-1))
*
*/
shresult = spu_mul(xabs, spu_add(xabs, spu_splats(1.0)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(2.0)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(3.0)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(4.0)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(5.0)));
shresult = _logd2(shresult);
shresult = spu_sub(stresult, shresult);
stresult = spu_sel(stresult, shresult, (vec_ullong2)isshifted);
/*
* Select either Maclaurin or Stirling result before Negative X calc.
*/
xf = spu_shuffle(xf, xf, dup_even);
vec_uint4 useStirlings = spu_cmpgt(xf, zero_switch);
result = spu_sel(mresult, stresult, (vec_ullong2)useStirlings);
/*
* Approximation for Negative X
*
* Use reflection relation
*
* gamma(x) * gamma(-x) = -pi/(x sin(pi x))
*
* lgamma(x) = log(pi/(-x sin(pi x))) - lgamma(-x)
*
*/
nresult = spu_mul(x, _sind2(spu_mul(x, pi)));
nresult = spu_andc(nresult, sign_maskd);
nresult = _logd2(_divd2(pi, nresult));
nresult = spu_sub(nresult, result);
/*
* Select between the negative or positive x approximations.
*/
isneg = (vec_uint4)spu_shuffle(x, x, dup_even);
isneg = spu_rlmaska(isneg, -32);
result = spu_sel(result, nresult, (vec_ullong2)isneg);
/*
* Finally, special cases/errors.
*/
xhigh = (vec_uint4)spu_shuffle(xabs, xabs, dup_even);
xlow = (vec_uint4)spu_shuffle(xabs, xabs, dup_odd);
/* x = zero, return infinite */
x1 = spu_or(xhigh, xlow);
iszero = spu_cmpeq(x1, 0);
/* x = negative integer, return infinite */
xtrunc = _truncd2(xabs);
xthigh = (vec_uint4)spu_shuffle(xtrunc, xtrunc, dup_even);
xtlow = (vec_uint4)spu_shuffle(xtrunc, xtrunc, dup_odd);
isint = spu_and(spu_cmpeq(xthigh, xhigh), spu_cmpeq(xtlow, xlow));
isnposint = spu_or(spu_and(isint, isneg), iszero);
result = spu_sel(result, infinited, (vec_ullong2)isnposint);
/* x = 1.0 or 2.0, return 0.0 */
is1 = spu_cmpeq((vec_uint4)x, (vec_uint4)oned);
is1 = spu_and(is1, spu_shuffle(is1, is1, swap_word));
is2 = spu_cmpeq((vec_uint4)x, (vec_uint4)twod);
is2 = spu_and(is2, spu_shuffle(is2, is2, swap_word));
result = spu_sel(result, zerod, (vec_ullong2)spu_or(is1,is2));
/* x = +/- infinite or nan, return |x| */
isnaninf = spu_cmpgt(xhigh, 0x7FEFFFFF);
result = spu_sel(result, xabs, (vec_ullong2)isnaninf);
return result;
}
#endif /* _LGAMMAD2_H_ */
#endif /* __SPU__ */
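
The shift adjustment above is repeated application of the recurrence lgamma(x + 1) = ln(x) + lgamma(x), which gives lgamma(x) = lgamma(x + 6) - ln(x(x+1)(x+2)(x+3)(x+4)(x+5)). A scalar check of that identity against the C library's lgamma (illustration only, not part of the patch):

#include <math.h>
#include <stdio.h>

int main(void)
{
  /* Check: lgamma(x) == lgamma(x + 6) - ln(x(x+1)(x+2)(x+3)(x+4)(x+5)). */
  double x = 1.25;
  double shifted = lgamma(x + 6.0)
                 - log(x * (x + 1) * (x + 2) * (x + 3) * (x + 4) * (x + 5));
  printf("%.15g %.15g\n", lgamma(x), shifted);   /* the two values agree */
  return 0;
}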


@ -0,0 +1,6 @@
#include "headers/lgammaf4.h"
static __inline float _lgammaf(float x)
{
return spu_extract(_lgammaf4(spu_promote(x, 0)), 0);
}


@ -0,0 +1,232 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LGAMMAF4_H_
#define _LGAMMAF4_H_ 1
#include <spu_intrinsics.h>
#include "lgammad2.h"
#include "recipf4.h"
#include "logf4.h"
#include "sinf4.h"
#include "truncf4.h"
/*
* FUNCTION
* vector float _lgammaf4(vector float x) - Natural Log of Gamma Function
*
* DESCRIPTION
* _lgammaf4 calculates the natural logarithm of the absolute value of the gamma
* function for the corresponding elements of the input vector.
*
* C99 Special Cases:
* lgamma(0) returns +infinity
* lgamma(1) returns +0
* lgamma(2) returns +0
* lgamma(negative integer) returns +infinity
* lgamma(+infinity) returns +infinity
* lgamma(-infinity) returns +infinity
*
* Other Cases:
* lgamma(NaN) returns NaN
* lgamma(denormal) is treated as lgamma(0) and returns +infinity
*
*/
static __inline vector float _lgammaf4(vector float x)
{
vec_float4 inff = (vec_float4)spu_splats(0x7F800000);
vec_float4 zerof = spu_splats(0.0f);
vec_float4 pi = spu_splats((float)PI);
vec_float4 sign_maskf = spu_splats(-0.0f);
vector unsigned int gt0;
/* This is where we switch from near zero approx. */
vec_float4 mac_switch = spu_splats(0.16f);
vec_float4 shift_switch = spu_splats(6.0f);
vec_float4 inv_x, inv_xsqu;
vec_float4 xtrunc, xstirling;
vec_float4 sum, xabs;
vec_uint4 isnaninf, isshifted;
vec_float4 result, stresult, shresult, mresult, nresult;
/* Force Denorms to 0 */
x = spu_add(x, zerof);
xabs = spu_andc(x, sign_maskf);
gt0 = spu_cmpgt(x, zerof);
xtrunc = _truncf4(x);
/*
* For 0 < x <= 0.16.
* Approximation Near Zero
*
* Use Maclaurin Expansion of lgamma()
*
* lgamma(z) = -ln(z) - z * EulerMascheroni + Sum[(-1)^n * z^n * Zeta(n)/n]
*/
mresult = spu_madd(xabs, spu_splats((float)ZETA_06_DIV_06), spu_splats((float)ZETA_05_DIV_05));
mresult = spu_madd(xabs, mresult, spu_splats((float)ZETA_04_DIV_04));
mresult = spu_madd(xabs, mresult, spu_splats((float)ZETA_03_DIV_03));
mresult = spu_madd(xabs, mresult, spu_splats((float)ZETA_02_DIV_02));
mresult = spu_mul(xabs, spu_mul(xabs, mresult));
mresult = spu_sub(mresult, spu_add(_logf4(xabs), spu_mul(xabs, spu_splats((float)EULER_MASCHERONI))));
/*
* For 0.16 < x <= 6.0, we push the value out to a range where
* Stirling's approximation is accurate, using a shift constant of 6.
*
* Use the recurrence relation:
* lgamma(x + 1) = ln(x) + lgamma(x)
*
* Note that we shift x here, before Stirling's calculation,
* then after Stirling's, we adjust the result.
*
*/
isshifted = spu_cmpgt(shift_switch, x);
xstirling = spu_sel(xabs, spu_add(xabs, spu_splats(6.0f)), isshifted);
inv_x = _recipf4(xstirling);
inv_xsqu = spu_mul(inv_x, inv_x);
/*
* For 6.0 < x < infinity
*
* Use Stirling's Series.
*
* lgamma(x) = (1/2)ln(2*pi) + (x - 1/2)ln(x) - x
*             + 1/(12x) - 1/(360x^3) + 1/(1260x^5) - ...
*
*
*/
sum = spu_madd(inv_xsqu, spu_splats((float)STIRLING_10), spu_splats((float)STIRLING_09));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_08));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_07));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_06));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_05));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_04));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_03));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_02));
sum = spu_madd(sum, inv_xsqu, spu_splats((float)STIRLING_01));
sum = spu_mul(sum, inv_x);
stresult = spu_madd(spu_sub(xstirling, spu_splats(0.5f)), _logf4(xstirling), spu_splats((float)HALFLOG2PI));
stresult = spu_sub(stresult, xstirling);
stresult = spu_add(stresult, sum);
/*
* Adjust result if we shifted x into Stirling range.
*
* lgamma(x) = lgamma(x + n) - ln(x(x+1)(x+2)...(x+n-1))
*
*/
shresult = spu_mul(xabs, spu_add(xabs, spu_splats(1.0f)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(2.0f)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(3.0f)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(4.0f)));
shresult = spu_mul(shresult, spu_add(xabs, spu_splats(5.0f)));
shresult = _logf4(shresult);
shresult = spu_sub(stresult, shresult);
stresult = spu_sel(stresult, shresult, isshifted);
/*
* Select either Maclaurin or Stirling result before Negative X calc.
*/
vec_uint4 useStirlings = spu_cmpgt(xabs, mac_switch);
result = spu_sel(mresult, stresult, useStirlings);
/*
* Approximation for Negative X
*
* Use reflection relation:
*
* gamma(x) * gamma(-x) = -pi/(x sin(pi x))
*
* lgamma(x) = log(pi/(-x sin(pi x))) - lgamma(-x)
*
*/
nresult = spu_mul(x, _sinf4(spu_mul(x, pi)));
nresult = spu_andc(nresult, sign_maskf);
nresult = spu_sub(_logf4(pi), spu_add(result, _logf4(nresult)));
/*
* Select between the negative or positive x approximations.
*/
result = spu_sel(nresult, result, gt0);
/*
* Finally, special cases/errors.
*/
/*
* x = non-positive integer, return infinity.
*/
result = spu_sel(result, inff, spu_andc(spu_cmpeq(x, xtrunc), gt0));
/* x = +/- infinite or nan, return |x| */
isnaninf = spu_cmpgt((vec_uint4)xabs, 0x7FEFFFFF);
result = spu_sel(result, xabs, isnaninf);
return result;
}
#endif /* _LGAMMAF4_H_ */
#endif /* __SPU__ */
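
Both lgamma flavors handle negative arguments through the reflection identity quoted in the comments, lgamma(x) = log(pi/|x*sin(pi*x)|) - lgamma(-x) for non-integer x < 0. A scalar spot-check with the standard C library (illustration only):

#include <math.h>
#include <stdio.h>

int main(void)
{
  /* Check the reflection identity for a negative, non-integer argument. */
  const double pi = 3.14159265358979323846;
  double x = -2.5;
  double refl = log(pi / fabs(x * sin(pi * x))) - lgamma(-x);
  printf("%.15g %.15g\n", lgamma(x), refl);   /* the two values agree */
  return 0;
}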


@ -0,0 +1,6 @@
#include "headers/logd2.h"
static __inline double _log(double x)
{
return spu_extract(_logd2(spu_promote(x, 0)), 0);
}


@ -0,0 +1,6 @@
#include "headers/log10d2.h"
static __inline double _log10(double x)
{
return spu_extract(_log10d2(spu_promote(x, 0)), 0);
}


@ -0,0 +1,79 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOG10D2_H_
#define _LOG10D2_H_ 1
#include <spu_intrinsics.h>
#include "log2d2.h"
/*
* FUNCTION
* vector double _log10d2(vector double x)
*
* DESCRIPTION
* _log10d2 computes log (base 10) for each of the double word
* elements of the input vector x. _log10d2
* is computed using _log2d2 as follows:
*
* _log10d2(x) = _log2d2(x) * log10(2)    (equivalent to _log2d2(x) / log2(10))
*/
static __inline vector double _log10d2(vector double x)
{
return (spu_mul(_log2d2(x), spu_splats(0.301029995663981195213)));
}
#endif /* _LOG10D2_H_ */
#endif /* __SPU__ */
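
The constant 0.301029995663981195213 is log10(2), and multiplying by it is the same as dividing by log2(10), since log10(2) = 1/log2(10). A scalar sanity check (standard C library only):

#include <math.h>
#include <stdio.h>

int main(void)
{
  /* log10(x) = log2(x) / log2(10) = log2(x) * log10(2) */
  double x = 123.456;
  printf("%.15g\n", log10(x));
  printf("%.15g\n", log2(x) * 0.301029995663981195213);  /* agrees to rounding */
  return 0;
}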


@ -0,0 +1,6 @@
#include "headers/log1pd2.h"
static __inline double _log1p(double x)
{
return spu_extract(_log1pd2(spu_promote(x, 0)), 0);
}


@ -0,0 +1,137 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOG1PD2_H_
#define _LOG1PD2_H_ 1
#include <spu_intrinsics.h>
#include "simdmath.h"
#include "logd2.h"
#include "divd2.h"
#define LOG1PD2_P0 0.0000000000000000000000000e+00
#define LOG1PD2_P1 1.0000000000000000000000000e+00
#define LOG1PD2_P2 2.3771612265431403265836252e+00
#define LOG1PD2_P3 2.0034423569559494104908026e+00
#define LOG1PD2_P4 7.1309327316770110272159400e-01
#define LOG1PD2_P5 9.8219761968547217301228613e-02
#define LOG1PD2_P6 3.4385125174546914139650511e-03
#define LOG1PD2_Q0 1.0000000000000000000000000e+00
#define LOG1PD2_Q1 2.8771612265431403265836252e+00
#define LOG1PD2_Q2 3.1086896368941925317130881e+00
#define LOG1PD2_Q3 1.5583843494335058998956356e+00
#define LOG1PD2_Q4 3.6047236436186669283898709e-01
#define LOG1PD2_Q5 3.2620075387969869884496887e-02
#define LOG1PD2_Q6 6.8047193336239690346356479e-04
/*
* FUNCTION
* vector double _log1pd2(vector double x)
*
* DESCRIPTION
* The function _log1pd2 computes the natural logarithm of x + 1
* for each of the double word elements of x.
*
*/
static __inline vector double _log1pd2(vector double x)
{
vector double oned = spu_splats(1.0);
vector double rangehi = spu_splats(0.35);
vector double rangelo = spu_splats(0.0);
vector unsigned long long use_log;
vector double pr, qr;
vector double eresult;
vector double rresult;
vector double result;
/* Compiler bug: replace xbug with x once spu_cmp*() no longer
* modifies its arguments! */
volatile vector double xbug = x;
use_log = spu_or(spu_cmpgt(xbug, rangehi), spu_cmpgt(rangelo, xbug));
/*
* Calculate directly using log(x+1)
*/
eresult = _logd2(spu_add(x, oned));
/*
* For x in [0.0,0.35], use a rational approximation.
*/
pr = spu_madd(x, spu_splats(LOG1PD2_P6), spu_splats(LOG1PD2_P5));
qr = spu_madd(x, spu_splats(LOG1PD2_Q6), spu_splats(LOG1PD2_Q5));
pr = spu_madd(pr, x, spu_splats(LOG1PD2_P4));
qr = spu_madd(qr, x, spu_splats(LOG1PD2_Q4));
pr = spu_madd(pr, x, spu_splats(LOG1PD2_P3));
qr = spu_madd(qr, x, spu_splats(LOG1PD2_Q3));
pr = spu_madd(pr, x, spu_splats(LOG1PD2_P2));
qr = spu_madd(qr, x, spu_splats(LOG1PD2_Q2));
pr = spu_madd(pr, x, spu_splats(LOG1PD2_P1));
qr = spu_madd(qr, x, spu_splats(LOG1PD2_Q1));
pr = spu_madd(pr, x, spu_splats(LOG1PD2_P0));
qr = spu_madd(qr, x, spu_splats(LOG1PD2_Q0));
rresult = _divd2(pr, qr);
/*
* Select either direct calculation or rational approximation.
*/
result = spu_sel(rresult, eresult, use_log);
return result;
}
#endif /* _LOG1PD2_H_ */
#endif /* __SPU__ */
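
The rational fit on [0, 0.35] is evaluated directly in x, which matters because forming x + 1.0 first discards the low-order bits of a small x. A scalar demonstration of that cancellation (standard C library only):

#include <math.h>
#include <stdio.h>

int main(void)
{
  /* For x = 1e-18, 1.0 + x rounds to exactly 1.0, so log(1.0 + x)
   * returns 0 while log1p(x) keeps the answer near 1e-18.            */
  double x = 1e-18;
  printf("%g\n", log(1.0 + x));   /* 0     */
  printf("%g\n", log1p(x));       /* 1e-18 */
  return 0;
}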


@ -0,0 +1,6 @@
#include "headers/log1pf4.h"
static __inline float _log1pf(float x)
{
return spu_extract(_log1pf4(spu_promote(x, 0)), 0);
}


@ -0,0 +1,124 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOG1PF4_H_
#define _LOG1PF4_H_ 1
#include <spu_intrinsics.h>
#include "simdmath.h"
#include "logf4.h"
#include "divf4.h"
/*
* FUNCTION
* vector float _log1pf4(vector float x)
*
* DESCRIPTION
* The function _log1pf4 computes the natural logarithm of x + 1
* for each of the float word elements of x.
*
*
*/
#define LOG1PF4_P0 0.0000000000000000000000000e+00f
#define LOG1PF4_P1 1.0000000000000000000000000e+00f
#define LOG1PF4_P2 1.4220868022897381610647471e+00f
#define LOG1PF4_P3 5.4254553902256308361984338e-01f
#define LOG1PF4_P4 4.5971908823142115796400731e-02f
#define LOG1PF4_Q0 1.0000000000000000000000000e+00f
#define LOG1PF4_Q1 1.9220868007537357247116461e+00f
#define LOG1PF4_Q2 1.1702556461286610645089468e+00f
#define LOG1PF4_Q3 2.4040413392943396631018516e-01f
#define LOG1PF4_Q4 1.0637426466449625625521058e-02f
static __inline vector float _log1pf4(vector float x)
{
vector float onef = spu_splats(1.0f);
vector float range = spu_splats(0.35f);
vector unsigned int use_log;
vector float pr, qr;
vector float eresult;
vector float rresult;
vector float result;
use_log = spu_cmpabsgt(x, range);
/*
* Calculate directly using log(x+1)
*/
eresult = _logf4(spu_add(x, onef));
/*
* For x in [-0.35,0.35], use a rational approximation.
*/
pr = spu_madd(x, spu_splats((float)LOG1PF4_P4), spu_splats((float)LOG1PF4_P3));
qr = spu_madd(x, spu_splats((float)LOG1PF4_Q4), spu_splats((float)LOG1PF4_Q3));
pr = spu_madd(pr, x, spu_splats((float)LOG1PF4_P2));
qr = spu_madd(qr, x, spu_splats((float)LOG1PF4_Q2));
pr = spu_madd(pr, x, spu_splats((float)LOG1PF4_P1));
qr = spu_madd(qr, x, spu_splats((float)LOG1PF4_Q1));
pr = spu_madd(pr, x, spu_splats((float)LOG1PF4_P0));
qr = spu_madd(qr, x, spu_splats((float)LOG1PF4_Q0));
rresult = _divf4(pr, qr);
/*
* Select either direct calculation or rational approximation.
*/
result = spu_sel(rresult, eresult, use_log);
return result;
}
#endif /* _LOG1PF4_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/log2d2.h"
static __inline double _log2(double vx)
{
return spu_extract(_log2d2(spu_promote(vx, 0)), 0);
}


@ -0,0 +1,145 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOG2D2_H_
#define _LOG2D2_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector double _log2d2(vector double x)
*
* DESCRIPTION
* The function _log2d2 computes log base 2 of the input x for each
* of the double word elements of x. The log2 is decomposed into two
* parts: the unbiased exponent and the log2 of the fraction. The
* log2 of the fraction f is approximated using a polynomial of
* order 21 of the form:
*
*     log2(1 + f) = f * (1 + Sum(i=0..20) Ci * f^i)
*
* for f in the range [0, 1).
*/
#define LOG_C00
#define LOG_C01
#define LOG_C02
static __inline vector double _log2d2(vector double vx)
{
vec_int4 addval;
vec_ullong2 exp_mask = spu_splats(0x7FF0000000000000ULL);
vec_double2 vy, vxw;
vec_double2 v1 = spu_splats(1.0);
vec_double2 x2, x4, x8, x10, p1, p2;
/* Extract the fraction component of input by forcing
* its exponent so that input is in the range [1.0, 2.0)
* and then subtract 1.0 to force it in the range
* [0.0, 1.0).
*/
vxw = spu_sub(spu_sel(vx, v1, exp_mask), v1);
/* Compute the log2 of the exponent as exp - 1023.
*/
addval = spu_add(spu_rlmask((vec_int4)vx, -20), -1023);
/* Compute the log2 of the fractional component using a 21st
* order polynomial. The polynomial is evaluated in two halves
* to improve efficiency.
*/
p1 = spu_madd(spu_splats(3.61276447184348752E-05), vxw, spu_splats(-4.16662127033480827E-04));
p2 = spu_madd(spu_splats(-1.43988260692073185E-01), vxw, spu_splats(1.60245637034704267E-01));
p1 = spu_madd(vxw, p1, spu_splats(2.28193656337578229E-03));
p2 = spu_madd(vxw, p2, spu_splats(-1.80329036970820794E-01));
p1 = spu_madd(vxw, p1, spu_splats(-7.93793829370930689E-03));
p2 = spu_madd(vxw, p2, spu_splats(2.06098446037376922E-01));
p1 = spu_madd(vxw, p1, spu_splats(1.98461565426430164E-02));
p2 = spu_madd(vxw, p2, spu_splats(-2.40449108727688962E-01));
p1 = spu_madd(vxw, p1, spu_splats(-3.84093543662501949E-02));
p2 = spu_madd(vxw, p2, spu_splats(2.88539004851839364E-01));
p1 = spu_madd(vxw, p1, spu_splats(6.08335872067172597E-02));
p2 = spu_madd(vxw, p2, spu_splats(-3.60673760117245982E-01));
p1 = spu_madd(vxw, p1, spu_splats(-8.27937055456904317E-02));
p2 = spu_madd(vxw, p2, spu_splats(4.80898346961226595E-01));
p1 = spu_madd(vxw, p1, spu_splats(1.01392360727236079E-01));
p2 = spu_madd(vxw, p2, spu_splats(-7.21347520444469934E-01));
p1 = spu_madd(vxw, p1, spu_splats(-1.16530490533844182E-01));
p2 = spu_madd(vxw, p2, spu_splats(0.44269504088896339E+00));
p1 = spu_madd(vxw, p1, spu_splats(1.30009193360025350E-01));
x2 = spu_mul(vxw, vxw);
x4 = spu_mul(x2, x2);
x8 = spu_mul(x4, x4);
x10 = spu_mul(x8, x2);
vy = spu_madd(spu_madd(x10, p1, p2), vxw, vxw);
/* Add the log2(exponent) and the log2(fraction) to
* compute the final result.
*/
vy = spu_add(vy, spu_extend(spu_convtf(addval, 0)));
vxw = spu_extend(spu_convtf(addval, 20));
return(vy);
}
#endif /* _LOG2D2_H_ */
#endif /* __SPU__ */
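
The split performed above is the identity x = m * 2^e with log2(x) = e + log2(m). A scalar sketch using frexp (which normalizes m into [0.5, 1) instead of the [1, 2) range the header uses, but the identity is unchanged):

#include <math.h>
#include <stdio.h>

int main(void)
{
  /* log2(x) = e + log2(m)  where  x = m * 2^e */
  double x = 10.0;
  int e;
  double m = frexp(x, &e);            /* m in [0.5, 1), x == m * 2^e  */
  printf("%.15g\n", log2(x));         /* reference                    */
  printf("%.15g\n", e + log2(m));     /* same decomposition           */
  return 0;
}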


@ -0,0 +1,136 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOG2F4_H_
#define _LOG2F4_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector float _log2f4(vector float x)
*
* DESCRIPTION
* The _log2f4 function computes log (base 2) of a vector of input
* values x. The log2 of the fraction is approximated as a polynomial
* of order 8 (C. Hastings, Jr, 1955):
*
*     log2f(1 + x) = Sum(i=1..8) Ci * x^i
*
* for x in the range 0.0 to 1.0
*
* C1 = 1.4426898816672
* C2 = -0.72116591947498
* C3 = 0.47868480909345
* C4 = -0.34730547155299
* C5 = 0.24187369696082
* C6 = -0.13753123777116
* C7 = 0.052064690894143
* C8 = -0.0093104962134977
*
* This function assumes that x is a non-zero positive value.
*
*/
static __inline vector float _log2f4(vector float x)
{
vector signed int exponent;
vector float result;
vector float x2, x4;
vector float hi, lo;
/* Extract the exponent from the input X.
*/
exponent = (vector signed int)spu_and(spu_rlmask((vector unsigned int)(x), -23), 0xFF);
exponent = spu_add(exponent, -127);
/* Compute the remainder after removing the exponent.
*/
x = (vector float)spu_sub((vector signed int)(x), spu_sl(exponent, 23));
/* Calculate the log2 of the remainder using the polynomial
* approximation.
*/
x = spu_sub(x, spu_splats(1.0f));
/* Instruction counts could be reduced if the polynomial were
* computed entirely from nested (dependent) fma's. However,
* to reduce the number of pipeline stalls, the polynomial is evaluated
* in two halves (hi and lo).
*/
x2 = spu_mul(x, x);
x4 = spu_mul(x2, x2);
hi = spu_madd(x, spu_splats(-0.0093104962134977f), spu_splats(0.052064690894143f));
hi = spu_madd(x, hi, spu_splats(-0.13753123777116f));
hi = spu_madd(x, hi, spu_splats( 0.24187369696082f));
hi = spu_madd(x, hi, spu_splats(-0.34730547155299f));
lo = spu_madd(x, spu_splats(0.47868480909345f), spu_splats(-0.72116591947498f));
lo = spu_madd(x, lo, spu_splats(1.4426898816672f));
lo = spu_mul(x, lo);
result = spu_madd(x4, hi, lo);
/* Add the exponent back into the result.
*/
result = spu_add(result, spu_convtf(exponent, 0));
return (result);
}
#endif /* _LOG2F4_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/logbf4.h"
static __inline float _logbf(float x)
{
return spu_extract(_logbf4(spu_promote(x, 0)), 0);
}


@ -0,0 +1,111 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOGBF4_H_
#define _LOGBF4_H_ 1
#include <spu_intrinsics.h>
#include <vec_types.h>
/*
* FUNCTION
* vector float _logbf4(vector float x)
*
* DESCRIPTION
* The _logbf4 function returns a vector containing the unbiased
* exponent of each element of x.
*
* Special Cases:
* - if the element of x is zero, the result is -infinity (0xFF800000).
* - if the element of x is a NaN, that NaN is returned.
* - if the element of x is +/- infinity, the result is +infinity.
*
*/
static __inline vector float _logbf4(vector float x)
{
vec_uint4 lzero = (vector unsigned int) {0, 0, 0, 0};
vec_uint4 exp_mask = (vector unsigned int) {0xFF, 0xFF, 0xFF, 0xFF};
vec_int4 exp_shift = (vector signed int) { -23, -23, -23, -23};
vec_int4 exp_bias = (vector signed int) {-127, -127, -127, -127};
vec_uint4 sign_mask = (vector unsigned int) {0x80000000, 0x80000000,
0x80000000, 0x80000000};
vec_uint4 linf = (vector unsigned int) {0x7F800000, 0x7F800000,
0x7F800000, 0x7F800000};
vec_uint4 lminf = (vector unsigned int) {0xFF800000, 0xFF800000,
0xFF800000, 0xFF800000};
vec_uint4 exp;
vec_uint4 xabs;
vec_float4 exp_unbias;
xabs = spu_andc((vec_uint4)x, sign_mask);
exp = spu_and(spu_rlmask((vec_uint4)x, exp_shift), exp_mask);
exp_unbias = spu_convtf(spu_add((vec_int4)exp, exp_bias), 0);
/* Zero */
exp_unbias = spu_sel(exp_unbias, (vec_float4)lminf, (vec_uint4)spu_cmpeq(xabs, lzero));
/* NaN */
exp_unbias = spu_sel(exp_unbias, x, (vec_uint4)spu_cmpgt(xabs, linf));
/* Infinite */
exp_unbias = spu_sel(exp_unbias, (vec_float4)linf, (vec_uint4)spu_cmpeq(xabs, linf));
return (exp_unbias);
}
#endif /* _LOGBF4_H_ */
#endif /* __SPU__ */
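Setting the prolog aside, the code itself returns the unbiased exponent of each element, with bit-pattern selects for the zero, NaN and infinity encodings. A scalar illustration of the per-element behaviour (logb_sketch and the use of the host's INFINITY macro are assumptions of the sketch, not library facts):

#include <stdint.h>
#include <string.h>
#include <math.h>

/* Scalar sketch of the per-element behaviour of _logbf4: return the unbiased
 * exponent, with the same zero / NaN / infinity bit-pattern selects. */
static float logb_sketch(float x)
{
    uint32_t bits, abs_bits;
    memcpy(&bits, &x, sizeof bits);

    abs_bits = bits & 0x7FFFFFFFu;                  /* |x| bit pattern */
    int exp  = (int)((bits >> 23) & 0xFFu) - 127;   /* unbiased exponent */

    if (abs_bits == 0)                              /* zero -> -infinity */
        return -INFINITY;
    if (abs_bits > 0x7F800000u)                     /* NaN -> returned unchanged */
        return x;
    if (abs_bits == 0x7F800000u)                    /* +/- infinity -> +infinity */
        return INFINITY;
    return (float)exp;
}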


@ -0,0 +1,79 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOGD2_H_
#define _LOGD2_H_ 1
#include <spu_intrinsics.h>
#include "simdmath.h"
#include "log2d2.h"
/*
* FUNCTION
* vector double _logd2(vector double x)
*
* DESCRIPTION
* The _logd2 function computes the natural log for each double word
* element of the input x. _logd2 is computed using log2d2 as follows:
*
* logd2(x) = log2d2(x) / log2(e) = log2d2(x) * ln(2)
*
*/
static __inline vector double _logd2(vector double x)
{
return (spu_mul(_log2d2(x), spu_splats(SM_LN2)));
}
#endif /* _LOGD2_H_ */
#endif /* __SPU__ */
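The identity in play is simply ln(x) = log2(x) * ln(2); a scalar restatement, with the literal standing in for SM_LN2, is:

#include <math.h>

/* Scalar restatement of the _logd2 identity: ln(x) = log2(x) * ln(2). */
static double log_via_log2(double x)
{
    return log2(x) * 0.69314718055994530942;   /* ln(2), i.e. SM_LN2 */
}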


@ -0,0 +1,76 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _LOGF4_H_
#define _LOGF4_H_ 1
#include "log2f4.h"
/*
* FUNCTION
* vector float _logf4(vector float x)
*
* DESCRIPTION
* The _logf4 function computes the natural log (base e) of the input
* vector of values x. logf4 is computed using log2f4 as follows:
*
* logf4(x) = log2f4(x) / log2(e) = log2f4(x) * ln(2)
*
*/
static __inline vector float _logf4(vector float x)
{
return (spu_mul(_log2f4(x), spu_splats(0.69314718055995f)));
}
#endif /* _LOGF4_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/nearbyintf4.h"
static __inline float _nearbyintf(float x)
{
return spu_extract(_nearbyintf4(spu_promote(x, 0)), 0);
}


@ -0,0 +1,74 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _NEARBYINTF4_H_
#define _NEARBYINTF4_H_ 1
#include "truncf4.h"
/*
* FUNCTION
* vector float _nearbyintf4(vector float x)
*
* DESCRIPTION
* The SPU doesn't support directed rounding. Within the simdmath
* library, nearbyintf4 is aliased to truncf4. This header merely
* provides an inlinable equivalent for compatibility.
*
*/
static __inline vector float _nearbyintf4(vector float x)
{
return _truncf4(x);
}
#endif /* _NEARBYINTF4_H_ */
#endif /* __SPU__ */
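One practical consequence of the aliasing: values are rounded toward zero rather than according to the current rounding direction. A small host-side comparison (illustrative only) makes the difference visible:

#include <math.h>
#include <stdio.h>

/* Illustrative comparison: the truncf-based alias versus a conforming
 * nearbyintf under the default round-to-nearest mode. */
int main(void)
{
    printf("truncf(2.7f)     = %f\n", truncf(2.7f));     /* 2.000000, what the alias yields */
    printf("nearbyintf(2.7f) = %f\n", nearbyintf(2.7f)); /* 3.000000 under round-to-nearest */
    return 0;
}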


@ -0,0 +1,6 @@
#include "headers/nextafterd2.h"
static __inline double _nextafter(double x, double y)
{
return spu_extract(_nextafterd2(spu_promote(x, 0), spu_promote(y, 0)), 0);
}


@ -0,0 +1,124 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _NEXTAFTERD2_H_
#define _NEXTAFTERD2_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector double _nextafterd2(vector double x, vector double y)
*
* DESCRIPTION
* The _nextafterd2 function returns a vector containing the next representable
* floating-point number after each element of x, in the direction of the
* corresponding element of y.
*
* Special Cases:
* - nextafter(NaN, y) = NaN
* - nextafter(x, NaN) = NaN
* - x = largest finite value, y = infinity, result is undefined
* - x = largest finite negative value, y = -infinity, result is undefined
* - x != y, and result = 0, considered an underflow
*
*/
static __inline vector double _nextafterd2(vector double x, vector double y)
{
vec_double2 n1ulp = (vec_double2)spu_splats(0x8000000000000001ull);
vec_double2 zerod = spu_splats(0.0);
vec_llong2 one = spu_splats(1ll);
vec_ullong2 xlt0, xgty, xeqy, xeq0;
vec_llong2 xllong;
vec_llong2 delta, deltap1;
vec_double2 result;
/* Compiler bug workaround: replace xtmp/ytmp with x/y once spu_cmpgt(x,y) no longer modifies x/y. */
volatile vec_double2 xtmp = x;
volatile vec_double2 ytmp = y;
/*
* The idea here is to treat x as a signed long long value, which allows us to
* add or subtract one to/from it to get the next representable value.
*/
xeq0 = spu_cmpeq(xtmp, zerod);
xlt0 = spu_cmpgt(zerod, xtmp);
xeqy = spu_cmpeq(xtmp, ytmp);
xgty = spu_cmpgt(xtmp, ytmp);
/* If x = -0.0, set x = 0.0 */
x = spu_andc(x, (vec_double2)xeq0);
xllong = (vec_llong2)x;
/* Determine value to add to x */
delta = (vec_llong2)spu_xor(xgty, xlt0);
deltap1 = delta + one;
delta = spu_sel(deltap1, delta, (vec_ullong2)delta);
xllong = xllong + delta;
/* Fix the case of x = 0, and answer should be -1 ulp */
result = spu_sel((vec_double2)xllong, n1ulp, spu_and((vec_ullong2)delta, xeq0));
/*
* Special Cases
*/
/* x = y */
result = spu_sel(result, y, xeqy);
return result;
}
#endif /* _NEXTAFTERD2_H_ */
#endif /* __SPU__ */
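The ulp-stepping idea used above also has a compact scalar form. The following is an illustrative sketch under ordinary IEEE semantics, not the library code; it skips the SPU compiler workaround and simply nudges the bit pattern of x toward y:

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the same idea: reinterpret the double's bits as an
 * integer and move it by one ulp toward y.  The name nextafter_sketch and
 * the handling of the x == 0 case are illustrative assumptions. */
static double nextafter_sketch(double x, double y)
{
    if (x == y)
        return y;

    if (x == 0.0) {                      /* step off zero toward y */
        uint64_t tiny = 1;               /* smallest positive subnormal bit pattern */
        double d;
        memcpy(&d, &tiny, sizeof d);
        return (y > 0.0) ? d : -d;
    }

    uint64_t bits;
    memcpy(&bits, &x, sizeof bits);
    if ((x > 0.0) == (y > x))            /* moving away from zero grows the magnitude */
        bits += 1;
    else                                 /* moving toward zero shrinks it */
        bits -= 1;
    memcpy(&x, &bits, sizeof bits);
    return x;
}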


@ -0,0 +1,6 @@
#include "headers/nextafterf4.h"
static __inline float _nextafterf(float x, float y)
{
return spu_extract(_nextafterf4(spu_promote(x, 0), spu_promote(y, 0)), 0);
}


@ -0,0 +1,120 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _NEXTAFTERF4_H_
#define _NEXTAFTERF4_H_ 1
#include <spu_intrinsics.h>
/*
* FUNCTION
* vector float _nextafterf4(vector float x, vector float y)
*
* DESCRIPTION
* The _nextafterf4 function returns a vector containing the next representable
* floating-point number after each element of x, in the direction of the
* corresponding element of y.
*
* Special Cases:
* - Infinity and NaN are not supported in single-precision on SPU. They are treated
* as normal numbers.
* - x != y, and result = 0 is considered an underflow.
*
*
*/
static __inline vector float _nextafterf4(vector float x, vector float y)
{
vec_float4 n1ulp = (vec_float4)spu_splats(0x80000001);
vec_float4 zerof = spu_splats(0.0f);
vec_int4 one = spu_splats(1);
vec_uint4 xlt0, xgty, xeqy, xeq0;
vec_int4 xint;
vec_int4 delta, deltap1;
vec_float4 result;
/*
* The idea here is to treat x as a signed int value, which allows us to
* add or subtract one to/from it to get the next representable value.
*/
xeq0 = spu_cmpeq(x, zerof);
xlt0 = spu_cmpgt(zerof, x);
xeqy = spu_cmpeq(x, y);
xgty = spu_cmpgt(x, y);
/* If x = -0.0, set x = 0.0 */
x = spu_andc(x, (vec_float4)xeq0);
xint = (vec_int4)x;
/* Determine value to add to x */
delta = (vec_int4)spu_xor(xgty, xlt0);
deltap1 = delta + one;
delta = spu_sel(deltap1, delta, (vec_uint4)delta);
xint = xint + delta;
/* Fix the case of x = 0, and answer should be -1 ulp */
result = spu_sel((vec_float4)xint, n1ulp, spu_and((vec_uint4)delta, xeq0));
/*
* Special Cases
*/
/* x = y */
result = spu_sel(result, y, xeqy);
return result;
}
#endif /* _NEXTAFTERF4_H_ */
#endif /* __SPU__ */


@ -0,0 +1,6 @@
#include "headers/powd2.h"
static __inline double _pow(double x, double y)
{
return spu_extract(_powd2(spu_promote(x, 0), spu_promote(y, 0)), 0);
}


@ -0,0 +1,133 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _POWD2_H_
#define _POWD2_H_ 1
#include "exp2d2.h"
#include "log2d2.h"
/*
* FUNCTION
* vector double _powd2(vector double x, vector double y)
*
* DESCRIPTION
* The _powd2 function computes x raised to the power y for each element
* of the input vectors. The powd2 function is computed by decomposing
* the problem into:
*
* x^y = 2^(y*log2(x))
*
*
*/
static __inline vector double _powd2(vector double x, vector double y)
{
vec_uchar16 splat_hi = (vec_uchar16) { 0,1,2,3,0,1,2,3, 8,9,10,11, 8,9,10,11 };
vec_int4 exp, shift;
vec_uint4 sign = (vec_uint4) { 0x80000000, 0, 0x80000000, 0 };
vec_uint4 or_mask, and_mask, evenmask, intmask;
vec_double2 in_hi;
vector double signmask = spu_splats(-0.0);
vector signed int error = spu_splats(-1);
vector double zero = spu_splats(0.0);
vector unsigned int y_is_int, y_is_odd, y_is_even;
vector unsigned int x_is_neg;
vector double xabs, xsign;
vector double out;
xsign = spu_and(x, signmask);
xabs = spu_andc(x, signmask);
x_is_neg = (vec_uint4)spu_cmpgt(zero, x);
/* First we solve assuming x was non-negative */
out = _exp2d2(spu_mul(y, _log2d2(xabs)));
in_hi = spu_shuffle(y, y, splat_hi);
exp = spu_and(spu_rlmask((vec_int4)in_hi, -20), 0x7FF);
/* Determine if y is an integer */
shift = spu_sub(((vec_int4) { 1023, 1043, 1023, 1043 }), exp);
or_mask = spu_andc(spu_cmpgt(shift, 0), sign);
and_mask = spu_rlmask(((vec_uint4) { 0xFFFFF, -1, 0xFFFFF, -1 }), shift);
intmask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -32)), or_mask);
y_is_int = (vec_uint4)spu_cmpeq(y, spu_andc(y, (vec_double2)(intmask)));
/* Determine if y is an even integer */
shift = spu_sub(((vec_int4) { 1024, 1044, 1024, 1044 }), exp);
or_mask = spu_andc(spu_cmpgt(shift, 0), sign);
and_mask = spu_rlmask(((vec_uint4) { 0xFFFFF, -1, 0xFFFFF, -1 }), shift);
evenmask = spu_or(spu_and(and_mask, spu_cmpgt(shift, -32)), or_mask);
y_is_even = (vec_uint4)spu_cmpeq(y, spu_andc(y, (vec_double2)(evenmask)));
y_is_odd = spu_andc(y_is_int, y_is_even);
/* Special Cases
*/
/* x < 0 is only ok when y integer */
out = spu_sel(out, (vec_double2)error, (vec_ullong2)spu_andc(x_is_neg, y_is_int));
/* Preserve the sign of x if y is an odd integer */
out = spu_sel(out, spu_or(out, xsign), (vec_ullong2)y_is_odd);
/* x = anything, y = +/- 0, returns 1 */
out = spu_sel(out, spu_splats(1.0), spu_cmpabseq(y, zero));
return(out);
}
#endif /* _POWD2_H_ */
#endif /* __SPU__ */
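The decomposition and special-case handling can be paraphrased in scalar C99. The sketch below is illustrative only; NAN stands in for the all-ones error pattern the vector code selects, and the helper name pow_sketch is an assumption:

#include <math.h>

/* Scalar sketch of the same decomposition: x^y = 2^(y * log2(|x|)), with the
 * sign restored when y is an odd integer. */
static double pow_sketch(double x, double y)
{
    if (y == 0.0)                        /* x^0 = 1 for any x */
        return 1.0;

    double out = exp2(y * log2(fabs(x)));

    int y_is_int = (floor(y) == y);
    int y_is_odd = y_is_int && (fmod(y, 2.0) != 0.0);

    if (x < 0.0 && !y_is_int)            /* negative base needs an integer power */
        return NAN;
    if (x < 0.0 && y_is_odd)             /* odd integer power keeps the sign of x */
        out = -out;
    return out;
}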


@ -0,0 +1,6 @@
#include "headers/powf4.h"
static __inline float _powf(float x, float y)
{
return spu_extract(_powf4(spu_promote(x, 0), spu_promote(y, 0)), 0);
}


@ -0,0 +1,132 @@
/* -------------------------------------------------------------- */
/* (C)Copyright 2006,2007, */
/* International Business Machines Corporation, */
/* Sony Computer Entertainment, Incorporated, */
/* Toshiba Corporation, */
/* */
/* All Rights Reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the */
/* following conditions are met: */
/* */
/* - Redistributions of source code must retain the above copyright*/
/* notice, this list of conditions and the following disclaimer. */
/* */
/* - Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* - Neither the name of IBM Corporation nor the names of its */
/* contributors may be used to endorse or promote products */
/* derived from this software without specific prior written */
/* permission. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
/* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR */
/* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, */
/* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT */
/* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; */
/* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
/* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */
/* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR */
/* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, */
/* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
/* -------------------------------------------------------------- */
/* PROLOG END TAG zYx */
#ifdef __SPU__
#ifndef _POWF4_H_
#define _POWF4_H_ 1
#include <spu_intrinsics.h>
#include <vec_types.h>
#include "exp2f4.h"
#include "log2f4.h"
/*
* FUNCTION
* vector float _powf4(vector float x, vector float y)
*
* DESCRIPTION
* The _powf4 function computes x raised to the power y for each element
* of the input vectors. The powf4 function is computed by decomposing
* the problem into:
*
* x^y = 2^(y*log2(x))
*
*/
static __inline vector float _powf4(vector float x, vector float y)
{
vec_uint4 y_exp;
vec_uint4 y_mantissa;
vec_uint4 mant_shift;
vec_uint4 y_is_int;
vec_uint4 y_is_odd;
vec_uint4 x_sign_bit;
vec_uint4 zero = (vec_uint4)spu_splats(0);
vec_uint4 bit0 = (vec_uint4)spu_splats(0x80000000);
vec_int4 error = spu_splats(-1);
vec_float4 out;
y_exp = spu_and(spu_rlmask((vec_uint4)y, -23), 0x000000FF);
/* Need the implied bit in the mantissa to catch
* y = 1 case later
*/
y_mantissa = spu_or(spu_sl((vec_uint4)y, (unsigned int)8), bit0);
x_sign_bit = spu_and((vec_uint4)x, bit0);
/* We are going to shift the mantissa over enough to
* determine if we have an integer.
*/
mant_shift = spu_add(y_exp, -127);
/* Leave the lowest-order integer bit of mantissa on the
* high end so we can see if the integer is odd.
*/
y_mantissa = spu_sl(y_mantissa, mant_shift);
y_is_int = spu_cmpeq(spu_andc(y_mantissa, bit0), 0);
y_is_int = spu_and(y_is_int, spu_cmpgt(y_exp, 126));
y_is_odd = spu_and(spu_cmpeq(y_mantissa, bit0), y_is_int);
out = _exp2f4(spu_mul(y, _log2f4(spu_andc(x, (vec_float4)bit0))));
/* x < 0 is only ok when y integer
*/
out = spu_sel(out, (vec_float4)error,
spu_andc(spu_cmpeq(x_sign_bit, bit0), y_is_int));
/* Preserve the sign of x if y is an odd integer
*/
out = spu_sel(out, spu_or(out, (vec_float4)x_sign_bit), y_is_odd);
/* x = anything, y = +/- 0, returns 1
*/
out = spu_sel(out, spu_splats(1.0f), spu_cmpabseq(y, (vec_float4)zero));
return(out);
}
#endif /* _POWF4_H_ */
#endif /* __SPU__ */
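The single-precision version classifies y straight from its bit pattern: the mantissa, with the implied bit kept at the top, is shifted left by the unbiased exponent, so any remaining lower bits mean y has a fractional part, while the lone top bit indicates an odd integer. A scalar sketch of that test (classify_y and its structure are assumptions for illustration; y = 0 is still handled separately by the pow code itself):

#include <stdint.h>
#include <string.h>

/* Scalar sketch of the bit test _powf4 uses to classify y: shift the mantissa
 * (implied bit at bit 31) left by the unbiased exponent; if nothing but the
 * top bit can remain set, y is an integer, and that top bit being set means
 * the integer is odd. */
static void classify_y(float y, int *is_int, int *is_odd)
{
    uint32_t bits, mant;
    memcpy(&bits, &y, sizeof bits);

    uint32_t exp = (bits >> 23) & 0xFFu;          /* biased exponent */
    mant = (bits << 8) | 0x80000000u;             /* mantissa with implied bit on top */

    int shift = (int)exp - 127;
    if (shift >= 32)
        mant = 0;                                 /* all fraction and low integer bits shifted out */
    else if (shift >= 0)
        mant <<= shift;
    /* shift < 0: the exponent test below already rules out an integer */

    *is_int = (exp > 126) && ((mant & 0x7FFFFFFFu) == 0);
    *is_odd = *is_int && (mant == 0x80000000u);
}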
