diff options
Diffstat (limited to 'src/include/i386')
-rw-r--r-- | src/include/i386/__math.h | 586 | ||||
-rw-r--r-- | src/include/i386/ctrl87.h | 120 | ||||
-rw-r--r-- | src/include/i386/sstring.h | 414 |
3 files changed, 1120 insertions, 0 deletions
diff --git a/src/include/i386/__math.h b/src/include/i386/__math.h new file mode 100644 index 0000000..0274d52 --- /dev/null +++ b/src/include/i386/__math.h @@ -0,0 +1,586 @@ +/* Inline math functions for i387. + Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by John C. Bowman <bowman@math.ualberta.ca>, 1995. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. + + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + +#ifndef __GLIBC__ +#ifdef __cplusplus +# define __MATH_INLINE __inline +#else +# define __MATH_INLINE extern __inline +#endif + +#ifndef ____MATH_H +#define ____MATH_H + + +#if defined __USE_ISOC9X && defined __GNUC__ && __GNUC__ >= 2 +/* ISO C 9X defines some macros to perform unordered comparisons. The + ix87 FPU supports this with special opcodes and we should use them. + These must not be inline functions since we have to be able to handle + all floating-point types. */ +# ifdef __i686__ +/* For the PentiumPro and more recent processors we can provide + better code. 
*/ /* Each macro below evaluates to the char produced by its final setcc
+   instruction.  isgreater, isgreaterequal, islessgreater and isunordered
+   pass x in st(0) ("t") and y in st(1) ("u"); isless and islessequal pass
+   them the other way round so that the seta/setae tests can be reused.  */
+# define isgreater(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucomip %%st(1), %%st; seta %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \
+        __result; })
+# define isgreaterequal(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucomip %%st(1), %%st; setae %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \
+        __result; })
+/* isless: operands swapped (y in st(0), x in st(1)).  */
+# define isless(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucomip %%st(1), %%st; seta %%al" \
+                 : "=a" (__result) : "u" (x), "t" (y) : "cc", "st"); \
+        __result; })
+/* islessequal: operands swapped (y in st(0), x in st(1)).  */
+# define islessequal(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucomip %%st(1), %%st; setae %%al" \
+                 : "=a" (__result) : "u" (x), "t" (y) : "cc", "st"); \
+        __result; })
+
+# define islessgreater(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucomip %%st(1), %%st; setne %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \
+        __result; })
+
+# define isunordered(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucomip %%st(1), %%st; setp %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st"); \
+        __result; })
+# else
+/* This is the dumb, portable code for i386 and above.  Both operands are
+   popped by fucompp (hence the "st" and "st(1)" clobbers), the FPU status
+   word is fetched with fnstsw and its condition bits are tested in %ah.
+   NOTE(review): the masks 0x45/0x05/0x44 presumably select the C3/C2/C0
+   condition bits of the status word -- confirm against the x87 manual.  */
+# define isgreater(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \
+        __result; })
+
+# define isgreaterequal(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \
+        __result; })
+/* isless: same test as isgreater with the operands swapped.  */
+# define isless(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucompp; fnstsw; testb $0x45, %%ah; setz %%al" \
+                 : "=a" (__result) : "u" (x), "t" (y) : "cc", "st", "st(1)"); \
+        __result; })
+/* islessequal: same test as isgreaterequal with the operands swapped.  */
+# define islessequal(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucompp; fnstsw; testb $0x05, %%ah; setz %%al" \
+                 : "=a" (__result) : "u" (x), "t" (y) : "cc", "st", "st(1)"); \
+        __result; })
+
+# define islessgreater(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucompp; fnstsw; testb $0x44, %%ah; setz %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \
+        __result; })
+
+# define isunordered(x, y) \
+     ({ register char __result; \
+        __asm__ ("fucompp; fnstsw; sahf; setp %%al" \
+                 : "=a" (__result) : "u" (y), "t" (x) : "cc", "st", "st(1)"); \
+        __result; })
+# endif /* __i686__ */
+
+/* Test for negative number.  Used in the signbit() macro.
+   Each helper overlays its argument with int storage in a union and
+   inspects the word that carries the sign: the whole word for float, the
+   second word __i[1] for double, and bit 0x8000 of the third word __i[2]
+   for long double.  Every helper returns nonzero when that bit is set.  */
+__MATH_INLINE int __signbitf(float __x)
+{
+    union {
+	float __f;
+	int __i;
+    } __u = {
+    __f:__x};
+    return __u.__i < 0;
+}
+
+__MATH_INLINE int __signbit(double __x)
+{
+    union {
+	double __d;
+	int __i[2];
+    } __u = {
+    __d:__x};
+    return __u.__i[1] < 0;
+}
+
+__MATH_INLINE int __signbitl(long double __x)
+{
+    union {
+	long double __l;
+	int __i[3];
+    } __u = {
+    __l:__x};
+    return (__u.__i[2] & 0x8000) != 0;
+}
+#endif
+
+
+/* GCC version 2.7 and below has problems with all this inlining
+   code, so it is disabled for those versions of the compiler.  
*/ +#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 7)) + +#if ((!defined __NO_MATH_INLINES || defined __LIBC_INTERNAL_MATH_INLINES) \ + && defined __OPTIMIZE__) + +/* A macro to define float, double, and long double versions of various + math functions for the ix87 FPU. FUNC is the function name (which will + be suffixed with f and l for the float and long double version, + respectively). OP is the name of the FPU operation. */ + +#if defined __USE_MISC || defined __USE_ISOC9X +# define __inline_mathop(func, op) \ + __inline_mathop_ (double, func, op) \ + __inline_mathop_ (float, __CONCAT(func,f), op) \ + __inline_mathop_ (long double, __CONCAT(func,l), op) +#else +# define __inline_mathop(func, op) \ + __inline_mathop_ (double, func, op) +#endif + +#define __inline_mathop_(float_type, func, op) \ + __inline_mathop_decl_ (float_type, func, op, "0" (__x)) + + +#if defined __USE_MISC || defined __USE_ISOC9X +# define __inline_mathop_decl(func, op, params...) \ + __inline_mathop_decl_ (double, func, op, params) \ + __inline_mathop_decl_ (float, __CONCAT(func,f), op, params) \ + __inline_mathop_decl_ (long double, __CONCAT(func,l), op, params) +#else +# define __inline_mathop_decl(func, op, params...) \ + __inline_mathop_decl_ (double, func, op, params) +#endif + +#define __inline_mathop_decl_(float_type, func, op, params...) 
\ + __MATH_INLINE float_type func (float_type); \ + __MATH_INLINE float_type func (float_type __x) \ + { \ + register float_type __result; \ + __asm __volatile__ (op : "=t" (__result) : params); \ + return __result; \ + } + + +#if defined __USE_MISC || defined __USE_ISOC9X +# define __inline_mathcode(func, arg, code) \ + __inline_mathcode_ (double, func, arg, code) \ + __inline_mathcode_ (float, __CONCAT(func,f), arg, code) \ + __inline_mathcode_ (long double, __CONCAT(func,l), arg, code) +# define __inline_mathcode2(func, arg1, arg2, code) \ + __inline_mathcode2_ (double, func, arg1, arg2, code) \ + __inline_mathcode2_ (float, __CONCAT(func,f), arg1, arg2, code) \ + __inline_mathcode2_ (long double, __CONCAT(func,l), arg1, arg2, code) +# define __inline_mathcode3(func, arg1, arg2, arg3, code) \ + __inline_mathcode3_ (double, func, arg1, arg2, arg3, code) \ + __inline_mathcode3_ (float, __CONCAT(func,f), arg1, arg2, arg3, code) \ + __inline_mathcode3_ (long double, __CONCAT(func,l), arg1, arg2, arg3, code) +#else +# define __inline_mathcode(func, arg, code) \ + __inline_mathcode_ (double, func, (arg), code) +# define __inline_mathcode2(func, arg1, arg2, code) \ + __inline_mathcode2_ (double, func, arg1, arg2, code) +# define __inline_mathcode3(func, arg1, arg2, arg3, code) \ + __inline_mathcode3_ (double, func, arg1, arg2, arg3, code) +#endif + +#define __inline_mathcode_(float_type, func, arg, code) \ + __MATH_INLINE float_type func (float_type); \ + __MATH_INLINE float_type func (float_type arg) \ + { \ + code; \ + } + +#define __inline_mathcode2_(float_type, func, arg1, arg2, code) \ + __MATH_INLINE float_type func (float_type, float_type); \ + __MATH_INLINE float_type func (float_type arg1, float_type arg2) \ + { \ + code; \ + } + +#define __inline_mathcode3_(float_type, func, arg1, arg2, arg3, code) \ + __MATH_INLINE float_type func (float_type, float_type, float_type); \ + __MATH_INLINE float_type func (float_type arg1, float_type arg2, \ + float_type arg3) \ 
+ { \ + code; \ + } +#endif + + +#if !defined __NO_MATH_INLINES && defined __OPTIMIZE__ +/* Miscellaneous functions */ + +__inline_mathcode(__sgn, __x, + return __x == 0.0 ? 0.0 : (__x > 0.0 ? 1.0 : -1.0)) + __inline_mathcode(__pow2, __x, + register long double __value; + register long double __exponent; + __extension__ long long int __p = (long long int)__x; + if (__x == (long double) __p) + { +__asm __volatile__("fscale": "=t"(__value):"0"(1.0), "u"(__x)); + return + __value;} +__asm __volatile__("fld %%st(0)\n\t" "frndint # int(x)\n\t" "fxch\n\t" "fsub %%st(1) # fract(x)\n\t" "f2xm1 # 2^(fract(x)) - 1\n\t": "=t"(__value), "=u"(__exponent): "0"(__x)); __value += 1.0; __asm __volatile__("fscale": "=t"(__value):"0"(__value), "u"(__exponent)); + return + __value) +#define __sincos_code \ + register long double __cosr; \ + register long double __sinr; \ + __asm __volatile__ \ + ("fsincos\n\t" \ + "fnstsw %%ax\n\t" \ + "testl $0x400, %%eax\n\t" \ + "jz 1f\n\t" \ + "fldpi\n\t" \ + "fadd %%st(0)\n\t" \ + "fxch %%st(1)\n\t" \ + "2: fprem1\n\t" \ + "fnstsw %%ax\n\t" \ + "testl $0x400, %%eax\n\t" \ + "jnz 2b\n\t" \ + "fstp %%st(1)\n\t" \ + "fsincos\n\t" \ + "1:" \ + : "=t" (__cosr), "=u" (__sinr) : "0" (__x)); \ + *__sinx = __sinr; \ + *__cosx = __cosr + + __MATH_INLINE void __sincos(double __x, double *__sinx, + double *__cosx); +__MATH_INLINE void __sincos(double __x, double *__sinx, double *__cosx) +{ + __sincos_code; +} + +__MATH_INLINE void __sincosf(float __x, float *__sinx, float *__cosx); +__MATH_INLINE void __sincosf(float __x, float *__sinx, float *__cosx) +{ + __sincos_code; +} + +__MATH_INLINE void __sincosl(long double __x, long double *__sinx, + long double *__cosx); +__MATH_INLINE void +__sincosl(long double __x, long double *__sinx, long double *__cosx) +{ + __sincos_code; +} + + +/* Optimized inline implementation, sometimes with reduced precision + and/or argument range. 
*/
+/* Body shared by __expm1l (and expm1 further down).  The asm comments
+   give the derivation: e^x - 1 = 2^(x * log2(e)) - 1, evaluated separately
+   for the integer and the fractional part of x * log2(e).  */
+#define __expm1_code \
+  register long double __value; \
+  register long double __exponent; \
+  register long double __temp; \
+  __asm __volatile__ \
+    ("fldl2e # e^x - 1 = 2^(x * log2(e)) - 1\n\t" \
+     "fmul %%st(1) # x * log2(e)\n\t" \
+     "fst %%st(1)\n\t" \
+     "frndint # int(x * log2(e))\n\t" \
+     "fxch\n\t" \
+     "fsub %%st(1) # fract(x * log2(e))\n\t" \
+     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \
+     "fscale # 2^(x * log2(e)) - 2^(int(x * log2(e)))\n\t" \
+     : "=t" (__value), "=u" (__exponent) : "0" (__x)); \
+  __asm __volatile__ \
+    ("fscale # 2^int(x * log2(e))\n\t" \
+     : "=t" (__temp) : "0" (1.0), "u" (__exponent)); \
+  __temp -= 1.0; \
+  return __temp + __value
+__inline_mathcode_(long double, __expm1l, __x, __expm1_code) /* __exp_code
+   below is the same scheme for e^x itself: 1.0 is added back to the f2xm1
+   result before the final fscale.  */
+#define __exp_code \
+  register long double __value; \
+  register long double __exponent; \
+  __asm __volatile__ \
+    ("fldl2e # e^x = 2^(x * log2(e))\n\t" \
+     "fmul %%st(1) # x * log2(e)\n\t" \
+     "fst %%st(1)\n\t" \
+     "frndint # int(x * log2(e))\n\t" \
+     "fxch\n\t" \
+     "fsub %%st(1) # fract(x * log2(e))\n\t" \
+     "f2xm1 # 2^(fract(x * log2(e))) - 1\n\t" \
+     : "=t" (__value), "=u" (__exponent) : "0" (__x)); \
+  __value += 1.0; \
+  __asm __volatile__ \
+    ("fscale" \
+     : "=t" (__value) : "0" (__value), "u" (__exponent)); \
+  return __value
+__inline_mathcode(exp, __x, __exp_code) __inline_mathcode_(long double, __expl, __x, __exp_code) /* tan: fptan leaves two values on the stack; the one in st(0) (__value2) is unused and the result is taken from st(1).  */ __inline_mathcode(tan, __x, register long double __value; register long double __value2 __attribute__ ((__unused__)); __asm __volatile__("fptan": "=t"(__value2), "=u"(__value):"0"(__x)); return __value)
+#define __atan2_code \
+  register long double __value; \
+  __asm __volatile__ \
+    ("fpatan\n\t" \
+     : "=t" (__value) : "0" (__x), "u" (__y) : "st(1)"); \
+  return __value
+ /* atan2 and __atan2l take (__y, __x): __x is passed in st(0), __y in st(1).  */ __inline_mathcode2(atan2, __y, __x,
+		    __atan2_code) __inline_mathcode2_(long double, __atan2l,
+						      __y, __x,
+						      __atan2_code)
+/* fmod: repeat fprem until the parity flag loaded from the status word
+   (fnstsw; sahf) is clear.  */ __inline_mathcode2(fmod, __x, __y, register long double __value; __asm __volatile__("1: fprem\n\t" "fnstsw %%ax\n\t" "sahf\n\t" "jp 1b": "=t"(__value): "0"(__x), "u"(__y):"ax", "cc"); return __value)
+ /* pow: returns 0.0 for x == 0 with y > 0; an exponent that survives the
+   round trip through long long int is handled by repeated squaring (a
+   negative exponent first inverts x); anything else goes through the
+   2^(y * log2(x)) path below.  */ __inline_mathcode2(pow, __x, __y,
+		    register long double __value;
+		    register long double __exponent;
+		    __extension__ long long int __p =
+		    (long long int)__y; if (__x == 0.0 && __y > 0.0)
+		    return 0.0; if (__y == (double) __p)
+		    {
+		    long double __r = 1.0;
+		    if (__p == 0) return 1.0; if (__p < 0) {
+		    __p = -__p; __x = 1.0 / __x;}
+		    while (1) {
+		    if (__p & 1)
+		    __r *= __x;
+		    __p >>= 1; if (__p == 0) return __r; __x *= __x;}
+		    /* NOTREACHED */
+		    }
+/* General case: fyl2x computes log2(x); the product y * log2(x) is then
+   split into integer and fractional parts for f2xm1 and fscale.  */
+__asm __volatile__("fyl2x": "=t"(__value): "0"(__x), "u"(1.0): "st(1)"); __asm __volatile__("fmul %%st(1) # y * log2(x)\n\t" "fst %%st(1)\n\t" "frndint # int(y * log2(x))\n\t" "fxch\n\t" "fsub %%st(1) # fract(y * log2(x))\n\t" "f2xm1 # 2^(fract(y * log2(x))) - 1\n\t": "=t"(__value), "=u"(__exponent): "0"(__y), "1"(__value)); __value += 1.0; __asm __volatile__("fscale": "=t"(__value):"0"(__value), "u"(__exponent));
+		    return
+		    __value)
+
+/* sqrt via fsqrt; __sqrtl is the long double helper used by the other
+   inlines in this file.  fabs uses the compiler builtins when the
+   compiler is GCC 2.8 or newer and the x87 fabs instruction otherwise.  */
+    __inline_mathop(sqrt, "fsqrt")
+    __inline_mathop_(long double, __sqrtl, "fsqrt")
+#if defined __GNUC__ && (__GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 8)
+    __inline_mathcode_(double, fabs, __x, return __builtin_fabs(__x))
+    __inline_mathcode_(float, fabsf, __x, return __builtin_fabsf(__x))
+    __inline_mathcode_(long double, fabsl, __x,
+		       return __builtin_fabsl(__x))
+    __inline_mathcode_(long double, __fabsl, __x,
+		       return __builtin_fabsl(__x))
+#else
+    __inline_mathop(fabs, "fabs")
+    __inline_mathop_(long double, __fabsl, "fabs")
+#endif
+/* The argument range of this inline version is reduced.  */
+    __inline_mathop(sin, "fsin")
+/* The argument range of this inline version is reduced.  
*/
+    __inline_mathop(cos, "fcos")
+    __inline_mathop(atan, "fld1; fpatan")
+    __inline_mathop(log, "fldln2; fxch; fyl2x")
+    __inline_mathop(log10, "fldlg2; fxch; fyl2x")
+    /* asin(x) = atan2(x, sqrt(1 - x*x)) */ __inline_mathcode(asin, __x,
+		      return __atan2l(__x,
+				      __sqrtl(1.0 -
+					      __x *
+					      __x)))
+    /* acos(x) = atan2(sqrt(1 - x*x), x); __sgn1l is like __sgn but maps zero
+   to 1.0.  */ __inline_mathcode(acos, __x,
+		      return __atan2l(__sqrtl(1.0 - __x * __x),
+				      __x)) __inline_mathcode_(long
+							       double,
+							       __sgn1l,
+							       __x,
+							       return
+							       __x >=
+							       0.0 ?
+							       1.0 :
+							       -1.0)
+/* The argument range of the inline version of sinhl is slightly reduced.
+   sinh, cosh and tanh are built from __expm1l/__expl of |x| (or -2|x| for
+   tanh); the sign is restored with __sgn1l where needed.  */
+    __inline_mathcode(sinh, __x, register long double __exm1 =
+		      __expm1l(__fabsl(__x));
+		      return 0.5 * (__exm1 / (__exm1 + 1.0) +
+				    __exm1) *
+		      __sgn1l(__x)) __inline_mathcode(cosh, __x,
+						      register long double
+						      __ex = __expl(__x);
+						      return 0.5 * (__ex +
+								    1.0 /
+								    __ex))
+__inline_mathcode(tanh, __x, register long double __exm1 = __expm1l(-__fabsl(__x + __x)); return __exm1 / (__exm1 + 2.0) * __sgn1l(-__x)) /* floor: save the control word, switch the rounding field, frndint, restore.  */ __inline_mathcode(floor, __x, register long double __value; __volatile unsigned short int __cw; __volatile unsigned short int __cwtmp; __asm __volatile("fnstcw %0":"=m"(__cw)); __cwtmp = (__cw & 0xf3ff) | 0x0400;
+		  /* rounding down: 0xf3ff clears bits 0x0c00, 0x0400 is then set */
+__asm __volatile("fldcw %0": : "m"(__cwtmp)); __asm __volatile("frndint": "=t"(__value): "0"(__x)); __asm __volatile("fldcw %0": : "m"(__cw)); return __value)
+__inline_mathcode(ceil, __x, register long double __value; __volatile unsigned short int __cw; __volatile unsigned short int __cwtmp; __asm __volatile("fnstcw %0":"=m"(__cw)); __cwtmp = (__cw & 0xf3ff) | 0x0800;
+		  /* rounding up: same sequence as floor with 0x0800 set instead */
+__asm __volatile("fldcw %0": : "m"(__cwtmp)); __asm __volatile("frndint": "=t"(__value): "0"(__x)); __asm __volatile("fldcw %0": :"m"(__cw));
+		  return
+		  __value)
+#define __ldexp_code \
+  register long double __value; \
+  __asm __volatile__ \
+    ("fscale" \
+     : "=t" (__value) : "0" (__x), "u" ((long double) __y)); \
+  return __value
+__MATH_INLINE double ldexp(double __x, int __y);
+__MATH_INLINE double ldexp(double __x, int __y)
+{
+    __ldexp_code;
+}
+
+
+/* Optimized versions for some non-standardized functions.  */
+#if defined __USE_ISOC9X || defined __USE_MISC
+
+__inline_mathcode(expm1, __x, __expm1_code)
+/* We cannot rely on M_SQRT2 being defined.  So we define it for ourselves
+   here.  */
+# define __M_SQRT2 1.41421356237309504880L /* sqrt(2) */
+    /* log1p: fyl2xp1 is used only while |x| < 1 - sqrt(2)/2; larger
+   arguments fall back to logl(1 + x).  */ __inline_mathcode(log1p, __x,
+		      register long double __value;
+		      if (__fabsl(__x) >= 1.0 - 0.5 * __M_SQRT2)
+		      __value = logl(1.0 + __x);
+		      else
+__asm __volatile__("fldln2\n\t" "fxch\n\t" "fyl2xp1": "=t"(__value):"0"(__x));
+		      return
+		      __value)
+
+
+/* The argument range of the inline version of asinhl is slightly reduced.  */
+    __inline_mathcode(asinh, __x,
+		      register long double __y = __fabsl(__x);
+		      return (log1pl
+			      (__y * __y /
+			       (__sqrtl(__y * __y + 1.0) + 1.0) +
+			       __y) *
+			      __sgn1l(__x))) __inline_mathcode(acosh, __x,
+							       return
+							       logl(__x +
+								    __sqrtl
+								    (__x -
+								     1.0)
+								    *
+								    __sqrtl
+								    (__x +
+								     1.0)))
+    __inline_mathcode(atanh, __x, register long double __y = __fabsl(__x);
+		      return -0.5 * log1pl(-(__y + __y) / (1.0 + __y)) *
+		      __sgn1l(__x))
+/* The argument range of the inline version of hypotl is slightly reduced.  
*/ +__inline_mathcode2(hypot, __x, __y, return __sqrtl(__x * __x + __y * __y)) __inline_mathcode(logb, __x, register long double __value; register long double __junk; __asm __volatile__("fxtract\n\t": "=t"(__junk), "=u"(__value):"0"(__x)); return __value) +#endif +#ifdef __USE_ISOC9X + __inline_mathop(log2, "fld1; fxch; fyl2x") +__MATH_INLINE float ldexpf(float __x, int __y); +__MATH_INLINE float ldexpf(float __x, int __y) +{ + __ldexp_code; +} + +__MATH_INLINE long double ldexpl(long double __x, int __y); +__MATH_INLINE long double ldexpl(long double __x, int __y) +{ + __ldexp_code; +} + +__inline_mathcode3(fma, __x, __y, __z, return (__x * __y) + __z) + __inline_mathop(rint, "frndint") +#define __lrint_code \ + long int __lrintres; \ + __asm__ __volatile__ \ + ("fistpl %0" \ + : "=m" (__lrintres) : "t" (__x) : "st"); \ + return __lrintres +__MATH_INLINE long int lrintf(float __x) +{ + __lrint_code; +} + +__MATH_INLINE long int lrint(double __x) +{ + __lrint_code; +} + +__MATH_INLINE long int lrintl(long double __x) +{ + __lrint_code; +} + +#undef __lrint_code + +#define __llrint_code \ + long long int __llrintres; \ + __asm__ __volatile__ \ + ("fistpll %0" \ + : "=m" (__llrintres) : "t" (__x) : "st"); \ + return __llrintres +__MATH_INLINE long long int llrintf(float __x) +{ + __llrint_code; +} + +__MATH_INLINE long long int llrint(double __x) +{ + __llrint_code; +} + +__MATH_INLINE long long int llrintl(long double __x) +{ + __llrint_code; +} + +#undef __llrint_code + +#endif + + +#ifdef __USE_MISC + +__inline_mathcode2(drem, __x, __y, register double __value; register int __clobbered; __asm __volatile__("1: fprem1\n\t" "fstsw %%ax\n\t" "sahf\n\t" "jp 1b": "=t"(__value), "=&a"(__clobbered): "0"(__x), "u"(__y):"cc"); return __value) +/* This function is used in the `isfinite' macro. 
*/ /* The definition is currently compiled out by the #if 0 below.  It ORs
+   the second int word of the double with 0x800fffff and adds 1: the sum
+   wraps to 0 only when every bit not covered by that mask is set, so the
+   final >> 31 yields 0 in that case and 1 otherwise.  */
+#if 0
+__MATH_INLINE int __finite(double __x) __attribute__ ((__const__));
+__MATH_INLINE int __finite(double __x)
+{
+    return (__extension__(((((union {
+			      double __d;
+			      int __i[2];
+			      }
+			     ) {
+			     __d: __x}
+			    ).__i[1] | 0x800fffff) + 1) >> 31));
+}
+#endif
+
+/* Miscellaneous functions */
+/* __coshm1(x) evaluates 0.5 * e^2 / (e + 1) with e = expm1(|x|);
+   __acosh1p(x) evaluates log1p(x + sqrt(x) * sqrt(x + 2)).  */
+__inline_mathcode(__coshm1, __x,
+		  register long double __exm1 = __expm1l(__fabsl(__x));
+		  return 0.5 * (__exm1 / (__exm1 + 1.0)) * __exm1)
+    __inline_mathcode(__acosh1p, __x,
+		      return log1pl(__x + __sqrtl(__x) * __sqrtl(__x + 2.0)))
+#endif /* __USE_MISC */
+/* Undefine some of the large macros which are not used anymore.  */
+#undef __expm1_code
+#undef __exp_code
+#undef __atan2_code
+#undef __sincos_code
+#endif /* __NO_MATH_INLINES */
+/* This code is used internally in the GNU libc.  */
+#ifdef __LIBC_INTERNAL_MATH_INLINES
+__inline_mathop(__ieee754_sqrt, "fsqrt") __inline_mathcode2(__ieee754_atan2, __y, __x, register long double __value; __asm __volatile__("fpatan\n\t": "=t"(__value): "0"(__x), "u"(__y):"st(1)"); return __value;
+    )
+#endif
+#endif /* __GNUC__ */
+#endif
+#endif
diff --git a/src/include/i386/ctrl87.h b/src/include/i386/ctrl87.h
new file mode 100644
index 0000000..02b4490
--- /dev/null
+++ b/src/include/i386/ctrl87.h
@@ -0,0 +1,120 @@
+/*
+ * This file is part of the MS-DOS port of XaoS
+ * ctrl87.h
+ *
+ */
+
+
+#ifndef __CONTROL87_H__
+
+
+#define __CONTROL87_H__
+
+/* EXTERN expands to nothing in the file that defines __CONTROL87_C__ and
+   to `extern' everywhere else.  */
+#ifdef __CONTROL87_C__
+#define EXTERN
+#else
+#define EXTERN extern
+#endif
+
+
+/* 8087/80287 Status Word format */
+
+#define SW_INVALID 0x0001 /* Invalid operation */
+#define SW_DENORMAL 0x0002 /* Denormalized operand */
+#define SW_ZERODIVIDE 0x0004 /* Zero divide */
+#define SW_OVERFLOW 0x0008 /* Overflow */
+#define SW_UNDERFLOW 0x0010 /* Underflow */
+#define SW_INEXACT 0x0020 /* Precision (Inexact result) */
+
+/* 8087/80287 Control Word format */
+
+#define MCW_EM 0x003f /* interrupt Exception Masks */
+#define EM_INVALID 0x0001 /* invalid */
+#define EM_DENORMAL 0x0002 /* denormal 
*/
+#define EM_ZERODIVIDE 0x0004 /* zero divide */
+#define EM_OVERFLOW 0x0008 /* overflow */
+#define EM_UNDERFLOW 0x0010 /* underflow */
+#define EM_INEXACT 0x0020 /* inexact (precision) */
+
+#define MCW_IC 0x1000 /* Infinity Control */
+#define IC_AFFINE 0x1000 /* affine */
+#define IC_PROJECTIVE 0x0000 /* projective */
+
+#define MCW_RC 0x0c00 /* Rounding Control */
+#define RC_CHOP 0x0c00 /* chop */
+#define RC_UP 0x0800 /* up */
+#define RC_DOWN 0x0400 /* down */
+#define RC_NEAR 0x0000 /* near */
+
+#define MCW_PC 0x0300 /* Precision Control */
+#define PC_24 0x0000 /* 24 bits */
+#define PC_53 0x0200 /* 53 bits */
+#define PC_64 0x0300 /* 64 bits */
+
+/**************************************************************************/
+/************************* Type declarations **************************/
+/**************************************************************************/
+
+/**************************************************************************/
+/************************ Function declarations ***********************/
+/**************************************************************************/
+
+/*
+ _control87 changes the floating-point control word.
+
+ Declaration:
+ ------------
+ unsigned short _control87(unsigned short newcw, unsigned short mask);
+
+ Remarks:
+ --------
+ _control87 retrieves or changes the floating-point control word.
+
+ The floating-point control word is an unsigned short that specifies the
+ following modes in the 80x87 FPU:
+ o allowed exceptions
+ o infinity mode
+ o rounding mode
+ o precision mode
+
+ Changing these modes allows you to mask or unmask floating-point exceptions.
+
+ _control87 matches the bits in mask to the bits in newcw.
+
+ For every mask bit that is 1, the corresponding bit in newcw is the new
+ value for the same bit in the floating-point control word.
+
+ If mask = 0000, _control87 returns the floating-point control word without
+ altering it.
+
+ Examples:
+ ---------
+ Switching to projective infinity mode:
+ _control87(IC_PROJECTIVE, MCW_IC);
+
+ Disabling all exceptions:
+ _control87(MCW_EM, MCW_EM);
+
+ Return Value:
+ -------------
+ The bits in the value returned reflect the new floating-point control word.
+*/
+EXTERN unsigned short _control87(unsigned short, unsigned short);
+
+
+/**************************************************************************/
+/************************** Global variables **************************/
+/**************************************************************************/
+
+
+/* Both branches below are empty: no global variables are declared.  */
+#ifdef __CONTROL87_C__
+#else
+#endif
+
+
+#undef EXTERN
+
+
+#endif
diff --git a/src/include/i386/sstring.h b/src/include/i386/sstring.h
new file mode 100644
index 0000000..bd2dd2d
--- /dev/null
+++ b/src/include/i386/sstring.h
@@ -0,0 +1,414 @@
+#if 0 /* everything up to the matching #endif at the end of this header is compiled out */
+#ifndef _I386_STRING_I486_H_
+#define _I386_STRING_I486_H_
+#if defined(__OPTIMIZE__) && defined(__GNUC__) && defined(__i386__)
+/*
+ * This string-include defines all string functions as inline
+ * functions. Use gcc. It also assumes ds=es=data space, this should be
+ * normal. Most of the string-functions are rather heavily hand-optimized,
+ * see especially strtok,strstr,str[c]spn. They should work, but are not
+ * very easy to understand. Everything is done entirely within the register
+ * set, making the functions fast and clean.
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Revised and optimized for i486/pentium
+ * 1994/03/15 by Alberto Vignani/Davide Parodi @crf.it
+ *
+ * Split into 2 CPU specific files by Alan Cox to keep #ifdef noise down.
+ *
+ * Revised and optimized again by Jan Hubicka (1997/11/16)
+ * (please report bugs to hubicka@paru.cas.cz)
+ *
+ * The memset and memcpy routines seem to be always faster on the 486 and
+ * Pentium, but on the Pentium MMX they are sometimes a bit slower (5-10%).
+ * Because of less strict register allocation they produce better code. 
+ */
+
+/* memcpy dispatches on whether the count is a compile-time constant:
+   __memcpy_c for constant counts, __memcpy_g for everything else.  */
+#define __HAVE_ARCH_MEMCPY
+#define memcpy(d,s,count) \
+(__builtin_constant_p(count) ? \
+ __memcpy_c((d),(s),(count)) : \
+ __memcpy_g((d),(s),(count)))
+
+/*
+ * These ought to get tweaked to do some cache priming.
+ */
+
+
+/* This implementation of memcpy is designed for moveoldpoints from
+ * mkrealloctables. It is expected to work well for both small and large
+ * sizes.
+ *
+ * Small (1-10) and medium (300) sizes seem to be important for XaoS.
+ * So the implementation is not super fast for large sizes, but my
+ * experiments don't show large improvements in speed anyway.
+ *
+ * We use rep movsX operations (they work well on the PPro and don't seem
+ * to be so bad on the Pentium) and expect the direction flag to be clear
+ * (cld) on entry. Hope that it will not cause problems.
+ *
+ * My attempt was to use c code where possible to let GCC do the
+ * [NOTE(review): this sentence breaks off in the original comment.]
+ *
+ * For n >= 7 the copy runs in three steps: up to three single bytes until
+ * the destination is 4-byte aligned, then n >> 2 dwords, then the
+ * remaining n & 3 bytes. Shorter copies use only the final byte loop.
+ * Returns `to'.
+ */
+extern inline void *__memcpy_g(void *to, const register void *from,
+			       register size_t n);
+extern inline void *__memcpy_g(void *to, const register void *from,
+			       register size_t n)
+{
+    register void *tmp = (void *) to;
+    if (n >= 7) {
+	register int c = (-(int) to) & 3;
+	n -= c;
+	__asm__ __volatile__( /* Align the destination */
+				"rep\n\tmovsb":"=c"(c), "=D"(tmp),
+				"=S"(from):"c"(c), "D"((long) tmp),
+				"S"((long) from):"memory");
+	c = n >> 2;
+	__asm__ __volatile__( /* Copy the main body */
+				"rep\n\tmovsl":"=c"(c), "=D"(tmp),
+				"=S"(from):"c"(c), "D"((long) tmp),
+				"S"((long) from):"memory");
+	n &= 3;
+    }
+    __asm__ __volatile__("rep\n\tmovsb":"=c"(n), "=D"(tmp),
+			 "=S"(from):"c"(n), "D"((long) tmp),
+			 "S"((long) from):"memory");
+    return (to);
+}
+
+/*
+ * This looks horribly ugly, but the compiler can optimize it totally,
+ * as the count is constant. 
+ */
+
+/* Dword copy loop used by the large-count cases of __memcpy_c; X is the
+   asm text that copies the trailing n % 4 bytes.  Operand 0 is the scratch
+   register the data travels through; because X addresses it as %b0/%w0 it
+   must be a byte-addressable register, hence "=q" rather than "=r".  The
+   macro ends with the function's return statement.  */
+#define COMMON(x) \
+__asm__ __volatile__ ( \
+	"\n.align 4\n" \
+	"1:\tmovl (%2),%0\n\t" \
+	"addl $4,%2\n\t" \
+	"movl %0,(%1)\n\t" \
+	"addl $4,%1\n\t" \
+	"decl %3\n\t" \
+	"jnz 1b\n" \
+	x \
+	:"=q" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2) \
+	:"1" (tmp), "2" (from), "3" (n/4) \
+	:"memory"); \
+return (to);
+
+/* memcpy for a compile-time constant count N.  Counts below 24 are copied
+   with up to five dword stores plus a word and/or byte store for the
+   remaining n % 4 bytes; larger counts use the COMMON loop.  Returns `to'.  */
+extern inline void *__memcpy_c(void *to, const void *from, size_t n);
+extern inline void *__memcpy_c(void *to, const void *from, size_t n)
+{
+    if (n < 24) {
+	if (n >= 4)
+	    ((unsigned long *) to)[0] = ((const unsigned long *) from)[0];
+	if (n >= 8)
+	    ((unsigned long *) to)[1] = ((const unsigned long *) from)[1];
+	if (n >= 12)
+	    ((unsigned long *) to)[2] = ((const unsigned long *) from)[2];
+	if (n >= 16)
+	    ((unsigned long *) to)[3] = ((const unsigned long *) from)[3];
+	if (n >= 20)
+	    ((unsigned long *) to)[4] = ((const unsigned long *) from)[4];
+	switch ((unsigned int) (n % 4)) {
+	case 3:
+	    ((unsigned short *) to)[n / 2 - 1] =
+		((const unsigned short *) from)[n / 2 - 1];
+	    ((unsigned char *) to)[n - 1] =
+		((const unsigned char *) from)[n - 1];
+	    return to;
+	case 2:
+	    ((unsigned short *) to)[n / 2 - 1] =
+		((const unsigned short *) from)[n / 2 - 1];
+	    return to;
+	case 1:
+	    ((unsigned char *) to)[n - 1] =
+		((const unsigned char *) from)[n - 1];
+	    /* fall through */
+	case 0:
+	    return to;
+	}
+    }
+    {
+	register void *tmp = (void *) to;
+	register int dummy1, dummy2;
+	switch ((unsigned int) (n % 4)) {
+	case 0:
+	    COMMON("");
+	case 1:
+	    COMMON("movb (%2),%b0 ; movb %b0,(%1)");
+	case 2:
+	    COMMON("movw (%2),%w0 ; movw %w0,(%1)");
+	case 3:
+	    COMMON("movw (%2),%w0 ; movw %w0,(%1)\n\t"
+		   "movb 2(%2),%b0 ; movb %b0,2(%1)");
+	}
+    }
+    return to;
+}
+
+#undef COMMON
+
+
+/* memmove: copies forwards when dest < src and backwards (std ... cld)
+   otherwise, so overlapping regions are handled.  rep movsb modifies ecx,
+   esi and edi; this is expressed through dummy outputs tied to the inputs
+   with matching constraints, because GCC does not accept a clobber that
+   names a register also used as an input operand.  Returns dest.  */
+#define __HAVE_ARCH_MEMMOVE
+extern inline void *memmove(void *dest, const void *src, size_t n);
+extern inline void *memmove(void *dest, const void *src, size_t n)
+{
+    int d0, d1, d2;		/* receive the final ecx/esi/edi values */
+    if (dest < src)
+	__asm__ __volatile__("cld\n\t" "rep\n\t" "movsb"
+			     :"=&c"(d0), "=&S"(d1), "=&D"(d2)
+			     :"0"(n), "1"(src), "2"(dest)
+			     :"memory");
+    else
+	__asm__ __volatile__("std\n\t" "rep\n\t" "movsb\n\t" "cld"
+			     :"=&c"(d0), "=&S"(d1), "=&D"(d2)
+			     :"0"(n), "1"(n - 1 + (const char *) src),
+			     "2"(n - 1 + (char *) dest)
+			     :"memory");
+    return dest;
+}
+
+#define memcmp __builtin_memcmp
+
+/* memchr: scan COUNT bytes at CS for the byte C with repne scasb.  Returns
+   a pointer to the first match, or NULL when COUNT is 0 or no byte
+   matches (edi is forced to 1 and decremented in that case).  ecx is
+   modified by the scan and is therefore bound to a dummy output instead
+   of being listed as a clobber.  */
+#define __HAVE_ARCH_MEMCHR
+extern inline void *memchr(const void *cs, int c, size_t count);
+extern inline void *memchr(const void *cs, int c, size_t count)
+{
+    int d0;			/* receives the final ecx value */
+    register void *__res;
+    if (!count)
+	return NULL;
+    __asm__ __volatile__("cld\n\t"
+			 "repne\n\t"
+			 "scasb\n\t"
+			 "je 1f\n\t"
+			 "movl $1,%0\n"
+			 "1:\tdecl %0":"=D"(__res), "=&c"(d0)
+			 :"a"(c), "0"(cs), "1"(count));
+    return __res;
+}
+
+
+
+/* memset dispatches on which of the fill value and the count are
+   compile-time constants: the two letters of the helper name stand for
+   (value, count), with c = constant and g = general.  */
+#define __HAVE_ARCH_MEMSET
+#define memset(s,c,count) \
+(__builtin_constant_p(c) ? \
+ (__builtin_constant_p(count) ? \
+ __memset_cc((s),(c),(count)) : \
+ __memset_cg((s),(c),(count))) : \
+ (__builtin_constant_p(count) ? \
+ __memset_gc((s),(c),(count)) : \
+ __memset_gg((s),(c),(count))))
+
+
+
+
+/* Constant value, general count: the byte is replicated into a 16-bit
+   pattern in C, count / 2 words are stored with rep stosw and an odd final
+   byte is stored separately.  Returns s.  */
+extern inline void *__memset_cg(void *s, char c, size_t count);
+extern inline void *__memset_cg(void *s, char c, size_t count)
+{
+    int tmp2;
+    register void *tmp = (void *) s;
+    __asm__ __volatile__("shrl $1,%%ecx\n\t"
+			 "rep\n\t"
+			 "stosw\n\t"
+			 "jnc 1f\n\t"
+			 "movb %%al,(%%edi)\n"
+			 "1:":"=c"(tmp2), "=D"(tmp):"c"(count), "D"(tmp),
+			 "a"(0x0101U * (unsigned char) c):"memory");
+    return s;
+}
+
+/* General value, general count: as __memset_cg, but the byte is replicated
+   inside the asm (movb %al,%ah).  Returns s.  */
+extern inline void *__memset_gg(void *s, char c, size_t count);
+extern inline void *__memset_gg(void *s, char c, size_t count)
+{
+    register void *tmp = (void *) s;
+    int tmp2;
+    __asm__ __volatile__("movb %%al,%%ah\n\t"
+			 "shrl $1,%%ecx\n\t"
+			 "rep\n\t"
+			 "stosw\n\t"
+			 "jnc 1f\n\t"
+			 "movb %%al,(%%edi)\n"
+			 "1:":"=c"(tmp2), "=D"(tmp):"c"(count), "D"(tmp),
+			 "a"(c):"memory");
+    return s;
+}
+
+/*
+ * These non-rep routines are not much faster (slower for small strings)
+ * but they allow better register allocation.
+ *
+ * COMMON stores count / 4 copies of the 32-bit pattern; X is the asm text
+ * that stores the trailing count % 4 bytes. The macro ends with the
+ * function's return statement.
+ */
+#define COMMON(x) \
+__asm__ __volatile__ ( \
+	"\n.align 4\n" \
+	"1:\tmovl %k2,(%k0)\n\t" \
+	"addl $4,%k0\n\t" \
+	"decl %k1\n\t" \
+	"jnz 1b\n" \
+	x \
+	:"=r" (tmp), "=r" (dummy) \
+	:"q" ((unsigned) pattern), "0" (tmp), "1" (count/4) \
+	:"memory"); \
+return s;
+
+/* Constant value, constant count.  The low byte of PATTERN is replicated
+   into all four bytes by a multiplication.  Counts below 24 use up to five
+   dword stores plus a word/byte tail; larger counts use the COMMON loop.
+   Returns s.  */
+extern inline void *__memset_cc(void *s, unsigned long pattern,
+				size_t count);
+extern inline void *__memset_cc(void *s, unsigned long pattern,
+				size_t count)
+{
+    pattern = ((unsigned char) pattern) * 0x01010101UL;
+    if (count < 24) {
+	/* Handle small values manually since they are incredibly slow */
+
+	if (count >= 4)
+	    *(unsigned long *) s = pattern;
+	if (count >= 8)
+	    ((unsigned long *) s)[1] = pattern;
+	if (count >= 12)
+	    ((unsigned long *) s)[2] = pattern;
+	if (count >= 16)
+	    ((unsigned long *) s)[3] = pattern;
+	if (count >= 20)
+	    ((unsigned long *) s)[4] = pattern;
+	switch ((unsigned int) (count % 4)) {
+	case 3:
+	    ((unsigned short *) s)[count / 2 - 1] = pattern;
+	    ((unsigned char *) s)[count - 1] = pattern;
+	    return s;
+	case 2:
+	    ((unsigned short *) s)[count / 2 - 1] = pattern;
+	    return s;
+	case 1:
+	    ((unsigned char *) s)[count - 1] = pattern;
+	    /* fall through */
+	case 0:
+	    return s;
+	}
+    } else {
+	register void *tmp = (void *) s;
+	register int dummy;
+	switch ((unsigned int) (count % 4)) {
+	case 0:
+	    COMMON("");
+	case 1:
+	    COMMON("movb %b2,(%0)");
+	case 2:
+	    COMMON("movw %w2,(%0)");
+	case 3:
+	    COMMON("movw %w2,(%0)\n\tmovb %b2,2(%0)");
+	}
+    }
+    return s;
+}
+
+/* General value, constant count.  Counts below 4 are stored directly (the
+   byte is first duplicated into the second byte when more than one byte is
+   needed).  Otherwise the byte is replicated into all four bytes in asm
+   and the function proceeds like __memset_cc.  Returns s.  */
+extern inline void *__memset_gc(void *s, unsigned long pattern,
+				size_t count);
+extern inline void *__memset_gc(void *s, unsigned long pattern,
+				size_t count)
+{
+    if (count < 4) {
+	if (count > 1)
+	    __asm__("movb %b0,%h0\n\t": "=q"(pattern):"0"((unsigned)
+							     pattern));
+	switch ((unsigned int) (count)) {
+	case 3:
+	    ((unsigned short *) s)[0] = pattern;
+	    ((unsigned char *) s)[2] = pattern;
+	    return s;
+	case 2:
+	    *((unsigned short *) s) = pattern;
+	    return s;
+	case 1:
+	    *(unsigned char *) s = pattern;
+	    /* fall through */
+	case 0:
+	    return s;
+	}
+    }
+
+    /* Duplicate the low byte into the second byte, then copy that 16-bit
+       value into the upper half as well (push, shift left by 16, pop).  */
+    __asm__("movb %b0,%h0\n\t" "pushw %w0\n\t" "shll $16,%k0\n\t" "popw %w0\n": "=q"(pattern):"0"((unsigned)
+												      pattern));
+
+    if (count < 24) {
+	/* Handle small values manually since they are incredibly slow */
+
+	*(unsigned long *) s = pattern;
+	if (count >= 8)
+	    ((unsigned long *) s)[1] = pattern;
+	if (count >= 12)
+	    ((unsigned long *) s)[2] = pattern;
+	if (count >= 16)
+	    ((unsigned long *) s)[3] = pattern;
+	if (count >= 20)
+	    ((unsigned long *) s)[4] = pattern;
+	switch ((unsigned int) (count % 4)) {
+	case 3:
+	    ((unsigned short *) s)[count / 2 - 1] = pattern;
+	    ((unsigned char *) s)[count - 1] = pattern;
+	    return s;
+	case 2:
+	    ((unsigned short *) s)[count / 2 - 1] = pattern;
+	    return s;
+	case 1:
+	    ((unsigned char *) s)[count - 1] = pattern;
+	    /* fall through */
+	case 0:
+	    return s;
+	}
+    } else {
+	register void *tmp = (void *) s;
+	register int dummy;
+	switch ((unsigned int) (count % 4)) {
+	case 0:
+	    COMMON("");
+	case 1:
+	    COMMON("movb %b2,(%0)");
+	case 2:
+	    COMMON("movw %w2,(%0)");
+	case 3:
+	    COMMON("movw %w2,(%0)\n\tmovb %b2,2(%0)");
+	}
+    }
+    return s;
+}
+
+#undef COMMON
+
+
+/*
+ * find the first occurrence of byte 'c', or 1 past the area if none
+ * (a zero size returns addr unchanged)
+ */
+#define __HAVE_ARCH_MEMSCAN
+extern inline void *memscan(void *addr, int c, size_t size);
+extern inline void *memscan(void *addr, int c, size_t size)
+{
+    if (!size)
+	return addr;
+    __asm__ __volatile__("cld\n\t"
+			 "repnz; scasb\n\t"
+			 "jnz 1f\n\t"
+			 "dec %%edi\n\t"
+			 "1:":"=D"(addr), "=c"(size):"0"(addr), "1"(size),
+			 "a"(c));
+    return addr;
+}
+
+#define memset_long(x,y,z) __memset_long(x,y,z)
+
+/* memset variant tuned like __memcpy_g: for count >= 7 it stores single
+   bytes until the destination is 4-byte aligned, then count >> 2 dwords
+   of the replicated fill byte, then the remaining count & 3 bytes.
+   Shorter fills use only the final byte loop.  Returns s.
+
+   The fill byte is converted through unsigned char before it is widened:
+   with a signed char a value such as 0x80 would otherwise sign-extend to
+   0xffffff80 and the dword stores would write 80 ff ff ff instead of
+   80 80 80 80.  */
+extern inline void *__memset_long(void *s, char c, size_t count);
+extern inline void *__memset_long(void *s, char c, size_t count)
+{
+    register unsigned int fill = (unsigned char) c;
+    register void *tmp = (void *) s;
+    if (count >= 7) {
+	register int cnt = (-(int) s) & 3;	/* bytes needed to align s */
+	fill |= fill << 8;
+	count -= cnt;
+	fill |= fill << 16;
+	__asm__ __volatile__("rep\n\tstosb":"=c"(cnt), "=D"(tmp):"c"(cnt),
+			     "D"(tmp), "a"(fill):"memory");
+	cnt = count >> 2;
+	__asm__ __volatile__("rep\n\tstosl":"=c"(cnt), "=D"(tmp):"c"(cnt),
+			     "D"(tmp), "a"(fill):"memory");
+	count &= 3;
+    }
+    __asm__ __volatile__("rep\n\tstosb":"=c"(count), "=D"(tmp):"c"(count),
+			 "D"(tmp), "a"((char) fill):"memory");
+    return s;
+}
+#endif
+#endif
+#endif |