This appendix describes the floating-point formats used for pixel storage in framebuffers and renderbuffers, and texel storage in textures. It has the following major sections:
• “Reduced-Precision Floating-Point Values”
• “16-Bit Floating-Point Values”
• “10- and 11-Bit Unsigned Floating-Point Values”
In addition to the normal 32-bit single-precision floating-point values you normally use when you declare a GLfloat in your application, OpenGL supports reduced-precision floating-point representations for storing data more compactly than its 32-bit representation. In many instances, your floating-point data may not require the entire dynamic range of a 32-bit float, and storing or processing data in a reduced-precision format may save memory and increase data transfer rates.
OpenGL supports three reduced-precision floating-point formats: 16-bit (signed) floating-point values, and 10- and 11-bit unsigned floating-point values. Table F.1 describes the bit layout of each representation and the associated pixel formats.
For signed 16-bit floating-point values, the smallest positive normalized value and the maximum value that can be represented are (about) 6.103 × 10⁻⁵ and 65504.0, respectively.
The following routine, F32toF16(), will convert a single, full-precision 32-bit floating-point value to a 16-bit reduced-precision form (stored as an unsigned-short integer).
/*
 * Bit-layout constants for the 16-bit floating-point encoding:
 * a 5-bit exponent field (bias 15) stored above a 10-bit mantissa.
 */
#define F16_EXPONENT_BITS 0x1F
#define F16_EXPONENT_SHIFT 10
#define F16_EXPONENT_BIAS 15
#define F16_MANTISSA_BITS 0x3ff
/* Right shift that reduces a 23-bit float mantissa to the 10 bits kept */
#define F16_MANTISSA_SHIFT (23 - F16_EXPONENT_SHIFT)
/* All-ones exponent field, in position: the Infinity/NaN pattern (0x7C00).
   The value must sit on the same line as the name, otherwise the macro
   expands to nothing. */
#define F16_MAX_EXPONENT (F16_EXPONENT_BITS << F16_EXPONENT_SHIFT)
/*
 * F32toF16 - convert a 32-bit single-precision value to 16-bit form.
 *
 * val:     the value to convert.
 * returns: the 16-bit encoding (sign in bit 15, 5 exponent bits,
 *          10 mantissa bits) as an unsigned short.
 *
 * Infinity stays infinity and NaN stays NaN.  Finite values whose
 * unbiased exponent is above 15 become infinity; values whose unbiased
 * exponent is below -14 (which includes 32-bit denormals) become zero
 * with the sign kept.  The mantissa is truncated to 10 bits, not rounded.
 */
GLushort
F32toF16(GLfloat val)
{
    /* Read the bits through a union: dereferencing (GLuint *) &val
       violates the strict-aliasing rule. */
    union {
        GLfloat f;
        GLuint ui;
    } bits;
    GLushort f16 = 0;
    int sign;
    int exponent;
    int mantissa;

    bits.f = val;

    /* Move the sign from bit 31 down to bit 15 */
    sign = (bits.ui >> 16) & 0x8000;
    /* Map exponent to the range [-127,128] */
    exponent = (int) ((bits.ui >> 23) & 0xff) - 127;
    mantissa = bits.ui & 0x007fffff;

    if (exponent == 128) { /* Infinity or NaN */
        f16 = sign | F16_MAX_EXPONENT;
        if (mantissa) {
            /* Keep the most significant payload bits (this preserves the
               quiet bit).  If they are all zero, force a nonzero mantissa
               so the NaN does not turn into infinity. */
            mantissa >>= F16_MANTISSA_SHIFT;
            if (mantissa == 0) {
                mantissa = 1;
            }
            f16 |= mantissa;
        }
    }
    else if (exponent > 15) { /* Overflow - flush to Infinity */
        f16 = sign | F16_MAX_EXPONENT;
    }
    else if (exponent > -15) { /* Representable value */
        exponent += F16_EXPONENT_BIAS;
        mantissa >>= F16_MANTISSA_SHIFT;
        f16 = sign | exponent << F16_EXPONENT_SHIFT | mantissa;
    }
    else { /* Too small for a normalized result - flush to signed zero */
        f16 = sign;
    }

    return f16;
}
Likewise, F16toF32() converts from the reduced-precision floating-point form into a normal 32-bit floating-point value.
#define F32_INFINITY 0x7f800000
/*
 * F16toF32 - convert a 16-bit floating-point encoding to a 32-bit float.
 *
 * val:     the 16-bit encoding (sign in bit 15, 5 exponent bits,
 *          10 mantissa bits).
 * returns: the same value as a GLfloat.  Zeros keep their sign, denormals
 *          are converted exactly, and Infinity/NaN map to Infinity/NaN.
 */
GLfloat
F16toF32(GLushort val)
{
    union {
        GLfloat f;
        GLuint ui;
    } f32;
    /* Keep the sign as a flag and apply it exactly once per path */
    int negative = (val & 0x8000) != 0;
    int exponent = (val & 0x7c00) >> 10;
    int mantissa = (val & 0x03ff);

    f32.f = 0.0;

    if (exponent == 0) { /* Zero or denormal */
        if (mantissa != 0) {
            /* Denormal value is mantissa * 2^-10 * 2^-14 */
            const GLfloat scale = 1.0 / (1 << 24);
            f32.f = scale * mantissa;
        }
    }
    else if (exponent == 31) { /* Infinity or NaN */
        /* Build the bit pattern directly: the sign goes to bit 31 and the
           10 payload bits go to the top of the 23-bit mantissa, so a
           quiet NaN stays quiet. */
        f32.ui = (negative ? 0x80000000u : 0u)
               | F32_INFINITY
               | ((GLuint) mantissa << 13);
        return f32.f;
    }
    else { /* Normalized value */
        GLfloat scale, decimal;

        exponent -= 15;
        if (exponent < 0) {
            scale = 1.0 / (1 << -exponent);
        }
        else {
            scale = 1 << exponent;
        }
        /* Restore the implicit leading one */
        decimal = 1.0 + (float) mantissa / (1 << 10);
        f32.f = scale * decimal;
    }

    if (negative) {
        f32.f = -f32.f;
    }

    return f32.f;
}
For normalized color values in the range [0, 1], unsigned 10- and 11-bit floating-point formats may provide a more compact format with better dynamic range than either floating-point values or OpenGL’s unsigned integer pixel formats. The maximum representable values are 64512 and 65024, respectively.
These are the routines for converting floating-point values into 11-bit unsigned floating-point values, and vice versa:
/*
 * Bit-layout constants for the unsigned 11-bit floating-point encoding:
 * a 5-bit exponent field (bias 15) stored above a 6-bit mantissa.
 */
#define UF11_EXPONENT_BIAS 15
#define UF11_EXPONENT_BITS 0x1F
#define UF11_EXPONENT_SHIFT 6
#define UF11_MANTISSA_BITS 0x3F
/* Right shift that reduces a 23-bit float mantissa to the 6 bits kept */
#define UF11_MANTISSA_SHIFT (23 - UF11_EXPONENT_SHIFT)
/* All-ones exponent field, in position: the Infinity/NaN pattern (0x7C0).
   The value must sit on the same line as the name, otherwise the macro
   expands to nothing. */
#define UF11_MAX_EXPONENT (UF11_EXPONENT_BITS << UF11_EXPONENT_SHIFT)
/*
 * F32toUF11 - convert a 32-bit float to the unsigned 11-bit
 * floating-point encoding (5 exponent bits, 6 mantissa bits, no sign).
 *
 * val:     the value to convert.
 * returns: the 11-bit encoding in the low bits of an unsigned short.
 *
 * Special cases follow the GL_EXT_packed_float conversion rules:
 *   - NaN of either sign becomes NaN;
 *   - positive infinity stays infinity, negative infinity becomes zero;
 *   - negative finite values (and negative zero) become zero;
 *   - finite values above the largest finite encoding (65024) are
 *     clamped to it rather than turned into infinity.
 * Values too small for a normalized result become zero, and the mantissa
 * is truncated, not rounded.
 */
GLushort
F32toUF11(GLfloat val)
{
    /* Read the bits through a union: dereferencing (GLuint *) &val
       violates the strict-aliasing rule. */
    union {
        GLfloat f;
        GLuint ui;
    } bits;
    /* Exponent field 30 with an all-ones mantissa: one below Infinity */
    const GLushort max_finite = (GLushort) (UF11_MAX_EXPONENT - 1);
    GLushort uf11 = 0;
    int sign;
    int exponent;
    int mantissa;

    bits.f = val;

    sign = (bits.ui >> 16) & 0x8000;
    /* Map exponent to the range [-127,128] */
    exponent = (int) ((bits.ui >> 23) & 0xff) - 127;
    mantissa = bits.ui & 0x007fffff;

    if (exponent == 128) { /* Infinity or NaN */
        if (mantissa) {
            /* NaN: test this before the sign so a negative NaN is not
               lost.  Keep the top payload bits and force a nonzero
               mantissa so the result cannot read back as infinity. */
            mantissa >>= UF11_MANTISSA_SHIFT;
            if (mantissa == 0) {
                mantissa = 1;
            }
            uf11 = UF11_MAX_EXPONENT | mantissa;
        }
        else if (!sign) { /* +Infinity; -Infinity falls through as zero */
            uf11 = UF11_MAX_EXPONENT;
        }
    }
    else if (sign) { /* Negative values are not representable */
        uf11 = 0;
    }
    else if (exponent > 15) { /* Overflow - clamp to largest finite value */
        uf11 = max_finite;
    }
    else if (exponent > -15) { /* Representable value */
        exponent += UF11_EXPONENT_BIAS;
        mantissa >>= UF11_MANTISSA_SHIFT;
        uf11 = exponent << UF11_EXPONENT_SHIFT | mantissa;
    }

    return uf11;
}
#define F32_INFINITY 0x7f800000
/*
 * UF11toF32 - expand an unsigned 11-bit floating-point encoding
 * (5 exponent bits above 6 mantissa bits) into a 32-bit float.
 *
 * val:     the 11-bit encoding, held in the low bits of val.
 * returns: the decoded value; an all-ones exponent field yields the
 *          float Infinity pattern OR-ed with the mantissa bits.
 */
GLfloat
UF11toF32(GLushort val)
{
    union {
        GLfloat f;
        GLuint ui;
    } result;
    const int exp_field = (val >> UF11_EXPONENT_SHIFT) & 0x1f;
    const int frac = val & 0x003f;

    /* Infinity or NaN: assemble the bit pattern directly */
    if (exp_field == 31) {
        result.ui = F32_INFINITY | frac;
        return result.f;
    }

    /* Zero or denormal: value is frac * 2^-6 * 2^-14 */
    if (exp_field == 0) {
        result.f = 0.0;
        if (frac != 0) {
            const GLfloat denorm_unit = 1.0 / (1 << 20);
            result.f = denorm_unit * frac;
        }
        return result.f;
    }

    /* Normalized: 2^(exp_field - 15) * (1 + frac / 64) */
    {
        const int power = exp_field - 15;
        GLfloat magnitude;
        GLfloat significand;

        if (power >= 0) {
            magnitude = 1 << power;
        }
        else {
            magnitude = 1.0 / (1 << -power);
        }
        significand = 1.0 + (float) frac / 64;
        result.f = magnitude * significand;
    }

    return result.f;
}
For completeness, we present similar routines for converting 10-bit unsigned floating-point values.
/*
 * Bit-layout constants for the unsigned 10-bit floating-point encoding:
 * a 5-bit exponent field (bias 15) stored above a 5-bit mantissa.
 */
#define UF10_EXPONENT_BIAS 15
#define UF10_EXPONENT_BITS 0x1F
#define UF10_EXPONENT_SHIFT 5
/* The mantissa is 5 bits wide (the bits below UF10_EXPONENT_SHIFT) */
#define UF10_MANTISSA_BITS 0x1F
/* Right shift that reduces a 23-bit float mantissa to the 5 bits kept */
#define UF10_MANTISSA_SHIFT (23 - UF10_EXPONENT_SHIFT)
/* All-ones exponent field, in position: the Infinity/NaN pattern (0x3E0).
   The value must sit on the same line as the name, otherwise the macro
   expands to nothing. */
#define UF10_MAX_EXPONENT (UF10_EXPONENT_BITS << UF10_EXPONENT_SHIFT)
/*
 * F32toUF10 - convert a 32-bit float to the unsigned 10-bit
 * floating-point encoding (5 exponent bits, 5 mantissa bits, no sign).
 *
 * val:     the value to convert.
 * returns: the 10-bit encoding in the low bits of an unsigned short.
 *
 * Special cases follow the GL_EXT_packed_float conversion rules:
 *   - NaN of either sign becomes NaN;
 *   - positive infinity stays infinity, negative infinity becomes zero;
 *   - negative finite values (and negative zero) become zero;
 *   - finite values above the largest finite encoding (64512) are
 *     clamped to it rather than turned into infinity.
 * Values too small for a normalized result become zero, and the mantissa
 * is truncated, not rounded.
 */
GLushort
F32toUF10(GLfloat val)
{
    /* Read the bits through a union: dereferencing (GLuint *) &val
       violates the strict-aliasing rule. */
    union {
        GLfloat f;
        GLuint ui;
    } bits;
    /* Exponent field 30 with an all-ones mantissa: one below Infinity */
    const GLushort max_finite = (GLushort) (UF10_MAX_EXPONENT - 1);
    GLushort uf10 = 0;
    int sign;
    int exponent;
    int mantissa;

    bits.f = val;

    sign = (bits.ui >> 16) & 0x8000;
    /* Map exponent to the range [-127,128] */
    exponent = (int) ((bits.ui >> 23) & 0xff) - 127;
    mantissa = bits.ui & 0x007fffff;

    if (exponent == 128) { /* Infinity or NaN */
        if (mantissa) {
            /* NaN: test this before the sign so a negative NaN is not
               lost.  Keep the top payload bits and force a nonzero
               mantissa so the result cannot read back as infinity. */
            mantissa >>= UF10_MANTISSA_SHIFT;
            if (mantissa == 0) {
                mantissa = 1;
            }
            uf10 = UF10_MAX_EXPONENT | mantissa;
        }
        else if (!sign) { /* +Infinity; -Infinity falls through as zero */
            uf10 = UF10_MAX_EXPONENT;
        }
    }
    else if (sign) { /* Negative values are not representable */
        uf10 = 0;
    }
    else if (exponent > 15) { /* Overflow - clamp to largest finite value */
        uf10 = max_finite;
    }
    else if (exponent > -15) { /* Representable value */
        exponent += UF10_EXPONENT_BIAS;
        mantissa >>= UF10_MANTISSA_SHIFT;
        uf10 = exponent << UF10_EXPONENT_SHIFT | mantissa;
    }

    return uf10;
}
#define F32_INFINITY 0x7f800000
/*
 * UF10toF32 - expand an unsigned 10-bit floating-point encoding
 * (5 exponent bits in bits 5-9 above 5 mantissa bits in bits 0-4)
 * into a 32-bit float.
 *
 * val:     the 10-bit encoding, held in the low bits of val.
 * returns: the decoded value; an all-ones exponent field yields the
 *          float Infinity pattern OR-ed with the mantissa bits, so a
 *          nonzero mantissa decodes to NaN.
 */
GLfloat
UF10toF32(GLushort val)
{
    union {
        GLfloat f;
        GLuint ui;
    } f32;
    /* 5-bit fields: the masks are 0x03e0 and 0x001f, not the 11-bit
       format's 0x07c0 and 0x003f */
    int exponent = (val & 0x03e0) >> UF10_EXPONENT_SHIFT;
    int mantissa = (val & 0x001f);

    f32.f = 0.0;

    if (exponent == 0) { /* Zero or denormal */
        if (mantissa != 0) {
            /* Denormal value is mantissa * 2^-5 * 2^-14 = mantissa * 2^-19 */
            const GLfloat scale = 1.0 / (1 << 19);
            f32.f = scale * mantissa;
        }
    }
    else if (exponent == 31) { /* Infinity or NaN */
        f32.ui = F32_INFINITY | mantissa;
    }
    else { /* Normalized value */
        GLfloat scale, decimal;

        exponent -= 15;
        if (exponent < 0) {
            scale = 1.0 / (1 << -exponent);
        }
        else {
            scale = 1 << exponent;
        }
        /* A 5-bit mantissa is a fraction in units of 1/32 */
        decimal = 1.0 + (float) mantissa / 32;
        f32.f = scale * decimal;
    }

    return f32.f;
}
3.19.27.178