Computer Systems Fundamentals



Demonstrate approximate representation of reals producing error. sometimes if we subtract two approximations which are very close together we can can get a large relative error
correct answer if x == 0.000000011 (1 - cos(x)) / (x * x) is very close to 0.5 code prints 0.917540 which is wrong by a factor of almost two
#include <stdio.h>
#include <math.h>

int main(void) {
    double x, y;
    x = 0.000000011;
    y = (1 - cos(x)) / (x * x);
    printf("correct answer = ~0.5 but y = %lf\n", y);
    return 0;
}


Print size and min and max values of floating point types
#include <stdio.h>
#include <float.h>

int main(void) {

    float f;
    printf("float       %2lu bytes  min=%-12g  max=%g\n", sizeof f, FLT_MIN, FLT_MAX);
    double d;
    printf("double      %2lu bytes  min=%-12g  max=%g\n", sizeof d, DBL_MIN, DBL_MAX);
    long double l;
    printf("long double %2lu bytes  min=%-12Lg  max=%Lg\n", sizeof l, LDBL_MIN, LDBL_MAX);

    return 0;
}


The value 0.1 can not be precisely represented as a double
As a result b != 0
#include <stdio.h>

int main(void) {
    double a, b;

    a = 0.1;
    b = 1 - (a + a + a + a + a + a + a + a + a + a);

    if (b != 0) {
        printf("1 != 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1\n");
    }

    printf("b = %g\n", b); // prints 1.11022e-16

    return 0;
}
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

int main(int argc, char *argv[]) {
    assert(argc == 2);

    double d = strtod(argv[1], NULL);

    if (d == d) {
        printf("This should be always executed\n");
    } else {
        // will be executed if d is a NaN
        printf("This should never executed\n");
    }
    printf("d=%g\n", d);
    return 0;
}



Print the underlying represntation of a float
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#include <float.h>
#include <assert.h>

void display_float(float value);
uint32_t get_float_bits(float f);
void print_float_bits(uint32_t bits);
void print_bits(uint32_t value, int high, int low);
void print_float_details(uint32_t bits);
uint32_t extract_bit_range(uint32_t value, int high, int low);

int main(int argc, char *argv[]) {
    for (int arg = 1; arg < argc; arg++) {
        printf("arg[%d] = \"%s\"\n", arg, argv[arg]);
        display_float(strtof(argv[arg], NULL));
    }
    return 0;
}


#define SIGN_BIT           31
#define EXPONENT_HIGH_BIT  30
#define EXPONENT_LOW_BIT   23
#define FRACTION_HIGH_BIT  22
#define FRACTION_LOW_BIT    0

#define EXPONENT_OFFSET   127
#define EXPONENT_INF_NAN  255

void display_float(float number) {
    uint32_t bits = get_float_bits(number);

    print_float_bits(bits);

    print_float_details(bits);
}

// For en explnation of calculation see:
// https://en.wikipedia.org/wiki/Single-precision_floating-point_format

void print_float_details(uint32_t bits) {
    uint32_t sign_bit = extract_bit_range(bits, SIGN_BIT, SIGN_BIT);
    uint32_t fraction_bits = extract_bit_range(bits, FRACTION_HIGH_BIT, FRACTION_LOW_BIT);
    uint32_t exponent_bits = extract_bit_range(bits, EXPONENT_HIGH_BIT, EXPONENT_LOW_BIT);

    int sign_char = sign_bit ? '-' : '+';
    int exponent = exponent_bits - EXPONENT_OFFSET;

    printf("sign = %c \n", sign_char);
    printf("raw exponent = %d\n", exponent_bits);

    int implicit_bit = 1;

    // handle special cases of +infinity, -infinity
    // and Not a Number (NaN)
    if (exponent_bits == EXPONENT_INF_NAN) {
        if (fraction_bits == 0) {
            printf("number = %cinf\n", sign_char);
        } else {
            // https://en.wikipedia.org/wiki/NaN
            printf("number = NaN\n");
        }
        return;
    }

    if (exponent_bits == 0) {
        // if the exponent_bits are zero its a special case
        // called a denormal number
        // https://en.wikipedia.org/wiki/Denormal_number
        implicit_bit = 0;
        exponent++;
    }

    printf("actual exponent = %d - %d = %d\n", exponent_bits, EXPONENT_OFFSET, exponent);


    printf("number = %c%d.", sign_char, implicit_bit);
    print_bits(bits, FRACTION_HIGH_BIT, FRACTION_LOW_BIT);
    printf("b * 2**%d\n", exponent);

    int fraction_size = FRACTION_HIGH_BIT - FRACTION_LOW_BIT + 1;
    double fraction_max = ((uint32_t)1) << fraction_size;
    double fraction = implicit_bit + fraction_bits / fraction_max;

    printf("       = %g * %g\n", fraction, exp2(exponent));
    printf("       = %g\n", fraction * exp2(exponent));
}

union overlay_float {
    float f;
    uint32_t u;
};

// return the raw bits of a float
uint32_t get_float_bits(float f) {
    union overlay_float overlay;
    overlay.f = f;
    return overlay.u;
}

// print out the bits of a float
void print_float_bits(uint32_t bits) {
    printf("sign | exponent |  fraction\n");
    printf("   ");
    print_bits(bits, SIGN_BIT, SIGN_BIT);
    printf(" | ");
    print_bits(bits, EXPONENT_HIGH_BIT, EXPONENT_LOW_BIT);
    printf(" | ");
    print_bits(bits, FRACTION_HIGH_BIT, FRACTION_LOW_BIT);
    printf("\n");
}

// print the binary representation of a value
void print_bits(uint32_t value, int high, int low) {
    for (int i = high; i >= low; i--) {
        int bit = extract_bit_range(value, i, i);
        printf("%d", bit);
    }
}

// extract a range of bits from a value
uint32_t extract_bit_range(uint32_t value, int high, int low) {
    uint32_t mask = (((uint32_t)1) << (high - low + 1)) - 1;
    return (value >> low) & mask;
}

9007199254740993 is smallest integer which can not be represented exactly as a double

The closed double to 9007199254740993 is 9007199254740992.0
As a result loop never terminates
9007199254740992 is 2 to the power of 53
It can not be represented by a int32_t,
It can be represented by int64_t
#include <stdio.h>

int main(void) {
    double d = 9007199254740992;
    while (d < 9007199254740999) {
        printf("%lf\n", d); // prints 9007199254740992.000000
        d = d + 1;
    }
    return 0;
}