/* * a small library of types, functions and macros that * are used throughout the bootstrap compiler. * allocation done purely statically. * * Copyright (c) 2025, Mel G. * * SPDX-License-Identifier: MPL-2.0 */ #pragma once #include #include #include #include #include #include #include #include #define uint8 uint8_t #define uint16 uint16_t #define uint32 uint32_t #define uint64 uint64_t #define int8 int8_t #define int16 int16_t #define int32 int32_t #define int64 int64_t #define float32 float #define float64 double #define real float64 #define uint uint64 #define integer int64 #define flags int32 #define ascii char #define byte char #define bool _Bool #define true 1 #define false 0 #define nil NULL #define unknown void #define NORETURN _Noreturn // ansi escape codes for terminal color and style #define ANSI(code) "\33[" code "m" #define ANSI_RESET ANSI("0") #define ANSI_DEFAULT ANSI("39") #define ANSI_RED ANSI("31") #define ANSI_RED_BG ANSI("41") #define ANSI_GREEN ANSI("32") #define ANSI_GREEN_BG ANSI("42") #define ANSI_YELLOW ANSI("33") #define ANSI_YELLOW_BG ANSI("43") #define ANSI_BLUE ANSI("34") #define ANSI_BLUE_BG ANSI("44") #define ANSI_MAGENTA ANSI("35") #define ANSI_MAGENTA_BG ANSI("45") #define ANSI_CYAN ANSI("36") #define ANSI_CYAN_BG ANSI("46") #define ANSI_WHITE ANSI("37") #define ANSI_WHITE_BG ANSI("47") #define ANSI_BLACK ANSI("30") #define ANSI_BLACK_BG ANSI("40") #define ANSI_BOLD ANSI("1") #define ANSI_NO_BOLD ANSI("22") #define ANSI_UNDERLINE ANSI("4") #define ANSI_NO_UNDERLINE ANSI("24") #define FAILURE_MESSAGE ANSI_BOLD ";( sorry, a failure has occurred..." ANSI_NO_BOLD // call on irrecoverable failure. // prints a very sad, apologetic message for // the user and aborts program with failure status. NORETURN void failure(const ascii* message, ...) { fflush(stdout); // flush stdout to ensure any message is printed before the error. fprintf(stderr, ANSI_RED FAILURE_MESSAGE "\n-> "); va_list args; va_start(args, message); vfprintf(stderr, message, args); va_end(args); fprintf(stderr, "!\n" ANSI_RESET); exit(EXIT_FAILURE); } // internal check function, use the check() macro instead. void _check(bool condition, const ascii* condition_str, const ascii* message, ...) { if (!condition) { va_list args; va_start(args, message); fflush(stdout); fprintf(stderr, ANSI_RED FAILURE_MESSAGE "\n"); fprintf(stderr, "-> check failed: %s\n", condition_str); fprintf(stderr, "-> "); vfprintf(stderr, message, args); fprintf(stderr, "!\n" ANSI_RESET); va_end(args); exit(EXIT_FAILURE); } } // check a condition, triggering a failure if it's false. // prints both the condition and the formatted message. #define check(condition, message, ...) _check((condition), #condition, (message), ##__VA_ARGS__) NORETURN void unreachable() { failure("unreachable code reached"); } // for each entry in a linked list. #define FOR_EACH(type, cursor, head) for (type cursor = head; cursor != nil; cursor = cursor->next) #define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) // the common size of region memory blocks. #define REGION_SIZE 65536 // statically allocates a region of memory of a given size // for a single type. #define REGION_OF_SIZE(type, of, size) \ type region_##of[size]; \ uint region_##of##_cursor = 0; // statically allocates a region of memory for a type. #define REGION(type, of) REGION_OF_SIZE(type, of, REGION_SIZE) // the global array data region. REGION(byte, array_data) // a simple fixed-size array type that uses static allocation. struct _Array { uint element_size; void* data; uint length; uint capacity; }; #define Array(type) struct _Array // allocate memory from the static array region. void* array_allocate_static(size_t size) { check(region_array_data_cursor + size <= ARRAY_SIZE(region_array_data), "out of array memory"); void* ptr = region_array_data + region_array_data_cursor; region_array_data_cursor += size; return ptr; } // creates a new, empty array with fixed capacity using static allocation. struct _Array _array_new(uint element_size, uint capacity) { void* data = array_allocate_static(capacity * element_size); struct _Array array = { .element_size = element_size, .data = data, .length = 0, .capacity = capacity, }; return array; } #define array_new(type, capacity) _array_new(sizeof(type), capacity) // returns the number of elements in the array. uint array_length(const struct _Array* array) { return array->length; } // returns the total capacity of the array. uint array_capacity(const struct _Array* array) { return array->capacity; } // checks if the array is empty. bool array_is_empty(const struct _Array* array) { return array->length == 0; } // internal use, prefer the array_push macro. void _array_push(struct _Array* array, const void* element) { check(array->length < array->capacity, "array is full: %u/%u", array->length, array->capacity); memcpy((ascii*)array->data + (array->length * array->element_size), element, array->element_size); ++array->length; } #define array_push(array, element) _array_push(array, (const void*)(element)) // internal use, prefer the array_at macro. void* _array_at(const struct _Array* array, uint index) { check(index < array->length, "array index out of bounds: %u (length: %u)", index, array->length); return (ascii*)array->data + (index * array->element_size); } #define array_at(type, array, index) ((type*)_array_at(array, index)) // internal use, prefer the array_get macro. void _array_get(const struct _Array* array, uint index, void* out_element) { check(index < array->length, "array index out of bounds: %u (length: %u)", index, array->length); memcpy(out_element, (ascii*)array->data + (index * array->element_size), array->element_size); } #define array_get(array, index, out_element) _array_get(array, index, (void*)(out_element)) // a slice is a view into a block of memory. struct _Slice { uint element_size; void* data; uint length; }; #define Slice(type) struct _Slice // creates a new slice from data. struct _Slice _slice_new(void* data, uint length, uint element_size) { struct _Slice slice = { .data = data, .length = length, .element_size = element_size }; return slice; } #define slice_new(data, length) _slice_new((void*)(data), (length), sizeof(*(data))) // returns the length of a slice. uint slice_length(const struct _Slice* slice) { return slice->length; } // internal use, prefer the slice_at macro. void* _slice_at(const struct _Slice* slice, uint index) { check(index < slice->length, "slice index out of bounds: %u (length: %u)", index, slice->length); return (ascii*)slice->data + (index * slice->element_size); } #define slice_at(type, slice, index) ((type*)_slice_at(slice, index)) // a macro for iterating over each element in an array. #define FOR_EACH_ARRAY(type, element_var, array_ptr, action) \ for (uint i = 0; i < array_length(array_ptr); ++i) { \ type element_var = *array_at(type, array_ptr, i); \ action; \ } // the global string region. REGION(ascii, string) // a string. struct String { ascii* data; uint length; }; // a view of a string. // used for passing around strings without copying them, // it is not assumed to have a permanent lifetime, // nor is it guaranteed to be null-terminated. struct String_View { ascii* data; uint length; }; // formats and prints a string with a given format to stdout. // to use the given string in the format, use the `%S` specifier. // the string is required to be the first argument in the format string. // accepts both `struct String` and `struct String_View`. #define STRING_FORMAT(s, format, ...) \ _internal_string_format(stdout, s.length, format, s.data, ##__VA_ARGS__) // formats and prints a string with a given format to `stream`. // to use the given string in the format, use the `%S` specifier. // the string is required to be the first argument in the format string. // accepts both `struct String` and `struct String_View`. #define STRING_FORMAT_TO(s, stream, format, ...) \ _internal_string_format(stream, s.length, format, s.data, ##__VA_ARGS__) // iterates over each character in a string. // accepts both `struct String` and `struct String_View`. #define STRING_ITERATE(index, c, str) \ ascii c = str.data[0]; \ for (uint index = 0; index < str.length; c = str.data[++index]) // allocates a new string in the global string region. struct String string_new(const ascii* data, uint length) { // for compatibility, we include an additional null byte at the end. uint allocation_length = length + 1; check(region_string_cursor + allocation_length < REGION_SIZE, "out of string memory"); ascii* at = region_string + region_string_cursor; region_string_cursor += allocation_length; for (uint i = 0; i < length; ++i) at[i] = data[i]; at[length] = '\0'; return (struct String){ .data = at, .length = length, }; } struct String string_empty(void) { return (struct String){ .data = nil, .length = 0, }; } bool string_is_empty(struct String s) { return s.data == nil || s.length == 0; } // allocates a new string in the global string region, // taking the data from a null-terminated C string. struct String string_from_c_string(const char* c_string) { uint length = strlen(c_string); return string_new(c_string, length); } // allocates a new string in the global string region, // taking the data from a static null-terminated C string. // // NOTE: The string is not copied, so it MUST have a lifetime // spanning the entire program. struct String string_from_static_c_string(const char* c_string) { uint length = strlen(c_string); return (struct String){ .data = (ascii*)c_string, .length = length, }; } // returns the character at a given index. // does bounds-checking. ascii string_at(struct String s, uint index) { check(index < s.length, "index out of bounds"); return s.data[index]; } uint string_length(struct String s) { return s.length; } bool string_equals(const struct String a, const struct String b) { if (string_length(a) != string_length(b)) return false; if (string_length(a) == 0) return true; return memcmp(a.data, b.data, a.length) == 0; } bool string_equals_c_str(const struct String a, const ascii* b) { uint b_length = strlen(b); if (string_length(a) != b_length) return false; if (string_length(a) == 0) return true; return memcmp(a.data, b, a.length) == 0; } // returns a null-terminated C string representation. // the string is already null-terminated in our implementation, // thus no copy is required. const ascii* string_c_str(struct String s) { return s.data; } // creates a new string with a character appended to // the end of the given string. struct String string_push(struct String s, ascii c) { check(region_string_cursor + s.length + 2 < REGION_SIZE, "out of string memory for push"); ascii* at = region_string + region_string_cursor; region_string_cursor += s.length + 2; for (uint i = 0; i < s.length; ++i) at[i] = s.data[i]; at[s.length] = c; at[s.length + 1] = '\0'; return (struct String){ .data = at, .length = s.length + 1, }; } // creates a new string with the last character of // the given string removed. // pass a non-nil pointer as `removed_char` to retrieve // the removed character. struct String string_pop(struct String s, ascii* removed_char) { check(s.length > 0, "cannot pop from an empty string"); if (removed_char) *removed_char = s.data[s.length - 1]; return (struct String){ .data = s.data, .length = s.length - 1, }; } // creates a new string consisting of string `a` followed by string `b`. struct String string_append(struct String a, struct String b) { if (string_is_empty(b)) return a; uint new_length = a.length + b.length; check(region_string_cursor + new_length + 1 < REGION_SIZE, "out of string memory for append"); ascii* at = region_string + region_string_cursor; region_string_cursor += new_length + 1; for (uint i = 0; i < a.length; ++i) at[i] = a.data[i]; for (uint i = 0; i < b.length; ++i) at[a.length + i] = b.data[i]; at[new_length] = '\0'; return (struct String){ .data = at, .length = new_length, }; } // creates a new string consisting of string `a` followed // by the contents of the c-style string buffer `b`. struct String string_append_c_str(struct String a, const ascii* b) { uint c_str_len = strlen(b); if (c_str_len == 0) return a; uint new_length = a.length + c_str_len; check(region_string_cursor + new_length + 1 < REGION_SIZE, "out of string memory for append_c_str"); ascii* at = region_string + region_string_cursor; region_string_cursor += new_length + 1; for (uint i = 0; i < a.length; ++i) at[i] = a.data[i]; for (uint i = 0; i < c_str_len; ++i) at[a.length + i] = b[i]; at[new_length] = '\0'; return (struct String){ .data = at, .length = new_length, }; } // creates a copy of a string. struct String string_clone(struct String s) { return string_new(s.data, s.length); } // creates a new string consisting of a slice of the original string. // the slice range is defined by `[start, end)`. // if `start == end`, returns an empty string. struct String string_slice(struct String s, uint start, uint end) { check(start <= end && end <= s.length, "invalid slice range [%u, %u) for string of length %u", start, end, s.length); if (start == end) return string_empty(); uint slice_len = end - start; return string_new(s.data + start, slice_len); } // creates a new string with the contents of string `s`, // with the character at index `index` modified to `c`. struct String string_set(struct String s, uint index, ascii c) { check(index < s.length, "index out of bounds: %u (length: %u)", index, s.length); struct String new_s = string_clone(s); new_s.data[index] = c; // safe because we just allocated this return new_s; } // prints the contents of string `s` to stdout. void string_print(struct String s) { printf("%.*s", (int32)s.length, s.data); } enum String_Concat_Arg { ARG_END, ARG_STRING, ARG_ASCII, }; #define MAX_STRING_CONCAT_LENGTH 2048 // concatenates multiple strings and ascii buffers into a single string. // each new argument is prepended by a `String_Concat_Arg` value, // either `ARG_STRING` or `ARG_ASCII`, followed by the argument itself. // the final argument must be `ARG_END`. struct String string_concatenate(enum String_Concat_Arg type1, ...) { va_list args; va_start(args, type1); uint total_length = 0; enum String_Concat_Arg type = type1; while (type != ARG_END) { switch (type) { case ARG_STRING: { struct String s = va_arg(args, struct String); if (!string_is_empty(s)) total_length += s.length; break; } case ARG_ASCII: { ascii* str = va_arg(args, ascii*); if (str) total_length += strlen(str); break; } default: break; } type = va_arg(args, enum String_Concat_Arg); } va_end(args); if (total_length == 0) return string_empty(); check(total_length < MAX_STRING_CONCAT_LENGTH - 1, "string concatenation too long"); ascii buffer[MAX_STRING_CONCAT_LENGTH]; ascii* cursor = buffer; va_start(args, type1); type = type1; while (type != ARG_END) { switch (type) { case ARG_STRING: { struct String s = va_arg(args, struct String); if (!string_is_empty(s)) { memcpy(cursor, s.data, s.length); cursor += s.length; } break; } case ARG_ASCII: { ascii* str = va_arg(args, ascii*); if (str) { uint length = strlen(str); memcpy(cursor, str, length); cursor += length; } break; } default: break; } type = va_arg(args, enum String_Concat_Arg); } va_end(args); return string_new(buffer, total_length); } // creates a new string view from a substring of the given string `s`. // the view range is defined by `[start, end)`. struct String_View string_substring(struct String s, uint start, uint end) { check(start <= end && end <= s.length, "substring out of bounds"); return (struct String_View){ .data = s.data + start, .length = end - start, }; } // creates a new string view from an ascii buffer. // the buffer is not copied, so it must have a lifetime // spanning the at least the lifetime of the string view. // null-termination is not required. struct String_View string_view_new(ascii* data, uint length) { return (struct String_View){ .data = data, .length = length, }; } // creates a new string view from a string. struct String_View string_view_from_string(struct String s) { return (struct String_View){ .data = s.data, .length = s.length, }; } // creates empty string view. struct String_View string_view_empty(void) { return (struct String_View){ .data = nil, .length = 0, }; } // checks if a string view is empty. bool string_view_is_empty(struct String_View view) { return view.data == nil || view.length == 0; } uint string_view_length(struct String_View view) { return view.length; } // returns the character at a given index in a string view. ascii string_view_at(struct String_View view, uint index) { check(index < view.length, "index out of bounds"); return view.data[index]; } // creates a new string view from a c-style string buffer. struct String_View string_view_from_c_str(const ascii* c_str) { return (struct String_View){ .data = (ascii*)c_str, .length = strlen(c_str), }; } // compares two string views for equality. bool string_view_equals(struct String_View a, struct String_View b) { if (a.length != b.length) return false; if (a.length == 0) return true; return memcmp(a.data, b.data, a.length) == 0; } // prints a string view to stdout. void string_view_print(struct String_View view) { printf("%.*s", (int32)view.length, view.data); } #define MAX_FORMAT_STRING_SIZE 256 // creates a real c format string from our faux-format string. // example: // "Hello %S! You are %d years old." // becomes: // "Hello %.6s! You are %d years old." // if we assume the string length is 6. void _internal_prepare_format_string(ascii* format_buffer, const ascii* format, uint static_length) { ascii specifier[10]; int specifier_length = snprintf(specifier, sizeof(specifier), "%%.%lus", static_length); check(specifier_length < sizeof(specifier), "string to format is too long"); for (uint fi = 0, fbi = 0; fi < MAX_FORMAT_STRING_SIZE && fbi < MAX_FORMAT_STRING_SIZE; ++fi, ++fbi) { ascii c = format[fi]; format_buffer[fbi] = c; if (c == '\0') break; // check if the next character is the format specifier 'S', that // we specified as our own custom format specifier. if (c == '%' && format[fi + 1] == 'S') { // copy the specifier into the format buffer. for (uint si = 0; si < specifier_length; ++si, ++fbi) format_buffer[fbi] = specifier[si]; fbi--, fi++; // increment `fi` to skip 'S', decrement `fbi` to not leave a gap. } } } // formats a string using the given faux-format string, printing it to `stream`. // the first VA arguments is expected to be the string data. // do not use directly! use `STRING_FORMAT_TO`. void _internal_string_format(FILE* stream, uint string_length, const ascii* format, ...) { ascii format_buffer[MAX_FORMAT_STRING_SIZE]; _internal_prepare_format_string(format_buffer, format, string_length); va_list args; va_start(args, format); vfprintf(stream, format_buffer, args); va_end(args); } // a source file given to the compiler. struct Source_File { struct String source; // path to the source file, relative to the current working directory. struct String path; }; // single iteration of the CRC32 checksum algorithm // described in POSIX. // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html // used by `crc32_posix`. uint32 crc32_posix_iteration(uint32 initial_hash, uint8 octet) { const uint32 iso_polynomial = 0x4C11DB7; octet ^= initial_hash >> 24; uint32 hash = 0; uint32 poly = iso_polynomial; for (uint8 bit = 0; bit < 8; bit++) { if (octet & (1 << bit)) hash ^= poly; uint32 poly_msb = poly & (1 << 31); poly <<= 1; if (poly_msb) poly ^= iso_polynomial; } return hash ^ (initial_hash << 8); } // terse implementation of the POSIX CRC32 checksum algorithm // meant for the `cksum` utility, which can be used through // the GNU coreutils `cksum command`: // `echo -ne "string to hash" | cksum` // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html uint32 crc32_posix(struct String str) { uint32 hash = 0; STRING_ITERATE(i, c, str) { hash = crc32_posix_iteration(hash, c); } uint32 length = string_length(str); while (length) { hash = ~crc32_posix_iteration(hash, length); length >>= 8; } return hash; }