/* * a small library of types, functions and macros that * are used throughout the bootstrap compiler. * allocation done purely statically. * * Copyright (c) 2025, Mel G. * * SPDX-License-Identifier: MPL-2.0 */ // TODO: merge our common library with `boot/runtime/core.c` // to avoid implementing a semi-standard library twice! #pragma once #include #include #include #include #include #include #include #include #define uint8 uint8_t #define uint16 uint16_t #define uint32 uint32_t #define uint64 uint64_t #define int8 int8_t #define int16 int16_t #define int32 int32_t #define int64 int64_t #define float32 float #define float64 double #define real float64 #define uint uint64 #define integer int64 #define flags int32 #define ascii char #define byte char #define bool _Bool #define true 1 #define false 0 #define nil NULL #define unknown void #define NORETURN _Noreturn // ansi escape codes for terminal color and style #define ANSI(code) "\33[" code "m" #define ANSI_RESET ANSI("0") #define ANSI_DEFAULT ANSI("39") #define ANSI_RED ANSI("31") #define ANSI_RED_BG ANSI("41") #define ANSI_GREEN ANSI("32") #define ANSI_GREEN_BG ANSI("42") #define ANSI_YELLOW ANSI("33") #define ANSI_YELLOW_BG ANSI("43") #define ANSI_BLUE ANSI("34") #define ANSI_BLUE_BG ANSI("44") #define ANSI_MAGENTA ANSI("35") #define ANSI_MAGENTA_BG ANSI("45") #define ANSI_CYAN ANSI("36") #define ANSI_CYAN_BG ANSI("46") #define ANSI_WHITE ANSI("37") #define ANSI_WHITE_BG ANSI("47") #define ANSI_BLACK ANSI("30") #define ANSI_BLACK_BG ANSI("40") #define ANSI_BOLD ANSI("1") #define ANSI_NO_BOLD ANSI("22") #define ANSI_UNDERLINE ANSI("4") #define ANSI_NO_UNDERLINE ANSI("24") // call on irrecoverable failure. // prints a very sad, apologetic message for // the user and aborts program with failure status. NORETURN void failure(const ascii* message) { fflush(stdout); // flush stdout to ensure any message is printed before the error. const ascii* format = ANSI_RED ANSI_BOLD ";( sorry, a failure has occurred..." ANSI_NO_BOLD "\n" "-> %s!" "\n" ANSI_RESET; fprintf(stderr, format, message); exit(EXIT_FAILURE); } // check a condition, triggering a failure if it's false. void check(bool condition, const ascii* message) { if (!condition) failure(message); } NORETURN void unreachable() { failure("unreachable code reached"); } // for each entry in a linked list. #define FOR_EACH(type, cursor, head) for (type cursor = head; cursor != nil; cursor = cursor->next) #define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) // the common size of region memory blocks. #define REGION_SIZE 65536 // statically allocates a region of memory of a given size // for a single type. #define REGION_OF_SIZE(type, of, size) \ type region_##of[size]; \ uint region_##of##_cursor = 0; // statically allocates a region of memory for a type. #define REGION(type, of) REGION_OF_SIZE(type, of, REGION_SIZE) // the global string region. REGION(ascii, string) // a string. struct String { ascii* data; uint length; }; // a view of a string. // used for passing around strings without copying them, // it is not assumed to have a permanent lifetime, // nor is it guaranteed to be null-terminated. struct String_View { ascii* data; uint length; }; // formats and prints a string with a given format to stdout. // to use the given string in the format, use the `%S` specifier. // the string is required to be the first argument in the format string. // accepts both `struct String` and `struct String_View`. #define STRING_FORMAT(s, format, ...) \ _internal_string_format(stdout, s.length, format, s.data, ##__VA_ARGS__) // formats and prints a string with a given format to `stream`. // to use the given string in the format, use the `%S` specifier. // the string is required to be the first argument in the format string. // accepts both `struct String` and `struct String_View`. #define STRING_FORMAT_TO(s, stream, format, ...) \ _internal_string_format(stream, s.length, format, s.data, ##__VA_ARGS__) // iterates over each character in a string. // accepts both `struct String` and `struct String_View`. #define STRING_ITERATE(index, c, str) \ ascii c = str.data[0]; \ for (uint index = 0; index < str.length; c = str.data[++index]) // allocates a new string in the global string region. struct String string_new(const ascii* data, uint length) { // for compatibility, we include an additional null byte at the end. uint allocation_length = length + 1; check(region_string_cursor + allocation_length < REGION_SIZE, "out of string memory"); ascii* at = region_string + region_string_cursor; region_string_cursor += allocation_length; for (uint i = 0; i < length; ++i) at[i] = data[i]; at[length] = '\0'; return (struct String){ .data = at, .length = length, }; } struct String string_empty(void) { return (struct String){ .data = nil, .length = 0, }; } bool string_is_empty(struct String s) { return s.data == nil || s.length == 0; } // allocates a new string in the global string region, // taking the data from a null-terminated C string. struct String string_from_c_string(const char* c_string) { uint length = strlen(c_string); return string_new(c_string, length); } // allocates a new string in the global string region, // taking the data from a static null-terminated C string. // // NOTE: The string is not copied, so it MUST have a lifetime // spanning the entire program. struct String string_from_static_c_string(const char* c_string) { uint length = strlen(c_string); return (struct String){ .data = (ascii*)c_string, .length = length, }; } // returns the character at a given index. // does bounds-checking. ascii string_at(struct String s, uint index) { check(index < s.length, "index out of bounds"); return s.data[index]; } uint string_length(struct String s) { return s.length; } bool string_equals(const struct String a, const struct String b) { if (string_length(a) != string_length(b)) return false; if (string_length(a) == 0) return true; return memcmp(a.data, b.data, a.length) == 0; } bool string_equals_c_str(const struct String a, const ascii* b) { uint b_length = strlen(b); if (string_length(a) != b_length) return false; if (string_length(a) == 0) return true; return memcmp(a.data, b, a.length) == 0; } void string_print(struct String s) { printf("%.*s", (int32)s.length, s.data); } void string_format(struct String s, const ascii* format, ...) { va_list args; va_start(args, format); vprintf(format, args); va_end(args); } enum String_Concat_Arg { ARG_END, ARG_STRING, ARG_ASCII, }; #define MAX_STRING_CONCAT_LENGTH 2048 struct String string_concatenate(enum String_Concat_Arg type1, ...) { va_list args; va_start(args, type1); uint total_length = 0; enum String_Concat_Arg type = type1; while (type != ARG_END) { switch (type) { case ARG_STRING: { struct String s = va_arg(args, struct String); if (!string_is_empty(s)) total_length += s.length; break; } case ARG_ASCII: { ascii* str = va_arg(args, ascii*); if (str) total_length += strlen(str); break; } default: break; } type = va_arg(args, enum String_Concat_Arg); } va_end(args); if (total_length == 0) return string_empty(); check(total_length < MAX_STRING_CONCAT_LENGTH - 1, "string concatenation too long"); ascii buffer[MAX_STRING_CONCAT_LENGTH]; ascii* cursor = buffer; va_start(args, type1); type = type1; while (type != ARG_END) { switch (type) { case ARG_STRING: { struct String s = va_arg(args, struct String); if (!string_is_empty(s)) { memcpy(cursor, s.data, s.length); cursor += s.length; } break; } case ARG_ASCII: { ascii* str = va_arg(args, ascii*); if (str) { uint length = strlen(str); memcpy(cursor, str, length); cursor += length; } break; } default: break; } type = va_arg(args, enum String_Concat_Arg); } va_end(args); return string_new(buffer, total_length); } struct String_View string_substring(struct String s, uint start, uint end) { check(start <= end && end <= s.length, "substring out of bounds"); return (struct String_View){ .data = s.data + start, .length = end - start, }; } // creates a new string view from an ascii buffer. // the buffer is not copied, so it must have a lifetime // spanning the at least the lifetime of the string view. // null-termination is not required. struct String_View string_view_new(ascii* data, uint length) { return (struct String_View){ .data = data, .length = length, }; } // creates a new string view from a string. struct String_View string_view_from_string(struct String s) { return (struct String_View){ .data = s.data, .length = s.length, }; } // creates empty string view. struct String_View string_view_empty(void) { return (struct String_View){ .data = nil, .length = 0, }; } // checks if a string view is empty. bool string_view_is_empty(struct String_View view) { return view.data == nil || view.length == 0; } uint string_view_length(struct String_View view) { return view.length; } // returns the character at a given index in a string view. ascii string_view_at(struct String_View view, uint index) { check(index < view.length, "index out of bounds"); return view.data[index]; } // prints a string view to stdout. void string_view_print(struct String_View view) { printf("%.*s", (int32)view.length, view.data); } #define MAX_FORMAT_STRING_SIZE 256 // creates a real c format string from our faux-format string. // example: // "Hello %S! You are %d years old." // becomes: // "Hello %.6s! You are %d years old." // if we assume the string length is 6. void _internal_prepare_format_string(ascii* format_buffer, const ascii* format, uint static_length) { ascii specifier[10]; int specifier_length = snprintf(specifier, sizeof(specifier), "%%.%lus", static_length); check(specifier_length < sizeof(specifier), "string to format is too long"); for (uint fi = 0, fbi = 0; fi < MAX_FORMAT_STRING_SIZE && fbi < MAX_FORMAT_STRING_SIZE; ++fi, ++fbi) { ascii c = format[fi]; format_buffer[fbi] = c; if (c == '\0') break; // check if the next character is the format specifier 'S', that // we specified as our own custom format specifier. if (c == '%' && format[fi + 1] == 'S') { // copy the specifier into the format buffer. for (uint si = 0; si < specifier_length; ++si, ++fbi) format_buffer[fbi] = specifier[si]; fbi--, fi++; // increment `fi` to skip 'S', decrement `fbi` to not leave a gap. } } } // formats a string using the given faux-format string, printing it to `stream`. // the first VA arguments is expected to be the string data. // do not use directly! use `STRING_FORMAT_TO`. void _internal_string_format(FILE* stream, uint string_length, const ascii* format, ...) { ascii format_buffer[MAX_FORMAT_STRING_SIZE]; _internal_prepare_format_string(format_buffer, format, string_length); va_list args; va_start(args, format); vfprintf(stream, format_buffer, args); va_end(args); } #define STRING_ARRAY_MAX 8 // a string array, used for storing multiple strings. // if we ever need more strings, just bump `STRING_ARRAY_MAX` up. struct String_Array { struct String strings[STRING_ARRAY_MAX]; uint count; }; // initializes a string array with no strings. struct String_Array string_array_new(void) { return (struct String_Array){ .strings = { 0 }, .count = 0, }; } // adds a string to the string array. bool string_array_add(struct String_Array* array, struct String string) { if (array->count >= STRING_ARRAY_MAX) return false; array->strings[array->count++] = string; return true; } struct String string_array_at(const struct String_Array* array, uint index) { check(index < array->count, "index out of bounds"); return array->strings[index]; } #define STRING_ARRAY_FOR_EACH(cursor, str, array) \ struct String str = array.strings[0]; \ for (uint cursor = 0; cursor < array.count; str = array.strings[++cursor]) // a source file given to the compiler. struct Source_File { struct String source; // path to the source file, relative to the current working directory. struct String path; }; // single iteration of the CRC32 checksum algorithm // described in POSIX. // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html // used by `crc32_posix`. uint32 crc32_posix_iteration(uint32 initial_hash, uint8 octet) { const uint32 iso_polynomial = 0x4C11DB7; octet ^= initial_hash >> 24; uint32 hash = 0; uint32 poly = iso_polynomial; for (uint8 bit = 0; bit < 8; bit++) { if (octet & (1 << bit)) hash ^= poly; uint32 poly_msb = poly & (1 << 31); poly <<= 1; if (poly_msb) poly ^= iso_polynomial; } return hash ^ (initial_hash << 8); } // terse implementation of the POSIX CRC32 checksum algorithm // meant for the `cksum` utility, which can be used through // the GNU coreutils `cksum command`: // `echo -ne "string to hash" | cksum` // see: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/cksum.html uint32 crc32_posix(struct String str) { uint32 hash = 0; STRING_ITERATE(i, c, str) { hash = crc32_posix_iteration(hash, c); } uint32 length = string_length(str); while (length) { hash = ~crc32_posix_iteration(hash, length); length >>= 8; } return hash; }