From 6b8e95d250272a53dfe73e03fb08cb88b1eee275 Mon Sep 17 00:00:00 2001 From: hiperiondev Date: Tue, 22 Aug 2023 18:23:27 -0300 Subject: [PATCH] Add disassembler alternative format --- .gitignore | 88 +++-- tools/disassembler/cargs.c | 422 ++++++++++++++++++++++ tools/disassembler/cargs.h | 164 +++++++++ tools/disassembler/disassembler.c | 560 +++++++++++++++++++----------- tools/disassembler/disassembler.h | 2 +- tools/disassembler/main.c | 42 ++- tools/disassembler/utils.c | 52 +++ tools/disassembler/utils.h | 24 ++ 8 files changed, 1110 insertions(+), 244 deletions(-) create mode 100644 tools/disassembler/cargs.c create mode 100644 tools/disassembler/cargs.h create mode 100644 tools/disassembler/utils.c create mode 100644 tools/disassembler/utils.h diff --git a/.gitignore b/.gitignore index a3e12f7..183033f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,35 +1,59 @@ -#Editor generated files -*.suo -*.ncb -*.user -compile_commands.json +# Prerequisites +*.d -#Directories -Release/ -Debug/ -Out/ -release/ -debug/ -out/ -bin/ -.cache/ -.vs/ - -#Project generated files -*.db +# Object files *.o -*.a -*.so -*.dll -*.exe -*.meta -*.log -*.out -*.stackdump -*.tb -*.filters -[Dd]ocs/ +*.ko +*.obj +*.elf -#Shell files -*.bat -*.sh +# Linker output +*.ilk +*.map +*.exp + +# Precompiled Headers +*.gch +*.pch + +# Libraries +*.lib +*.a +*.la +*.lo + +# Shared objects (inc. Windows DLLs) +*.dll +*.so +*.so.* +*.dylib + +# Executables +*.exe +*.out +*.app +*.i*86 +*.x86_64 +*.hex + +# Debug files +*.dSYM/ +*.su +*.idb +*.pdb + +# Kernel Module Compile Results +*.mod* +*.cmd +.tmp_versions/ +modules.order +Module.symvers +Mkfile.old +dkms.conf + +.cproject +.project +.settings/ +temp/ +Release/ +out/ \ No newline at end of file diff --git a/tools/disassembler/cargs.c b/tools/disassembler/cargs.c new file mode 100644 index 0000000..9b1a36b --- /dev/null +++ b/tools/disassembler/cargs.c @@ -0,0 +1,422 @@ +#include +#include +#include +#include +#include + +#define CAG_OPTION_PRINT_DISTANCE 4 +#define CAG_OPTION_PRINT_MIN_INDENTION 20 + +static void cag_option_print_value(const cag_option *option, + size_t *accessor_length, FILE *destination) { + if (option->value_name != NULL) { + *accessor_length += fprintf(destination, "=%s", option->value_name); + } +} + +static void cag_option_print_letters(const cag_option *option, bool *first, + size_t *accessor_length, FILE *destination) { + const char *access_letter; + access_letter = option->access_letters; + if (access_letter != NULL) { + while (*access_letter) { + if (*first) { + *accessor_length += fprintf(destination, "-%c", *access_letter); + *first = false; + } else { + *accessor_length += fprintf(destination, ", -%c", + *access_letter); + } + ++access_letter; + } + } +} + +static void cag_option_print_name(const cag_option *option, bool *first, + size_t *accessor_length, FILE *destination) { + if (option->access_name != NULL) { + if (*first) { + *accessor_length += fprintf(destination, "--%s", + option->access_name); + } else { + *accessor_length += fprintf(destination, ", --%s", + option->access_name); + } + } +} + +static size_t cag_option_get_print_indention(const cag_option *options, + size_t option_count) { + size_t option_index, indention, result; + const cag_option *option; + + result = CAG_OPTION_PRINT_MIN_INDENTION; + + for (option_index = 0; option_index < option_count; ++option_index) { + indention = CAG_OPTION_PRINT_DISTANCE; + option = &options[option_index]; + if (option->access_letters != NULL && *option->access_letters) { + indention += strlen(option->access_letters) * 4 - 2; + if (option->access_name != NULL) { + indention += strlen(option->access_name) + 4; + } + } else if (option->access_name != NULL) { + indention += strlen(option->access_name) + 2; + } + + if (option->value_name != NULL) { + indention += strlen(option->value_name) + 1; + } + + if (indention > result) { + result = indention; + } + } + + return result; +} + +void cag_option_print(const cag_option *options, size_t option_count, + FILE *destination) { + size_t option_index, indention, i, accessor_length; + const cag_option *option; + bool first; + + indention = cag_option_get_print_indention(options, option_count); + + for (option_index = 0; option_index < option_count; ++option_index) { + option = &options[option_index]; + accessor_length = 0; + first = true; + + fputs(" ", destination); + + cag_option_print_letters(option, &first, &accessor_length, destination); + cag_option_print_name(option, &first, &accessor_length, destination); + cag_option_print_value(option, &accessor_length, destination); + + for (i = accessor_length; i < indention; ++i) { + fputs(" ", destination); + } + + fputs(" ", destination); + fputs(option->description, destination); + + fprintf(destination, "\n"); + } +} + +void cag_option_prepare(cag_option_context *context, const cag_option *options, + size_t option_count, int argc, char **argv) { + // This just initialized the values to the beginning of all the arguments. + context->options = options; + context->option_count = option_count; + context->argc = argc; + context->argv = argv; + context->index = 1; + context->inner_index = 0; + context->forced_end = false; +} + +static const cag_option* cag_option_find_by_name(cag_option_context *context, + char *name, size_t name_size) { + const cag_option *option; + size_t i; + + // We loop over all the available options and stop as soon as we have found + // one. We don't use any hash map table, since there won't be that many + // arguments anyway. + for (i = 0; i < context->option_count; ++i) { + option = &context->options[i]; + + // The option might not have an item name, we can just skip those. + if (option->access_name == NULL) { + continue; + } + + // Try to compare the name of the access name. We can use the name_size or + // this comparison, since we are guaranteed to have null-terminated access + // names. + if (strncmp(option->access_name, name, name_size) == 0) { + return option; + } + } + + return NULL; +} + +static const cag_option* cag_option_find_by_letter(cag_option_context *context, + char letter) { + const cag_option *option; + size_t i; + + // We loop over all the available options and stop as soon as we have found + // one. We don't use any look up table, since there won't be that many + // arguments anyway. + for (i = 0; i < context->option_count; ++i) { + option = &context->options[i]; + + // If this option doesn't have any access letters we will skip them. + if (option->access_letters == NULL) { + continue; + } + + // Verify whether this option has the access letter in it's access letter + // string. If it does, then this is our option. + if (strchr(option->access_letters, letter) != NULL) { + return option; + } + } + + return NULL; +} + +static void cag_option_parse_value(cag_option_context *context, + const cag_option *option, char **c) { + // And now let's check whether this option is supposed to have a value, which + // is the case if there is a value name set. The value can be either submitted + // with a '=' sign or a space, which means we would have to jump over to the + // next argv index. This is somewhat ugly, but we do it to behave the same as + // the other option parsers. + if (option->value_name != NULL) { + if (**c == '=') { + context->value = ++(*c); + } else { + // If the next index is larger or equal to the argument count, then the + // parameter for this option is missing. The user will know about this, + // since the value pointer of the context will be NULL because we don't + // set it here in that case. + if (context->argc > context->index + 1) { + // We consider this argv to be the value, no matter what the contents + // are. + ++context->index; + *c = context->argv[context->index]; + context->value = *c; + } + } + + // Move c to the end of the value, to not confuse the caller about our + // position. + while (**c) { + ++(*c); + } + } +} + +static void cag_option_parse_access_name(cag_option_context *context, char **c) { + const cag_option *option; + char *n; + + // Now we need to extract the access name, which is any symbol up to a '=' or + // a '\0'. + n = *c; + while (**c && **c != '=') { + ++*c; + } + + // Now this will obviously always be true, but we are paranoid. Sometimes. It + // doesn't hurt to check. + assert(*c >= n); + + // Figure out which option this name belongs to. This might return NULL if the + // name is not registered, which means the user supplied an unknown option. In + // that case we return true to indicate that we finished with this option. We + // have to skip the value parsing since we don't know whether the user thinks + // this option has one or not. Since we don't set any identifier specifically, + // it will remain '?' within the context. + option = cag_option_find_by_name(context, n, (size_t) (*c - n)); + if (option == NULL) { + // Since this option is invalid, we will move on to the next index. There is + // nothing we can do about this. + ++context->index; + return; + } + + // We found an option and now we can specify the identifier within the + // context. + context->identifier = option->identifier; + + // And now we try to parse the value. This function will also check whether + // this option is actually supposed to have a value. + cag_option_parse_value(context, option, c); + + // And finally we move on to the next index. + ++context->index; +} + +static void cag_option_parse_access_letter(cag_option_context *context, + char **c) { + const cag_option *option; + char *n = *c; + char *v; + + // Figure out which option this letter belongs to. This might return NULL if + // the letter is not registered, which means the user supplied an unknown + // option. In that case we return true to indicate that we finished with this + // option. We have to skip the value parsing since we don't know whether the + // user thinks this option has one or not. Since we don't set any identifier + // specifically, it will remain '?' within the context. + option = cag_option_find_by_letter(context, n[context->inner_index]); + if (option == NULL) { + ++context->index; + context->inner_index = 0; + return; + } + + // We found an option and now we can specify the identifier within the + // context. + context->identifier = option->identifier; + + // And now we try to parse the value. This function will also check whether + // this option is actually supposed to have a value. + v = &n[++context->inner_index]; + cag_option_parse_value(context, option, &v); + + // Check whether we reached the end of this option argument. + if (*v == '\0') { + ++context->index; + context->inner_index = 0; + } +} + +static void cag_option_shift(cag_option_context *context, int start, int option, + int end) { + char *tmp; + int a_index, shift_index, shift_count, left_index, right_index; + + shift_count = option - start; + + // There is no shift is required if the start and the option have the same + // index. + if (shift_count == 0) { + return; + } + + // Lets loop through the option strings first, which we will move towards the + // beginning. + for (a_index = option; a_index < end; ++a_index) { + // First remember the current option value, because we will have to save + // that later at the beginning. + tmp = context->argv[a_index]; + + // Let's loop over all option values and shift them one towards the end. + // This will override the option value we just stored temporarily. + for (shift_index = 0; shift_index < shift_count; ++shift_index) { + left_index = a_index - shift_index; + right_index = a_index - shift_index - 1; + context->argv[left_index] = context->argv[right_index]; + } + + // Now restore the saved option value at the beginning. + context->argv[a_index - shift_count] = tmp; + } + + // The new index will be before all non-option values, in such a way that they + // all will be moved again in the next fetch call. + context->index = end - shift_count; +} + +static bool cag_option_is_argument_string(const char *c) { + return *c == '-' && *(c + 1) != '\0'; +} + +static int cag_option_find_next(cag_option_context *context) { + int next_index, next_option_index; + char *c; + + // Prepare to search the next option at the next index. + next_index = context->index; + next_option_index = next_index; + + // Grab a pointer to the string and verify that it is not the end. If it is + // the end, we have to return false to indicate that we finished. + c = context->argv[next_option_index]; + if (context->forced_end || c == NULL) { + return -1; + } + + // Check whether it is a '-'. We need to find the next option - and an option + // always starts with a '-'. If there is a string "-\0", we don't consider it + // as an option neither. + while (!cag_option_is_argument_string(c)) { + c = context->argv[++next_option_index]; + if (c == NULL) { + // We reached the end and did not find any argument anymore. Let's tell + // our caller that we reached the end. + return -1; + } + } + + // Indicate that we found an option which can be processed. The index of the + // next option will be returned. + return next_option_index; +} + +bool cag_option_fetch(cag_option_context *context) { + char *c; + int old_index, new_index; + + // Reset our identifier to a question mark, which indicates an "unknown" + // option. The value is set to NULL, to make sure we are not carrying the + // parameter from the previous option to this one. + context->identifier = '?'; + context->value = NULL; + + // Check whether there are any options left to parse and remember the old + // index as well as the new index. In the end we will move the option junk to + // the beginning, so that non option arguments can be read. + old_index = context->index; + new_index = cag_option_find_next(context); + if (new_index >= 0) { + context->index = new_index; + } else { + return false; + } + + // Grab a pointer to the beginning of the option. At this point, the next + // character must be a '-', since if it was not the prepare function would + // have returned false. We will skip that symbol and proceed. + c = context->argv[context->index]; + assert(*c == '-'); + ++c; + + // Check whether this is a long option, starting with a double "--". + if (*c == '-') { + ++c; + + // This might be a double "--" which indicates the end of options. If this + // is the case, we will not move to the next index. That ensures that + // another call to the fetch function will not skip the "--". + if (*c == '\0') { + context->forced_end = true; + } else { + // We parse now the access name. All information about it will be written + // to the context. + cag_option_parse_access_name(context, &c); + } + } else { + // This is no long option, so we can just parse an access letter. + cag_option_parse_access_letter(context, &c); + } + + // Move the items so that the options come first followed by non-option + // arguments. + cag_option_shift(context, old_index, new_index, context->index); + + return context->forced_end == false; +} + +char cag_option_get(const cag_option_context *context) { + // We just return the identifier here. + return context->identifier; +} + +const char* cag_option_get_value(const cag_option_context *context) { + // We just return the internal value pointer of the context. + return context->value; +} + +int cag_option_get_index(const cag_option_context *context) { + // Either we point to a value item, + return context->index; +} diff --git a/tools/disassembler/cargs.h b/tools/disassembler/cargs.h new file mode 100644 index 0000000..d1ae159 --- /dev/null +++ b/tools/disassembler/cargs.h @@ -0,0 +1,164 @@ +#ifndef CARGS_H_ +#define CARGS_H_ + +/** + * This is a simple alternative cross-platform implementation of getopt, which + * is used to parse argument strings submitted to the executable (argc and argv + * which are received in the main function). + */ + +#ifndef CAG_LIBRARY_H +#define CAG_LIBRARY_H + +#include +#include +#include + +#if defined(_WIN32) || defined(__CYGWIN__) +#define CAG_EXPORT __declspec(dllexport) +#define CAG_IMPORT __declspec(dllimport) +#elif __GNUC__ >= 4 +#define CAG_EXPORT __attribute__((visibility("default"))) +#define CAG_IMPORT __attribute__((visibility("default"))) +#else +#define CAG_EXPORT +#define CAG_IMPORT +#endif + +#if defined(CAG_SHARED) +#if defined(CAG_EXPORTS) +#define CAG_PUBLIC CAG_EXPORT +#else +#define CAG_PUBLIC CAG_IMPORT +#endif +#else +#define CAG_PUBLIC +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * An option is used to describe a flag/argument option submitted when the + * program is run. + */ +typedef struct cag_option { + const char identifier; + const char *access_letters; + const char *access_name; + const char *value_name; + const char *description; +} cag_option; + +/** + * A context is used to iterate over all options provided. It stores the parsing + * state. + */ +typedef struct cag_option_context { + const struct cag_option *options; + size_t option_count; + int argc; + char **argv; + int index; + int inner_index; + bool forced_end; + char identifier; + char *value; +} cag_option_context; + +/** + * This is just a small macro which calculates the size of an array. + */ +#define CAG_ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +/** + * @brief Prints all options to the terminal. + * + * This function prints all options to the terminal. This can be used to + * generate the output for a "--help" option. + * + * @param options The options which will be printed. + * @param option_count The option count which will be printed. + * @param destination The destination where the output will be printed. + */ +CAG_PUBLIC void cag_option_print(const cag_option *options, size_t option_count, + FILE *destination); + +/** + * @brief Prepare argument options context for parsing. + * + * This function prepares the context for iteration and initializes the context + * with the supplied options and arguments. After the context has been prepared, + * it can be used to fetch arguments from it. + * + * @param context The context which will be initialized. + * @param options The registered options which are available for the program. + * @param option_count The amount of options which are available for the + * program. + * @param argc The amount of arguments the user supplied in the main function. + * @param argv A pointer to the arguments of the main function. + */ +CAG_PUBLIC void cag_option_prepare(cag_option_context *context, + const cag_option *options, size_t option_count, int argc, char **argv); + +/** + * @brief Fetches an option from the argument list. + * + * This function fetches a single option from the argument list. The context + * will be moved to that item. Information can be extracted from the context + * after the item has been fetched. + * The arguments will be re-ordered, which means that non-option arguments will + * be moved to the end of the argument list. After all options have been + * fetched, all non-option arguments will be positioned after the index of + * the context. + * + * @param context The context from which we will fetch the option. + * @return Returns true if there was another option or false if the end is + * reached. + */ +CAG_PUBLIC bool cag_option_fetch(cag_option_context *context); + +/** + * @brief Gets the identifier of the option. + * + * This function gets the identifier of the option, which should be unique to + * this option and can be used to determine what kind of option this is. + * + * @param context The context from which the option was fetched. + * @return Returns the identifier of the option. + */ +CAG_PUBLIC char cag_option_get(const cag_option_context *context); + +/** + * @brief Gets the value from the option. + * + * This function gets the value from the option, if any. If the option does not + * contain a value, this function will return NULL. + * + * @param context The context from which the option was fetched. + * @return Returns a pointer to the value or NULL if there is no value. + */ +CAG_PUBLIC const char* cag_option_get_value(const cag_option_context *context); + +/** + * @brief Gets the current index of the context. + * + * This function gets the index within the argv arguments of the context. The + * context always points to the next item which it will inspect. This is + * particularly useful to inspect the original argument array, or to get + * non-option arguments after option fetching has finished. + * + * @param context The context from which the option was fetched. + * @return Returns the current index of the context. + */ +CAG_PUBLIC int cag_option_get_index(const cag_option_context *context); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif + +#endif /* CARGS_H_ */ + diff --git a/tools/disassembler/disassembler.c b/tools/disassembler/disassembler.c index a6f70d2..bad723f 100644 --- a/tools/disassembler/disassembler.c +++ b/tools/disassembler/disassembler.c @@ -13,6 +13,7 @@ #include #include +#include "utils.h" #include "disassembler.h" #define SPC(n) printf("%.*s", n, "| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |"); @@ -167,10 +168,11 @@ typedef struct dis_program_s { uint32_t pc; } dis_program_t; -typedef struct dis_func_op_s { - uint32_t start; - uint32_t end; -} dis_func_op_t; +typedef struct fun_code_s { + uint32_t start; + uint32_t len; + char *fun; +} fun_code_t; static void dis_print_opcode(uint8_t op); @@ -307,259 +309,399 @@ static void dis_print_opcode(uint8_t op) { exit(1); \ } -static void dis_disassemble_section(dis_program_t **prg, uint32_t pc, uint32_t len, uint8_t spaces, bool is_function) { - uint8_t opcode; - uint32_t uint; - int32_t intg; - float flt; - char *str; +static void dis_disassemble_section(dis_program_t **prg, uint32_t pc, + uint32_t len, uint8_t spaces, bool is_function, bool alt_fmt) { + uint8_t opcode; + uint32_t uint; + int32_t intg; + float flt; + char *str; - //first 4 bytes of the program section within a function are actually specifying the parameter and return lists - if (is_function) { - printf("\n"); - uint16_t args = readWord((*prg)->program, &pc); - uint16_t rets = readWord((*prg)->program, &pc); - SPC(spaces); - printf("| ( args [%d], rets [%d] )", args, rets); - } + //first 4 bytes of the program section within a function are actually specifying the parameter and return lists + if (is_function) { + printf("\n"); + uint16_t args = readWord((*prg)->program, &pc); + uint16_t rets = readWord((*prg)->program, &pc); + if (!alt_fmt) { + SPC(spaces); + printf("| "); + } else + printf(" .comment args [%d], rets [%d]", args, rets); + } - uint32_t pc_start = pc; - while (pc < len) { - opcode = (*prg)->program[pc]; - printf("\n"); - SPC(spaces); - printf("| [%05d](%03d) ", (pc++) - pc_start, opcode); - dis_print_opcode(opcode); + uint32_t pc_start = pc; + while (pc < len) { + opcode = (*prg)->program[pc]; + if (alt_fmt && (opcode == 255 || opcode == 0)) { + ++pc; + continue; + } - if (opcode > DIS_OP_END_OPCODES) - continue; + printf("\n"); + if (!alt_fmt) { + SPC(spaces); + printf("| "); + } else + printf(" "); - S_OP(0); - S_OP(1); - } + printf("[%05d](%03d) ", (pc++) - pc_start, opcode); + dis_print_opcode(opcode); + + if (opcode > DIS_OP_END_OPCODES) + continue; + + S_OP(0); + S_OP(1); + } } #define LIT_ADD(a, b, c) b[c] = a; ++c; -static void dis_read_interpreter_sections(dis_program_t **prg, uint32_t *pc, uint8_t spaces, char *tree) { - uint32_t literal_count = 0; - uint8_t literal_type[65536]; +static void dis_read_interpreter_sections(dis_program_t **prg, uint32_t *pc, + uint8_t spaces, char *tree, bool alt_fmt) { + uint32_t literal_count = 0; + uint8_t literal_type[65536]; - const unsigned short literalCount = readWord((*prg)->program, pc); + const unsigned short literalCount = readWord((*prg)->program, pc); - printf("\n"); - SPC(spaces); - printf("| --- ( Reading %d literals from cache ) ---\n", literalCount); + printf("\n"); + if (!alt_fmt) { + SPC(spaces); + printf("| "); + printf(" "); + printf("--- ( Reading %d literals from cache ) ---\n", literalCount); + } - for (int i = 0; i < literalCount; i++) { - const unsigned char literalType = readByte((*prg)->program, pc); + for (int i = 0; i < literalCount; i++) { + const unsigned char literalType = readByte((*prg)->program, pc); - switch (literalType) { - case DIS_LITERAL_NULL: - LIT_ADD(DIS_LITERAL_NULL, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( null )\n", i); - break; + switch (literalType) { + case DIS_LITERAL_NULL: + LIT_ADD(DIS_LITERAL_NULL, literal_type, literal_count) ; + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( null )\n", i); + break; - case DIS_LITERAL_BOOLEAN: { - const bool b = readByte((*prg)->program, pc); - LIT_ADD(DIS_LITERAL_BOOLEAN, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( boolean %s )\n", i, b ? "true" : "false"); - } - break; + case DIS_LITERAL_BOOLEAN: { + const bool b = readByte((*prg)->program, pc); + LIT_ADD(DIS_LITERAL_BOOLEAN, literal_type, literal_count); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( boolean %s )\n", i, b ? "true" : "false"); + } + break; - case DIS_LITERAL_INTEGER: { - const int d = readInt((*prg)->program, pc); - LIT_ADD(DIS_LITERAL_INTEGER, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( integer %d )\n", i, d); - } - break; + case DIS_LITERAL_INTEGER: { + const int d = readInt((*prg)->program, pc); + LIT_ADD(DIS_LITERAL_INTEGER, literal_type, literal_count); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( integer %d )\n", i, d); + } + break; - case DIS_LITERAL_FLOAT: { - const float f = readFloat((*prg)->program, pc); - LIT_ADD(DIS_LITERAL_FLOAT, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( float %f )\n", i, f); - } - break; + case DIS_LITERAL_FLOAT: { + const float f = readFloat((*prg)->program, pc); + LIT_ADD(DIS_LITERAL_FLOAT, literal_type, literal_count); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( float %f )\n", i, f); + } + break; - case DIS_LITERAL_STRING: { - const char *s = readString((*prg)->program, pc); - LIT_ADD(DIS_LITERAL_STRING, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( string \"%s\" )\n", i, s); - } - break; + case DIS_LITERAL_STRING: { + const char *s = readString((*prg)->program, pc); + LIT_ADD(DIS_LITERAL_STRING, literal_type, literal_count); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( string \"%s\" )\n", i, s); + } + break; - case DIS_LITERAL_ARRAY_INTERMEDIATE: - case DIS_LITERAL_ARRAY: { - unsigned short length = readWord((*prg)->program, pc); - SPC(spaces); - printf("| | [%05d] ( array ", i); - for (int i = 0; i < length; i++) { - int index = readWord((*prg)->program, pc); - printf("%d ", index); - LIT_ADD(DIS_LITERAL_NULL, literal_type, literal_count); - if (!(i % 15) && i != 0) { - printf("\n"); - SPC(spaces); - printf("| | "); - } - } - printf(")\n"); - LIT_ADD(DIS_LITERAL_ARRAY, literal_type, literal_count); - } - break; + case DIS_LITERAL_ARRAY_INTERMEDIATE: + case DIS_LITERAL_ARRAY: { + unsigned short length = readWord((*prg)->program, pc); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( array ", i); + for (int i = 0; i < length; i++) { + int index = readWord((*prg)->program, pc); + printf("%d ", index); + LIT_ADD(DIS_LITERAL_NULL, literal_type, literal_count); + if (!(i % 15) && i != 0) { + printf("\n"); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf(" "); + } + } + printf(")\n"); + LIT_ADD(DIS_LITERAL_ARRAY, literal_type, literal_count); + } + break; - case DIS_LITERAL_DICTIONARY_INTERMEDIATE: - case DIS_LITERAL_DICTIONARY: { - unsigned short length = readWord((*prg)->program, pc); - SPC(spaces); - printf("| | [%05d] ( dictionary ", i); - for (int i = 0; i < length / 2; i++) { - int key = readWord((*prg)->program, pc); - int val = readWord((*prg)->program, pc); - printf("(key: %d, val:%d) ", key, val); - if(!(i % 5) && i != 0){ - printf("\n"); - SPC(spaces); - printf("| | "); - } - } - printf(")\n"); - LIT_ADD(DIS_LITERAL_DICTIONARY, literal_type, literal_count); - } - break; + case DIS_LITERAL_DICTIONARY_INTERMEDIATE: + case DIS_LITERAL_DICTIONARY: { + unsigned short length = readWord((*prg)->program, pc); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( dictionary ", i); + for (int i = 0; i < length / 2; i++) { + int key = readWord((*prg)->program, pc); + int val = readWord((*prg)->program, pc); + printf("(key: %d, val:%d) ", key, val); + if (!(i % 5) && i != 0) { + printf("\n"); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf(" "); + } + } + printf(")\n"); + LIT_ADD(DIS_LITERAL_DICTIONARY, literal_type, literal_count); + } + break; - case DIS_LITERAL_FUNCTION: { - unsigned short index = readWord((*prg)->program, pc); - LIT_ADD(DIS_LITERAL_FUNCTION_INTERMEDIATE, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( function index: %d )\n", i, index); - } - break; + case DIS_LITERAL_FUNCTION: { + unsigned short index = readWord((*prg)->program, pc); + LIT_ADD(DIS_LITERAL_FUNCTION_INTERMEDIATE, literal_type, + literal_count); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( function index: %d )\n", i, index); + } + break; - case DIS_LITERAL_IDENTIFIER: { - const char *str = readString((*prg)->program, pc); - LIT_ADD(DIS_LITERAL_IDENTIFIER, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( identifier %s )\n", i, str); - } - break; + case DIS_LITERAL_IDENTIFIER: { + const char *str = readString((*prg)->program, pc); + LIT_ADD(DIS_LITERAL_IDENTIFIER, literal_type, literal_count); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( identifier %s )\n", i, str); + } + break; - case DIS_LITERAL_TYPE: - case DIS_LITERAL_TYPE_INTERMEDIATE: { - uint8_t literalType = readByte((*prg)->program, pc); - uint8_t constant = readByte((*prg)->program, pc); - SPC(spaces); - printf("| | [%05d] ( type %s: %d)\n", i, (LIT_STR[literalType] + 12), constant); - if (literalType == DIS_LITERAL_ARRAY) { - uint16_t vt = readWord((*prg)->program, pc); - SPC(spaces); - printf("| | ( subtype: %d)\n", vt); - } + case DIS_LITERAL_TYPE: + case DIS_LITERAL_TYPE_INTERMEDIATE: { + uint8_t literalType = readByte((*prg)->program, pc); + uint8_t constant = readByte((*prg)->program, pc); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( type %s: %d)\n", i, (LIT_STR[literalType] + 12), + constant); + if (literalType == DIS_LITERAL_ARRAY) { + uint16_t vt = readWord((*prg)->program, pc); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf(" ( subtype: %d)\n", vt); + } - if (literalType == DIS_LITERAL_DICTIONARY) { - uint8_t kt = readWord((*prg)->program, pc); - uint8_t vt = readWord((*prg)->program, pc); - SPC(spaces); - printf("| | ( subtype: [%d, %d] )\n", kt, vt); - } - LIT_ADD(literalType, literal_type, literal_count); - } - break; + if (literalType == DIS_LITERAL_DICTIONARY) { + uint8_t kt = readWord((*prg)->program, pc); + uint8_t vt = readWord((*prg)->program, pc); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf(" ( subtype: [%d, %d] )\n", kt, vt); + } + LIT_ADD(literalType, literal_type, literal_count); + } + break; - case DIS_LITERAL_INDEX_BLANK: - LIT_ADD(DIS_LITERAL_INDEX_BLANK, literal_type, literal_count); - SPC(spaces); - printf("| | [%05d] ( blank )\n", i); - break; - } - } + case DIS_LITERAL_INDEX_BLANK: + LIT_ADD(DIS_LITERAL_INDEX_BLANK, literal_type, literal_count); + if (!alt_fmt) { + SPC(spaces); + printf("| | "); + } else + printf(" "); + printf("[%05d] ( blank )\n", i); + break; + } + } - consumeByte(DIS_OP_SECTION_END, (*prg)->program, pc); + consumeByte(DIS_OP_SECTION_END, (*prg)->program, pc); - SPC(spaces); - printf("| --- ( end literal section ) ---\n"); + if (!alt_fmt) { + SPC(spaces); + printf("| "); + printf("--- ( end literal section ) ---\n"); + } - int functionCount = readWord((*prg)->program, pc); - int functionSize = readWord((*prg)->program, pc); - if (functionCount) { - SPC(spaces); - printf("|\n"); - SPC(spaces); - printf("| --- ( fn count: %d, total size: %d ) ---\n", functionCount, functionSize); + int functionCount = readWord((*prg)->program, pc); + int functionSize = readWord((*prg)->program, pc); + + if (functionCount) { + if (!alt_fmt) { + SPC(spaces); + printf("|\n"); + SPC(spaces); + printf("| "); + printf("--- ( fn count: %d, total size: %d ) ---\n", functionCount, functionSize); + } uint32_t fcnt = 0; char tree_local[2048]; for (uint32_t i = 0; i < literal_count; i++) { - if (literal_type[i] == DIS_LITERAL_FUNCTION_INTERMEDIATE) { - size_t size = (size_t) readWord((*prg)->program, pc); + if (literal_type[i] == DIS_LITERAL_FUNCTION_INTERMEDIATE) { + size_t size = (size_t) readWord((*prg)->program, pc); - uint32_t fpc_start = *pc; - uint32_t fpc_end = *pc + size - 1; + uint32_t fpc_start = *pc; + uint32_t fpc_end = *pc + size - 1; tree_local[0] = '\0'; - sprintf(tree_local, "%s.%d",tree, fcnt); - if (tree_local[0] == '.') - memcpy(tree_local, tree_local + 1, strlen(tree_local)); - SPC(spaces); - printf("| |\n"); - SPC(spaces); - printf("| | ( fun %s [ start: %d, end: %d ] )", tree_local, fpc_start, fpc_end); - if ((*prg)->program[*pc + size - 1] != DIS_OP_FN_END) { - printf("\nERROR: Failed to find function end\n"); - exit(1); - } + if (!alt_fmt) { + sprintf(tree_local, "%s.%d", tree, fcnt); + if (tree_local[0] == '_') + memcpy(tree_local, tree_local + 1, strlen(tree_local)); + } else { + sprintf(tree_local, "%s_%d", tree, fcnt); + if (tree_local[0] == '_') + memcpy(tree_local, tree_local + 1, strlen(tree_local)); + } - dis_read_interpreter_sections(prg, &fpc_start, spaces + 4, tree_local); - SPC(spaces); - printf("| | |\n"); + if (!alt_fmt) { + SPC(spaces); + printf("| |\n"); + SPC(spaces); + printf("| | "); + printf("( fun %s [ start: %d, end: %d ] )", tree_local, fpc_start, fpc_end); + } else + printf("\nLIT_FUN_%s:", tree_local); - SPC(spaces + 4); - printf("| --- ( reading code for %s ) ---", tree_local); - dis_disassemble_section(prg, fpc_start, fpc_end, spaces + 4, true); - printf("\n"); - SPC(spaces + 4); - printf("| --- ( end code section ) ---\n"); - fcnt++; - *pc += size; - } - } - SPC(spaces); - printf("|\n"); - SPC(spaces); - printf("| --- ( end fn section ) ---\n"); + if ((*prg)->program[*pc + size - 1] != DIS_OP_FN_END) { + printf("\nERROR: Failed to find function end\n"); + exit(1); + } - } + dis_read_interpreter_sections(prg, &fpc_start, spaces + 4, tree_local, alt_fmt); - consumeByte(DIS_OP_SECTION_END, (*prg)->program, pc); + if (!alt_fmt) { + SPC(spaces); + printf("| | |\n"); + SPC(spaces + 4); + printf("| "); + printf("--- ( reading code for %s ) ---", tree_local); + dis_disassemble_section(prg, fpc_start, fpc_end, spaces + 4, true, alt_fmt); + printf("\n"); + SPC(spaces + 4); + printf("| "); + printf("--- ( end code section ) ---\n"); + } else { + fun_code_t *fun = malloc(sizeof(struct fun_code_s)); + fun->fun = malloc(strlen(tree_local) + 1); + strcpy(fun->fun, tree_local); + fun->start = fpc_start; + fun->len = fpc_end; + enqueue((void*) fun); + } + + fcnt++; + *pc += size; + } + } + + if (!alt_fmt) { + SPC(spaces); + printf("|\n"); + SPC(spaces); + printf("| "); + printf("--- ( end fn section ) ---\n"); + } + } + + consumeByte(DIS_OP_SECTION_END, (*prg)->program, pc); } /////////////////////////////////////////////////////////////////////////////// -void disassemble(const char *filename) { - dis_program_t *prg; +void disassemble(const char *filename, bool alt_fmt) { + dis_program_t *prg; + queue_front = NULL; + queue_rear = NULL; - dis_disassembler_init(&prg); - if (dis_load_file(filename, &prg)) - exit(1); + dis_disassembler_init(&prg); + if (dis_load_file(filename, &prg)) + exit(1); - dis_read_header(&prg); + dis_read_header(&prg); - consumeByte(DIS_OP_SECTION_END, prg->program, &(prg->pc)); + consumeByte(DIS_OP_SECTION_END, prg->program, &(prg->pc)); - dis_read_interpreter_sections(&prg, &(prg->pc), 0, ""); + if (alt_fmt) + printf("\nLIT_MAIN:"); + dis_read_interpreter_sections(&prg, &(prg->pc), 0, "", alt_fmt); + if (!alt_fmt) { + printf("|\n| "); + printf("--- ( reading main code ) ---"); + } else + printf("\nMAIN:"); + dis_disassemble_section(&prg, prg->pc, prg->len, 0, false, alt_fmt); + if (!alt_fmt) { + printf("\n| "); + printf("--- ( end main code section ) ---"); + } else + printf("\n"); - printf("|\n| --- ( reading main code ) ---"); - dis_disassemble_section(&prg, prg->pc, prg->len, 0, false); - printf("\n| --- ( end main code section ) ---"); + if (alt_fmt) { + while (queue_front != NULL) { + fun_code_t *fun = (fun_code_t*)front(); + printf("\nFUN_%s:", fun->fun); + free(fun->fun); - printf("\n\n"); - dis_disassembler_deinit(&prg); + dis_disassemble_section(&prg, fun->start, fun->len, 0, true, alt_fmt); + + dequeue(); + printf("\n"); + } + + } + + printf("\n"); + dis_disassembler_deinit(&prg); } diff --git a/tools/disassembler/disassembler.h b/tools/disassembler/disassembler.h index 7a34f0b..6d01461 100644 --- a/tools/disassembler/disassembler.h +++ b/tools/disassembler/disassembler.h @@ -123,6 +123,6 @@ typedef enum DIS_LITERAL_TYPE { DIS_LITERAL_INDEX_BLANK, // for blank indexing i.e. arr[:] } dis_literal_type_t; -extern void disassemble(const char *filename); +extern void disassemble(const char *filename, bool alt_fmt); #endif /* DISASSEMBLER_H_ */ diff --git a/tools/disassembler/main.c b/tools/disassembler/main.c index 4028039..406412e 100644 --- a/tools/disassembler/main.c +++ b/tools/disassembler/main.c @@ -1,8 +1,46 @@ #include +#include "cargs.h" #include "disassembler.h" -int main(int argc, const char* argv[]) { - disassemble(argv[1]); +static struct cag_option options[] = { + { + .identifier = 'a', + .access_letters = "a", + .access_name = NULL, + .value_name = NULL, + .description = "Alternate format" + }, { + .identifier = 'h', + .access_letters = "h", + .access_name = "help", + .description = "Shows the command help" + } +}; + +struct options { + bool alternate_flag; +}; + +int main(int argc, char *argv[]) { + char identifier; + cag_option_context context; + struct options config = { false }; + + cag_option_prepare(&context, options, CAG_ARRAY_SIZE(options), argc, argv); + while (cag_option_fetch(&context)) { + identifier = cag_option_get(&context); + switch (identifier) { + case 'a': + config.alternate_flag = true; + break; + case 'h': + printf("Usage: disassembler [OPTION] file\n"); + cag_option_print(options, CAG_ARRAY_SIZE(options), stdout); + return EXIT_SUCCESS; + } + } + + disassemble(argv[context.index], config.alternate_flag); return EXIT_SUCCESS; } diff --git a/tools/disassembler/utils.c b/tools/disassembler/utils.c new file mode 100644 index 0000000..681336a --- /dev/null +++ b/tools/disassembler/utils.c @@ -0,0 +1,52 @@ +/* + * utils.c + * + * Created on: 10 ago. 2023 + * Original Author: Emiliano Augusto Gonzalez (egonzalez . hiperion @ gmail . com) + * + * Further modified by Kayne Ruse, and added to the Toy Programming Language tool repository. + */ + +#include "stdio.h" +#include "stdlib.h" + +#include "utils.h" + +struct Node *queue_front, *queue_rear; + +void enqueue(void *x) { + struct Node *temp; + + temp = (struct Node*) malloc(sizeof(struct Node)); + temp->data = x; + temp->next = NULL; + + if (queue_front == NULL && queue_rear == NULL) { + queue_front = queue_rear = temp; + return; + } + queue_rear->next = temp; + queue_rear = temp; + +} + +void dequeue(void) { + struct Node *temp = queue_front; + + if (queue_front == NULL) { + printf("Error : QUEUE is empty!!"); + return; + } + if (queue_front == queue_rear) + queue_front = queue_rear = NULL; + + else + queue_front = queue_front->next; + + free(temp->data); + free(temp); +} + +void* front(void) { + return queue_front->data; +} diff --git a/tools/disassembler/utils.h b/tools/disassembler/utils.h new file mode 100644 index 0000000..fa804ca --- /dev/null +++ b/tools/disassembler/utils.h @@ -0,0 +1,24 @@ +/* + * utils.h + * + * Created on: 10 ago. 2023 + * Original Author: Emiliano Augusto Gonzalez (egonzalez . hiperion @ gmail . com) + * + * Further modified by Kayne Ruse, and added to the Toy Programming Language tool repository. + */ + +#ifndef UTILS_H_ +#define UTILS_H_ + +struct Node { + void *data; + struct Node *next; +}; + +extern struct Node *queue_front, *queue_rear; + +void enqueue(void *x); +void dequeue(void); +void* front(void); + +#endif /* UTILS_H_ */