From 1b0ef5d896a1652dc52a5353521d9990d481c76b Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Tue, 17 Sep 2024 00:53:00 +0530 Subject: [PATCH 01/18] Refactored and Optimized Logic:: Parser Logic, Latex Based Output & Shared Modules --- c_utils.py | 71 +++ chisel_utils.py | 88 ++++ go_utils.py | 65 +++ latex_utils.py | 435 +++++++++++++++++ parse.py | 1182 ++++----------------------------------------- rust_utils.py | 31 ++ shared_utils.py | 547 +++++++++++++++++++++ sverilog_utils.py | 30 ++ 8 files changed, 1368 insertions(+), 1081 deletions(-) create mode 100644 c_utils.py create mode 100644 chisel_utils.py create mode 100644 go_utils.py create mode 100644 latex_utils.py create mode 100644 rust_utils.py create mode 100644 shared_utils.py create mode 100644 sverilog_utils.py diff --git a/c_utils.py b/c_utils.py new file mode 100644 index 00000000..4143625f --- /dev/null +++ b/c_utils.py @@ -0,0 +1,71 @@ +import re +import glob +import os +import pprint +import logging +import collections +import yaml +import sys +# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') + +def make_c(instr_dict): + mask_match_str = '' + declare_insn_str = '' + for i in instr_dict: + mask_match_str += f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' + mask_match_str += f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' + + csr_names_str = '' + declare_csr_str = '' + for num, name in csrs+csrs32: + csr_names_str += f'#define CSR_{name.upper()} {hex(num)}\n' + declare_csr_str += f'DECLARE_CSR({name}, CSR_{name.upper()})\n' + + causes_str= '' + 
declare_cause_str = '' + for num, name in causes: + causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" + declare_cause_str += f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + + arg_str = '' + for name, rng in arg_lut.items(): + begin = rng[1] + end = rng[0] + mask = ((1 << (end - begin + 1)) - 1) << begin + arg_str += f"#define INSN_FIELD_{name.upper().replace(' ', '_')} {hex(mask)}\n" + + with open(f'{os.path.dirname(__file__)}/encoding.h', 'r') as file: + enc_header = file.read() + + commit = os.popen('git log -1 --format="format:%h"').read() + enc_file = open('encoding.out.h','w') + enc_file.write(f'''/* SPDX-License-Identifier: BSD-3-Clause */ + +/* Copyright (c) 2023 RISC-V International */ + +/* + * This file is auto-generated by running 'make' in + * https://github.com/riscv/riscv-opcodes ({commit}) + */ + +{enc_header} +/* Automatically generated by parse_opcodes. */ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +{mask_match_str} +{csr_names_str} +{causes_str} +{arg_str}#endif +#ifdef DECLARE_INSN +{declare_insn_str}#endif +#ifdef DECLARE_CSR +{declare_csr_str}#endif +#ifdef DECLARE_CAUSE +{declare_cause_str}#endif +''') + enc_file.close() diff --git a/chisel_utils.py b/chisel_utils.py new file mode 100644 index 00000000..061eef4b --- /dev/null +++ b/chisel_utils.py @@ -0,0 +1,88 @@ +from constants import * +import copy +import re +import glob +import os +import pprint +import logging +import collections +import yaml +import sys +# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') + +def make_chisel(instr_dict, spinal_hdl=False): + + chisel_names='' + cause_names_str='' + csr_names_str = '' + for i in instr_dict: + if spinal_hdl: + 
chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n' + # else: + # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' + if not spinal_hdl: + extensions = instr_dict_2_extensions(instr_dict) + for e in extensions: + e_instrs = filter(lambda i: instr_dict[i]['extension'][0] == e, instr_dict) + if "rv64_" in e: + e_format = e.replace("rv64_", "").upper() + "64" + elif "rv32_" in e: + e_format = e.replace("rv32_", "").upper() + "32" + elif "rv_" in e: + e_format = e.replace("rv_", "").upper() + else: + e_format = e.upper + chisel_names += f' val {e_format+"Type"} = Map(\n' + for instr in e_instrs: + tmp_instr_name = '"'+instr.upper().replace(".","_")+'"' + chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n' + chisel_names += f' )\n' + + for num, name in causes: + cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' + cause_names_str += ''' val all = { + val res = collection.mutable.ArrayBuffer[Int]() +''' + for num, name in causes: + cause_names_str += f' res += {name.lower().replace(" ","_")}\n' + cause_names_str += ''' res.toArray + }''' + + for num, name in csrs+csrs32: + csr_names_str += f' val {name} = {hex(num)}\n' + csr_names_str += ''' val all = { + val res = collection.mutable.ArrayBuffer[Int]() +''' + for num, name in csrs: + csr_names_str += f''' res += {name}\n''' + csr_names_str += ''' res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) +''' + for num, name in csrs32: + csr_names_str += f''' res += {name}\n''' + csr_names_str += ''' res.toArray + }''' + + if spinal_hdl: + chisel_file = open('inst.spinalhdl','w') + else: + chisel_file = open('inst.chisel','w') + chisel_file.write(f''' +/* Automatically generated by parse_opcodes */ +object Instructions {{ +{chisel_names} +}} +object Causes {{ +{cause_names_str} +}} +object CSRs 
{{ +{csr_names_str} +}} +''') + chisel_file.close() diff --git a/go_utils.py b/go_utils.py new file mode 100644 index 00000000..9c5ef2b1 --- /dev/null +++ b/go_utils.py @@ -0,0 +1,65 @@ +import re +import glob +import os +import pprint +import logging +import collections +import yaml +import sys +# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') + +def make_go(instr_dict): + + args = " ".join(sys.argv) + prelude = f'''// Code generated by {args}; DO NOT EDIT.''' + + prelude += ''' +package riscv + +import "cmd/internal/obj" + +type inst struct { + opcode uint32 + funct3 uint32 + rs1 uint32 + rs2 uint32 + csr int64 + funct7 uint32 +} + +func encode(a obj.As) *inst { + switch a { +''' + + endoffile = ''' } + return nil +} +''' + + instr_str = '' + for i in instr_dict: + enc_match = int(instr_dict[i]['match'],0) + opcode = (enc_match >> 0) & ((1<<7)-1) + funct3 = (enc_match >> 12) & ((1<<3)-1) + rs1 = (enc_match >> 15) & ((1<<5)-1) + rs2 = (enc_match >> 20) & ((1<<5)-1) + csr = (enc_match >> 20) & ((1<<12)-1) + funct7 = (enc_match >> 25) & ((1<<7)-1) + instr_str += f''' case A{i.upper().replace("_","")}: + return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }} +''' + + with open('inst.go','w') as file: + file.write(prelude) + file.write(instr_str) + file.write(endoffile) + + try: + import subprocess + subprocess.run(["go", "fmt", "inst.go"]) + except: + pass \ No newline at end of file diff --git a/latex_utils.py b/latex_utils.py new file mode 100644 index 00000000..59a8bb52 --- /dev/null +++ b/latex_utils.py @@ -0,0 +1,435 @@ +#!/usr/bin/env python3 +import pprint +import logging +from constants import * +from shared_utils import 
create_inst_dict + +LOG_FORMAT = '%(levelname)s:: %(message)s' +LOG_LEVEL = logging.INFO + +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + + +def create_priv_instr_dataset(): + """Create dataset list for privileged instructions.""" + system_instr = ['_h', '_s', '_system', '_svinval', '64_h'] + return [ + (system_instr, 'Trap-Return Instructions', ['sret', 'mret'], False), + (system_instr, 'Interrupt-Management Instructions', ['wfi'], False), + (system_instr, 'Supervisor Memory-Management Instructions', ['sfence_vma'], False), + (system_instr, 'Hypervisor Memory-Management Instructions', ['hfence_vvma', 'hfence_gvma'], False), + (system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions', + ['hlv_b', 'hlv_bu', 'hlv_h', 'hlv_hu', 'hlv_w', 'hlvx_hu', 'hlvx_wu', 'hsv_b', 'hsv_h', 'hsv_w'], False), + (system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions, RV64 only', ['hlv_wu', 'hlv_d', 'hsv_d'], False), + (system_instr, 'Svinval Memory-Management Instructions', ['sinval_vma', 'sfence_w_inval', 'sfence_inval_ir', 'hinval_vvma', 'hinval_gvma'], False) + ] + + +def make_priv_latex_table(): + """Generate and write the LaTeX table for privileged instructions.""" + type_list = ['R-type', 'I-type'] + dataset_list = create_priv_instr_dataset() + caption = '\\caption{RISC-V Privileged Instructions}' + + with open('priv-instr-table.tex', 'w') as latex_file: + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + +def make_latex_table(): + ''' + - This function is mean to create the instr-table.tex that is meant to be used + by the riscv-isa-manual. + 1. creates a single latex file of multiple table + 2. Each table limited to a single page + 3. Only the last table is assigned a latex-caption. + + - For each table, we assign a type-list that captures the different instruction types (R, I, B, etc.) required for that table. + 1. 
Specify the type-list to capture various instruction types (e.g., R-type, I-type, B-type). + 2. Select a list of extensions (e.g., _i, 32_i) whose instructions are necessary to populate the table. + 3. For each extension or collection of extensions, assign a title that appears as a subheading within the table (these are inlined headings, not captions). + + * All of the above information is collected/created and sent to + make_ext_latex_table function to dump out the latex contents into a file. + + * The last table only has to be given a caption - as per the policy of the + riscv-isa-manual. + ''' + # File for writing LaTeX content + with open('instr-table.tex', 'w') as latex_file: + # Prepare table configurations with type list, datasets, word size & caption + table_configurations = get_table_configurations() + + # Map each configuration from above with variables to pass as argumnet + for config in table_configurations: + # Unpack configuration dictionary into arguments for make_ext_latex_table + type_list = config['type_list'] + datasets = config['datasets'] + word_size = config['word_size'] + caption = config['caption'] + + # LaTeX table generation function + make_ext_latex_table( + type_list, + datasets, + latex_file, + word_size, + caption + ) + + +def get_table_configurations(): + ''' + Returns a list of table configurations, each specifying the type list, datasets, + word size, and caption for LaTeX table generation. + + Returns: + list: A list of dictionaries, each representing a table's configuration. 
+ ''' + return [ + create_table_configuration( + type_list=['R-type', 'I-type', 'S-type', 'B-type', 'U-type', 'J-type'], + datasets=[ + create_dataset(['_i', '32_i'], 'RV32I Base Instruction Set', [], False), + create_dataset(['_i'], '', ['fence_tso', 'pause'], True) + ], + word_size=32 + ), + create_table_configuration( + type_list=['R-type', 'I-type', 'S-type'], + datasets=[ + create_dataset(['64_i'], 'RV64I Base Instruction Set (in addition to RV32I)', [], False), + create_dataset(['_zifencei'], 'RV32/RV64 Zifencei Standard Extension', [], False), + create_dataset(['_zicsr'], 'RV32/RV64 Zicsr Standard Extension', [], False), + create_dataset(['_m', '32_m'], 'RV32M Standard Extension', [], False), + create_dataset(['64_m'], 'RV64M Standard Extension (in addition to RV32M)', [], False) + ], + word_size=32 + ), + create_table_configuration( + type_list=['R-type'], + datasets=[ + create_dataset(['_a'], 'RV32A Standard Extension', [], False), + create_dataset(['64_a'], 'RV64A Standard Extension (in addition to RV32A)', [], False) + ], + word_size=32 + ), + create_table_configuration( + type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + datasets=[ + create_dataset(['_f'], 'RV32F Standard Extension', [], False), + create_dataset(['64_f'], 'RV64F Standard Extension (in addition to RV32F)', [], False) + ], + word_size=32 + ), + create_table_configuration( + type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + datasets=[ + create_dataset(['_d'], 'RV32D Standard Extension', [], False), + create_dataset(['64_d'], 'RV64D Standard Extension (in addition to RV32D)', [], False) + ], + word_size=32 + ), + create_table_configuration( + type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + datasets=[ + create_dataset(['_q'], 'RV32Q Standard Extension', [], False), + create_dataset(['64_q'], 'RV64Q Standard Extension (in addition to RV32Q)', [], False) + ], + word_size=32 + ), + create_table_configuration( + type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + datasets=[ + 
create_dataset(['_zfh', '_d_zfh', '_q_zfh'], 'RV32Zfh Standard Extension', [], False), + create_dataset(['64_zfh'], 'RV64Zfh Standard Extension (in addition to RV32Zfh)', [], False) + ], + word_size=32, + caption='\\caption{Instruction listing for RISC-V}' + ), + create_table_configuration( + type_list=[''], + datasets=[ + create_dataset(['_c', '32_c', '32_c_f', '_c_d'], 'RV32C Standard Extension', [], False), + create_dataset(['64_c'], 'RV64C Standard Extension (in addition to RV32C)', [], False) + ], + word_size=16, + caption='' + ) + ] + + +def create_table_configuration(type_list, datasets, word_size, caption=''): + ''' + Creates a table configuration dictionary with the provided parameters. + + Parameters: + type_list (list): List of instruction types to include in the table. + datasets (list of tuples): Each tuple contains: + - list_of_extensions (list): List of extension names. + - title (str): Title to appear as a subsection in the table. + - list_of_instructions (list): Specific instructions to include. + - include_pseudo_ops (bool): Whether to include pseudo-operations. + word_size (int): The word size for the instructions (32 or 16). + caption (str): The caption to include at the end of the table. + + Returns: + dict: A dictionary representing the table configuration. + ''' + return { + "type_list": type_list, + "datasets": datasets, + "word_size": word_size, + "caption": caption + } + + +def create_dataset(extensions, title, instructions, include_pseudo_ops): + ''' + Creates a dataset tuple for table configuration. + + Parameters: + extensions (list): List of extension names. + title (str): Title for the dataset. + instructions (list): List of specific instructions to include. + include_pseudo_ops (bool): Whether to include pseudo-operations. + + Returns: + tuple: A tuple representing the dataset configuration. 
+ ''' + return (extensions, title, instructions, include_pseudo_ops) + + +def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): + ''' + For a given collection of extensions this function dumps out a complete + latex table which includes the encodings of the instructions. + + Args: + - type_list (list of str): + 1. A list of instruction types (R, I, B, etc) that are treated as header for each table. + 2. Each table will have its own requirements and type_list must include all the instruction-types that the table needs. + 3. All elements of this list must be present in the latex_inst_type dictionary defined in constants.py + + + - dataset (list of tuples): A list of 3-element tuples where each tuple consists of: + 1. list_of_extensions (list): A list of extensions whose instructions will be populated under the respective title. + 2. title (str): A title associated with the respective table. + 3. list_of_instructions (list): If not empty, only these instructions present in the corresponding extension + will be included in the table, while others will be ignored. + + - latex_file (file pointer): A file pointer to the LaTeX file where the generated table will be written. + + - ilen (int): The ilen input indicates the length of the instruction for which the table is created. + + - caption (str): The caption for the LaTeX table. + + Returns: + - None: The function writes the generated LaTeX table directly to the provided `latex_file`. + + Process: + 1. Creates table headers based on the instruction types in `type_list` using the `latex_inst_type` dictionary + from constants.py. + + 2. Iterates through each entry in the dataset to: + - Generate an exhaustive list of instructions for each dataset using `create_inst_dict`. + - Apply any instruction filters based on `list_of_instructions` to select only relevant instructions. + + 3. For each instruction, generates LaTeX table entries. 
+ - Uses `arg_lut` from constants.py to determine the position of arguments in the encoding, and creates multicolumn + LaTeX entries for these arguments. + - Handles hardcoded bits (e.g., strings of 1s and 0s) similarly, creating multicolumn entries for continuous + strings of bits. + + 4. Writes the LaTeX table to `latex_file` with a specific format suitable for instructions of size `ilen`. + ''' + + column_size = get_column_size(ilen) + type_entries = generate_type_entries(ilen) + type_dict = get_type_dict(type_list) + + # Build the table entry with each instruction types + for inst_type, fields in type_dict.items(): + type_entries += build_instruction_type_entry(inst_type, fields, ilen) + + # Create a table for each dataset entry + content = generate_dataset_content(dataset, ilen) + + header = generate_table_header(column_size, ilen, type_entries) + endtable = generate_table_footer(caption) + + # Dump the contents to the latex file + latex_file.write(header + content + endtable) + + +def get_column_size(ilen): + """Generate the column size string based on instruction length (ilen).""" + return "".join(['p{0.002in}'] * (ilen + 1)) + + +def generate_type_entries(ilen): + """Generate the type entries section of the LaTeX table.""" + if ilen == 32: + return ''' + \\multicolumn{3}{l}{31} & + \\multicolumn{2}{r}{27} & + \\multicolumn{1}{c}{26} & + \\multicolumn{1}{r}{25} & + \\multicolumn{3}{l}{24} & + \\multicolumn{2}{r}{20} & + \\multicolumn{3}{l}{19} & + \\multicolumn{2}{r}{15} & + \\multicolumn{2}{l}{14} & + \\multicolumn{1}{r}{12} & + \\multicolumn{4}{l}{11} & + \\multicolumn{1}{r}{7} & + \\multicolumn{6}{l}{6} & + \\multicolumn{1}{r}{0} \\\\ + \\cline{2-33}\n&\n\n''' + else: + return ''' + \\multicolumn{1}{c}{15} & + \\multicolumn{1}{c}{14} & + \\multicolumn{1}{c}{13} & + \\multicolumn{1}{c}{12} & + \\multicolumn{1}{c}{11} & + \\multicolumn{1}{c}{10} & + \\multicolumn{1}{c}{9} & + \\multicolumn{1}{c}{8} & + \\multicolumn{1}{c}{7} & + \\multicolumn{1}{c}{6} & + 
\\multicolumn{1}{c}{5} & + \\multicolumn{1}{c}{4} & + \\multicolumn{1}{c}{3} & + \\multicolumn{1}{c}{2} & + \\multicolumn{1}{c}{1} & + \\multicolumn{1}{c}{0} \\\\ + \\cline{2-17}\n&\n\n''' + + +def get_type_dict(type_list): + """Create a subset dictionary of latex_inst_type for the given type_list.""" + return {key: value for key, value in latex_inst_type.items() if key in type_list} + + +def build_instruction_type_entry(inst_type, fields, ilen): + """Build a LaTeX table entry for each instruction type.""" + entries = [] + for field in fields['variable_fields']: + (msb, lsb) = arg_lut[field] + name = latex_mapping.get(field, field) + entries.append((msb, lsb, name)) + + return format_table_entry(entries, inst_type, ilen) + + +def format_table_entry(fields, entry_type, ilen): + """Generate formatted LaTeX table entry.""" + fields.sort(key=lambda f: f[0], reverse=True) + entry = '' + for i, (msb, lsb, name) in enumerate(fields): + col_size = msb - lsb + 1 + if i == len(fields) - 1: + entry += f'\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} & {entry_type} \\\\\n' + elif i == 0: + entry += f'\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} &\n' + else: + entry += f'\\multicolumn{{{col_size}}}{{c|}}{{{name}}} &\n' + return entry + f'\\cline{{2-{ilen+1}}}\n&\n\n' + + +def generate_dataset_content(dataset, ilen): + """Generate LaTeX content for each dataset entry.""" + content = '' + for ext_list, title, filter_list, include_pseudo in dataset: + instr_dict = get_instruction_dict(ext_list, include_pseudo) + filtered_list = filter_list if filter_list else list(instr_dict.keys()) + instr_entries = generate_instruction_entries(instr_dict, filtered_list, ilen) + + if title: + content += generate_dataset_title(title, ilen) + instr_entries + else: + content += instr_entries + return content + + +def get_instruction_dict(ext_list, include_pseudo): + """Create a dictionary of instructions for given extensions.""" + instr_dict = {} + for ext in ext_list: + 
instr_dict.update(create_inst_dict([f'rv{ext}'], include_pseudo)) + return instr_dict + + +def generate_instruction_entries(instr_dict, inst_list, ilen): + """Generate LaTeX entries for each instruction in the list.""" + instr_entries = '' + for inst in inst_list: + if inst not in instr_dict: + logging.error(f'Instruction {inst} not found in instr_dict') + raise SystemExit(1) + + fields = parse_instruction_fields(instr_dict[inst], ilen) + instr_entries += format_table_entry(fields, inst.upper().replace("_", "."), ilen) + + return instr_entries + + +def parse_instruction_fields(inst_data, ilen): + """Parse and extract fields from instruction data.""" + fields = [] + encoding = inst_data['encoding'][16:] if ilen == 16 else inst_data['encoding'] + msb = ilen - 1 + y = '' + + for i in range(ilen): + x = encoding[i] + if x == '-': + if y: + fields.append((msb, ilen - i, y)) + y = '' + msb -= 1 + else: + y += str(x) + + if i == ilen - 1 and y: + fields.append((msb, 0, y)) + + fields.sort(key=lambda f: f[0], reverse=True) + return fields + + +def generate_dataset_title(title, ilen): + """Generate LaTeX dataset title.""" + return f''' +\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ +\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ +\\cline{{2-{ilen + 1}}} +''' + +def generate_table_header(column_size, ilen, type_entries): + """Generate LaTeX table header.""" + return f''' +\\newpage + +\\begin{{table}}[p] +\\begin{{small}} +\\begin{{center}} + \\begin{{tabular}} {{{column_size}l}} + {" ".join(['&'] * ilen)} \\\\ + + & +{type_entries} +''' + + +def generate_table_footer(caption): + """Generate LaTeX table footer.""" + return f''' + +\\end{{tabular}} +\\end{{center}} +\\end{{small}} +{caption} +\\end{{table}} +''' diff --git a/parse.py b/parse.py index fb0c2e70..586cd66e 100755 --- a/parse.py +++ b/parse.py @@ -1,1093 +1,113 @@ #!/usr/bin/env python3 - -from constants import * -import copy -import re -import glob -import os -import pprint -import logging import collections 
-import yaml +import logging import sys +import yaml +import pprint -pp = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') - -def process_enc_line(line, ext): - ''' - This function processes each line of the encoding files (rv*). As part of - the processing, the function ensures that the encoding is legal through the - following checks:: - - - there is no over specification (same bits assigned different values) - - there is no under specification (some bits not assigned values) - - bit ranges are in the format hi..lo=val where hi > lo - - value assigned is representable in the bit range - - also checks that the mapping of arguments of an instruction exists in - arg_lut. - - If the above checks pass, then the function returns a tuple of the name and - a dictionary containing basic information of the instruction which includes: - - variables: list of arguments used by the instruction whose mapping - exists in the arg_lut dictionary - - encoding: this contains the 32-bit encoding of the instruction where - '-' is used to represent position of arguments and 1/0 is used to - reprsent the static encoding of the bits - - extension: this field contains the rv* filename from which this - instruction was included - - match: hex value representing the bits that need to match to detect - this instruction - - mask: hex value representin the bits that need to be masked to extract - the value required for matching. - ''' - single_dict = {} - - # fill all bits with don't care. we use '-' to represent don't care - # TODO: hardcoded for 32-bits. 
- encoding = ['-'] * 32 - - # get the name of instruction by splitting based on the first space - [name, remaining] = line.split(' ', 1) - - # replace dots with underscores as dot doesn't work with C/Sverilog, etc - name = name.replace('.', '_') - - # remove leading whitespaces - remaining = remaining.lstrip() - - # check each field for it's length and overlapping bits - # ex: 1..0=5 will result in an error --> x overlapping bits - for (s2, s1, entry) in fixed_ranges.findall(remaining): - msb = int(s2) - lsb = int(s1) - - # check msb < lsb - if msb < lsb: - logging.error( - f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in it\'s encoding' - ) - raise SystemExit(1) - - # illegal value assigned as per bit width - entry_value = int(entry, 0) - if entry_value >= (1 << (msb - lsb + 1)): - logging.error( - f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}' - ) - raise SystemExit(1) - - for ind in range(lsb, msb + 1): - # overlapping bits - if encoding[31 - ind] != '-': - logging.error( - f'{line.split(" ")[0]:<10} has {ind} bit overlapping in it\'s opcodes' - ) - raise SystemExit(1) - bit = str((entry_value >> (ind - lsb)) & 1) - encoding[31 - ind] = bit - - # extract bit pattern assignments of the form hi..lo=val - remaining = fixed_ranges.sub(' ', remaining) - - # do the same as above but for = pattern. 
single_fixed is a regex - # expression present in constants.py - for (lsb, value, drop) in single_fixed.findall(remaining): - lsb = int(lsb, 0) - value = int(value, 0) - if encoding[31 - lsb] != '-': - logging.error( - f'{line.split(" ")[0]:<10} has {lsb} bit overlapping in it\'s opcodes' - ) - raise SystemExit(1) - encoding[31 - lsb] = str(value) - - # convert the list of encodings into a single string for match and mask - match = "".join(encoding).replace('-','0') - mask = "".join(encoding).replace('0','1').replace('-','0') - - # check if all args of the instruction are present in arg_lut present in - # constants.py - args = single_fixed.sub(' ', remaining).split() - encoding_args = encoding.copy() - for a in args: - if a not in arg_lut: - if len(parts := a.split('=')) == 2: - existing_arg, new_arg = parts - if existing_arg in arg_lut: - arg_lut[a] = arg_lut[existing_arg] - - else: - logging.error(f' Found field {existing_arg} in variable {a} in instruction {name} whose mapping in arg_lut does not exist') - raise SystemExit(1) - else: - logging.error(f' Found variable {a} in instruction {name} whose mapping in arg_lut does not exist') - raise SystemExit(1) - (msb, lsb) = arg_lut[a] - for ind in range(lsb, msb + 1): - # overlapping bits - if encoding_args[31 - ind] != '-': - logging.error(f' Found variable {a} in instruction {name} overlapping {encoding_args[31 - ind]} variable in bit {ind}') - raise SystemExit(1) - encoding_args[31 - ind] = a - - - # update the fields of the instruction as a dict and return back along with - # the name of the instruction - single_dict['encoding'] = "".join(encoding) - single_dict['variable_fields'] = args - single_dict['extension'] = [os.path.basename(ext)] - single_dict['match']=hex(int(match,2)) - single_dict['mask']=hex(int(mask,2)) - - return (name, single_dict) - -def same_base_isa(ext_name, ext_name_list): - type1 = ext_name.split("_")[0] - for ext_name1 in ext_name_list: - type2 = ext_name1.split("_")[0] - # "rv" mean insn 
for rv32 and rv64 - if (type1 == type2 or - (type2 == "rv" and (type1 == "rv32" or type1 == "rv64")) or - (type1 == "rv" and (type2 == "rv32" or type2 == "rv64"))): - return True - return False - -def overlaps(x, y): - x = x.rjust(len(y), '-') - y = y.rjust(len(x), '-') - - for i in range(0, len(x)): - if not (x[i] == '-' or y[i] == '-' or x[i] == y[i]): - return False - - return True - -def overlap_allowed(a, x, y): - return x in a and y in a[x] or \ - y in a and x in a[y] - -def extension_overlap_allowed(x, y): - return overlap_allowed(overlapping_extensions, x, y) - -def instruction_overlap_allowed(x, y): - return overlap_allowed(overlapping_instructions, x, y) - -def add_segmented_vls_insn(instr_dict): - updated_dict = {} - for k, v in instr_dict.items(): - if "nf" in v['variable_fields']: - for new_key, new_value in expand_nf_field(k,v): - updated_dict[new_key] = new_value - else: - updated_dict[k] = v - return updated_dict - -def expand_nf_field(name, single_dict): - if "nf" not in single_dict['variable_fields']: - logging.error(f"Cannot expand nf field for instruction {name}") - raise SystemExit(1) - - # nf no longer a variable field - single_dict['variable_fields'].remove("nf") - # include nf in mask - single_dict['mask'] = hex(int(single_dict['mask'],16) | 0b111 << 29) - - name_expand_index = name.find('e') - expanded_instructions = [] - for nf in range(0,8): - new_single_dict = copy.deepcopy(single_dict) - new_single_dict['match'] = hex(int(single_dict['match'],16) | nf << 29) - new_single_dict['encoding'] = format(nf, '03b') + single_dict['encoding'][3:] - new_name = name if nf == 0 else name[:name_expand_index] + "seg" + str(nf+1) + name[name_expand_index:] - expanded_instructions.append((new_name, new_single_dict)) - return expanded_instructions - - -def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): - ''' - This function return a dictionary containing all instructions associated - with an extension defined by the 
file_filter input. The file_filter input - needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc. - - Each node of the dictionary will correspond to an instruction which again is - a dictionary. The dictionary contents of each instruction includes: - - variables: list of arguments used by the instruction whose mapping - exists in the arg_lut dictionary - - encoding: this contains the 32-bit encoding of the instruction where - '-' is used to represent position of arguments and 1/0 is used to - reprsent the static encoding of the bits - - extension: this field contains the rv* filename from which this - instruction was included - - match: hex value representing the bits that need to match to detect - this instruction - - mask: hex value representin the bits that need to be masked to extract - the value required for matching. - - In order to build this dictionary, the function does 2 passes over the same - rv file. The first pass is to extract all standard - instructions. In this pass, all pseudo ops and imported instructions are - skipped. For each selected line of the file, we call process_enc_line - function to create the above mentioned dictionary contents of the - instruction. Checks are performed in this function to ensure that the same - instruction is not added twice to the overall dictionary. - - In the second pass, this function parses only pseudo_ops. For each pseudo_op - this function checks if the dependent extension and instruction, both, exist - before parsing it. The pseudo op is only added to the overall dictionary if - the dependent instruction is not present in the dictionary, else it is - skipped. 
- - - ''' - opcodes_dir = os.path.dirname(os.path.realpath(__file__)) - instr_dict = {} - - # file_names contains all files to be parsed in the riscv-opcodes directory - file_names = [] - for fil in file_filter: - file_names += glob.glob(f'{opcodes_dir}/{fil}') - file_names.sort(reverse=True) - # first pass if for standard/regular instructions - logging.debug('Collecting standard instructions first') - for f in file_names: - logging.debug(f'Parsing File: {f} for standard instructions') - with open(f) as fp: - lines = (line.rstrip() - for line in fp) # All lines including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines - if not line.startswith("#")) # remove comment lines - - # go through each line of the file - for line in lines: - # if the an instruction needs to be imported then go to the - # respective file and pick the line that has the instruction. - # The variable 'line' will now point to the new line from the - # imported file - - # ignore all lines starting with $import and $pseudo - if '$import' in line or '$pseudo' in line: - continue - logging.debug(f' Processing line: {line}') - - # call process_enc_line to get the data about the current - # instruction - (name, single_dict) = process_enc_line(line, f) - ext_name = os.path.basename(f) - - # if an instruction has already been added to the filtered - # instruction dictionary throw an error saying the given - # instruction is already imported and raise SystemExit - if name in instr_dict: - var = instr_dict[name]["extension"] - if same_base_isa(ext_name, var): - # disable same names on the same base ISA - err_msg = f'instruction : {name} from ' - err_msg += f'{ext_name} is already ' - err_msg += f'added from {var} in same base ISA' - logging.error(err_msg) - raise SystemExit(1) - elif instr_dict[name]['encoding'] != single_dict['encoding']: - # disable same names with different encodings on different base ISAs - err_msg = f'instruction 
: {name} from ' - err_msg += f'{ext_name} is already ' - err_msg += f'added from {var} but each have different encodings in different base ISAs' - logging.error(err_msg) - raise SystemExit(1) - instr_dict[name]['extension'].extend(single_dict['extension']) - else: - for key in instr_dict: - item = instr_dict[key] - if overlaps(item['encoding'], single_dict['encoding']) and \ - not extension_overlap_allowed(ext_name, item['extension'][0]) and \ - not instruction_overlap_allowed(name, key) and \ - same_base_isa(ext_name, item['extension']): - # disable different names with overlapping encodings on the same base ISA - err_msg = f'instruction : {name} in extension ' - err_msg += f'{ext_name} overlaps instruction {key} ' - err_msg += f'in extension {item["extension"]}' - logging.error(err_msg) - raise SystemExit(1) - - if name not in instr_dict: - # update the final dict with the instruction - instr_dict[name] = single_dict - - # second pass if for pseudo instructions - logging.debug('Collecting pseudo instructions now') - for f in file_names: - logging.debug(f'Parsing File: {f} for pseudo_ops') - with open(f) as fp: - lines = (line.rstrip() - for line in fp) # All lines including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines - if not line.startswith("#")) # remove comment lines - - # go through each line of the file - for line in lines: - - # ignore all lines not starting with $pseudo - if '$pseudo' not in line: - continue - logging.debug(f' Processing line: {line}') - - # use the regex pseudo_regex from constants.py to find the dependent - # extension, dependent instruction, the pseudo_op in question and - # its encoding - (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0] - ext_file = f'{opcodes_dir}/{ext}' - - # check if the file of the dependent extension exist. 
Throw error if - # it doesn't - if not os.path.exists(ext_file): - ext1_file = f'{opcodes_dir}/unratified/{ext}' - if not os.path.exists(ext1_file): - logging.error(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available') - raise SystemExit(1) - else: - ext_file = ext1_file - - # check if the dependent instruction exist in the dependent - # extension. Else throw error. - found = False - for oline in open(ext_file): - if not re.findall(f'^\\s*{orig_inst}\\s+',oline): - continue - else: - found = True - break - if not found: - logging.error(f'Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}') - raise SystemExit(1) - - - (name, single_dict) = process_enc_line(pseudo_inst + ' ' + line, f) - # add the pseudo_op to the dictionary only if the original - # instruction is not already in the dictionary. - if orig_inst.replace('.','_') not in instr_dict \ - or include_pseudo \ - or name in include_pseudo_ops: - - # update the final dict with the instruction - if name not in instr_dict: - instr_dict[name] = single_dict - logging.debug(f' including pseudo_ops:{name}') - else: - if(single_dict['match'] != instr_dict[name]['match']): - instr_dict[name + '_pseudo'] = single_dict - - # if a pseudo instruction has already been added to the filtered - # instruction dictionary but the extension is not in the current - # list, add it - else: - ext_name = single_dict['extension'] - - if (ext_name not in instr_dict[name]['extension']) & (name + '_pseudo' not in instr_dict): - instr_dict[name]['extension'].extend(ext_name) - else: - logging.debug(f' Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list') - - # third pass if for imported instructions - logging.debug('Collecting imported instructions') - for f in file_names: - logging.debug(f'Parsing File: {f} for imported ops') - with open(f) as fp: - lines = (line.rstrip() - for line in fp) # All lines including the blank ones - 
lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines - if not line.startswith("#")) # remove comment lines - - # go through each line of the file - for line in lines: - # if the an instruction needs to be imported then go to the - # respective file and pick the line that has the instruction. - # The variable 'line' will now point to the new line from the - # imported file - - # ignore all lines starting with $import and $pseudo - if '$import' not in line : - continue - logging.debug(f' Processing line: {line}') - - (import_ext, reg_instr) = imported_regex.findall(line)[0] - import_ext_file = f'{opcodes_dir}/{import_ext}' - - # check if the file of the dependent extension exist. Throw error if - # it doesn't - if not os.path.exists(import_ext_file): - ext1_file = f'{opcodes_dir}/unratified/{import_ext}' - if not os.path.exists(ext1_file): - logging.error(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}') - raise SystemExit(1) - else: - ext_file = ext1_file - else: - ext_file = import_ext_file - - # check if the dependent instruction exist in the dependent - # extension. Else throw error. - found = False - for oline in open(ext_file): - if not re.findall(f'^\\s*{reg_instr}\\s+',oline): - continue - else: - found = True - break - if not found: - logging.error(f'imported instruction {reg_instr} not found in {ext_file}. 
Required by {line} present in {f}') - logging.error(f'Note: you cannot import pseudo/imported ops.') - raise SystemExit(1) - - # call process_enc_line to get the data about the current - # instruction - (name, single_dict) = process_enc_line(oline, f) - - # if an instruction has already been added to the filtered - # instruction dictionary throw an error saying the given - # instruction is already imported and raise SystemExit - if name in instr_dict: - var = instr_dict[name]["extension"] - if instr_dict[name]['encoding'] != single_dict['encoding']: - err_msg = f'imported instruction : {name} in ' - err_msg += f'{os.path.basename(f)} is already ' - err_msg += f'added from {var} but each have different encodings for the same instruction' - logging.error(err_msg) - raise SystemExit(1) - instr_dict[name]['extension'].extend(single_dict['extension']) - else: - # update the final dict with the instruction - instr_dict[name] = single_dict - return instr_dict - -def make_priv_latex_table(): - latex_file = open('priv-instr-table.tex','w') - type_list = ['R-type','I-type'] - system_instr = ['_h','_s','_system','_svinval', '64_h'] - dataset_list = [ (system_instr, 'Trap-Return Instructions',['sret','mret'], False) ] - dataset_list.append((system_instr, 'Interrupt-Management Instructions',['wfi'], False)) - dataset_list.append((system_instr, 'Supervisor Memory-Management Instructions',['sfence_vma'], False)) - dataset_list.append((system_instr, 'Hypervisor Memory-Management Instructions',['hfence_vvma', 'hfence_gvma'], False)) - dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions', - ['hlv_b','hlv_bu', 'hlv_h','hlv_hu', 'hlv_w', 'hlvx_hu', 'hlvx_wu', 'hsv_b', 'hsv_h','hsv_w'], False)) - dataset_list.append((system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions, RV64 only', ['hlv_wu','hlv_d','hsv_d'], False)) - dataset_list.append((system_instr, 'Svinval Memory-Management Instructions', ['sinval_vma', 
'sfence_w_inval','sfence_inval_ir', 'hinval_vvma','hinval_gvma'], False)) - caption = '\\caption{RISC-V Privileged Instructions}' - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - latex_file.close() - -def make_latex_table(): - ''' - This function is mean to create the instr-table.tex that is meant to be used - by the riscv-isa-manual. This function basically creates a single latext - file of multiple tables with each table limited to a single page. Only the - last table is assigned a latex-caption. - - For each table we assign a type-list which capture the different instruction - types (R, I, B, etc) that will be required for the table. Then we select the - list of extensions ('_i, '32_i', etc) whose instructions are required to - populate the table. For each extension or collection of extension we can - assign Title, such that in the end they appear as subheadings within - the table (note these are inlined headings and not captions of the table). - - All of the above information is collected/created and sent to - make_ext_latex_table function to dump out the latex contents into a file. - - The last table only has to be given a caption - as per the policy of the - riscv-isa-manual. - ''' - # open the file and use it as a pointer for all further dumps - latex_file = open('instr-table.tex','w') - - # create the rv32i table first. Here we set the caption to empty. We use the - # files rv_i and rv32_i to capture instructions relevant for rv32i - # configuration. The dataset is a list of 4-element tuples : - # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions - # is empty then it indicates that all instructions of the all the extensions - # in list_of_extensions need to be dumped. If not empty, then only the - # instructions listed in list_of_instructions will be dumped into latex. 
- caption = '' - type_list = ['R-type','I-type','S-type','B-type','U-type','J-type'] - dataset_list = [(['_i','32_i'], 'RV32I Base Instruction Set', [], False)] - dataset_list.append((['_i'], '', ['fence_tso','pause'], True)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ['R-type','I-type','S-type'] - dataset_list = [(['64_i'], 'RV64I Base Instruction Set (in addition to RV32I)', [], False)] - dataset_list.append((['_zifencei'], 'RV32/RV64 Zifencei Standard Extension', [], False)) - dataset_list.append((['_zicsr'], 'RV32/RV64 Zicsr Standard Extension', [], False)) - dataset_list.append((['_m','32_m'], 'RV32M Standard Extension', [], False)) - dataset_list.append((['64_m'],'RV64M Standard Extension (in addition to RV32M)', [], False)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ['R-type'] - dataset_list = [(['_a'],'RV32A Standard Extension', [], False)] - dataset_list.append((['64_a'],'RV64A Standard Extension (in addition to RV32A)', [], False)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ['R-type','R4-type','I-type','S-type'] - dataset_list = [(['_f'],'RV32F Standard Extension', [], False)] - dataset_list.append((['64_f'],'RV64F Standard Extension (in addition to RV32F)', [], False)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ['R-type','R4-type','I-type','S-type'] - dataset_list = [(['_d'],'RV32D Standard Extension', [], False)] - dataset_list.append((['64_d'],'RV64D Standard Extension (in addition to RV32D)', [], False)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ['R-type','R4-type','I-type','S-type'] - dataset_list = [(['_q'],'RV32Q Standard Extension', [], False)] - dataset_list.append((['64_q'],'RV64Q Standard Extension (in addition to RV32Q)', [], False)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - caption 
= '\\caption{Instruction listing for RISC-V}' - type_list = ['R-type','R4-type','I-type','S-type'] - dataset_list = [(['_zfh', '_d_zfh','_q_zfh'],'RV32Zfh Standard Extension', [], False)] - dataset_list.append((['64_zfh'],'RV64Zfh Standard Extension (in addition to RV32Zfh)', [], False)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - ## The following is demo to show that Compressed instructions can also be - # dumped in the same manner as above - - #type_list = [''] - #dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] - #dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) - #make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) - - latex_file.close() - -def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): - ''' - For a given collection of extensions this function dumps out a complete - latex table which includes the encodings of the instructions. - - The ilen input indicates the length of the instruction for which the table - is created. - - The caption input is used to create the latex-table caption. - - The type_list input is a list of instruction types (R, I, B, etc) that are - treated as header for each table. Each table will have its own requirements - and type_list must include all the instruction-types that the table needs. - Note, all elements of this list must be present in the latex_inst_type - dictionary defined in constants.py - - The latex_file is a file pointer to which the latex-table will dumped into - - The dataset is a list of 3-element tuples containing: - (list_of_extensions, title, list_of_instructions) - The list_of_extensions must contain all the set of extensions whose - instructions must be populated under a given title. 
If list_of_instructions - is not empty, then only those instructions mentioned in list_of_instructions - present in the extension will be dumped into the latex-table, other - instructions will be ignored. - - Once the above inputs are received then function first creates table entries - for the instruction types. To simplify things, we maintain a dictionary - called latex_inst_type in constants.py which is created in the same way the - instruction dictionary is created. This allows us to re-use the same logic - to create the instruction types table as well - - Once the header is created, we then parse through every entry in the - dataset. For each list dataset entry we use the create_inst_dict function to - create an exhaustive list of instructions associated with the respective - collection of the extension of that dataset. Then we apply the instruction - filter, if any, indicated by the list_of_instructions of that dataset. - Thereon, for each instruction we create a latex table entry. - - Latex table specification for ilen sized instructions: - Each table is created with ilen+1 columns - ilen columns for each bit of the - instruction and one column to hold the name of the instruction. - - For each argument of an instruction we use the arg_lut from constants.py - to identify its position in the encoding, and thus create a multicolumn - entry with the name of the argument as the data. For hardcoded bits, we - do the same where we capture a string of continuous 1s and 0s, identify - the position and assign the same string as the data of the - multicolumn entry in the table. 
- - ''' - column_size = "".join(['p{0.002in}']*(ilen+1)) - - type_entries = ''' - \\multicolumn{3}{l}{31} & - \\multicolumn{2}{r}{27} & - \\multicolumn{1}{c}{26} & - \\multicolumn{1}{r}{25} & - \\multicolumn{3}{l}{24} & - \\multicolumn{2}{r}{20} & - \\multicolumn{3}{l}{19} & - \\multicolumn{2}{r}{15} & - \\multicolumn{2}{l}{14} & - \\multicolumn{1}{r}{12} & - \\multicolumn{4}{l}{11} & - \\multicolumn{1}{r}{7} & - \\multicolumn{6}{l}{6} & - \\multicolumn{1}{r}{0} \\\\ - \\cline{2-33}\n&\n\n -''' if ilen == 32 else ''' - \\multicolumn{1}{c}{15} & - \\multicolumn{1}{c}{14} & - \\multicolumn{1}{c}{13} & - \\multicolumn{1}{c}{12} & - \\multicolumn{1}{c}{11} & - \\multicolumn{1}{c}{10} & - \\multicolumn{1}{c}{9} & - \\multicolumn{1}{c}{8} & - \\multicolumn{1}{c}{7} & - \\multicolumn{1}{c}{6} & - \\multicolumn{1}{c}{5} & - \\multicolumn{1}{c}{4} & - \\multicolumn{1}{c}{3} & - \\multicolumn{1}{c}{2} & - \\multicolumn{1}{c}{1} & - \\multicolumn{1}{c}{0} \\\\ - \\cline{2-17}\n&\n\n -''' - - # depending on the type_list input we create a subset dictionary of - # latex_inst_type dictionary present in constants.py - type_dict = {key: value for key, value in latex_inst_type.items() if key in type_list} - - # iterate ovr each instruction type and create a table entry - for t in type_dict: - fields = [] - - # first capture all "arguments" of the type (funct3, funct7, rd, etc) - # and capture their positions using arg_lut. - for f in type_dict[t]['variable_fields']: - (msb, lsb) = arg_lut[f] - name = f if f not in latex_mapping else latex_mapping[f] - fields.append((msb, lsb, name)) - - # iterate through the 32 bits, starting from the msb, and assign - # argument names to the relevant portions of the instructions. This - # information is stored as a 3-element tuple containing the msb, lsb - # position of the arugment and the name of the argument. 
- msb = ilen - 1 - y = '' - for r in range(0,ilen): - if y != '': - fields.append((msb,ilen-1-r+1,y)) - y = '' - msb = ilen-1-r-1 - if r == 31: - if y != '': - fields.append((msb, 0, y)) - y = '' - - # sort the arguments in decreasing order of msb position - fields.sort(key=lambda y: y[0], reverse=True) - - # for each argument/string of 1s or 0s, create a multicolumn latex table - # entry - entry = '' - for r in range(len(fields)): - (msb, lsb, name) = fields[r] - if r == len(fields)-1: - entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n' - elif r == 0: - entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n' - else: - entry += f'\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n' - entry += f'\\cline{{2-{ilen+1}}}\n&\n\n' - type_entries += entry - - # for each entry in the dataset create a table - content = '' - for (ext_list, title, filter_list, include_pseudo) in dataset: - instr_dict = {} - - # for all extensions list in ext_list, create a dictionary of - # instructions associated with those extensions. - for e in ext_list: - instr_dict.update(create_inst_dict(['rv'+e], include_pseudo)) - - # if filter_list is not empty then use that as the official set of - # instructions that need to be dumped into the latex table - inst_list = list(instr_dict.keys()) if not filter_list else filter_list - - # for each instruction create an latex table entry just like how we did - # above with the instruction-type table. - instr_entries = '' - for inst in inst_list: - if inst not in instr_dict: - logging.error(f'in make_ext_latex_table: Instruction: {inst} not found in instr_dict') - raise SystemExit(1) - fields = [] - - # only if the argument is available in arg_lut we consume it, else - # throw error. 
- for f in instr_dict[inst]['variable_fields']: - if f not in arg_lut: - logging.error(f'Found variable {f} in instruction {inst} whose mapping is not available') - raise SystemExit(1) - (msb,lsb) = arg_lut[f] - name = f.replace('_','.') if f not in latex_mapping else latex_mapping[f] - fields.append((msb, lsb, name)) - - msb = ilen -1 - y = '' - if ilen == 16: - encoding = instr_dict[inst]['encoding'][16:] - else: - encoding = instr_dict[inst]['encoding'] - for r in range(0,ilen): - x = encoding [r] - if ((msb, ilen-1-r+1)) in latex_fixed_fields: - fields.append((msb,ilen-1-r+1,y)) - msb = ilen-1-r - y = '' - if x == '-': - if y != '': - fields.append((msb,ilen-1-r+1,y)) - y = '' - msb = ilen-1-r-1 - else: - y += str(x) - if r == ilen-1: - if y != '': - fields.append((msb, 0, y)) - y = '' - - fields.sort(key=lambda y: y[0], reverse=True) - entry = '' - for r in range(len(fields)): - (msb, lsb, name) = fields[r] - if r == len(fields)-1: - entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n' - elif r == 0: - entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n' +from constants import * +from shared_utils import create_inst_dict, add_segmented_vls_insn +from latex_utils import make_latex_table, make_priv_latex_table +from chisel_utils import make_chisel +from rust_utils import make_rust +from sverilog_utils import make_sverilog +from c_utils import make_c +from go_utils import make_go + +LOG_FORMAT = '%(levelname)s:: %(message)s' +LOG_LEVEL = logging.INFO + +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + +def remove_non_extensions(args): + """ + Removes non-extension flags from the command-line arguments. 
+ """ + extensions = args[1:] + flags = ['-c', '-latex', '-chisel', '-sverilog', '-rust', '-go', '-spinalhdl'] + return [ext for ext in extensions if ext not in flags] + +def process_instruction_dict(extensions, include_pseudo): + """ + Processes the instruction dictionary by creating and adding segmented instructions. + """ + instr_dict = create_inst_dict(extensions, include_pseudo) + instr_dict = add_segmented_vls_insn(instr_dict) + return collections.OrderedDict(sorted(instr_dict.items())) + +def write_yaml(instr_dict, filename='instr_dict.yaml'): + """ + Writes the instruction dictionary to a YAML file. + """ + with open(filename, 'w') as outfile: + yaml.dump(instr_dict, outfile, default_flow_style=False) + +def generate_outputs(instr_dict, extensions): + """ + Generates output files based on selected extensions and flags. + """ + # Dictionary to map extensions to their respective functions and logging messages + extension_map = { + '-c': { + 'function': lambda: make_c(collections.OrderedDict(sorted(create_inst_dict(extensions, False, include_pseudo_ops=emitted_pseudo_ops).items()))), + 'message': 'encoding.out.h generated successfully' + }, + '-chisel': { + 'function': lambda: make_chisel(instr_dict), + 'message': 'inst.chisel generated successfully' + }, + '-spinalhdl': { + 'function': lambda: make_chisel(instr_dict, spinal_hdl=True), + 'message': 'inst.spinalhdl generated successfully' + }, + '-sverilog': { + 'function': lambda: make_sverilog(instr_dict), + 'message': 'inst.sverilog generated successfully' + }, + '-rust': { + 'function': lambda: make_rust(instr_dict), + 'message': 'inst.rs generated successfully' + }, + '-go': { + 'function': lambda: make_go(instr_dict), + 'message': 'inst.go generated successfully' + }, + '-latex': { + 'function': lambda: (make_latex_table(), make_priv_latex_table()), + 'message': [ + 'instr-table.tex generated successfully', + 'priv-instr-table.tex generated successfully' + ] + } + } + + for ext, actions in 
extension_map.items(): + if ext in extensions: + try: + actions['function']() + if isinstance(actions['message'], list): + for msg in actions['message']: + logging.info(msg) else: - entry += f'\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n' - entry += f'\\cline{{2-{ilen+1}}}\n&\n\n' - instr_entries += entry - - # once an entry of the dataset is completed we create the whole table - # with the title of that dataset as sub-heading (sort-of) - if title != '': - content += f''' - -\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ -\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ -\\cline{{2-{ilen+1}}} - - & -{instr_entries} -''' - else: - content += f''' -{instr_entries} -''' - - - header = f''' -\\newpage - -\\begin{{table}}[p] -\\begin{{small}} -\\begin{{center}} - \\begin{{tabular}} {{{column_size}l}} - {" ".join(['&']*ilen)} \\\\ + logging.info(actions['message']) - & -{type_entries} -''' - endtable=f''' + except Exception as e: + logging.error(f"Error generating output for {ext}: {e}") -\\end{{tabular}} -\\end{{center}} -\\end{{small}} -{caption} -\\end{{table}} -''' - # dump the contents and return - latex_file.write(header+content+endtable) - -def instr_dict_2_extensions(instr_dict): - extensions = [] - for item in instr_dict.values(): - if item['extension'][0] not in extensions: - extensions.append(item['extension'][0]) - return extensions - -def make_chisel(instr_dict, spinal_hdl=False): - - chisel_names='' - cause_names_str='' - csr_names_str = '' - for i in instr_dict: - if spinal_hdl: - chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n' - # else: - # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' - if not spinal_hdl: - extensions = instr_dict_2_extensions(instr_dict) - for e in extensions: - e_instrs = filter(lambda i: instr_dict[i]['extension'][0] == e, instr_dict) - if "rv64_" in e: - e_format = e.replace("rv64_", "").upper() 
+ "64" - elif "rv32_" in e: - e_format = e.replace("rv32_", "").upper() + "32" - elif "rv_" in e: - e_format = e.replace("rv_", "").upper() - else: - e_format = e.upper - chisel_names += f' val {e_format+"Type"} = Map(\n' - for instr in e_instrs: - tmp_instr_name = '"'+instr.upper().replace(".","_")+'"' - chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n' - chisel_names += f' )\n' - - for num, name in causes: - cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' - cause_names_str += ''' val all = { - val res = collection.mutable.ArrayBuffer[Int]() -''' - for num, name in causes: - cause_names_str += f' res += {name.lower().replace(" ","_")}\n' - cause_names_str += ''' res.toArray - }''' - - for num, name in csrs+csrs32: - csr_names_str += f' val {name} = {hex(num)}\n' - csr_names_str += ''' val all = { - val res = collection.mutable.ArrayBuffer[Int]() -''' - for num, name in csrs: - csr_names_str += f''' res += {name}\n''' - csr_names_str += ''' res.toArray - } - val all32 = { - val res = collection.mutable.ArrayBuffer(all:_*) -''' - for num, name in csrs32: - csr_names_str += f''' res += {name}\n''' - csr_names_str += ''' res.toArray - }''' - - if spinal_hdl: - chisel_file = open('inst.spinalhdl','w') - else: - chisel_file = open('inst.chisel','w') - chisel_file.write(f''' -/* Automatically generated by parse_opcodes */ -object Instructions {{ -{chisel_names} -}} -object Causes {{ -{cause_names_str} -}} -object CSRs {{ -{csr_names_str} -}} -''') - chisel_file.close() - -def make_rust(instr_dict): - mask_match_str= '' - for i in instr_dict: - mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' - mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' - for num, name in csrs+csrs32: - mask_match_str += f'const CSR_{name.upper()}: u16 = {hex(num)};\n' - for num, name in causes: - mask_match_str += 
f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' - rust_file = open('inst.rs','w') - rust_file.write(f''' -/* Automatically generated by parse_opcodes */ -{mask_match_str} -''') - rust_file.close() - -def make_sverilog(instr_dict): - names_str = '' - for i in instr_dict: - names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n" - names_str += ' /* CSR Addresses */\n' - for num, name in csrs+csrs32: - names_str += f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" - - sverilog_file = open('inst.sverilog','w') - sverilog_file.write(f''' -/* Automatically generated by parse_opcodes */ -package riscv_instr; -{names_str} -endpackage -''') - sverilog_file.close() -def make_c(instr_dict): - mask_match_str = '' - declare_insn_str = '' - for i in instr_dict: - mask_match_str += f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' - mask_match_str += f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' - declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' - - csr_names_str = '' - declare_csr_str = '' - for num, name in csrs+csrs32: - csr_names_str += f'#define CSR_{name.upper()} {hex(num)}\n' - declare_csr_str += f'DECLARE_CSR({name}, CSR_{name.upper()})\n' - - causes_str= '' - declare_cause_str = '' - for num, name in causes: - causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" - declare_cause_str += f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" - - arg_str = '' - for name, rng in arg_lut.items(): - begin = rng[1] - end = rng[0] - mask = ((1 << (end - begin + 1)) - 1) << begin - arg_str += f"#define INSN_FIELD_{name.upper().replace(' ', '_')} {hex(mask)}\n" - - with open(f'{os.path.dirname(__file__)}/encoding.h', 'r') as file: - enc_header = file.read() - - commit = os.popen('git log -1 
--format="format:%h"').read() - enc_file = open('encoding.out.h','w') - enc_file.write(f'''/* SPDX-License-Identifier: BSD-3-Clause */ - -/* Copyright (c) 2023 RISC-V International */ - -/* - * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes ({commit}) - */ - -{enc_header} -/* Automatically generated by parse_opcodes. */ -#ifndef RISCV_ENCODING_H -#define RISCV_ENCODING_H -{mask_match_str} -{csr_names_str} -{causes_str} -{arg_str}#endif -#ifdef DECLARE_INSN -{declare_insn_str}#endif -#ifdef DECLARE_CSR -{declare_csr_str}#endif -#ifdef DECLARE_CAUSE -{declare_cause_str}#endif -''') - enc_file.close() - -def make_go(instr_dict): - - args = " ".join(sys.argv) - prelude = f'''// Code generated by {args}; DO NOT EDIT.''' - - prelude += ''' -package riscv - -import "cmd/internal/obj" - -type inst struct { - opcode uint32 - funct3 uint32 - rs1 uint32 - rs2 uint32 - csr int64 - funct7 uint32 -} - -func encode(a obj.As) *inst { - switch a { -''' - - endoffile = ''' } - return nil -} -''' - - instr_str = '' - for i in instr_dict: - enc_match = int(instr_dict[i]['match'],0) - opcode = (enc_match >> 0) & ((1<<7)-1) - funct3 = (enc_match >> 12) & ((1<<3)-1) - rs1 = (enc_match >> 15) & ((1<<5)-1) - rs2 = (enc_match >> 20) & ((1<<5)-1) - csr = (enc_match >> 20) & ((1<<12)-1) - funct7 = (enc_match >> 25) & ((1<<7)-1) - instr_str += f''' case A{i.upper().replace("_","")}: - return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }} -''' - - with open('inst.go','w') as file: - file.write(prelude) - file.write(instr_str) - file.write(endoffile) - - try: - import subprocess - subprocess.run(["go", "fmt", "inst.go"]) - except: - pass - -def signed(value, width): - if 0 <= value < (1<<(width-1)): - return value - else: - return value - (1< lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. 
+ + If the above checks pass, then the function returns a tuple of the name and + a dictionary containing basic information of the instruction which includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + represent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representing the bits that need to be masked to extract + the value required for matching. + ''' + encoding = initialize_encoding() + name, remaining = parse_instruction_name(line) + + # Fixed ranges of the form hi..lo=val + process_fixed_ranges(remaining, encoding, line) + + # Single fixed values of the form = + remaining = process_single_fixed(remaining, encoding, line) + + # Create match and mask strings + match, mask = create_match_and_mask(encoding) + + # Process instruction arguments + args = process_arguments(remaining, encoding, name) + + # Create and return the final instruction dictionary + instruction_dict = create_instruction_dict(encoding, args, ext, match, mask) + + return name, instruction_dict + + +def initialize_encoding(): + """Initialize a 32-bit encoding with '-' representing 'don't care'.""" + return ['-'] * 32 + + +def parse_instruction_name(line): + """Extract the instruction name and remaining part of the line.""" + name, remaining = line.split(' ', 1) + name = name.replace('.', '_').lstrip() + return name, remaining + + +def process_fixed_ranges(remaining, encoding, line): + """Process bit ranges of the form hi..lo=val, checking for errors and updating encoding.""" + for s2, s1, entry in fixed_ranges.findall(remaining): + msb, lsb = int(s2), int(s1) + validate_bit_range(msb, lsb, line) + 
validate_entry_value(msb, lsb, entry, line) + update_encoding(msb, lsb, entry, encoding, line) + + +def validate_bit_range(msb, lsb, line): + """Ensure that msb > lsb and raise an error if not.""" + if msb < lsb: + log_and_exit(f"{get_instruction_name(line)} has msb < lsb in its encoding") + + +def validate_entry_value(msb, lsb, entry, line): + """Ensure that the value assigned to a bit range is legal for its width.""" + entry_value = int(entry, 0) + if entry_value >= (1 << (msb - lsb + 1)): + log_and_exit(f"{get_instruction_name(line)} has an illegal value for the bit width {msb - lsb}") + + +def update_encoding(msb, lsb, entry, encoding, line): + """Update the encoding array for a given bit range.""" + entry_value = int(entry, 0) + for ind in range(lsb, msb + 1): + if encoding[31 - ind] != '-': + log_and_exit(f"{get_instruction_name(line)} has overlapping bits in its opcodes") + encoding[31 - ind] = str((entry_value >> (ind - lsb)) & 1) + + +def process_single_fixed(remaining, encoding, line): + """Process single fixed values of the form =.""" + for lsb, value, _ in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + if encoding[31 - lsb] != '-': + log_and_exit(f"{get_instruction_name(line)} has overlapping bits in its opcodes") + encoding[31 - lsb] = str(value) + return fixed_ranges.sub(' ', remaining) + + +def create_match_and_mask(encoding): + """Generate match and mask strings from the encoding array.""" + match = ''.join(encoding).replace('-', '0') + mask = ''.join(encoding).replace('0', '1').replace('-', '0') + return match, mask + + +def process_arguments(remaining, encoding, name): + """Process instruction arguments and update the encoding with argument positions.""" + args = single_fixed.sub(' ', remaining).split() + encoding_args = encoding.copy() + for arg in args: + if arg not in arg_lut: + handle_missing_arg(arg, name) + msb, lsb = arg_lut[arg] + update_arg_encoding(msb, lsb, arg, encoding_args, name) + return args, 
encoding_args + + +def handle_missing_arg(arg, name): + """Handle missing argument mapping in arg_lut.""" + if '=' in arg and (existing_arg := arg.split('=')[0]) in arg_lut: + arg_lut[arg] = arg_lut[existing_arg] + else: + log_and_exit(f"Variable {arg} in instruction {name} not mapped in arg_lut") + + +def update_arg_encoding(msb, lsb, arg, encoding_args, name): + """Update the encoding array with the argument positions.""" + for ind in range(lsb, msb + 1): + if encoding_args[31 - ind] != '-': + log_and_exit(f"Variable {arg} overlaps in bit {ind} in instruction {name}") + encoding_args[31 - ind] = arg + + +def create_instruction_dict(encoding, args, ext, match, mask): + """Create the final dictionary for the instruction.""" + return { + 'encoding': ''.join(encoding), + 'variable_fields': args, + 'extension': [os.path.basename(ext)], + 'match': hex(int(match, 2)), + 'mask': hex(int(mask, 2)), + } + + +def log_and_exit(message): + """Log an error message and exit the program.""" + logging.error(message) + raise SystemExit(1) + + +def get_instruction_name(line): + """Helper to extract the instruction name from a line.""" + return line.split(' ')[0] + +def overlaps(x, y): + """ + Check if two bit strings overlap without conflicts. + + Args: + x (str): First bit string. + y (str): Second bit string. + + Returns: + bool: True if the bit strings overlap without conflicts, False otherwise. + + In the context of RISC-V opcodes, this function ensures that the bit ranges + defined by two different bit strings do not conflict. + """ + + # Minimum length of the two strings + min_len = min(len(x), len(y)) + + for char_x, char_y in zip(x[:min_len], y[:min_len]): + if char_x != '-' and char_y != '-' and char_x != char_y: + return False + + return True + + +def overlap_allowed(a, x, y): + """ + Check if there is an overlap between keys and values in a dictionary. + + Args: + a (dict): The dictionary where keys are mapped to sets or lists of keys. + x (str): The first key to check. 
+ y (str): The second key to check. + + Returns: + bool: True if both (x, y) or (y, x) are present in the dictionary + as described, False otherwise. + + This function determines if `x` is a key in the dictionary `a` and + its corresponding value contains `y`, or if `y` is a key and its + corresponding value contains `x`. + """ + + return x in a and y in a[x] or \ + y in a and x in a[y] + + +# Checks if overlap between two extensions is allowed +def extension_overlap_allowed(x, y): + return overlap_allowed(overlapping_extensions, x, y) + + +# Checks if overlap between two instructions is allowed +def instruction_overlap_allowed(x, y): + return overlap_allowed(overlapping_instructions, x, y) + + +# Checks if ext_name shares the same base ISA with any in ext_name_list +def same_base_isa(ext_name, ext_name_list): + type1 = ext_name.split("_")[0] + for ext_name1 in ext_name_list: + type2 = ext_name1.split("_")[0] + if type1 == type2 or \ + (type2 == "rv" and type1 in ["rv32", "rv64"]) or \ + (type1 == "rv" and type2 in ["rv32", "rv64"]): + return True + return False + + +# Expands instructions with "nf" field in variable_fields, otherwise returns unchanged +def add_segmented_vls_insn(instr_dict): + updated_dict = {} + for k, v in instr_dict.items(): + if "nf" in v['variable_fields']: + updated_dict.update(expand_nf_field(k, v)) + else: + updated_dict[k] = v + return updated_dict + + +# Expands nf field in instruction name and updates instruction details +def expand_nf_field(name, single_dict): + if "nf" not in single_dict['variable_fields']: + logging.error(f"Cannot expand nf field for instruction {name}") + raise SystemExit(1) + + single_dict['variable_fields'].remove("nf") # Remove "nf" from variable fields + single_dict['mask'] = hex(int(single_dict['mask'], 16) | (0b111 << 29)) # Update mask + + name_expand_index = name.find('e') + expanded_instructions = [] + for nf in range(8): # Expand nf for values 0 to 7 + new_single_dict = copy.deepcopy(single_dict) + 
new_single_dict['match'] = hex(int(single_dict['match'], 16) | (nf << 29)) + new_single_dict['encoding'] = format(nf, '03b') + single_dict['encoding'][3:] + new_name = name if nf == 0 else f"{name[:name_expand_index]}seg{nf+1}{name[name_expand_index:]}" + expanded_instructions.append((new_name, new_single_dict)) + return expanded_instructions + + +# Extracts the extensions used in an instruction dictionary +def instr_dict_2_extensions(instr_dict): + return list({item['extension'][0] for item in instr_dict.values()}) + + +# Returns signed interpretation of a value within a given width +def signed(value, width): + return value if 0 <= value < (1 << (width - 1)) else value - (1 << width) + + +def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): + ''' + This function return a dictionary containing all instructions associated + with an extension defined by the file_filter input. The file_filter input + needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc. + + Each node of the dictionary will correspond to an instruction which again is + a dictionary. The dictionary contents of each instruction includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. + + In order to build this dictionary, the function does 2 passes over the same + rv file. The first pass is to extract all standard + instructions. In this pass, all pseudo ops and imported instructions are + skipped. 
For each selected line of the file, we call process_enc_line + function to create the above mentioned dictionary contents of the + instruction. Checks are performed in this function to ensure that the same + instruction is not added twice to the overall dictionary. + + In the second pass, this function parses only pseudo_ops. For each pseudo_op + this function checks if the dependent extension and instruction, both, exist + before parsing it. The pseudo op is only added to the overall dictionary if + the dependent instruction is not present in the dictionary, else it is + skipped. + + + ''' + opcodes_dir = os.path.dirname(os.path.realpath(__file__)) + instr_dict = {} + + # file_names contains all files to be parsed in the riscv-opcodes directory + file_names = [] + for fil in file_filter: + file_names += glob.glob(f'{opcodes_dir}/{fil}') + file_names.sort(reverse=True) + # first pass if for standard/regular instructions + logging.debug('Collecting standard instructions first') + for f in file_names: + logging.debug(f'Parsing File: {f} for standard instructions') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + # if the an instruction needs to be imported then go to the + # respective file and pick the line that has the instruction. 
+ # The variable 'line' will now point to the new line from the + # imported file + + # ignore all lines starting with $import and $pseudo + if '$import' in line or '$pseudo' in line: + continue + logging.debug(f' Processing line: {line}') + + # call process_enc_line to get the data about the current + # instruction + (name, single_dict) = process_enc_line(line, f) + ext_name = os.path.basename(f) + + # if an instruction has already been added to the filtered + # instruction dictionary throw an error saying the given + # instruction is already imported and raise SystemExit + if name in instr_dict: + var = instr_dict[name]["extension"] + if same_base_isa(ext_name, var): + # disable same names on the same base ISA + err_msg = f'instruction : {name} from ' + err_msg += f'{ext_name} is already ' + err_msg += f'added from {var} in same base ISA' + logging.error(err_msg) + raise SystemExit(1) + elif instr_dict[name]['encoding'] != single_dict['encoding']: + # disable same names with different encodings on different base ISAs + err_msg = f'instruction : {name} from ' + err_msg += f'{ext_name} is already ' + err_msg += f'added from {var} but each have different encodings in different base ISAs' + logging.error(err_msg) + raise SystemExit(1) + instr_dict[name]['extension'].extend(single_dict['extension']) + else: + for key in instr_dict: + item = instr_dict[key] + if overlaps(item['encoding'], single_dict['encoding']) and \ + not extension_overlap_allowed(ext_name, item['extension'][0]) and \ + not instruction_overlap_allowed(name, key) and \ + same_base_isa(ext_name, item['extension']): + # disable different names with overlapping encodings on the same base ISA + err_msg = f'instruction : {name} in extension ' + err_msg += f'{ext_name} overlaps instruction {key} ' + err_msg += f'in extension {item["extension"]}' + logging.error(err_msg) + raise SystemExit(1) + + if name not in instr_dict: + # update the final dict with the instruction + instr_dict[name] = single_dict + + # 
second pass if for pseudo instructions + logging.debug('Collecting pseudo instructions now') + for f in file_names: + logging.debug(f'Parsing File: {f} for pseudo_ops') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + + # ignore all lines not starting with $pseudo + if '$pseudo' not in line: + continue + logging.debug(f' Processing line: {line}') + + # use the regex pseudo_regex from constants.py to find the dependent + # extension, dependent instruction, the pseudo_op in question and + # its encoding + (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0] + ext_file = f'{opcodes_dir}/{ext}' + + # check if the file of the dependent extension exist. Throw error if + # it doesn't + if not os.path.exists(ext_file): + ext1_file = f'{opcodes_dir}/unratified/{ext}' + if not os.path.exists(ext1_file): + logging.error(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available') + raise SystemExit(1) + else: + ext_file = ext1_file + + # check if the dependent instruction exist in the dependent + # extension. Else throw error. + found = False + for oline in open(ext_file): + if not re.findall(f'^\\s*{orig_inst}\\s+',oline): + continue + else: + found = True + break + if not found: + logging.error(f'Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}') + raise SystemExit(1) + + + (name, single_dict) = process_enc_line(pseudo_inst + ' ' + line, f) + # add the pseudo_op to the dictionary only if the original + # instruction is not already in the dictionary. 
+ if orig_inst.replace('.','_') not in instr_dict \ + or include_pseudo \ + or name in include_pseudo_ops: + + # update the final dict with the instruction + if name not in instr_dict: + instr_dict[name] = single_dict + logging.debug(f' including pseudo_ops:{name}') + else: + if(single_dict['match'] != instr_dict[name]['match']): + instr_dict[name + '_pseudo'] = single_dict + + # if a pseudo instruction has already been added to the filtered + # instruction dictionary but the extension is not in the current + # list, add it + else: + ext_name = single_dict['extension'] + + if (ext_name not in instr_dict[name]['extension']) & (name + '_pseudo' not in instr_dict): + instr_dict[name]['extension'].extend(ext_name) + else: + logging.debug(f' Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list') + + # third pass if for imported instructions + logging.debug('Collecting imported instructions') + for f in file_names: + logging.debug(f'Parsing File: {f} for imported ops') + with open(f) as fp: + lines = (line.rstrip() + for line in fp) # All lines including the blank ones + lines = list(line for line in lines if line) # Non-blank lines + lines = list( + line for line in lines + if not line.startswith("#")) # remove comment lines + + # go through each line of the file + for line in lines: + # if the an instruction needs to be imported then go to the + # respective file and pick the line that has the instruction. + # The variable 'line' will now point to the new line from the + # imported file + + # ignore all lines starting with $import and $pseudo + if '$import' not in line : + continue + logging.debug(f' Processing line: {line}') + + (import_ext, reg_instr) = imported_regex.findall(line)[0] + import_ext_file = f'{opcodes_dir}/{import_ext}' + + # check if the file of the dependent extension exist. 
Throw error if + # it doesn't + if not os.path.exists(import_ext_file): + ext1_file = f'{opcodes_dir}/unratified/{import_ext}' + if not os.path.exists(ext1_file): + logging.error(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}') + raise SystemExit(1) + else: + ext_file = ext1_file + else: + ext_file = import_ext_file + + # check if the dependent instruction exist in the dependent + # extension. Else throw error. + found = False + for oline in open(ext_file): + if not re.findall(f'^\\s*{reg_instr}\\s+',oline): + continue + else: + found = True + break + if not found: + logging.error(f'imported instruction {reg_instr} not found in {ext_file}. Required by {line} present in {f}') + logging.error(f'Note: you cannot import pseudo/imported ops.') + raise SystemExit(1) + + # call process_enc_line to get the data about the current + # instruction + (name, single_dict) = process_enc_line(oline, f) + + # if an instruction has already been added to the filtered + # instruction dictionary throw an error saying the given + # instruction is already imported and raise SystemExit + if name in instr_dict: + var = instr_dict[name]["extension"] + if instr_dict[name]['encoding'] != single_dict['encoding']: + err_msg = f'imported instruction : {name} in ' + err_msg += f'{os.path.basename(f)} is already ' + err_msg += f'added from {var} but each have different encodings for the same instruction' + logging.error(err_msg) + raise SystemExit(1) + instr_dict[name]['extension'].extend(single_dict['extension']) + else: + # update the final dict with the instruction + instr_dict[name] = single_dict + return instr_dict + + diff --git a/sverilog_utils.py b/sverilog_utils.py new file mode 100644 index 00000000..4a6ace10 --- /dev/null +++ b/sverilog_utils.py @@ -0,0 +1,30 @@ +import re +import glob +import os +import pprint +import logging +import collections +import yaml +import sys +# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, 
instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') + +def make_sverilog(instr_dict): + names_str = '' + for i in instr_dict: + names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n" + names_str += ' /* CSR Addresses */\n' + for num, name in csrs+csrs32: + names_str += f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" + + sverilog_file = open('inst.sverilog','w') + sverilog_file.write(f''' +/* Automatically generated by parse_opcodes */ +package riscv_instr; +{names_str} +endpackage +''') + sverilog_file.close() \ No newline at end of file From 3dd127374aefd57482c999de00489522fee12764 Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Tue, 17 Sep 2024 01:36:20 +0530 Subject: [PATCH 02/18] modified test.py for running test cases --- test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test.py b/test.py index 9ddd0727..d2f37511 100644 --- a/test.py +++ b/test.py @@ -3,6 +3,7 @@ from parse import * import logging import unittest +from shared_utils import * class EncodingLineTest(unittest.TestCase): def setUp(self): From 88e98091df01469d3a7cc14070ceab175d63b1cd Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Wed, 25 Sep 2024 22:46:56 +0530 Subject: [PATCH 03/18] Optimized and modularized method for Instruction Dictionary --- shared_utils.py | 367 +++++++++++++++++------------------------------- 1 file changed, 130 insertions(+), 237 deletions(-) diff --git a/shared_utils.py b/shared_utils.py index 0fc6db91..c10d175f 100644 --- a/shared_utils.py +++ b/shared_utils.py @@ -283,11 +283,110 @@ def signed(value, width): return value if 0 <= value < (1 << (width - 1)) else value - (1 << width) +def read_lines(file): + """Reads lines from a file and returns non-blank, non-comment lines.""" + with 
open(file) as fp: + lines = (line.rstrip() for line in fp) + return [line for line in lines if line and not line.startswith("#")] + +def process_standard_instructions(lines, instr_dict, file_name): + """Processes standard instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if '$import' in line or '$pseudo' in line: + continue + logging.debug(f'Processing line: {line}') + name, single_dict = process_enc_line(line, file_name) + ext_name = os.path.basename(file_name) + + if name in instr_dict: + var = instr_dict[name]["extension"] + if same_base_isa(ext_name, var): + log_and_exit(f'Instruction {name} from {ext_name} is already added from {var} in same base ISA') + elif instr_dict[name]['encoding'] != single_dict['encoding']: + log_and_exit(f'Instruction {name} from {ext_name} has different encodings in different base ISAs') + + instr_dict[name]['extension'].extend(single_dict['extension']) + else: + for key, item in instr_dict.items(): + if overlaps(item['encoding'], single_dict['encoding']) and \ + not extension_overlap_allowed(ext_name, item['extension'][0]) and \ + not instruction_overlap_allowed(name, key) and \ + same_base_isa(ext_name, item['extension']): + log_and_exit(f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}') + + instr_dict[name] = single_dict + +def process_pseudo_instructions(lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops): + """Processes pseudo instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if '$pseudo' not in line: + continue + logging.debug(f'Processing pseudo line: {line}') + ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0] + ext_file = find_extension_file(ext, opcodes_dir) + + validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst) + + name, single_dict = process_enc_line(f'{pseudo_inst} {line_content}', file_name) + if 
orig_inst.replace('.', '_') not in instr_dict or include_pseudo or name in include_pseudo_ops: + if name not in instr_dict: + instr_dict[name] = single_dict + logging.debug(f'Including pseudo_op: {name}') + else: + if single_dict['match'] != instr_dict[name]['match']: + instr_dict[f'{name}_pseudo'] = single_dict + elif single_dict['extension'] not in instr_dict[name]['extension']: + instr_dict[name]['extension'].extend(single_dict['extension']) + +def process_imported_instructions(lines, instr_dict, file_name, opcodes_dir): + """Processes imported instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if '$import' not in line: + continue + logging.debug(f'Processing imported line: {line}') + import_ext, reg_instr = imported_regex.findall(line)[0] + ext_file = find_extension_file(import_ext, opcodes_dir) + + validate_instruction_in_extension(reg_instr, ext_file, file_name, line) + + for oline in open(ext_file): + if re.findall(f'^\\s*{reg_instr}\\s+', oline): + name, single_dict = process_enc_line(oline, file_name) + if name in instr_dict: + if instr_dict[name]['encoding'] != single_dict['encoding']: + log_and_exit(f'Imported instruction {name} from {os.path.basename(file_name)} has different encodings') + instr_dict[name]['extension'].extend(single_dict['extension']) + else: + instr_dict[name] = single_dict + break + +def find_extension_file(ext, opcodes_dir): + """Finds the extension file path, considering the unratified directory if necessary.""" + ext_file = f'{opcodes_dir}/{ext}' + if not os.path.exists(ext_file): + ext_file = f'{opcodes_dir}/unratified/{ext}' + if not os.path.exists(ext_file): + log_and_exit(f'Extension {ext} not found.') + return ext_file + +def validate_instruction_in_extension(inst, ext_file, file_name, pseudo_inst): + """Validates if the original instruction exists in the dependent extension.""" + found = False + for oline in open(ext_file): + if re.findall(f'^\\s*{inst}\\s+', oline): + found = 
True + break + if not found: + log_and_exit(f'Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}') + def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): + """Creates a dictionary of instructions based on the provided file filters.""" + ''' This function return a dictionary containing all instructions associated - with an extension defined by the file_filter input. The file_filter input - needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc. + with an extension defined by the file_filter input. + + Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. Each node of the dictionary will correspond to an instruction which again is a dictionary. The dictionary contents of each instruction includes: @@ -302,246 +401,40 @@ def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): this instruction - mask: hex value representin the bits that need to be masked to extract the value required for matching. - + In order to build this dictionary, the function does 2 passes over the same - rv file. The first pass is to extract all standard - instructions. In this pass, all pseudo ops and imported instructions are - skipped. For each selected line of the file, we call process_enc_line - function to create the above mentioned dictionary contents of the - instruction. Checks are performed in this function to ensure that the same - instruction is not added twice to the overall dictionary. - - In the second pass, this function parses only pseudo_ops. For each pseudo_op - this function checks if the dependent extension and instruction, both, exist - before parsing it. The pseudo op is only added to the overall dictionary if - the dependent instruction is not present in the dictionary, else it is - skipped. - - + rv file: + - First pass: extracts all standard instructions, skipping pseudo ops + and imported instructions. 
For each selected line, the `process_enc_line` + function is called to create the dictionary contents of the instruction. + Checks are performed to ensure that the same instruction is not added + twice to the overall dictionary. + - Second pass: parses only pseudo_ops. For each pseudo_op, the function: + - Checks if the dependent extension and instruction exist. + - Adds the pseudo_op to the dictionary if the dependent instruction + is not already present; otherwise, it is skipped. ''' opcodes_dir = os.path.dirname(os.path.realpath(__file__)) instr_dict = {} - # file_names contains all files to be parsed in the riscv-opcodes directory - file_names = [] - for fil in file_filter: - file_names += glob.glob(f'{opcodes_dir}/{fil}') - file_names.sort(reverse=True) - # first pass if for standard/regular instructions - logging.debug('Collecting standard instructions first') - for f in file_names: - logging.debug(f'Parsing File: {f} for standard instructions') - with open(f) as fp: - lines = (line.rstrip() - for line in fp) # All lines including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines - if not line.startswith("#")) # remove comment lines - - # go through each line of the file - for line in lines: - # if the an instruction needs to be imported then go to the - # respective file and pick the line that has the instruction. 
- # The variable 'line' will now point to the new line from the - # imported file - - # ignore all lines starting with $import and $pseudo - if '$import' in line or '$pseudo' in line: - continue - logging.debug(f' Processing line: {line}') - - # call process_enc_line to get the data about the current - # instruction - (name, single_dict) = process_enc_line(line, f) - ext_name = os.path.basename(f) - - # if an instruction has already been added to the filtered - # instruction dictionary throw an error saying the given - # instruction is already imported and raise SystemExit - if name in instr_dict: - var = instr_dict[name]["extension"] - if same_base_isa(ext_name, var): - # disable same names on the same base ISA - err_msg = f'instruction : {name} from ' - err_msg += f'{ext_name} is already ' - err_msg += f'added from {var} in same base ISA' - logging.error(err_msg) - raise SystemExit(1) - elif instr_dict[name]['encoding'] != single_dict['encoding']: - # disable same names with different encodings on different base ISAs - err_msg = f'instruction : {name} from ' - err_msg += f'{ext_name} is already ' - err_msg += f'added from {var} but each have different encodings in different base ISAs' - logging.error(err_msg) - raise SystemExit(1) - instr_dict[name]['extension'].extend(single_dict['extension']) - else: - for key in instr_dict: - item = instr_dict[key] - if overlaps(item['encoding'], single_dict['encoding']) and \ - not extension_overlap_allowed(ext_name, item['extension'][0]) and \ - not instruction_overlap_allowed(name, key) and \ - same_base_isa(ext_name, item['extension']): - # disable different names with overlapping encodings on the same base ISA - err_msg = f'instruction : {name} in extension ' - err_msg += f'{ext_name} overlaps instruction {key} ' - err_msg += f'in extension {item["extension"]}' - logging.error(err_msg) - raise SystemExit(1) - - if name not in instr_dict: - # update the final dict with the instruction - instr_dict[name] = single_dict - - # 
second pass if for pseudo instructions - logging.debug('Collecting pseudo instructions now') - for f in file_names: - logging.debug(f'Parsing File: {f} for pseudo_ops') - with open(f) as fp: - lines = (line.rstrip() - for line in fp) # All lines including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines - if not line.startswith("#")) # remove comment lines - - # go through each line of the file - for line in lines: - - # ignore all lines not starting with $pseudo - if '$pseudo' not in line: - continue - logging.debug(f' Processing line: {line}') - - # use the regex pseudo_regex from constants.py to find the dependent - # extension, dependent instruction, the pseudo_op in question and - # its encoding - (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0] - ext_file = f'{opcodes_dir}/{ext}' - - # check if the file of the dependent extension exist. Throw error if - # it doesn't - if not os.path.exists(ext_file): - ext1_file = f'{opcodes_dir}/unratified/{ext}' - if not os.path.exists(ext1_file): - logging.error(f'Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available') - raise SystemExit(1) - else: - ext_file = ext1_file - - # check if the dependent instruction exist in the dependent - # extension. Else throw error. - found = False - for oline in open(ext_file): - if not re.findall(f'^\\s*{orig_inst}\\s+',oline): - continue - else: - found = True - break - if not found: - logging.error(f'Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}') - raise SystemExit(1) - - - (name, single_dict) = process_enc_line(pseudo_inst + ' ' + line, f) - # add the pseudo_op to the dictionary only if the original - # instruction is not already in the dictionary. 
- if orig_inst.replace('.','_') not in instr_dict \ - or include_pseudo \ - or name in include_pseudo_ops: - - # update the final dict with the instruction - if name not in instr_dict: - instr_dict[name] = single_dict - logging.debug(f' including pseudo_ops:{name}') - else: - if(single_dict['match'] != instr_dict[name]['match']): - instr_dict[name + '_pseudo'] = single_dict - - # if a pseudo instruction has already been added to the filtered - # instruction dictionary but the extension is not in the current - # list, add it - else: - ext_name = single_dict['extension'] - - if (ext_name not in instr_dict[name]['extension']) & (name + '_pseudo' not in instr_dict): - instr_dict[name]['extension'].extend(ext_name) - else: - logging.debug(f' Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list') + file_names = [file for fil in file_filter for file in sorted(glob.glob(f'{opcodes_dir}/{fil}'), reverse=True)] + + logging.debug('Collecting standard instructions') + for file_name in file_names: + logging.debug(f'Parsing File: {file_name} for standard instructions') + lines = read_lines(file_name) + process_standard_instructions(lines, instr_dict, file_name) + + logging.debug('Collecting pseudo instructions') + for file_name in file_names: + logging.debug(f'Parsing File: {file_name} for pseudo instructions') + lines = read_lines(file_name) + process_pseudo_instructions(lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops) - # third pass if for imported instructions logging.debug('Collecting imported instructions') - for f in file_names: - logging.debug(f'Parsing File: {f} for imported ops') - with open(f) as fp: - lines = (line.rstrip() - for line in fp) # All lines including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines - if not line.startswith("#")) # remove comment lines - - # go through each line of the file - for line in lines: - 
# if the an instruction needs to be imported then go to the - # respective file and pick the line that has the instruction. - # The variable 'line' will now point to the new line from the - # imported file - - # ignore all lines starting with $import and $pseudo - if '$import' not in line : - continue - logging.debug(f' Processing line: {line}') - - (import_ext, reg_instr) = imported_regex.findall(line)[0] - import_ext_file = f'{opcodes_dir}/{import_ext}' - - # check if the file of the dependent extension exist. Throw error if - # it doesn't - if not os.path.exists(import_ext_file): - ext1_file = f'{opcodes_dir}/unratified/{import_ext}' - if not os.path.exists(ext1_file): - logging.error(f'Instruction {reg_instr} in {f} cannot be imported from {import_ext}') - raise SystemExit(1) - else: - ext_file = ext1_file - else: - ext_file = import_ext_file - - # check if the dependent instruction exist in the dependent - # extension. Else throw error. - found = False - for oline in open(ext_file): - if not re.findall(f'^\\s*{reg_instr}\\s+',oline): - continue - else: - found = True - break - if not found: - logging.error(f'imported instruction {reg_instr} not found in {ext_file}. 
Required by {line} present in {f}') - logging.error(f'Note: you cannot import pseudo/imported ops.') - raise SystemExit(1) - - # call process_enc_line to get the data about the current - # instruction - (name, single_dict) = process_enc_line(oline, f) - - # if an instruction has already been added to the filtered - # instruction dictionary throw an error saying the given - # instruction is already imported and raise SystemExit - if name in instr_dict: - var = instr_dict[name]["extension"] - if instr_dict[name]['encoding'] != single_dict['encoding']: - err_msg = f'imported instruction : {name} in ' - err_msg += f'{os.path.basename(f)} is already ' - err_msg += f'added from {var} but each have different encodings for the same instruction' - logging.error(err_msg) - raise SystemExit(1) - instr_dict[name]['extension'].extend(single_dict['extension']) - else: - # update the final dict with the instruction - instr_dict[name] = single_dict - return instr_dict - + for file_name in file_names: + logging.debug(f'Parsing File: {file_name} for imported instructions') + lines = read_lines(file_name) + process_imported_instructions(lines, instr_dict, file_name, opcodes_dir) + return instr_dict \ No newline at end of file From 27a708c60651500674638b16e00d6eefbe1ce30d Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Wed, 2 Oct 2024 16:04:19 +0100 Subject: [PATCH 04/18] Add c.sext.w and zext.w Signed-off-by: Afonso Oliveira --- rv64_zba | 2 ++ rv64_zcb | 2 ++ 2 files changed, 4 insertions(+) diff --git a/rv64_zba b/rv64_zba index 5378e52f..3a1186aa 100644 --- a/rv64_zba +++ b/rv64_zba @@ -3,3 +3,5 @@ sh1add.uw rd rs1 rs2 31..25=16 14..12=2 6..2=0x0E 1..0=3 sh2add.uw rd rs1 rs2 31..25=16 14..12=4 6..2=0x0E 1..0=3 sh3add.uw rd rs1 rs2 31..25=16 14..12=6 6..2=0x0E 1..0=3 slli.uw rd rs1 31..26=2 shamtd 14..12=1 6..2=0x06 1..0=3 + +$pseudo_op rv64_zba::add.uw zext.w rd rs1 31..25=4 24..20=0 14..12=0 6..2=0x0E 1..0=3 diff --git a/rv64_zcb b/rv64_zcb index ed38047e..c47d0114 100644 
--- a/rv64_zcb +++ b/rv64_zcb @@ -1 +1,3 @@ c.zext.w rd_rs1_p 1..0=1 15..13=4 12..10=7 6..5=3 4..2=4 + +$pseudo_op rv64_c::c.addiw c.sext.w rd_rs1_n0 15..13=1 12=0 6..2=0 1..0=1 From 41fc44b695882c5f5a606e50a1854170f4055755 Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Wed, 9 Oct 2024 20:50:59 +0530 Subject: [PATCH 05/18] removed walrus operator --- c_utils.py | 57 ++++---- chisel_utils.py | 66 +++++----- go_utils.py | 50 +++---- latex_utils.py | 327 ++++++++++++++++++++++++++++++---------------- parse.py | 99 ++++++++------ rust_utils.py | 34 +++-- shared_utils.py | 265 ++++++++++++++++++++++--------------- sverilog_utils.py | 33 +++-- 8 files changed, 564 insertions(+), 367 deletions(-) diff --git a/c_utils.py b/c_utils.py index 4143625f..ee25bc24 100644 --- a/c_utils.py +++ b/c_utils.py @@ -1,50 +1,60 @@ -import re +import collections import glob +import logging import os import pprint -import logging -import collections -import yaml +import re import sys + +import yaml + # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + def make_c(instr_dict): - mask_match_str = '' - declare_insn_str = '' + mask_match_str = "" + declare_insn_str = "" for i in instr_dict: - mask_match_str += f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' - mask_match_str += f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + mask_match_str += ( + f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' + ) + mask_match_str += ( + f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + ) declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, 
MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' - csr_names_str = '' - declare_csr_str = '' - for num, name in csrs+csrs32: - csr_names_str += f'#define CSR_{name.upper()} {hex(num)}\n' - declare_csr_str += f'DECLARE_CSR({name}, CSR_{name.upper()})\n' + csr_names_str = "" + declare_csr_str = "" + for num, name in csrs + csrs32: + csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n" + declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n" - causes_str= '' - declare_cause_str = '' + causes_str = "" + declare_cause_str = "" for num, name in causes: causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" - declare_cause_str += f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + declare_cause_str += ( + f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + ) - arg_str = '' + arg_str = "" for name, rng in arg_lut.items(): begin = rng[1] - end = rng[0] + end = rng[0] mask = ((1 << (end - begin + 1)) - 1) << begin arg_str += f"#define INSN_FIELD_{name.upper().replace(' ', '_')} {hex(mask)}\n" - with open(f'{os.path.dirname(__file__)}/encoding.h', 'r') as file: + with open(f"{os.path.dirname(__file__)}/encoding.h", "r") as file: enc_header = file.read() commit = os.popen('git log -1 --format="format:%h"').read() - enc_file = open('encoding.out.h','w') - enc_file.write(f'''/* SPDX-License-Identifier: BSD-3-Clause */ + enc_file = open("encoding.out.h", "w") + enc_file.write( + f"""/* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023 RISC-V International */ @@ -67,5 +77,6 @@ def make_c(instr_dict): {declare_csr_str}#endif #ifdef DECLARE_CAUSE {declare_cause_str}#endif -''') +""" + ) enc_file.close() diff --git a/chisel_utils.py b/chisel_utils.py index 061eef4b..957e4f8a 100644 --- a/chisel_utils.py +++ b/chisel_utils.py @@ -1,24 +1,28 @@ -from constants import * +import collections import copy -import re import glob +import logging import os import pprint -import 
logging -import collections -import yaml +import re import sys + +import yaml + +from constants import * + # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + def make_chisel(instr_dict, spinal_hdl=False): - chisel_names='' - cause_names_str='' - csr_names_str = '' + chisel_names = "" + cause_names_str = "" + csr_names_str = "" for i in instr_dict: if spinal_hdl: chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n' @@ -27,7 +31,7 @@ def make_chisel(instr_dict, spinal_hdl=False): if not spinal_hdl: extensions = instr_dict_2_extensions(instr_dict) for e in extensions: - e_instrs = filter(lambda i: instr_dict[i]['extension'][0] == e, instr_dict) + e_instrs = filter(lambda i: instr_dict[i]["extension"][0] == e, instr_dict) if "rv64_" in e: e_format = e.replace("rv64_", "").upper() + "64" elif "rv32_" in e: @@ -38,42 +42,43 @@ def make_chisel(instr_dict, spinal_hdl=False): e_format = e.upper chisel_names += f' val {e_format+"Type"} = Map(\n' for instr in e_instrs: - tmp_instr_name = '"'+instr.upper().replace(".","_")+'"' + tmp_instr_name = '"' + instr.upper().replace(".", "_") + '"' chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n' - chisel_names += f' )\n' + chisel_names += f" )\n" for num, name in causes: cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' - cause_names_str += ''' val all = { + cause_names_str += """ val all = { val res = collection.mutable.ArrayBuffer[Int]() -''' +""" for num, name in causes: cause_names_str += f' res += {name.lower().replace(" ","_")}\n' - 
cause_names_str += ''' res.toArray - }''' + cause_names_str += """ res.toArray + }""" - for num, name in csrs+csrs32: - csr_names_str += f' val {name} = {hex(num)}\n' - csr_names_str += ''' val all = { + for num, name in csrs + csrs32: + csr_names_str += f" val {name} = {hex(num)}\n" + csr_names_str += """ val all = { val res = collection.mutable.ArrayBuffer[Int]() -''' +""" for num, name in csrs: - csr_names_str += f''' res += {name}\n''' - csr_names_str += ''' res.toArray + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray } val all32 = { val res = collection.mutable.ArrayBuffer(all:_*) -''' +""" for num, name in csrs32: - csr_names_str += f''' res += {name}\n''' - csr_names_str += ''' res.toArray - }''' + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray + }""" if spinal_hdl: - chisel_file = open('inst.spinalhdl','w') + chisel_file = open("inst.spinalhdl", "w") else: - chisel_file = open('inst.chisel','w') - chisel_file.write(f''' + chisel_file = open("inst.chisel", "w") + chisel_file.write( + f""" /* Automatically generated by parse_opcodes */ object Instructions {{ {chisel_names} @@ -84,5 +89,6 @@ def make_chisel(instr_dict, spinal_hdl=False): object CSRs {{ {csr_names_str} }} -''') +""" + ) chisel_file.close() diff --git a/go_utils.py b/go_utils.py index 9c5ef2b1..1f4c94bb 100644 --- a/go_utils.py +++ b/go_utils.py @@ -1,23 +1,26 @@ -import re +import collections import glob +import logging import os import pprint -import logging -import collections -import yaml +import re import sys + +import yaml + # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + def 
make_go(instr_dict): args = " ".join(sys.argv) - prelude = f'''// Code generated by {args}; DO NOT EDIT.''' + prelude = f"""// Code generated by {args}; DO NOT EDIT.""" - prelude += ''' + prelude += """ package riscv import "cmd/internal/obj" @@ -33,33 +36,34 @@ def make_go(instr_dict): func encode(a obj.As) *inst { switch a { -''' +""" - endoffile = ''' } + endoffile = """ } return nil } -''' +""" - instr_str = '' + instr_str = "" for i in instr_dict: - enc_match = int(instr_dict[i]['match'],0) - opcode = (enc_match >> 0) & ((1<<7)-1) - funct3 = (enc_match >> 12) & ((1<<3)-1) - rs1 = (enc_match >> 15) & ((1<<5)-1) - rs2 = (enc_match >> 20) & ((1<<5)-1) - csr = (enc_match >> 20) & ((1<<12)-1) - funct7 = (enc_match >> 25) & ((1<<7)-1) - instr_str += f''' case A{i.upper().replace("_","")}: + enc_match = int(instr_dict[i]["match"], 0) + opcode = (enc_match >> 0) & ((1 << 7) - 1) + funct3 = (enc_match >> 12) & ((1 << 3) - 1) + rs1 = (enc_match >> 15) & ((1 << 5) - 1) + rs2 = (enc_match >> 20) & ((1 << 5) - 1) + csr = (enc_match >> 20) & ((1 << 12) - 1) + funct7 = (enc_match >> 25) & ((1 << 7) - 1) + instr_str += f""" case A{i.upper().replace("_","")}: return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }} -''' - - with open('inst.go','w') as file: +""" + + with open("inst.go", "w") as file: file.write(prelude) file.write(instr_str) file.write(endoffile) try: import subprocess + subprocess.run(["go", "fmt", "inst.go"]) except: - pass \ No newline at end of file + pass diff --git a/latex_utils.py b/latex_utils.py index 59a8bb52..ee3a7232 100644 --- a/latex_utils.py +++ b/latex_utils.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 -import pprint import logging +import pprint + from constants import * from shared_utils import create_inst_dict -LOG_FORMAT = '%(levelname)s:: %(message)s' +LOG_FORMAT = "%(levelname)s:: %(message)s" LOG_LEVEL = logging.INFO pretty_printer = pprint.PrettyPrinter(indent=2) @@ -13,37 +14,78 @@ def 
create_priv_instr_dataset(): """Create dataset list for privileged instructions.""" - system_instr = ['_h', '_s', '_system', '_svinval', '64_h'] + system_instr = ["_h", "_s", "_system", "_svinval", "64_h"] return [ - (system_instr, 'Trap-Return Instructions', ['sret', 'mret'], False), - (system_instr, 'Interrupt-Management Instructions', ['wfi'], False), - (system_instr, 'Supervisor Memory-Management Instructions', ['sfence_vma'], False), - (system_instr, 'Hypervisor Memory-Management Instructions', ['hfence_vvma', 'hfence_gvma'], False), - (system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions', - ['hlv_b', 'hlv_bu', 'hlv_h', 'hlv_hu', 'hlv_w', 'hlvx_hu', 'hlvx_wu', 'hsv_b', 'hsv_h', 'hsv_w'], False), - (system_instr, 'Hypervisor Virtual-Machine Load and Store Instructions, RV64 only', ['hlv_wu', 'hlv_d', 'hsv_d'], False), - (system_instr, 'Svinval Memory-Management Instructions', ['sinval_vma', 'sfence_w_inval', 'sfence_inval_ir', 'hinval_vvma', 'hinval_gvma'], False) + (system_instr, "Trap-Return Instructions", ["sret", "mret"], False), + (system_instr, "Interrupt-Management Instructions", ["wfi"], False), + ( + system_instr, + "Supervisor Memory-Management Instructions", + ["sfence_vma"], + False, + ), + ( + system_instr, + "Hypervisor Memory-Management Instructions", + ["hfence_vvma", "hfence_gvma"], + False, + ), + ( + system_instr, + "Hypervisor Virtual-Machine Load and Store Instructions", + [ + "hlv_b", + "hlv_bu", + "hlv_h", + "hlv_hu", + "hlv_w", + "hlvx_hu", + "hlvx_wu", + "hsv_b", + "hsv_h", + "hsv_w", + ], + False, + ), + ( + system_instr, + "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only", + ["hlv_wu", "hlv_d", "hsv_d"], + False, + ), + ( + system_instr, + "Svinval Memory-Management Instructions", + [ + "sinval_vma", + "sfence_w_inval", + "sfence_inval_ir", + "hinval_vvma", + "hinval_gvma", + ], + False, + ), ] def make_priv_latex_table(): """Generate and write the LaTeX table for privileged instructions.""" - 
type_list = ['R-type', 'I-type'] + type_list = ["R-type", "I-type"] dataset_list = create_priv_instr_dataset() - caption = '\\caption{RISC-V Privileged Instructions}' + caption = "\\caption{RISC-V Privileged Instructions}" - with open('priv-instr-table.tex', 'w') as latex_file: + with open("priv-instr-table.tex", "w") as latex_file: make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) def make_latex_table(): - ''' + """ - This function is mean to create the instr-table.tex that is meant to be used by the riscv-isa-manual. 1. creates a single latex file of multiple table 2. Each table limited to a single page 3. Only the last table is assigned a latex-caption. - + - For each table, we assign a type-list that captures the different instruction types (R, I, B, etc.) required for that table. 1. Specify the type-list to capture various instruction types (e.g., R-type, I-type, B-type). 2. Select a list of extensions (e.g., _i, 32_i) whose instructions are necessary to populate the table. @@ -54,113 +96,161 @@ def make_latex_table(): * The last table only has to be given a caption - as per the policy of the riscv-isa-manual. 
- ''' + """ # File for writing LaTeX content - with open('instr-table.tex', 'w') as latex_file: + with open("instr-table.tex", "w") as latex_file: # Prepare table configurations with type list, datasets, word size & caption table_configurations = get_table_configurations() - + # Map each configuration from above with variables to pass as argumnet for config in table_configurations: # Unpack configuration dictionary into arguments for make_ext_latex_table - type_list = config['type_list'] - datasets = config['datasets'] - word_size = config['word_size'] - caption = config['caption'] - + type_list = config["type_list"] + datasets = config["datasets"] + word_size = config["word_size"] + caption = config["caption"] + # LaTeX table generation function - make_ext_latex_table( - type_list, - datasets, - latex_file, - word_size, - caption - ) + make_ext_latex_table(type_list, datasets, latex_file, word_size, caption) def get_table_configurations(): - ''' + """ Returns a list of table configurations, each specifying the type list, datasets, word size, and caption for LaTeX table generation. - + Returns: list: A list of dictionaries, each representing a table's configuration. 
- ''' + """ return [ create_table_configuration( - type_list=['R-type', 'I-type', 'S-type', 'B-type', 'U-type', 'J-type'], + type_list=["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"], datasets=[ - create_dataset(['_i', '32_i'], 'RV32I Base Instruction Set', [], False), - create_dataset(['_i'], '', ['fence_tso', 'pause'], True) + create_dataset(["_i", "32_i"], "RV32I Base Instruction Set", [], False), + create_dataset(["_i"], "", ["fence_tso", "pause"], True), ], - word_size=32 + word_size=32, ), create_table_configuration( - type_list=['R-type', 'I-type', 'S-type'], + type_list=["R-type", "I-type", "S-type"], datasets=[ - create_dataset(['64_i'], 'RV64I Base Instruction Set (in addition to RV32I)', [], False), - create_dataset(['_zifencei'], 'RV32/RV64 Zifencei Standard Extension', [], False), - create_dataset(['_zicsr'], 'RV32/RV64 Zicsr Standard Extension', [], False), - create_dataset(['_m', '32_m'], 'RV32M Standard Extension', [], False), - create_dataset(['64_m'], 'RV64M Standard Extension (in addition to RV32M)', [], False) + create_dataset( + ["64_i"], + "RV64I Base Instruction Set (in addition to RV32I)", + [], + False, + ), + create_dataset( + ["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False + ), + create_dataset( + ["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False + ), + create_dataset(["_m", "32_m"], "RV32M Standard Extension", [], False), + create_dataset( + ["64_m"], + "RV64M Standard Extension (in addition to RV32M)", + [], + False, + ), ], - word_size=32 + word_size=32, ), create_table_configuration( - type_list=['R-type'], + type_list=["R-type"], datasets=[ - create_dataset(['_a'], 'RV32A Standard Extension', [], False), - create_dataset(['64_a'], 'RV64A Standard Extension (in addition to RV32A)', [], False) + create_dataset(["_a"], "RV32A Standard Extension", [], False), + create_dataset( + ["64_a"], + "RV64A Standard Extension (in addition to RV32A)", + [], + False, + ), ], - word_size=32 + word_size=32, ), 
create_table_configuration( - type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + type_list=["R-type", "R4-type", "I-type", "S-type"], datasets=[ - create_dataset(['_f'], 'RV32F Standard Extension', [], False), - create_dataset(['64_f'], 'RV64F Standard Extension (in addition to RV32F)', [], False) + create_dataset(["_f"], "RV32F Standard Extension", [], False), + create_dataset( + ["64_f"], + "RV64F Standard Extension (in addition to RV32F)", + [], + False, + ), ], - word_size=32 + word_size=32, ), create_table_configuration( - type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + type_list=["R-type", "R4-type", "I-type", "S-type"], datasets=[ - create_dataset(['_d'], 'RV32D Standard Extension', [], False), - create_dataset(['64_d'], 'RV64D Standard Extension (in addition to RV32D)', [], False) + create_dataset(["_d"], "RV32D Standard Extension", [], False), + create_dataset( + ["64_d"], + "RV64D Standard Extension (in addition to RV32D)", + [], + False, + ), ], - word_size=32 + word_size=32, ), create_table_configuration( - type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + type_list=["R-type", "R4-type", "I-type", "S-type"], datasets=[ - create_dataset(['_q'], 'RV32Q Standard Extension', [], False), - create_dataset(['64_q'], 'RV64Q Standard Extension (in addition to RV32Q)', [], False) + create_dataset(["_q"], "RV32Q Standard Extension", [], False), + create_dataset( + ["64_q"], + "RV64Q Standard Extension (in addition to RV32Q)", + [], + False, + ), ], - word_size=32 + word_size=32, ), create_table_configuration( - type_list=['R-type', 'R4-type', 'I-type', 'S-type'], + type_list=["R-type", "R4-type", "I-type", "S-type"], datasets=[ - create_dataset(['_zfh', '_d_zfh', '_q_zfh'], 'RV32Zfh Standard Extension', [], False), - create_dataset(['64_zfh'], 'RV64Zfh Standard Extension (in addition to RV32Zfh)', [], False) + create_dataset( + ["_zfh", "_d_zfh", "_q_zfh"], + "RV32Zfh Standard Extension", + [], + False, + ), + create_dataset( + ["64_zfh"], + 
"RV64Zfh Standard Extension (in addition to RV32Zfh)", + [], + False, + ), ], word_size=32, - caption='\\caption{Instruction listing for RISC-V}' + caption="\\caption{Instruction listing for RISC-V}", ), create_table_configuration( - type_list=[''], + type_list=[""], datasets=[ - create_dataset(['_c', '32_c', '32_c_f', '_c_d'], 'RV32C Standard Extension', [], False), - create_dataset(['64_c'], 'RV64C Standard Extension (in addition to RV32C)', [], False) + create_dataset( + ["_c", "32_c", "32_c_f", "_c_d"], + "RV32C Standard Extension", + [], + False, + ), + create_dataset( + ["64_c"], + "RV64C Standard Extension (in addition to RV32C)", + [], + False, + ), ], word_size=16, - caption='' - ) + caption="", + ), ] -def create_table_configuration(type_list, datasets, word_size, caption=''): - ''' +def create_table_configuration(type_list, datasets, word_size, caption=""): + """ Creates a table configuration dictionary with the provided parameters. Parameters: @@ -175,17 +265,17 @@ def create_table_configuration(type_list, datasets, word_size, caption=''): Returns: dict: A dictionary representing the table configuration. - ''' + """ return { "type_list": type_list, "datasets": datasets, "word_size": word_size, - "caption": caption + "caption": caption, } def create_dataset(extensions, title, instructions, include_pseudo_ops): - ''' + """ Creates a dataset tuple for table configuration. Parameters: @@ -196,53 +286,53 @@ def create_dataset(extensions, title, instructions, include_pseudo_ops): Returns: tuple: A tuple representing the dataset configuration. - ''' + """ return (extensions, title, instructions, include_pseudo_ops) def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): - ''' + """ For a given collection of extensions this function dumps out a complete latex table which includes the encodings of the instructions. Args: - - type_list (list of str): - 1. A list of instruction types (R, I, B, etc) that are treated as header for each table. 
+ - type_list (list of str): + 1. A list of instruction types (R, I, B, etc) that are treated as header for each table. 2. Each table will have its own requirements and type_list must include all the instruction-types that the table needs. 3. All elements of this list must be present in the latex_inst_type dictionary defined in constants.py - - + + - dataset (list of tuples): A list of 3-element tuples where each tuple consists of: 1. list_of_extensions (list): A list of extensions whose instructions will be populated under the respective title. 2. title (str): A title associated with the respective table. - 3. list_of_instructions (list): If not empty, only these instructions present in the corresponding extension + 3. list_of_instructions (list): If not empty, only these instructions present in the corresponding extension will be included in the table, while others will be ignored. - latex_file (file pointer): A file pointer to the LaTeX file where the generated table will be written. - + - ilen (int): The ilen input indicates the length of the instruction for which the table is created. - + - caption (str): The caption for the LaTeX table. Returns: - None: The function writes the generated LaTeX table directly to the provided `latex_file`. Process: - 1. Creates table headers based on the instruction types in `type_list` using the `latex_inst_type` dictionary + 1. Creates table headers based on the instruction types in `type_list` using the `latex_inst_type` dictionary from constants.py. - + 2. Iterates through each entry in the dataset to: - Generate an exhaustive list of instructions for each dataset using `create_inst_dict`. - Apply any instruction filters based on `list_of_instructions` to select only relevant instructions. - + 3. For each instruction, generates LaTeX table entries. 
- - Uses `arg_lut` from constants.py to determine the position of arguments in the encoding, and creates multicolumn + - Uses `arg_lut` from constants.py to determine the position of arguments in the encoding, and creates multicolumn LaTeX entries for these arguments. - - Handles hardcoded bits (e.g., strings of 1s and 0s) similarly, creating multicolumn entries for continuous + - Handles hardcoded bits (e.g., strings of 1s and 0s) similarly, creating multicolumn entries for continuous strings of bits. 4. Writes the LaTeX table to `latex_file` with a specific format suitable for instructions of size `ilen`. - ''' + """ column_size = get_column_size(ilen) type_entries = generate_type_entries(ilen) @@ -264,13 +354,13 @@ def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): def get_column_size(ilen): """Generate the column size string based on instruction length (ilen).""" - return "".join(['p{0.002in}'] * (ilen + 1)) + return "".join(["p{0.002in}"] * (ilen + 1)) def generate_type_entries(ilen): """Generate the type entries section of the LaTeX table.""" if ilen == 32: - return ''' + return """ \\multicolumn{3}{l}{31} & \\multicolumn{2}{r}{27} & \\multicolumn{1}{c}{26} & @@ -285,9 +375,9 @@ def generate_type_entries(ilen): \\multicolumn{1}{r}{7} & \\multicolumn{6}{l}{6} & \\multicolumn{1}{r}{0} \\\\ - \\cline{2-33}\n&\n\n''' + \\cline{2-33}\n&\n\n""" else: - return ''' + return """ \\multicolumn{1}{c}{15} & \\multicolumn{1}{c}{14} & \\multicolumn{1}{c}{13} & @@ -304,7 +394,7 @@ def generate_type_entries(ilen): \\multicolumn{1}{c}{2} & \\multicolumn{1}{c}{1} & \\multicolumn{1}{c}{0} \\\\ - \\cline{2-17}\n&\n\n''' + \\cline{2-17}\n&\n\n""" def get_type_dict(type_list): @@ -315,7 +405,7 @@ def get_type_dict(type_list): def build_instruction_type_entry(inst_type, fields, ilen): """Build a LaTeX table entry for each instruction type.""" entries = [] - for field in fields['variable_fields']: + for field in fields["variable_fields"]: (msb, lsb) = 
arg_lut[field] name = latex_mapping.get(field, field) entries.append((msb, lsb, name)) @@ -326,21 +416,23 @@ def build_instruction_type_entry(inst_type, fields, ilen): def format_table_entry(fields, entry_type, ilen): """Generate formatted LaTeX table entry.""" fields.sort(key=lambda f: f[0], reverse=True) - entry = '' + entry = "" for i, (msb, lsb, name) in enumerate(fields): col_size = msb - lsb + 1 if i == len(fields) - 1: - entry += f'\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} & {entry_type} \\\\\n' + entry += ( + f"\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} & {entry_type} \\\\\n" + ) elif i == 0: - entry += f'\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} &\n' + entry += f"\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} &\n" else: - entry += f'\\multicolumn{{{col_size}}}{{c|}}{{{name}}} &\n' - return entry + f'\\cline{{2-{ilen+1}}}\n&\n\n' + entry += f"\\multicolumn{{{col_size}}}{{c|}}{{{name}}} &\n" + return entry + f"\\cline{{2-{ilen+1}}}\n&\n\n" def generate_dataset_content(dataset, ilen): """Generate LaTeX content for each dataset entry.""" - content = '' + content = "" for ext_list, title, filter_list, include_pseudo in dataset: instr_dict = get_instruction_dict(ext_list, include_pseudo) filtered_list = filter_list if filter_list else list(instr_dict.keys()) @@ -357,20 +449,22 @@ def get_instruction_dict(ext_list, include_pseudo): """Create a dictionary of instructions for given extensions.""" instr_dict = {} for ext in ext_list: - instr_dict.update(create_inst_dict([f'rv{ext}'], include_pseudo)) + instr_dict.update(create_inst_dict([f"rv{ext}"], include_pseudo)) return instr_dict def generate_instruction_entries(instr_dict, inst_list, ilen): """Generate LaTeX entries for each instruction in the list.""" - instr_entries = '' + instr_entries = "" for inst in inst_list: if inst not in instr_dict: - logging.error(f'Instruction {inst} not found in instr_dict') + logging.error(f"Instruction {inst} not found in instr_dict") raise SystemExit(1) fields = 
parse_instruction_fields(instr_dict[inst], ilen) - instr_entries += format_table_entry(fields, inst.upper().replace("_", "."), ilen) + instr_entries += format_table_entry( + fields, inst.upper().replace("_", "."), ilen + ) return instr_entries @@ -378,16 +472,16 @@ def generate_instruction_entries(instr_dict, inst_list, ilen): def parse_instruction_fields(inst_data, ilen): """Parse and extract fields from instruction data.""" fields = [] - encoding = inst_data['encoding'][16:] if ilen == 16 else inst_data['encoding'] + encoding = inst_data["encoding"][16:] if ilen == 16 else inst_data["encoding"] msb = ilen - 1 - y = '' + y = "" for i in range(ilen): x = encoding[i] - if x == '-': + if x == "-": if y: fields.append((msb, ilen - i, y)) - y = '' + y = "" msb -= 1 else: y += str(x) @@ -401,15 +495,16 @@ def parse_instruction_fields(inst_data, ilen): def generate_dataset_title(title, ilen): """Generate LaTeX dataset title.""" - return f''' + return f""" \\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ \\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ \\cline{{2-{ilen + 1}}} -''' +""" + def generate_table_header(column_size, ilen, type_entries): """Generate LaTeX table header.""" - return f''' + return f""" \\newpage \\begin{{table}}[p] @@ -420,16 +515,16 @@ def generate_table_header(column_size, ilen, type_entries): & {type_entries} -''' +""" def generate_table_footer(caption): """Generate LaTeX table footer.""" - return f''' + return f""" \\end{{tabular}} \\end{{center}} \\end{{small}} {caption} \\end{{table}} -''' +""" diff --git a/parse.py b/parse.py index 586cd66e..bba59d81 100755 --- a/parse.py +++ b/parse.py @@ -1,33 +1,36 @@ #!/usr/bin/env python3 import collections import logging +import pprint import sys + import yaml -import pprint -from constants import * -from shared_utils import create_inst_dict, add_segmented_vls_insn -from latex_utils import make_latex_table, make_priv_latex_table +from c_utils import make_c from chisel_utils import make_chisel +from constants 
import * +from go_utils import make_go +from latex_utils import make_latex_table, make_priv_latex_table from rust_utils import make_rust +from shared_utils import add_segmented_vls_insn, create_inst_dict from sverilog_utils import make_sverilog -from c_utils import make_c -from go_utils import make_go -LOG_FORMAT = '%(levelname)s:: %(message)s' +LOG_FORMAT = "%(levelname)s:: %(message)s" LOG_LEVEL = logging.INFO pretty_printer = pprint.PrettyPrinter(indent=2) logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + def remove_non_extensions(args): """ Removes non-extension flags from the command-line arguments. """ extensions = args[1:] - flags = ['-c', '-latex', '-chisel', '-sverilog', '-rust', '-go', '-spinalhdl'] + flags = ["-c", "-latex", "-chisel", "-sverilog", "-rust", "-go", "-spinalhdl"] return [ext for ext in extensions if ext not in flags] + def process_instruction_dict(extensions, include_pseudo): """ Processes the instruction dictionary by creating and adding segmented instructions. @@ -36,78 +39,90 @@ def process_instruction_dict(extensions, include_pseudo): instr_dict = add_segmented_vls_insn(instr_dict) return collections.OrderedDict(sorted(instr_dict.items())) -def write_yaml(instr_dict, filename='instr_dict.yaml'): + +def write_yaml(instr_dict, filename="instr_dict.yaml"): """ Writes the instruction dictionary to a YAML file. """ - with open(filename, 'w') as outfile: + with open(filename, "w") as outfile: yaml.dump(instr_dict, outfile, default_flow_style=False) + def generate_outputs(instr_dict, extensions): """ Generates output files based on selected extensions and flags. 
""" # Dictionary to map extensions to their respective functions and logging messages extension_map = { - '-c': { - 'function': lambda: make_c(collections.OrderedDict(sorted(create_inst_dict(extensions, False, include_pseudo_ops=emitted_pseudo_ops).items()))), - 'message': 'encoding.out.h generated successfully' + "-c": { + "function": lambda: make_c( + collections.OrderedDict( + sorted( + create_inst_dict( + extensions, False, include_pseudo_ops=emitted_pseudo_ops + ).items() + ) + ) + ), + "message": "encoding.out.h generated successfully", + }, + "-chisel": { + "function": lambda: make_chisel(instr_dict), + "message": "inst.chisel generated successfully", }, - '-chisel': { - 'function': lambda: make_chisel(instr_dict), - 'message': 'inst.chisel generated successfully' + "-spinalhdl": { + "function": lambda: make_chisel(instr_dict, spinal_hdl=True), + "message": "inst.spinalhdl generated successfully", }, - '-spinalhdl': { - 'function': lambda: make_chisel(instr_dict, spinal_hdl=True), - 'message': 'inst.spinalhdl generated successfully' + "-sverilog": { + "function": lambda: make_sverilog(instr_dict), + "message": "inst.sverilog generated successfully", }, - '-sverilog': { - 'function': lambda: make_sverilog(instr_dict), - 'message': 'inst.sverilog generated successfully' + "-rust": { + "function": lambda: make_rust(instr_dict), + "message": "inst.rs generated successfully", }, - '-rust': { - 'function': lambda: make_rust(instr_dict), - 'message': 'inst.rs generated successfully' + "-go": { + "function": lambda: make_go(instr_dict), + "message": "inst.go generated successfully", }, - '-go': { - 'function': lambda: make_go(instr_dict), - 'message': 'inst.go generated successfully' + "-latex": { + "function": lambda: (make_latex_table(), make_priv_latex_table()), + "message": [ + "instr-table.tex generated successfully", + "priv-instr-table.tex generated successfully", + ], }, - '-latex': { - 'function': lambda: (make_latex_table(), make_priv_latex_table()), - 
'message': [ - 'instr-table.tex generated successfully', - 'priv-instr-table.tex generated successfully' - ] - } } for ext, actions in extension_map.items(): if ext in extensions: try: - actions['function']() - if isinstance(actions['message'], list): - for msg in actions['message']: + actions["function"]() + if isinstance(actions["message"], list): + for msg in actions["message"]: logging.info(msg) else: - logging.info(actions['message']) + logging.info(actions["message"]) except Exception as e: logging.error(f"Error generating output for {ext}: {e}") + def main(): """ Main function for processing and generation of files based on command-line arguments. """ - print(f'Running with args : {sys.argv}') + print(f"Running with args : {sys.argv}") extensions = remove_non_extensions(sys.argv) - print(f'Extensions selected : {extensions}') + print(f"Extensions selected : {extensions}") - include_pseudo = '-go' in sys.argv[1:] + include_pseudo = "-go" in sys.argv[1:] instr_dict = process_instruction_dict(extensions, include_pseudo) write_yaml(instr_dict) generate_outputs(instr_dict, sys.argv[1:]) + if __name__ == "__main__": main() diff --git a/rust_utils.py b/rust_utils.py index 6f540a25..19a47b95 100644 --- a/rust_utils.py +++ b/rust_utils.py @@ -1,31 +1,39 @@ -from constants import * +import collections import copy -import re import glob +import logging import os import pprint -import logging -import collections -import yaml +import re import sys + +import yaml + +from constants import * + # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + def make_rust(instr_dict): - mask_match_str= '' + mask_match_str = "" for i in 
instr_dict: mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' - for num, name in csrs+csrs32: - mask_match_str += f'const CSR_{name.upper()}: u16 = {hex(num)};\n' + for num, name in csrs + csrs32: + mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n" for num, name in causes: - mask_match_str += f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' - rust_file = open('inst.rs','w') - rust_file.write(f''' + mask_match_str += ( + f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' + ) + rust_file = open("inst.rs", "w") + rust_file.write( + f""" /* Automatically generated by parse_opcodes */ {mask_match_str} -''') +""" + ) rust_file.close() diff --git a/shared_utils.py b/shared_utils.py index c10d175f..8c081e20 100644 --- a/shared_utils.py +++ b/shared_utils.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 import copy -import re import glob +import logging import os import pprint -import logging +import re from constants import * -LOG_FORMAT = '%(levelname)s:: %(message)s' +LOG_FORMAT = "%(levelname)s:: %(message)s" LOG_LEVEL = logging.INFO pretty_printer = pprint.PrettyPrinter(indent=2) @@ -16,7 +16,7 @@ def process_enc_line(line, ext): - ''' + """ This function processes each line of the encoding files (rv*). As part of the processing, the function ensures that the encoding is legal through the following checks:: @@ -41,7 +41,7 @@ def process_enc_line(line, ext): this instruction - mask: hex value representin the bits that need to be masked to extract the value required for matching. 
- ''' + """ encoding = initialize_encoding() name, remaining = parse_instruction_name(line) @@ -65,13 +65,13 @@ def process_enc_line(line, ext): def initialize_encoding(): """Initialize a 32-bit encoding with '-' representing 'don't care'.""" - return ['-'] * 32 + return ["-"] * 32 def parse_instruction_name(line): """Extract the instruction name and remaining part of the line.""" - name, remaining = line.split(' ', 1) - name = name.replace('.', '_').lstrip() + name, remaining = line.split(" ", 1) + name = name.replace(".", "_").lstrip() return name, remaining @@ -94,15 +94,19 @@ def validate_entry_value(msb, lsb, entry, line): """Ensure that the value assigned to a bit range is legal for its width.""" entry_value = int(entry, 0) if entry_value >= (1 << (msb - lsb + 1)): - log_and_exit(f"{get_instruction_name(line)} has an illegal value for the bit width {msb - lsb}") + log_and_exit( + f"{get_instruction_name(line)} has an illegal value for the bit width {msb - lsb}" + ) def update_encoding(msb, lsb, entry, encoding, line): """Update the encoding array for a given bit range.""" entry_value = int(entry, 0) for ind in range(lsb, msb + 1): - if encoding[31 - ind] != '-': - log_and_exit(f"{get_instruction_name(line)} has overlapping bits in its opcodes") + if encoding[31 - ind] != "-": + log_and_exit( + f"{get_instruction_name(line)} has overlapping bits in its opcodes" + ) encoding[31 - ind] = str((entry_value >> (ind - lsb)) & 1) @@ -111,22 +115,24 @@ def process_single_fixed(remaining, encoding, line): for lsb, value, _ in single_fixed.findall(remaining): lsb = int(lsb, 0) value = int(value, 0) - if encoding[31 - lsb] != '-': - log_and_exit(f"{get_instruction_name(line)} has overlapping bits in its opcodes") + if encoding[31 - lsb] != "-": + log_and_exit( + f"{get_instruction_name(line)} has overlapping bits in its opcodes" + ) encoding[31 - lsb] = str(value) - return fixed_ranges.sub(' ', remaining) + return fixed_ranges.sub(" ", remaining) def 
create_match_and_mask(encoding): """Generate match and mask strings from the encoding array.""" - match = ''.join(encoding).replace('-', '0') - mask = ''.join(encoding).replace('0', '1').replace('-', '0') + match = "".join(encoding).replace("-", "0") + mask = "".join(encoding).replace("0", "1").replace("-", "0") return match, mask def process_arguments(remaining, encoding, name): """Process instruction arguments and update the encoding with argument positions.""" - args = single_fixed.sub(' ', remaining).split() + args = single_fixed.sub(" ", remaining).split() encoding_args = encoding.copy() for arg in args: if arg not in arg_lut: @@ -138,16 +144,18 @@ def process_arguments(remaining, encoding, name): def handle_missing_arg(arg, name): """Handle missing argument mapping in arg_lut.""" - if '=' in arg and (existing_arg := arg.split('=')[0]) in arg_lut: - arg_lut[arg] = arg_lut[existing_arg] - else: - log_and_exit(f"Variable {arg} in instruction {name} not mapped in arg_lut") + if "=" in arg: + existing_arg = arg.split("=")[0] + if existing_arg in arg_lut: + arg_lut[arg] = arg_lut[existing_arg] + return + log_and_exit(f"Variable {arg} in instruction {name} not mapped in arg_lut") def update_arg_encoding(msb, lsb, arg, encoding_args, name): """Update the encoding array with the argument positions.""" for ind in range(lsb, msb + 1): - if encoding_args[31 - ind] != '-': + if encoding_args[31 - ind] != "-": log_and_exit(f"Variable {arg} overlaps in bit {ind} in instruction {name}") encoding_args[31 - ind] = arg @@ -155,11 +163,11 @@ def update_arg_encoding(msb, lsb, arg, encoding_args, name): def create_instruction_dict(encoding, args, ext, match, mask): """Create the final dictionary for the instruction.""" return { - 'encoding': ''.join(encoding), - 'variable_fields': args, - 'extension': [os.path.basename(ext)], - 'match': hex(int(match, 2)), - 'mask': hex(int(mask, 2)), + "encoding": "".join(encoding), + "variable_fields": args, + "extension": 
[os.path.basename(ext)], + "match": hex(int(match, 2)), + "mask": hex(int(mask, 2)), } @@ -171,53 +179,53 @@ def log_and_exit(message): def get_instruction_name(line): """Helper to extract the instruction name from a line.""" - return line.split(' ')[0] + return line.split(" ")[0] + def overlaps(x, y): """ Check if two bit strings overlap without conflicts. - + Args: x (str): First bit string. y (str): Second bit string. - + Returns: bool: True if the bit strings overlap without conflicts, False otherwise. - - In the context of RISC-V opcodes, this function ensures that the bit ranges + + In the context of RISC-V opcodes, this function ensures that the bit ranges defined by two different bit strings do not conflict. """ - + # Minimum length of the two strings min_len = min(len(x), len(y)) - + for char_x, char_y in zip(x[:min_len], y[:min_len]): - if char_x != '-' and char_y != '-' and char_x != char_y: + if char_x != "-" and char_y != "-" and char_x != char_y: return False - + return True def overlap_allowed(a, x, y): """ Check if there is an overlap between keys and values in a dictionary. - + Args: a (dict): The dictionary where keys are mapped to sets or lists of keys. x (str): The first key to check. y (str): The second key to check. - + Returns: - bool: True if both (x, y) or (y, x) are present in the dictionary + bool: True if both (x, y) or (y, x) are present in the dictionary as described, False otherwise. - - This function determines if `x` is a key in the dictionary `a` and - its corresponding value contains `y`, or if `y` is a key and its + + This function determines if `x` is a key in the dictionary `a` and + its corresponding value contains `y`, or if `y` is a key and its corresponding value contains `x`. 
""" - - return x in a and y in a[x] or \ - y in a and x in a[y] + + return x in a and y in a[x] or y in a and x in a[y] # Checks if overlap between two extensions is allowed @@ -235,9 +243,11 @@ def same_base_isa(ext_name, ext_name_list): type1 = ext_name.split("_")[0] for ext_name1 in ext_name_list: type2 = ext_name1.split("_")[0] - if type1 == type2 or \ - (type2 == "rv" and type1 in ["rv32", "rv64"]) or \ - (type1 == "rv" and type2 in ["rv32", "rv64"]): + if ( + type1 == type2 + or (type2 == "rv" and type1 in ["rv32", "rv64"]) + or (type1 == "rv" and type2 in ["rv32", "rv64"]) + ): return True return False @@ -246,7 +256,7 @@ def same_base_isa(ext_name, ext_name_list): def add_segmented_vls_insn(instr_dict): updated_dict = {} for k, v in instr_dict.items(): - if "nf" in v['variable_fields']: + if "nf" in v["variable_fields"]: updated_dict.update(expand_nf_field(k, v)) else: updated_dict[k] = v @@ -255,27 +265,33 @@ def add_segmented_vls_insn(instr_dict): # Expands nf field in instruction name and updates instruction details def expand_nf_field(name, single_dict): - if "nf" not in single_dict['variable_fields']: + if "nf" not in single_dict["variable_fields"]: logging.error(f"Cannot expand nf field for instruction {name}") raise SystemExit(1) - single_dict['variable_fields'].remove("nf") # Remove "nf" from variable fields - single_dict['mask'] = hex(int(single_dict['mask'], 16) | (0b111 << 29)) # Update mask + single_dict["variable_fields"].remove("nf") # Remove "nf" from variable fields + single_dict["mask"] = hex( + int(single_dict["mask"], 16) | (0b111 << 29) + ) # Update mask - name_expand_index = name.find('e') + name_expand_index = name.find("e") expanded_instructions = [] for nf in range(8): # Expand nf for values 0 to 7 new_single_dict = copy.deepcopy(single_dict) - new_single_dict['match'] = hex(int(single_dict['match'], 16) | (nf << 29)) - new_single_dict['encoding'] = format(nf, '03b') + single_dict['encoding'][3:] - new_name = name if nf == 0 else 
f"{name[:name_expand_index]}seg{nf+1}{name[name_expand_index:]}" + new_single_dict["match"] = hex(int(single_dict["match"], 16) | (nf << 29)) + new_single_dict["encoding"] = format(nf, "03b") + single_dict["encoding"][3:] + new_name = ( + name + if nf == 0 + else f"{name[:name_expand_index]}seg{nf+1}{name[name_expand_index:]}" + ) expanded_instructions.append((new_name, new_single_dict)) return expanded_instructions # Extracts the extensions used in an instruction dictionary def instr_dict_2_extensions(instr_dict): - return list({item['extension'][0] for item in instr_dict.values()}) + return list({item["extension"][0] for item in instr_dict.values()}) # Returns signed interpretation of a value within a given width @@ -289,103 +305,127 @@ def read_lines(file): lines = (line.rstrip() for line in fp) return [line for line in lines if line and not line.startswith("#")] + def process_standard_instructions(lines, instr_dict, file_name): """Processes standard instructions from the given lines and updates the instruction dictionary.""" for line in lines: - if '$import' in line or '$pseudo' in line: + if "$import" in line or "$pseudo" in line: continue - logging.debug(f'Processing line: {line}') + logging.debug(f"Processing line: {line}") name, single_dict = process_enc_line(line, file_name) ext_name = os.path.basename(file_name) if name in instr_dict: var = instr_dict[name]["extension"] if same_base_isa(ext_name, var): - log_and_exit(f'Instruction {name} from {ext_name} is already added from {var} in same base ISA') - elif instr_dict[name]['encoding'] != single_dict['encoding']: - log_and_exit(f'Instruction {name} from {ext_name} has different encodings in different base ISAs') - - instr_dict[name]['extension'].extend(single_dict['extension']) + log_and_exit( + f"Instruction {name} from {ext_name} is already added from {var} in same base ISA" + ) + elif instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Instruction {name} from {ext_name} has 
different encodings in different base ISAs" + ) + + instr_dict[name]["extension"].extend(single_dict["extension"]) else: for key, item in instr_dict.items(): - if overlaps(item['encoding'], single_dict['encoding']) and \ - not extension_overlap_allowed(ext_name, item['extension'][0]) and \ - not instruction_overlap_allowed(name, key) and \ - same_base_isa(ext_name, item['extension']): - log_and_exit(f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}') + if ( + overlaps(item["encoding"], single_dict["encoding"]) + and not extension_overlap_allowed(ext_name, item["extension"][0]) + and not instruction_overlap_allowed(name, key) + and same_base_isa(ext_name, item["extension"]) + ): + log_and_exit( + f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}' + ) instr_dict[name] = single_dict -def process_pseudo_instructions(lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops): + +def process_pseudo_instructions( + lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops +): """Processes pseudo instructions from the given lines and updates the instruction dictionary.""" for line in lines: - if '$pseudo' not in line: + if "$pseudo" not in line: continue - logging.debug(f'Processing pseudo line: {line}') + logging.debug(f"Processing pseudo line: {line}") ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0] ext_file = find_extension_file(ext, opcodes_dir) validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst) - name, single_dict = process_enc_line(f'{pseudo_inst} {line_content}', file_name) - if orig_inst.replace('.', '_') not in instr_dict or include_pseudo or name in include_pseudo_ops: + name, single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name) + if ( + orig_inst.replace(".", "_") not in instr_dict + or include_pseudo + or name in include_pseudo_ops + ): if name not in instr_dict: 
instr_dict[name] = single_dict - logging.debug(f'Including pseudo_op: {name}') + logging.debug(f"Including pseudo_op: {name}") else: - if single_dict['match'] != instr_dict[name]['match']: - instr_dict[f'{name}_pseudo'] = single_dict - elif single_dict['extension'] not in instr_dict[name]['extension']: - instr_dict[name]['extension'].extend(single_dict['extension']) + if single_dict["match"] != instr_dict[name]["match"]: + instr_dict[f"{name}_pseudo"] = single_dict + elif single_dict["extension"] not in instr_dict[name]["extension"]: + instr_dict[name]["extension"].extend(single_dict["extension"]) + def process_imported_instructions(lines, instr_dict, file_name, opcodes_dir): """Processes imported instructions from the given lines and updates the instruction dictionary.""" for line in lines: - if '$import' not in line: + if "$import" not in line: continue - logging.debug(f'Processing imported line: {line}') + logging.debug(f"Processing imported line: {line}") import_ext, reg_instr = imported_regex.findall(line)[0] ext_file = find_extension_file(import_ext, opcodes_dir) validate_instruction_in_extension(reg_instr, ext_file, file_name, line) for oline in open(ext_file): - if re.findall(f'^\\s*{reg_instr}\\s+', oline): + if re.findall(f"^\\s*{reg_instr}\\s+", oline): name, single_dict = process_enc_line(oline, file_name) if name in instr_dict: - if instr_dict[name]['encoding'] != single_dict['encoding']: - log_and_exit(f'Imported instruction {name} from {os.path.basename(file_name)} has different encodings') - instr_dict[name]['extension'].extend(single_dict['extension']) + if instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings" + ) + instr_dict[name]["extension"].extend(single_dict["extension"]) else: instr_dict[name] = single_dict break + def find_extension_file(ext, opcodes_dir): """Finds the extension file path, considering the unratified directory if 
necessary.""" - ext_file = f'{opcodes_dir}/{ext}' + ext_file = f"{opcodes_dir}/{ext}" if not os.path.exists(ext_file): - ext_file = f'{opcodes_dir}/unratified/{ext}' + ext_file = f"{opcodes_dir}/unratified/{ext}" if not os.path.exists(ext_file): - log_and_exit(f'Extension {ext} not found.') + log_and_exit(f"Extension {ext} not found.") return ext_file + def validate_instruction_in_extension(inst, ext_file, file_name, pseudo_inst): """Validates if the original instruction exists in the dependent extension.""" found = False for oline in open(ext_file): - if re.findall(f'^\\s*{inst}\\s+', oline): + if re.findall(f"^\\s*{inst}\\s+", oline): found = True break if not found: - log_and_exit(f'Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}') + log_and_exit( + f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}" + ) + def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): """Creates a dictionary of instructions based on the provided file filters.""" - - ''' + + """ This function return a dictionary containing all instructions associated - with an extension defined by the file_filter input. - + with an extension defined by the file_filter input. + Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. Each node of the dictionary will correspond to an instruction which again is @@ -401,7 +441,7 @@ def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): this instruction - mask: hex value representin the bits that need to be masked to extract the value required for matching. 
- + In order to build this dictionary, the function does 2 passes over the same rv file: - First pass: extracts all standard instructions, skipping pseudo ops @@ -413,28 +453,39 @@ def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): - Checks if the dependent extension and instruction exist. - Adds the pseudo_op to the dictionary if the dependent instruction is not already present; otherwise, it is skipped. - ''' + """ opcodes_dir = os.path.dirname(os.path.realpath(__file__)) instr_dict = {} - file_names = [file for fil in file_filter for file in sorted(glob.glob(f'{opcodes_dir}/{fil}'), reverse=True)] - - logging.debug('Collecting standard instructions') + file_names = [ + file + for fil in file_filter + for file in sorted(glob.glob(f"{opcodes_dir}/{fil}"), reverse=True) + ] + + logging.debug("Collecting standard instructions") for file_name in file_names: - logging.debug(f'Parsing File: {file_name} for standard instructions') + logging.debug(f"Parsing File: {file_name} for standard instructions") lines = read_lines(file_name) process_standard_instructions(lines, instr_dict, file_name) - logging.debug('Collecting pseudo instructions') + logging.debug("Collecting pseudo instructions") for file_name in file_names: - logging.debug(f'Parsing File: {file_name} for pseudo instructions') + logging.debug(f"Parsing File: {file_name} for pseudo instructions") lines = read_lines(file_name) - process_pseudo_instructions(lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops) - - logging.debug('Collecting imported instructions') + process_pseudo_instructions( + lines, + instr_dict, + file_name, + opcodes_dir, + include_pseudo, + include_pseudo_ops, + ) + + logging.debug("Collecting imported instructions") for file_name in file_names: - logging.debug(f'Parsing File: {file_name} for imported instructions') + logging.debug(f"Parsing File: {file_name} for imported instructions") lines = read_lines(file_name) 
process_imported_instructions(lines, instr_dict, file_name, opcodes_dir) - return instr_dict \ No newline at end of file + return instr_dict diff --git a/sverilog_utils.py b/sverilog_utils.py index 4a6ace10..1fe20680 100644 --- a/sverilog_utils.py +++ b/sverilog_utils.py @@ -1,30 +1,37 @@ -import re +import collections import glob +import logging import os import pprint -import logging -import collections -import yaml +import re import sys + +import yaml + # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=logging.INFO, format='%(levelname)s:: %(message)s') +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + def make_sverilog(instr_dict): - names_str = '' + names_str = "" for i in instr_dict: names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n" - names_str += ' /* CSR Addresses */\n' - for num, name in csrs+csrs32: - names_str += f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" + names_str += " /* CSR Addresses */\n" + for num, name in csrs + csrs32: + names_str += ( + f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" + ) - sverilog_file = open('inst.sverilog','w') - sverilog_file.write(f''' + sverilog_file = open("inst.sverilog", "w") + sverilog_file.write( + f""" /* Automatically generated by parse_opcodes */ package riscv_instr; {names_str} endpackage -''') - sverilog_file.close() \ No newline at end of file +""" + ) + sverilog_file.close() From ea2eddb875ca12cfce5f464b382f751186540f2d Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Wed, 9 Oct 2024 20:52:56 +0530 Subject: [PATCH 06/18] pre commit fixes --- test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test.py b/test.py index 
4ef289ff..eb9b6783 100644 --- a/test.py +++ b/test.py @@ -2,9 +2,9 @@ import logging import unittest -from shared_utils import * from parse import * +from shared_utils import * class EncodingLineTest(unittest.TestCase): From 99f393697addd25dd6f4ef1a4bed6e3c8f1e1507 Mon Sep 17 00:00:00 2001 From: Jay Dev Jha Date: Sun, 20 Oct 2024 19:09:34 +0530 Subject: [PATCH 07/18] Update latex_utils.py to include hinval instructions Signed-off-by: Jay Dev Jha --- latex_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/latex_utils.py b/latex_utils.py index ee3a7232..2e6c649a 100644 --- a/latex_utils.py +++ b/latex_utils.py @@ -14,7 +14,7 @@ def create_priv_instr_dataset(): """Create dataset list for privileged instructions.""" - system_instr = ["_h", "_s", "_system", "_svinval", "64_h"] + system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"] return [ (system_instr, "Trap-Return Instructions", ["sret", "mret"], False), (system_instr, "Interrupt-Management Instructions", ["wfi"], False), From 0b7f6180f893ec7cddaa6fcf55c7a7e3969ccd17 Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Sun, 20 Oct 2024 19:16:49 +0530 Subject: [PATCH 08/18] Pre commit Fixes --- parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse.py b/parse.py index cef27317..bba59d81 100755 --- a/parse.py +++ b/parse.py @@ -125,4 +125,4 @@ def main(): if __name__ == "__main__": - main() \ No newline at end of file + main() From c673ee53a239604fa90fe45750fff71aec0c4021 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Fri, 25 Oct 2024 01:55:26 +1100 Subject: [PATCH 09/18] Stop including pseudo instructions in output for Go Go really only needs the instruction encodings for actual instructions. Additional pseudo-encodings have since been added to the riscv-opcodes generation, which also include instructions that are aliased to themselves (e.g. AJALPSEUDO/AJALRPSEUDO). 
Instead of dealing with these complications, stop including pseudo instructions in the output for Go and we'll synthesise pseudo instructions that we need. Add -pseudo which can be used to enable the generation of pseudo-instructions, restoring the previous behaviour. --- parse.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/parse.py b/parse.py index 17cd5a2f..8e2f4623 100755 --- a/parse.py +++ b/parse.py @@ -1199,13 +1199,14 @@ def signed(value, width): print(f"Running with args : {sys.argv}") extensions = sys.argv[1:] - for i in ["-c", "-latex", "-chisel", "-sverilog", "-rust", "-go", "-spinalhdl"]: + for i in ["-c", "-chisel", "-go", "-latex", "-pseudo" "-rust", "-spinalhdl", "-sverilog"]: if i in extensions: extensions.remove(i) print(f"Extensions selected : {extensions}") include_pseudo = False - if "-go" in sys.argv[1:]: + + if "-pseudo" in sys.argv[1:]: include_pseudo = True instr_dict = create_inst_dict(extensions, include_pseudo) From d57a94cf8eb07917a2084b6d502b9225fc9ce210 Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Fri, 25 Oct 2024 01:43:58 +0530 Subject: [PATCH 10/18] clean up codes for refactoring parsing logic --- c_utils.py | 15 +- latex_utils.py | 778 ++++++++++++++++++++++-------------------------- parse.py | 161 ++++------ shared_utils.py | 481 +++++++++++++++++------------- 4 files changed, 690 insertions(+), 745 deletions(-) diff --git a/c_utils.py b/c_utils.py index ee25bc24..cff33dd8 100644 --- a/c_utils.py +++ b/c_utils.py @@ -43,18 +43,19 @@ def make_c(instr_dict): arg_str = "" for name, rng in arg_lut.items(): + sanitized_name = name.replace(" ", "_").replace("=", "_eq_") begin = rng[1] end = rng[0] mask = ((1 << (end - begin + 1)) - 1) << begin - arg_str += f"#define INSN_FIELD_{name.upper().replace(' ', '_')} {hex(mask)}\n" + arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n" with open(f"{os.path.dirname(__file__)}/encoding.h", "r") as file: enc_header = file.read() commit = os.popen('git 
log -1 --format="format:%h"').read() - enc_file = open("encoding.out.h", "w") - enc_file.write( - f"""/* SPDX-License-Identifier: BSD-3-Clause */ + + # Generate the output as a string + output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright (c) 2023 RISC-V International */ @@ -78,5 +79,7 @@ def make_c(instr_dict): #ifdef DECLARE_CAUSE {declare_cause_str}#endif """ - ) - enc_file.close() + + # Write the modified output to the file + with open("encoding.out.h", "w") as enc_file: + enc_file.write(output_str) diff --git a/latex_utils.py b/latex_utils.py index 2e6c649a..ab5f6f92 100644 --- a/latex_utils.py +++ b/latex_utils.py @@ -1,35 +1,46 @@ -#!/usr/bin/env python3 +import collections +import copy +import glob import logging +import os import pprint +import re +import sys -from constants import * -from shared_utils import create_inst_dict +import yaml -LOG_FORMAT = "%(levelname)s:: %(message)s" -LOG_LEVEL = logging.INFO +from constants import * +from shared_utils import * -pretty_printer = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") -def create_priv_instr_dataset(): - """Create dataset list for privileged instructions.""" +def make_priv_latex_table(): + latex_file = open("priv-instr-table.tex", "w") + type_list = ["R-type", "I-type"] system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"] - return [ - (system_instr, "Trap-Return Instructions", ["sret", "mret"], False), - (system_instr, "Interrupt-Management Instructions", ["wfi"], False), + dataset_list = [(system_instr, "Trap-Return Instructions", ["sret", "mret"], False)] + dataset_list.append( + (system_instr, "Interrupt-Management Instructions", ["wfi"], False) + ) + dataset_list.append( ( system_instr, "Supervisor Memory-Management Instructions", ["sfence_vma"], False, - ), + ) + ) + dataset_list.append( ( system_instr, 
"Hypervisor Memory-Management Instructions", ["hfence_vvma", "hfence_gvma"], False, - ), + ) + ) + dataset_list.append( ( system_instr, "Hypervisor Virtual-Machine Load and Store Instructions", @@ -46,13 +57,17 @@ def create_priv_instr_dataset(): "hsv_w", ], False, - ), + ) + ) + dataset_list.append( ( system_instr, "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only", ["hlv_wu", "hlv_d", "hsv_d"], False, - ), + ) + ) + dataset_list.append( ( system_instr, "Svinval Memory-Management Instructions", @@ -64,230 +79,111 @@ def create_priv_instr_dataset(): "hinval_gvma", ], False, - ), - ] - - -def make_priv_latex_table(): - """Generate and write the LaTeX table for privileged instructions.""" - type_list = ["R-type", "I-type"] - dataset_list = create_priv_instr_dataset() + ) + ) caption = "\\caption{RISC-V Privileged Instructions}" + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - with open("priv-instr-table.tex", "w") as latex_file: - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + latex_file.close() def make_latex_table(): """ - - This function is mean to create the instr-table.tex that is meant to be used - by the riscv-isa-manual. - 1. creates a single latex file of multiple table - 2. Each table limited to a single page - 3. Only the last table is assigned a latex-caption. - - - For each table, we assign a type-list that captures the different instruction types (R, I, B, etc.) required for that table. - 1. Specify the type-list to capture various instruction types (e.g., R-type, I-type, B-type). - 2. Select a list of extensions (e.g., _i, 32_i) whose instructions are necessary to populate the table. - 3. For each extension or collection of extensions, assign a title that appears as a subheading within the table (these are inlined headings, not captions). 
- - * All of the above information is collected/created and sent to + This function is mean to create the instr-table.tex that is meant to be used + by the riscv-isa-manual. This function basically creates a single latext + file of multiple tables with each table limited to a single page. Only the + last table is assigned a latex-caption. + + For each table we assign a type-list which capture the different instruction + types (R, I, B, etc) that will be required for the table. Then we select the + list of extensions ('_i, '32_i', etc) whose instructions are required to + populate the table. For each extension or collection of extension we can + assign Title, such that in the end they appear as subheadings within + the table (note these are inlined headings and not captions of the table). + + All of the above information is collected/created and sent to make_ext_latex_table function to dump out the latex contents into a file. - * The last table only has to be given a caption - as per the policy of the + The last table only has to be given a caption - as per the policy of the riscv-isa-manual. """ - # File for writing LaTeX content - with open("instr-table.tex", "w") as latex_file: - # Prepare table configurations with type list, datasets, word size & caption - table_configurations = get_table_configurations() - - # Map each configuration from above with variables to pass as argumnet - for config in table_configurations: - # Unpack configuration dictionary into arguments for make_ext_latex_table - type_list = config["type_list"] - datasets = config["datasets"] - word_size = config["word_size"] - caption = config["caption"] - - # LaTeX table generation function - make_ext_latex_table(type_list, datasets, latex_file, word_size, caption) - - -def get_table_configurations(): - """ - Returns a list of table configurations, each specifying the type list, datasets, - word size, and caption for LaTeX table generation. 
- - Returns: - list: A list of dictionaries, each representing a table's configuration. - """ - return [ - create_table_configuration( - type_list=["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"], - datasets=[ - create_dataset(["_i", "32_i"], "RV32I Base Instruction Set", [], False), - create_dataset(["_i"], "", ["fence_tso", "pause"], True), - ], - word_size=32, - ), - create_table_configuration( - type_list=["R-type", "I-type", "S-type"], - datasets=[ - create_dataset( - ["64_i"], - "RV64I Base Instruction Set (in addition to RV32I)", - [], - False, - ), - create_dataset( - ["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False - ), - create_dataset( - ["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False - ), - create_dataset(["_m", "32_m"], "RV32M Standard Extension", [], False), - create_dataset( - ["64_m"], - "RV64M Standard Extension (in addition to RV32M)", - [], - False, - ), - ], - word_size=32, - ), - create_table_configuration( - type_list=["R-type"], - datasets=[ - create_dataset(["_a"], "RV32A Standard Extension", [], False), - create_dataset( - ["64_a"], - "RV64A Standard Extension (in addition to RV32A)", - [], - False, - ), - ], - word_size=32, - ), - create_table_configuration( - type_list=["R-type", "R4-type", "I-type", "S-type"], - datasets=[ - create_dataset(["_f"], "RV32F Standard Extension", [], False), - create_dataset( - ["64_f"], - "RV64F Standard Extension (in addition to RV32F)", - [], - False, - ), - ], - word_size=32, - ), - create_table_configuration( - type_list=["R-type", "R4-type", "I-type", "S-type"], - datasets=[ - create_dataset(["_d"], "RV32D Standard Extension", [], False), - create_dataset( - ["64_d"], - "RV64D Standard Extension (in addition to RV32D)", - [], - False, - ), - ], - word_size=32, - ), - create_table_configuration( - type_list=["R-type", "R4-type", "I-type", "S-type"], - datasets=[ - create_dataset(["_q"], "RV32Q Standard Extension", [], False), - create_dataset( - ["64_q"], - "RV64Q 
Standard Extension (in addition to RV32Q)", - [], - False, - ), - ], - word_size=32, - ), - create_table_configuration( - type_list=["R-type", "R4-type", "I-type", "S-type"], - datasets=[ - create_dataset( - ["_zfh", "_d_zfh", "_q_zfh"], - "RV32Zfh Standard Extension", - [], - False, - ), - create_dataset( - ["64_zfh"], - "RV64Zfh Standard Extension (in addition to RV32Zfh)", - [], - False, - ), - ], - word_size=32, - caption="\\caption{Instruction listing for RISC-V}", - ), - create_table_configuration( - type_list=[""], - datasets=[ - create_dataset( - ["_c", "32_c", "32_c_f", "_c_d"], - "RV32C Standard Extension", - [], - False, - ), - create_dataset( - ["64_c"], - "RV64C Standard Extension (in addition to RV32C)", - [], - False, - ), - ], - word_size=16, - caption="", - ), + # open the file and use it as a pointer for all further dumps + latex_file = open("instr-table.tex", "w") + + # create the rv32i table first. Here we set the caption to empty. We use the + # files rv_i and rv32_i to capture instructions relevant for rv32i + # configuration. The dataset is a list of 4-element tuples : + # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions + # is empty then it indicates that all instructions of the all the extensions + # in list_of_extensions need to be dumped. If not empty, then only the + # instructions listed in list_of_instructions will be dumped into latex. 
+ caption = "" + type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"] + dataset_list = [(["_i", "32_i"], "RV32I Base Instruction Set", [], False)] + dataset_list.append((["_i"], "", ["fence_tso", "pause"], True)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "I-type", "S-type"] + dataset_list = [ + (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False) ] + dataset_list.append( + (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False) + ) + dataset_list.append((["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False)) + dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False)) + dataset_list.append( + (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type"] + dataset_list = [(["_a"], "RV32A Standard Extension", [], False)] + dataset_list.append( + (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_f"], "RV32F Standard Extension", [], False)] + dataset_list.append( + (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_d"], "RV32D Standard Extension", [], False)] + dataset_list.append( + (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)] + dataset_list.append( + (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False) + ) + 
make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + caption = "\\caption{Instruction listing for RISC-V}" + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [ + (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False) + ] + dataset_list.append( + (["64_zfh"], "RV64Zfh Standard Extension (in addition to RV32Zfh)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + ## The following is demo to show that Compressed instructions can also be + # dumped in the same manner as above -def create_table_configuration(type_list, datasets, word_size, caption=""): - """ - Creates a table configuration dictionary with the provided parameters. - - Parameters: - type_list (list): List of instruction types to include in the table. - datasets (list of tuples): Each tuple contains: - - list_of_extensions (list): List of extension names. - - title (str): Title to appear as a subsection in the table. - - list_of_instructions (list): Specific instructions to include. - - include_pseudo_ops (bool): Whether to include pseudo-operations. - word_size (int): The word size for the instructions (32 or 16). - caption (str): The caption to include at the end of the table. - - Returns: - dict: A dictionary representing the table configuration. - """ - return { - "type_list": type_list, - "datasets": datasets, - "word_size": word_size, - "caption": caption, - } - - -def create_dataset(extensions, title, instructions, include_pseudo_ops): - """ - Creates a dataset tuple for table configuration. + # type_list = [''] + # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] + # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) + # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) - Parameters: - extensions (list): List of extension names. - title (str): Title for the dataset. 
- instructions (list): List of specific instructions to include. - include_pseudo_ops (bool): Whether to include pseudo-operations. - - Returns: - tuple: A tuple representing the dataset configuration. - """ - return (extensions, title, instructions, include_pseudo_ops) + latex_file.close() def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): @@ -295,232 +191,252 @@ def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): For a given collection of extensions this function dumps out a complete latex table which includes the encodings of the instructions. - Args: - - type_list (list of str): - 1. A list of instruction types (R, I, B, etc) that are treated as header for each table. - 2. Each table will have its own requirements and type_list must include all the instruction-types that the table needs. - 3. All elements of this list must be present in the latex_inst_type dictionary defined in constants.py - - - - dataset (list of tuples): A list of 3-element tuples where each tuple consists of: - 1. list_of_extensions (list): A list of extensions whose instructions will be populated under the respective title. - 2. title (str): A title associated with the respective table. - 3. list_of_instructions (list): If not empty, only these instructions present in the corresponding extension - will be included in the table, while others will be ignored. - - - latex_file (file pointer): A file pointer to the LaTeX file where the generated table will be written. - - - ilen (int): The ilen input indicates the length of the instruction for which the table is created. - - - caption (str): The caption for the LaTeX table. + The ilen input indicates the length of the instruction for which the table + is created. + + The caption input is used to create the latex-table caption. + + The type_list input is a list of instruction types (R, I, B, etc) that are + treated as header for each table. 
Each table will have its own requirements + and type_list must include all the instruction-types that the table needs. + Note, all elements of this list must be present in the latex_inst_type + dictionary defined in constants.py + + The latex_file is a file pointer to which the latex-table will dumped into + + The dataset is a list of 3-element tuples containing: + (list_of_extensions, title, list_of_instructions) + The list_of_extensions must contain all the set of extensions whose + instructions must be populated under a given title. If list_of_instructions + is not empty, then only those instructions mentioned in list_of_instructions + present in the extension will be dumped into the latex-table, other + instructions will be ignored. + + Once the above inputs are received then function first creates table entries + for the instruction types. To simplify things, we maintain a dictionary + called latex_inst_type in constants.py which is created in the same way the + instruction dictionary is created. This allows us to re-use the same logic + to create the instruction types table as well + + Once the header is created, we then parse through every entry in the + dataset. For each list dataset entry we use the create_inst_dict function to + create an exhaustive list of instructions associated with the respective + collection of the extension of that dataset. Then we apply the instruction + filter, if any, indicated by the list_of_instructions of that dataset. + Thereon, for each instruction we create a latex table entry. + + Latex table specification for ilen sized instructions: + Each table is created with ilen+1 columns - ilen columns for each bit of the + instruction and one column to hold the name of the instruction. + + For each argument of an instruction we use the arg_lut from constants.py + to identify its position in the encoding, and thus create a multicolumn + entry with the name of the argument as the data. 
For hardcoded bits, we + do the same where we capture a string of continuous 1s and 0s, identify + the position and assign the same string as the data of the + multicolumn entry in the table. - Returns: - - None: The function writes the generated LaTeX table directly to the provided `latex_file`. - - Process: - 1. Creates table headers based on the instruction types in `type_list` using the `latex_inst_type` dictionary - from constants.py. - - 2. Iterates through each entry in the dataset to: - - Generate an exhaustive list of instructions for each dataset using `create_inst_dict`. - - Apply any instruction filters based on `list_of_instructions` to select only relevant instructions. - - 3. For each instruction, generates LaTeX table entries. - - Uses `arg_lut` from constants.py to determine the position of arguments in the encoding, and creates multicolumn - LaTeX entries for these arguments. - - Handles hardcoded bits (e.g., strings of 1s and 0s) similarly, creating multicolumn entries for continuous - strings of bits. - - 4. Writes the LaTeX table to `latex_file` with a specific format suitable for instructions of size `ilen`. 
""" + column_size = "".join(["p{0.002in}"] * (ilen + 1)) + + type_entries = ( + """ + \\multicolumn{3}{l}{31} & + \\multicolumn{2}{r}{27} & + \\multicolumn{1}{c}{26} & + \\multicolumn{1}{r}{25} & + \\multicolumn{3}{l}{24} & + \\multicolumn{2}{r}{20} & + \\multicolumn{3}{l}{19} & + \\multicolumn{2}{r}{15} & + \\multicolumn{2}{l}{14} & + \\multicolumn{1}{r}{12} & + \\multicolumn{4}{l}{11} & + \\multicolumn{1}{r}{7} & + \\multicolumn{6}{l}{6} & + \\multicolumn{1}{r}{0} \\\\ + \\cline{2-33}\n&\n\n +""" + if ilen == 32 + else """ + \\multicolumn{1}{c}{15} & + \\multicolumn{1}{c}{14} & + \\multicolumn{1}{c}{13} & + \\multicolumn{1}{c}{12} & + \\multicolumn{1}{c}{11} & + \\multicolumn{1}{c}{10} & + \\multicolumn{1}{c}{9} & + \\multicolumn{1}{c}{8} & + \\multicolumn{1}{c}{7} & + \\multicolumn{1}{c}{6} & + \\multicolumn{1}{c}{5} & + \\multicolumn{1}{c}{4} & + \\multicolumn{1}{c}{3} & + \\multicolumn{1}{c}{2} & + \\multicolumn{1}{c}{1} & + \\multicolumn{1}{c}{0} \\\\ + \\cline{2-17}\n&\n\n +""" + ) - column_size = get_column_size(ilen) - type_entries = generate_type_entries(ilen) - type_dict = get_type_dict(type_list) - - # Build the table entry with each instruction types - for inst_type, fields in type_dict.items(): - type_entries += build_instruction_type_entry(inst_type, fields, ilen) - - # Create a table for each dataset entry - content = generate_dataset_content(dataset, ilen) - - header = generate_table_header(column_size, ilen, type_entries) - endtable = generate_table_footer(caption) - - # Dump the contents to the latex file - latex_file.write(header + content + endtable) - - -def get_column_size(ilen): - """Generate the column size string based on instruction length (ilen).""" - return "".join(["p{0.002in}"] * (ilen + 1)) - - -def generate_type_entries(ilen): - """Generate the type entries section of the LaTeX table.""" - if ilen == 32: - return """ - \\multicolumn{3}{l}{31} & - \\multicolumn{2}{r}{27} & - \\multicolumn{1}{c}{26} & - \\multicolumn{1}{r}{25} & - 
\\multicolumn{3}{l}{24} & - \\multicolumn{2}{r}{20} & - \\multicolumn{3}{l}{19} & - \\multicolumn{2}{r}{15} & - \\multicolumn{2}{l}{14} & - \\multicolumn{1}{r}{12} & - \\multicolumn{4}{l}{11} & - \\multicolumn{1}{r}{7} & - \\multicolumn{6}{l}{6} & - \\multicolumn{1}{r}{0} \\\\ - \\cline{2-33}\n&\n\n""" - else: - return """ - \\multicolumn{1}{c}{15} & - \\multicolumn{1}{c}{14} & - \\multicolumn{1}{c}{13} & - \\multicolumn{1}{c}{12} & - \\multicolumn{1}{c}{11} & - \\multicolumn{1}{c}{10} & - \\multicolumn{1}{c}{9} & - \\multicolumn{1}{c}{8} & - \\multicolumn{1}{c}{7} & - \\multicolumn{1}{c}{6} & - \\multicolumn{1}{c}{5} & - \\multicolumn{1}{c}{4} & - \\multicolumn{1}{c}{3} & - \\multicolumn{1}{c}{2} & - \\multicolumn{1}{c}{1} & - \\multicolumn{1}{c}{0} \\\\ - \\cline{2-17}\n&\n\n""" - - -def get_type_dict(type_list): - """Create a subset dictionary of latex_inst_type for the given type_list.""" - return {key: value for key, value in latex_inst_type.items() if key in type_list} - - -def build_instruction_type_entry(inst_type, fields, ilen): - """Build a LaTeX table entry for each instruction type.""" - entries = [] - for field in fields["variable_fields"]: - (msb, lsb) = arg_lut[field] - name = latex_mapping.get(field, field) - entries.append((msb, lsb, name)) - - return format_table_entry(entries, inst_type, ilen) - - -def format_table_entry(fields, entry_type, ilen): - """Generate formatted LaTeX table entry.""" - fields.sort(key=lambda f: f[0], reverse=True) - entry = "" - for i, (msb, lsb, name) in enumerate(fields): - col_size = msb - lsb + 1 - if i == len(fields) - 1: - entry += ( - f"\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} & {entry_type} \\\\\n" - ) - elif i == 0: - entry += f"\\multicolumn{{{col_size}}}{{|c|}}{{{name}}} &\n" - else: - entry += f"\\multicolumn{{{col_size}}}{{c|}}{{{name}}} &\n" - return entry + f"\\cline{{2-{ilen+1}}}\n&\n\n" + # depending on the type_list input we create a subset dictionary of + # latex_inst_type dictionary present in 
constants.py + type_dict = { + key: value for key, value in latex_inst_type.items() if key in type_list + } + # iterate ovr each instruction type and create a table entry + for t in type_dict: + fields = [] + + # first capture all "arguments" of the type (funct3, funct7, rd, etc) + # and capture their positions using arg_lut. + for f in type_dict[t]["variable_fields"]: + (msb, lsb) = arg_lut[f] + name = f if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + # iterate through the 32 bits, starting from the msb, and assign + # argument names to the relevant portions of the instructions. This + # information is stored as a 3-element tuple containing the msb, lsb + # position of the arugment and the name of the argument. + msb = ilen - 1 + y = "" + for r in range(0, ilen): + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + if r == 31: + if y != "": + fields.append((msb, 0, y)) + y = "" -def generate_dataset_content(dataset, ilen): - """Generate LaTeX content for each dataset entry.""" + # sort the arguments in decreasing order of msb position + fields.sort(key=lambda y: y[0], reverse=True) + + # for each argument/string of 1s or 0s, create a multicolumn latex table + # entry + entry = "" + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields) - 1: + entry += ( + f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n" + ) + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + type_entries += entry + + # for each entry in the dataset create a table content = "" for ext_list, title, filter_list, include_pseudo in dataset: - instr_dict = get_instruction_dict(ext_list, include_pseudo) - filtered_list = filter_list if filter_list else list(instr_dict.keys()) - instr_entries = generate_instruction_entries(instr_dict, 
filtered_list, ilen) - - if title: - content += generate_dataset_title(title, ilen) + instr_entries - else: - content += instr_entries - return content - - -def get_instruction_dict(ext_list, include_pseudo): - """Create a dictionary of instructions for given extensions.""" - instr_dict = {} - for ext in ext_list: - instr_dict.update(create_inst_dict([f"rv{ext}"], include_pseudo)) - return instr_dict + instr_dict = {} + + # for all extensions list in ext_list, create a dictionary of + # instructions associated with those extensions. + for e in ext_list: + instr_dict.update(create_inst_dict(["rv" + e], include_pseudo)) + + # if filter_list is not empty then use that as the official set of + # instructions that need to be dumped into the latex table + inst_list = list(instr_dict.keys()) if not filter_list else filter_list + + # for each instruction create an latex table entry just like how we did + # above with the instruction-type table. + instr_entries = "" + for inst in inst_list: + if inst not in instr_dict: + logging.error( + f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict" + ) + raise SystemExit(1) + fields = [] + + # only if the argument is available in arg_lut we consume it, else + # throw error. 
+ for f in instr_dict[inst]["variable_fields"]: + if f not in arg_lut: + logging.error( + f"Found variable {f} in instruction {inst} whose mapping is not available" + ) + raise SystemExit(1) + (msb, lsb) = arg_lut[f] + name = ( + f.replace("_", ".") if f not in latex_mapping else latex_mapping[f] + ) + fields.append((msb, lsb, name)) + + msb = ilen - 1 + y = "" + if ilen == 16: + encoding = instr_dict[inst]["encoding"][16:] + else: + encoding = instr_dict[inst]["encoding"] + for r in range(0, ilen): + x = encoding[r] + if ((msb, ilen - 1 - r + 1)) in latex_fixed_fields: + fields.append((msb, ilen - 1 - r + 1, y)) + msb = ilen - 1 - r + y = "" + if x == "-": + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + else: + y += str(x) + if r == ilen - 1: + if y != "": + fields.append((msb, 0, y)) + y = "" + + fields.sort(key=lambda y: y[0], reverse=True) + entry = "" + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields) - 1: + entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n' + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + instr_entries += entry + + # once an entry of the dataset is completed we create the whole table + # with the title of that dataset as sub-heading (sort-of) + if title != "": + content += f""" - -def generate_instruction_entries(instr_dict, inst_list, ilen): - """Generate LaTeX entries for each instruction in the list.""" - instr_entries = "" - for inst in inst_list: - if inst not in instr_dict: - logging.error(f"Instruction {inst} not found in instr_dict") - raise SystemExit(1) - - fields = parse_instruction_fields(instr_dict[inst], ilen) - instr_entries += format_table_entry( - fields, inst.upper().replace("_", "."), ilen - ) - - return instr_entries - - -def 
parse_instruction_fields(inst_data, ilen): - """Parse and extract fields from instruction data.""" - fields = [] - encoding = inst_data["encoding"][16:] if ilen == 16 else inst_data["encoding"] - msb = ilen - 1 - y = "" - - for i in range(ilen): - x = encoding[i] - if x == "-": - if y: - fields.append((msb, ilen - i, y)) - y = "" - msb -= 1 - else: - y += str(x) - - if i == ilen - 1 and y: - fields.append((msb, 0, y)) - - fields.sort(key=lambda f: f[0], reverse=True) - return fields - - -def generate_dataset_title(title, ilen): - """Generate LaTeX dataset title.""" - return f""" \\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ \\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ -\\cline{{2-{ilen + 1}}} -""" +\\cline{{2-{ilen+1}}} + & +{instr_entries} +""" + else: + content += f""" +{instr_entries} +""" -def generate_table_header(column_size, ilen, type_entries): - """Generate LaTeX table header.""" - return f""" + header = f""" \\newpage \\begin{{table}}[p] \\begin{{small}} \\begin{{center}} \\begin{{tabular}} {{{column_size}l}} - {" ".join(['&'] * ilen)} \\\\ + {" ".join(['&']*ilen)} \\\\ & {type_entries} """ - - -def generate_table_footer(caption): - """Generate LaTeX table footer.""" - return f""" + endtable = f""" \\end{{tabular}} \\end{{center}} @@ -528,3 +444,5 @@ def generate_table_footer(caption): {caption} \\end{{table}} """ + # dump the contents and return + latex_file.write(header + content + endtable) diff --git a/parse.py b/parse.py index bba59d81..19574562 100755 --- a/parse.py +++ b/parse.py @@ -6,14 +6,14 @@ import yaml -from c_utils import make_c -from chisel_utils import make_chisel +from c_utils import * +from chisel_utils import * from constants import * -from go_utils import make_go -from latex_utils import make_latex_table, make_priv_latex_table -from rust_utils import make_rust -from shared_utils import add_segmented_vls_insn, create_inst_dict -from sverilog_utils import make_sverilog +from go_utils import * +from latex_utils import * +from 
rust_utils import * +from shared_utils import * +from sverilog_utils import * LOG_FORMAT = "%(levelname)s:: %(message)s" LOG_LEVEL = logging.INFO @@ -21,108 +21,55 @@ pretty_printer = pprint.PrettyPrinter(indent=2) logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) - -def remove_non_extensions(args): - """ - Removes non-extension flags from the command-line arguments. - """ - extensions = args[1:] - flags = ["-c", "-latex", "-chisel", "-sverilog", "-rust", "-go", "-spinalhdl"] - return [ext for ext in extensions if ext not in flags] - - -def process_instruction_dict(extensions, include_pseudo): - """ - Processes the instruction dictionary by creating and adding segmented instructions. - """ - instr_dict = create_inst_dict(extensions, include_pseudo) - instr_dict = add_segmented_vls_insn(instr_dict) - return collections.OrderedDict(sorted(instr_dict.items())) - - -def write_yaml(instr_dict, filename="instr_dict.yaml"): - """ - Writes the instruction dictionary to a YAML file. - """ - with open(filename, "w") as outfile: - yaml.dump(instr_dict, outfile, default_flow_style=False) - - -def generate_outputs(instr_dict, extensions): - """ - Generates output files based on selected extensions and flags. 
- """ - # Dictionary to map extensions to their respective functions and logging messages - extension_map = { - "-c": { - "function": lambda: make_c( - collections.OrderedDict( - sorted( - create_inst_dict( - extensions, False, include_pseudo_ops=emitted_pseudo_ops - ).items() - ) - ) - ), - "message": "encoding.out.h generated successfully", - }, - "-chisel": { - "function": lambda: make_chisel(instr_dict), - "message": "inst.chisel generated successfully", - }, - "-spinalhdl": { - "function": lambda: make_chisel(instr_dict, spinal_hdl=True), - "message": "inst.spinalhdl generated successfully", - }, - "-sverilog": { - "function": lambda: make_sverilog(instr_dict), - "message": "inst.sverilog generated successfully", - }, - "-rust": { - "function": lambda: make_rust(instr_dict), - "message": "inst.rs generated successfully", - }, - "-go": { - "function": lambda: make_go(instr_dict), - "message": "inst.go generated successfully", - }, - "-latex": { - "function": lambda: (make_latex_table(), make_priv_latex_table()), - "message": [ - "instr-table.tex generated successfully", - "priv-instr-table.tex generated successfully", - ], - }, - } - - for ext, actions in extension_map.items(): - if ext in extensions: - try: - actions["function"]() - if isinstance(actions["message"], list): - for msg in actions["message"]: - logging.info(msg) - else: - logging.info(actions["message"]) - - except Exception as e: - logging.error(f"Error generating output for {ext}: {e}") - - -def main(): - """ - Main function for processing and generation of files based on command-line arguments. 
- """ +if __name__ == "__main__": print(f"Running with args : {sys.argv}") - extensions = remove_non_extensions(sys.argv) - print(f"Extensions selected : {extensions}") - include_pseudo = "-go" in sys.argv[1:] - instr_dict = process_instruction_dict(extensions, include_pseudo) + extensions = sys.argv[1:] + for i in ["-c", "-latex", "-chisel", "-sverilog", "-rust", "-go", "-spinalhdl"]: + if i in extensions: + extensions.remove(i) + print(f"Extensions selected : {extensions}") - write_yaml(instr_dict) - generate_outputs(instr_dict, sys.argv[1:]) + include_pseudo = False + if "-go" in sys.argv[1:]: + include_pseudo = True + instr_dict = create_inst_dict(extensions, include_pseudo) -if __name__ == "__main__": - main() + with open("instr_dict.yaml", "w") as outfile: + yaml.dump(add_segmented_vls_insn(instr_dict), outfile, default_flow_style=False) + instr_dict = collections.OrderedDict(sorted(instr_dict.items())) + + if "-c" in sys.argv[1:]: + instr_dict_c = create_inst_dict( + extensions, False, include_pseudo_ops=emitted_pseudo_ops + ) + instr_dict_c = collections.OrderedDict(sorted(instr_dict_c.items())) + make_c(instr_dict_c) + logging.info("encoding.out.h generated successfully") + + if "-chisel" in sys.argv[1:]: + make_chisel(instr_dict) + logging.info("inst.chisel generated successfully") + + if "-spinalhdl" in sys.argv[1:]: + make_chisel(instr_dict, True) + logging.info("inst.spinalhdl generated successfully") + + if "-sverilog" in sys.argv[1:]: + make_sverilog(instr_dict) + logging.info("inst.sverilog generated successfully") + + if "-rust" in sys.argv[1:]: + make_rust(instr_dict) + logging.info("inst.rs generated successfully") + + if "-go" in sys.argv[1:]: + make_go(instr_dict) + logging.info("inst.go generated successfully") + + if "-latex" in sys.argv[1:]: + make_latex_table() + logging.info("instr-table.tex generated successfully") + make_priv_latex_table() + logging.info("priv-instr-table.tex generated successfully") diff --git a/shared_utils.py 
b/shared_utils.py index 8c081e20..5c925151 100644 --- a/shared_utils.py +++ b/shared_utils.py @@ -5,6 +5,7 @@ import os import pprint import re +from itertools import chain from constants import * @@ -15,290 +16,353 @@ logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) -def process_enc_line(line, ext): - """ - This function processes each line of the encoding files (rv*). As part of - the processing, the function ensures that the encoding is legal through the - following checks:: - - - there is no over specification (same bits assigned different values) - - there is no under specification (some bits not assigned values) - - bit ranges are in the format hi..lo=val where hi > lo - - value assigned is representable in the bit range - - also checks that the mapping of arguments of an instruction exists in - arg_lut. - - If the above checks pass, then the function returns a tuple of the name and - a dictionary containing basic information of the instruction which includes: - - variables: list of arguments used by the instruction whose mapping - exists in the arg_lut dictionary - - encoding: this contains the 32-bit encoding of the instruction where - '-' is used to represent position of arguments and 1/0 is used to - reprsent the static encoding of the bits - - extension: this field contains the rv* filename from which this - instruction was included - - match: hex value representing the bits that need to match to detect - this instruction - - mask: hex value representin the bits that need to be masked to extract - the value required for matching. 
- """ - encoding = initialize_encoding() - name, remaining = parse_instruction_name(line) - - # Fixed ranges of the form hi..lo=val - process_fixed_ranges(remaining, encoding, line) - - # Single fixed values of the form = - remaining = process_single_fixed(remaining, encoding, line) +# Initialize encoding to 32-bit '-' values +def initialize_encoding(bits=32): + """Initialize encoding with '-' to represent don't care bits.""" + return ["-"] * bits - # Create match and mask strings - match, mask = create_match_and_mask(encoding) - - # Process instruction arguments - args = process_arguments(remaining, encoding, name) - - # Create and return the final instruction dictionary - instruction_dict = create_instruction_dict(encoding, args, ext, match, mask) - - return name, instruction_dict +# Validate bit range and value +def validate_bit_range(msb, lsb, entry_value, line): + """Validate the bit range and entry value.""" + if msb < lsb: + logging.error( + f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding' + ) + raise SystemExit(1) -def initialize_encoding(): - """Initialize a 32-bit encoding with '-' representing 'don't care'.""" - return ["-"] * 32 + if entry_value >= (1 << (msb - lsb + 1)): + logging.error( + f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}' + ) + raise SystemExit(1) -def parse_instruction_name(line): - """Extract the instruction name and remaining part of the line.""" +# Split the instruction line into name and remaining part +def parse_instruction_line(line): + """Parse the instruction name and the remaining encoding details.""" name, remaining = line.split(" ", 1) - name = name.replace(".", "_").lstrip() + name = name.replace(".", "_") # Replace dots for compatibility + remaining = remaining.lstrip() # Remove leading whitespace return name, remaining -def process_fixed_ranges(remaining, encoding, line): - """Process bit ranges of the form hi..lo=val, checking 
for errors and updating encoding.""" - for s2, s1, entry in fixed_ranges.findall(remaining): - msb, lsb = int(s2), int(s1) - validate_bit_range(msb, lsb, line) - validate_entry_value(msb, lsb, entry, line) - update_encoding(msb, lsb, entry, encoding, line) +# Verify Overlapping Bits +def check_overlapping_bits(encoding, ind, line): + """Check for overlapping bits in the encoding.""" + if encoding[31 - ind] != "-": + logging.error( + f'{line.split(" ")[0]:<10} has {ind} bit overlapping in its opcodes' + ) + raise SystemExit(1) -def validate_bit_range(msb, lsb, line): - """Ensure that msb > lsb and raise an error if not.""" - if msb < lsb: - log_and_exit(f"{get_instruction_name(line)} has msb < lsb in its encoding") +# Update encoding for fixed ranges +def update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line): + """ + Update encoding bits for a given bit range. + Checks for overlapping bits and assigns the value accordingly. + """ + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding, ind, line) + bit = str((entry_value >> (ind - lsb)) & 1) + encoding[31 - ind] = bit -def validate_entry_value(msb, lsb, entry, line): - """Ensure that the value assigned to a bit range is legal for its width.""" - entry_value = int(entry, 0) - if entry_value >= (1 << (msb - lsb + 1)): - log_and_exit( - f"{get_instruction_name(line)} has an illegal value for the bit width {msb - lsb}" - ) +# Process fixed bit patterns +def process_fixed_ranges(remaining, encoding, line): + """Process fixed bit ranges in the encoding.""" + for s2, s1, entry in fixed_ranges.findall(remaining): + msb, lsb, entry_value = int(s2), int(s1), int(entry, 0) + # Validate bit range and entry value + validate_bit_range(msb, lsb, entry_value, line) + update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line) -def update_encoding(msb, lsb, entry, encoding, line): - """Update the encoding array for a given bit range.""" - entry_value = int(entry, 0) - for ind in range(lsb, msb 
+ 1): - if encoding[31 - ind] != "-": - log_and_exit( - f"{get_instruction_name(line)} has overlapping bits in its opcodes" - ) - encoding[31 - ind] = str((entry_value >> (ind - lsb)) & 1) + return fixed_ranges.sub(" ", remaining) +# Process single bit assignments def process_single_fixed(remaining, encoding, line): - """Process single fixed values of the form =.""" - for lsb, value, _ in single_fixed.findall(remaining): + """Process single fixed assignments in the encoding.""" + for lsb, value, drop in single_fixed.findall(remaining): lsb = int(lsb, 0) value = int(value, 0) - if encoding[31 - lsb] != "-": - log_and_exit( - f"{get_instruction_name(line)} has overlapping bits in its opcodes" - ) - encoding[31 - lsb] = str(value) - return fixed_ranges.sub(" ", remaining) - -def create_match_and_mask(encoding): - """Generate match and mask strings from the encoding array.""" - match = "".join(encoding).replace("-", "0") - mask = "".join(encoding).replace("0", "1").replace("-", "0") - return match, mask + check_overlapping_bits(encoding, lsb, line) + encoding[31 - lsb] = str(value) -def process_arguments(remaining, encoding, name): - """Process instruction arguments and update the encoding with argument positions.""" - args = single_fixed.sub(" ", remaining).split() - encoding_args = encoding.copy() +# Main function to check argument look-up table +def check_arg_lut(args, encoding_args, name): + """Check if arguments are present in arg_lut.""" for arg in args: if arg not in arg_lut: - handle_missing_arg(arg, name) + arg = handle_arg_lut_mapping(arg, name) msb, lsb = arg_lut[arg] - update_arg_encoding(msb, lsb, arg, encoding_args, name) - return args, encoding_args + update_encoding_args(encoding_args, arg, msb, lsb) -def handle_missing_arg(arg, name): - """Handle missing argument mapping in arg_lut.""" - if "=" in arg: - existing_arg = arg.split("=")[0] +# Handle missing argument mappings +def handle_arg_lut_mapping(arg, name): + """Handle cases where an argument needs 
to be mapped to an existing one.""" + parts = arg.split("=") + if len(parts) == 2: + existing_arg, new_arg = parts if existing_arg in arg_lut: arg_lut[arg] = arg_lut[existing_arg] - return - log_and_exit(f"Variable {arg} in instruction {name} not mapped in arg_lut") + else: + logging.error( + f" Found field {existing_arg} in variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + raise SystemExit(1) + else: + logging.error( + f" Found variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + raise SystemExit(1) + return arg -def update_arg_encoding(msb, lsb, arg, encoding_args, name): - """Update the encoding array with the argument positions.""" +# Update encoding args with variables +def update_encoding_args(encoding_args, arg, msb, lsb): + """Update encoding arguments and ensure no overlapping.""" for ind in range(lsb, msb + 1): - if encoding_args[31 - ind] != "-": - log_and_exit(f"Variable {arg} overlaps in bit {ind} in instruction {name}") + check_overlapping_bits(encoding_args, ind, arg) encoding_args[31 - ind] = arg -def create_instruction_dict(encoding, args, ext, match, mask): - """Create the final dictionary for the instruction.""" - return { +# Compute match and mask +def convert_encoding_to_match_mask(encoding): + """Convert the encoding list to match and mask strings.""" + match = "".join(encoding).replace("-", "0") + mask = "".join(encoding).replace("0", "1").replace("-", "0") + return hex(int(match, 2)), hex(int(mask, 2)) + + +# Processing main function for a line in the encoding file +def process_enc_line(line, ext): + """ + This function processes each line of the encoding files (rv*). 
As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. + If the above checks pass, then the function returns a tuple of the name and + a dictionary containing basic information of the instruction which includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + reprsent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representin the bits that need to be masked to extract + the value required for matching. 
+ """ + encoding = initialize_encoding() + + # Parse the instruction line + name, remaining = parse_instruction_line(line) + + # Process fixed ranges + remaining = process_fixed_ranges(remaining, encoding, line) + + # Process single fixed assignments + process_single_fixed(remaining, encoding, line) + + # Convert the list of encodings into a match and mask + match, mask = convert_encoding_to_match_mask(encoding) + + # Check arguments in arg_lut + args = single_fixed.sub(" ", remaining).split() + encoding_args = encoding.copy() + + check_arg_lut(args, encoding_args, name) + + # Return single_dict + return name, { "encoding": "".join(encoding), "variable_fields": args, "extension": [os.path.basename(ext)], - "match": hex(int(match, 2)), - "mask": hex(int(mask, 2)), + "match": match, + "mask": mask, } -def log_and_exit(message): - """Log an error message and exit the program.""" - logging.error(message) - raise SystemExit(1) +# Extract ISA Type +def extract_isa_type(ext_name): + """Extracts the ISA type from the extension name.""" + return ext_name.split("_")[0] -def get_instruction_name(line): - """Helper to extract the instruction name from a line.""" - return line.split(" ")[0] +# Verify the types for RV* +def is_rv_variant(type1, type2): + """Checks if the types are RV variants (rv32/rv64).""" + return (type2 == "rv" and type1 in {"rv32", "rv64"}) or ( + type1 == "rv" and type2 in {"rv32", "rv64"} + ) -def overlaps(x, y): - """ - Check if two bit strings overlap without conflicts. +# Check for same base ISA +def has_same_base_isa(type1, type2): + """Determines if the two ISA types share the same base.""" + return type1 == type2 or is_rv_variant(type1, type2) - Args: - x (str): First bit string. - y (str): Second bit string. - Returns: - bool: True if the bit strings overlap without conflicts, False otherwise. 
+# Compare the base ISA type of a given extension name against a list of extension names +def same_base_isa(ext_name, ext_name_list): + """Checks if the base ISA type of ext_name matches any in ext_name_list.""" + type1 = extract_isa_type(ext_name) + return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list) - In the context of RISC-V opcodes, this function ensures that the bit ranges - defined by two different bit strings do not conflict. - """ - # Minimum length of the two strings - min_len = min(len(x), len(y)) +# Pad two strings to equal length +def pad_to_equal_length(str1, str2, pad_char="-"): + """Pads two strings to equal length using the given padding character.""" + max_len = max(len(str1), len(str2)) + return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char) - for char_x, char_y in zip(x[:min_len], y[:min_len]): - if char_x != "-" and char_y != "-" and char_x != char_y: - return False - return True +# Check compatibility for two characters +def has_no_conflict(char1, char2): + """Checks if two characters are compatible (either matching or don't-care).""" + return char1 == "-" or char2 == "-" or char1 == char2 -def overlap_allowed(a, x, y): - """ - Check if there is an overlap between keys and values in a dictionary. +# Conflict check between two encoded strings +def overlaps(x, y): + """Checks if two encoded strings overlap without conflict.""" + x, y = pad_to_equal_length(x, y) + return all(has_no_conflict(x[i], y[i]) for i in range(len(x))) - Args: - a (dict): The dictionary where keys are mapped to sets or lists of keys. - x (str): The first key to check. - y (str): The second key to check. - Returns: - bool: True if both (x, y) or (y, x) are present in the dictionary - as described, False otherwise. +# Check presence of keys in dictionary. 
+def is_in_nested_dict(a, key1, key2): + """Checks if key2 exists in the dictionary under key1.""" + return key1 in a and key2 in a[key1] - This function determines if `x` is a key in the dictionary `a` and - its corresponding value contains `y`, or if `y` is a key and its - corresponding value contains `x`. - """ - return x in a and y in a[x] or y in a and x in a[y] +# Overlap allowance +def overlap_allowed(a, x, y): + """Determines if overlap is allowed between x and y based on nested dictionary checks""" + return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x) -# Checks if overlap between two extensions is allowed +# Check overlap allowance between extensions def extension_overlap_allowed(x, y): + """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary.""" return overlap_allowed(overlapping_extensions, x, y) -# Checks if overlap between two instructions is allowed +# Check overlap allowance between instructions def instruction_overlap_allowed(x, y): + """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary.""" return overlap_allowed(overlapping_instructions, x, y) -# Checks if ext_name shares the same base ISA with any in ext_name_list -def same_base_isa(ext_name, ext_name_list): - type1 = ext_name.split("_")[0] - for ext_name1 in ext_name_list: - type2 = ext_name1.split("_")[0] - if ( - type1 == type2 - or (type2 == "rv" and type1 in ["rv32", "rv64"]) - or (type1 == "rv" and type2 in ["rv32", "rv64"]) - ): - return True - return False +# Check 'nf' field +def is_segmented_instruction(instruction): + """Checks if an instruction contains the 'nf' field.""" + return "nf" in instruction["variable_fields"] + +# Expand 'nf' fields +def update_with_expanded_instructions(updated_dict, key, value): + """Expands 'nf' fields in the instruction dictionary and updates it with new instructions.""" + for new_key, new_value in expand_nf_field(key, value): + updated_dict[new_key] 
= new_value -# Expands instructions with "nf" field in variable_fields, otherwise returns unchanged + +# Process instructions, expanding segmented ones and updating the dictionary def add_segmented_vls_insn(instr_dict): - updated_dict = {} - for k, v in instr_dict.items(): - if "nf" in v["variable_fields"]: - updated_dict.update(expand_nf_field(k, v)) - else: - updated_dict[k] = v - return updated_dict + """Processes instructions, expanding segmented ones and updating the dictionary.""" + # Use dictionary comprehension for efficiency + return dict( + chain.from_iterable( + ( + expand_nf_field(key, value) + if is_segmented_instruction(value) + else [(key, value)] + ) + for key, value in instr_dict.items() + ) + ) -# Expands nf field in instruction name and updates instruction details +# Expand the 'nf' field in the instruction dictionary def expand_nf_field(name, single_dict): + """Validate and prepare the instruction dictionary.""" + validate_nf_field(single_dict, name) + remove_nf_field(single_dict) + update_mask(single_dict) + + name_expand_index = name.find("e") + + # Pre compute the base match value and encoding prefix + base_match = int(single_dict["match"], 16) + encoding_prefix = single_dict["encoding"][3:] + + expanded_instructions = [ + create_expanded_instruction( + name, single_dict, nf, name_expand_index, base_match, encoding_prefix + ) + for nf in range(8) # Range of 0 to 7 + ] + + return expanded_instructions + + +# Validate the presence of 'nf' +def validate_nf_field(single_dict, name): + """Validates the presence of 'nf' in variable fields before expansion.""" if "nf" not in single_dict["variable_fields"]: logging.error(f"Cannot expand nf field for instruction {name}") raise SystemExit(1) - single_dict["variable_fields"].remove("nf") # Remove "nf" from variable fields - single_dict["mask"] = hex( - int(single_dict["mask"], 16) | (0b111 << 29) - ) # Update mask - name_expand_index = name.find("e") - expanded_instructions = [] - for nf in range(8): # 
Expand nf for values 0 to 7 - new_single_dict = copy.deepcopy(single_dict) - new_single_dict["match"] = hex(int(single_dict["match"], 16) | (nf << 29)) - new_single_dict["encoding"] = format(nf, "03b") + single_dict["encoding"][3:] - new_name = ( - name - if nf == 0 - else f"{name[:name_expand_index]}seg{nf+1}{name[name_expand_index:]}" - ) - expanded_instructions.append((new_name, new_single_dict)) - return expanded_instructions +# Remove 'nf' from variable fields +def remove_nf_field(single_dict): + """Removes 'nf' from variable fields in the instruction dictionary.""" + single_dict["variable_fields"].remove("nf") -# Extracts the extensions used in an instruction dictionary -def instr_dict_2_extensions(instr_dict): - return list({item["extension"][0] for item in instr_dict.values()}) +# Update the mask to include the 'nf' field +def update_mask(single_dict): + """Updates the mask to include the 'nf' field in the instruction dictionary.""" + single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29) -# Returns signed interpretation of a value within a given width -def signed(value, width): - return value if 0 <= value < (1 << (width - 1)) else value - (1 << width) +# Create an expanded instruction +def create_expanded_instruction( + name, single_dict, nf, name_expand_index, base_match, encoding_prefix +): + """Creates an expanded instruction based on 'nf' value.""" + new_single_dict = copy.deepcopy(single_dict) + + # Update match value in one step + new_single_dict["match"] = hex(base_match | (nf << 29)) + new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix + # Construct new instruction name + new_name = ( + name + if nf == 0 + else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}" + ) + return (new_name, new_single_dict) + + +# Return a list of relevant lines from the specified file def read_lines(file): """Reads lines from a file and returns non-blank, non-comment lines.""" with open(file) as fp: @@ -306,6 +370,7 @@ def 
read_lines(file): return [line for line in lines if line and not line.startswith("#")] +# Update the instruction dictionary def process_standard_instructions(lines, instr_dict, file_name): """Processes standard instructions from the given lines and updates the instruction dictionary.""" for line in lines: @@ -342,6 +407,7 @@ def process_standard_instructions(lines, instr_dict, file_name): instr_dict[name] = single_dict +# Incorporate pseudo instructions into the instruction dictionary based on given conditions def process_pseudo_instructions( lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops ): @@ -371,6 +437,7 @@ def process_pseudo_instructions( instr_dict[name]["extension"].extend(single_dict["extension"]) +# Integrate imported instructions into the instruction dictionary def process_imported_instructions(lines, instr_dict, file_name, opcodes_dir): """Processes imported instructions from the given lines and updates the instruction dictionary.""" for line in lines: @@ -396,6 +463,7 @@ def process_imported_instructions(lines, instr_dict, file_name, opcodes_dir): break +# Locate the path of the specified extension file, checking fallback directories def find_extension_file(ext, opcodes_dir): """Finds the extension file path, considering the unratified directory if necessary.""" ext_file = f"{opcodes_dir}/{ext}" @@ -406,6 +474,7 @@ def find_extension_file(ext, opcodes_dir): return ext_file +# Confirm the presence of an original instruction in the corresponding extension file. 
def validate_instruction_in_extension(inst, ext_file, file_name, pseudo_inst): """Validates if the original instruction exists in the dependent extension.""" found = False @@ -419,15 +488,14 @@ def validate_instruction_in_extension(inst, ext_file, file_name, pseudo_inst): ) +# Construct a dictionary of instructions filtered by specified criteria def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): """Creates a dictionary of instructions based on the provided file filters.""" """ This function return a dictionary containing all instructions associated with an extension defined by the file_filter input. - Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. - Each node of the dictionary will correspond to an instruction which again is a dictionary. The dictionary contents of each instruction includes: - variables: list of arguments used by the instruction whose mapping @@ -441,7 +509,6 @@ def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): this instruction - mask: hex value representin the bits that need to be masked to extract the value required for matching. 
- In order to build this dictionary, the function does 2 passes over the same rv file: - First pass: extracts all standard instructions, skipping pseudo ops @@ -489,3 +556,13 @@ def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): process_imported_instructions(lines, instr_dict, file_name, opcodes_dir) return instr_dict + + +# Extracts the extensions used in an instruction dictionary +def instr_dict_2_extensions(instr_dict): + return list({item["extension"][0] for item in instr_dict.values()}) + + +# Returns signed interpretation of a value within a given width +def signed(value, width): + return value if 0 <= value < (1 << (width - 1)) else value - (1 << width) From 90854e66f8d82580fd428c2a690ebdf20bd85655 Mon Sep 17 00:00:00 2001 From: Joel Sing Date: Sat, 26 Oct 2024 00:15:32 +1100 Subject: [PATCH 11/18] blacken --- parse.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/parse.py b/parse.py index 8e2f4623..72af94d0 100755 --- a/parse.py +++ b/parse.py @@ -1199,7 +1199,16 @@ def signed(value, width): print(f"Running with args : {sys.argv}") extensions = sys.argv[1:] - for i in ["-c", "-chisel", "-go", "-latex", "-pseudo" "-rust", "-spinalhdl", "-sverilog"]: + for i in [ + "-c", + "-chisel", + "-go", + "-latex", + "-pseudo", + "-rust", + "-spinalhdl", + "-sverilog", + ]: if i in extensions: extensions.remove(i) print(f"Extensions selected : {extensions}") From c1ba2ffb0968d2121d08a9b31d61c43de85684c1 Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Sun, 27 Oct 2024 20:19:57 +0530 Subject: [PATCH 12/18] added pseudo flag --- parse.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/parse.py b/parse.py index 19574562..2571c986 100755 --- a/parse.py +++ b/parse.py @@ -25,13 +25,22 @@ print(f"Running with args : {sys.argv}") extensions = sys.argv[1:] - for i in ["-c", "-latex", "-chisel", "-sverilog", "-rust", "-go", "-spinalhdl"]: + for i in [ + "-c", + "-chisel", + "-go", + 
"-latex", + "-pseudo", + "-rust", + "-spinalhdl", + "-sverilog", + ]: if i in extensions: extensions.remove(i) print(f"Extensions selected : {extensions}") include_pseudo = False - if "-go" in sys.argv[1:]: + if "-pseudo" in sys.argv[1:]: include_pseudo = True instr_dict = create_inst_dict(extensions, include_pseudo) From 837fbba9982e7eb0a9daa88cb640306754b2b993 Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Sun, 27 Oct 2024 20:31:45 +0530 Subject: [PATCH 13/18] optimized the for loop for extensions and targets usage --- parse.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/parse.py b/parse.py index 2571c986..29f6062e 100755 --- a/parse.py +++ b/parse.py @@ -25,7 +25,8 @@ print(f"Running with args : {sys.argv}") extensions = sys.argv[1:] - for i in [ + + targets = { "-c", "-chisel", "-go", @@ -34,9 +35,9 @@ "-rust", "-spinalhdl", "-sverilog", - ]: - if i in extensions: - extensions.remove(i) + } + + extensions = [ext for ext in extensions if ext not in targets] print(f"Extensions selected : {extensions}") include_pseudo = False From 6900b2aba2ab6e895cf21bcf37c9cea4e97409bf Mon Sep 17 00:00:00 2001 From: IIITM-Jay Date: Sun, 27 Oct 2024 20:33:39 +0530 Subject: [PATCH 14/18] Fixed pre-commit issues --- rv64_zcb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rv64_zcb b/rv64_zcb index c47d0114..8ce4429f 100644 --- a/rv64_zcb +++ b/rv64_zcb @@ -1,3 +1,3 @@ c.zext.w rd_rs1_p 1..0=1 15..13=4 12..10=7 6..5=3 4..2=4 -$pseudo_op rv64_c::c.addiw c.sext.w rd_rs1_n0 15..13=1 12=0 6..2=0 1..0=1 +$pseudo_op rv64_c::c.addiw c.sext.w rd_rs1_n0 15..13=1 12=0 6..2=0 1..0=1 From 27fa19babcbc60d12feb29c19eb612fd8e83e972 Mon Sep 17 00:00:00 2001 From: Tim Hutt Date: Sun, 27 Oct 2024 22:22:33 +0000 Subject: [PATCH 15/18] Minor simplification of Python boolean --- parse.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/parse.py b/parse.py index 29f6062e..9677ed6d 100755 --- a/parse.py +++ b/parse.py @@ -40,9 +40,7 @@ 
extensions = [ext for ext in extensions if ext not in targets] print(f"Extensions selected : {extensions}") - include_pseudo = False - if "-pseudo" in sys.argv[1:]: - include_pseudo = True + include_pseudo = "-pseudo" in sys.argv[1:] instr_dict = create_inst_dict(extensions, include_pseudo) From 83b56f6ce605f06dd605f43d36729dc89f92bd07 Mon Sep 17 00:00:00 2001 From: Afonso Oliveira Date: Mon, 28 Oct 2024 19:31:02 +0000 Subject: [PATCH 16/18] Added make option to include Pseudo Signed-off-by: Afonso Oliveira --- Makefile | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 6837ea04..17f991be 100644 --- a/Makefile +++ b/Makefile @@ -5,30 +5,39 @@ ENV_H := ../riscv-tests/env/encoding.h OPENOCD_H := ../riscv-openocd/src/target/riscv/encoding.h INSTALL_HEADER_FILES := $(ISASIM_H) $(PK_H) $(ENV_H) $(OPENOCD_H) +ifdef PSEUDO + PSEUDO_FLAG := -pseudo +else + PSEUDO_FLAG := +endif + default: everything -.PHONY: everything encoding.out.h inst.chisel inst.go latex inst.sverilog inst.rs clean install instr-table.tex priv-instr-table.tex inst.spinalhdl +.PHONY: everything encoding.out.h inst.chisel inst.go latex inst.sverilog inst.rs clean install instr-table.tex priv-instr-table.tex inst.spinalhdl pseudo + +pseudo: + @$(MAKE) PSEUDO=1 everything everything: - @./parse.py -c -go -chisel -sverilog -rust -latex -spinalhdl $(EXTENSIONS) + @./parse.py $(PSEUDO_FLAG) -c -go -chisel -sverilog -rust -latex -spinalhdl $(EXTENSIONS) encoding.out.h: - @./parse.py -c rv* unratified/rv_* unratified/rv32* unratified/rv64* + @./parse.py -c $(PSEUDO_FLAG) rv* unratified/rv_* unratified/rv32* unratified/rv64* inst.chisel: - @./parse.py -chisel $(EXTENSIONS) + @./parse.py -chisel $(PSEUDO_FLAG) $(EXTENSIONS) inst.go: - @./parse.py -go $(EXTENSIONS) + @./parse.py -go $(PSEUDO_FLAG) $(EXTENSIONS) latex: - @./parse.py -latex $(EXTENSIONS) + @./parse.py -latex $(PSEUDO_FLAG) $(EXTENSIONS) inst.sverilog: - @./parse.py -sverilog 
$(EXTENSIONS) + @./parse.py -sverilog $(PSEUDO_FLAG) $(EXTENSIONS) inst.rs: - @./parse.py -rust $(EXTENSIONS) + @./parse.py -rust $(PSEUDO_FLAG) $(EXTENSIONS) clean: rm -f inst* priv-instr-table.tex encoding.out.h @@ -44,4 +53,4 @@ instr-table.tex: latex priv-instr-table.tex: latex inst.spinalhdl: - @./parse.py -spinalhdl $(EXTENSIONS) + @./parse.py -spinalhdl $(PSEUDO_FLAG) $(EXTENSIONS) From 938e6d5b038426cf881e433040b16803d82288a3 Mon Sep 17 00:00:00 2001 From: Tim Hutt Date: Tue, 29 Oct 2024 21:11:51 +0000 Subject: [PATCH 17/18] Remove unused imports Remove unused Python import statements. --- c_utils.py | 6 ------ chisel_utils.py | 8 -------- go_utils.py | 6 ------ rust_utils.py | 8 -------- sverilog_utils.py | 7 ------- 5 files changed, 35 deletions(-) diff --git a/c_utils.py b/c_utils.py index cff33dd8..78ed0e5c 100644 --- a/c_utils.py +++ b/c_utils.py @@ -1,12 +1,6 @@ -import collections -import glob import logging import os import pprint -import re -import sys - -import yaml # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * diff --git a/chisel_utils.py b/chisel_utils.py index 957e4f8a..0943584d 100644 --- a/chisel_utils.py +++ b/chisel_utils.py @@ -1,13 +1,5 @@ -import collections -import copy -import glob import logging -import os import pprint -import re -import sys - -import yaml from constants import * diff --git a/go_utils.py b/go_utils.py index 1f4c94bb..ed47441f 100644 --- a/go_utils.py +++ b/go_utils.py @@ -1,13 +1,7 @@ -import collections -import glob import logging -import os import pprint -import re import sys -import yaml - # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * diff --git a/rust_utils.py b/rust_utils.py index 
19a47b95..68e0c8cd 100644 --- a/rust_utils.py +++ b/rust_utils.py @@ -1,13 +1,5 @@ -import collections -import copy -import glob import logging -import os import pprint -import re -import sys - -import yaml from constants import * diff --git a/sverilog_utils.py b/sverilog_utils.py index 1fe20680..89163137 100644 --- a/sverilog_utils.py +++ b/sverilog_utils.py @@ -1,12 +1,5 @@ -import collections -import glob import logging -import os import pprint -import re -import sys - -import yaml # from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * From 14d6674bed2c6f0967872be7f4588f64134ebaa4 Mon Sep 17 00:00:00 2001 From: Jay Dev Jha Date: Wed, 30 Oct 2024 15:22:09 +0530 Subject: [PATCH 18/18] Remove commented out import lines Signed-off-by: Jay Dev Jha --- c_utils.py | 1 - go_utils.py | 1 - sverilog_utils.py | 1 - 3 files changed, 3 deletions(-) diff --git a/c_utils.py b/c_utils.py index 78ed0e5c..40fa4cb4 100644 --- a/c_utils.py +++ b/c_utils.py @@ -2,7 +2,6 @@ import os import pprint -# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2) diff --git a/go_utils.py b/go_utils.py index ed47441f..9815e702 100644 --- a/go_utils.py +++ b/go_utils.py @@ -2,7 +2,6 @@ import pprint import sys -# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2) diff --git a/sverilog_utils.py b/sverilog_utils.py index 89163137..ff116ccb 100644 --- a/sverilog_utils.py +++ b/sverilog_utils.py @@ -1,7 +1,6 @@ import logging import pprint -# from shared_utils import 
overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field from shared_utils import * pp = pprint.PrettyPrinter(indent=2)