diff --git a/Makefile b/Makefile index 6837ea04..17f991be 100644 --- a/Makefile +++ b/Makefile @@ -5,30 +5,39 @@ ENV_H := ../riscv-tests/env/encoding.h OPENOCD_H := ../riscv-openocd/src/target/riscv/encoding.h INSTALL_HEADER_FILES := $(ISASIM_H) $(PK_H) $(ENV_H) $(OPENOCD_H) +ifdef PSEUDO + PSEUDO_FLAG := -pseudo +else + PSEUDO_FLAG := +endif + default: everything -.PHONY: everything encoding.out.h inst.chisel inst.go latex inst.sverilog inst.rs clean install instr-table.tex priv-instr-table.tex inst.spinalhdl +.PHONY: everything encoding.out.h inst.chisel inst.go latex inst.sverilog inst.rs clean install instr-table.tex priv-instr-table.tex inst.spinalhdl pseudo + +pseudo: + @$(MAKE) PSEUDO=1 everything everything: - @./parse.py -c -go -chisel -sverilog -rust -latex -spinalhdl $(EXTENSIONS) + @./parse.py $(PSEUDO_FLAG) -c -go -chisel -sverilog -rust -latex -spinalhdl $(EXTENSIONS) encoding.out.h: - @./parse.py -c rv* unratified/rv_* unratified/rv32* unratified/rv64* + @./parse.py -c $(PSEUDO_FLAG) rv* unratified/rv_* unratified/rv32* unratified/rv64* inst.chisel: - @./parse.py -chisel $(EXTENSIONS) + @./parse.py -chisel $(PSEUDO_FLAG) $(EXTENSIONS) inst.go: - @./parse.py -go $(EXTENSIONS) + @./parse.py -go $(PSEUDO_FLAG) $(EXTENSIONS) latex: - @./parse.py -latex $(EXTENSIONS) + @./parse.py -latex $(PSEUDO_FLAG) $(EXTENSIONS) inst.sverilog: - @./parse.py -sverilog $(EXTENSIONS) + @./parse.py -sverilog $(PSEUDO_FLAG) $(EXTENSIONS) inst.rs: - @./parse.py -rust $(EXTENSIONS) + @./parse.py -rust $(PSEUDO_FLAG) $(EXTENSIONS) clean: rm -f inst* priv-instr-table.tex encoding.out.h @@ -44,4 +53,4 @@ instr-table.tex: latex priv-instr-table.tex: latex inst.spinalhdl: - @./parse.py -spinalhdl $(EXTENSIONS) + @./parse.py -spinalhdl $(PSEUDO_FLAG) $(EXTENSIONS) diff --git a/c_utils.py b/c_utils.py new file mode 100644 index 00000000..40fa4cb4 --- /dev/null +++ b/c_utils.py @@ -0,0 +1,78 @@ +import logging +import os +import pprint + +from shared_utils import * + +pp = 
pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_c(instr_dict): + mask_match_str = "" + declare_insn_str = "" + for i in instr_dict: + mask_match_str += ( + f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' + ) + mask_match_str += ( + f'#define MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' + ) + declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' + + csr_names_str = "" + declare_csr_str = "" + for num, name in csrs + csrs32: + csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n" + declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n" + + causes_str = "" + declare_cause_str = "" + for num, name in causes: + causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" + declare_cause_str += ( + f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" + ) + + arg_str = "" + for name, rng in arg_lut.items(): + sanitized_name = name.replace(" ", "_").replace("=", "_eq_") + begin = rng[1] + end = rng[0] + mask = ((1 << (end - begin + 1)) - 1) << begin + arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n" + + with open(f"{os.path.dirname(__file__)}/encoding.h", "r") as file: + enc_header = file.read() + + commit = os.popen('git log -1 --format="format:%h"').read() + + # Generate the output as a string + output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */ + +/* Copyright (c) 2023 RISC-V International */ + +/* + * This file is auto-generated by running 'make' in + * https://github.com/riscv/riscv-opcodes ({commit}) + */ + +{enc_header} +/* Automatically generated by parse_opcodes. 
*/ +#ifndef RISCV_ENCODING_H +#define RISCV_ENCODING_H +{mask_match_str} +{csr_names_str} +{causes_str} +{arg_str}#endif +#ifdef DECLARE_INSN +{declare_insn_str}#endif +#ifdef DECLARE_CSR +{declare_csr_str}#endif +#ifdef DECLARE_CAUSE +{declare_cause_str}#endif +""" + + # Write the modified output to the file + with open("encoding.out.h", "w") as enc_file: + enc_file.write(output_str) diff --git a/chisel_utils.py b/chisel_utils.py new file mode 100644 index 00000000..0943584d --- /dev/null +++ b/chisel_utils.py @@ -0,0 +1,86 @@ +import logging +import pprint + +from constants import * + +# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_chisel(instr_dict, spinal_hdl=False): + + chisel_names = "" + cause_names_str = "" + csr_names_str = "" + for i in instr_dict: + if spinal_hdl: + chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n' + # else: + # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' + if not spinal_hdl: + extensions = instr_dict_2_extensions(instr_dict) + for e in extensions: + e_instrs = filter(lambda i: instr_dict[i]["extension"][0] == e, instr_dict) + if "rv64_" in e: + e_format = e.replace("rv64_", "").upper() + "64" + elif "rv32_" in e: + e_format = e.replace("rv32_", "").upper() + "32" + elif "rv_" in e: + e_format = e.replace("rv_", "").upper() + else: + e_format = e.upper + chisel_names += f' val {e_format+"Type"} = Map(\n' + for instr in e_instrs: + tmp_instr_name = '"' + instr.upper().replace(".", "_") + '"' + chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n' + chisel_names 
+= f" )\n" + + for num, name in causes: + cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' + cause_names_str += """ val all = { + val res = collection.mutable.ArrayBuffer[Int]() +""" + for num, name in causes: + cause_names_str += f' res += {name.lower().replace(" ","_")}\n' + cause_names_str += """ res.toArray + }""" + + for num, name in csrs + csrs32: + csr_names_str += f" val {name} = {hex(num)}\n" + csr_names_str += """ val all = { + val res = collection.mutable.ArrayBuffer[Int]() +""" + for num, name in csrs: + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray + } + val all32 = { + val res = collection.mutable.ArrayBuffer(all:_*) +""" + for num, name in csrs32: + csr_names_str += f""" res += {name}\n""" + csr_names_str += """ res.toArray + }""" + + if spinal_hdl: + chisel_file = open("inst.spinalhdl", "w") + else: + chisel_file = open("inst.chisel", "w") + chisel_file.write( + f""" +/* Automatically generated by parse_opcodes */ +object Instructions {{ +{chisel_names} +}} +object Causes {{ +{cause_names_str} +}} +object CSRs {{ +{csr_names_str} +}} +""" + ) + chisel_file.close() diff --git a/go_utils.py b/go_utils.py new file mode 100644 index 00000000..9815e702 --- /dev/null +++ b/go_utils.py @@ -0,0 +1,62 @@ +import logging +import pprint +import sys + +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_go(instr_dict): + + args = " ".join(sys.argv) + prelude = f"""// Code generated by {args}; DO NOT EDIT.""" + + prelude += """ +package riscv + +import "cmd/internal/obj" + +type inst struct { + opcode uint32 + funct3 uint32 + rs1 uint32 + rs2 uint32 + csr int64 + funct7 uint32 +} + +func encode(a obj.As) *inst { + switch a { +""" + + endoffile = """ } + return nil +} +""" + + instr_str = "" + for i in instr_dict: + enc_match = int(instr_dict[i]["match"], 0) + opcode = (enc_match >> 0) & ((1 << 7) - 1) + 
funct3 = (enc_match >> 12) & ((1 << 3) - 1) + rs1 = (enc_match >> 15) & ((1 << 5) - 1) + rs2 = (enc_match >> 20) & ((1 << 5) - 1) + csr = (enc_match >> 20) & ((1 << 12) - 1) + funct7 = (enc_match >> 25) & ((1 << 7) - 1) + instr_str += f""" case A{i.upper().replace("_","")}: + return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }} +""" + + with open("inst.go", "w") as file: + file.write(prelude) + file.write(instr_str) + file.write(endoffile) + + try: + import subprocess + + subprocess.run(["go", "fmt", "inst.go"]) + except: + pass diff --git a/latex_utils.py b/latex_utils.py new file mode 100644 index 00000000..ab5f6f92 --- /dev/null +++ b/latex_utils.py @@ -0,0 +1,448 @@ +import collections +import copy +import glob +import logging +import os +import pprint +import re +import sys + +import yaml + +from constants import * +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_priv_latex_table(): + latex_file = open("priv-instr-table.tex", "w") + type_list = ["R-type", "I-type"] + system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"] + dataset_list = [(system_instr, "Trap-Return Instructions", ["sret", "mret"], False)] + dataset_list.append( + (system_instr, "Interrupt-Management Instructions", ["wfi"], False) + ) + dataset_list.append( + ( + system_instr, + "Supervisor Memory-Management Instructions", + ["sfence_vma"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Memory-Management Instructions", + ["hfence_vvma", "hfence_gvma"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Virtual-Machine Load and Store Instructions", + [ + "hlv_b", + "hlv_bu", + "hlv_h", + "hlv_hu", + "hlv_w", + "hlvx_hu", + "hlvx_wu", + "hsv_b", + "hsv_h", + "hsv_w", + ], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Hypervisor Virtual-Machine Load and 
Store Instructions, RV64 only", + ["hlv_wu", "hlv_d", "hsv_d"], + False, + ) + ) + dataset_list.append( + ( + system_instr, + "Svinval Memory-Management Instructions", + [ + "sinval_vma", + "sfence_w_inval", + "sfence_inval_ir", + "hinval_vvma", + "hinval_gvma", + ], + False, + ) + ) + caption = "\\caption{RISC-V Privileged Instructions}" + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + latex_file.close() + + +def make_latex_table(): + """ + This function is mean to create the instr-table.tex that is meant to be used + by the riscv-isa-manual. This function basically creates a single latext + file of multiple tables with each table limited to a single page. Only the + last table is assigned a latex-caption. + + For each table we assign a type-list which capture the different instruction + types (R, I, B, etc) that will be required for the table. Then we select the + list of extensions ('_i, '32_i', etc) whose instructions are required to + populate the table. For each extension or collection of extension we can + assign Title, such that in the end they appear as subheadings within + the table (note these are inlined headings and not captions of the table). + + All of the above information is collected/created and sent to + make_ext_latex_table function to dump out the latex contents into a file. + + The last table only has to be given a caption - as per the policy of the + riscv-isa-manual. + """ + # open the file and use it as a pointer for all further dumps + latex_file = open("instr-table.tex", "w") + + # create the rv32i table first. Here we set the caption to empty. We use the + # files rv_i and rv32_i to capture instructions relevant for rv32i + # configuration. The dataset is a list of 4-element tuples : + # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions + # is empty then it indicates that all instructions of the all the extensions + # in list_of_extensions need to be dumped. 
If not empty, then only the + # instructions listed in list_of_instructions will be dumped into latex. + caption = "" + type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"] + dataset_list = [(["_i", "32_i"], "RV32I Base Instruction Set", [], False)] + dataset_list.append((["_i"], "", ["fence_tso", "pause"], True)) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "I-type", "S-type"] + dataset_list = [ + (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False) + ] + dataset_list.append( + (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False) + ) + dataset_list.append((["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False)) + dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False)) + dataset_list.append( + (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type"] + dataset_list = [(["_a"], "RV32A Standard Extension", [], False)] + dataset_list.append( + (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_f"], "RV32F Standard Extension", [], False)] + dataset_list.append( + (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_d"], "RV32D Standard Extension", [], False)] + dataset_list.append( + (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)] + 
dataset_list.append( + (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + caption = "\\caption{Instruction listing for RISC-V}" + type_list = ["R-type", "R4-type", "I-type", "S-type"] + dataset_list = [ + (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False) + ] + dataset_list.append( + (["64_zfh"], "RV64Zfh Standard Extension (in addition to RV32Zfh)", [], False) + ) + make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) + + ## The following is demo to show that Compressed instructions can also be + # dumped in the same manner as above + + # type_list = [''] + # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] + # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) + # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) + + latex_file.close() + + +def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): + """ + For a given collection of extensions this function dumps out a complete + latex table which includes the encodings of the instructions. + + The ilen input indicates the length of the instruction for which the table + is created. + + The caption input is used to create the latex-table caption. + + The type_list input is a list of instruction types (R, I, B, etc) that are + treated as header for each table. Each table will have its own requirements + and type_list must include all the instruction-types that the table needs. 
+ Note, all elements of this list must be present in the latex_inst_type + dictionary defined in constants.py + + The latex_file is a file pointer to which the latex-table will dumped into + + The dataset is a list of 3-element tuples containing: + (list_of_extensions, title, list_of_instructions) + The list_of_extensions must contain all the set of extensions whose + instructions must be populated under a given title. If list_of_instructions + is not empty, then only those instructions mentioned in list_of_instructions + present in the extension will be dumped into the latex-table, other + instructions will be ignored. + + Once the above inputs are received then function first creates table entries + for the instruction types. To simplify things, we maintain a dictionary + called latex_inst_type in constants.py which is created in the same way the + instruction dictionary is created. This allows us to re-use the same logic + to create the instruction types table as well + + Once the header is created, we then parse through every entry in the + dataset. For each list dataset entry we use the create_inst_dict function to + create an exhaustive list of instructions associated with the respective + collection of the extension of that dataset. Then we apply the instruction + filter, if any, indicated by the list_of_instructions of that dataset. + Thereon, for each instruction we create a latex table entry. + + Latex table specification for ilen sized instructions: + Each table is created with ilen+1 columns - ilen columns for each bit of the + instruction and one column to hold the name of the instruction. + + For each argument of an instruction we use the arg_lut from constants.py + to identify its position in the encoding, and thus create a multicolumn + entry with the name of the argument as the data. 
For hardcoded bits, we + do the same where we capture a string of continuous 1s and 0s, identify + the position and assign the same string as the data of the + multicolumn entry in the table. + + """ + column_size = "".join(["p{0.002in}"] * (ilen + 1)) + + type_entries = ( + """ + \\multicolumn{3}{l}{31} & + \\multicolumn{2}{r}{27} & + \\multicolumn{1}{c}{26} & + \\multicolumn{1}{r}{25} & + \\multicolumn{3}{l}{24} & + \\multicolumn{2}{r}{20} & + \\multicolumn{3}{l}{19} & + \\multicolumn{2}{r}{15} & + \\multicolumn{2}{l}{14} & + \\multicolumn{1}{r}{12} & + \\multicolumn{4}{l}{11} & + \\multicolumn{1}{r}{7} & + \\multicolumn{6}{l}{6} & + \\multicolumn{1}{r}{0} \\\\ + \\cline{2-33}\n&\n\n +""" + if ilen == 32 + else """ + \\multicolumn{1}{c}{15} & + \\multicolumn{1}{c}{14} & + \\multicolumn{1}{c}{13} & + \\multicolumn{1}{c}{12} & + \\multicolumn{1}{c}{11} & + \\multicolumn{1}{c}{10} & + \\multicolumn{1}{c}{9} & + \\multicolumn{1}{c}{8} & + \\multicolumn{1}{c}{7} & + \\multicolumn{1}{c}{6} & + \\multicolumn{1}{c}{5} & + \\multicolumn{1}{c}{4} & + \\multicolumn{1}{c}{3} & + \\multicolumn{1}{c}{2} & + \\multicolumn{1}{c}{1} & + \\multicolumn{1}{c}{0} \\\\ + \\cline{2-17}\n&\n\n +""" + ) + + # depending on the type_list input we create a subset dictionary of + # latex_inst_type dictionary present in constants.py + type_dict = { + key: value for key, value in latex_inst_type.items() if key in type_list + } + + # iterate ovr each instruction type and create a table entry + for t in type_dict: + fields = [] + + # first capture all "arguments" of the type (funct3, funct7, rd, etc) + # and capture their positions using arg_lut. + for f in type_dict[t]["variable_fields"]: + (msb, lsb) = arg_lut[f] + name = f if f not in latex_mapping else latex_mapping[f] + fields.append((msb, lsb, name)) + + # iterate through the 32 bits, starting from the msb, and assign + # argument names to the relevant portions of the instructions. 
This + # information is stored as a 3-element tuple containing the msb, lsb + # position of the arugment and the name of the argument. + msb = ilen - 1 + y = "" + for r in range(0, ilen): + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + if r == 31: + if y != "": + fields.append((msb, 0, y)) + y = "" + + # sort the arguments in decreasing order of msb position + fields.sort(key=lambda y: y[0], reverse=True) + + # for each argument/string of 1s or 0s, create a multicolumn latex table + # entry + entry = "" + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields) - 1: + entry += ( + f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n" + ) + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + type_entries += entry + + # for each entry in the dataset create a table + content = "" + for ext_list, title, filter_list, include_pseudo in dataset: + instr_dict = {} + + # for all extensions list in ext_list, create a dictionary of + # instructions associated with those extensions. + for e in ext_list: + instr_dict.update(create_inst_dict(["rv" + e], include_pseudo)) + + # if filter_list is not empty then use that as the official set of + # instructions that need to be dumped into the latex table + inst_list = list(instr_dict.keys()) if not filter_list else filter_list + + # for each instruction create an latex table entry just like how we did + # above with the instruction-type table. + instr_entries = "" + for inst in inst_list: + if inst not in instr_dict: + logging.error( + f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict" + ) + raise SystemExit(1) + fields = [] + + # only if the argument is available in arg_lut we consume it, else + # throw error. 
+ for f in instr_dict[inst]["variable_fields"]: + if f not in arg_lut: + logging.error( + f"Found variable {f} in instruction {inst} whose mapping is not available" + ) + raise SystemExit(1) + (msb, lsb) = arg_lut[f] + name = ( + f.replace("_", ".") if f not in latex_mapping else latex_mapping[f] + ) + fields.append((msb, lsb, name)) + + msb = ilen - 1 + y = "" + if ilen == 16: + encoding = instr_dict[inst]["encoding"][16:] + else: + encoding = instr_dict[inst]["encoding"] + for r in range(0, ilen): + x = encoding[r] + if ((msb, ilen - 1 - r + 1)) in latex_fixed_fields: + fields.append((msb, ilen - 1 - r + 1, y)) + msb = ilen - 1 - r + y = "" + if x == "-": + if y != "": + fields.append((msb, ilen - 1 - r + 1, y)) + y = "" + msb = ilen - 1 - r - 1 + else: + y += str(x) + if r == ilen - 1: + if y != "": + fields.append((msb, 0, y)) + y = "" + + fields.sort(key=lambda y: y[0], reverse=True) + entry = "" + for r in range(len(fields)): + (msb, lsb, name) = fields[r] + if r == len(fields) - 1: + entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n' + elif r == 0: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" + else: + entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" + entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" + instr_entries += entry + + # once an entry of the dataset is completed we create the whole table + # with the title of that dataset as sub-heading (sort-of) + if title != "": + content += f""" + +\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ +\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ +\\cline{{2-{ilen+1}}} + + & +{instr_entries} +""" + else: + content += f""" +{instr_entries} +""" + + header = f""" +\\newpage + +\\begin{{table}}[p] +\\begin{{small}} +\\begin{{center}} + \\begin{{tabular}} {{{column_size}l}} + {" ".join(['&']*ilen)} \\\\ + + & +{type_entries} +""" + endtable = f""" + +\\end{{tabular}} +\\end{{center}} +\\end{{small}} +{caption} +\\end{{table}} +""" + # dump 
the contents and return + latex_file.write(header + content + endtable) diff --git a/parse.py b/parse.py index 17cd5a2f..9677ed6d 100755 --- a/parse.py +++ b/parse.py @@ -1,1212 +1,46 @@ #!/usr/bin/env python3 - import collections -import copy -import glob import logging -import os import pprint -import re import sys import yaml +from c_utils import * +from chisel_utils import * from constants import * +from go_utils import * +from latex_utils import * +from rust_utils import * +from shared_utils import * +from sverilog_utils import * -pp = pprint.PrettyPrinter(indent=2) -logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") - - -def process_enc_line(line, ext): - """ - This function processes each line of the encoding files (rv*). As part of - the processing, the function ensures that the encoding is legal through the - following checks:: - - - there is no over specification (same bits assigned different values) - - there is no under specification (some bits not assigned values) - - bit ranges are in the format hi..lo=val where hi > lo - - value assigned is representable in the bit range - - also checks that the mapping of arguments of an instruction exists in - arg_lut. - - If the above checks pass, then the function returns a tuple of the name and - a dictionary containing basic information of the instruction which includes: - - variables: list of arguments used by the instruction whose mapping - exists in the arg_lut dictionary - - encoding: this contains the 32-bit encoding of the instruction where - '-' is used to represent position of arguments and 1/0 is used to - reprsent the static encoding of the bits - - extension: this field contains the rv* filename from which this - instruction was included - - match: hex value representing the bits that need to match to detect - this instruction - - mask: hex value representin the bits that need to be masked to extract - the value required for matching. 
- """ - single_dict = {} - - # fill all bits with don't care. we use '-' to represent don't care - # TODO: hardcoded for 32-bits. - encoding = ["-"] * 32 - - # get the name of instruction by splitting based on the first space - [name, remaining] = line.split(" ", 1) - - # replace dots with underscores as dot doesn't work with C/Sverilog, etc - name = name.replace(".", "_") - - # remove leading whitespaces - remaining = remaining.lstrip() - - # check each field for it's length and overlapping bits - # ex: 1..0=5 will result in an error --> x overlapping bits - for s2, s1, entry in fixed_ranges.findall(remaining): - msb = int(s2) - lsb = int(s1) - - # check msb < lsb - if msb < lsb: - logging.error( - f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in it\'s encoding' - ) - raise SystemExit(1) - - # illegal value assigned as per bit width - entry_value = int(entry, 0) - if entry_value >= (1 << (msb - lsb + 1)): - logging.error( - f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}' - ) - raise SystemExit(1) - - for ind in range(lsb, msb + 1): - # overlapping bits - if encoding[31 - ind] != "-": - logging.error( - f'{line.split(" ")[0]:<10} has {ind} bit overlapping in it\'s opcodes' - ) - raise SystemExit(1) - bit = str((entry_value >> (ind - lsb)) & 1) - encoding[31 - ind] = bit - - # extract bit pattern assignments of the form hi..lo=val - remaining = fixed_ranges.sub(" ", remaining) - - # do the same as above but for = pattern. 
single_fixed is a regex - # expression present in constants.py - for lsb, value, drop in single_fixed.findall(remaining): - lsb = int(lsb, 0) - value = int(value, 0) - if encoding[31 - lsb] != "-": - logging.error( - f'{line.split(" ")[0]:<10} has {lsb} bit overlapping in it\'s opcodes' - ) - raise SystemExit(1) - encoding[31 - lsb] = str(value) - - # convert the list of encodings into a single string for match and mask - match = "".join(encoding).replace("-", "0") - mask = "".join(encoding).replace("0", "1").replace("-", "0") - - # check if all args of the instruction are present in arg_lut present in - # constants.py - args = single_fixed.sub(" ", remaining).split() - encoding_args = encoding.copy() - for a in args: - if a not in arg_lut: - parts = a.split("=") - if len(parts) == 2: - existing_arg, new_arg = parts - if existing_arg in arg_lut: - arg_lut[a] = arg_lut[existing_arg] - - else: - logging.error( - f" Found field {existing_arg} in variable {a} in instruction {name} whose mapping in arg_lut does not exist" - ) - raise SystemExit(1) - else: - logging.error( - f" Found variable {a} in instruction {name} whose mapping in arg_lut does not exist" - ) - raise SystemExit(1) - (msb, lsb) = arg_lut[a] - for ind in range(lsb, msb + 1): - # overlapping bits - if encoding_args[31 - ind] != "-": - logging.error( - f" Found variable {a} in instruction {name} overlapping {encoding_args[31 - ind]} variable in bit {ind}" - ) - raise SystemExit(1) - encoding_args[31 - ind] = a - - # update the fields of the instruction as a dict and return back along with - # the name of the instruction - single_dict["encoding"] = "".join(encoding) - single_dict["variable_fields"] = args - single_dict["extension"] = [os.path.basename(ext)] - single_dict["match"] = hex(int(match, 2)) - single_dict["mask"] = hex(int(mask, 2)) - - return (name, single_dict) - - -def same_base_isa(ext_name, ext_name_list): - type1 = ext_name.split("_")[0] - for ext_name1 in ext_name_list: - type2 = 
ext_name1.split("_")[0] - # "rv" mean insn for rv32 and rv64 - if ( - type1 == type2 - or (type2 == "rv" and (type1 == "rv32" or type1 == "rv64")) - or (type1 == "rv" and (type2 == "rv32" or type2 == "rv64")) - ): - return True - return False - - -def overlaps(x, y): - x = x.rjust(len(y), "-") - y = y.rjust(len(x), "-") - - for i in range(0, len(x)): - if not (x[i] == "-" or y[i] == "-" or x[i] == y[i]): - return False - - return True - - -def overlap_allowed(a, x, y): - return x in a and y in a[x] or y in a and x in a[y] - - -def extension_overlap_allowed(x, y): - return overlap_allowed(overlapping_extensions, x, y) - - -def instruction_overlap_allowed(x, y): - return overlap_allowed(overlapping_instructions, x, y) - - -def add_segmented_vls_insn(instr_dict): - updated_dict = {} - for k, v in instr_dict.items(): - if "nf" in v["variable_fields"]: - for new_key, new_value in expand_nf_field(k, v): - updated_dict[new_key] = new_value - else: - updated_dict[k] = v - return updated_dict - - -def expand_nf_field(name, single_dict): - if "nf" not in single_dict["variable_fields"]: - logging.error(f"Cannot expand nf field for instruction {name}") - raise SystemExit(1) - - # nf no longer a variable field - single_dict["variable_fields"].remove("nf") - # include nf in mask - single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29) - - name_expand_index = name.find("e") - expanded_instructions = [] - for nf in range(0, 8): - new_single_dict = copy.deepcopy(single_dict) - new_single_dict["match"] = hex(int(single_dict["match"], 16) | nf << 29) - new_single_dict["encoding"] = format(nf, "03b") + single_dict["encoding"][3:] - new_name = ( - name - if nf == 0 - else name[:name_expand_index] - + "seg" - + str(nf + 1) - + name[name_expand_index:] - ) - expanded_instructions.append((new_name, new_single_dict)) - return expanded_instructions - - -def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): - """ - This function return a dictionary 
containing all instructions associated - with an extension defined by the file_filter input. The file_filter input - needs to be rv* file name with out the 'rv' prefix i.e. '_i', '32_i', etc. - - Each node of the dictionary will correspond to an instruction which again is - a dictionary. The dictionary contents of each instruction includes: - - variables: list of arguments used by the instruction whose mapping - exists in the arg_lut dictionary - - encoding: this contains the 32-bit encoding of the instruction where - '-' is used to represent position of arguments and 1/0 is used to - reprsent the static encoding of the bits - - extension: this field contains the rv* filename from which this - instruction was included - - match: hex value representing the bits that need to match to detect - this instruction - - mask: hex value representin the bits that need to be masked to extract - the value required for matching. - - In order to build this dictionary, the function does 2 passes over the same - rv file. The first pass is to extract all standard - instructions. In this pass, all pseudo ops and imported instructions are - skipped. For each selected line of the file, we call process_enc_line - function to create the above mentioned dictionary contents of the - instruction. Checks are performed in this function to ensure that the same - instruction is not added twice to the overall dictionary. - - In the second pass, this function parses only pseudo_ops. For each pseudo_op - this function checks if the dependent extension and instruction, both, exist - before parsing it. The pseudo op is only added to the overall dictionary if - the dependent instruction is not present in the dictionary, else it is - skipped. 
- - - """ - opcodes_dir = os.path.dirname(os.path.realpath(__file__)) - instr_dict = {} - - # file_names contains all files to be parsed in the riscv-opcodes directory - file_names = [] - for fil in file_filter: - file_names += glob.glob(f"{opcodes_dir}/{fil}") - file_names.sort(reverse=True) - # first pass if for standard/regular instructions - logging.debug("Collecting standard instructions first") - for f in file_names: - logging.debug(f"Parsing File: {f} for standard instructions") - with open(f) as fp: - lines = (line.rstrip() for line in fp) # All lines including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines if not line.startswith("#") - ) # remove comment lines - - # go through each line of the file - for line in lines: - # if the an instruction needs to be imported then go to the - # respective file and pick the line that has the instruction. - # The variable 'line' will now point to the new line from the - # imported file - - # ignore all lines starting with $import and $pseudo - if "$import" in line or "$pseudo" in line: - continue - logging.debug(f" Processing line: {line}") - - # call process_enc_line to get the data about the current - # instruction - (name, single_dict) = process_enc_line(line, f) - ext_name = os.path.basename(f) - - # if an instruction has already been added to the filtered - # instruction dictionary throw an error saying the given - # instruction is already imported and raise SystemExit - if name in instr_dict: - var = instr_dict[name]["extension"] - if same_base_isa(ext_name, var): - # disable same names on the same base ISA - err_msg = f"instruction : {name} from " - err_msg += f"{ext_name} is already " - err_msg += f"added from {var} in same base ISA" - logging.error(err_msg) - raise SystemExit(1) - elif instr_dict[name]["encoding"] != single_dict["encoding"]: - # disable same names with different encodings on different base ISAs - err_msg = f"instruction 
: {name} from " - err_msg += f"{ext_name} is already " - err_msg += f"added from {var} but each have different encodings in different base ISAs" - logging.error(err_msg) - raise SystemExit(1) - instr_dict[name]["extension"].extend(single_dict["extension"]) - else: - for key in instr_dict: - item = instr_dict[key] - if ( - overlaps(item["encoding"], single_dict["encoding"]) - and not extension_overlap_allowed( - ext_name, item["extension"][0] - ) - and not instruction_overlap_allowed(name, key) - and same_base_isa(ext_name, item["extension"]) - ): - # disable different names with overlapping encodings on the same base ISA - err_msg = f"instruction : {name} in extension " - err_msg += f"{ext_name} overlaps instruction {key} " - err_msg += f'in extension {item["extension"]}' - logging.error(err_msg) - raise SystemExit(1) - - if name not in instr_dict: - # update the final dict with the instruction - instr_dict[name] = single_dict - - # second pass if for pseudo instructions - logging.debug("Collecting pseudo instructions now") - for f in file_names: - logging.debug(f"Parsing File: {f} for pseudo_ops") - with open(f) as fp: - lines = (line.rstrip() for line in fp) # All lines including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines if not line.startswith("#") - ) # remove comment lines - - # go through each line of the file - for line in lines: - - # ignore all lines not starting with $pseudo - if "$pseudo" not in line: - continue - logging.debug(f" Processing line: {line}") - - # use the regex pseudo_regex from constants.py to find the dependent - # extension, dependent instruction, the pseudo_op in question and - # its encoding - (ext, orig_inst, pseudo_inst, line) = pseudo_regex.findall(line)[0] - ext_file = f"{opcodes_dir}/{ext}" - - # check if the file of the dependent extension exist. 
Throw error if - # it doesn't - if not os.path.exists(ext_file): - ext1_file = f"{opcodes_dir}/unratified/{ext}" - if not os.path.exists(ext1_file): - logging.error( - f"Pseudo op {pseudo_inst} in {f} depends on {ext} which is not available" - ) - raise SystemExit(1) - else: - ext_file = ext1_file - - # check if the dependent instruction exist in the dependent - # extension. Else throw error. - found = False - for oline in open(ext_file): - if not re.findall(f"^\\s*{orig_inst}\\s+", oline): - continue - else: - found = True - break - if not found: - logging.error( - f"Orig instruction {orig_inst} not found in {ext}. Required by pseudo_op {pseudo_inst} present in {f}" - ) - raise SystemExit(1) - - (name, single_dict) = process_enc_line(pseudo_inst + " " + line, f) - # add the pseudo_op to the dictionary only if the original - # instruction is not already in the dictionary. - if ( - orig_inst.replace(".", "_") not in instr_dict - or include_pseudo - or name in include_pseudo_ops - ): - - # update the final dict with the instruction - if name not in instr_dict: - instr_dict[name] = single_dict - logging.debug(f" including pseudo_ops:{name}") - else: - if single_dict["match"] != instr_dict[name]["match"]: - instr_dict[name + "_pseudo"] = single_dict - - # if a pseudo instruction has already been added to the filtered - # instruction dictionary but the extension is not in the current - # list, add it - else: - ext_name = single_dict["extension"] - - if (ext_name not in instr_dict[name]["extension"]) & ( - name + "_pseudo" not in instr_dict - ): - instr_dict[name]["extension"].extend(ext_name) - else: - logging.debug( - f" Skipping pseudo_op {pseudo_inst} since original instruction {orig_inst} already selected in list" - ) - - # third pass if for imported instructions - logging.debug("Collecting imported instructions") - for f in file_names: - logging.debug(f"Parsing File: {f} for imported ops") - with open(f) as fp: - lines = (line.rstrip() for line in fp) # All lines 
including the blank ones - lines = list(line for line in lines if line) # Non-blank lines - lines = list( - line for line in lines if not line.startswith("#") - ) # remove comment lines - - # go through each line of the file - for line in lines: - # if the an instruction needs to be imported then go to the - # respective file and pick the line that has the instruction. - # The variable 'line' will now point to the new line from the - # imported file - - # ignore all lines starting with $import and $pseudo - if "$import" not in line: - continue - logging.debug(f" Processing line: {line}") - - (import_ext, reg_instr) = imported_regex.findall(line)[0] - import_ext_file = f"{opcodes_dir}/{import_ext}" - - # check if the file of the dependent extension exist. Throw error if - # it doesn't - if not os.path.exists(import_ext_file): - ext1_file = f"{opcodes_dir}/unratified/{import_ext}" - if not os.path.exists(ext1_file): - logging.error( - f"Instruction {reg_instr} in {f} cannot be imported from {import_ext}" - ) - raise SystemExit(1) - else: - ext_file = ext1_file - else: - ext_file = import_ext_file - - # check if the dependent instruction exist in the dependent - # extension. Else throw error. - found = False - for oline in open(ext_file): - if not re.findall(f"^\\s*{reg_instr}\\s+", oline): - continue - else: - found = True - break - if not found: - logging.error( - f"imported instruction {reg_instr} not found in {ext_file}. 
Required by {line} present in {f}" - ) - logging.error(f"Note: you cannot import pseudo/imported ops.") - raise SystemExit(1) - - # call process_enc_line to get the data about the current - # instruction - (name, single_dict) = process_enc_line(oline, f) - - # if an instruction has already been added to the filtered - # instruction dictionary throw an error saying the given - # instruction is already imported and raise SystemExit - if name in instr_dict: - var = instr_dict[name]["extension"] - if instr_dict[name]["encoding"] != single_dict["encoding"]: - err_msg = f"imported instruction : {name} in " - err_msg += f"{os.path.basename(f)} is already " - err_msg += f"added from {var} but each have different encodings for the same instruction" - logging.error(err_msg) - raise SystemExit(1) - instr_dict[name]["extension"].extend(single_dict["extension"]) - else: - # update the final dict with the instruction - instr_dict[name] = single_dict - return instr_dict - - -def make_priv_latex_table(): - latex_file = open("priv-instr-table.tex", "w") - type_list = ["R-type", "I-type"] - system_instr = ["_h", "_s", "_system", "_svinval", "64_h", "_svinval_h"] - dataset_list = [(system_instr, "Trap-Return Instructions", ["sret", "mret"], False)] - dataset_list.append( - (system_instr, "Interrupt-Management Instructions", ["wfi"], False) - ) - dataset_list.append( - ( - system_instr, - "Supervisor Memory-Management Instructions", - ["sfence_vma"], - False, - ) - ) - dataset_list.append( - ( - system_instr, - "Hypervisor Memory-Management Instructions", - ["hfence_vvma", "hfence_gvma"], - False, - ) - ) - dataset_list.append( - ( - system_instr, - "Hypervisor Virtual-Machine Load and Store Instructions", - [ - "hlv_b", - "hlv_bu", - "hlv_h", - "hlv_hu", - "hlv_w", - "hlvx_hu", - "hlvx_wu", - "hsv_b", - "hsv_h", - "hsv_w", - ], - False, - ) - ) - dataset_list.append( - ( - system_instr, - "Hypervisor Virtual-Machine Load and Store Instructions, RV64 only", - ["hlv_wu", "hlv_d", 
"hsv_d"], - False, - ) - ) - dataset_list.append( - ( - system_instr, - "Svinval Memory-Management Instructions", - [ - "sinval_vma", - "sfence_w_inval", - "sfence_inval_ir", - "hinval_vvma", - "hinval_gvma", - ], - False, - ) - ) - caption = "\\caption{RISC-V Privileged Instructions}" - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - latex_file.close() - - -def make_latex_table(): - """ - This function is mean to create the instr-table.tex that is meant to be used - by the riscv-isa-manual. This function basically creates a single latext - file of multiple tables with each table limited to a single page. Only the - last table is assigned a latex-caption. - - For each table we assign a type-list which capture the different instruction - types (R, I, B, etc) that will be required for the table. Then we select the - list of extensions ('_i, '32_i', etc) whose instructions are required to - populate the table. For each extension or collection of extension we can - assign Title, such that in the end they appear as subheadings within - the table (note these are inlined headings and not captions of the table). - - All of the above information is collected/created and sent to - make_ext_latex_table function to dump out the latex contents into a file. - - The last table only has to be given a caption - as per the policy of the - riscv-isa-manual. - """ - # open the file and use it as a pointer for all further dumps - latex_file = open("instr-table.tex", "w") - - # create the rv32i table first. Here we set the caption to empty. We use the - # files rv_i and rv32_i to capture instructions relevant for rv32i - # configuration. The dataset is a list of 4-element tuples : - # (list_of_extensions, title, list_of_instructions, include_pseudo_ops). If list_of_instructions - # is empty then it indicates that all instructions of the all the extensions - # in list_of_extensions need to be dumped. 
If not empty, then only the - # instructions listed in list_of_instructions will be dumped into latex. - caption = "" - type_list = ["R-type", "I-type", "S-type", "B-type", "U-type", "J-type"] - dataset_list = [(["_i", "32_i"], "RV32I Base Instruction Set", [], False)] - dataset_list.append((["_i"], "", ["fence_tso", "pause"], True)) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ["R-type", "I-type", "S-type"] - dataset_list = [ - (["64_i"], "RV64I Base Instruction Set (in addition to RV32I)", [], False) - ] - dataset_list.append( - (["_zifencei"], "RV32/RV64 Zifencei Standard Extension", [], False) - ) - dataset_list.append((["_zicsr"], "RV32/RV64 Zicsr Standard Extension", [], False)) - dataset_list.append((["_m", "32_m"], "RV32M Standard Extension", [], False)) - dataset_list.append( - (["64_m"], "RV64M Standard Extension (in addition to RV32M)", [], False) - ) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ["R-type"] - dataset_list = [(["_a"], "RV32A Standard Extension", [], False)] - dataset_list.append( - (["64_a"], "RV64A Standard Extension (in addition to RV32A)", [], False) - ) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ["R-type", "R4-type", "I-type", "S-type"] - dataset_list = [(["_f"], "RV32F Standard Extension", [], False)] - dataset_list.append( - (["64_f"], "RV64F Standard Extension (in addition to RV32F)", [], False) - ) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ["R-type", "R4-type", "I-type", "S-type"] - dataset_list = [(["_d"], "RV32D Standard Extension", [], False)] - dataset_list.append( - (["64_d"], "RV64D Standard Extension (in addition to RV32D)", [], False) - ) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - type_list = ["R-type", "R4-type", "I-type", "S-type"] - dataset_list = [(["_q"], "RV32Q Standard Extension", [], False)] - 
dataset_list.append( - (["64_q"], "RV64Q Standard Extension (in addition to RV32Q)", [], False) - ) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - caption = "\\caption{Instruction listing for RISC-V}" - type_list = ["R-type", "R4-type", "I-type", "S-type"] - dataset_list = [ - (["_zfh", "_d_zfh", "_q_zfh"], "RV32Zfh Standard Extension", [], False) - ] - dataset_list.append( - (["64_zfh"], "RV64Zfh Standard Extension (in addition to RV32Zfh)", [], False) - ) - make_ext_latex_table(type_list, dataset_list, latex_file, 32, caption) - - ## The following is demo to show that Compressed instructions can also be - # dumped in the same manner as above - - # type_list = [''] - # dataset_list = [(['_c', '32_c', '32_c_f','_c_d'],'RV32C Standard Extension', [])] - # dataset_list.append((['64_c'],'RV64C Standard Extension (in addition to RV32C)', [])) - # make_ext_latex_table(type_list, dataset_list, latex_file, 16, caption) - - latex_file.close() - - -def make_ext_latex_table(type_list, dataset, latex_file, ilen, caption): - """ - For a given collection of extensions this function dumps out a complete - latex table which includes the encodings of the instructions. - - The ilen input indicates the length of the instruction for which the table - is created. - - The caption input is used to create the latex-table caption. - - The type_list input is a list of instruction types (R, I, B, etc) that are - treated as header for each table. Each table will have its own requirements - and type_list must include all the instruction-types that the table needs. 
- Note, all elements of this list must be present in the latex_inst_type - dictionary defined in constants.py - - The latex_file is a file pointer to which the latex-table will dumped into - - The dataset is a list of 3-element tuples containing: - (list_of_extensions, title, list_of_instructions) - The list_of_extensions must contain all the set of extensions whose - instructions must be populated under a given title. If list_of_instructions - is not empty, then only those instructions mentioned in list_of_instructions - present in the extension will be dumped into the latex-table, other - instructions will be ignored. - - Once the above inputs are received then function first creates table entries - for the instruction types. To simplify things, we maintain a dictionary - called latex_inst_type in constants.py which is created in the same way the - instruction dictionary is created. This allows us to re-use the same logic - to create the instruction types table as well - - Once the header is created, we then parse through every entry in the - dataset. For each list dataset entry we use the create_inst_dict function to - create an exhaustive list of instructions associated with the respective - collection of the extension of that dataset. Then we apply the instruction - filter, if any, indicated by the list_of_instructions of that dataset. - Thereon, for each instruction we create a latex table entry. - - Latex table specification for ilen sized instructions: - Each table is created with ilen+1 columns - ilen columns for each bit of the - instruction and one column to hold the name of the instruction. - - For each argument of an instruction we use the arg_lut from constants.py - to identify its position in the encoding, and thus create a multicolumn - entry with the name of the argument as the data. 
For hardcoded bits, we - do the same where we capture a string of continuous 1s and 0s, identify - the position and assign the same string as the data of the - multicolumn entry in the table. - - """ - column_size = "".join(["p{0.002in}"] * (ilen + 1)) - - type_entries = ( - """ - \\multicolumn{3}{l}{31} & - \\multicolumn{2}{r}{27} & - \\multicolumn{1}{c}{26} & - \\multicolumn{1}{r}{25} & - \\multicolumn{3}{l}{24} & - \\multicolumn{2}{r}{20} & - \\multicolumn{3}{l}{19} & - \\multicolumn{2}{r}{15} & - \\multicolumn{2}{l}{14} & - \\multicolumn{1}{r}{12} & - \\multicolumn{4}{l}{11} & - \\multicolumn{1}{r}{7} & - \\multicolumn{6}{l}{6} & - \\multicolumn{1}{r}{0} \\\\ - \\cline{2-33}\n&\n\n -""" - if ilen == 32 - else """ - \\multicolumn{1}{c}{15} & - \\multicolumn{1}{c}{14} & - \\multicolumn{1}{c}{13} & - \\multicolumn{1}{c}{12} & - \\multicolumn{1}{c}{11} & - \\multicolumn{1}{c}{10} & - \\multicolumn{1}{c}{9} & - \\multicolumn{1}{c}{8} & - \\multicolumn{1}{c}{7} & - \\multicolumn{1}{c}{6} & - \\multicolumn{1}{c}{5} & - \\multicolumn{1}{c}{4} & - \\multicolumn{1}{c}{3} & - \\multicolumn{1}{c}{2} & - \\multicolumn{1}{c}{1} & - \\multicolumn{1}{c}{0} \\\\ - \\cline{2-17}\n&\n\n -""" - ) - - # depending on the type_list input we create a subset dictionary of - # latex_inst_type dictionary present in constants.py - type_dict = { - key: value for key, value in latex_inst_type.items() if key in type_list - } - - # iterate ovr each instruction type and create a table entry - for t in type_dict: - fields = [] - - # first capture all "arguments" of the type (funct3, funct7, rd, etc) - # and capture their positions using arg_lut. - for f in type_dict[t]["variable_fields"]: - (msb, lsb) = arg_lut[f] - name = f if f not in latex_mapping else latex_mapping[f] - fields.append((msb, lsb, name)) - - # iterate through the 32 bits, starting from the msb, and assign - # argument names to the relevant portions of the instructions. 
This - # information is stored as a 3-element tuple containing the msb, lsb - # position of the arugment and the name of the argument. - msb = ilen - 1 - y = "" - for r in range(0, ilen): - if y != "": - fields.append((msb, ilen - 1 - r + 1, y)) - y = "" - msb = ilen - 1 - r - 1 - if r == 31: - if y != "": - fields.append((msb, 0, y)) - y = "" - - # sort the arguments in decreasing order of msb position - fields.sort(key=lambda y: y[0], reverse=True) - - # for each argument/string of 1s or 0s, create a multicolumn latex table - # entry - entry = "" - for r in range(len(fields)): - (msb, lsb, name) = fields[r] - if r == len(fields) - 1: - entry += ( - f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {t} \\\\\n" - ) - elif r == 0: - entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" - else: - entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" - entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" - type_entries += entry - - # for each entry in the dataset create a table - content = "" - for ext_list, title, filter_list, include_pseudo in dataset: - instr_dict = {} - - # for all extensions list in ext_list, create a dictionary of - # instructions associated with those extensions. - for e in ext_list: - instr_dict.update(create_inst_dict(["rv" + e], include_pseudo)) - - # if filter_list is not empty then use that as the official set of - # instructions that need to be dumped into the latex table - inst_list = list(instr_dict.keys()) if not filter_list else filter_list - - # for each instruction create an latex table entry just like how we did - # above with the instruction-type table. - instr_entries = "" - for inst in inst_list: - if inst not in instr_dict: - logging.error( - f"in make_ext_latex_table: Instruction: {inst} not found in instr_dict" - ) - raise SystemExit(1) - fields = [] - - # only if the argument is available in arg_lut we consume it, else - # throw error. 
- for f in instr_dict[inst]["variable_fields"]: - if f not in arg_lut: - logging.error( - f"Found variable {f} in instruction {inst} whose mapping is not available" - ) - raise SystemExit(1) - (msb, lsb) = arg_lut[f] - name = ( - f.replace("_", ".") if f not in latex_mapping else latex_mapping[f] - ) - fields.append((msb, lsb, name)) - - msb = ilen - 1 - y = "" - if ilen == 16: - encoding = instr_dict[inst]["encoding"][16:] - else: - encoding = instr_dict[inst]["encoding"] - for r in range(0, ilen): - x = encoding[r] - if ((msb, ilen - 1 - r + 1)) in latex_fixed_fields: - fields.append((msb, ilen - 1 - r + 1, y)) - msb = ilen - 1 - r - y = "" - if x == "-": - if y != "": - fields.append((msb, ilen - 1 - r + 1, y)) - y = "" - msb = ilen - 1 - r - 1 - else: - y += str(x) - if r == ilen - 1: - if y != "": - fields.append((msb, 0, y)) - y = "" - - fields.sort(key=lambda y: y[0], reverse=True) - entry = "" - for r in range(len(fields)): - (msb, lsb, name) = fields[r] - if r == len(fields) - 1: - entry += f'\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} & {inst.upper().replace("_",".")} \\\\\n' - elif r == 0: - entry += f"\\multicolumn{{{msb - lsb + 1}}}{{|c|}}{{{name}}} &\n" - else: - entry += f"\\multicolumn{{{msb - lsb + 1}}}{{c|}}{{{name}}} &\n" - entry += f"\\cline{{2-{ilen+1}}}\n&\n\n" - instr_entries += entry - - # once an entry of the dataset is completed we create the whole table - # with the title of that dataset as sub-heading (sort-of) - if title != "": - content += f""" - -\\multicolumn{{{ilen}}}{{c}}{{}} & \\\\ -\\multicolumn{{{ilen}}}{{c}}{{\\bf {title} }} & \\\\ -\\cline{{2-{ilen+1}}} - - & -{instr_entries} -""" - else: - content += f""" -{instr_entries} -""" - - header = f""" -\\newpage - -\\begin{{table}}[p] -\\begin{{small}} -\\begin{{center}} - \\begin{{tabular}} {{{column_size}l}} - {" ".join(['&']*ilen)} \\\\ - - & -{type_entries} -""" - endtable = f""" - -\\end{{tabular}} -\\end{{center}} -\\end{{small}} -{caption} -\\end{{table}} -""" - # dump 
the contents and return - latex_file.write(header + content + endtable) - - -def instr_dict_2_extensions(instr_dict): - extensions = [] - for item in instr_dict.values(): - if item["extension"][0] not in extensions: - extensions.append(item["extension"][0]) - return extensions - - -def make_chisel(instr_dict, spinal_hdl=False): - - chisel_names = "" - cause_names_str = "" - csr_names_str = "" - for i in instr_dict: - if spinal_hdl: - chisel_names += f' def {i.upper().replace(".","_"):<18s} = M"b{instr_dict[i]["encoding"].replace("-","-")}"\n' - # else: - # chisel_names += f' def {i.upper().replace(".","_"):<18s} = BitPat("b{instr_dict[i]["encoding"].replace("-","?")}")\n' - if not spinal_hdl: - extensions = instr_dict_2_extensions(instr_dict) - for e in extensions: - e_instrs = filter(lambda i: instr_dict[i]["extension"][0] == e, instr_dict) - if "rv64_" in e: - e_format = e.replace("rv64_", "").upper() + "64" - elif "rv32_" in e: - e_format = e.replace("rv32_", "").upper() + "32" - elif "rv_" in e: - e_format = e.replace("rv_", "").upper() - else: - e_format = e.upper - chisel_names += f' val {e_format+"Type"} = Map(\n' - for instr in e_instrs: - tmp_instr_name = '"' + instr.upper().replace(".", "_") + '"' - chisel_names += f' {tmp_instr_name:<18s} -> BitPat("b{instr_dict[instr]["encoding"].replace("-","?")}"),\n' - chisel_names += f" )\n" - - for num, name in causes: - cause_names_str += f' val {name.lower().replace(" ","_")} = {hex(num)}\n' - cause_names_str += """ val all = { - val res = collection.mutable.ArrayBuffer[Int]() -""" - for num, name in causes: - cause_names_str += f' res += {name.lower().replace(" ","_")}\n' - cause_names_str += """ res.toArray - }""" - - for num, name in csrs + csrs32: - csr_names_str += f" val {name} = {hex(num)}\n" - csr_names_str += """ val all = { - val res = collection.mutable.ArrayBuffer[Int]() -""" - for num, name in csrs: - csr_names_str += f""" res += {name}\n""" - csr_names_str += """ res.toArray - } - val all32 = { - 
val res = collection.mutable.ArrayBuffer(all:_*) -""" - for num, name in csrs32: - csr_names_str += f""" res += {name}\n""" - csr_names_str += """ res.toArray - }""" - - if spinal_hdl: - chisel_file = open("inst.spinalhdl", "w") - else: - chisel_file = open("inst.chisel", "w") - chisel_file.write( - f""" -/* Automatically generated by parse_opcodes */ -object Instructions {{ -{chisel_names} -}} -object Causes {{ -{cause_names_str} -}} -object CSRs {{ -{csr_names_str} -}} -""" - ) - chisel_file.close() - - -def make_rust(instr_dict): - mask_match_str = "" - for i in instr_dict: - mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' - mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' - for num, name in csrs + csrs32: - mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n" - for num, name in causes: - mask_match_str += ( - f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' - ) - rust_file = open("inst.rs", "w") - rust_file.write( - f""" -/* Automatically generated by parse_opcodes */ -{mask_match_str} -""" - ) - rust_file.close() - - -def make_sverilog(instr_dict): - names_str = "" - for i in instr_dict: - names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n" - names_str += " /* CSR Addresses */\n" - for num, name in csrs + csrs32: - names_str += ( - f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" - ) - - sverilog_file = open("inst.sverilog", "w") - sverilog_file.write( - f""" -/* Automatically generated by parse_opcodes */ -package riscv_instr; -{names_str} -endpackage -""" - ) - sverilog_file.close() - - -def make_c(instr_dict): - mask_match_str = "" - declare_insn_str = "" - for i in instr_dict: - mask_match_str += ( - f'#define MATCH_{i.upper().replace(".","_")} {instr_dict[i]["match"]}\n' - ) - mask_match_str += ( - f'#define 
MASK_{i.upper().replace(".","_")} {instr_dict[i]["mask"]}\n' - ) - declare_insn_str += f'DECLARE_INSN({i.replace(".","_")}, MATCH_{i.upper().replace(".","_")}, MASK_{i.upper().replace(".","_")})\n' - - csr_names_str = "" - declare_csr_str = "" - for num, name in csrs + csrs32: - csr_names_str += f"#define CSR_{name.upper()} {hex(num)}\n" - declare_csr_str += f"DECLARE_CSR({name}, CSR_{name.upper()})\n" - - causes_str = "" - declare_cause_str = "" - for num, name in causes: - causes_str += f"#define CAUSE_{name.upper().replace(' ', '_')} {hex(num)}\n" - declare_cause_str += ( - f"DECLARE_CAUSE(\"{name}\", CAUSE_{name.upper().replace(' ','_')})\n" - ) - - arg_str = "" - for name, rng in arg_lut.items(): - sanitized_name = name.replace(" ", "_").replace("=", "_eq_") - begin = rng[1] - end = rng[0] - mask = ((1 << (end - begin + 1)) - 1) << begin - arg_str += f"#define INSN_FIELD_{sanitized_name.upper()} {hex(mask)}\n" - - with open(f"{os.path.dirname(__file__)}/encoding.h", "r") as file: - enc_header = file.read() - - commit = os.popen('git log -1 --format="format:%h"').read() - - # Generate the output as a string - output_str = f"""/* SPDX-License-Identifier: BSD-3-Clause */ - -/* Copyright (c) 2023 RISC-V International */ - -/* - * This file is auto-generated by running 'make' in - * https://github.com/riscv/riscv-opcodes ({commit}) - */ - -{enc_header} -/* Automatically generated by parse_opcodes. 
*/ -#ifndef RISCV_ENCODING_H -#define RISCV_ENCODING_H -{mask_match_str} -{csr_names_str} -{causes_str} -{arg_str}#endif -#ifdef DECLARE_INSN -{declare_insn_str}#endif -#ifdef DECLARE_CSR -{declare_csr_str}#endif -#ifdef DECLARE_CAUSE -{declare_cause_str}#endif -""" - - # Write the modified output to the file - with open("encoding.out.h", "w") as enc_file: - enc_file.write(output_str) - - -def make_go(instr_dict): - - args = " ".join(sys.argv) - prelude = f"""// Code generated by {args}; DO NOT EDIT.""" - - prelude += """ -package riscv - -import "cmd/internal/obj" - -type inst struct { - opcode uint32 - funct3 uint32 - rs1 uint32 - rs2 uint32 - csr int64 - funct7 uint32 -} - -func encode(a obj.As) *inst { - switch a { -""" - - endoffile = """ } - return nil -} -""" - - instr_str = "" - for i in instr_dict: - enc_match = int(instr_dict[i]["match"], 0) - opcode = (enc_match >> 0) & ((1 << 7) - 1) - funct3 = (enc_match >> 12) & ((1 << 3) - 1) - rs1 = (enc_match >> 15) & ((1 << 5) - 1) - rs2 = (enc_match >> 20) & ((1 << 5) - 1) - csr = (enc_match >> 20) & ((1 << 12) - 1) - funct7 = (enc_match >> 25) & ((1 << 7) - 1) - instr_str += f""" case A{i.upper().replace("_","")}: - return &inst{{ {hex(opcode)}, {hex(funct3)}, {hex(rs1)}, {hex(rs2)}, {signed(csr,12)}, {hex(funct7)} }} -""" - - with open("inst.go", "w") as file: - file.write(prelude) - file.write(instr_str) - file.write(endoffile) - - try: - import subprocess - - subprocess.run(["go", "fmt", "inst.go"]) - except: - pass - - -def signed(value, width): - if 0 <= value < (1 << (width - 1)): - return value - else: - return value - (1 << width) +LOG_FORMAT = "%(levelname)s:: %(message)s" +LOG_LEVEL = logging.INFO +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) if __name__ == "__main__": print(f"Running with args : {sys.argv}") extensions = sys.argv[1:] - for i in ["-c", "-latex", "-chisel", "-sverilog", "-rust", "-go", "-spinalhdl"]: - if i in extensions: - 
extensions.remove(i) + + targets = { + "-c", + "-chisel", + "-go", + "-latex", + "-pseudo", + "-rust", + "-spinalhdl", + "-sverilog", + } + + extensions = [ext for ext in extensions if ext not in targets] print(f"Extensions selected : {extensions}") - include_pseudo = False - if "-go" in sys.argv[1:]: - include_pseudo = True + include_pseudo = "-pseudo" in sys.argv[1:] instr_dict = create_inst_dict(extensions, include_pseudo) diff --git a/rust_utils.py b/rust_utils.py new file mode 100644 index 00000000..68e0c8cd --- /dev/null +++ b/rust_utils.py @@ -0,0 +1,31 @@ +import logging +import pprint + +from constants import * + +# from shared_utils import overlaps, overlap_allowed, extension_overlap_allowed, instruction_overlap_allowed, process_enc_line, same_base_isa, add_segmented_vls_insn, expand_nf_field +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_rust(instr_dict): + mask_match_str = "" + for i in instr_dict: + mask_match_str += f'const MATCH_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["match"])};\n' + mask_match_str += f'const MASK_{i.upper().replace(".","_")}: u32 = {(instr_dict[i]["mask"])};\n' + for num, name in csrs + csrs32: + mask_match_str += f"const CSR_{name.upper()}: u16 = {hex(num)};\n" + for num, name in causes: + mask_match_str += ( + f'const CAUSE_{name.upper().replace(" ","_")}: u8 = {hex(num)};\n' + ) + rust_file = open("inst.rs", "w") + rust_file.write( + f""" +/* Automatically generated by parse_opcodes */ +{mask_match_str} +""" + ) + rust_file.close() diff --git a/rv64_zba b/rv64_zba index 5378e52f..3a1186aa 100644 --- a/rv64_zba +++ b/rv64_zba @@ -3,3 +3,5 @@ sh1add.uw rd rs1 rs2 31..25=16 14..12=2 6..2=0x0E 1..0=3 sh2add.uw rd rs1 rs2 31..25=16 14..12=4 6..2=0x0E 1..0=3 sh3add.uw rd rs1 rs2 31..25=16 14..12=6 6..2=0x0E 1..0=3 slli.uw rd rs1 31..26=2 shamtd 14..12=1 6..2=0x06 1..0=3 + +$pseudo_op rv64_zba::add.uw zext.w rd 
rs1 31..25=4 24..20=0 14..12=0 6..2=0x0E 1..0=3 diff --git a/rv64_zcb b/rv64_zcb index ed38047e..8ce4429f 100644 --- a/rv64_zcb +++ b/rv64_zcb @@ -1 +1,3 @@ c.zext.w rd_rs1_p 1..0=1 15..13=4 12..10=7 6..5=3 4..2=4 + +$pseudo_op rv64_c::c.addiw c.sext.w rd_rs1_n0 15..13=1 12=0 6..2=0 1..0=1 diff --git a/shared_utils.py b/shared_utils.py new file mode 100644 index 00000000..5c925151 --- /dev/null +++ b/shared_utils.py @@ -0,0 +1,568 @@ +#!/usr/bin/env python3 +import copy +import glob +import logging +import os +import pprint +import re +from itertools import chain + +from constants import * + +LOG_FORMAT = "%(levelname)s:: %(message)s" +LOG_LEVEL = logging.INFO + +pretty_printer = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=LOG_LEVEL, format=LOG_FORMAT) + + +# Initialize encoding to 32-bit '-' values +def initialize_encoding(bits=32): + """Initialize encoding with '-' to represent don't care bits.""" + return ["-"] * bits + + +# Validate bit range and value +def validate_bit_range(msb, lsb, entry_value, line): + """Validate the bit range and entry value.""" + if msb < lsb: + logging.error( + f'{line.split(" ")[0]:<10} has position {msb} less than position {lsb} in its encoding' + ) + raise SystemExit(1) + + if entry_value >= (1 << (msb - lsb + 1)): + logging.error( + f'{line.split(" ")[0]:<10} has an illegal value {entry_value} assigned as per the bit width {msb - lsb}' + ) + raise SystemExit(1) + + +# Split the instruction line into name and remaining part +def parse_instruction_line(line): + """Parse the instruction name and the remaining encoding details.""" + name, remaining = line.split(" ", 1) + name = name.replace(".", "_") # Replace dots for compatibility + remaining = remaining.lstrip() # Remove leading whitespace + return name, remaining + + +# Verify Overlapping Bits +def check_overlapping_bits(encoding, ind, line): + """Check for overlapping bits in the encoding.""" + if encoding[31 - ind] != "-": + logging.error( + f'{line.split(" 
")[0]:<10} has {ind} bit overlapping in its opcodes' + ) + raise SystemExit(1) + + +# Update encoding for fixed ranges +def update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line): + """ + Update encoding bits for a given bit range. + Checks for overlapping bits and assigns the value accordingly. + """ + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding, ind, line) + bit = str((entry_value >> (ind - lsb)) & 1) + encoding[31 - ind] = bit + + +# Process fixed bit patterns +def process_fixed_ranges(remaining, encoding, line): + """Process fixed bit ranges in the encoding.""" + for s2, s1, entry in fixed_ranges.findall(remaining): + msb, lsb, entry_value = int(s2), int(s1), int(entry, 0) + + # Validate bit range and entry value + validate_bit_range(msb, lsb, entry_value, line) + update_encoding_for_fixed_range(encoding, msb, lsb, entry_value, line) + + return fixed_ranges.sub(" ", remaining) + + +# Process single bit assignments +def process_single_fixed(remaining, encoding, line): + """Process single fixed assignments in the encoding.""" + for lsb, value, drop in single_fixed.findall(remaining): + lsb = int(lsb, 0) + value = int(value, 0) + + check_overlapping_bits(encoding, lsb, line) + encoding[31 - lsb] = str(value) + + +# Main function to check argument look-up table +def check_arg_lut(args, encoding_args, name): + """Check if arguments are present in arg_lut.""" + for arg in args: + if arg not in arg_lut: + arg = handle_arg_lut_mapping(arg, name) + msb, lsb = arg_lut[arg] + update_encoding_args(encoding_args, arg, msb, lsb) + + +# Handle missing argument mappings +def handle_arg_lut_mapping(arg, name): + """Handle cases where an argument needs to be mapped to an existing one.""" + parts = arg.split("=") + if len(parts) == 2: + existing_arg, new_arg = parts + if existing_arg in arg_lut: + arg_lut[arg] = arg_lut[existing_arg] + else: + logging.error( + f" Found field {existing_arg} in variable {arg} in instruction {name} " + f"whose 
mapping in arg_lut does not exist" + ) + raise SystemExit(1) + else: + logging.error( + f" Found variable {arg} in instruction {name} " + f"whose mapping in arg_lut does not exist" + ) + raise SystemExit(1) + return arg + + +# Update encoding args with variables +def update_encoding_args(encoding_args, arg, msb, lsb): + """Update encoding arguments and ensure no overlapping.""" + for ind in range(lsb, msb + 1): + check_overlapping_bits(encoding_args, ind, arg) + encoding_args[31 - ind] = arg + + +# Compute match and mask +def convert_encoding_to_match_mask(encoding): + """Convert the encoding list to match and mask strings.""" + match = "".join(encoding).replace("-", "0") + mask = "".join(encoding).replace("0", "1").replace("-", "0") + return hex(int(match, 2)), hex(int(mask, 2)) + + +# Processing main function for a line in the encoding file +def process_enc_line(line, ext): + """ + This function processes each line of the encoding files (rv*). As part of + the processing, the function ensures that the encoding is legal through the + following checks:: + - there is no over specification (same bits assigned different values) + - there is no under specification (some bits not assigned values) + - bit ranges are in the format hi..lo=val where hi > lo + - value assigned is representable in the bit range + - also checks that the mapping of arguments of an instruction exists in + arg_lut. 
+ If the above checks pass, then the function returns a tuple of the name and + a dictionary containing basic information of the instruction which includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + represent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representing the bits that need to be masked to extract + the value required for matching. + """ + encoding = initialize_encoding() + + # Parse the instruction line + name, remaining = parse_instruction_line(line) + + # Process fixed ranges + remaining = process_fixed_ranges(remaining, encoding, line) + + # Process single fixed assignments + process_single_fixed(remaining, encoding, line) + + # Convert the list of encodings into a match and mask + match, mask = convert_encoding_to_match_mask(encoding) + + # Check arguments in arg_lut + args = single_fixed.sub(" ", remaining).split() + encoding_args = encoding.copy() + + check_arg_lut(args, encoding_args, name) + + # Return single_dict + return name, { + "encoding": "".join(encoding), + "variable_fields": args, + "extension": [os.path.basename(ext)], + "match": match, + "mask": mask, + } + + +# Extract ISA Type +def extract_isa_type(ext_name): + """Extracts the ISA type from the extension name.""" + return ext_name.split("_")[0] + + +# Verify the types for RV* +def is_rv_variant(type1, type2): + """Checks if the types are RV variants (rv32/rv64).""" + return (type2 == "rv" and type1 in {"rv32", "rv64"}) or ( + type1 == "rv" and type2 in {"rv32", "rv64"} + ) + + +# Check for same base ISA +def has_same_base_isa(type1, type2): + """Determines if the two ISA types share the same 
base.""" + return type1 == type2 or is_rv_variant(type1, type2) + + +# Compare the base ISA type of a given extension name against a list of extension names +def same_base_isa(ext_name, ext_name_list): + """Checks if the base ISA type of ext_name matches any in ext_name_list.""" + type1 = extract_isa_type(ext_name) + return any(has_same_base_isa(type1, extract_isa_type(ext)) for ext in ext_name_list) + + +# Pad two strings to equal length +def pad_to_equal_length(str1, str2, pad_char="-"): + """Pads two strings to equal length using the given padding character.""" + max_len = max(len(str1), len(str2)) + return str1.rjust(max_len, pad_char), str2.rjust(max_len, pad_char) + + +# Check compatibility for two characters +def has_no_conflict(char1, char2): + """Checks if two characters are compatible (either matching or don't-care).""" + return char1 == "-" or char2 == "-" or char1 == char2 + + +# Conflict check between two encoded strings +def overlaps(x, y): + """Checks if two encoded strings overlap without conflict.""" + x, y = pad_to_equal_length(x, y) + return all(has_no_conflict(x[i], y[i]) for i in range(len(x))) + + +# Check presence of keys in dictionary. 
+def is_in_nested_dict(a, key1, key2): + """Checks if key2 exists in the dictionary under key1.""" + return key1 in a and key2 in a[key1] + + +# Overlap allowance +def overlap_allowed(a, x, y): + """Determines if overlap is allowed between x and y based on nested dictionary checks""" + return is_in_nested_dict(a, x, y) or is_in_nested_dict(a, y, x) + + +# Check overlap allowance between extensions +def extension_overlap_allowed(x, y): + """Checks if overlap is allowed between two extensions using the overlapping_extensions dictionary.""" + return overlap_allowed(overlapping_extensions, x, y) + + +# Check overlap allowance between instructions +def instruction_overlap_allowed(x, y): + """Checks if overlap is allowed between two instructions using the overlapping_instructions dictionary.""" + return overlap_allowed(overlapping_instructions, x, y) + + +# Check 'nf' field +def is_segmented_instruction(instruction): + """Checks if an instruction contains the 'nf' field.""" + return "nf" in instruction["variable_fields"] + + +# Expand 'nf' fields +def update_with_expanded_instructions(updated_dict, key, value): + """Expands 'nf' fields in the instruction dictionary and updates it with new instructions.""" + for new_key, new_value in expand_nf_field(key, value): + updated_dict[new_key] = new_value + + +# Process instructions, expanding segmented ones and updating the dictionary +def add_segmented_vls_insn(instr_dict): + """Processes instructions, expanding segmented ones and updating the dictionary.""" + # Use dictionary comprehension for efficiency + return dict( + chain.from_iterable( + ( + expand_nf_field(key, value) + if is_segmented_instruction(value) + else [(key, value)] + ) + for key, value in instr_dict.items() + ) + ) + + +# Expand the 'nf' field in the instruction dictionary +def expand_nf_field(name, single_dict): + """Validate and prepare the instruction dictionary.""" + validate_nf_field(single_dict, name) + remove_nf_field(single_dict) + 
update_mask(single_dict) + + name_expand_index = name.find("e") + + # Pre compute the base match value and encoding prefix + base_match = int(single_dict["match"], 16) + encoding_prefix = single_dict["encoding"][3:] + + expanded_instructions = [ + create_expanded_instruction( + name, single_dict, nf, name_expand_index, base_match, encoding_prefix + ) + for nf in range(8) # Range of 0 to 7 + ] + + return expanded_instructions + + +# Validate the presence of 'nf' +def validate_nf_field(single_dict, name): + """Validates the presence of 'nf' in variable fields before expansion.""" + if "nf" not in single_dict["variable_fields"]: + logging.error(f"Cannot expand nf field for instruction {name}") + raise SystemExit(1) + + +# Remove 'nf' from variable fields +def remove_nf_field(single_dict): + """Removes 'nf' from variable fields in the instruction dictionary.""" + single_dict["variable_fields"].remove("nf") + + +# Update the mask to include the 'nf' field +def update_mask(single_dict): + """Updates the mask to include the 'nf' field in the instruction dictionary.""" + single_dict["mask"] = hex(int(single_dict["mask"], 16) | 0b111 << 29) + + +# Create an expanded instruction +def create_expanded_instruction( + name, single_dict, nf, name_expand_index, base_match, encoding_prefix +): + """Creates an expanded instruction based on 'nf' value.""" + new_single_dict = copy.deepcopy(single_dict) + + # Update match value in one step + new_single_dict["match"] = hex(base_match | (nf << 29)) + new_single_dict["encoding"] = format(nf, "03b") + encoding_prefix + + # Construct new instruction name + new_name = ( + name + if nf == 0 + else f"{name[:name_expand_index]}seg{nf + 1}{name[name_expand_index:]}" + ) + + return (new_name, new_single_dict) + + +# Return a list of relevant lines from the specified file +def read_lines(file): + """Reads lines from a file and returns non-blank, non-comment lines.""" + with open(file) as fp: + lines = (line.rstrip() for line in fp) + return [line 
for line in lines if line and not line.startswith("#")] + + +# Update the instruction dictionary +def process_standard_instructions(lines, instr_dict, file_name): + """Processes standard instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" in line or "$pseudo" in line: + continue + logging.debug(f"Processing line: {line}") + name, single_dict = process_enc_line(line, file_name) + ext_name = os.path.basename(file_name) + + if name in instr_dict: + var = instr_dict[name]["extension"] + if same_base_isa(ext_name, var): + log_and_exit( + f"Instruction {name} from {ext_name} is already added from {var} in same base ISA" + ) + elif instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Instruction {name} from {ext_name} has different encodings in different base ISAs" + ) + + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + for key, item in instr_dict.items(): + if ( + overlaps(item["encoding"], single_dict["encoding"]) + and not extension_overlap_allowed(ext_name, item["extension"][0]) + and not instruction_overlap_allowed(name, key) + and same_base_isa(ext_name, item["extension"]) + ): + log_and_exit( + f'Instruction {name} in extension {ext_name} overlaps with {key} in {item["extension"]}' + ) + + instr_dict[name] = single_dict + + +# Incorporate pseudo instructions into the instruction dictionary based on given conditions +def process_pseudo_instructions( + lines, instr_dict, file_name, opcodes_dir, include_pseudo, include_pseudo_ops +): + """Processes pseudo instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$pseudo" not in line: + continue + logging.debug(f"Processing pseudo line: {line}") + ext, orig_inst, pseudo_inst, line_content = pseudo_regex.findall(line)[0] + ext_file = find_extension_file(ext, opcodes_dir) + + validate_instruction_in_extension(orig_inst, ext_file, file_name, pseudo_inst) + + name, 
single_dict = process_enc_line(f"{pseudo_inst} {line_content}", file_name) + if ( + orig_inst.replace(".", "_") not in instr_dict + or include_pseudo + or name in include_pseudo_ops + ): + if name not in instr_dict: + instr_dict[name] = single_dict + logging.debug(f"Including pseudo_op: {name}") + else: + if single_dict["match"] != instr_dict[name]["match"]: + instr_dict[f"{name}_pseudo"] = single_dict + elif single_dict["extension"] not in instr_dict[name]["extension"]: + instr_dict[name]["extension"].extend(single_dict["extension"]) + + +# Integrate imported instructions into the instruction dictionary +def process_imported_instructions(lines, instr_dict, file_name, opcodes_dir): + """Processes imported instructions from the given lines and updates the instruction dictionary.""" + for line in lines: + if "$import" not in line: + continue + logging.debug(f"Processing imported line: {line}") + import_ext, reg_instr = imported_regex.findall(line)[0] + ext_file = find_extension_file(import_ext, opcodes_dir) + + validate_instruction_in_extension(reg_instr, ext_file, file_name, line) + + for oline in open(ext_file): + if re.findall(f"^\\s*{reg_instr}\\s+", oline): + name, single_dict = process_enc_line(oline, file_name) + if name in instr_dict: + if instr_dict[name]["encoding"] != single_dict["encoding"]: + log_and_exit( + f"Imported instruction {name} from {os.path.basename(file_name)} has different encodings" + ) + instr_dict[name]["extension"].extend(single_dict["extension"]) + else: + instr_dict[name] = single_dict + break + + +# Locate the path of the specified extension file, checking fallback directories +def find_extension_file(ext, opcodes_dir): + """Finds the extension file path, considering the unratified directory if necessary.""" + ext_file = f"{opcodes_dir}/{ext}" + if not os.path.exists(ext_file): + ext_file = f"{opcodes_dir}/unratified/{ext}" + if not os.path.exists(ext_file): + log_and_exit(f"Extension {ext} not found.") + return ext_file + + +# 
Confirm the presence of an original instruction in the corresponding extension file. +def validate_instruction_in_extension(inst, ext_file, file_name, pseudo_inst): + """Validates if the original instruction exists in the dependent extension.""" + found = False + for oline in open(ext_file): + if re.findall(f"^\\s*{inst}\\s+", oline): + found = True + break + if not found: + log_and_exit( + f"Original instruction {inst} required by pseudo_op {pseudo_inst} in {file_name} not found in {ext_file}" + ) + + +# Construct a dictionary of instructions filtered by specified criteria +def create_inst_dict(file_filter, include_pseudo=False, include_pseudo_ops=[]): + """Creates a dictionary of instructions based on the provided file filters.""" + + """ + This function returns a dictionary containing all instructions associated + with an extension defined by the file_filter input. + Allowed input extensions: needs to be rv* file name without the 'rv' prefix i.e. '_i', '32_i', etc. + Each node of the dictionary will correspond to an instruction which again is + a dictionary. The dictionary contents of each instruction includes: + - variables: list of arguments used by the instruction whose mapping + exists in the arg_lut dictionary + - encoding: this contains the 32-bit encoding of the instruction where + '-' is used to represent position of arguments and 1/0 is used to + represent the static encoding of the bits + - extension: this field contains the rv* filename from which this + instruction was included + - match: hex value representing the bits that need to match to detect + this instruction + - mask: hex value representing the bits that need to be masked to extract + the value required for matching. + In order to build this dictionary, the function does 2 passes over the same + rv file: + - First pass: extracts all standard instructions, skipping pseudo ops + and imported instructions. 
For each selected line, the `process_enc_line` + function is called to create the dictionary contents of the instruction. + Checks are performed to ensure that the same instruction is not added + twice to the overall dictionary. + - Second pass: parses only pseudo_ops. For each pseudo_op, the function: + - Checks if the dependent extension and instruction exist. + - Adds the pseudo_op to the dictionary if the dependent instruction + is not already present; otherwise, it is skipped. + """ + opcodes_dir = os.path.dirname(os.path.realpath(__file__)) + instr_dict = {} + + file_names = [ + file + for fil in file_filter + for file in sorted(glob.glob(f"{opcodes_dir}/{fil}"), reverse=True) + ] + + logging.debug("Collecting standard instructions") + for file_name in file_names: + logging.debug(f"Parsing File: {file_name} for standard instructions") + lines = read_lines(file_name) + process_standard_instructions(lines, instr_dict, file_name) + + logging.debug("Collecting pseudo instructions") + for file_name in file_names: + logging.debug(f"Parsing File: {file_name} for pseudo instructions") + lines = read_lines(file_name) + process_pseudo_instructions( + lines, + instr_dict, + file_name, + opcodes_dir, + include_pseudo, + include_pseudo_ops, + ) + + logging.debug("Collecting imported instructions") + for file_name in file_names: + logging.debug(f"Parsing File: {file_name} for imported instructions") + lines = read_lines(file_name) + process_imported_instructions(lines, instr_dict, file_name, opcodes_dir) + + return instr_dict + + +# Extracts the extensions used in an instruction dictionary +def instr_dict_2_extensions(instr_dict): + return list({item["extension"][0] for item in instr_dict.values()}) + + +# Returns signed interpretation of a value within a given width +def signed(value, width): + return value if 0 <= value < (1 << (width - 1)) else value - (1 << width) diff --git a/sverilog_utils.py b/sverilog_utils.py new file mode 100644 index 00000000..ff116ccb --- 
/dev/null +++ b/sverilog_utils.py @@ -0,0 +1,29 @@ +import logging +import pprint + +from shared_utils import * + +pp = pprint.PrettyPrinter(indent=2) +logging.basicConfig(level=logging.INFO, format="%(levelname)s:: %(message)s") + + +def make_sverilog(instr_dict): + names_str = "" + for i in instr_dict: + names_str += f" localparam [31:0] {i.upper().replace('.','_'):<18s} = 32'b{instr_dict[i]['encoding'].replace('-','?')};\n" + names_str += " /* CSR Addresses */\n" + for num, name in csrs + csrs32: + names_str += ( + f" localparam logic [11:0] CSR_{name.upper()} = 12'h{hex(num)[2:]};\n" + ) + + sverilog_file = open("inst.sverilog", "w") + sverilog_file.write( + f""" +/* Automatically generated by parse_opcodes */ +package riscv_instr; +{names_str} +endpackage +""" + ) + sverilog_file.close() diff --git a/test.py b/test.py index 699b0a17..eb9b6783 100644 --- a/test.py +++ b/test.py @@ -4,6 +4,7 @@ import unittest from parse import * +from shared_utils import * class EncodingLineTest(unittest.TestCase):