fx9860-emulator-playground/scripts/extract_instructions.js

186 lines
6.3 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Node file-system module; main() uses it to read the raw text input and to write the JSON output
const fs = require('fs')
// Accumulator for the (currently disabled) debug dump; see the commented-out uses further down
// let debug_str = ''
// Run the extraction immediately; `main` is a function declaration, so it is hoisted and callable here
main()
// Script entry point.
// Reads 'raw_instructions.txt', extracts the instruction sets it contains one
// after the other, prints the extracted names together with a sanity check
// (number of extracted instructions vs. number of 16-character codes found in
// the raw text) and writes the result to 'generated/instructions.json'.
function main() {
    // Upper bound on the number of instruction sets processed in one run.
    // NOTE(review): the bound used to be written `instruction_count * 0 + 100`,
    // which always evaluates to 100. The cap is kept as-is; confirm whether it
    // is still wanted or whether the whole file should be processed.
    const MAX_INSTRUCTION_SETS = 100

    const instructions = []

    // Read the raw txt file (already a string because an encoding is given)
    let string_content = fs.readFileSync('raw_instructions.txt', 'utf8')

    // Number of instruction codes in the raw text, used below as a sanity check.
    // `match` returns null when nothing matches, so fall back to an empty list.
    const instruction_count = (string_content.match(/[01imnd]{16}/gm) ?? []).length

    // Remove lines of type "Rev. 1.50, 10/04, page 378 of 610"
    string_content = string_content
        .split('\n')
        .filter(line => !line.includes('Rev.'))
        .join('\n')

    // Process each instruction set
    for (let i = 0; i < MAX_INSTRUCTION_SETS; i++) {
        const [instruction_set, new_string] = extractNextInstructionSet(string_content)

        // We reached the end of the file
        if (instruction_set == null) break

        // Continue with whatever follows the set that was just consumed
        string_content = new_string

        // Add each instruction to the list
        instructions.push(...instruction_set)
    }

    console.log(instructions.map(e => e.name))

    // Check that the correct numbers of instructions were extracted
    console.log(instructions.length, instruction_count)

    // Save to file
    fs.writeFileSync('generated/instructions.json', JSON.stringify(instructions, null, 4))
    // fs.writeFileSync('debug.txt', debug_str)
}
// Extract the next instruction set from the string.
// The set starts right after the first 'T Bit' marker (presumably the last
// column of the "Format Operation Instruction Code ..." table header - confirm
// against the raw text) and runs up to the next 'Format Operation' header, or
// to the end of the input when there is no further header.
// Returns [instructions, remaining_string], or [null, null] when the input
// contains no 'T Bit' marker anymore.
function extractNextInstructionSet(input_str) {
    const TABLE_HEADER_END = 'T Bit'
    const NEXT_TABLE_HEADER = 'Format Operation'
    const OPERATION_LABEL = 'Operation:'

    // If the marker was not found, we reached the end of the file
    const markerIndex = input_str.indexOf(TABLE_HEADER_END)
    if (markerIndex === -1) return [null, null]

    // Start index of the current instruction set
    const start = markerIndex + TABLE_HEADER_END.length
    const rest = input_str.slice(start)

    // Start index of the next instruction set. The last set of the file has no
    // following header: take everything that is left instead of letting the
    // -1 from indexOf cut off the final character.
    const nextHeaderIndex = rest.indexOf(NEXT_TABLE_HEADER)
    const end = nextHeaderIndex === -1 ? rest.length : nextHeaderIndex

    // Slice the string so it only contains the current instruction set
    const instructionSet = rest.slice(0, end)
    // debug_str += instructionSet + '\n\n========================================\n\n'

    // Extract the instructions from the table
    const instructions = extractInstructions(instructionSet)

    // Slice the string to advance to the "Operation" section
    const operationStart = instructionSet.indexOf(OPERATION_LABEL) + OPERATION_LABEL.length
    const operationSection = instructionSet.slice(operationStart)

    // Add the implementation code to each instruction (mutates `instructions`)
    extractImplementations(operationSection, instructions)

    // Return the instructions and the remaining string
    return [
        instructions,
        input_str.slice(start + end)
    ]
}
// Extract the code, mnemonic and operand description of each instruction
// given a string containing an instruction set table.
// For every 16-character code found, the text between the previous code and
// this one is searched for the mnemonic; the first space-separated token after
// the mnemonic (with ', ' collapsed to ',') becomes the description.
// Returns an array of { code, desc, family } objects in order of appearance,
// or an empty array when the input contains no instruction code.
// Throws an Error when a code is not preceded by a mnemonic.
function extractInstructions(input_str) {
    // Match tokens like 0100nnnnmmmm1100, 1010dddddddddddd or 0000000000001011
    const CODE_PATTERN = /[01imnd]{16}/g
    // const matches_dsp = input_str.match(/[01ADefguxyz*]{16}/gm) // Match tokens like 0100nnnnmmmm1100, 1010dddddddddddd or 0000000000001011
    // Match tokens like JSR, LDRS, MOV.L, DIV0S, CMP/EQ
    const NAME_PATTERN = /[A-Z.01268\/]{2,}/

    const instructions = []

    // End position of the previously consumed code
    let cursor = 0

    // matchAll yields nothing when there is no code, so an input without any
    // instruction simply produces an empty list
    for (const match of input_str.matchAll(CODE_PATTERN)) {
        const code = match[0]

        // Isolate what's between the previous code and this one
        const row = input_str.slice(cursor, match.index)
        cursor = match.index + code.length

        // Extract name
        const nameMatch = row.match(NAME_PATTERN)
        if (nameMatch == null)
            throw new Error(`No instruction name found before code ${code}`)
        const name = nameMatch[0]

        // Extract description: first token following the name
        const desc = row.slice(nameMatch.index + name.length).trim()
            .replaceAll(', ', ',')
            .split(' ')[0]
            .split('\r')[0]

        const instruction = {
            code,
            desc,
            family: name,
        }
        console.log(instruction)
        instructions.push(instruction)
    }

    return instructions
}
// Given a string containing the "Operation" section of an instruction set,
// extract every function in order and assign it to the corresponding instruction.
// `instructions` is mutated in place: each entry receives `name`, `operands`
// and `implementation` properties.
// Throws an Error when the number of instructions filled in differs from
// `instructions.length`.
function extractImplementations(input_str, instructions) {
// Match 3 groups: INSTRUCTION_NAME(OPERANDS) DESCRIPTION {
// Group 1 is the function name, group 2 the parenthesised operand list and
// group 3 a trailing span that ends with '/' (presumably the C comment that
// follows the signature - confirm against the raw text).
// The iterator runs over the string as it is at this point; reassigning
// input_str inside the loop does not affect which matches are produced.
const matches = input_str.matchAll(/\n {0,1}([A-Znm_012468]{2,}) {0,1}(\([^)]*\))[^\/]*(\/*.*\/)/gm)
// Position in `instructions` of the next entry to fill
let index = 0
// For each function match (`_` and `desc` are only used by the disabled debug log below)
for (let [_, name, operands, desc] of matches) {
// Starting position of the function: first occurrence of its name in what is left of the string
const start = input_str.indexOf(name)
// Ending position of the function (last bracket)
const end = findLastFunctionBracket(input_str)
// Extract the function
// NOTE(review): the search strings of the two replaceAll calls below look
// empty in this view; they presumably hold non-printing characters from the
// source document that stand for '~' and '-'. Verify the actual bytes before
// touching these lines.
const implementation = input_str.slice(start, end)
.replaceAll('', '~')
.replaceAll('', '-')
// Remove the function from the string
input_str = input_str.slice(end)
// Prevent macro name collision
if (name == 'MACL' || name == 'MACH') name += '_'
// console.log(start, end, name, operands, desc)
// Add the implementation details to the instruction
if (!name.includes('_BANK')) {
instructions[index].name = name
instructions[index].operands = operands
instructions[index].implementation = implementation
index++
}
// Special case for Rn_BANK instructions
// One function is shared by every consecutive instruction whose desc contains
// '_BANK'; each gets the name with its first 'n' or 'm' replaced by 0, 1, 2, ...
else {
let i = 0
while (instructions[index].desc.includes('_BANK')) {
instructions[index].name = name.replace(/[nm]/, i++)
instructions[index].operands = operands
instructions[index].implementation = implementation
// Stop at the end of the list so the loop condition never reads past it
if (++index == instructions.length) break
}
}
}
// Every instruction must have been filled in
if (index != instructions.length)
throw new Error(`Not all instructions were matched: ${index} != ${instructions.length}`)
}
// Get the index just past the bracket that closes the first '{' found in a
// function passed as a string, so that `input_str.slice(0, result)` ends with
// that closing '}'.
// Throws an Error when the string contains no '{' at all or when the first
// '{' is never closed.
function findLastFunctionBracket(input_str) {
    const firstBracketIndex = input_str.indexOf('{')
    if (firstBracketIndex === -1) throw new Error('No opening bracket found')

    // Walk the string by UTF-16 index (not by code point) so the returned
    // value is a valid slice position even when the body contains characters
    // outside the Basic Multilingual Plane.
    let depth = 0
    for (let i = firstBracketIndex; i < input_str.length; i++) {
        const char = input_str[i]
        if (char === '{') depth++
        else if (char === '}') depth--

        // Back at depth 0: this is the bracket matching the first one
        if (depth === 0) return i + 1
    }

    throw new Error('No closing bracket found')
}