diff --git a/app/Main.hs b/app/Main.hs index ae0b37d..2b5978f 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -5,7 +5,7 @@ import CmdOptions (Options (..), runCmdOptions) import Data.Text (Text) import Data.Text qualified as T import Data.Text.IO qualified as T -import Expr qualified +import Parser qualified import Path main :: IO () @@ -15,7 +15,7 @@ main = do assemblyCode <- T.readFile sourceCodePath -- T.putStrLn assemblyCode -- T.putStrLn $ T.replicate 10 "-" - mayStatements <- Expr.mainLocal assemblyCode + mayStatements <- Parser.mainLocal assemblyCode case mayStatements of Nothing -> T.putStrLn "No results to be written" Just statements -> do diff --git a/asmh.cabal b/asmh.cabal index 1b2ef3b..e9db9e2 100644 --- a/asmh.cabal +++ b/asmh.cabal @@ -52,6 +52,7 @@ library exposed-modules: Bin Expr + Parser build-depends: base >=4.7 && <5, diff --git a/src/Bin.hs b/src/Bin.hs index 16da0fd..6040467 100644 --- a/src/Bin.hs +++ b/src/Bin.hs @@ -87,16 +87,17 @@ trans labelMap instrLoc = \case checkLabel label = case M.lookup label labelMap of Nothing -> error $ concat ["Label '", T.unpack label, "' not found'"] Just v -> v - {-# INLINE putEitherW #-} - putEitherW :: Either Word8 Word16 -> Put - putEitherW = \case - Left w -> putWord8 w - Right w -> putWord16le w + putAsW8 :: RawValue -> Put = + putWord8 . \case + W8 w -> w + W16 w -> fromIntegral w + IntOrChar w -> fromIntegral w - putAsW8 :: Either Word8 Word16 -> Put = - putWord8 . either id fromIntegral - putAsW16 :: Either Word8 Word16 -> Put = - putWord16le . either fromIntegral id + putAsW16 :: RawValue -> Put = + putWord16le . \case + W8 w -> fromIntegral w + W16 w -> w + IntOrChar w -> fromIntegral w firstPass :: [Statement] -> ProgramInfo firstPass ls = go ls 0 (ProgramInfo mempty mempty mempty) diff --git a/src/Expr.hs b/src/Expr.hs index e18854e..15083b5 100644 --- a/src/Expr.hs +++ b/src/Expr.hs @@ -1,23 +1,18 @@ -module Expr where +module Expr + ( Register (..) + , RawValue (..) + , Label + , Statement (..) + , Operand (..) + , Instruction (..) + , Directive (..) + ) +where -import Control.Applicative hiding (many, some) -import Control.Monad -import Data.Char -import Data.Foldable (traverse_) -import Data.List.NonEmpty import Data.Text (Text) -import Data.Text qualified as T -import Data.Void (Void) import Data.Word -import Debug.Trace -import Text.Megaparsec -import Text.Megaparsec.Char -import Text.Megaparsec.Char.Lexer qualified as L -import Text.Megaparsec.Debug import Prelude hiding (take) -type Parser = Parsec Void Text - data Register = -- | (16 bit) the accumulator register (divided into AH / AL). AX @@ -53,7 +48,11 @@ data Register DH deriving (Show, Eq) -type RawValue = (Either Word8 Word16) +data RawValue + = W8 Word8 + | W16 Word16 + | IntOrChar Int + deriving (Show, Eq) type Label = Text @@ -90,222 +89,3 @@ data Directive | END | NAME Text deriving (Show, Eq) - --- Parser for registers -parseRegister :: Parser Register -parseRegister = - choice - [ AX <$ string' "AX" - , BX <$ string' "BX" - , CX <$ string' "CX" - , DX <$ string' "DX" - , SI <$ string' "SI" - , DI <$ string' "DI" - , BP <$ string' "BP" - , SP <$ string' "SP" - , AL <$ string' "AL" - , BL <$ string' "BL" - , CL <$ string' "CL" - , DL <$ string' "DL" - , AH <$ string' "AH" - , BH <$ string' "BH" - , CH <$ string' "CH" - , DH <$ string' "DH" - ] - -parseNum :: Num b => Int -> (Char -> Bool) -> Char -> Int -> Parser b -parseNum base cond ending numberOfDigits = try $ do - bintxt <- takeP (Just $ show base <> " digits") numberOfDigits - guard (T.all cond bintxt) - satisfy (== ending) - pure $ fromIntegral $ textToInt bintxt - where - textToInt :: Text -> Int - textToInt = T.foldl (\acc x -> acc * base + digitToInt x) 0 - -parseBin :: Num b => Int -> Parser b -parseBin = parseNum 2 (\ch -> ch == '0' || ch == '1') 'b' - -parseBin8 :: Parser Word8 -parseBin8 = parseBin 8 - -parseBin12 :: Parser Word16 -parseBin12 = parseBin 12 - -parseBin16 :: Parser Word16 -parseBin16 = parseBin 16 - -parseHex :: Num b => Int -> Parser b -parseHex = parseNum 16 isHexDigit 'h' - -parseHex8 :: Parser Word8 -parseHex8 = parseHex 2 - -parseHex12 :: Parser Word16 -parseHex12 = parseHex 3 - -parseHex16 :: Parser Word16 -parseHex16 = parseHex 4 - -parseChar :: Num a => Parser a -parseChar = fromIntegral . ord <$> between "'" "'" anySingle - -parseChar8 :: Parser Word8 -parseChar8 = parseChar - -parseChar12 :: Parser Word16 -parseChar12 = parseChar - -parseChar16 :: Parser Word16 -parseChar16 = parseChar - --- Parser for immediate values -parseImmediate8 :: Parser Word8 -parseImmediate8 = - label "expecting 8bit number" $ - choice - [ parseBin8 - , parseHex8 - , L.lexeme space1 L.decimal - , parseChar8 - ] - -parseImmediate12 :: Parser Word16 -parseImmediate12 = - label "expecting 12bit number" $ - choice - [ parseBin12 - , parseHex12 - , L.lexeme space1 L.decimal - , parseChar12 - ] - -parseImmediate16 :: Parser Word16 -parseImmediate16 = - label "expecting 16bit number" $ - choice - [ parseBin16 - , parseHex16 - , L.lexeme space1 L.decimal - , parseChar16 - ] - --- Parser for memory operands (simplified, just accepting labels for now) -parseMemory :: Parser Text -parseMemory = T.pack <$> (char '[' *> some letterChar <* char ']') - -parseVarOrLabelName :: Parser Text -parseVarOrLabelName = do - (t, _) <- - match (letterChar >> takeWhileP Nothing (\c -> isAlphaNum c || c == '_')) - pure t - --- do - --- first <- letterChar --- rest <- many (alphaNumChar <|> char '_') --- return (first : rest) - --- Parser for operands -parseOperand :: Parser Operand -parseOperand = - label "parsingOperand" $ - choice - [ RegOp <$> try parseRegister - , ImmOp - <$> (try (Right <$> parseImmediate16) <|> try (Left <$> parseImmediate8)) - , MemOp <$> try parseMemory - , do - c <- lookAhead anySingle - failure (Just $ Label (c :| [])) mempty - ] - --- Parser for instructions -parseInstruction :: Parser Instruction -parseInstruction = - L.lexeme hspace $ - choice - [ binaryP MOV "mov" - , binaryP ADD "add" - , binaryP SUB "sub" - , binaryP OR "or" - , INT - <$> (L.symbol' hspace1 "int" *> parseImmediate8) - , JNS <$> (L.symbol' hspace1 "jns" *> parseVarOrLabelName) - , JMP <$> (L.symbol' hspace1 "jmp" *> parseVarOrLabelName) - , INC <$> (L.symbol' hspace1 "inc" *> parseOperand) - , binaryP CMP "cmp" - , JE <$> (L.symbol' hspace1 "je" *> parseVarOrLabelName) - , RET <$ L.symbol' hspace "ret" - ] - where - binaryP constr txt = - constr - <$> (L.symbol' hspace1 txt *> L.lexeme hspace parseOperand) - <*> (char ',' *> hspace *> parseOperand) - -parseDirective :: Parser Directive -parseDirective = - choice - [ ORG <$> (L.symbol' hspace1 "org" >> parseImmediate12) - , END <$ L.symbol' hspace1 "end" - , NAME - <$> ( L.symbol' hspace1 "name" *> between "\"" "\"" (takeWhileP Nothing isAlphaNum) - ) - , parseDBdirective - ] - where - parseDBdirective = - choice - [ do - vname <- - T.toLower - <$> L.lexeme hspace (takeWhile1P (Just "variable char") isLetter) - "Variable name" - L.symbol' hspace1 "db" - v <- sepBy1 (L.lexeme hspace parseImmediate8) (char ',') - when (vname == "db") $ fail "you cannot name a variable 'db'" - pure $ DB $ Left (vname, v) - , do - L.symbol' hspace1 "db" - v <- sepBy1 (L.lexeme hspace parseImmediate8) (char ',') - pure $ DB $ Right v - ] - -parseLabel :: Parser Text -parseLabel = L.lexeme hspace parseVarOrLabelName <* char ':' - --- Parser for a full line (instruction + optional label) -parseStatement :: Parser Statement -parseStatement = - L.lexeme sc $ - choice - [ try (Dir <$> parseDirective) - , try (Lab <$> parseLabel) - , try (Ins <$> parseInstruction) - ] - -sc :: Parser () -sc = L.space space1 (L.skipLineComment ";") (L.skipBlockComment "/*" "*/") - --- Main parser function -parseAssembly - :: Text - -> Either - (ParseErrorBundle Text Void) - [Statement] -parseAssembly = - parse - (sc *> some parseStatement <* eof) - "myfile" - -mainLocal :: Text -> IO (Maybe [Statement]) -mainLocal assemblyCode = do - case parseAssembly assemblyCode of - Left err -> do - putStrLn "Error: " - putStrLn $ errorBundlePretty err - pure Nothing - Right statements -> do - -- traverse_ print statements - pure $ Just statements diff --git a/src/Parser.hs b/src/Parser.hs new file mode 100644 index 0000000..1355d4b --- /dev/null +++ b/src/Parser.hs @@ -0,0 +1,252 @@ +module Parser where + +import Control.Applicative hiding (many, some) +import Control.Monad +import Data.Char +import Data.List.NonEmpty qualified as N +import Data.Set qualified as S +import Data.Text (Text) +import Data.Text qualified as T +import Data.Void (Void) +import Data.Word +import Expr +import Text.Megaparsec +import Text.Megaparsec.Char +import Text.Megaparsec.Char.Lexer qualified as L +import Prelude hiding (take) + +type Parser = Parsec Void Text + +-- Parser for registers +parseRegister :: Parser Register +parseRegister = + choice + [ AX <$ string' "AX" + , BX <$ string' "BX" + , CX <$ string' "CX" + , DX <$ string' "DX" + , SI <$ string' "SI" + , DI <$ string' "DI" + , BP <$ string' "BP" + , SP <$ string' "SP" + , AL <$ string' "AL" + , BL <$ string' "BL" + , CL <$ string' "CL" + , DL <$ string' "DL" + , AH <$ string' "AH" + , BH <$ string' "BH" + , CH <$ string' "CH" + , DH <$ string' "DH" + ] + +parseNum :: Parser b -> Parser a -> Int -> String -> Parser b +parseNum lexer ending numberOfDigits lstr = label lstr $ do + o <- getOffset + region (\_ -> TrivialError o Nothing $ S.singleton $ Label $ N.fromList lstr) $ do + (toks, val) <- match lexer + guard (T.length toks == numberOfDigits) + ending + pure val + +parseBin :: Num b => Int -> Parser b +parseBin ndigits = parseNum L.binary (char 'b') ndigits lstr + where + lstr = + concat + [ "binary number of " + , show ndigits + , " digits [" + , show $ ndigits `div` 8 + , " byte(s)]" + ] + +parseBin8 :: Parser Word8 +parseBin8 = parseBin 8 + +parseBin12 :: Parser Word16 +parseBin12 = parseBin 12 + +parseBin16 :: Parser Word16 +parseBin16 = parseBin 16 + +parseHex :: Num b => Int -> Parser b +parseHex ndigits = parseNum L.hexadecimal (char 'h') ndigits lstr + where + lstr = + concat + [ "hexadecimal number of " + , show ndigits + , " digits [" + , show $ ndigits `div` 2 + , " bytes(s)]" + ] + +parseHex8 :: Parser Word8 +parseHex8 = parseHex 2 + +parseHex12 :: Parser Word16 +parseHex12 = parseHex 3 + +parseHex16 :: Parser Word16 +parseHex16 = parseHex 4 + +parseChar :: Parser Int +parseChar = + label "character enclosed in <'>" $ + ord <$> between "'" "'" anySingle + +parseDecimal :: Parser Int +parseDecimal = + label "decimal number" $ + L.lexeme space $ + hidden L.decimal <* lookAhead (label "no postfix for number" spaceChar) + +-- Parser for immediate values +parseImmediate8 :: Parser Word8 +parseImmediate8 = + label "8bit number" $ + choice + [ try parseBin8 + , parseHex8 + ] + +parseImmediate12 :: Parser Word16 +parseImmediate12 = + label "12bit number" $ + choice + [ try parseBin12 + , parseHex12 + ] + +parseImmediate16 :: Parser Word16 +parseImmediate16 = + label "16bit number" $ + choice + [ try parseBin16 + , parseHex16 + ] + +-- Parser for memory operands (simplified, just accepting labels for now) +parseMemory :: Parser Text +parseMemory = T.pack <$> (char '[' *> some letterChar <* char ']') + +parseVarOrLabelName :: Parser Text +parseVarOrLabelName = do + (t, _) <- + match (letterChar >> takeWhileP Nothing (\c -> isAlphaNum c || c == '_')) + pure t + +-- Parser for operands +parseOperand :: Parser Operand +parseOperand = + -- label "Instruction operand" $ + choice + [ try $ label "register" $ RegOp <$> parseRegister + , label "immediate value" $ + ImmOp + <$> choice + [ try $ IntOrChar <$> parseDecimal + , try $ W16 <$> parseImmediate16 + , try $ W8 <$> parseImmediate8 + , IntOrChar <$> parseChar + ] + , label "memory address" $ MemOp <$> parseMemory + ] + +-- Parser for instructions +parseInstruction :: Parser Instruction +parseInstruction = + label "Instruction" $ + L.lexeme hspace $ + choice + [ binaryP MOV "mov" + , binaryP ADD "add" + , binaryP SUB "sub" + , binaryP OR "or" + , INT <$> (L.symbol' hspace1 "int" *> parseImmediate8) + , JNS <$> (L.symbol' hspace1 "jns" *> parseVarOrLabelName) + , JMP <$> (L.symbol' hspace1 "jmp" *> parseVarOrLabelName) + , INC <$> (L.symbol' hspace1 "inc" *> parseOperand) + , binaryP CMP "cmp" + , JE <$> (L.symbol' hspace1 "je" *> parseVarOrLabelName) + , RET <$ L.symbol' hspace "ret" + ] + where + binaryP constr txt = do + L.symbol' hspace1 txt + constr + <$> label "op1" (L.lexeme hspace parseOperand) + <*> label "op2" (char ',' *> hidden hspace *> L.lexeme hspace parseOperand) + +parseDirective :: Parser Directive +parseDirective = + label "Compiler directive" $ + choice + [ ORG <$> (L.symbol' hspace1 "org" >> parseImmediate12) + , END <$ L.symbol' hspace1 "end" + , NAME + <$> ( L.symbol' hspace1 "name" *> between "\"" "\"" (takeWhileP Nothing isAlphaNum) + ) + , parseDBdirective + ] + where + parseDBdirective = + choice + [ do + vname <- + T.toLower + <$> L.lexeme hspace (takeWhile1P (Just "variable char") isLetter) + "Variable name" + L.symbol' hspace1 "db" + v <- sepBy1 (L.lexeme hspace parseImmediate8) (char ',') + when (vname == "db") $ fail "you cannot name a variable 'db'" + pure $ DB $ Left (vname, v) + , do + L.symbol' hspace1 "db" + v <- sepBy1 (L.lexeme hspace parseImmediate8) (char ',') + pure $ DB $ Right v + ] + +parseLabel :: Parser Text +parseLabel = label "Label" $ L.lexeme hspace parseVarOrLabelName <* char ':' + +-- Parser for a full line (instruction + optional label) +parseStatement :: Parser Statement +parseStatement = + L.lexeme sc $ + choice + [ try (Dir <$> parseDirective) + , try (Lab <$> parseLabel) + , Ins <$> parseInstruction + ] + +sc :: Parser () +sc = L.space space1 (L.skipLineComment ";") (L.skipBlockComment "/*" "*/") + +-- Main parser function +parseAssembly + :: Text + -> Either + (ParseErrorBundle Text Void) + [Statement] +parseAssembly = + parse + (sc *> some parseStatement <* eof) + "myfile" + +mainLocal :: Text -> IO (Maybe [Statement]) +mainLocal assemblyCode = do + case parseAssembly assemblyCode of + Left err -> do + putStrLn "Error: " + putStrLn $ errorBundlePretty err + pure Nothing + Right statements -> do + -- traveabse_ print statements + pure $ Just statements + +parseTest :: IO (Maybe [Statement]) +parseTest = do + mainLocal "mov ah, \"text3243242moremore\"" + +-- mainLocal "mov ah, 3243242moremore\"" \ No newline at end of file diff --git a/test/Spec.hs b/test/Spec.hs index a4ed5fa..cf0e100 100644 --- a/test/Spec.hs +++ b/test/Spec.hs @@ -15,8 +15,8 @@ import Bits.Show (showFiniteBits) import Data.Text (Text) import Data.Text qualified as T import Data.Word -import Expr import Numeric (showHex) +import Parser import Test.Tasty import Test.Tasty.HUnit import Text.Megaparsec @@ -51,7 +51,7 @@ tests = [ testGroup "Basic parsing tests" [ testGroup - "parseImmediate16" + "parseImmediate" [ testCaseNum Bin 0b1010_1010 parseImmediate8 , testCaseNum Bin 0b1010_1010_1010_1010 parseImmediate16 , testCaseNum Hex 0x2A parseImmediate8