This commit is contained in:
Edgar 2023-12-14 10:25:56 +01:00
commit d9281843f2
No known key found for this signature in database
GPG Key ID: 70ADAE8F35904387
11 changed files with 2183 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

1870
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

21
Cargo.toml Normal file
View File

@ -0,0 +1,21 @@
[package]
name = "pascal-mlir"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
melior = { version = "0.14.0", features = ["ods-dialects"] }
clap = { version = "4.3.3", features = ["derive"] }
color-eyre = "0.6.2"
itertools = "0.12"
lalrpop-util = { version = "0.20.0", features = ["lexer"] }
regex = "1.9"
tracing = "0.1.37"
tracing-subscriber = { version = "0.3.17", features = ["env-filter"] }
annotate-snippets = { version = "0.9.1", features = ["color"] }
logos = "0.13.0"
[build-dependencies]
lalrpop = "0.20.0"

4
README.md Normal file
View File

@ -0,0 +1,4 @@
https://www.cs.utexas.edu/users/novak/iso7185.pdf
https://lalrpop.github.io/lalrpop/lexer_tutorial/004_token_references.html
I'm currently at section 6.1.7 of the ISO 7185 standard (linked above).

3
build.rs Normal file
View File

@ -0,0 +1,3 @@
/// Build script: compiles every `.lalrpop` grammar under `src/` into a
/// Rust parser module before the crate itself is built.
fn main() {
    // A broken grammar should abort the build with a readable message,
    // not a bare `unwrap()` panic.
    lalrpop::process_root().expect("failed to generate parsers from .lalrpop grammars");
}

6
programs/first.pas Normal file
View File

@ -0,0 +1,6 @@
program learn_pascal;
const
PI = 3.141592654;
GNU = 'GNU''s Not Unix';

20
src/ast.rs Normal file
View File

@ -0,0 +1,20 @@
/// A numeric literal, kept as the raw source slice so no precision is
/// lost before later stages decide how to materialize the value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Number<'a> {
    /// Integer literal text, e.g. `42`.
    Integer(&'a str),
    /// Real literal text, e.g. `3.14` or `1e9`.
    Real(&'a str),
}

/// A Pascal constant value as it appears in a `const` section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Constant<'a> {
    /// A reference to a previously declared constant identifier,
    /// optionally prefixed with a sign.
    Identifier {
        /// `true` when the reference is written with a leading `-`.
        is_negative: bool,
        ident: &'a str,
    },
    Number(Number<'a>),
    String(&'a str),
}

/// One `NAME = value` binding from a `const` section.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ConstantDef<'a> {
    pub ident: &'a str,
    pub value: Constant<'a>,
}

49
src/grammar.lalrpop Normal file
View File

@ -0,0 +1,49 @@
use crate::{
    ast,
    tokens::Token,
    lexer::LexicalError,
};

// The grammar borrows token text straight from the input, hence 'input.
grammar<'input>(input: &'input str);

// Hook up the external logos-based lexer (src/lexer.rs): locations are
// byte offsets, errors are LexicalError, and only the Token variants the
// grammar currently references are mapped.
extern {
    type Location = usize;
    type Error = LexicalError;

    enum Token<'input> {
        "program" => Token::WordProgram,
        "identifier" => Token::Identifier(<&'input str>),
        "integer" => Token::Integer(<&'input str>),
        "real" => Token::Real(<&'input str>),
        "string" => Token::String(<&'input str>),
        "-" => Token::SpecialMinus,
        "+" => Token::SpecialPlus,
    }
}

// Generic comma-separated list allowing an optional trailing element
// (the standard LALRPOP list-macro idiom).
Comma<T>: Vec<T> = {
    <mut v:(<T> ",")*> <e:T?> => match e {
        None => v,
        Some(e) => {
            v.push(e);
            v
        }
    }
};

// Smoke-test rule mapping the "program" keyword to a fixed string.
// NOTE(review): placeholder — presumably to be replaced by a real
// `program` production; confirm before relying on it.
pub Hello: String = {
    "program" => "let".to_string()
}

// Numeric literals; the raw text is preserved in the AST.
Number: ast::Number<'input> = {
    <"integer"> => ast::Number::Integer(<>),
    <"real"> => ast::Number::Real(<>),
}

// Constant values in a `const` section: numbers, strings, or an
// (optionally signed) reference to another constant identifier.
Constant: ast::Constant<'input> = {
    <Number> => ast::Constant::Number(<>),
    <"string"> => ast::Constant::String(<>),
    "+"? <ident:"identifier"> => ast::Constant::Identifier { is_negative: false, ident },
    "-" <ident:"identifier"> => ast::Constant::Identifier { is_negative: true, ident },
}

47
src/lexer.rs Normal file
View File

@ -0,0 +1,47 @@
use std::{fmt::Display, ops::Range};
use logos::{Logos, SpannedIter};
use crate::tokens::{LexingError, Token};
/// The triple LALRPOP expects from an external lexer:
/// `Ok((start_offset, token, end_offset))` or a lexer error.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;

/// Error surfaced to the parser when the lexer cannot produce a token.
#[derive(Debug, Clone)]
pub enum LexicalError {
    /// Wraps the logos error together with the byte range where it occurred.
    InvalidToken(LexingError, Range<usize>),
}
impl Display for LexicalError {
    /// Renders the error with the byte span first, so the message reads
    /// "lexical error at <span>: <error>".
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LexicalError::InvalidToken(err, span) => {
                // Bug fix: the original printed the error where the span
                // belonged ("at ({err:?}): {span:?}"); swap the arguments
                // so "at" refers to the source location.
                write!(f, "lexical error at {:?}: {:?}", span, err)
            }
        }
    }
}
/// Adapter that turns the logos token stream into the `Spanned`
/// iterator LALRPOP consumes.
pub struct Lexer<'input> {
    // instead of an iterator over characters, we have a token iterator
    token_stream: SpannedIter<'input, Token<'input>>,
}
impl<'input> Lexer<'input> {
pub fn new(input: &'input str) -> Self {
// the Token::lexer() method is provided by the Logos trait
Self {
token_stream: Token::lexer(input).spanned(),
}
}
}
impl<'input> Iterator for Lexer<'input> {
    type Item = Spanned<Token<'input>, usize, LexicalError>;

    /// Pulls the next token from the underlying logos stream, converting
    /// its `(Result<Token, _>, Span)` pairs into the LALRPOP-style
    /// `Result<(start, token, end), LexicalError>` triples.
    fn next(&mut self) -> Option<Self::Item> {
        let (result, span) = self.token_stream.next()?;
        Some(
            result
                .map(|tok| (span.start, tok, span.end))
                .map_err(|err| LexicalError::InvalidToken(err, span)),
        )
    }
}

11
src/main.rs Normal file
View File

@ -0,0 +1,11 @@
use lalrpop_util::lalrpop_mod;

// Pull in the parser module generated from src/grammar.lalrpop by build.rs.
lalrpop_mod!(pub grammar);

pub mod lexer;
pub mod tokens;
pub mod ast;

// Placeholder entry point; the parser is not wired into the CLI yet.
fn main() {
    println!("Hello, world!");
}

151
src/tokens.rs Normal file
View File

@ -0,0 +1,151 @@
use logos::Logos;
use std::convert::Infallible;
// https://github.com/maciejhirsz/logos/issues/133
/// Errors produced while lexing. Logos requires the error type to
/// implement `Default` so it has a variant to fall back on when a
/// callback's error conversion is not more specific.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub enum LexingError {
    /// A token looked numeric but failed to parse as a number.
    NumberParseError,
    /// Catch-all used when logos cannot match any token.
    #[default]
    Other,
}

impl From<std::num::ParseIntError> for LexingError {
    fn from(_: std::num::ParseIntError) -> Self {
        LexingError::NumberParseError
    }
}

impl From<Infallible> for LexingError {
    /// `Infallible` has no values, so this conversion can never actually
    /// run; the empty match makes that unreachability explicit instead of
    /// fabricating an `Other` value.
    fn from(never: Infallible) -> Self {
        match never {}
    }
}
#[derive(Logos, Debug, PartialEq, Clone)]
#[logos(error = LexingError, skip r"[ \t\n\f]+", skip r"//.*\n?", skip r"\{[^}]*\}" skip r"\(\*(.|[\r\n])*?\*\)")]
pub enum Token<'input> {
#[regex(r"[a-zA-Z][a-zA-Z\d]*")]
Identifier(&'input str), // also directive
#[regex(r"[+-]?[0-9][0-9]*")]
Integer(&'input str),
#[regex(r"[+-]?[0-9][0-9]*\.[0-9][0-9]*([eE][+-]?[0-9][0-9]*)?")]
#[regex(r"[+-]?[0-9][0-9]*[eE][+-]?[0-9][0-9]*")]
Real(&'input str),
#[regex(r#""(?:[^"]|\\")*""#)]
String(&'input str),
// special symbols
#[token("+")]
SpecialPlus,
#[token("-")]
SpecialMinus,
#[token("*")]
SpecialMul,
#[token("/")]
SpecialDiv,
#[token("=")]
SpecialEqual,
#[token("<")]
SpecialLower,
#[token(">")]
SpecialGreater,
#[token("[")]
SpecialOpenBracket,
#[token("]")]
SpecialCloseBracket,
#[token(".")]
SpecialDot,
#[token(",")]
SpecialComma,
#[token(";")]
SpecialDotComma,
#[token("\"")]
SpecialQuotation,
#[token("(")]
SpecialOpenParen,
#[token(")")]
SpecialCloseParen,
#[token("<>")]
SpecialSpaceship,
#[token("<=")]
SpecialLessEqual,
#[token(">=")]
SpecialGreaterEqual,
#[token(":=")]
SpecialAssign,
#[token("..")]
SpecialRange,
// special symbols - word symbols
#[token("and")]
WordAnd,
#[token("array")]
WordArray,
#[token("begin")]
WordBegin,
#[token("case")]
WordCase,
#[token("const")]
WordConst,
#[token("div")]
WordDiv,
#[token("do")]
WordDo,
#[token("downto")]
WordDownto,
#[token("else")]
WordElse,
#[token("end")]
WordEnd,
#[token("file")]
WordFile,
#[token("for")]
WordFor,
#[token("function")]
WordFunction,
#[token("goto")]
WordGoto,
#[token("if")]
WordIf,
#[token("in")]
WordIn,
#[token("label")]
WordLabel,
#[token("mod")]
WordMod,
#[token("nil")]
WordNil,
#[token("not")]
WordNot,
#[token("of")]
WordOf,
#[token("or")]
WordOr,
#[token("packed")]
WordPacked,
#[token("procedure")]
WordProcedure,
#[token("program")]
WordProgram,
#[token("record")]
WordRecord,
#[token("repeat")]
WordRepeat,
#[token("set")]
WordSet,
#[token("then")]
WordThen,
#[token("to")]
WordTo,
#[token("type")]
WordType,
#[token("until")]
WordUntil,
#[token("var")]
WordVar,
#[token("while")]
WordWhile,
#[token("with")]
WordWith,
}