Compilerbau: Lexikalische Analyse |
|
1module PPL.Symbol where
2
3data Token
4 = Eof
5 | IdentSy -- e.g. i1
6 | StringConst -- e.g. "hahahahah\"jajaj"
7 | BoolConst -- false or true
8 | IntConst -- e.g. 123
9 | FloatConst -- e.g. 123.456, 123., .456
10 | AndSy -- and
11 | OrSy -- or
12 | XorSy -- xor
13 | NotSy -- not
14 | BeginSy -- begin
15 | DivSy -- div
16 | ModSy -- mod
17 | MinSy -- min
18 | MaxSy -- max
19 | IfSy -- if
20 | ThenSy -- then
21 | ElseSy -- else
22 | ElseIfSy -- elseif
23 | WhileSy -- while
24 | DoSy -- do
25 | RepeatSy -- repeat
26 | UntilSy -- until
27 | EndSy -- end
28 | EndIfSy -- endif
29 | EndWhileSy -- endwhile
30 | ReturnSy -- return
31 | OfSy -- of
32 | VarSy -- var
33 | FunctionSy -- function
34 | ProcedureSy -- procedure
35 | IntSy -- int
36 | FloatSy -- real
37 | BoolSy -- boolean
38 | StringSy -- string
39 | PicSy -- picture
40 | ListSy -- list
41 | LPar -- (
42 | RPar -- )
43 | LBr -- [
44 | RBr -- ]
45 | Assign -- :=
46 | Colon -- :
47 | Semicolon -- ;
48 | Comma -- ,
49 | Dot -- .
50 | PlusOp -- +
51 | MinusOp -- -
52 | MultOp -- *
53 | DivOp -- /
54 | EqOp -- =
55 | NeOp -- /=
56 | GeOp -- >=
57 | GrOp -- >
58 | LeOp -- <=
59 | LtOp -- <
60 | EquivOp -- <=>
61 | ImplOp -- =>
62 | Illegal -- everything else
63 deriving Show
64
65type Symbol = (Token, String)
|
1begin
2 var
3 i,j : int := 7, 3;
4
5 -- if statement
6
7 if i < j
8 then
9 i, j := j, i
10 elseif i = j
11 then
12 i, j := i - 1, j + 1
13 else
14 i := i+1
15 endif;
16
17 -- while loop
18
19 while i < j
20 do
21 i, j := i + 1, j - 1
22 endwhile;
23
24 -- repeat loop
25
26 repeat
27 i := i + 1
28 until i > j;
29
30 -- block
31
32 begin
33 var
34 k : int := i;
35 var
36 j : int := k;
37
38 j := i;
39 k := j
40
41 end
42
43end
|
1BeginSy "begin"
2VarSy "var"
3IdentSy "i"
4Comma ","
5IdentSy "j"
6Colon ":"
7IntSy "int"
8Assign ":="
9IntConst "7"
10Comma ","
11IntConst "3"
12Semicolon ";"
13IfSy "if"
14IdentSy "i"
15LtOp "<"
16IdentSy "j"
17ThenSy "then"
18IdentSy "i"
19Comma ","
20IdentSy "j"
21Assign ":="
22IdentSy "j"
23Comma ","
24IdentSy "i"
25ElseIfSy "elseif"
26IdentSy "i"
27EqOp "="
28IdentSy "j"
29ThenSy "then"
30IdentSy "i"
31Comma ","
32IdentSy "j"
33Assign ":="
34IdentSy "i"
35MinusOp "-"
36IntConst "1"
37Comma ","
38IdentSy "j"
39PlusOp "+"
40IntConst "1"
41ElseSy "else"
42IdentSy "i"
43Assign ":="
44IdentSy "i"
45PlusOp "+"
46IntConst "1"
47EndIfSy "endif"
48Semicolon ";"
49WhileSy "while"
50IdentSy "i"
51LtOp "<"
52IdentSy "j"
53DoSy "do"
54IdentSy "i"
55Comma ","
56IdentSy "j"
57Assign ":="
58IdentSy "i"
59PlusOp "+"
60IntConst "1"
61Comma ","
62IdentSy "j"
63MinusOp "-"
64IntConst "1"
65EndWhileSy "endwhile"
66Semicolon ";"
67RepeatSy "repeat"
68IdentSy "i"
69Assign ":="
70IdentSy "i"
71PlusOp "+"
72IntConst "1"
73UntilSy "until"
74IdentSy "i"
75GrOp ">"
76IdentSy "j"
77Semicolon ";"
78BeginSy "begin"
79VarSy "var"
80IdentSy "k"
81Colon ":"
82IntSy "int"
83Assign ":="
84IdentSy "i"
85Semicolon ";"
86VarSy "var"
87IdentSy "j"
88Colon ":"
89IntSy "int"
90Assign ":="
91IdentSy "k"
92Semicolon ";"
93IdentSy "j"
94Assign ":="
95IdentSy "i"
96Semicolon ";"
97IdentSy "k"
98Assign ":="
99IdentSy "j"
100EndSy "end"
101EndSy "end"
102Eof "<end of file>"
|
1module PPL.Lexer where
2
3import PPL.Symbol
4
5import Data.Char
6import Data.List
7
8lexer :: String -> [Symbol]
9
10lexer []
11 = [(Eof, "<end of file>")]
12
13lexer cs@(c:cs1)
14 | isSpace c = lexer cs1 -- separators
15 | isAlpha c = lexId cs -- ids or keywords
16 | isDigit c = lexNum cs -- numbers
17
18lexer ('\"':cs)
19 = lexString "" cs
20
21lexer ('-':'-':cs) -- comments like this one
22 = lexComment cs
23
24lexer cs -- symbols
25 | found = (tok, cs1)
26 : lexer (drop (length cs1) cs)
27 where
28 sys = filter (\ (ts,_) ->
29 isPrefixOf ts cs
30 ) symbols
31 found = not (null sys)
32 sy = head sys
33 (cs1, tok) = sy
34 symbols = [ (":=", Assign)
35 , (":", Colon)
36 , (",", Comma)
37 , ("(", LPar)
38 , (")", RPar)
39 , ("[", LBr)
40 , ("]", RBr)
41 , (";", Semicolon)
42 , (".", Dot)
43 , ("+", PlusOp)
44 , ("-", MinusOp)
45 , ("*", MultOp)
46 , ("=>", ImplOp)
47 , ("=", EqOp)
48 , ("/=", NeOp)
49 , ("/", DivOp)
50 , (">=", GeOp)
51 , (">", GrOp)
52 , ("<=>", EquivOp)
53 , ("<=", LeOp)
54 , ("<", LtOp)
55 ]
56
57lexer (c:_cs)
58 = error ("illegal character: " ++ [c])
59
60-- --------------------
61
62lexNum :: String -> [(Token, String)]
63
64lexNum cs
65 = lexNum1 ns cs1
66 where
67 (ns, cs1) = span isDigit cs
68
69-- --------------------
70
71lexNum1 :: String -> String -> [(Token, String)]
72
73lexNum1 ns ('.':cs1)
74 | hasFraction
75 = lexNum2 (ns ++ "." ++ fraction) cs2
76 where
77 (fraction, cs2) = span isDigit cs1
78 hasFraction = not (null fraction)
79
80lexNum1 ns cs
81 = (IntConst, ns) : lexer cs
82
83-- --------------------
84
85lexNum2 :: String -> String -> [(Token, String)]
86
87lexNum2 ns (c1:c2:cs2@(c3:_))
88 | c1 `elem` "Ee"
89 &&
90 c2 `elem` "+-"
91 &&
92 isDigit c3
93 = (FloatConst, ns ++ [c1] ++ [c2] ++ exp2)
94 : lexer rest2
95 where
96 (exp2, rest2) = span isDigit cs2
97
98lexNum2 ns (c1:cs1@(c2:_))
99 | c1 `elem` "Ee"
100 &&
101 isDigit c2
102 = (FloatConst, ns ++ [c1] ++ "+" ++ exp1)
103 : lexer rest1
104 where
105 (exp1, rest1) = span isDigit cs1
106
107lexNum2 ns cs
108 = (FloatConst, ns) : lexer cs
109
110-- --------------------
111
112lexId :: String -> [(Token, String)]
113
114lexId cs
115 = (token, ident) : lexer cs1
116 where
117 (ident, cs1) = span isAlphaNum cs
118 token = keyword (lookup ident keywords)
119 keyword (Just kw) = kw
120 keyword Nothing = IdentSy
121 keywords = [ ("and", AndSy)
122 , ("begin", BeginSy)
123 , ("boolean", BoolSy)
124 , ("div", DivSy)
125 , ("do", DoSy)
126 , ("else", ElseSy)
127 , ("elseif", ElseIfSy)
128 , ("end", EndSy)
129 , ("endif", EndIfSy)
130 , ("endwhile",EndWhileSy)
131 , ("return", ReturnSy)
132 , ("false", BoolConst)
133 , ("float", FloatSy)
134 , ("if", IfSy)
135 , ("int", IntSy)
136 , ("list", ListSy)
137 , ("max", MaxSy)
138 , ("min", MinSy)
139 , ("mod", ModSy)
140 , ("not", NotSy)
141 , ("of", OfSy)
142 , ("or", OrSy)
143 , ("picture", PicSy)
144 , ("repeat", RepeatSy)
145 , ("string", StringSy)
146 , ("then", ThenSy)
147 , ("true", BoolConst)
148 , ("until", UntilSy)
149 , ("var", VarSy)
150 , ("function",FunctionSy)
151 , ("procedure", ProcedureSy)
152 , ("while", WhileSy)
153 , ("xor", XorSy)
154 ]
155
156-- --------------------
157
158lexComment :: String -> [Symbol]
159
160lexComment ('\n':cs)
161 = lexer cs -- end of line
162 -- => end of comment
163lexComment (_:cs)
164 = lexComment cs
165
166lexComment []
167 = lexer [] -- eof in comment
168
169-- --------------------
170
171lexString :: String -> String -> [(Token, String)]
172 -- end of string const
173lexString ss ('\"':cs)
174 = (StringConst, reverse ss) : lexer cs
175
176lexString ss ('\\':c:cs) -- quoted char
177 = lexString ((decodeQuoteChar c):ss) cs
178
179lexString ss (c:cs) -- unquoted char
180 = lexString (c:ss) cs
181
182 -- end of file in string const
183lexString _ []
184 = error "end of input in string constant"
185
186-- --------------------
187
188decodeQuoteChar :: Char -> Char
189
190decodeQuoteChar 'n' = '\n'
191decodeQuoteChar 't' = '\t'
192decodeQuoteChar 'r' = '\r'
193decodeQuoteChar 'b' = '\b'
194decodeQuoteChar ch = ch
195
196showLex :: [Symbol] -> String
197showLex
198 = concat . map showToc
199 where
200 showToc (tok, txt)
201 = take 16 (show tok ++ replicate 16 ' ') ++
202 show txt ++ "\n"
|
Letzte Änderung: 14.02.2012 | © Prof. Dr. Uwe Schmidt |