Compilerbau: Deterministische endliche Automaten |
|
1-- running a deterministic finite automaton
2-- for scanning an input sequence and computing
3-- the token sequence
4
5module DFA
6 ( I,
7 Alphabet,
8 Q,
9 SetOfQ,
10 Delta,
11 Input,
12 Token,
13 Tokens,
14 DFA,
15
16 run,
17 accept,
18 showTokens
19 )
20where
21
22import Data.Maybe
23
24-- automaton types
25
26type I = Char
27 -- input symbols are Char's
28
29type Alphabet = [I]
30 -- input alphabet is a subset of the Char type
31
32type Q = Int
33 -- states are represented as Int's
34
35type SetOfQ = [Q]
36 -- state sets are represented as lists
37
38type Delta = Q -> I -> Maybe Q
39 -- transition table as function with Maybe result
40 -- to represent undefined as Nothing
41
42type Input = [I]
43 -- sequence of input symbols
44
45type Token = [I]
46 -- one token, one output symbol
47
48type Tokens = ([(Q, Token)], Input)
49 -- sequence of pairs of final state number
50 -- accepted token
51 -- and rest of input, if input can't be parsed
52
53type DFA = (SetOfQ, Alphabet, Q, SetOfQ, Delta)
54 -- the complete automaton, consisting of start state,
55 -- set of final states and transition function
56
57-- local data types
58
59type DFAState = (Q, Token, Input)
60 -- state of running automaton
61 -- consisting of current state q,
62 -- the already parsed char sequence
63 -- and the remaining input
64
65
66-- run an automaton on an input string
67
68run :: DFA -> Input -> Tokens
69
70run (_allStates, _allSymbols, start, finalStates, delta) input
71 | null input
72 &&
73 isFinalState start
74 = ([(start, "")], "")
75 | otherwise
76 = loop input
77
78 where
79 -- the main loop
80
81 loop :: Input -> Tokens
82 loop inp
83 | null s
84 = ([], inp)
85 | null inp'
86 = ([(q, s)], "")
87 | otherwise
88 = let
89 (ts, rest) = loop inp'
90 in
91 ((q, s) : ts, rest)
92 where
93 init = (start, "", inp)
94 (q, s, inp') = symbol init init
95
96 isFinalState :: Q -> Bool
97 isFinalState q = q `elem` finalStates
98
99 -- scan one symbol
100
101 symbol :: DFAState -> DFAState -> DFAState
102
103 symbol lastFinalState currState@(q, s, i)
104 -- success: token recognized
105 | isFinalState q && longestMatch
106 = currState
107
108 -- token may still be longer
109 | isFinalState q && not longestMatch
110 = symbol currState nextState
111
112 -- failure: restore last possible token
113 | not (isFinalState q) && longestMatch
114 = lastFinalState
115
116 -- token not yet complete
117 | not (isFinalState q) && not longestMatch
118 = symbol lastFinalState nextState
119
120 where
121 -- EOF or delta undefined
122 longestMatch = null i
123 ||
124 isNothing (delta q nextChar)
125
126 nextChar = head i
127 -- compute next state
128 -- and read next input char
129
130 nextState = ( fromJust (delta q nextChar)
131 , s ++ [nextChar]
132 , tail i
133 )
134
135
136-- word test
137
138accept :: DFA -> Input -> Bool
139accept a
140 = oneSymbol . run a
141 where
142 oneSymbol ([_], "") = True
143 oneSymbol _ = False
144
145-- format result
146
147showTokens :: Tokens -> String
148showTokens (ts, rest)
149 = concatMap showToken ts
150 ++
151 showRest rest
152 where
153 showToken (q, s)
154 = showState q ++ show s ++ "\n"
155 showState q'
156 = take 4 (show q' ++ repeat ' ') ++ ": "
157 showRest ""
158 = ""
159 showRest r
160 = "input not accepted: " ++ show r ++ "\n"
161
162
|
1dfa1 :: DFA
2dfa1
3 = (states, alphabet, q0, f, delta)
4 where
5 states = [1..12]
6 alphabet = "\n\t" ++ [' '..'~']
7 q0 = 1
8 f = [2,3,4,5,6,7,8,10,11,12]
9
10 delta 1 c
11 | c == 'i' = Just 2
12 | c `elem` ['a'..'h']
13 ||
14 c `elem` ['j'..'z'] = Just 4
15 | c == '.' = Just 5
16 | c `elem` ['0'..'9'] = Just 7
17 | c == '-' = Just 8
18 | c `elem` " \t\n" = Just 11
19 | otherwise = Just 12
20
21 delta 2 c
22 | c == 'f' = Just 3
23 | c `elem` ['a'..'e']
24 ||
25 c `elem` ['g'..'z']
26 ||
27 c `elem` ['0'..'9'] = Just 4
28
29 delta 3 c
30 | c `elem` ['a'..'z']
31 ||
32 c `elem` ['0'..'9'] = Just 4
33
34 delta 4 c
35 | c `elem` ['a'..'z']
36 ||
37 c `elem` ['0'..'9'] = Just 4
38
39 delta 5 c
40 | c `elem` ['0'..'9'] = Just 6
41
42 delta 6 c
43 | c `elem` ['0'..'9'] = Just 6
44
45 delta 7 c
46 | c `elem` ['0'..'9'] = Just 7
47 | c == '.' = Just 6
48
49 delta 8 c
50 | c == '-' = Just 9
51
52 delta 9 c
53 | c `elem` ['a'..'z'] = Just 9
54 | c == '\n' = Just 10
55
56 delta 11 c
57 | c `elem` " \t\n" = Just 11
58
59 delta _ _ = Nothing
|
|
1dfa2 :: DFA
2dfa2
3 = (states, alphabet, q0, f, delta)
4 where
5 states = [1..4]
6 alphabet = "ab"
7 q0 = 1
8 f = [2,4]
9 delta 1 'a' = Just 2
10 delta 1 'b' = Just 4
11 delta 2 'a' = Just 3
12 delta 3 'a' = Just 3
13 delta 3 'b' = Just 4
14 delta _ _ = Nothing
|
|
Letzte Änderung: 11.11.2017 | © Prof. Dr. Uwe Schmidt |