module RegexParser
( parseRegex )
where
import Data.Maybe
import Text.ParserCombinators.Parsec
import DeltaRE(Regex)
import SmartConstr
parseRegex :: String -> Either String Regex
parseRegex
= either (Left . show) Right
.
parse ( do
r <- regExp
eof
return r
) ""
regExp :: Parser Regex
regExp
= do
r1 <- branch
rs <- many branch1
return (foldr1 mkUnion $ r1:rs)
where
branch1
= do
_ <- char '|'
branch
branch :: Parser Regex
branch
= do
rs <- many piece
return $ foldr mkSeq mkUnit rs
piece :: Parser Regex
piece
= do
r <- atom
quantifier r
quantifier :: Regex -> Parser Regex
quantifier r
= ( do
_ <- char '?'
return $ mkOpt r )
<|>
( do
_ <- char '*'
return $ mkStar r )
<|>
( do
_ <- char '+'
return $ mkRep 1 r )
<|>
( do
_ <- char '{'
res <- quantity r
_ <- char '}'
return res
)
<|>
( return r )
quantity :: Regex -> Parser Regex
quantity r
= do
lb <- many1 digit
quantityRest r (read lb)
quantityRest :: Regex -> Int -> Parser Regex
quantityRest r lb
= ( do
_ <- char ','
ub <- many digit
return ( if null ub
then mkRep lb r
else mkRng lb (read ub) r
)
)
<|>
( return $ mkRng lb lb r)
atom :: Parser Regex
atom
= char1
<|>
charClass
<|>
between (char '(') (char ')') regExp
char1 :: Parser Regex
char1
= do
c <- satisfy $ (`notElem` ".\\?*+{}()|[]")
return $ mkSym c
charClass :: Parser Regex
charClass
= charClassEsc
<|>
charClassExpr
<|>
wildCardEsc
charClassEsc :: Parser Regex
charClassEsc
= do
_ <- char '\\'
singleCharEsc
singleCharEsc :: Parser Regex
singleCharEsc
= do
c <- singleCharEsc'
return $ mkSym c
singleCharEsc' :: Parser Char
singleCharEsc'
= do
c <- satisfy (`elem` "nrt\\|.?*+(){}-[]^")
return $ maybe c id . lookup c . zip "ntr" $ "\n\r\t"
charClassExpr :: Parser Regex
charClassExpr
= between (char '[') (char ']') charGroup
charGroup :: Parser Regex
charGroup
= do
r <- ( negCharGroup
<|>
posCharGroup
)
s <- option mkZero
( do
_ <- char '-'
charClassExpr
)
return $ mkDiff r s
posCharGroup :: Parser Regex
posCharGroup
= do
rs <- many1 (charRange <|> charClassEsc)
return $ foldr1 mkUnion rs
charRange :: Parser Regex
charRange
= try seRange
<|>
xmlCharIncDash
seRange :: Parser Regex
seRange
= do
c1 <- charOrEsc'
_ <- char '-'
c2 <- charOrEsc'
return $ mkSymRng c1 c2
charOrEsc' :: Parser Char
charOrEsc'
= satisfy (`notElem` "\\-[]")
<|>
singleCharEsc'
xmlCharIncDash :: Parser Regex
xmlCharIncDash
= do
c <- satisfy (`notElem` "\\[]")
return $ mkSym c
negCharGroup :: Parser Regex
negCharGroup
= do
_ <- char '^'
r <- posCharGroup
return $ mkCompl r
wildCardEsc :: Parser Regex
wildCardEsc
= do
_ <- char '.'
return $ mkDot