-- ------------------------------------------------------------ module RegexParser ( parseRegex ) where import Data.Maybe import Text.ParserCombinators.Parsec import DeltaRE(Regex) import SmartConstr -- ------------------------------------------------------------ parseRegex :: String -> Either String Regex parseRegex = either (Left . show) Right . parse ( do r <- regExp eof return r ) "" -- ------------------------------------------------------------ regExp :: Parser Regex regExp = do r1 <- branch rs <- many branch1 return (foldr1 mkUnion $ r1:rs) where branch1 = do _ <- char '|' branch branch :: Parser Regex branch = do rs <- many piece return $ foldr mkSeq mkUnit rs piece :: Parser Regex piece = do r <- atom quantifier r quantifier :: Regex -> Parser Regex quantifier r = ( do _ <- char '?' return $ mkOpt r ) <|> ( do _ <- char '*' return $ mkStar r ) <|> ( do _ <- char '+' return $ mkRep 1 r ) <|> ( do _ <- char '{' res <- quantity r _ <- char '}' return res ) <|> ( return r ) quantity :: Regex -> Parser Regex quantity r = do lb <- many1 digit quantityRest r (read lb) quantityRest :: Regex -> Int -> Parser Regex quantityRest r lb = ( do _ <- char ',' ub <- many digit return ( if null ub then mkRep lb r else mkRng lb (read ub) r ) ) <|> ( return $ mkRng lb lb r) atom :: Parser Regex atom = char1 <|> charClass <|> between (char '(') (char ')') regExp char1 :: Parser Regex char1 = do c <- satisfy $ (`notElem` ".\\?*+{}()|[]") return $ mkSym c charClass :: Parser Regex charClass = charClassEsc <|> charClassExpr <|> wildCardEsc charClassEsc :: Parser Regex charClassEsc = do _ <- char '\\' singleCharEsc singleCharEsc :: Parser Regex singleCharEsc = do c <- singleCharEsc' return $ mkSym c singleCharEsc' :: Parser Char singleCharEsc' = do c <- satisfy (`elem` "nrt\\|.?*+(){}-[]^") return $ maybe c id . lookup c . zip "ntr" $ "\n\r\t" charClassExpr :: Parser Regex charClassExpr = between (char '[') (char ']') charGroup charGroup :: Parser Regex charGroup = do r <- ( negCharGroup -- a ^ at beginning denotes negation, not start of posCharGroup <|> posCharGroup ) s <- option mkZero -- charClassSub ( do _ <- char '-' charClassExpr ) return $ mkDiff r s posCharGroup :: Parser Regex posCharGroup = do rs <- many1 (charRange <|> charClassEsc) return $ foldr1 mkUnion rs charRange :: Parser Regex charRange = try seRange <|> xmlCharIncDash seRange :: Parser Regex seRange = do c1 <- charOrEsc' _ <- char '-' c2 <- charOrEsc' return $ mkSymRng c1 c2 charOrEsc' :: Parser Char charOrEsc' = satisfy (`notElem` "\\-[]") <|> singleCharEsc' xmlCharIncDash :: Parser Regex xmlCharIncDash = do c <- satisfy (`notElem` "\\[]") return $ mkSym c negCharGroup :: Parser Regex negCharGroup = do _ <- char '^' r <- posCharGroup return $ mkCompl r wildCardEsc :: Parser Regex wildCardEsc = do _ <- char '.' return $ mkDot -- mkSym (`notElem` "\n\r") -- ------------------------------------------------------------