{- find-grep-sed A program for searching files (like with Unix find), for seaching the contents (like with grep) and for editing the contents (like wih sed) -} module Main where -- some possibly usefull stuff import Control.Monad ( filterM , mapM , liftM , liftM2 ) import Data.List ( isSuffixOf , sort ) import Data.Maybe ( isJust , isNothing , fromMaybe ) import Text.Regex ( mkRegex , matchRegex ) import System.Environment ( getArgs , getProgName ) import System.IO ( openFile , hGetContents , hPutStr , hPutStrLn , hClose , stdout , stderr , IOMode(..) ) import System.Directory ( getCurrentDirectory , getDirectoryContents , doesDirectoryExist ) import System.Posix.Files ( getFileStatus , isRegularFile , isDirectory ) -- a data type for find expressions data FindExpr = Ext String -- test file extension | Name String -- test file name (not path name) | RE String -- test path with regular expr | IsFile -- test file type | IsDir | HasCont (String -> Bool) -- test the contents | AndExpr [FindExpr] -- combine tests with AND | OrExpr [FindExpr] -- combine tests with OR | NotExpr FindExpr -- negate test type FilePred = FilePath -> IO Bool -- map a find expr to an IO action working on a file fe2T :: FindExpr -> FilePred fe2T (Ext ext) f = return (isSuffixOf ext f) fe2T (Name f1) f = undefined -- ??? fe2T (RE re) f = return . isJust . matchRegex (mkRegex ("^(" ++ re ++ ")$")) $ f fe2T IsFile f -- uses functions from System.Posix.Files = do s <- getFileStatus f return (isRegularFile s) fe2T IsDir f = undefined -- ??? fe2T (HasCont p) f = (fe2T IsFile) `andFct` contentFind p $ f fe2T (AndExpr fl) f = undefined -- a fold ??? fe2T (OrExpr fl) f = undefined -- ??? fe2T (NotExpr e) f = do r <- (fe2T e) f return (not r) -- ------------------------------ trueFct :: FilePred trueFct = return . const True falseFct :: FilePred falseFct = return . const False andFct :: FilePred -> FilePred -> FilePred andFct fct1 fct2 f = undefined -- monadic version of "fct1 f && fct2 f" orFct :: FilePred -> FilePred -> FilePred orFct fct1 fct2 f = undefined -- ??? -- ------------------------------ contentFind :: (String -> Bool) -> FilePred contentFind p f = do hPutStrLn stderr ( "contentFind: " ++ f ) -- debug h <- openFile f ReadMode r <- undefined -- ??? hClose h return r -- ------------------------------ isAscii :: String -> Bool isAscii = undefined isLatin1 :: String -> Bool isLatin1 = undefined isUmlaut :: String -> Bool isUmlaut s = isLatin1 s && not (isAscii s) isAsciiChar :: Char -> Bool isAsciiChar c = (' ' <= c && c <= '~') || (c `elem` "\n\t\r") isLatin1Char :: Char -> Bool isLatin1Char c = isAsciiChar c || ( '\160' <= c && c <= '\255') -- ------------------------------ -- find all entries in a given dir -- -- if dir == "", use current dir -- -- caution: getDirectoryContents delivers a list of entries -- containing "." and "..", which must be ignored findAllEntries :: FilePath -> IO [FilePath] findAllEntries dir = do dir' <- if null dir then getCurrentDirectory else return dir hPutStrLn stderr ( "dir: " ++ dir' ) -- debug entries <- getDirectoryContents dir' -- ??? return undefined -- ??? where findRec path = undefined -- ??? -- ------------------------------ -- the main search function find :: FilePath -> FindExpr -> IO [FilePath] find dir test = do entries <- findAllEntries dir return undefined -- ??? -- ------------------------------ -- filename manipulation joinFile :: FilePath -> FilePath -> FilePath joinFile "" f = f joinFile d "" = d joinFile d f = d ++ "/" ++ f basename :: FilePath -> FilePath basename = reverse . takeWhile (/= '/') . reverse dirname :: FilePath -> FilePath dirname = reverse . drop 1 . dropWhile (/= '/') . reverse extension :: FilePath -> FilePath extension = reverse . takeWhile (/= '.') . reverse . hasDot . basename where hasDot s | all (== '.') s = "" | all (/= '.') s = "" | head s == '.' = hasDot (tail s) | otherwise = s -- ------------------------------ -- example for an edit function substUmlauts :: String -> String substUmlauts = concatMap transUmlaut where transUmlaut c | isAsciiChar c = [c] | otherwise = fromMaybe [c] . lookup c $ umlautMap umlautMap = [ ('\196', "Ae") , ('\214', "Oe") , ('\220', "Ue") , ('\223', "ss") , ('\228', "ae") , ('\246', "oe") , ('\252', "ue") ] -- ------------------------------ -- actions on found files -- print printFiles :: [FilePath] -> IO () printFiles = undefined -- ??? -- ------------------------------ -- | grep lines from file contents contentGrep :: (String -> Bool) -> FilePath -> IO () contentGrep p f = do h <- openFile f ReadMode c <- hGetContents h putStr $ grep (lines c) hClose h where grep = concatMap format . filter (p . snd) . zip [(1::Int)..] where format (n, k) = f ++ ":" ++ show n ++ ": " ++ k ++ "\n" -- ------------------------------ -- make backup file and edit the contents of a file contentEdit :: (String -> String) -> FilePath -> IO () contentEdit ef f = do hPutStrLn stderr ( "contentEdit: " ++ f ) -- debug undefined -- ??? -- ------------------------------ -- | find and process files processFiles :: ([FilePath] -> IO ()) -> FindExpr -> FilePath -> IO () processFiles action expr dir = do fs <- find dir expr action fs findFiles :: FindExpr -> FilePath -> IO () findFiles = processFiles undefined -- ??? grepFiles :: (String -> Bool) -> FindExpr -> FilePath -> IO () grepFiles prd = processFiles undefined -- ??? ... contentGrep ... sedFiles :: (String -> String) -> FindExpr -> FilePath -> IO () sedFiles edit = processFiles undefined -- ??? ... contentEdit ... -- ------------------------------ boringFiles = OrExpr [ Ext "~" , Ext ".bak" , Ext ".old" , Ext ".out" , Ext ".tmp" , Ext ".aux" ] -- ------------------------------ -- check for "none standard" file names badNames = AndExpr [ NotExpr boringFiles, RE ".*[^-+,._/a-zA-Z0-9].*" ] -- ------------------------------ binaryFiles = OrExpr [ Ext ".a" , Ext ".bz2" , Ext ".class" , Ext ".dep" , Ext ".gz" , Ext ".fig" , Ext ".hi" , Ext ".gif" , Ext ".gz" , Ext ".hi" , Ext ".jar" , Ext ".jpg" , Ext ".jpeg" , Ext ".nef" , Ext ".o" , Ext ".pdf" , Ext ".pgm" , Ext ".png" , Ext ".ppm" , Ext ".ps" , Ext ".psd" , Ext ".rpm" , Ext ".rws" , Ext ".tar" , Ext ".tgz" , Ext ".tif" , Ext ".tiff" , Ext ".xbm" , Ext ".xcf" , Ext ".zip" ] htmlFiles = OrExpr [ Ext ".htm" , Ext ".html" , Ext ".style" , Ext ".css" ] progFiles = OrExpr [ Name "Makefile" , Name "makefile" , Ext ".c" , Ext ".cc" , Ext ".cgi" , Ext ".cup" -- CUP input , Ext ".dot" -- dot graph input , Ext ".exp" , Ext ".h" , Ext ".hs" , Ext ".java" , Ext ".js" , Ext ".lex" , Ext ".lhs" , Ext ".pl" , Ext ".sh" , Ext ".tcl" , Ext ".y" ] -- ------------------------------ htmlLatin1Files = AndExpr [ htmlFiles , HasCont isUmlaut ] noneAsciiProgFiles = AndExpr [ progFiles , HasCont isUmlaut ] -- ------------------------------ actions :: [(String, FilePath -> IO () )] actions = [ ("-h", usage ) , ("-"++"-help", usage ) , ("findBadFilenames", findFiles badNames ) , ("findBinaryFiles", findFiles binaryFiles ) , ("findBoringFiles", findFiles boringFiles ) , ("findHtmlLatin1", findFiles htmlLatin1Files ) , ("findNoneAsciiProgs", findFiles noneAsciiProgFiles ) , ("grepHtmlLatin1", grepFiles isUmlaut htmlLatin1Files ) , ("grepNoneAsciiProgs", grepFiles isUmlaut noneAsciiProgFiles ) , ("sedNoneAsciiProgs", sedFiles substUmlauts noneAsciiProgFiles) ] main :: IO () main = do al <- getArgs let fct = head . (++ ["-h"]) $ al let dir = head . (++ [""]) . drop 1 $ al ( fromMaybe usage . lookup fct $ actions) dir return () usage :: FilePath -> IO () usage _dir = do pn <- getProgName putStrLn ( "usage: " ++ pn ++ " [" ++ cmds ++ "] [dir]\n" ) where cmds = foldl1 (\ x y -> x ++ " | " ++ y) . map fst $ actions -- ------------------------------ -- tests working with current dir -- find all files with "none standard" names findBadNames = findFiles badNames "." -- find all HTML files containing none ASCII chars findHtmlLatin1 = findFiles htmlLatin1Files "." -- grep the lines of containing the none ASCII chars -- output in grep format to be used e.g. with emacs grepHtmlLatin1 = grepFiles isUmlaut htmlLatin1Files -- edit all none ASCII progam files for removal of umlauts -- Caution: make backup before running test sedNoneAsciiProgs = sedFiles substUmlauts noneAsciiProgFiles -- ------------------------------