Funktionale Programmierung: Ein- und Ausgabe |
|
1{-
2 find-grep-sed
3
4 A program for searching files (like with Unix find),
5 for seaching the contents (like with grep) and
6 for editing the contents (like wih sed)
7-}
8
9module Main
10where
11
12-- some possibly usefull stuff
13
14import Control.Monad
15 ( filterM
16 , mapM
17 , liftM
18 , liftM2
19 )
20
21import Data.List
22 ( isSuffixOf
23 , sort
24 )
25
26import Data.Maybe
27 ( isJust
28 , isNothing
29 , fromMaybe
30 )
31
32import Text.Regex
33 ( mkRegex
34 , matchRegex
35 )
36
37import System.Environment
38 ( getArgs
39 , getProgName
40 )
41
42import System.IO
43 ( openFile
44 , hGetContents
45 , hPutStr
46 , hPutStrLn
47 , hClose
48 , stdout
49 , stderr
50 , IOMode(..)
51 )
52
53import System.Directory
54 ( getCurrentDirectory
55 , getDirectoryContents
56 , doesDirectoryExist
57 )
58
59import System.Posix.Files
60 ( getFileStatus
61 , isRegularFile
62 , isDirectory
63 )
64
65-- a data type for find expressions
66
67data FindExpr
68 = Ext String -- test file extension
69 | Name String -- test file name (not path name)
70 | RE String -- test path with regular expr
71 | IsFile -- test file type
72 | IsDir
73 | HasCont (String -> Bool) -- test the contents
74 | AndExpr [FindExpr] -- combine tests with AND
75 | OrExpr [FindExpr] -- combine tests with OR
76 | NotExpr FindExpr -- negate test
77
78type FilePred = FilePath -> IO Bool
79
80-- map a find expr to an IO action working on a file
81
82fe2T :: FindExpr -> FilePred
83
84fe2T (Ext ext) f
85 = return (isSuffixOf ext f)
86
87fe2T (Name f1) f
88 = undefined -- ???
89
90fe2T (RE re) f
91 = return . isJust . matchRegex (mkRegex ("^(" ++ re ++ ")$")) $ f
92
93fe2T IsFile f -- uses functions from System.Posix.Files
94 = do
95 s <- getFileStatus f
96 return (isRegularFile s)
97
98fe2T IsDir f
99 = undefined -- ???
100
101fe2T (HasCont p) f
102 = (fe2T IsFile) `andFct` contentFind p $ f
103
104fe2T (AndExpr fl) f
105 = undefined -- a fold ???
106
107fe2T (OrExpr fl) f
108 = undefined -- ???
109
110fe2T (NotExpr e) f
111 = do
112 r <- (fe2T e) f
113 return (not r)
114
115-- ------------------------------
116
117trueFct :: FilePred
118trueFct = return . const True
119
120falseFct :: FilePred
121falseFct = return . const False
122
123andFct :: FilePred -> FilePred -> FilePred
124andFct fct1 fct2 f
125 = undefined -- monadic version of "fct1 f && fct2 f"
126
127orFct :: FilePred -> FilePred -> FilePred
128orFct fct1 fct2 f
129 = undefined -- ???
130
131-- ------------------------------
132
133contentFind :: (String -> Bool) -> FilePred
134contentFind p f
135 = do
136 hPutStrLn stderr ( "contentFind: " ++ f ) -- debug
137 h <- openFile f ReadMode
138 r <- undefined -- ???
139 hClose h
140 return r
141
142-- ------------------------------
143
144isAscii :: String -> Bool
145isAscii = undefined
146
147isLatin1 :: String -> Bool
148isLatin1 = undefined
149
150isUmlaut :: String -> Bool
151isUmlaut s = isLatin1 s && not (isAscii s)
152
153isAsciiChar :: Char -> Bool
154isAsciiChar c = (' ' <= c && c <= '~') || (c `elem` "\n\t\r")
155
156isLatin1Char :: Char -> Bool
157isLatin1Char c = isAsciiChar c || ( '\160' <= c && c <= '\255')
158
159-- ------------------------------
160
161-- find all entries in a given dir
162--
163-- if dir == "", use current dir
164--
165-- caution: getDirectoryContents delivers a list of entries
166-- containing "." and "..", which must be ignored
167
168findAllEntries :: FilePath -> IO [FilePath]
169findAllEntries dir
170 = do
171 dir' <- if null dir
172 then getCurrentDirectory
173 else return dir
174 hPutStrLn stderr ( "dir: " ++ dir' ) -- debug
175 entries <- getDirectoryContents dir'
176 -- ???
177 return undefined -- ???
178 where
179 findRec path
180 = undefined -- ???
181
182-- ------------------------------
183
184-- the main search function
185
186find :: FilePath -> FindExpr -> IO [FilePath]
187find dir test
188 = do
189 entries <- findAllEntries dir
190 return undefined -- ???
191
192-- ------------------------------
193
194-- filename manipulation
195
196joinFile :: FilePath -> FilePath -> FilePath
197joinFile "" f = f
198joinFile d "" = d
199joinFile d f = d ++ "/" ++ f
200
201basename :: FilePath -> FilePath
202basename = reverse . takeWhile (/= '/') . reverse
203
204dirname :: FilePath -> FilePath
205dirname = reverse . drop 1 . dropWhile (/= '/') . reverse
206
207extension :: FilePath -> FilePath
208extension
209 = reverse . takeWhile (/= '.') . reverse
210 . hasDot . basename
211 where
212 hasDot s
213 | all (== '.') s = ""
214 | all (/= '.') s = ""
215 | head s == '.' = hasDot (tail s)
216 | otherwise = s
217
218-- ------------------------------
219
220-- example for an edit function
221
222substUmlauts :: String -> String
223substUmlauts
224 = concatMap transUmlaut
225 where
226 transUmlaut c
227 | isAsciiChar c
228 = [c]
229 | otherwise
230 = fromMaybe [c]
231 . lookup c
232 $ umlautMap
233 umlautMap
234 = [ ('\196', "Ae")
235 , ('\214', "Oe")
236 , ('\220', "Ue")
237 , ('\223', "ss")
238 , ('\228', "ae")
239 , ('\246', "oe")
240 , ('\252', "ue")
241 ]
242
243-- ------------------------------
244
245-- actions on found files
246
247-- print
248
249printFiles :: [FilePath] -> IO ()
250printFiles = undefined -- ???
251
252-- ------------------------------
253
254-- | grep lines from file contents
255
256contentGrep :: (String -> Bool) -> FilePath -> IO ()
257contentGrep p f
258 = do
259 h <- openFile f ReadMode
260 c <- hGetContents h
261 putStr $ grep (lines c)
262 hClose h
263 where
264 grep
265 = concatMap format . filter (p . snd) . zip [(1::Int)..]
266 where
267 format (n, k) = f ++ ":" ++ show n ++ ": " ++ k ++ "\n"
268
269-- ------------------------------
270
271-- make backup file and edit the contents of a file
272
273contentEdit :: (String -> String) -> FilePath -> IO ()
274contentEdit ef f
275 = do
276 hPutStrLn stderr ( "contentEdit: " ++ f ) -- debug
277 undefined -- ???
278
279-- ------------------------------
280
281-- | find and process files
282
283processFiles :: ([FilePath] -> IO ()) ->
284 FindExpr -> FilePath -> IO ()
285processFiles action expr dir
286 = do
287 fs <- find dir expr
288 action fs
289
290findFiles :: FindExpr -> FilePath -> IO ()
291findFiles = processFiles undefined -- ???
292
293grepFiles :: (String -> Bool) -> FindExpr -> FilePath -> IO ()
294grepFiles prd = processFiles undefined -- ??? ... contentGrep ...
295
296sedFiles :: (String -> String) -> FindExpr -> FilePath -> IO ()
297sedFiles edit = processFiles undefined -- ??? ... contentEdit ...
298
299-- ------------------------------
300
301boringFiles
302 = OrExpr [ Ext "~"
303 , Ext ".bak"
304 , Ext ".old"
305 , Ext ".out"
306 , Ext ".tmp"
307 , Ext ".aux"
308 ]
309
310-- ------------------------------
311
312-- check for "none standard" file names
313
314badNames
315 = AndExpr [ NotExpr boringFiles,
316 RE ".*[^-+,._/a-zA-Z0-9].*"
317 ]
318
319-- ------------------------------
320
321binaryFiles
322 = OrExpr [ Ext ".a"
323 , Ext ".bz2"
324 , Ext ".class"
325 , Ext ".dep"
326 , Ext ".gz"
327 , Ext ".fig"
328 , Ext ".hi"
329 , Ext ".gif"
330 , Ext ".gz"
331 , Ext ".hi"
332 , Ext ".jar"
333 , Ext ".jpg"
334 , Ext ".jpeg"
335 , Ext ".nef"
336 , Ext ".o"
337 , Ext ".pdf"
338 , Ext ".pgm"
339 , Ext ".png"
340 , Ext ".ppm"
341 , Ext ".ps"
342 , Ext ".psd"
343 , Ext ".rpm"
344 , Ext ".rws"
345 , Ext ".tar"
346 , Ext ".tgz"
347 , Ext ".tif"
348 , Ext ".tiff"
349 , Ext ".xbm"
350 , Ext ".xcf"
351 , Ext ".zip"
352 ]
353
354htmlFiles
355 = OrExpr [ Ext ".htm"
356 , Ext ".html"
357 , Ext ".style"
358 , Ext ".css"
359 ]
360
361progFiles
362 = OrExpr [ Name "Makefile"
363 , Name "makefile"
364 , Ext ".c"
365 , Ext ".cc"
366 , Ext ".cgi"
367 , Ext ".cup" -- CUP input
368 , Ext ".dot" -- dot graph input
369 , Ext ".exp"
370 , Ext ".h"
371 , Ext ".hs"
372 , Ext ".java"
373 , Ext ".js"
374 , Ext ".lex"
375 , Ext ".lhs"
376 , Ext ".pl"
377 , Ext ".sh"
378 , Ext ".tcl"
379 , Ext ".y"
380 ]
381
382-- ------------------------------
383
384htmlLatin1Files
385 = AndExpr [ htmlFiles
386 , HasCont isUmlaut
387 ]
388
389noneAsciiProgFiles
390 = AndExpr [ progFiles
391 , HasCont isUmlaut
392 ]
393
394-- ------------------------------
395
396actions :: [(String, FilePath -> IO () )]
397actions
398 = [ ("-h", usage )
399 , ("-"++"-help", usage )
400
401 , ("findBadFilenames", findFiles badNames )
402 , ("findBinaryFiles", findFiles binaryFiles )
403 , ("findBoringFiles", findFiles boringFiles )
404 , ("findHtmlLatin1", findFiles htmlLatin1Files )
405 , ("findNoneAsciiProgs", findFiles noneAsciiProgFiles )
406
407 , ("grepHtmlLatin1", grepFiles isUmlaut htmlLatin1Files )
408 , ("grepNoneAsciiProgs", grepFiles isUmlaut noneAsciiProgFiles )
409
410 , ("sedNoneAsciiProgs", sedFiles substUmlauts noneAsciiProgFiles)
411 ]
412
413main :: IO ()
414main
415 = do
416 al <- getArgs
417 let fct = head . (++ ["-h"]) $ al
418 let dir = head . (++ [""]) . drop 1 $ al
419 ( fromMaybe usage . lookup fct $ actions) dir
420 return ()
421
422
423usage :: FilePath -> IO ()
424usage _dir
425 = do
426 pn <- getProgName
427 putStrLn ( "usage: " ++ pn ++ " [" ++ cmds ++ "] [dir]\n" )
428 where
429 cmds = foldl1 (\ x y -> x ++ " | " ++ y) . map fst $ actions
430
431-- ------------------------------
432
433-- tests working with current dir
434
435-- find all files with "none standard" names
436
437findBadNames = findFiles badNames "."
438
439-- find all HTML files containing none ASCII chars
440
441findHtmlLatin1 = findFiles htmlLatin1Files "."
442
443-- grep the lines of containing the none ASCII chars
444-- output in grep format to be used e.g. with emacs
445
446grepHtmlLatin1 = grepFiles isUmlaut htmlLatin1Files
447
448-- edit all none ASCII progam files for removal of umlauts
449-- Caution: make backup before running test
450
451sedNoneAsciiProgs = sedFiles substUmlauts noneAsciiProgFiles
452
453-- ------------------------------
|
|
Letzte Änderung: 27.03.2015 | © Prof. Dr. Uwe Schmidt |