{-# LANGUAGE FlexibleContexts #-}
module Text.Regex.XMLSchema.Generic.RegexParser
( parseRegex
, parseRegexExt
, parseRegex'
, parseRegexExt'
, parseContextRegex
)
where
import Data.Char.Properties.UnicodeBlocks
import Data.Char.Properties.UnicodeCharProps
import Data.Char.Properties.XMLCharProps
import Data.List (isPrefixOf,
isSuffixOf)
import Data.Maybe
import Data.Set.CharSet
import Text.ParserCombinators.Parsec
import Text.Regex.XMLSchema.Generic.Regex
import Text.Regex.XMLSchema.Generic.StringLike
parseRegex :: StringLike s => s -> GenRegex s
parseRegex :: forall s. StringLike s => s -> GenRegex s
parseRegex = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegex' (String -> GenRegex s) -> (s -> String) -> s -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> String
forall a. StringLike a => a -> String
toString
parseRegex' :: StringLike s => String -> GenRegex s
parseRegex' :: forall s. StringLike s => String -> GenRegex s
parseRegex' = Parser (GenRegex s) -> String -> GenRegex s
forall s.
StringLike s =>
Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpStd
parseRegexExt :: StringLike s => s -> GenRegex s
parseRegexExt :: forall s. StringLike s => s -> GenRegex s
parseRegexExt = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegexExt' (String -> GenRegex s) -> (s -> String) -> s -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> String
forall a. StringLike a => a -> String
toString
parseRegexExt' :: StringLike s => String -> GenRegex s
parseRegexExt' :: forall s. StringLike s => String -> GenRegex s
parseRegexExt' = Parser (GenRegex s) -> String -> GenRegex s
forall s.
StringLike s =>
Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpExt
parseRegex'' :: StringLike s => Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' :: forall s.
StringLike s =>
Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' Parser (GenRegex s)
regExp'
= (ParseError -> GenRegex s)
-> (GenRegex s -> GenRegex s)
-> Either ParseError (GenRegex s)
-> GenRegex s
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
mkZero' (String -> GenRegex s)
-> (ParseError -> String) -> ParseError -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"syntax error: " String -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String)
-> (ParseError -> String) -> ParseError -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParseError -> String
forall a. Show a => a -> String
show) GenRegex s -> GenRegex s
forall a. a -> a
id
(Either ParseError (GenRegex s) -> GenRegex s)
-> (String -> Either ParseError (GenRegex s))
-> String
-> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser (GenRegex s)
-> String -> String -> Either ParseError (GenRegex s)
forall s t a.
Stream s Identity t =>
Parsec s () a -> String -> s -> Either ParseError a
parse ( do
r <- Parser (GenRegex s)
regExp'
eof
return r
) String
""
parseContextRegex :: StringLike s => (String -> GenRegex s) -> s -> GenRegex s
parseContextRegex :: forall s. StringLike s => (String -> GenRegex s) -> s -> GenRegex s
parseContextRegex String -> GenRegex s
parseRe s
re0
= GenRegex s
re'
where
parseAW :: GenRegex s
parseAW = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegexExt' String
"(\\A\\W)?"
parseWA :: GenRegex s
parseWA = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegexExt' String
"(\\W\\A)?"
re :: String
re = s -> String
forall a. StringLike a => a -> String
toString s
re0
re' :: GenRegex s
re' = [GenRegex s] -> GenRegex s
forall s. [GenRegex s] -> GenRegex s
mkSeqs ([GenRegex s] -> GenRegex s)
-> ([[GenRegex s]] -> [GenRegex s]) -> [[GenRegex s]] -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[GenRegex s]] -> [GenRegex s]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[GenRegex s]] -> GenRegex s) -> [[GenRegex s]] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ [ [GenRegex s]
startContext
, (GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[]) (GenRegex s -> [GenRegex s])
-> (String -> GenRegex s) -> String -> [GenRegex s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> GenRegex s
parseRe (String -> [GenRegex s]) -> String -> [GenRegex s]
forall a b. (a -> b) -> a -> b
$ String
re2
, [GenRegex s]
endContext
]
([GenRegex s]
startContext, String
re1)
| String
"^" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
re = ([], String -> String
forall a. HasCallStack => [a] -> [a]
tail String
re)
| String
"\\<" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
re = ([GenRegex s
parseAW], Int -> String -> String
forall a. Int -> [a] -> [a]
drop Int
2 String
re)
| Bool
otherwise = ([GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s
mkStar GenRegex s
forall s. GenRegex s
mkDot], String
re)
([GenRegex s]
endContext, String
re2)
| String
"$" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` String
re1 = ([], String -> String
forall a. HasCallStack => [a] -> [a]
init String
re1)
| String
"\\>" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` String
re1 = ([GenRegex s
parseWA], String -> String
forall a. HasCallStack => [a] -> [a]
init (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. HasCallStack => [a] -> [a]
init (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
re1)
| Bool
otherwise = ([GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s
mkStar GenRegex s
forall s. GenRegex s
mkDot], String
re1)
regExpExt :: StringLike s => Parser (GenRegex s)
regExpExt :: forall s. StringLike s => Parser (GenRegex s)
regExpExt = Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
branchList Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
orElseList
regExpStd :: StringLike s => Parser (GenRegex s)
regExpStd :: forall s. StringLike s => Parser (GenRegex s)
regExpStd = Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
branchList Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
seqListStd
branchList :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
branchList :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
branchList Parser (GenRegex s)
exParser
= do
r1 <- Parser (GenRegex s)
exParser
rs <- many branchList1
return (foldr1 mkAlt $ r1:rs)
where
branchList1 :: Parser (GenRegex s)
branchList1
= do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'|'
exParser
orElseList :: StringLike s => Parser (GenRegex s)
orElseList :: forall s. StringLike s => Parser (GenRegex s)
orElseList
= do
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
interleaveList
rs <- many orElseList1
return (foldr1 mkElse $ r1:rs)
where
orElseList1 :: Parser (GenRegex s)
orElseList1
= do
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{|}")
interleaveList
interleaveList :: StringLike s => Parser (GenRegex s)
interleaveList :: forall s. StringLike s => Parser (GenRegex s)
interleaveList
= do
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
exorList
rs <- many interleaveList1
return (foldr1 mkInterleave $ r1:rs)
where
interleaveList1 :: Parser (GenRegex s)
interleaveList1
= do
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{:}")
exorList
exorList :: StringLike s => Parser (GenRegex s)
exorList :: forall s. StringLike s => Parser (GenRegex s)
exorList
= do
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
diffList
rs <- many exorList1
return (foldr1 mkExor $ r1:rs)
where
exorList1 :: Parser (GenRegex s)
exorList1
= do
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{^}")
diffList
diffList :: StringLike s => Parser (GenRegex s)
diffList :: forall s. StringLike s => Parser (GenRegex s)
diffList
= do
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
intersectList
rs <- many diffList1
return (foldl1 mkDiff $ r1:rs)
where
diffList1 :: Parser (GenRegex s)
diffList1
= do
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{\\}")
intersectList
intersectList :: StringLike s => Parser (GenRegex s)
intersectList :: forall s. StringLike s => Parser (GenRegex s)
intersectList
= do
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
seqListExt
rs <- many intersectList1
return (foldr1 mkIsect $ r1:rs)
where
intersectList1 :: Parser (GenRegex s)
intersectList1
= do
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{&}")
seqListExt
seqListExt :: StringLike s => Parser (GenRegex s)
seqListExt :: forall s. StringLike s => Parser (GenRegex s)
seqListExt = Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpLabel Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
multiCharEscExt
seqListStd :: StringLike s => Parser (GenRegex s)
seqListStd :: forall s. StringLike s => Parser (GenRegex s)
seqListStd = Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpStd Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
multiCharEsc
seqList' :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' Parser (GenRegex s)
regExp' Parser (GenRegex s)
multiCharEsc'
= do
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
piece
return $ mkSeqs rs
where
piece :: Parser (GenRegex s)
piece
= do
r <- Parser (GenRegex s)
atom
quantifier r
atom :: Parser (GenRegex s)
atom
= Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
char1
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
Parser (GenRegex s)
charClass
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> Parser (GenRegex s)
-> Parser (GenRegex s)
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'(') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
')') Parser (GenRegex s)
regExp'
charClass :: Parser (GenRegex s)
charClass
= Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc Parser (GenRegex s)
multiCharEsc'
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassExpr Parser (GenRegex s)
multiCharEsc'
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
wildCardEsc
quantifier :: StringLike s => GenRegex s -> Parser (GenRegex s)
quantifier :: forall s. StringLike s => GenRegex s -> Parser (GenRegex s)
quantifier GenRegex s
r
= ( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'?'
return $ mkOpt r )
ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'*'
return $ mkStar r )
ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'+'
return $ mkRep 1 r )
ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{'
res <- quantity r
_ <- char '}'
return res
)
ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return GenRegex s
r )
quantity :: StringLike s => GenRegex s -> Parser (GenRegex s)
quantity :: forall s. StringLike s => GenRegex s -> Parser (GenRegex s)
quantity GenRegex s
r
= do
lb <- ParsecT String () Identity Char -> GenParser Char () String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT String () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
quantityRest r (read lb)
quantityRest :: StringLike s => GenRegex s -> Int -> Parser (GenRegex s)
quantityRest :: forall s. StringLike s => GenRegex s -> Int -> Parser (GenRegex s)
quantityRest GenRegex s
r Int
lb
= ( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
','
ub <- many digit
return ( if null ub
then mkRep lb r
else mkRng lb (read ub) r
)
)
ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> ParsecT String () Identity (GenRegex s))
-> GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> GenRegex s -> GenRegex s
forall s. StringLike s => Int -> Int -> GenRegex s -> GenRegex s
mkRng Int
lb Int
lb GenRegex s
r)
regExpLabel :: StringLike s => Parser (GenRegex s)
regExpLabel :: forall s. StringLike s => Parser (GenRegex s)
regExpLabel
= do
lab <- (GenRegex s -> GenRegex s)
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option GenRegex s -> GenRegex s
forall a. a -> a
id (ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}') ParsecT String () Identity (GenRegex s -> GenRegex s)
forall {u}. ParsecT String u Identity (GenRegex s -> GenRegex s)
label')
r <- regExpExt
return $ lab r
where
label' :: ParsecT String u Identity (GenRegex s -> GenRegex s)
label'
= do
l <- ParsecT String u Identity Char -> ParsecT String u Identity String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 ((Char -> Bool) -> ParsecT String u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isXmlNameChar)
return $ mkBr' l
char1 :: StringLike s => Parser (GenRegex s)
char1 :: forall s. StringLike s => Parser (GenRegex s)
char1
= do
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
".\\?*+{}()|[]")
return $ mkSym1 c
charClassEsc :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc Parser (GenRegex s)
multiCharEsc'
= do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\\'
( singleCharEsc
<|>
multiCharEsc'
<|>
catEsc
<|>
complEsc )
singleCharEsc :: StringLike s => Parser (GenRegex s)
singleCharEsc :: forall s. StringLike s => Parser (GenRegex s)
singleCharEsc
= do
c <- ParsecT String () Identity Char
singleCharEsc'
return $ mkSym1 c
singleCharEsc' :: Parser Char
singleCharEsc' :: ParsecT String () Identity Char
singleCharEsc'
= do
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"nrt\\|.?*+(){}-[]^")
return $ maybe c id . lookup c . zip "ntr" $ "\n\r\t"
multiCharEscExt :: StringLike s => Parser (GenRegex s)
multiCharEscExt :: forall s. StringLike s => Parser (GenRegex s)
multiCharEscExt
= Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
multiCharEsc
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'a'
return mkDot )
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'A'
return mkAll )
multiCharEsc :: StringLike s => Parser (GenRegex s)
multiCharEsc :: forall s. StringLike s => Parser (GenRegex s)
multiCharEsc
= ( do
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
es)
return $ mkSym . fromJust . lookup c $ pm )
where
es :: String
es = ((Char, [(Char, Char)]) -> Char)
-> [(Char, [(Char, Char)])] -> String
forall a b. (a -> b) -> [a] -> [b]
map (Char, [(Char, Char)]) -> Char
forall a b. (a, b) -> a
fst [(Char, [(Char, Char)])]
pm
pm :: [(Char, [(Char, Char)])]
pm = [ (Char
's', [(Char, Char)]
charPropXmlSpaceChar )
, (Char
'S', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropXmlSpaceChar )
, (Char
'i', [(Char, Char)]
charPropXmlNameStartChar )
, (Char
'I', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropXmlNameStartChar )
, (Char
'c', [(Char, Char)]
charPropXmlNameChar )
, (Char
'C', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropXmlNameChar )
, (Char
'd', [(Char, Char)]
charPropDigit )
, (Char
'D', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropDigit )
, (Char
'w', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropNotWord )
, (Char
'W', [(Char, Char)]
charPropNotWord )
]
charPropDigit :: [(Char, Char)]
charPropDigit = Char -> Char -> [(Char, Char)]
rangeCS Char
'0' Char
'9'
charPropNotWord :: [(Char, Char)]
charPropNotWord = [(Char, Char)]
charPropUnicodeP
[(Char, Char)] -> [(Char, Char)] -> [(Char, Char)]
`unionCS`
[(Char, Char)]
charPropUnicodeZ
[(Char, Char)] -> [(Char, Char)] -> [(Char, Char)]
`unionCS`
[(Char, Char)]
charPropUnicodeC
catEsc :: StringLike s => Parser (GenRegex s)
catEsc :: forall s. StringLike s => Parser (GenRegex s)
catEsc
= do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'p'
s <- between (char '{') (char '}') charProp
return $ mkSym s
charProp :: Parser CharSet
charProp :: ParsecT String () Identity [(Char, Char)]
charProp
= ParsecT String () Identity [(Char, Char)]
isCategory
ParsecT String () Identity [(Char, Char)]
-> ParsecT String () Identity [(Char, Char)]
-> ParsecT String () Identity [(Char, Char)]
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
ParsecT String () Identity [(Char, Char)]
isBlock
isBlock :: Parser CharSet
isBlock :: ParsecT String () Identity [(Char, Char)]
isBlock
= do
_ <- String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"Is"
name <- many1 (satisfy legalChar)
case lookup name codeBlocks of
Just (Char, Char)
b -> [(Char, Char)] -> ParsecT String () Identity [(Char, Char)]
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Char, Char)] -> ParsecT String () Identity [(Char, Char)])
-> [(Char, Char)] -> ParsecT String () Identity [(Char, Char)]
forall a b. (a -> b) -> a -> b
$ (Char -> Char -> [(Char, Char)]) -> (Char, Char) -> [(Char, Char)]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Char -> Char -> [(Char, Char)]
rangeCS (Char, Char)
b
Maybe (Char, Char)
Nothing -> String -> ParsecT String () Identity [(Char, Char)]
forall a. String -> ParsecT String () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> ParsecT String () Identity [(Char, Char)])
-> String -> ParsecT String () Identity [(Char, Char)]
forall a b. (a -> b) -> a -> b
$ String
"unknown Unicode code block " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
name
where
legalChar :: Char -> Bool
legalChar Char
c = Char
'A' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z' Bool -> Bool -> Bool
||
Char
'a' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z' Bool -> Bool -> Bool
||
Char
'0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9' Bool -> Bool -> Bool
||
Char
'-' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c
isCategory :: Parser CharSet
isCategory :: ParsecT String () Identity [(Char, Char)]
isCategory
= do
pr <- GenParser Char () String
isCategory'
return $ fromJust (lookup pr categories)
categories :: [(String, CharSet)]
categories :: [(String, [(Char, Char)])]
categories
= [ (String
"C", [(Char, Char)]
charPropUnicodeC )
, (String
"Cc", [(Char, Char)]
charPropUnicodeCc)
, (String
"Cf", [(Char, Char)]
charPropUnicodeCf)
, (String
"Co", [(Char, Char)]
charPropUnicodeCo)
, (String
"Cs", [(Char, Char)]
charPropUnicodeCs)
, (String
"L", [(Char, Char)]
charPropUnicodeL )
, (String
"Ll", [(Char, Char)]
charPropUnicodeLl)
, (String
"Lm", [(Char, Char)]
charPropUnicodeLm)
, (String
"Lo", [(Char, Char)]
charPropUnicodeLo)
, (String
"Lt", [(Char, Char)]
charPropUnicodeLt)
, (String
"Lu", [(Char, Char)]
charPropUnicodeLu)
, (String
"M", [(Char, Char)]
charPropUnicodeM )
, (String
"Mc", [(Char, Char)]
charPropUnicodeMc)
, (String
"Me", [(Char, Char)]
charPropUnicodeMe)
, (String
"Mn", [(Char, Char)]
charPropUnicodeMn)
, (String
"N", [(Char, Char)]
charPropUnicodeN )
, (String
"Nd", [(Char, Char)]
charPropUnicodeNd)
, (String
"Nl", [(Char, Char)]
charPropUnicodeNl)
, (String
"No", [(Char, Char)]
charPropUnicodeNo)
, (String
"P", [(Char, Char)]
charPropUnicodeP )
, (String
"Pc", [(Char, Char)]
charPropUnicodePc)
, (String
"Pd", [(Char, Char)]
charPropUnicodePd)
, (String
"Pe", [(Char, Char)]
charPropUnicodePe)
, (String
"Pf", [(Char, Char)]
charPropUnicodePf)
, (String
"Pi", [(Char, Char)]
charPropUnicodePi)
, (String
"Po", [(Char, Char)]
charPropUnicodePo)
, (String
"Ps", [(Char, Char)]
charPropUnicodePs)
, (String
"S", [(Char, Char)]
charPropUnicodeS )
, (String
"Sc", [(Char, Char)]
charPropUnicodeSc)
, (String
"Sk", [(Char, Char)]
charPropUnicodeSk)
, (String
"Sm", [(Char, Char)]
charPropUnicodeSm)
, (String
"So", [(Char, Char)]
charPropUnicodeSo)
, (String
"Z", [(Char, Char)]
charPropUnicodeZ )
, (String
"Zl", [(Char, Char)]
charPropUnicodeZl)
, (String
"Zp", [(Char, Char)]
charPropUnicodeZp)
, (String
"Zs", [(Char, Char)]
charPropUnicodeZs)
]
isCategory' :: Parser String
isCategory' :: GenParser Char () String
isCategory'
= ( (GenParser Char () String
-> GenParser Char () String -> GenParser Char () String)
-> [GenParser Char () String] -> GenParser Char () String
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenParser Char () String
-> GenParser Char () String -> GenParser Char () String
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
(<|>) ([GenParser Char () String] -> GenParser Char () String)
-> ([(Char, String)] -> [GenParser Char () String])
-> [(Char, String)]
-> GenParser Char () String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char, String) -> GenParser Char () String)
-> [(Char, String)] -> [GenParser Char () String]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> String -> GenParser Char () String)
-> (Char, String) -> GenParser Char () String
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Char -> String -> GenParser Char () String
forall {s} {m :: * -> *} {t :: * -> *} {u}.
(Stream s m Char, Foldable t) =>
Char -> t Char -> ParsecT s u m String
prop) ([(Char, String)] -> GenParser Char () String)
-> [(Char, String)] -> GenParser Char () String
forall a b. (a -> b) -> a -> b
$
[ (Char
'L', String
"ultmo")
, (Char
'M', String
"nce")
, (Char
'N', String
"dlo")
, (Char
'P', String
"cdseifo")
, (Char
'Z', String
"slp")
, (Char
'S', String
"mcko")
, (Char
'C', String
"cfon")
]
) GenParser Char () String -> String -> GenParser Char () String
forall s u (m :: * -> *) a.
ParsecT s u m a -> String -> ParsecT s u m a
<?> String
"illegal Unicode character property"
where
prop :: Char -> t Char -> ParsecT s u m String
prop Char
c1 t Char
cs2
= do
_ <- Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
c1
s2 <- option ""
( do
c2 <- satisfy (`elem` cs2)
return [c2] )
return $ c1:s2
complEsc :: StringLike s => Parser (GenRegex s)
complEsc :: forall s. StringLike s => Parser (GenRegex s)
complEsc
= do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'P'
s <- between (char '{') (char '}') charProp
return $ mkSym $ compCS s
charClassExpr :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
charClassExpr :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassExpr Parser (GenRegex s)
multiCharEsc'
= ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> Parser (GenRegex s)
-> Parser (GenRegex s)
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'[') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
']') Parser (GenRegex s)
charGroup
where
charGroup :: Parser (GenRegex s)
charGroup
= do
r <- ( Parser (GenRegex s)
negCharGroup
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
Parser (GenRegex s)
posCharGroup
)
s <- option (mkZero' "")
( do
_ <- char '-'
charClassExpr multiCharEsc'
)
return $ mkDiff r s
posCharGroup :: Parser (GenRegex s)
posCharGroup
= do
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many1 (Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
charRange Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc Parser (GenRegex s)
multiCharEsc')
return $ foldr1 mkAlt rs
negCharGroup :: Parser (GenRegex s)
negCharGroup
= do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'^'
r <- posCharGroup
return $ mkDiff mkDot r
charRange :: StringLike s => Parser (GenRegex s)
charRange :: forall s. StringLike s => Parser (GenRegex s)
charRange
= GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall tok st a. GenParser tok st a -> GenParser tok st a
try GenParser Char () (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
seRange
GenParser Char () (GenRegex s)
-> GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
GenParser Char () (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
xmlCharIncDash
seRange :: StringLike s => Parser (GenRegex s)
seRange :: forall s. StringLike s => Parser (GenRegex s)
seRange
= do
c1 <- ParsecT String () Identity Char
charOrEsc'
_ <- char '-'
c2 <- charOrEsc'
return $ mkSymRng c1 c2
charOrEsc' :: Parser Char
charOrEsc' :: ParsecT String () Identity Char
charOrEsc'
= ( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\\'
singleCharEsc'
)
ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
(Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
"\\-[]")
xmlCharIncDash :: StringLike s => Parser (GenRegex s)
xmlCharIncDash :: forall s. StringLike s => Parser (GenRegex s)
xmlCharIncDash
= GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'-'
notFollowedBy (char '[')
return $ mkSym1 '-'
)
GenParser Char () (GenRegex s)
-> GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
( do
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
"-\\[]")
return $ mkSym1 c
)
wildCardEsc :: StringLike s => Parser (GenRegex s)
wildCardEsc :: forall s. StringLike s => Parser (GenRegex s)
wildCardEsc
= do
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'.'
return . mkSym . compCS $ stringCS "\n\r"