{-# LANGUAGE FlexibleContexts #-}

-- ------------------------------------------------------------

{- |
   Module     : Text.Regex.XMLSchema.RegexParser
   Copyright  : Copyright (C) 2014- Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   W3C XML Schema Regular Expression Parser

   This parser supports the full W3C standard, the
   complete grammar can be found under <http://www.w3.org/TR/xmlschema11-2/#regexs>
   and extensions for all missing set operations, intersection,
   difference, exclusive or, interleave, complement

-}

-- ------------------------------------------------------------

module Text.Regex.XMLSchema.Generic.RegexParser
    ( parseRegex
    , parseRegexExt
    , parseRegex'
    , parseRegexExt'
    , parseContextRegex
    )
where

import           Data.Char.Properties.UnicodeBlocks
import           Data.Char.Properties.UnicodeCharProps
import           Data.Char.Properties.XMLCharProps

import           Data.List                               (isPrefixOf,
                                                          isSuffixOf)
import           Data.Maybe
import           Data.Set.CharSet

import           Text.ParserCombinators.Parsec
import           Text.Regex.XMLSchema.Generic.Regex
import           Text.Regex.XMLSchema.Generic.StringLike

-- ------------------------------------------------------------

-- | parse a standard W3C XML Schema regular expression

parseRegex              :: StringLike s => s -> GenRegex s
parseRegex :: forall s. StringLike s => s -> GenRegex s
parseRegex              = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegex' (String -> GenRegex s) -> (s -> String) -> s -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> String
forall a. StringLike a => a -> String
toString

parseRegex'             :: StringLike s => String -> GenRegex s
parseRegex' :: forall s. StringLike s => String -> GenRegex s
parseRegex'             = Parser (GenRegex s) -> String -> GenRegex s
forall s.
StringLike s =>
Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpStd

-- | parse an extended syntax W3C XML Schema regular expression
--
-- The Syntax of the W3C XML Schema spec is extended by
-- further useful set operations, like intersection, difference, exor.
-- Subexpression match becomes possible with \"named\" pairs of parentheses.
-- The multi char escape sequence \\a represents any Unicode char,
-- The multi char escape sequence \\A represents any Unicode word, (\\A = \\a*).
-- All syntactically wrong inputs are mapped to the Zero expression representing the
-- empty set of words. Zero contains as data field a string for an error message.
-- So error checking after parsing becomes possible by checking against Zero ('isZero' predicate)

parseRegexExt   :: StringLike s => s -> GenRegex s
parseRegexExt :: forall s. StringLike s => s -> GenRegex s
parseRegexExt   = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegexExt' (String -> GenRegex s) -> (s -> String) -> s -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. s -> String
forall a. StringLike a => a -> String
toString

parseRegexExt'  :: StringLike s => String -> GenRegex s
parseRegexExt' :: forall s. StringLike s => String -> GenRegex s
parseRegexExt'  = Parser (GenRegex s) -> String -> GenRegex s
forall s.
StringLike s =>
Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpExt

parseRegex'' :: StringLike s => Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' :: forall s.
StringLike s =>
Parser (GenRegex s) -> String -> GenRegex s
parseRegex'' Parser (GenRegex s)
regExp'
  = (ParseError -> GenRegex s)
-> (GenRegex s -> GenRegex s)
-> Either ParseError (GenRegex s)
-> GenRegex s
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
mkZero' (String -> GenRegex s)
-> (ParseError -> String) -> ParseError -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (String
"syntax error: " String -> String -> String
forall a. [a] -> [a] -> [a]
++) (String -> String)
-> (ParseError -> String) -> ParseError -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParseError -> String
forall a. Show a => a -> String
show) GenRegex s -> GenRegex s
forall a. a -> a
id
    (Either ParseError (GenRegex s) -> GenRegex s)
-> (String -> Either ParseError (GenRegex s))
-> String
-> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser (GenRegex s)
-> String -> String -> Either ParseError (GenRegex s)
forall s t a.
Stream s Identity t =>
Parsec s () a -> String -> s -> Either ParseError a
parse ( do
                 GenRegex s
r <- Parser (GenRegex s)
regExp'
                 ParsecT String () Identity ()
forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof
                 GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return GenRegex s
r
            ) String
""

-- ------------------------------------------------------------

-- | parse a regular expression surrounded by contenxt spec
--
-- a leading @^@ denotes start of text,
-- a trailing @$@ denotes end of text,
-- a leading @\\<@ denotes word start,
-- a trailing @\\>@ denotes word end.
--
-- The 1. param ist the regex parser ('parseRegex' or 'parseRegexExt')

parseContextRegex :: StringLike s => (String -> GenRegex s) -> s -> GenRegex s
parseContextRegex :: forall s. StringLike s => (String -> GenRegex s) -> s -> GenRegex s
parseContextRegex String -> GenRegex s
parseRe s
re0
    = GenRegex s
re'
    where
      parseAW :: GenRegex s
parseAW = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegexExt' String
"(\\A\\W)?"
      parseWA :: GenRegex s
parseWA = String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
parseRegexExt' String
"(\\W\\A)?"

      re :: String
re  = s -> String
forall a. StringLike a => a -> String
toString s
re0
      re' :: GenRegex s
re' = [GenRegex s] -> GenRegex s
forall s. [GenRegex s] -> GenRegex s
mkSeqs ([GenRegex s] -> GenRegex s)
-> ([[GenRegex s]] -> [GenRegex s]) -> [[GenRegex s]] -> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[GenRegex s]] -> [GenRegex s]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[GenRegex s]] -> GenRegex s) -> [[GenRegex s]] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ [ [GenRegex s]
startContext
                              , (GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[]) (GenRegex s -> [GenRegex s])
-> (String -> GenRegex s) -> String -> [GenRegex s]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> GenRegex s
parseRe (String -> [GenRegex s]) -> String -> [GenRegex s]
forall a b. (a -> b) -> a -> b
$ String
re2
                              , [GenRegex s]
endContext
                              ]
      ([GenRegex s]
startContext, String
re1)
          | String
"^"   String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
re   = ([],             String -> String
forall a. HasCallStack => [a] -> [a]
tail   String
re)
          | String
"\\<" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` String
re   = ([GenRegex s
parseAW],      Int -> String -> String
forall a. Int -> [a] -> [a]
drop Int
2 String
re)
          | Bool
otherwise               = ([GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s
mkStar GenRegex s
forall s. GenRegex s
mkDot],        String
re)
      ([GenRegex s]
endContext, String
re2)
          | String
"$"   String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` String
re1  = ([],             String -> String
forall a. HasCallStack => [a] -> [a]
init          String
re1)
          | String
"\\>" String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isSuffixOf` String
re1  = ([GenRegex s
parseWA],      String -> String
forall a. HasCallStack => [a] -> [a]
init (String -> String) -> (String -> String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String
forall a. HasCallStack => [a] -> [a]
init (String -> String) -> String -> String
forall a b. (a -> b) -> a -> b
$ String
re1)
          | Bool
otherwise               = ([GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s
mkStar GenRegex s
forall s. GenRegex s
mkDot],               String
re1)

-- ------------------------------------------------------------

regExpExt  :: StringLike s => Parser (GenRegex s)
regExpExt :: forall s. StringLike s => Parser (GenRegex s)
regExpExt  = Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
branchList Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
orElseList

regExpStd  :: StringLike s => Parser (GenRegex s)
regExpStd :: forall s. StringLike s => Parser (GenRegex s)
regExpStd  = Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
branchList Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
seqListStd

branchList :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
branchList :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
branchList Parser (GenRegex s)
exParser
    = do
      GenRegex s
r1 <- Parser (GenRegex s)
exParser
      [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
branchList1
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenRegex s -> GenRegex s -> GenRegex s)
-> [GenRegex s] -> GenRegex s
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkAlt ([GenRegex s] -> GenRegex s) -> [GenRegex s] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ GenRegex s
r1GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[GenRegex s]
rs)     -- union is associative, so we use right ass.
                                        -- as with seq, alt and exor
    where
    branchList1 :: Parser (GenRegex s)
branchList1
        = do
          Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'|'
          Parser (GenRegex s)
exParser

orElseList      :: StringLike s => Parser (GenRegex s)
orElseList :: forall s. StringLike s => Parser (GenRegex s)
orElseList
    = do
      GenRegex s
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
interleaveList
      [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
orElseList1
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenRegex s -> GenRegex s -> GenRegex s)
-> [GenRegex s] -> GenRegex s
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkElse ([GenRegex s] -> GenRegex s) -> [GenRegex s] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ GenRegex s
r1GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[GenRegex s]
rs)    -- orElse is associative, so we choose right ass.
                                        -- as with seq and alt ops
    where
    orElseList1 :: Parser (GenRegex s)
orElseList1
        = do
          String
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{|}")
          Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
interleaveList

interleaveList  :: StringLike s => Parser (GenRegex s)
interleaveList :: forall s. StringLike s => Parser (GenRegex s)
interleaveList
    = do
      GenRegex s
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
exorList
      [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
interleaveList1
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenRegex s -> GenRegex s -> GenRegex s)
-> [GenRegex s] -> GenRegex s
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenRegex s -> GenRegex s -> GenRegex s
forall s. GenRegex s -> GenRegex s -> GenRegex s
mkInterleave ([GenRegex s] -> GenRegex s) -> [GenRegex s] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ GenRegex s
r1GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[GenRegex s]
rs)      -- interleave is associative, so we choose right ass.
                                                -- as with seq and alt ops
    where
    interleaveList1 :: Parser (GenRegex s)
interleaveList1
        = do
          String
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{:}")
          Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
exorList

exorList        :: StringLike s => Parser (GenRegex s)
exorList :: forall s. StringLike s => Parser (GenRegex s)
exorList
    = do
      GenRegex s
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
diffList
      [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
exorList1
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenRegex s -> GenRegex s -> GenRegex s)
-> [GenRegex s] -> GenRegex s
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkExor ([GenRegex s] -> GenRegex s) -> [GenRegex s] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ GenRegex s
r1GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[GenRegex s]
rs)    -- exor is associative, so we choose right ass.
    where
    exorList1 :: Parser (GenRegex s)
exorList1
        = do
          String
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{^}")
          Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
diffList

diffList        :: StringLike s => Parser (GenRegex s)
diffList :: forall s. StringLike s => Parser (GenRegex s)
diffList
    = do
      GenRegex s
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
intersectList
      [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
diffList1
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenRegex s -> GenRegex s -> GenRegex s)
-> [GenRegex s] -> GenRegex s
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldl1 GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkDiff ([GenRegex s] -> GenRegex s) -> [GenRegex s] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ GenRegex s
r1GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[GenRegex s]
rs)    -- diff is not associative, so we choose left ass.
    where
    diffList1 :: Parser (GenRegex s)
diffList1
        = do
          String
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{\\}")
          Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
intersectList

intersectList   :: StringLike s => Parser (GenRegex s)
intersectList :: forall s. StringLike s => Parser (GenRegex s)
intersectList
    = do
      GenRegex s
r1 <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
seqListExt
      [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
intersectList1
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenRegex s -> GenRegex s -> GenRegex s)
-> [GenRegex s] -> GenRegex s
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkIsect ([GenRegex s] -> GenRegex s) -> [GenRegex s] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ GenRegex s
r1GenRegex s -> [GenRegex s] -> [GenRegex s]
forall a. a -> [a] -> [a]
:[GenRegex s]
rs)
    where
    intersectList1 :: Parser (GenRegex s)
intersectList1
        = do
          String
_ <- GenParser Char () String -> GenParser Char () String
forall tok st a. GenParser tok st a -> GenParser tok st a
try (String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"{&}")
          Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
seqListExt

seqListExt      :: StringLike s => Parser (GenRegex s)
seqListExt :: forall s. StringLike s => Parser (GenRegex s)
seqListExt      = Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpLabel Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
multiCharEscExt

seqListStd      :: StringLike s => Parser (GenRegex s)
seqListStd :: forall s. StringLike s => Parser (GenRegex s)
seqListStd      = Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpStd Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
multiCharEsc

seqList'        :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
seqList' Parser (GenRegex s)
regExp' Parser (GenRegex s)
multiCharEsc'
    = do
      [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many Parser (GenRegex s)
piece
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ [GenRegex s] -> GenRegex s
forall s. [GenRegex s] -> GenRegex s
mkSeqs [GenRegex s]
rs
    where
    -- piece :: StringLike s => Parser (GenRegex s)
    piece :: Parser (GenRegex s)
piece
        = do
          GenRegex s
r <- Parser (GenRegex s)
atom
          GenRegex s -> Parser (GenRegex s)
forall s. StringLike s => GenRegex s -> Parser (GenRegex s)
quantifier GenRegex s
r

    -- atom :: StringLike s => Parser (GenRegex s)
    atom :: Parser (GenRegex s)
atom
        = Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
char1
          Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
          Parser (GenRegex s)
charClass
          Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
          ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> Parser (GenRegex s)
-> Parser (GenRegex s)
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'(') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
')') Parser (GenRegex s)
regExp'

    -- charClass :: StringLike s => Parser (GenRegex s)
    charClass :: Parser (GenRegex s)
charClass
        = Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc Parser (GenRegex s)
multiCharEsc'
          Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
          Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassExpr Parser (GenRegex s)
multiCharEsc'
          Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
          Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
wildCardEsc



quantifier      :: StringLike s => GenRegex s -> Parser (GenRegex s)
quantifier :: forall s. StringLike s => GenRegex s -> Parser (GenRegex s)
quantifier GenRegex s
r
    = ( do
        Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'?'
        GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> ParsecT String () Identity (GenRegex s))
-> GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a b. (a -> b) -> a -> b
$ GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s
mkOpt GenRegex s
r )
      ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do
        Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'*'
        GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> ParsecT String () Identity (GenRegex s))
-> GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a b. (a -> b) -> a -> b
$ GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s
mkStar GenRegex s
r )
      ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do
        Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'+'
        GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> ParsecT String () Identity (GenRegex s))
-> GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Int -> GenRegex s -> GenRegex s
forall s. StringLike s => Int -> GenRegex s -> GenRegex s
mkRep Int
1 GenRegex s
r )
      ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do
            Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{'
            GenRegex s
res <- GenRegex s -> ParsecT String () Identity (GenRegex s)
forall s. StringLike s => GenRegex s -> Parser (GenRegex s)
quantity GenRegex s
r
            Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}'
            GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return GenRegex s
res
          )
      ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return GenRegex s
r )

quantity        :: StringLike s => GenRegex s -> Parser (GenRegex s)
quantity :: forall s. StringLike s => GenRegex s -> Parser (GenRegex s)
quantity GenRegex s
r
    = do
      String
lb <- ParsecT String () Identity Char -> GenParser Char () String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ParsecT String () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
      GenRegex s -> Int -> Parser (GenRegex s)
forall s. StringLike s => GenRegex s -> Int -> Parser (GenRegex s)
quantityRest GenRegex s
r (String -> Int
forall a. Read a => String -> a
read String
lb)

quantityRest    :: StringLike s => GenRegex s -> Int -> Parser (GenRegex s)
quantityRest :: forall s. StringLike s => GenRegex s -> Int -> Parser (GenRegex s)
quantityRest GenRegex s
r Int
lb
    = ( do
        Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
','
        String
ub <- ParsecT String () Identity Char -> GenParser Char () String
forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many ParsecT String () Identity Char
forall s (m :: * -> *) u. Stream s m Char => ParsecT s u m Char
digit
        GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ( if String -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
ub
                 then Int -> GenRegex s -> GenRegex s
forall s. StringLike s => Int -> GenRegex s -> GenRegex s
mkRep Int
lb GenRegex s
r
                 else Int -> Int -> GenRegex s -> GenRegex s
forall s. StringLike s => Int -> Int -> GenRegex s -> GenRegex s
mkRng Int
lb (String -> Int
forall a. Read a => String -> a
read String
ub) GenRegex s
r
               )
      )
      ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
-> ParsecT String () Identity (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> ParsecT String () Identity (GenRegex s))
-> GenRegex s -> ParsecT String () Identity (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> GenRegex s -> GenRegex s
forall s. StringLike s => Int -> Int -> GenRegex s -> GenRegex s
mkRng Int
lb Int
lb GenRegex s
r)

regExpLabel :: StringLike s => Parser (GenRegex s)
regExpLabel :: forall s. StringLike s => Parser (GenRegex s)
regExpLabel
    = do
      GenRegex s -> GenRegex s
lab <- (GenRegex s -> GenRegex s)
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option GenRegex s -> GenRegex s
forall a. a -> a
id (ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
-> ParsecT String () Identity (GenRegex s -> GenRegex s)
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}') ParsecT String () Identity (GenRegex s -> GenRegex s)
forall {u}. ParsecT String u Identity (GenRegex s -> GenRegex s)
label')
      GenRegex s
r    <- Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
regExpExt
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ GenRegex s -> GenRegex s
lab GenRegex s
r
    where
    label' :: ParsecT String u Identity (GenRegex s -> GenRegex s)
label'
        = do
          String
l <- ParsecT String u Identity Char -> ParsecT String u Identity String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ((Char -> Bool) -> ParsecT String u Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
isXmlNameChar)
          (GenRegex s -> GenRegex s)
-> ParsecT String u Identity (GenRegex s -> GenRegex s)
forall a. a -> ParsecT String u Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ((GenRegex s -> GenRegex s)
 -> ParsecT String u Identity (GenRegex s -> GenRegex s))
-> (GenRegex s -> GenRegex s)
-> ParsecT String u Identity (GenRegex s -> GenRegex s)
forall a b. (a -> b) -> a -> b
$ String -> GenRegex s -> GenRegex s
forall s. StringLike s => String -> GenRegex s -> GenRegex s
mkBr' String
l

char1   :: StringLike s => Parser (GenRegex s)
char1 :: forall s. StringLike s => Parser (GenRegex s)
char1
    = do
      Char
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
".\\?*+{}()|[]")
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Char -> GenRegex s
forall s. StringLike s => Char -> GenRegex s
mkSym1 Char
c

charClassEsc    :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc Parser (GenRegex s)
multiCharEsc'
    = do
      Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\\'
      ( Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
singleCharEsc
        Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
        Parser (GenRegex s)
multiCharEsc'
        Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
        Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
catEsc
        Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
        Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
complEsc )

singleCharEsc   :: StringLike s => Parser (GenRegex s)
singleCharEsc :: forall s. StringLike s => Parser (GenRegex s)
singleCharEsc
    = do
      Char
c <- ParsecT String () Identity Char
singleCharEsc'
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Char -> GenRegex s
forall s. StringLike s => Char -> GenRegex s
mkSym1 Char
c

singleCharEsc'  :: Parser Char
singleCharEsc' :: ParsecT String () Identity Char
singleCharEsc'
    = do
      Char
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"nrt\\|.?*+(){}-[]^")
      Char -> ParsecT String () Identity Char
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> ParsecT String () Identity Char)
-> Char -> ParsecT String () Identity Char
forall a b. (a -> b) -> a -> b
$ Char -> (Char -> Char) -> Maybe Char -> Char
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Char
c Char -> Char
forall a. a -> a
id (Maybe Char -> Char) -> (String -> Maybe Char) -> String -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> [(Char, Char)] -> Maybe Char
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Char
c ([(Char, Char)] -> Maybe Char)
-> (String -> [(Char, Char)]) -> String -> Maybe Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> String -> [(Char, Char)]
forall a b. [a] -> [b] -> [(a, b)]
zip String
"ntr" (String -> Char) -> String -> Char
forall a b. (a -> b) -> a -> b
$ String
"\n\r\t"

multiCharEscExt    :: StringLike s => Parser (GenRegex s)
multiCharEscExt :: forall s. StringLike s => Parser (GenRegex s)
multiCharEscExt
    = Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
multiCharEsc
      Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do                      -- extension: \a represents the whole alphabet inclusive newline chars: \a == .|\n|\r
        Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'a'
        GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return GenRegex s
forall s. GenRegex s
mkDot )
      Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do                      -- extension: \A represents all words: \A == \a* or \A == (.|\n|\r)*
        Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'A'
        GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return GenRegex s
forall s. StringLike s => GenRegex s
mkAll )

multiCharEsc    :: StringLike s => Parser (GenRegex s)
multiCharEsc :: forall s. StringLike s => Parser (GenRegex s)
multiCharEsc
    = ( do
        Char
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
es)
        GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ [(Char, Char)] -> GenRegex s
forall s. StringLike s => [(Char, Char)] -> GenRegex s
mkSym ([(Char, Char)] -> GenRegex s)
-> ([(Char, [(Char, Char)])] -> [(Char, Char)])
-> [(Char, [(Char, Char)])]
-> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe [(Char, Char)] -> [(Char, Char)]
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe [(Char, Char)] -> [(Char, Char)])
-> ([(Char, [(Char, Char)])] -> Maybe [(Char, Char)])
-> [(Char, [(Char, Char)])]
-> [(Char, Char)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> [(Char, [(Char, Char)])] -> Maybe [(Char, Char)]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Char
c ([(Char, [(Char, Char)])] -> GenRegex s)
-> [(Char, [(Char, Char)])] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ [(Char, [(Char, Char)])]
pm )
    where
    es :: String
es = ((Char, [(Char, Char)]) -> Char)
-> [(Char, [(Char, Char)])] -> String
forall a b. (a -> b) -> [a] -> [b]
map (Char, [(Char, Char)]) -> Char
forall a b. (a, b) -> a
fst [(Char, [(Char, Char)])]
pm
    pm :: [(Char, [(Char, Char)])]
pm = [ (Char
's',        [(Char, Char)]
charPropXmlSpaceChar          )
         , (Char
'S', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropXmlSpaceChar          )
         , (Char
'i',        [(Char, Char)]
charPropXmlNameStartChar      )
         , (Char
'I', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropXmlNameStartChar      )
         , (Char
'c',        [(Char, Char)]
charPropXmlNameChar           )
         , (Char
'C', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropXmlNameChar           )
         , (Char
'd',        [(Char, Char)]
charPropDigit                 )
         , (Char
'D', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropDigit                 )
         , (Char
'w', [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
charPropNotWord               )
         , (Char
'W',        [(Char, Char)]
charPropNotWord               )
         ]
    charPropDigit :: [(Char, Char)]
charPropDigit   = Char -> Char -> [(Char, Char)]
rangeCS Char
'0' Char
'9'
    charPropNotWord :: [(Char, Char)]
charPropNotWord = [(Char, Char)]
charPropUnicodeP
                      [(Char, Char)] -> [(Char, Char)] -> [(Char, Char)]
`unionCS`
                      [(Char, Char)]
charPropUnicodeZ
                      [(Char, Char)] -> [(Char, Char)] -> [(Char, Char)]
`unionCS`
                      [(Char, Char)]
charPropUnicodeC

catEsc  :: StringLike s => Parser (GenRegex s)
catEsc :: forall s. StringLike s => Parser (GenRegex s)
catEsc
    = do
      Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'p'
      [(Char, Char)]
s <- ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity [(Char, Char)]
-> ParsecT String () Identity [(Char, Char)]
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}') ParsecT String () Identity [(Char, Char)]
charProp
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ [(Char, Char)] -> GenRegex s
forall s. StringLike s => [(Char, Char)] -> GenRegex s
mkSym [(Char, Char)]
s

charProp        :: Parser CharSet
charProp :: ParsecT String () Identity [(Char, Char)]
charProp
    = ParsecT String () Identity [(Char, Char)]
isCategory
      ParsecT String () Identity [(Char, Char)]
-> ParsecT String () Identity [(Char, Char)]
-> ParsecT String () Identity [(Char, Char)]
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ParsecT String () Identity [(Char, Char)]
isBlock

isBlock         :: Parser CharSet
isBlock :: ParsecT String () Identity [(Char, Char)]
isBlock
    = do
      String
_ <- String -> GenParser Char () String
forall s (m :: * -> *) u.
Stream s m Char =>
String -> ParsecT s u m String
string String
"Is"
      String
name <- ParsecT String () Identity Char -> GenParser Char () String
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 ((Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy Char -> Bool
legalChar)
      case String -> [(String, (Char, Char))] -> Maybe (Char, Char)
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
name [(String, (Char, Char))]
codeBlocks of
        Just (Char, Char)
b  -> [(Char, Char)] -> ParsecT String () Identity [(Char, Char)]
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Char, Char)] -> ParsecT String () Identity [(Char, Char)])
-> [(Char, Char)] -> ParsecT String () Identity [(Char, Char)]
forall a b. (a -> b) -> a -> b
$ (Char -> Char -> [(Char, Char)]) -> (Char, Char) -> [(Char, Char)]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Char -> Char -> [(Char, Char)]
rangeCS (Char, Char)
b
        Maybe (Char, Char)
Nothing -> String -> ParsecT String () Identity [(Char, Char)]
forall a. String -> ParsecT String () Identity a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> ParsecT String () Identity [(Char, Char)])
-> String -> ParsecT String () Identity [(Char, Char)]
forall a b. (a -> b) -> a -> b
$ String
"unknown Unicode code block " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String -> String
forall a. Show a => a -> String
show String
name
    where
    legalChar :: Char -> Bool
legalChar Char
c  = Char
'A' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'Z' Bool -> Bool -> Bool
||
                   Char
'a' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'z' Bool -> Bool -> Bool
||
                   Char
'0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9' Bool -> Bool -> Bool
||
                   Char
'-' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c

isCategory      :: Parser CharSet
isCategory :: ParsecT String () Identity [(Char, Char)]
isCategory
    = do
      String
pr <- GenParser Char () String
isCategory'
      [(Char, Char)] -> ParsecT String () Identity [(Char, Char)]
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return ([(Char, Char)] -> ParsecT String () Identity [(Char, Char)])
-> [(Char, Char)] -> ParsecT String () Identity [(Char, Char)]
forall a b. (a -> b) -> a -> b
$ Maybe [(Char, Char)] -> [(Char, Char)]
forall a. HasCallStack => Maybe a -> a
fromJust (String -> [(String, [(Char, Char)])] -> Maybe [(Char, Char)]
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup String
pr [(String, [(Char, Char)])]
categories)

categories      :: [(String, CharSet)]
categories :: [(String, [(Char, Char)])]
categories
    = [ (String
"C",  [(Char, Char)]
charPropUnicodeC )
      , (String
"Cc", [(Char, Char)]
charPropUnicodeCc)
      , (String
"Cf", [(Char, Char)]
charPropUnicodeCf)
      , (String
"Co", [(Char, Char)]
charPropUnicodeCo)
      , (String
"Cs", [(Char, Char)]
charPropUnicodeCs)
      , (String
"L",  [(Char, Char)]
charPropUnicodeL )
      , (String
"Ll", [(Char, Char)]
charPropUnicodeLl)
      , (String
"Lm", [(Char, Char)]
charPropUnicodeLm)
      , (String
"Lo", [(Char, Char)]
charPropUnicodeLo)
      , (String
"Lt", [(Char, Char)]
charPropUnicodeLt)
      , (String
"Lu", [(Char, Char)]
charPropUnicodeLu)
      , (String
"M",  [(Char, Char)]
charPropUnicodeM )
      , (String
"Mc", [(Char, Char)]
charPropUnicodeMc)
      , (String
"Me", [(Char, Char)]
charPropUnicodeMe)
      , (String
"Mn", [(Char, Char)]
charPropUnicodeMn)
      , (String
"N",  [(Char, Char)]
charPropUnicodeN )
      , (String
"Nd", [(Char, Char)]
charPropUnicodeNd)
      , (String
"Nl", [(Char, Char)]
charPropUnicodeNl)
      , (String
"No", [(Char, Char)]
charPropUnicodeNo)
      , (String
"P",  [(Char, Char)]
charPropUnicodeP )
      , (String
"Pc", [(Char, Char)]
charPropUnicodePc)
      , (String
"Pd", [(Char, Char)]
charPropUnicodePd)
      , (String
"Pe", [(Char, Char)]
charPropUnicodePe)
      , (String
"Pf", [(Char, Char)]
charPropUnicodePf)
      , (String
"Pi", [(Char, Char)]
charPropUnicodePi)
      , (String
"Po", [(Char, Char)]
charPropUnicodePo)
      , (String
"Ps", [(Char, Char)]
charPropUnicodePs)
      , (String
"S",  [(Char, Char)]
charPropUnicodeS )
      , (String
"Sc", [(Char, Char)]
charPropUnicodeSc)
      , (String
"Sk", [(Char, Char)]
charPropUnicodeSk)
      , (String
"Sm", [(Char, Char)]
charPropUnicodeSm)
      , (String
"So", [(Char, Char)]
charPropUnicodeSo)
      , (String
"Z",  [(Char, Char)]
charPropUnicodeZ )
      , (String
"Zl", [(Char, Char)]
charPropUnicodeZl)
      , (String
"Zp", [(Char, Char)]
charPropUnicodeZp)
      , (String
"Zs", [(Char, Char)]
charPropUnicodeZs)
      ]

isCategory'     :: Parser String
isCategory' :: GenParser Char () String
isCategory'
    = ( (GenParser Char () String
 -> GenParser Char () String -> GenParser Char () String)
-> [GenParser Char () String] -> GenParser Char () String
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenParser Char () String
-> GenParser Char () String -> GenParser Char () String
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
(<|>) ([GenParser Char () String] -> GenParser Char () String)
-> ([(Char, String)] -> [GenParser Char () String])
-> [(Char, String)]
-> GenParser Char () String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Char, String) -> GenParser Char () String)
-> [(Char, String)] -> [GenParser Char () String]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> String -> GenParser Char () String)
-> (Char, String) -> GenParser Char () String
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Char -> String -> GenParser Char () String
forall {s} {m :: * -> *} {t :: * -> *} {u}.
(Stream s m Char, Foldable t) =>
Char -> t Char -> ParsecT s u m String
prop) ([(Char, String)] -> GenParser Char () String)
-> [(Char, String)] -> GenParser Char () String
forall a b. (a -> b) -> a -> b
$
        [ (Char
'L', String
"ultmo")
        , (Char
'M', String
"nce")
        , (Char
'N', String
"dlo")
        , (Char
'P', String
"cdseifo")
        , (Char
'Z', String
"slp")
        , (Char
'S', String
"mcko")
        , (Char
'C', String
"cfon")
        ]
      ) GenParser Char () String -> String -> GenParser Char () String
forall s u (m :: * -> *) a.
ParsecT s u m a -> String -> ParsecT s u m a
<?> String
"illegal Unicode character property"
    where
    prop :: Char -> t Char -> ParsecT s u m String
prop Char
c1 t Char
cs2
        = do
          Char
_ <- Char -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
c1
          String
s2 <- String -> ParsecT s u m String -> ParsecT s u m String
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option String
""
                ( do
                  Char
c2 <- (Char -> Bool) -> ParsecT s u m Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> t Char -> Bool
forall a. Eq a => a -> t a -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` t Char
cs2)
                  String -> ParsecT s u m String
forall a. a -> ParsecT s u m a
forall (m :: * -> *) a. Monad m => a -> m a
return [Char
c2] )
          String -> ParsecT s u m String
forall a. a -> ParsecT s u m a
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> ParsecT s u m String) -> String -> ParsecT s u m String
forall a b. (a -> b) -> a -> b
$ Char
c1Char -> String -> String
forall a. a -> [a] -> [a]
:String
s2

complEsc        :: StringLike s => Parser (GenRegex s)
complEsc :: forall s. StringLike s => Parser (GenRegex s)
complEsc
    = do
      Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'P'
      [(Char, Char)]
s <- ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity [(Char, Char)]
-> ParsecT String () Identity [(Char, Char)]
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'{') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'}') ParsecT String () Identity [(Char, Char)]
charProp
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ [(Char, Char)] -> GenRegex s
forall s. StringLike s => [(Char, Char)] -> GenRegex s
mkSym ([(Char, Char)] -> GenRegex s) -> [(Char, Char)] -> GenRegex s
forall a b. (a -> b) -> a -> b
$ [(Char, Char)] -> [(Char, Char)]
compCS [(Char, Char)]
s

charClassExpr   :: StringLike s => Parser (GenRegex s) -> Parser (GenRegex s)
charClassExpr :: forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassExpr Parser (GenRegex s)
multiCharEsc'
    = ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> Parser (GenRegex s)
-> Parser (GenRegex s)
forall s (m :: * -> *) t u open close a.
Stream s m t =>
ParsecT s u m open
-> ParsecT s u m close -> ParsecT s u m a -> ParsecT s u m a
between (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'[') (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
']') Parser (GenRegex s)
charGroup
    where

    -- charGroup       :: StringLike s => Parser (GenRegex s)
    charGroup :: Parser (GenRegex s)
charGroup
        = do
          GenRegex s
r <- ( Parser (GenRegex s)
negCharGroup       -- a ^ at beginning denotes negation, not start of posCharGroup
                 Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
                 Parser (GenRegex s)
posCharGroup
               )
          GenRegex s
s <- GenRegex s -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option (String -> GenRegex s
forall s. StringLike s => String -> GenRegex s
mkZero' String
"")   -- charClassSub
               ( do
                 Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'-'
                 Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassExpr Parser (GenRegex s)
multiCharEsc'
               )
          GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkDiff GenRegex s
r GenRegex s
s

    -- posCharGroup    :: StringLike s => Parser (GenRegex s)
    posCharGroup :: Parser (GenRegex s)
posCharGroup
        = do
          [GenRegex s]
rs <- Parser (GenRegex s) -> ParsecT String () Identity [GenRegex s]
forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 (Parser (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
charRange Parser (GenRegex s) -> Parser (GenRegex s) -> Parser (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> Parser (GenRegex s) -> Parser (GenRegex s)
forall s.
StringLike s =>
Parser (GenRegex s) -> Parser (GenRegex s)
charClassEsc Parser (GenRegex s)
multiCharEsc')
          GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ (GenRegex s -> GenRegex s -> GenRegex s)
-> [GenRegex s] -> GenRegex s
forall a. (a -> a -> a) -> [a] -> a
forall (t :: * -> *) a. Foldable t => (a -> a -> a) -> t a -> a
foldr1 GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkAlt [GenRegex s]
rs

    -- negCharGroup    :: StringLike s => Parser (GenRegex s)
    negCharGroup :: Parser (GenRegex s)
negCharGroup
        = do
          Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'^'
          GenRegex s
r <- Parser (GenRegex s)
posCharGroup
          GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ GenRegex s -> GenRegex s -> GenRegex s
forall s. StringLike s => GenRegex s -> GenRegex s -> GenRegex s
mkDiff GenRegex s
forall s. GenRegex s
mkDot GenRegex s
r

charRange       :: StringLike s => Parser (GenRegex s)
charRange :: forall s. StringLike s => Parser (GenRegex s)
charRange
    = GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall tok st a. GenParser tok st a -> GenParser tok st a
try GenParser Char () (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
seRange
      GenParser Char () (GenRegex s)
-> GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      GenParser Char () (GenRegex s)
forall s. StringLike s => Parser (GenRegex s)
xmlCharIncDash

seRange :: StringLike s => Parser (GenRegex s)
seRange :: forall s. StringLike s => Parser (GenRegex s)
seRange
    = do
      Char
c1 <- ParsecT String () Identity Char
charOrEsc'
      Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'-'
      Char
c2 <- ParsecT String () Identity Char
charOrEsc'
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> GenRegex s -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Char -> Char -> GenRegex s
forall s. StringLike s => Char -> Char -> GenRegex s
mkSymRng Char
c1 Char
c2

charOrEsc'      :: Parser Char
charOrEsc' :: ParsecT String () Identity Char
charOrEsc'
    = ( do
        Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'\\'
        ParsecT String () Identity Char
singleCharEsc'
      )
      ParsecT String () Identity Char
-> ParsecT String () Identity Char
-> ParsecT String () Identity Char
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
"\\-[]")

xmlCharIncDash  :: StringLike s => Parser (GenRegex s)
xmlCharIncDash :: forall s. StringLike s => Parser (GenRegex s)
xmlCharIncDash
    = GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall tok st a. GenParser tok st a -> GenParser tok st a
try ( do                          -- dash is only allowed if not followed by a [, else charGroup differences do not parse correctly
            Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'-'
            ParsecT String () Identity Char -> ParsecT String () Identity ()
forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy (Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'[')
            GenRegex s -> GenParser Char () (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> GenParser Char () (GenRegex s))
-> GenRegex s -> GenParser Char () (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Char -> GenRegex s
forall s. StringLike s => Char -> GenRegex s
mkSym1 Char
'-'
          )
      GenParser Char () (GenRegex s)
-> GenParser Char () (GenRegex s) -> GenParser Char () (GenRegex s)
forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|>
      ( do
        Char
c <- (Char -> Bool) -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` String
"-\\[]")
        GenRegex s -> GenParser Char () (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> GenParser Char () (GenRegex s))
-> GenRegex s -> GenParser Char () (GenRegex s)
forall a b. (a -> b) -> a -> b
$ Char -> GenRegex s
forall s. StringLike s => Char -> GenRegex s
mkSym1 Char
c
      )

wildCardEsc     :: StringLike s => Parser (GenRegex s)
wildCardEsc :: forall s. StringLike s => Parser (GenRegex s)
wildCardEsc
    = do
      Char
_ <- Char -> ParsecT String () Identity Char
forall s (m :: * -> *) u.
Stream s m Char =>
Char -> ParsecT s u m Char
char Char
'.'
      GenRegex s -> Parser (GenRegex s)
forall a. a -> ParsecT String () Identity a
forall (m :: * -> *) a. Monad m => a -> m a
return (GenRegex s -> Parser (GenRegex s))
-> ([(Char, Char)] -> GenRegex s)
-> [(Char, Char)]
-> Parser (GenRegex s)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Char, Char)] -> GenRegex s
forall s. StringLike s => [(Char, Char)] -> GenRegex s
mkSym ([(Char, Char)] -> GenRegex s)
-> ([(Char, Char)] -> [(Char, Char)])
-> [(Char, Char)]
-> GenRegex s
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Char, Char)] -> [(Char, Char)]
compCS ([(Char, Char)] -> Parser (GenRegex s))
-> [(Char, Char)] -> Parser (GenRegex s)
forall a b. (a -> b) -> a -> b
$ String -> [(Char, Char)]
stringCS String
"\n\r"


-- ------------------------------------------------------------