{-# LANGUAGE CPP #-}
{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances, TypeSynonymInstances #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Regex.Base.RegexLike
-- Copyright   :  (c) Chris Kuklewicz 2006
-- SPDX-License-Identifier: BSD-3-Clause
--
-- Maintainer  :  hvr@gnu.org, Andreas Abel
-- Stability   :  stable
-- Portability :  non-portable (MPTC+FD)
--
-- Classes and instances for Regex matching.
--
-- All the /classes/ are declared here, and some common type aliases, and
-- the 'MatchResult' data type.
--
-- The only /instances/ here are for 'Extract' 'String', 'Extract' 'SB.ByteString',
-- and 'Extract' 'ST.Text'. There are no data values.  The 'RegexContext'
-- instances are in "Text.Regex.Base.Context", except for ones which
-- run afoul of a repeated variable (@'RegexContext' regex a a@), which
-- are defined in each modules' String and ByteString modules.
-----------------------------------------------------------------------------

module Text.Regex.Base.RegexLike (
  -- ** Type aliases
  MatchOffset,
  MatchLength,
  MatchArray,
  MatchText,
  -- ** Data types
  MatchResult(..),
  -- ** Classes
  RegexOptions(..),
  RegexMaker(..),
  RegexLike(..),
  RegexContext(..),
  Extract(..),
  AllSubmatches(..),AllTextSubmatches(..),AllMatches(..),AllTextMatches(..)
  ) where

#if !MIN_VERSION_base(4,13,0)
import Control.Monad.Fail as Fail (MonadFail)
#endif
import Data.Array(Array,(!))
import Data.Maybe(isJust)
import qualified Data.ByteString as SB (take,drop,empty,ByteString)
import qualified Data.ByteString.Lazy as LB (take,drop,empty,ByteString)
import qualified Data.Sequence as S(take,drop,empty,Seq)
import qualified Data.Text as ST (take,drop,empty,Text)
import qualified Data.Text.Lazy as LT (take,drop,empty,Text)

-- | 0 based index from start of source, or (-1) for unused
type MatchOffset = Int

-- | non-negative length of a match
type MatchLength = Int

-- | 0 based array, with 0th index indicating the full match.  If the
-- full match location is not available, represent as (0,0).
type MatchArray = Array Int (MatchOffset,MatchLength)
type MatchText source = Array Int (source,(MatchOffset,MatchLength))

-- | This is the same as the type from JRegex.
data MatchResult a = MR {
    forall a. MatchResult a -> a
mrBefore :: a,
    forall a. MatchResult a -> a
mrMatch  :: a,
    forall a. MatchResult a -> a
mrAfter  :: a,
    forall a. MatchResult a -> [a]
mrSubList :: [a],
    forall a. MatchResult a -> Array Int a
mrSubs   :: Array Int a
}


-- | Rather than carry them around separately, the options for how to
-- execute a regex are kept as part of the regex.  There are two types
-- of options.  Those that can only be specified at compilation time
-- and never changed are @compOpt@.  Those that can be changed later and
-- affect how matching is performed are @execOpt@.  The actually types
-- for these depend on the backend.
--
class RegexOptions regex compOpt execOpt
  | regex   -> compOpt execOpt
  , compOpt -> regex execOpt
  , execOpt -> regex compOpt
  where

  -- | No options set at all in the backend.
  blankCompOpt   :: compOpt

  -- | No options set at all in the backend.
  blankExecOpt   :: execOpt

  -- | Reasonable options (extended, caseSensitive, multiline regex).
  defaultCompOpt :: compOpt

  -- | Reasonable options (extended, caseSensitive, multiline regex).
  defaultExecOpt :: execOpt

  -- | Forget old flags and use new ones.
  setExecOpts    :: execOpt -> regex -> regex

  -- | Retrieve the current flags.
  getExecOpts    :: regex -> execOpt


-- | @RegexMaker@ captures the creation of the compiled regular
-- expression from a source type and an option type.  Methods 'makeRegexM' and
-- 'makeRegexM' report parse errors using 'MonadError', usually (@Either
-- String regex@).
--
-- The 'makeRegex' function has a default implementation that depends
-- on 'makeRegexOpts' and uses 'defaultCompOpt' and 'defaultExecOpt'.
-- Similarly for 'makeRegexM' and 'makeRegexOptsM'.
--
-- There are also default implementaions for 'makeRegexOpts' and
-- 'makeRegexOptsM' in terms of each other.  So a minimal instance
-- definition needs to only define one of these, hopefully
-- 'makeRegexOptsM'.
--
class (RegexOptions regex compOpt execOpt) => RegexMaker regex compOpt execOpt source
  | regex -> compOpt execOpt, compOpt -> regex execOpt, execOpt -> regex compOpt where

  -- | Use the 'defaultCompOpt' and 'defaultExecOpt'.
  makeRegex :: source -> regex

  -- | Specify your own options.
  makeRegexOpts :: compOpt -> execOpt -> source -> regex

  -- | Use the 'defaultCompOpt' and 'defaultExecOpt', reporting errors with 'fail'.
  makeRegexM :: (MonadFail m) => source -> m regex

  -- | Specify your own options, reporting errors with fail
  makeRegexOptsM :: (MonadFail m) => compOpt -> execOpt -> source -> m regex

  makeRegex = compOpt -> execOpt -> source -> regex
forall regex compOpt execOpt source.
RegexMaker regex compOpt execOpt source =>
compOpt -> execOpt -> source -> regex
makeRegexOpts compOpt
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
defaultCompOpt execOpt
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
defaultExecOpt
  makeRegexM = compOpt -> execOpt -> source -> m regex
forall regex compOpt execOpt source (m :: * -> *).
(RegexMaker regex compOpt execOpt source, MonadFail m) =>
compOpt -> execOpt -> source -> m regex
forall (m :: * -> *).
MonadFail m =>
compOpt -> execOpt -> source -> m regex
makeRegexOptsM compOpt
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
compOpt
defaultCompOpt execOpt
forall regex compOpt execOpt.
RegexOptions regex compOpt execOpt =>
execOpt
defaultExecOpt
  makeRegexOpts compOpt
c execOpt
e source
s = regex -> (regex -> regex) -> Maybe regex -> regex
forall b a. b -> (a -> b) -> Maybe a -> b
maybe ([Char] -> regex
forall a. HasCallStack => [Char] -> a
error [Char]
"makeRegexOpts failed") regex -> regex
forall a. a -> a
id (compOpt -> execOpt -> source -> Maybe regex
forall regex compOpt execOpt source (m :: * -> *).
(RegexMaker regex compOpt execOpt source, MonadFail m) =>
compOpt -> execOpt -> source -> m regex
forall (m :: * -> *).
MonadFail m =>
compOpt -> execOpt -> source -> m regex
makeRegexOptsM compOpt
c execOpt
e source
s)
  makeRegexOptsM compOpt
c execOpt
e source
s = regex -> m regex
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (compOpt -> execOpt -> source -> regex
forall regex compOpt execOpt source.
RegexMaker regex compOpt execOpt source =>
compOpt -> execOpt -> source -> regex
makeRegexOpts compOpt
c execOpt
e source
s)


-- | RegexLike is parametrized on a regular expression type and a
-- source type to run the matching on.
--
-- There are default implementations: 'matchTest' and 'matchOnceText' use
-- 'matchOnce'; 'matchCount' and 'matchAllText' use 'matchAll'.
-- Conversely, 'matchOnce' uses
-- 'matchOnceText' and 'matchAll' uses 'matchAllText'. So a minimal complete
-- instance need to provide at least ('matchOnce' or 'matchOnceText') and
-- ('matchAll' or 'matchAllText').  Additional definitions are often
-- provided where they will increase efficiency.
--
-- > [ c | let notVowel = makeRegex "[^aeiou]" :: Regex, c <- ['a'..'z'], matchTest notVowel [c]  ]
-- >
-- > "bcdfghjklmnpqrstvwxyz"
--
-- The strictness of these functions is instance dependent.
--
class (Extract source) => RegexLike regex source where

  -- | This returns the first match in the source (it checks the whole
  -- source, not just at the start). This returns an array of
  -- (offset,length) index pairs for the match and captured
  -- substrings.  The offset is 0-based.  A (-1) for an offset means a
  -- failure to match.  The lower bound of the array is 0, and the 0th
  -- element is the (offset,length) for the whole match.
  matchOnce  :: regex -> source -> Maybe MatchArray

  -- | @matchAll@ returns a list of matches.  The matches are in order
  -- and do not overlap. If any match succeeds but has 0 length then
  -- this will be the last match in the list.
  matchAll   :: regex -> source -> [MatchArray]

  -- | @matchCount@ returns the number of non-overlapping matches
  -- returned by @matchAll@.
  matchCount :: regex -> source -> Int

  -- | @matchTest@ returns @True@ if there is a match somewhere in the
  -- source (it checks the whole source not just at the start).
  matchTest  :: regex -> source -> Bool

  -- | This is @matchAll@ with the actual subsections of the source
  -- instead of just the (offset,length) information.
  matchAllText  :: regex -> source -> [MatchText source]

  -- | This can return a tuple of three items: the source before the
  -- match, an array of the match and captured substrings (with their
  -- indices), and the source after the match.
  matchOnceText :: regex -> source -> Maybe (source, MatchText source, source)

  matchAll regex
regex source
source = (Array Int (source, (Int, Int)) -> MatchArray)
-> [Array Int (source, (Int, Int))] -> [MatchArray]
forall a b. (a -> b) -> [a] -> [b]
map (((source, (Int, Int)) -> (Int, Int))
-> Array Int (source, (Int, Int)) -> MatchArray
forall a b. (a -> b) -> Array Int a -> Array Int b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (source, (Int, Int)) -> (Int, Int)
forall a b. (a, b) -> b
snd) (regex -> source -> [Array Int (source, (Int, Int))]
forall regex source.
RegexLike regex source =>
regex -> source -> [Array Int (source, (Int, Int))]
matchAllText regex
regex source
source)
  matchOnce regex
regex source
source = ((source, Array Int (source, (Int, Int)), source) -> MatchArray)
-> Maybe (source, Array Int (source, (Int, Int)), source)
-> Maybe MatchArray
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(source
_,Array Int (source, (Int, Int))
mt,source
_) -> ((source, (Int, Int)) -> (Int, Int))
-> Array Int (source, (Int, Int)) -> MatchArray
forall a b. (a -> b) -> Array Int a -> Array Int b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (source, (Int, Int)) -> (Int, Int)
forall a b. (a, b) -> b
snd Array Int (source, (Int, Int))
mt) (regex
-> source -> Maybe (source, Array Int (source, (Int, Int)), source)
forall regex source.
RegexLike regex source =>
regex
-> source -> Maybe (source, Array Int (source, (Int, Int)), source)
matchOnceText regex
regex source
source)
  matchTest regex
regex source
source = Maybe MatchArray -> Bool
forall a. Maybe a -> Bool
isJust (regex -> source -> Maybe MatchArray
forall regex source.
RegexLike regex source =>
regex -> source -> Maybe MatchArray
matchOnce regex
regex source
source)
  matchCount regex
regex source
source = [MatchArray] -> Int
forall a. [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (regex -> source -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
matchAll regex
regex source
source)
  matchOnceText regex
regex source
source =
    (MatchArray -> (source, Array Int (source, (Int, Int)), source))
-> Maybe MatchArray
-> Maybe (source, Array Int (source, (Int, Int)), source)
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\MatchArray
ma -> let (Int
o,Int
l) = MatchArray
ma MatchArray -> Int -> (Int, Int)
forall i e. Ix i => Array i e -> i -> e
! Int
0
                 in (Int -> source -> source
forall source. Extract source => Int -> source -> source
before Int
o source
source
                    ,((Int, Int) -> (source, (Int, Int)))
-> MatchArray -> Array Int (source, (Int, Int))
forall a b. (a -> b) -> Array Int a -> Array Int b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Int, Int)
ol -> ((Int, Int) -> source -> source
forall source. Extract source => (Int, Int) -> source -> source
extract (Int, Int)
ol source
source,(Int, Int)
ol)) MatchArray
ma
                    ,Int -> source -> source
forall source. Extract source => Int -> source -> source
after (Int
oInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l) source
source))
         (regex -> source -> Maybe MatchArray
forall regex source.
RegexLike regex source =>
regex -> source -> Maybe MatchArray
matchOnce regex
regex source
source)
  matchAllText regex
regex source
source =
    (MatchArray -> Array Int (source, (Int, Int)))
-> [MatchArray] -> [Array Int (source, (Int, Int))]
forall a b. (a -> b) -> [a] -> [b]
map (((Int, Int) -> (source, (Int, Int)))
-> MatchArray -> Array Int (source, (Int, Int))
forall a b. (a -> b) -> Array Int a -> Array Int b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Int, Int)
ol -> ((Int, Int) -> source -> source
forall source. Extract source => (Int, Int) -> source -> source
extract (Int, Int)
ol source
source,(Int, Int)
ol)))
        (regex -> source -> [MatchArray]
forall regex source.
RegexLike regex source =>
regex -> source -> [MatchArray]
matchAll regex
regex source
source)


-- | @RegexContext@ is the polymorphic interface to do matching.  Since
-- 'target' is polymorphic you may need to supply the type explicitly
-- in contexts where it cannot be inferred.
--
-- The monadic 'matchM' version uses 'fail' to report when the 'regex'
-- has no match in 'source'.  Two examples:
--
-- Here the contest 'Bool' is inferred:
--
-- > [ c | let notVowel = makeRegex "[^aeiou]" :: Regex, c <- ['a'..'z'], match notVowel [c]  ]
-- >
-- > "bcdfghjklmnpqrstvwxyz"
--
-- Here the context @[String]@ must be supplied:
--
-- > let notVowel = (makeRegex "[^aeiou]" :: Regex )
-- > in do { c <- ['a'..'z'] ; matchM notVowel [c] } :: [String]
-- >
-- > ["b","c","d","f","g","h","j","k","l","m","n","p","q","r","s","t","v","w","x","y","z"]
--
class (RegexLike regex source) => RegexContext regex source target where
  match :: regex -> source -> target
  matchM :: (MonadFail m) => regex -> source -> m target


-- | Extract allows for indexing operations on 'String' or 'ByteString'.
--
class Extract source where

  -- | @before@ is a renamed 'take'.
  before :: Int -> source -> source

  -- | @after@ is a renamed 'drop'.
  after :: Int -> source -> source

  -- | When there is no match, this can construct an empty data value.
  empty :: source

  -- | @extract@ takes an offset and length, and has this default implementation:
  --
  -- @
  --   extract (off, len) source = before len (after off source)
  -- @
  extract :: (Int,Int) -> source -> source
  extract (Int
off,Int
len) source
source = Int -> source -> source
forall source. Extract source => Int -> source -> source
before Int
len (Int -> source -> source
forall source. Extract source => Int -> source -> source
after Int
off source
source)

instance Extract String where
  before :: Int -> [Char] -> [Char]
before =  Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
take; after :: Int -> [Char] -> [Char]
after = Int -> [Char] -> [Char]
forall a. Int -> [a] -> [a]
drop; empty :: [Char]
empty = []

instance Extract SB.ByteString where
  before :: Int -> ByteString -> ByteString
before = Int -> ByteString -> ByteString
SB.take; after :: Int -> ByteString -> ByteString
after = Int -> ByteString -> ByteString
SB.drop; empty :: ByteString
empty = ByteString
SB.empty

instance Extract LB.ByteString where
  before :: Int -> ByteString -> ByteString
before = Int64 -> ByteString -> ByteString
LB.take (Int64 -> ByteString -> ByteString)
-> (Int -> Int64) -> Int -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int64
forall a. Enum a => Int -> a
toEnum; after :: Int -> ByteString -> ByteString
after = Int64 -> ByteString -> ByteString
LB.drop (Int64 -> ByteString -> ByteString)
-> (Int -> Int64) -> Int -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int64
forall a. Enum a => Int -> a
toEnum; empty :: ByteString
empty = ByteString
LB.empty

instance Extract (S.Seq a) where
  before :: Int -> Seq a -> Seq a
before = Int -> Seq a -> Seq a
forall a. Int -> Seq a -> Seq a
S.take; after :: Int -> Seq a -> Seq a
after = Int -> Seq a -> Seq a
forall a. Int -> Seq a -> Seq a
S.drop; empty :: Seq a
empty = Seq a
forall a. Seq a
S.empty

-- | @since 0.94.0.0
instance Extract ST.Text where
  before :: Int -> Text -> Text
before = Int -> Text -> Text
ST.take; after :: Int -> Text -> Text
after = Int -> Text -> Text
ST.drop; empty :: Text
empty = Text
ST.empty

-- | @since 0.94.0.0
instance Extract LT.Text where
  before :: Int -> Text -> Text
before = Int64 -> Text -> Text
LT.take (Int64 -> Text -> Text) -> (Int -> Int64) -> Int -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int64
forall a. Enum a => Int -> a
toEnum; after :: Int -> Text -> Text
after = Int64 -> Text -> Text
LT.drop (Int64 -> Text -> Text) -> (Int -> Int64) -> Int -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int64
forall a. Enum a => Int -> a
toEnum; empty :: Text
empty = Text
LT.empty

-- | Used in results of 'RegexContext' instances.
newtype AllSubmatches f b = AllSubmatches {forall (f :: * -> *) b. AllSubmatches f b -> f b
getAllSubmatches :: (f b)}

-- | Used in results of 'RegexContext' instances.
newtype AllTextSubmatches f b = AllTextSubmatches {forall (f :: * -> *) b. AllTextSubmatches f b -> f b
getAllTextSubmatches :: (f b)}

-- | Used in results of 'RegexContext' instances.
newtype AllMatches f b = AllMatches {forall (f :: * -> *) b. AllMatches f b -> f b
getAllMatches :: (f b)}

-- | Used in results of 'RegexContext' instances.
newtype AllTextMatches f b = AllTextMatches {forall (f :: * -> *) b. AllTextMatches f b -> f b
getAllTextMatches :: (f b) }