{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Distribution.Fields.LexerMonad
-- License     :  BSD3
--
-- Maintainer  :  cabal-devel@haskell.org
-- Portability :  portable
module Distribution.Fields.LexerMonad (
    InputStream,
    LexState(..),
    LexResult(..),

    Lex(..),
    execLexer,

    getPos,
    setPos,
    adjustPos,

    getInput,
    setInput,

    getStartCode,
    setStartCode,

    LexWarning(..),
    LexWarningType(..),
    addWarning,
    toPWarnings,

  ) where

import qualified Data.ByteString              as B
import qualified Data.List.NonEmpty           as NE
import           Distribution.Compat.Prelude
import           Distribution.Parsec.Position (Position (..), showPos)
import           Distribution.Parsec.Warning  (PWarnType (..), PWarning (..))
import           Prelude ()

import qualified Data.Map.Strict as Map

#ifdef CABAL_PARSEC_DEBUG
-- testing only:
import qualified Data.Text          as T
import qualified Data.Text.Encoding as T
import qualified Data.Vector        as V
#endif

-- simple state monad
newtype Lex a = Lex { forall a. Lex a -> LexState -> LexResult a
unLex :: LexState -> LexResult a }

instance Functor Lex where
  fmap :: forall a b. (a -> b) -> Lex a -> Lex b
fmap = (a -> b) -> Lex a -> Lex b
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM

instance Applicative Lex where
  pure :: forall a. a -> Lex a
pure = a -> Lex a
forall a. a -> Lex a
returnLex
  <*> :: forall a b. Lex (a -> b) -> Lex a -> Lex b
(<*>) = Lex (a -> b) -> Lex a -> Lex b
forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap

instance Monad Lex where
  return :: forall a. a -> Lex a
return = a -> Lex a
forall a. a -> Lex a
forall (f :: * -> *) a. Applicative f => a -> f a
pure
  >>= :: forall a b. Lex a -> (a -> Lex b) -> Lex b
(>>=)  = Lex a -> (a -> Lex b) -> Lex b
forall a b. Lex a -> (a -> Lex b) -> Lex b
thenLex

data LexResult a = LexResult {-# UNPACK #-} !LexState a

data LexWarningType
    = LexWarningNBSP  -- ^ Encountered non breaking space
    | LexWarningBOM   -- ^ BOM at the start of the cabal file
    | LexWarningTab   -- ^ Leading tags
  deriving (LexWarningType -> LexWarningType -> Bool
(LexWarningType -> LexWarningType -> Bool)
-> (LexWarningType -> LexWarningType -> Bool) -> Eq LexWarningType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: LexWarningType -> LexWarningType -> Bool
== :: LexWarningType -> LexWarningType -> Bool
$c/= :: LexWarningType -> LexWarningType -> Bool
/= :: LexWarningType -> LexWarningType -> Bool
Eq, Eq LexWarningType
Eq LexWarningType
-> (LexWarningType -> LexWarningType -> Ordering)
-> (LexWarningType -> LexWarningType -> Bool)
-> (LexWarningType -> LexWarningType -> Bool)
-> (LexWarningType -> LexWarningType -> Bool)
-> (LexWarningType -> LexWarningType -> Bool)
-> (LexWarningType -> LexWarningType -> LexWarningType)
-> (LexWarningType -> LexWarningType -> LexWarningType)
-> Ord LexWarningType
LexWarningType -> LexWarningType -> Bool
LexWarningType -> LexWarningType -> Ordering
LexWarningType -> LexWarningType -> LexWarningType
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: LexWarningType -> LexWarningType -> Ordering
compare :: LexWarningType -> LexWarningType -> Ordering
$c< :: LexWarningType -> LexWarningType -> Bool
< :: LexWarningType -> LexWarningType -> Bool
$c<= :: LexWarningType -> LexWarningType -> Bool
<= :: LexWarningType -> LexWarningType -> Bool
$c> :: LexWarningType -> LexWarningType -> Bool
> :: LexWarningType -> LexWarningType -> Bool
$c>= :: LexWarningType -> LexWarningType -> Bool
>= :: LexWarningType -> LexWarningType -> Bool
$cmax :: LexWarningType -> LexWarningType -> LexWarningType
max :: LexWarningType -> LexWarningType -> LexWarningType
$cmin :: LexWarningType -> LexWarningType -> LexWarningType
min :: LexWarningType -> LexWarningType -> LexWarningType
Ord, Int -> LexWarningType -> ShowS
[LexWarningType] -> ShowS
LexWarningType -> String
(Int -> LexWarningType -> ShowS)
-> (LexWarningType -> String)
-> ([LexWarningType] -> ShowS)
-> Show LexWarningType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LexWarningType -> ShowS
showsPrec :: Int -> LexWarningType -> ShowS
$cshow :: LexWarningType -> String
show :: LexWarningType -> String
$cshowList :: [LexWarningType] -> ShowS
showList :: [LexWarningType] -> ShowS
Show)

data LexWarning = LexWarning                !LexWarningType
                             {-# UNPACK #-} !Position
  deriving (Int -> LexWarning -> ShowS
[LexWarning] -> ShowS
LexWarning -> String
(Int -> LexWarning -> ShowS)
-> (LexWarning -> String)
-> ([LexWarning] -> ShowS)
-> Show LexWarning
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> LexWarning -> ShowS
showsPrec :: Int -> LexWarning -> ShowS
$cshow :: LexWarning -> String
show :: LexWarning -> String
$cshowList :: [LexWarning] -> ShowS
showList :: [LexWarning] -> ShowS
Show)

toPWarnings :: [LexWarning] -> [PWarning]
toPWarnings :: [LexWarning] -> [PWarning]
toPWarnings
    = ((LexWarningType, NonEmpty Position) -> PWarning)
-> [(LexWarningType, NonEmpty Position)] -> [PWarning]
forall a b. (a -> b) -> [a] -> [b]
map ((LexWarningType -> NonEmpty Position -> PWarning)
-> (LexWarningType, NonEmpty Position) -> PWarning
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry LexWarningType -> NonEmpty Position -> PWarning
toWarning)
    ([(LexWarningType, NonEmpty Position)] -> [PWarning])
-> ([LexWarning] -> [(LexWarningType, NonEmpty Position)])
-> [LexWarning]
-> [PWarning]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map LexWarningType (NonEmpty Position)
-> [(LexWarningType, NonEmpty Position)]
forall k a. Map k a -> [(k, a)]
Map.toList
    (Map LexWarningType (NonEmpty Position)
 -> [(LexWarningType, NonEmpty Position)])
-> ([LexWarning] -> Map LexWarningType (NonEmpty Position))
-> [LexWarning]
-> [(LexWarningType, NonEmpty Position)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (NonEmpty Position -> NonEmpty Position -> NonEmpty Position)
-> [(LexWarningType, NonEmpty Position)]
-> Map LexWarningType (NonEmpty Position)
forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
Map.fromListWith NonEmpty Position -> NonEmpty Position -> NonEmpty Position
forall a. Semigroup a => a -> a -> a
(<>)
    ([(LexWarningType, NonEmpty Position)]
 -> Map LexWarningType (NonEmpty Position))
-> ([LexWarning] -> [(LexWarningType, NonEmpty Position)])
-> [LexWarning]
-> Map LexWarningType (NonEmpty Position)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (LexWarning -> (LexWarningType, NonEmpty Position))
-> [LexWarning] -> [(LexWarningType, NonEmpty Position)]
forall a b. (a -> b) -> [a] -> [b]
map (\(LexWarning LexWarningType
t Position
p) -> (LexWarningType
t, Position -> NonEmpty Position
forall a. a -> NonEmpty a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Position
p))
  where
    toWarning :: LexWarningType -> NonEmpty Position -> PWarning
toWarning LexWarningType
LexWarningBOM NonEmpty Position
poss =
        PWarnType -> Position -> String -> PWarning
PWarning PWarnType
PWTLexBOM (NonEmpty Position -> Position
forall a. NonEmpty a -> a
NE.head NonEmpty Position
poss) String
"Byte-order mark found at the beginning of the file"
    toWarning LexWarningType
LexWarningNBSP NonEmpty Position
poss =
        PWarnType -> Position -> String -> PWarning
PWarning PWarnType
PWTLexNBSP (NonEmpty Position -> Position
forall a. NonEmpty a -> a
NE.head NonEmpty Position
poss) (String -> PWarning) -> String -> PWarning
forall a b. (a -> b) -> a -> b
$ String
"Non breaking spaces at " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
", " (NonEmpty String -> [String]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty String -> [String]) -> NonEmpty String -> [String]
forall a b. (a -> b) -> a -> b
$ (Position -> String) -> NonEmpty Position -> NonEmpty String
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Position -> String
showPos NonEmpty Position
poss)
    toWarning LexWarningType
LexWarningTab NonEmpty Position
poss =
        PWarnType -> Position -> String -> PWarning
PWarning PWarnType
PWTLexTab (NonEmpty Position -> Position
forall a. NonEmpty a -> a
NE.head NonEmpty Position
poss) (String -> PWarning) -> String -> PWarning
forall a b. (a -> b) -> a -> b
$ String
"Tabs used as indentation at " String -> ShowS
forall a. [a] -> [a] -> [a]
++ String -> [String] -> String
forall a. [a] -> [[a]] -> [a]
intercalate String
", " (NonEmpty String -> [String]
forall a. NonEmpty a -> [a]
NE.toList (NonEmpty String -> [String]) -> NonEmpty String -> [String]
forall a b. (a -> b) -> a -> b
$ (Position -> String) -> NonEmpty Position -> NonEmpty String
forall a b. (a -> b) -> NonEmpty a -> NonEmpty b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Position -> String
showPos NonEmpty Position
poss)

data LexState = LexState {
        LexState -> Position
curPos   :: {-# UNPACK #-} !Position,        -- ^ position at current input location
        LexState -> InputStream
curInput :: {-# UNPACK #-} !InputStream,     -- ^ the current input
        LexState -> Int
curCode  :: {-# UNPACK #-} !StartCode,       -- ^ lexer code
        LexState -> [LexWarning]
warnings :: [LexWarning]
#ifdef CABAL_PARSEC_DEBUG
        , dbgText  :: V.Vector T.Text                -- ^ input lines, to print pretty debug info
#endif
     } --TODO: check if we should cache the first token
       -- since it looks like parsec's uncons can be called many times on the same input

type StartCode   = Int    -- ^ An @alex@ lexer start code
type InputStream = B.ByteString



-- | Execute the given lexer on the supplied input stream.
execLexer :: Lex a -> InputStream -> ([LexWarning], a)
execLexer :: forall a. Lex a -> InputStream -> ([LexWarning], a)
execLexer (Lex LexState -> LexResult a
lexer) InputStream
input =
    case LexState -> LexResult a
lexer LexState
initialState of
      LexResult LexState{ warnings :: LexState -> [LexWarning]
warnings = [LexWarning]
ws } a
result -> ([LexWarning]
ws, a
result)
  where
    initialState :: LexState
initialState = LexState
      -- TODO: add 'startPosition'
      { curPos :: Position
curPos   = Int -> Int -> Position
Position Int
1 Int
1
      , curInput :: InputStream
curInput = InputStream
input
      , curCode :: Int
curCode  = Int
0
      , warnings :: [LexWarning]
warnings = []
#ifdef CABAL_PARSEC_DEBUG
      , dbgText  = V.fromList . T.lines . T.decodeUtf8 $ input
#endif
      }

{-# INLINE returnLex #-}
returnLex :: a -> Lex a
returnLex :: forall a. a -> Lex a
returnLex a
a = (LexState -> LexResult a) -> Lex a
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult a) -> Lex a)
-> (LexState -> LexResult a) -> Lex a
forall a b. (a -> b) -> a -> b
$ \LexState
s -> LexState -> a -> LexResult a
forall a. LexState -> a -> LexResult a
LexResult LexState
s a
a

{-# INLINE thenLex #-}
thenLex :: Lex a -> (a -> Lex b) -> Lex b
(Lex LexState -> LexResult a
m) thenLex :: forall a b. Lex a -> (a -> Lex b) -> Lex b
`thenLex` a -> Lex b
k = (LexState -> LexResult b) -> Lex b
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult b) -> Lex b)
-> (LexState -> LexResult b) -> Lex b
forall a b. (a -> b) -> a -> b
$ \LexState
s -> case LexState -> LexResult a
m LexState
s of LexResult LexState
s' a
a -> (Lex b -> LexState -> LexResult b
forall a. Lex a -> LexState -> LexResult a
unLex (a -> Lex b
k a
a)) LexState
s'

setPos :: Position -> Lex ()
setPos :: Position -> Lex ()
setPos Position
pos = (LexState -> LexResult ()) -> Lex ()
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult ()) -> Lex ())
-> (LexState -> LexResult ()) -> Lex ()
forall a b. (a -> b) -> a -> b
$ \LexState
s -> LexState -> () -> LexResult ()
forall a. LexState -> a -> LexResult a
LexResult LexState
s{ curPos :: Position
curPos = Position
pos } ()

getPos :: Lex Position
getPos :: Lex Position
getPos = (LexState -> LexResult Position) -> Lex Position
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult Position) -> Lex Position)
-> (LexState -> LexResult Position) -> Lex Position
forall a b. (a -> b) -> a -> b
$ \s :: LexState
s@LexState{ curPos :: LexState -> Position
curPos = Position
pos } -> LexState -> Position -> LexResult Position
forall a. LexState -> a -> LexResult a
LexResult LexState
s Position
pos

adjustPos :: (Position -> Position) -> Lex ()
adjustPos :: (Position -> Position) -> Lex ()
adjustPos Position -> Position
f = (LexState -> LexResult ()) -> Lex ()
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult ()) -> Lex ())
-> (LexState -> LexResult ()) -> Lex ()
forall a b. (a -> b) -> a -> b
$ \s :: LexState
s@LexState{ curPos :: LexState -> Position
curPos = Position
pos } -> LexState -> () -> LexResult ()
forall a. LexState -> a -> LexResult a
LexResult LexState
s{ curPos :: Position
curPos = Position -> Position
f Position
pos } ()

getInput :: Lex InputStream
getInput :: Lex InputStream
getInput = (LexState -> LexResult InputStream) -> Lex InputStream
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult InputStream) -> Lex InputStream)
-> (LexState -> LexResult InputStream) -> Lex InputStream
forall a b. (a -> b) -> a -> b
$ \s :: LexState
s@LexState{ curInput :: LexState -> InputStream
curInput = InputStream
i } -> LexState -> InputStream -> LexResult InputStream
forall a. LexState -> a -> LexResult a
LexResult LexState
s InputStream
i

setInput :: InputStream -> Lex ()
setInput :: InputStream -> Lex ()
setInput InputStream
i = (LexState -> LexResult ()) -> Lex ()
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult ()) -> Lex ())
-> (LexState -> LexResult ()) -> Lex ()
forall a b. (a -> b) -> a -> b
$ \LexState
s -> LexState -> () -> LexResult ()
forall a. LexState -> a -> LexResult a
LexResult LexState
s{ curInput :: InputStream
curInput = InputStream
i } ()

getStartCode :: Lex Int
getStartCode :: Lex Int
getStartCode = (LexState -> LexResult Int) -> Lex Int
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult Int) -> Lex Int)
-> (LexState -> LexResult Int) -> Lex Int
forall a b. (a -> b) -> a -> b
$ \s :: LexState
s@LexState{ curCode :: LexState -> Int
curCode = Int
c } -> LexState -> Int -> LexResult Int
forall a. LexState -> a -> LexResult a
LexResult LexState
s Int
c

setStartCode :: Int -> Lex ()
setStartCode :: Int -> Lex ()
setStartCode Int
c = (LexState -> LexResult ()) -> Lex ()
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult ()) -> Lex ())
-> (LexState -> LexResult ()) -> Lex ()
forall a b. (a -> b) -> a -> b
$ \LexState
s -> LexState -> () -> LexResult ()
forall a. LexState -> a -> LexResult a
LexResult LexState
s{ curCode :: Int
curCode = Int
c } ()

-- | Add warning at the current position
addWarning :: LexWarningType -> Lex ()
addWarning :: LexWarningType -> Lex ()
addWarning LexWarningType
wt = (LexState -> LexResult ()) -> Lex ()
forall a. (LexState -> LexResult a) -> Lex a
Lex ((LexState -> LexResult ()) -> Lex ())
-> (LexState -> LexResult ()) -> Lex ()
forall a b. (a -> b) -> a -> b
$ \s :: LexState
s@LexState{ curPos :: LexState -> Position
curPos = Position
pos, warnings :: LexState -> [LexWarning]
warnings = [LexWarning]
ws  } ->
    LexState -> () -> LexResult ()
forall a. LexState -> a -> LexResult a
LexResult LexState
s{ warnings :: [LexWarning]
warnings = LexWarningType -> Position -> LexWarning
LexWarning LexWarningType
wt Position
pos LexWarning -> [LexWarning] -> [LexWarning]
forall a. a -> [a] -> [a]
: [LexWarning]
ws } ()