-
Notifications
You must be signed in to change notification settings - Fork 30
Open
Description
This is a follow up from michaelt/streaming#6, wherein it was observed that it is hard to fight undesirable sharing without turning off full-laziness. Well, I think I've found a way!
It seems like it's possible to avoid this problem without invoking no-full-laziness (which is a very blunt instrument) by taking advantage of oneShot. Futhermore, (# #) -> is somewhat faster than () ->. The former has about an 8% slowdown compared to current Of (i.e. Of with no adjustment to second component). The latter has about an 11% slowdown.
- I think the slowdown is tolerable to eliminate the risk of catastrophic memory usage, especially since non-stream-bound computations (i.e. those that do relatively less computation to get the next element) will suffer much less
- Can anyone come up with an example that demonstrates catastrophic sharing even when using
NoMemo?
I still believe it's better to make the full-laziness transformation faster, but in the absence of that, the approach elaborated here might work. I welcome others' thoughts.
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE UnboxedTuples #-}
module Main where
import Prelude hiding (sum, drop, enumFrom, take)
import GHC.Exts
import Streaming
import Streaming.Internal
import Streaming.Prelude hiding (print)
import Data.Functor.Identity
import Data.Functor.Compose
newtype NoMemo a = NoMemo ((# #) -> a)
noMemo :: a -> NoMemo a
noMemo x = NoMemo (oneShot (\(# #) -> x))
unNoMemo :: NoMemo a -> a
unNoMemo (NoMemo f) = f (# #)
instance Functor NoMemo where
fmap f = noMemo . f . unNoMemo
instance Applicative NoMemo where
pure = noMemo
f <*> x = noMemo (unNoMemo f (unNoMemo x))
newtype Unit a = Unit (() -> a)
unit :: a -> Unit a
unit a = Unit (oneShot (\() -> a))
unUnit :: Unit a -> a
unUnit (Unit a) = a ()
instance Functor Unit where
fmap f = unit . f . unUnit
instance Applicative Unit where
pure = unit
f <*> x = unit (unUnit f (unUnit x))
big :: Int
big = 4 * 1000 * 1000 * 1000
enumFrom2 :: (Monad m, Enum n)
=> n -> Stream (Compose (Of n) NoMemo) m r
enumFrom2 = loop where
loop !n = Effect (return (Step (Compose (n :> pure (loop (succ n))))))
enumFrom3 :: (Monad m, Enum n)
=> n -> Stream (Compose (Of n) Unit) m r
enumFrom3 = loop where
loop !n = Effect (return (Step (Compose (n :> pure (loop (succ n))))))
stream1 :: Stream (Of Integer) Identity ()
stream1 = take big (enumFrom 0)
stream2 :: Stream (Compose (Of Integer) NoMemo) Identity ()
stream2 = take big (enumFrom2 0)
stream3 :: Stream (Compose (Of Integer) Unit) Identity ()
stream3 = take big (enumFrom3 0)
fold2 :: Monad m => (x -> a -> x) -> x -> (x -> b)
-> Stream (Compose (Of a) NoMemo) m r
-> m (Of b r)
fold2 step begin done str = fold_loop str begin
where
fold_loop stream !x = case stream of
Return r -> return (done x :> r)
Effect m -> m >>= \str' -> fold_loop str' x
Step (Compose (a :> (unNoMemo -> rest))) -> fold_loop rest $! step x a
fold3 :: Monad m => (x -> a -> x) -> x -> (x -> b)
-> Stream (Compose (Of a) Unit) m r
-> m (Of b r)
fold3 step begin done str = fold_loop str begin
where
fold_loop stream !x = case stream of
Return r -> return (done x :> r)
Effect m -> m >>= \str' -> fold_loop str' x
Step (Compose (a :> (unUnit -> rest))) -> fold_loop rest $! step x a
sum1 :: Monad m
=> Stream (Of Integer) m r
-> m (Of Integer r)
sum1 = fold (+) 0 id
sum2 :: Monad m
=> Stream (Compose (Of Integer) NoMemo) m r
-> m (Of Integer r)
sum2 = fold2 (+) 0 id
sum3 :: Monad m
=> Stream (Compose (Of Integer) Unit) m r
-> m (Of Integer r)
sum3 = fold3 (+) 0 id
drop2 :: (Monad m) => Int
-> Stream (Compose (Of a) NoMemo) m r
-> Stream (Compose (Of a) NoMemo) m r
drop2 n str | n <= 0 = str
drop2 n str = loop n str where
loop 0 stream = stream
loop m stream = case stream of
Return r -> Return r
Effect ma -> Effect (fmap (loop m) ma)
Step (Compose (_ :> (unNoMemo -> as))) -> loop (m-1) as
drop3 :: (Monad m) => Int
-> Stream (Compose (Of a) Unit) m r
-> Stream (Compose (Of a) Unit) m r
drop3 n str | n <= 0 = str
drop3 n str = loop n str where
loop 0 stream = stream
loop m stream = case stream of
Return r -> Return r
Effect ma -> Effect (fmap (loop m) ma)
Step (Compose (_ :> (unUnit -> as))) -> loop (m-1) as
main :: IO ()
main = do
-- Time to sum 4bn elements
-- Current version of Of is the fastest.
-- NoMemo, i.e. `(# #) ->`, is about 8% slower
-- Unit, i.e. `() ->`, is about 11% slower.
-- print $ sum stream1 -- 2:00.3
-- print $ sum (drop 1 stream1)
-- print $ sum2 stream2 -- 2:14.5
-- print $ sum2 (drop2 1 stream2)
print $ sum3 stream3 -- 2:10.5
-- print $ sum3 (drop3 1 stream3)danidiaz
Metadata
Metadata
Assignees
Labels
No labels