Skip to content

How to fight undesirable sharing: redux #110

@tomjaguarpaw

Description

@tomjaguarpaw

This is a follow up from michaelt/streaming#6, wherein it was observed that it is hard to fight undesirable sharing without turning off full-laziness. Well, I think I've found a way!

It seems like it's possible to avoid this problem without invoking no-full-laziness (which is a very blunt instrument) by taking advantage of oneShot. Futhermore, (# #) -> is somewhat faster than () ->. The former has about an 8% slowdown compared to current Of (i.e. Of with no adjustment to second component). The latter has about an 11% slowdown.

  1. I think the slowdown is tolerable to eliminate the risk of catastrophic memory usage, especially since non-stream-bound computations (i.e. those that do relatively less computation to get the next element) will suffer much less
  2. Can anyone come up with an example that demonstrates catastrophic sharing even when using NoMemo?

I still believe it's better to make the full-laziness transformation faster, but in the absence of that, the approach elaborated here might work. I welcome others' thoughts.

{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE UnboxedTuples #-}

module Main where

import Prelude hiding (sum, drop, enumFrom, take)
import GHC.Exts
import Streaming
import Streaming.Internal
import Streaming.Prelude hiding (print)
import Data.Functor.Identity
import Data.Functor.Compose

newtype NoMemo a = NoMemo ((# #) -> a)

noMemo :: a -> NoMemo a
noMemo x = NoMemo (oneShot (\(# #) -> x))

unNoMemo :: NoMemo a -> a
unNoMemo (NoMemo f) = f (# #)

instance Functor NoMemo where
  fmap f = noMemo . f . unNoMemo

instance Applicative NoMemo where
  pure = noMemo
  f <*> x = noMemo (unNoMemo f (unNoMemo x))


newtype Unit a = Unit (() -> a)

unit :: a -> Unit a
unit a = Unit (oneShot (\() -> a))

unUnit :: Unit a -> a
unUnit (Unit a) = a ()

instance Functor Unit where
  fmap f = unit . f . unUnit

instance Applicative Unit where
  pure = unit
  f <*> x = unit (unUnit f (unUnit x))


big :: Int
big = 4 * 1000 * 1000 * 1000

enumFrom2 :: (Monad m, Enum n)
          => n -> Stream (Compose (Of n) NoMemo) m r
enumFrom2 = loop where
  loop !n = Effect (return (Step (Compose (n :> pure (loop (succ n))))))

enumFrom3 :: (Monad m, Enum n)
          => n -> Stream (Compose (Of n) Unit) m r
enumFrom3 = loop where
  loop !n = Effect (return (Step (Compose (n :> pure (loop (succ n))))))

stream1 :: Stream (Of Integer) Identity ()
stream1 = take big (enumFrom 0)

stream2 :: Stream (Compose (Of Integer) NoMemo) Identity ()
stream2 = take big (enumFrom2 0)

stream3 :: Stream (Compose (Of Integer) Unit) Identity ()
stream3 = take big (enumFrom3 0)

fold2 :: Monad m => (x -> a -> x) -> x -> (x -> b)
      -> Stream (Compose (Of a) NoMemo) m r
      -> m (Of b r)
fold2 step begin done str = fold_loop str begin
  where
    fold_loop stream !x = case stream of
      Return r         -> return (done x :> r)
      Effect m         -> m >>= \str' -> fold_loop str' x
      Step (Compose (a :> (unNoMemo -> rest))) -> fold_loop rest $! step x a

fold3 :: Monad m => (x -> a -> x) -> x -> (x -> b)
      -> Stream (Compose (Of a) Unit) m r
      -> m (Of b r)
fold3 step begin done str = fold_loop str begin
  where
    fold_loop stream !x = case stream of
      Return r         -> return (done x :> r)
      Effect m         -> m >>= \str' -> fold_loop str' x
      Step (Compose (a :> (unUnit -> rest))) -> fold_loop rest $! step x a

sum1 :: Monad m
     => Stream (Of Integer) m r
     -> m (Of Integer r)
sum1 = fold (+) 0 id

sum2 :: Monad m
     => Stream (Compose (Of Integer) NoMemo) m r
     -> m (Of Integer r)
sum2 = fold2 (+) 0 id

sum3 :: Monad m
     => Stream (Compose (Of Integer) Unit) m r
     -> m (Of Integer r)
sum3 = fold3 (+) 0 id

drop2 :: (Monad m) => Int
      -> Stream (Compose (Of a) NoMemo) m r
      -> Stream (Compose (Of a) NoMemo) m r
drop2 n str | n <= 0 = str
drop2 n str = loop n str where
  loop 0 stream = stream
  loop m stream = case stream of
      Return r       -> Return r
      Effect ma      -> Effect (fmap (loop m) ma)
      Step (Compose (_ :> (unNoMemo -> as))) -> loop (m-1) as

drop3 :: (Monad m) => Int
      -> Stream (Compose (Of a) Unit) m r
      -> Stream (Compose (Of a) Unit) m r
drop3 n str | n <= 0 = str
drop3 n str = loop n str where
  loop 0 stream = stream
  loop m stream = case stream of
      Return r       -> Return r
      Effect ma      -> Effect (fmap (loop m) ma)
      Step (Compose (_ :> (unUnit -> as))) -> loop (m-1) as

main :: IO ()
main = do
-- Time to sum 4bn elements

-- Current version of Of is the fastest.
-- NoMemo, i.e. `(# #) ->`, is about 8% slower
-- Unit, i.e. `() ->`, is about 11% slower.

--  print $ sum stream1 -- 2:00.3
--  print $ sum (drop 1 stream1)
--  print $ sum2 stream2 -- 2:14.5
--  print $ sum2 (drop2 1 stream2)
  print $ sum3 stream3 -- 2:10.5
--  print $ sum3 (drop3 1 stream3)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions