[ create a new paste ] login | about

Link: http://codepad.org/hiq26gHs    [ raw code | fork ]

OCaml, pasted on May 17:
    (* I pulled this out from amidst some other Stream related code.
     * Since I didn't test the extracted version I may have left out
     * some definitions. If I have and you want them let me know.
     *)

    type Stream< 'a > 
        = private S of Lazy< StreamCell< 'a > >
    with
        interface System.Collections.IEnumerable
        interface System.Collections.Generic.IEnumerable< 'a >

    and private StreamCell< 'a >
        = Cons of 'a * Lazy< StreamCell< 'a > > 
        | Nil

    [< CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix) >]
    module Stream =

        open Microsoft.FSharp.Core.OptimizedClosures

        type private LazyCell< 'a > =
            Lazy< StreamCell< 'a > >

        (**********************************************************************
         * sort
         **********************************************************************)

        type private Comparison< 'a >
            = FSharpFunc< 'a , 'a , int >

        (*
        // The sort below is incremental. It requires O(n) preprocessing after
        // which producing each subsequent element of the output requires a
        // further O(log(n)) time. The algorithm used is based on a lazy bottom
        // up version of merge sort. For small lists we fall back to selection
        // sort. Selection sort is prefered to insertion sort here because it
        // produces the first k elements of the output after the first k
        // iterations. If we used insertion sort we would have to completely
        // sort the list to determine a single element of the output.
        //
        // To reduce memory pressure where ever possible we perform the merge
        // sort and selection sort inplace. Without this optimization
        // completely sorting a one million element list requires over 15
        // seconds, however, with the optimization this is reduced to
        // approximately 3 seconds.
        *)

        (*
        // A value of type SelectionSort< 'a > represents a delayed application
        // of the selection sort algorithm. Each time an element of the output
        // is requested we perform only as much sorting as is required to
        // determine that value.
        *)

        type private SelectionSort< 'a > =
            {         values : 'a []
            ; mutable index  : int
            ;         finish : int
            }

        [< CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix) >]
        module private SelectionSort =

            let empty< 'a > : SelectionSort< 'a > =
                { values = Unchecked.defaultof< 'a [] > ; index = 0 ; finish = 0 }

            let isEmpty (xs : SelectionSort< 'a >) : bool =
                xs.index >= xs.finish

            (*
            // The first element of the array is assumed to be in it's final
            // position so that head takes O(1). This invariant is maintained
            // by tail and ofSubArray.
            *)

            let head (xs : SelectionSort< 'a >) : 'a =
                xs.values.[xs.index]

            (*
            // A single iteration of selection sort performed inplace. This
            // results in the value at xs.index being in it's final position.
            *)

            let private minToFront (compare : Comparison< 'a >) (xs : SelectionSort< 'a >) : unit =
                let mutable x = xs.values.[xs.index]
                let mutable i = xs.index
                for j = xs.index + 1 to xs.finish - 1 do
                    let y = xs.values.[j]
                    if compare.Invoke(x, y) > 0 then
                        x <- y ; i <- j
                xs.values.[i] <- xs.values.[xs.index]
                xs.values.[xs.index] <- x

            (*
            // In both tail and ofSubArray we perform a single iteration of
            // selection sort. This ensures that the invariant required by head
            // is satisfied even though subsequent elements of the array may
            // still be unsorted.
            *)

            let tailInplace (compare : Comparison< 'a >) (xs : SelectionSort< 'a >) : unit =
                xs.index <- xs.index + 1
                if xs.index < xs.finish then
                    minToFront compare xs

            let ofSubArray (compare : Comparison< 'a >) (xs : 'a []) (start : int) (finish : int) : SelectionSort< 'a > =
                let xs = { values = xs ; index = start ; finish = finish }
                if xs.index < xs.finish then
                    minToFront compare xs
                xs

        (*
        // As with SelectionSort< 'a >, a value of type MergeSort< 'a >
        // represents a delayed application of the selection sort algorithm.
        // There are two cases for MergeSort< 'a >.
        //     - The input is large. In this case the output is represented
        //       as a delayed merge or two sorted sub-lists.
        //     - The input is small. In this case we fall back to selection
        //       sort which is likely to be more efficient for small lists.
        *)

        type private MergeSort< 'a >
            = Merge of Merge< 'a >
            | Small of SelectionSort< 'a >

        (*
        // A Merge< 'a > may represent one of two delayed computations
        // depending on the value of tail. These are given by
        //     head :: merge (tail left) right
        // when tail = Left and
        //     head :: merge left (tail right)
        // when tail = Right. By representing merge in this form we ensure
        // that the first element of the output is always known.
        *)

        and private Merge< 'a > =
            { mutable head  : 'a
            ; mutable left  : MergeSort< 'a >
            ; mutable right : MergeSort< 'a >
            ; mutable tail  : Tail
            }

        and private Tail
            = Left  = 0
            | Right = 1

        [< CompilationRepresentation(CompilationRepresentationFlags.ModuleSuffix) >]
        module private MergeSort =

            let empty< 'a > : MergeSort< 'a > =
                Small(SelectionSort.empty)

            let isEmpty (xs : MergeSort< 'a >) : bool =
                match xs with
                  Merge(m) -> false
                | Small(s) -> SelectionSort.isEmpty s

            let head (xs : MergeSort< 'a >) : 'a =
                match xs with
                  Merge(m) -> m.head
                | Small(s) -> SelectionSort.head s
            
            (*
            // This computes a single step of a merge operation converting
            //     merge left right
            // into a form which can be represented by a Merge< 'a > or
            // returning left or right if either right or left respectively
            // are empty.
            *)

            let private mergeInplace (compare : Comparison< 'a >) (out : byref< MergeSort< 'a > >) (m : Merge< 'a >) : unit =
                if isEmpty m.left  then out <- m.right else
                if isEmpty m.right then out <- m.left  else
                    let x = head m.left
                    let y = head m.right
                    if compare.Invoke(x, y) < 0
                    then m.head <- x ; m.tail <- Tail.Left  // head left  :: merge (tail left) right
                    else m.head <- y ; m.tail <- Tail.Right // head right :: merge left (tail right)

            let merge (compare : Comparison< 'a >) (xs : MergeSort< 'a >) (ys : MergeSort< 'a >) : MergeSort< 'a > =
                let (h, t)    = (Unchecked.defaultof< 'a >, Unchecked.defaultof< Tail >)
                let mutable m = { head = h ; left = xs ; right = ys ; tail = t }
                let mutable o = Merge(m)
                mergeInplace compare (&o) m ; o

            (*
            // In taking the tail of a delayed merge operation there are two
            // possibilities, corresponding to the two types of delayed merge
            //     tail (head :: merge (tail left) right) = merge (tail left) right
            // and
            //     tail (head :: merge left (tail right)) = merge left (tail right)
            // In both of these cases we proceed by first computing the delayed
            // tail operation, either 'tail left' or 'tail right' and then
            // constructing a new delayed merge to represent the result.
            *)

            let rec tailInplace (compare : Comparison< 'a >) (xs : byref< MergeSort< 'a > >) : unit =
                match xs with
                  Merge(m) ->
                      match m.tail with
                        Tail.Left  -> tailInplace compare (&m.left)
                      | Tail.Right -> tailInplace compare (&m.right)
                      | _          -> failwith "Impossible."
                      mergeInplace compare (&xs) m
                | Small(s) ->
                      SelectionSort.tailInplace compare s
        
        (*
        // To sort a Stream we proceed in two stages. In the first stage we
        // group the input into small clusters of size groupSize which are to
        // be sorted with selection sort.
        *)

        let rec private group (compare : Comparison< 'a >) (groupSize : int) (xs : Stream< 'a >) : List< MergeSort< 'a > > =
            let xs     = toArray xs
            let groups = Array.length xs / groupSize
            let extras = Array.length xs - groups * groupSize
            let rest   =
                List.init groups (fun i ->
                    let start  = extras + groupSize * i
                    let finish = start + groupSize
                    Small(SelectionSort.ofSubArray compare xs start finish))
            if extras > 0 then
                Small(SelectionSort.ofSubArray compare xs 0 extras) :: rest
            else
                rest

        (*
        // In the second stage we repeatedly merge together pairs list's until
        // only a single list remains. This takes O(n) time as each iteration
        // of mergeAll must merge half as many pairs as the previous iteration.
        *)

        let rec private mergePairs (compare : Comparison< 'a >) (xss : List< MergeSort< 'a > >) (yss : List< MergeSort< 'a > >) : List< MergeSort< 'a > > =
            match xss with
              xs :: ys :: xss -> mergePairs compare xss (MergeSort.merge compare xs ys :: yss)
            | xs :: []        -> xs :: yss
            | []              -> yss

        let rec private mergeAll (compare : Comparison< 'a >) (xss : List< MergeSort< 'a > >) : MergeSort< 'a > =
            match xss with
              []   -> MergeSort.empty
            | [xs] -> xs
            | xss  -> mergeAll compare (mergePairs compare xss [])
        
        (*
        // We compute the result stream lazily by repeatedly taking the head
        // and tail of the MergeSort'd list. As mergeAll builds a balanced
        // tree of merge's and tail is linear time in the height of the tree
        // this requires O(log(n)) time for each element of the stream which
        // must be computed.
        *)

        let rec private flatten (compare : Comparison< 'a >) (xs : Ref< MergeSort< 'a > >) : StreamCell< 'a > =
            if MergeSort.isEmpty (!xs) then Nil else 
                let x  = MergeSort.head (!xs)
                let xs = lazy (MergeSort.tailInplace compare (&xs.contents) ; flatten compare xs)
                Cons(x, xs)

        /// (*<remarks>The operation 'sortWith compare xs' requires O(n) time
        /// initially and forces computation of all elements of xs. Forcing
        /// the computation of each element of the result requires a further
        /// O(log(n)) time.</remarks>*)
        let sortWith (compare : 'a -> 'a -> int) (xs : Stream< 'a >) : Stream< 'a > =
            let cmp  = FSharpFunc< _ , _ , _ >.Adapt(compare)
            let tree = ref (mergeAll cmp (group cmp 16 xs))
            S(lazy flatten cmp tree)

        let inline sortBy (f : 'a -> 'b) (xs : Stream< 'a >) : Stream< 'a > =
            sortWith (fun x y -> compare (f x) (f y)) xs

        let inline sort (xs : Stream< 'a >) : Stream< 'a > =
            sortWith compare xs


Create a new paste based on this one


Comments: