mirror of
https://github.com/Sneed-Group/Poodletooth-iLand
synced 2024-12-25 12:42:41 -06:00
203 lines
8 KiB
Python
203 lines
8 KiB
Python
|
from itertools import tee
|
||
|
|
||
|
try:
    _default_cmp = cmp              # Python 2: the builtin three-way comparison
except NameError:                   # interpreters without a builtin cmp()
    def _default_cmp(x, y):
        """Three-way comparison built from the rich comparison operators."""
        return (x > y) - (x < y)


def itersorted(iterable, cmp = _default_cmp, key = lambda x: x, reverse = False):
    """
    This function returns a generator object that yields sorted items from
    'iterable'.

    It implements a form of lazy sorting that's most useful in two cases:
    1) When you only need the first few values in the sorted data.
    2) When you want to amortize the cost of the sort over the time
       you use the data.

    It is to be considered a 'stable sort', where values with equivalent
    sorting criteria maintain their relative order as it is in the input
    data set.

    Parameters:
      iterable - the data to sort.  It is read through to its end before
                 the first value is yielded.
      cmp      - three-way comparison applied to pairs of keys.  Only the
                 sign of its result is used, so any negative / zero /
                 positive number is acceptable.
      key      - function computing the sort key of each item.
      reverse  - when true, yield the items in the opposite order.  Items
                 with equal keys still keep their input order.
    """

    # Notes:
    # Understanding the concepts of 'left' and 'right' here is important.
    # 'left' values are those that are yielded earlier in the sort.  So
    # each subsequent value yielded is 'to the right' of the previous one.
    # A stack is used to maintain sets of values who share the same key
    # value.  Each layer corresponds to one key.  During the traversals of
    # the input data, values are added to each layer in such a way that
    # they maintain their relative position (to others in the same layer)
    # from the original data.  This ensures a 'stable sort'.

    compare = cmp
    rev = -1 if reverse else 1      # flips the meaning of 'left' and 'right'

    def side(k, ref):
        """Return -1, 0 or 1 when k lies left of, level with or right of ref."""
        c = compare(k, ref)
        # Reduce the result to its sign so comparators such as
        # 'lambda a, b: a - b' behave correctly.
        return rev * ((c > 0) - (c < 0))

    done = object()                 # marks the end of the base iterator
    stack = []                      # stack of layers; 'left' layers sit on top.
                                    # Each layer is [key, resume iterator, [values]]
    started = False                 # True once the first group has been yielded
    lLimit = None                   # key of the most recently yielded group

    # The base iterator that tracks our main progress through the data.
    pairs = ((key(x), x) for x in iterable)

    while True:
        if not stack:
            # Seed the stack with the next item lying 'to the right' of
            # everything yielded so far (anything else is already out).
            item = next(pairs, done)
            while item is not done and started and side(item[0], lLimit) != 1:
                item = next(pairs, done)
            if item is done:
                # Input exhausted and nothing pending: the sort is complete.
                # An explicit return avoids leaking StopIteration out of the
                # generator body (PEP 479).
                return
            k, val = item
            # Keep 'pairs' as the base cursor; 'scan' walks ahead of it.
            pairs, scan = tee(pairs)
            stack.append([k, scan, [val]])

        # Continue through the data from where the top layer left off.
        layer = stack[-1]
        scan = layer[1]
        for k, val in scan:
            pos = side(k, layer[0])
            if pos == 1:
                continue            # right of the top layer: not needed yet
            if started and side(k, lLimit) != 1:
                continue            # not right of lLimit: already yielded
            if pos == -1:
                # Strictly left of the top layer.  Remember where the layer
                # being buried must resume, then open a new layer above it.
                # 'scan' is always a tee object here; tee() of a tee object
                # is expected to give a copy positioned at the current item
                # that shares the buffered data.
                layer[1] = tee(scan)[1]
                layer = [k, scan, []]
                stack.append(layer)
            layer[2].append(val)

        # The top layer now holds the left-most values not yet yielded, in
        # the order they occur in the source data.  Its key becomes the new
        # limit that later passes use to ignore finished values.
        started = True
        lLimit, _, vals = stack.pop()
        for val in vals:
            yield val
|
||
|
|
||
|
if __debug__:
    def P(i):
        """Print every item of the reversible sequence 'i', last item first."""
        for x in reversed(i):
            print(x)

    def test(data = None):
        """
        Print an ad-hoc comparison of itersorted() against sorted().

        'data' is a sequence of items whose first element is the sort key.
        When omitted, a small random sample of (key, index) pairs is built,
        because no sample is defined at module level.
        """
        import random
        from itertools import islice

        if data is None:
            data = [(random.randint(0, 9), n) for n in range(20)]

        control = sorted(data, key = lambda x: x[0])
        variable = itersorted(data, key = lambda x: x[0])

        # True when the first ten lazily sorted items match the control.
        print(control[:10] == [x for x in islice(variable,10)])
        print(data)
        print(control)

        variable = itersorted(data, key = lambda x: x[0])
        print([x for x in islice(variable,10)])

    from unittest import TestCase, main
    from random import shuffle
    from itertools import islice
    from functools import cmp_to_key

    def _descending(a, b):
        """Three-way comparison that inverts the natural ordering."""
        return (a < b) - (a > b)

    def _reference_sort(data, cmp = None, key = None, reverse = False):
        """
        Sort 'data' with the builtin sorted(), accepting the same options
        as itersorted().  A 'cmp' function is applied to the keys through
        functools.cmp_to_key, so no 'cmp' argument is passed to sorted().
        """
        if cmp is not None:
            wrap = cmp_to_key(cmp)
            if key is None:
                key = wrap
            else:
                extract = key
                key = lambda x: wrap(extract(x))
        return sorted(data, key = key, reverse = reverse)

    class LazySortTest(TestCase):
        """
        Run these tests with:
        > python LazySort.py
        """

        TESTLEN = 10                    # number of leading items compared
        RANGELEN = max(TESTLEN, 10)     # number of items in DATA

        # Two shuffled columns in which every value occurs twice, paired
        # into tuples so that equal keys exist and stability is exercised.
        a = list(range(RANGELEN // 2)) * 2
        b = list(range(RANGELEN // 2)) * 2
        shuffle(a)
        shuffle(b)
        DATA = list(zip(a,b))
        shuffle(DATA)
        del a
        del b

        def _check(self, **options):
            """Compare the first TESTLEN items of both sorts for 'options'."""
            control = _reference_sort(self.DATA, **options)
            variable = itersorted(self.DATA, **options)
            self.assertEqual(control[:self.TESTLEN],
                             [x for x in islice(variable, self.TESTLEN)])

        def testRange(self):
            self._check()

        def testRangeCompare(self):
            self._check(cmp = _descending)

        def testRangeKey(self):
            self._check(key = lambda x: x[0])

        def testRangeReverse(self):
            self._check(reverse = True)

        def testRangeCompareKey(self):
            self._check(cmp = _descending, key = lambda x: x[0])

        def testRangeCompareReverse(self):
            self._check(cmp = _descending, reverse = True)

        def testRangeKeyReverse(self):
            self._check(key = lambda x: x[0], reverse = True)
            self._check(key = lambda x: x[1], reverse = True)

        def testRangeCompareKeyReverse(self):
            self._check(cmp = _descending, key = lambda x: x[0], reverse = True)

    if __name__ == '__main__':
        main() # unittest.main
|
||
|
|