Your split function is indeed a bottleneck here as it creates sublists with all the memory overhead that it represents. If msort() was designed to process an iterator, then you could change split() to return two iterators (e.g. one for odd positions and one for even positions). This would eliminate most of the memory allocation and management in the sort process.
Here's an example of what I have in mind:
from itertools import tee,islice
def split(xs,size):
i1,i2 = tee(xs) # two iterators
s1,s2 = size//2 + size%2, size//2 # corresponding sizes (evens, odds)
yield islice(i1,0,None,2),s1 # even positions (iterator,size)
yield islice(i2,1,None,2),s2 # odd positions (iterator,size)
def msort(xs,size=None): # use iterators when recursing
if size is None:size = len(xs) # first call expects a list
if size == 1: yield from xs; return # don't split single item
yield from merge(*(msort(i,s) # msort(iterator)
for i,s in split(xs,size))) # split as iterators
def merge(xs, ys, x=None, y=None):
if not y:
y = next(ys,None) # using next(..,default) instead of try/except
if not x:
x = next(xs,None)
if x and y:
if x <= y:
yield x
yield from merge(xs, ys, y=y)
else:
yield y
yield from merge(xs, ys, x=x)
else:
if x:
yield x
yield from xs
if y:
yield y
yield from ys
output:
print(*msort([2,3,5,6,3,2,3,4])) # 2 2 3 3 3 4 5 6
print(*msort([4,6,5,1,7,2,3])) # 1 2 3 4 5 6 7
print(*msort([5,6,4,1,2,3])) # 1 2 3 4 5 6
print(*msort([5,4,3,2,1])) # 1 2 3 4 5
print(*msort([21,32,17,23])) # 17 21 23 32
print(*msort([17, 3,10])) # 3 10 17
print(*msort([5, 3])) # 3 5
print(*msort([1])) # 1
Note that this still has lots of room for improvements, it's just an illustration of the iterator approach.
If you streamline the even/odd strategy into the algorithm, the split() function could be eliminated in favour of a slicing approach directly in msort():
from itertools import islice
def mySort(A,start=None,step=None):
if start is None: start,step = 0,1
eStart, eStep = start ,step*2
oStart, oStep = start+step,step*2
if eStart >= len(A):
yield from islice(A,oStart,None,oStep)
elif oStart >=len(A):
yield from islice(A,eStart,None,eStep)
else:
yield from merge(mySort(A,eStart,eStep),mySort(A,oStart,oStep))
def merge(A,B,a=None,b=None):
if a is None: a = next(A,None)
if b is None: b = next(B,None)
if a is None: yield b; yield from B
elif b is None: yield a; yield from A
elif a<b: yield a; yield from merge(A,B,b=b)
else: yield b; yield from merge(A,B,a=a)
Note that, in both cases, the recursion depth will be 2*len(A) so you won't be able to sort arrays larger than about 490 elements.