# -*- coding: iso-8859-2 -*-
# $Date: 2008-06-08 23:00:14 $, $Revision: 1.4 $
#
# More flexible string splitting, driven by a predicate function.
# 
# Author: Wojciech Muła
# e-mail: wojciech_mula@poczta.onet.pl
# www:    http://0x80.pl
#
# License: public domain

def pred_split(string, pred, count=-1):
	"""
	Split 'string' into tokens at the points where the predicate
	'pred' returns a true value.

	A token is a maximal run of characters for which 'pred' is
	false.  Characters accepted by 'pred' are separators: they never
	appear inside a token, and leading, trailing or repeated
	separators do not produce empty tokens.

	Predicate gets three arguments:
	1. string
	2. current character
	3. character's position
	It is called at most once per character, in order; any true
	value marks a separator.

	Optional argument 'count' sets the maximal number of splits.
	If negative, all possible splits are done and only the tokens
	are returned.  Otherwise scanning stops as soon as 'count'
	tokens have been collected, and the unsplit rest of the string
	is appended as one extra element.  The rest starts at the
	separator that ended the last token, so it may begin with
	separator characters or be empty.  With count == 0 the result
	is [string].  If the string holds fewer than 'count' tokens,
	only the tokens are returned.

	An empty 'string' yields [] (or [''] when count == 0).
	"""
	tokens = []
	token_start = 0  # where the token currently being scanned begins
	rest_start = 0   # where the unsplit remainder begins
	length = len(string)

	# Pretend a separator precedes the string: a leading token then
	# opens at position 0 through the ordinary transition below, the
	# predicate is not called twice for the first character, and an
	# empty string needs no special case.
	was_separator = True

	for i in range(length + 1):
		# The position one past the end acts as a separator, so a
		# token reaching the end of the string gets closed; the
		# predicate is not consulted there.  bool() keeps the
		# transition test correct for truthy non-bool results.
		is_separator = i == length or bool(pred(string, string[i], i))

		if is_separator != was_separator:
			if is_separator:
				tokens.append(string[token_start:i])
				rest_start = i
			else:
				token_start = i
		was_separator = is_separator

		if count >= 0 and len(tokens) == count:
			return tokens + [string[rest_start:]]

	return tokens


if __name__ == "__main__":
	# Demo: split three sample strings on runs of '!', '?' and '_',
	# first without a limit and then with count = 0..6.
	def pred(string, char, position):
		"""Treat '!', '?' and '_' as separator characters."""
		return char in "!?_"

	s1 = "?!!___Beautiful?_!is!??????better_!!??then__ugly.!!???!!!!"
	s2 = "Beautiful?_!is!??????better_!!??then__ugly.!!???!!!!"
	s3 = "Beautiful?_!is!??????better_!!??then__ugly."

	# Every print below takes exactly one parenthesised argument, so
	# the script runs unchanged, with identical output, under both
	# Python 2 (print statement) and Python 3 (print function).
	for s in [s1, s2, s3]:
		print(pred_split(s, pred))

	for s in [s1, s2, s3]:
		print("")
		print("=" * 77)
		print("")
		for count in range(7):
			print("count = %d %s" % (count, pred_split(s, pred, count)))

# vim: ts=4 sw=4 nowrap noexpandtab

