"""
Tests for the various corpus-related classes.
"""

import unittest

from ucdresolve import evaluation

class CrossValidationTest(unittest.TestCase):
	"""tests for correct splitting of sequence based corpus sources.

	Every test feeds a sample sequence to
	evaluation.iterCrossValidationIterable and checks the (training, test)
	pairs it yields: their number, their lengths, and that the two halves
	of a pair share no items.
	"""
	def _runSplitTest(self, num, sample):
		"""asserts that splitting sample into num folds behaves as expected.

		num is passed as the first argument to iterCrossValidationIterable
		and is also the number of pairs it must yield; sample is the
		sequence to split (its items must be hashable, since they are put
		into sets for the overlap check).

		Each test part must hold len(sample)//num items and each training
		part all the remaining ones.
		"""
		# Floor division, so that the expected length is an integer on
		# Python 3 as well (plain "/" would give 9.9 for 99 items in 10 folds).
		expectedTestLen = len(sample)//num
		expectedTrainLen = len(sample)-expectedTestLen
		pairsSeen = 0
		for training, test in evaluation.iterCrossValidationIterable(num,
				sample):
			self.assertEqual(len(training), expectedTrainLen)
			self.assertEqual(len(test), expectedTestLen)
			# An empty intersection is falsy; assertFalse replaces the
			# assert_ alias, which newer unittest versions no longer have.
			self.assertFalse(set(training)&set(test),
				"test and training overlap")
			pairsSeen += 1
		self.assertEqual(pairsSeen, num,
			"Didn't get as many pairs as I expected")

	# The samples below are wrapped in list() because range() only returns
	# a real list on Python 2; this keeps the input type the same everywhere.

	def testSplitSmall(self):
		"""one item per fold.
		"""
		self._runSplitTest(4, list(range(4)))

	def testSplitLarger(self):
		"""sample sizes that are exact multiples of the fold count.
		"""
		self._runSplitTest(4, list(range(64)))
		self._runSplitTest(3, list(range(99)))

	def testSplitNonDiv(self):
		"""sample size that is not a multiple of the fold count.
		"""
		self._runSplitTest(10, list(range(99)))


# Only run the tests when this file is executed as a script, not when it
# is imported (e.g. by a test collector).
if __name__ == "__main__":
	unittest.main()
