"""
A very rough grammar to parse skyglow files.

We don't actually understand much of the data, but it should be enough
to pull data from the files and to do something with the metadata
for clients that actually understand skyglow's data model.

The document row contains information from the header, assuming each key
is unique (which means comments are lost); header lines not containing
a : are ignored.  The key is the full string between the initial "# " and
the colon, with blanks replaced by underscores, and lowercased.
"""

import gzip
import re

from gavo import base
from gavo.grammars.customgrammar import CustomRowIterator

class RowIterator(CustomRowIterator):
	"""iterates over records in a skyglow file.

	The header is parsed into self.docRec during construction (exposed
	through getParameters); the remaining lines are then split into
	hard-coded IYA Lightmeter columns by _iterRows.
	"""
	# number of the line last read from the source (1-based); used by
	# getLocator to produce error locations.
	curLine = 0

	def __init__(self, grammar, sourceToken, sourceRow=None):
		CustomRowIterator.__init__(self, grammar, sourceToken, sourceRow)
		self.lines = self._iterLines()
		try:
			self.docRec = self._parseHeader()
		except base.SourceParseError:
			raise
		except Exception as ex:
			# Translate anything unexpected into a SourceParseError so
			# clients get a message with a source location; chaining
			# keeps the original traceback available.
			raise base.SourceParseError(str(ex),
				location=self.getLocator()) from ex

	def _iterLines(self):
		"""yields lines from the source file, transparently unzipping
		.gz sources and keeping curLine up to date.
		"""
		if self.sourceToken.endswith(".gz"):
			f = gzip.open(self.sourceToken, "rt", encoding="utf-8")
		else:
			f = open(self.sourceToken, "r", encoding="utf-8")

		try:
			for ln in f:
				self.curLine += 1
				yield ln
		finally:
			f.close()

	def _parsePair(self, inLine):
		"""returns a dictionary key -> value based on a skyglow
		header line.

		Undecipherable lines cause this to raise a SourceParseError.
		"""
		mat = re.match(r"^# ([^:]*):(.*)", inLine)
		if not mat:
			raise base.SourceParseError("Malformed skyglow header line",
				offending=inLine, location=self.getLocator())
		# normalise the key as documented in the module docstring:
		# blanks to underscores, lowercased.
		return {
			mat.group(1).replace(" ", "_").lower():
				mat.group(2).strip()}

	def _parseHeader(self):
		"""parses the skyglow header and returns it as a dictionary.

		This consumes header lines from self.lines, leaving the
		iterator positioned at the first data line.

		Raises ReportableError for an empty file, ValueError for
		structural problems, and SourceParseError for a truncated
		header.
		"""
		try:
			ln = next(self.lines)
		except StopIteration:
			raise base.ReportableError("Empty file")
		docRec = {}
		if not ln.startswith("# Community Standard Skyglow Data Format"):
			raise ValueError("Magic missing")

		docRec.update(self._parsePair(next(self.lines)))
		docRec.update(self._parsePair(next(self.lines)))
		try:
			toGo = int(docRec["number_of_header_lines"])-3
		except (KeyError, ValueError):
			# KeyError: the header count line was missing entirely;
			# ValueError: it was there but not an integer.
			raise ValueError("Number of header lines bad or missing.")

		# stop three lines early: the last three header lines are the
		# magic ones consumed below.
		while toGo>3:
			try:
				docRec.update(self._parsePair(next(self.lines)))
			except base.SourceParseError:
				pass # ignore malformed header cards for now
			except StopIteration:
				raise base.SourceParseError("Unexpected end of header",
					location=self.getLocator())
			toGo -= 1

		# the remaining lines are magic
		docRec["COLUMN_HEADINGS"] = next(self.lines)
		docRec["UNITS"] = next(self.lines)
		if not next(self.lines).strip().endswith("END OF HEADER"):
			raise ValueError("Header end marking not found where expected.")
		return docRec

	def _iterRows(self):
		# We hardcode IYA Lightmeter columns for now.  Think about
		# what to do sensibly later on.
		keys = ["epUTC", "epLocal", "temperature", "counts"]
		for ln in self.lines:
			yield dict(zip(keys, ln.strip().split(";")))

	def getParameters(self):
		"""returns the document row for this source.

		That is parser_ (ourselves), whatever came in through
		sourceRow, and the key/value pairs parsed from the header.
		"""
		res = {"parser_": self}
		if self.sourceRow:
			res.update(self.sourceRow)
		res.update(self.docRec)
		return res

	def getLocator(self):
		"""returns a human-readable file position for error messages.
		"""
		return "%s, line %s"%(self.sourceToken, self.curLine)


if __name__=="__main__":
	# Manual smoke test: parse one sample file, dump its header
	# metadata, then show the first few data rows.
	from gavo.grammars.customgrammar import CustomGrammar
	rowIter = RowIterator(CustomGrammar(None),
		"../uploads/DE_HEIDELBERG_2/20130919_111529_DE_HEIDELBERG_2.skyglow.gz")
	print(rowIter.getParameters())
	rowsLeft = 5
	for row in rowIter:
		print(row)
		rowsLeft -= 1
		if not rowsLeft:
			break
