#!/usr/bin/python
import sys, os, math, string, re, gzip, urllib, shutil, Bio
import cStringIO
from Bio.PDB import PDBParser
import Bio.PDB.Dice
from Bio.PDB.DSSP import *
from numpy import *
from Bio.PDB.Polypeptide import *



# Three-letter residue names mapped to their one-letter amino-acid codes
# (the 20 standard amino acids only).
resdict = {
	'ALA': 'A',
	'ARG': 'R',
	'ASN': 'N',
	'ASP': 'D',
	'CYS': 'C',
	'GLN': 'Q',
	'GLU': 'E',
	'GLY': 'G',
	'HIS': 'H',
	'ILE': 'I',
	'LEU': 'L',
	'LYS': 'K',
	'MET': 'M',
	'PHE': 'F',
	'PRO': 'P',
	'SER': 'S',
	'THR': 'T',
	'TRP': 'W',
	'TYR': 'Y',
	'VAL': 'V',
}


# Maximum accessibility for each one-letter amino-acid code. The DSSP
# accessibility of a residue is divided by this value to obtain a relative
# accessibility (presumably in square Angstroms -- TODO confirm the source
# of these reference values).
residue_max_acc = {
	'A': 113.0,
	'C': 140.0,
	'D': 151.0,
	'E': 183.0,
	'F': 218.0,
	'G': 85.0,
	'H': 194.0,
	'I': 182.0,
	'K': 211.0,
	'L': 180.0,
	'M': 204.0,
	'N': 158.0,
	'P': 143.0,
	'Q': 189.0,
	'R': 241.0,
	'S': 122.0,
	'T': 146.0,
	'V': 160.0,
	'W': 259.0,
	'Y': 229.0,
}


def buildLocalPDBName( pdb_id, cache_dir = "C:\\Python26\\RSA\\structures\\" ):
	"""Constructs the name of the pdb file in the local pdb cache from
	the pdb identifier.

	pdb_id    -- PDB identifier; it is lower-cased to build the file name.
	cache_dir -- directory holding the cached structures. Defaults to the
	             historical hardcoded location. It is created if missing.

	Returns the path "<cache_dir>/<pdb_id>.pdb.gz" (the file itself is not
	created or checked here).
	Raises OSError if the cache directory cannot be created.
	"""
	# Test for existence with isdir() rather than os.access(R_OK): an
	# existing but unreadable directory must not trigger makedirs().
	if not os.path.isdir( cache_dir ):
		try:
			os.makedirs( cache_dir )
		except OSError:
			# Another process may have created the directory between the
			# check and the call; only re-raise if it is really missing.
			if not os.path.isdir( cache_dir ):
				raise
	return os.path.join( cache_dir, pdb_id.lower() + ".pdb.gz" )

def downloadStatus( count, bsize, tsize ):
	"""Reports the download status of a file"""
	if count*bsize > tsize:
		print "\t", tsize, "/", tsize, "done"
	else:
		if count/8. == count/8:
			print "\t", count*bsize, "/", tsize

def retrievePDBStructure( pdb_id ):
	pdb_id = string.lower( pdb_id )
	middle = pdb_id[1:3]
	source = "ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/pdb/" + middle + "/pdb" + pdb_id + ".ent.gz"
	target = buildLocalPDBName( pdb_id )
	print "   downloading structure", pdb_id
	print source
	try:
		urllib.urlretrieve( source, target, downloadStatus )
		"""Here is to determine whether the target file is empty"""
		f = open( target, "r" )
		f.seek( 0, 2 )
		if f.tell() == 0:
			print "      structure not available. trying archive of obsolete structures..."
			source = "ftp://ftp.wwpdb.org/pub/pdb/data/structures/obsolete/pdb/" + middle + "/pdb" + pdb_id + ".ent.gz"
			print source
			try:
				urllib.urlretrieve( source, target, downloadStatus )
				f = open( target, "r")
				f.seek( 0, 2 )
				if f.tell() == 0:
					print "      doesn't work. structure not available."
					return False
				else:
					return True
			except:
				print "      doesn't work. structure not available."
				return False
		else:
			return True
	
	except:
		print "      structure not available. trying archive of all structures..."
		source = "ftp://ftp.wwpdb.org/pub/pdb/data/structures/all/pdb/" + "pdb" + pdb_id + ".ent.gz"
		print source
		try:
			urllib.urlretrieve( source, target, downloadStatus )
			f = open( target, "r")
			f.seek( 0, 2 )
			if f.tell() == 0:
				print "      doesn't work. structure not available."
				return False
			else:
				return True
		except:
			print "      doesn't work. structure not available."
			return False

def parsePDBStructure( pdb_id ):
	filename = buildLocalPDBName( pdb_id )
	if not os.access( filename, os.R_OK ):
		found = retrievePDBStructure( pdb_id )
	else:
		found = True
	
	if not found:
		try:
			os.unlink(filename)
			print "   structure", pdb_id, "cannot be retrieved"
			return False
		except WindowsError:
			return False
		
	
	parser=PDBParser()
	tmpfname = "tmpfile1.pdb"
	shutil.copy( filename, tmpfname+".gz" )
	import gzip
	f_in = gzip.open('tmpfile1.pdb.gz', 'rb')
	f_out = open('tmpfile1.pdb', 'wb')
	f_out.writelines(f_in)
	f_out.close()
	f_in.close()

	print pdb_id, filename, tmpfname
	
	import time
	time_count = 0
	while not os.access( tmpfname, os.R_OK ):
		time.sleep(1)
		time_count += 1
		if time_count > 10:
			return False
	try:
		structure = parser.get_structure( pdb_id, tmpfname )
	except:
		time.sleep(1)
		time_count += 1
		os.unlink(filename)
		print "   PDBException: 'No parent' for", pdb_id
		os.unlink( tmpfname )
		return False
	return structure

def getStruInfo( pdb_id, chain_id, DSSP = 'dsspcmbi.exe' ):
	"""Default setting: DSSP executable file should be in the local directory."""

	if chain_id == '-':
		chain_id = ' '

	structure = parsePDBStructure( pdb_id )
	
	if not structure:
		print "could not get structure"
		return ("", "", "", "")
	print "   calculating secondary structure for", pdb_id + chain_id
	
	model = structure[0]
	chain = model[chain_id]
	
		
	#try:
		#extract( structure, chain_id, 0, 50000, './tempfile.pdb' )
		#dssp_dict, dssp_key = dssp_dict_from_pdb_file( './tempfile.pdb', DSSP )
	#try:
	Bio.PDB.Dice.extract( structure, chain_id, 0, 50000, 'C:\\Python26\\RSA\\tempfile.pdb' )
	dssp_dict, dssp_key = dssp_dict_from_pdb_file( 'C:\\Python26\\RSA\\tempfile.pdb', DSSP )
	#except:
		#return ("", "", "", "3")
	if not dssp_dict:
		print "could not get the secondary structure for", pdb_id + chain_id
		return ("", "",  "", "")
	bury = ""
	ss = ""
	seq = ""
	seq_list = []
	acc_list = []
	for i in range(0,len(dssp_key)):
		key = dssp_key[i]
		if not dssp_dict[key][0] in resdict.values():
			print "unusual residue %s" % dssp_dict[key][0]
			bury += "x"
		else:
			acc = float( dssp_dict[key][2] ) / float( residue_max_acc[dssp_dict[key][0]] )
			acc_list.append(acc)
			if acc < 0.25:
				bury += "b"
			else:
				bury += "e"
			seq += dssp_dict[key][0]
			seq_list.append(dssp_dict[key][0])
			if dssp_dict[key][1] == '-':
				ss += 'n'
			else:
				ss += dssp_dict[key][1]
	return (seq_list, acc_list)
	
	

def get_structure_header(pdb_id):
	structure = parsePDBStructure( pdb_id )
	if not structure:
		print "could not get structure"
		return False
	resolution = structure.header['resolution']
	if not resolution:
		resolution = 'NA'
	method = structure.header['structure_method']
	model_count = 0
	print "Resolution is ", resolution
	return (resolution, method, len(structure))