Module dadi.TwoLocus.TLSpectrum_mod
Contains triallelic Spectrum object
Expand source code
"""
Contains triallelic Spectrum object
"""
import os
import numpy as np
import dadi
class TLSpectrum(np.ma.masked_array):
"""
Represents a two-locus frequency spectrum.
The constructor has the format:
fs = dadi.Triallele.TLSpectrum(data, mask, mask_infeasible,
data_folded,
extrap_x, extrap_t)
data: The triallelic frequency spectrum data
mask: An optional array of the same size as data, similar to dadi.Spectrum
data_folded: If True, it is assumed that the input data is folded
check_folding: If True and data_folded=True, the data and
mask will be checked to ensure they are consistent
extrap_x: Optional floating point value specifying x value to use in
extrapolation.
extrap_t: Optional floating point value specifying t value to use in
extrapolation.
"""
def __new__(subtype, data, mask=np.ma.nomask, mask_infeasible=True,
data_folded=None, check_folding=True,
dtype=float, copy=True, fill_value=np.nan, keep_mask=True,
shrink=True, extrap_x=None, extrap_t=None):
data = np.asanyarray(data)
if mask is np.ma.nomask:
mask = np.ma.make_mask_none(data.shape)
subarr = np.ma.masked_array(data, mask=mask, dtype=dtype, copy=copy,
fill_value=fill_value, keep_mask=True,
shrink=True)
subarr = subarr.view(subtype)
if hasattr(data, 'folded'):
if data_folded is None or data_folded == data.folded:
subarr.folded = data.folded
elif data_folded != data.folded:
raise ValueError('Data does not have same folding status as '
'was called for in TLSpectrum constructor.')
elif data_folded is not None:
subarr.folded = data_folded
else:
subarr.folded = False
### XXX To do: ensure that all goes well when creating the TLSpectrum, come
### back to this
# Check that if we're declaring that the input data is folded, it actually is,
# and the mask reflects this.
if mask_infeasible:
subarr.mask_infeasible()
subarr.extrap_x = extrap_x
subarr.extrap_t = extrap_t
return subarr
# See https://scipy.github.io/old-wiki/pages/Subclasses.html for information on
# __array_finalize__ and __array_wrap__ methods.
#
# We need these methods to ensure extra attributes get copied along when
# we do arithmetic on the FS.
def __array_finalize__(self, obj):
if obj is None:
return
np.ma.masked_array.__array_finalize__(self, obj)
self.folded = getattr(obj, 'folded', 'unspecified')
self.extrap_x = getattr(obj, 'extrap_x', None)
self.extrap_t = getattr(obj, 'extrap_t', None)
def __array_wrap__(self, obj, context=None):
result = obj.view(type(self))
result = np.ma.masked_array.__array_wrap__(self, obj,
context=context)
result.folded = self.folded
result.extrap_t = self.extrap_t
return result
def _update_from(self, obj):
np.ma.masked_array._update_from(self, obj)
if hasattr(obj, 'folded'):
self.folded = obj.folded
if hasattr(obj, 'extrap_x'):
self.extrap_x = obj.extrap_x
if hasattr(obj, 'extrap_t'):
self.extrap_t = obj.extrap_t
# masked_array has priority 15.
__array_priority__ = 20
def __repr__(self):
return 'TLSpectrum(%s, folded=%s)'\
% (str(self), str(self.folded))
def mask_infeasible(self):
"""
Mask any infeasible entries.
"""
ns = len(self)-1
self.mask[0,0,0] = True
self.mask[0,:,0] = True
self.mask[0,0,:] = True
for ii in range(len(self)):
for jj in range(len(self)):
for kk in range(len(self)):
if ii+jj+kk > ns:
self.mask[ii,jj,kk] = True
for ii in range(len(self)):
self.mask[ii,ns-ii,0] = True
self.mask[ii,0,ns-ii] = True
return self
def unfold(self):
if not self.folded:
raise ValueError('Input Spectrum is not folded.')
data = self.data
unfolded = TLSpectrum(data, mask_infeasible=True)
unfolded.extrap_x = self.extrap_x
unfolded.extrap_t = self.extrap_t
return unfolded
def _get_sample_size(self):
return np.asarray(self.shape)[0] - 1
sample_size = property(_get_sample_size)
def _ensure_shape_and_dimension(self):
"""
Ensure that fs has Npop dimensions.
"""
pass
# Make from_file a static method, so we can use it without an instance.
@staticmethod
def from_file(fid, mask_infeasible=True, return_comments=False):
"""
Read frequency spectrum from file.
fid: string with file name to read from or an open file object.
mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum.
return_comments: If true, the return value is (fs, comments), where
comments is a list of strings containing the comments
from the file (without #'s).
See to_file method for details on the file format.
"""
newfile = False
# Try to read from fid. If we can't, assume it's something that we can
# use to open a file.
if not hasattr(fid, 'read'):
newfile = True
fid = open(fid, 'r')
line = fid.readline()
# Strip out the comments
comments = []
while line.startswith('#'):
comments.append(line[1:].strip())
line = fid.readline()
# Read the shape of the data
shape,folded,extrap_x,extrap_t = line.split()
shape = [int(shape)+1,int(shape)+1,int(shape)+1]
data = np.fromstring(fid.readline().strip(),
count=np.product(shape), sep=' ')
# fromfile returns a 1-d array. Reshape it to the proper form.
data = data.reshape(*shape)
maskline = fid.readline().strip()
mask = np.fromstring(maskline,
count=np.product(shape), sep=' ')
mask = mask.reshape(*shape)
if folded == 'folded':
folded = True
else:
folded = False
if extrap_x == 'None':
extrap_x = None
else:
extrap_x = float(extrap_x)
if extrap_t == 'None':
extrap_t = None
else:
extrap_t = float(extrap_t)
# If we opened a new file, clean it up.
if newfile:
fid.close()
fs = TLSpectrum(data, mask, mask_infeasible, data_folded=folded)
fs.extrap_x = extrap_x
fs.extrap_t = extrap_t
if not return_comments:
return fs
else:
return fs,comments
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True):
"""
Write frequency spectrum to file.
fid: string with file name to write to or an open file object.
precision: precision with which to write out entries of the SFS. (They
are formated via %.<p>g, where <p> is the precision.)
comment lines: list of strings to be used as comment lines in the header
of the output file.
foldmaskinfo: If False, folding and mask and population label
information will not be saved. This conforms to the file
format for dadi versions prior to 1.3.0.
The file format is:
# Any number of comment lines beginning with a '#'
A single line containing N integers giving the dimensions of the fs
array. So this line would be '5 5 3' for an SFS that was 5x5x3.
(That would be 4x4x2 *samples*.)
On the *same line*, the string 'folded' or 'unfolded'
denoting the folding status of the array
A single line giving the array elements. The order of elements is
e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ...
A single line giving the elements of the mask in the same order as
the data line. '1' indicates masked, '0' indicates unmasked.
"""
# Open the file object.
newfile = False
if not hasattr(fid, 'write'):
newfile = True
fid = open(fid, 'w')
# Write comments
for line in comment_lines:
fid.write('# ')
fid.write(line.strip())
fid.write(os.linesep)
# Write out the shape of the fs
fid.write('{0} '.format(self.sample_size))
if foldmaskinfo:
if not self.folded:
fid.write('unfolded ')
else:
fid.write('folded ')
if extrapinfo:
if not self.extrap_x:
fid.write('None ')
else:
fid.write('{0} '.format(self.extrap_x))
if not self.extrap_t:
fid.write('None')
else:
fid.write('{0}'.format(self.extrap_t))
fid.write(os.linesep)
# Write the data to the file
self.data.tofile(fid, ' ', '%%.%ig' % precision)
fid.write(os.linesep)
if foldmaskinfo:
# Write the mask to the file
np.asarray(self.mask,int).tofile(fid, ' ')
fid.write(os.linesep)
# Close file
if newfile:
fid.close()
tofile = to_file
def marginalA(self):
"""
Marginal 1D frequency spectrum for A locus.
"""
ns = self.shape[0] - 1
marg = dadi.Spectrum(np.zeros(ns+1))
for fAB in range(ns):
for fAb in range(ns-fAB):
marg[fAB+fAb] += self[fAB,fAb,:].sum()
marg.extrap_x = self.extrap_x
marg.extrap_t = self.extrap_t
return marg
def marginalB(self):
"""
Marginal 1D frequency spectrum for B locus.
"""
ns = self.shape[0] - 1
marg = dadi.Spectrum(np.zeros(ns+1))
for fAB in range(ns):
for faB in range(ns-fAB):
marg[fAB+faB] += self[fAB,:,faB].sum()
marg.extrap_x = self.extrap_x
marg.extrap_t = self.extrap_t
return marg
def mean_r2(self):
"""
Mean of normalized squared correlation coefficient between A and B loci.
"""
from . import numerics
ns = self.shape[0] - 1
norm = self.sum()
Dbin, r2bin = numerics.LD_per_bin(ns)
return (self*r2bin).sum()/self.sum()
def fold(self):
if self.folded:
raise ValueError('Input Spectrum is already folded.')
ns = self.shape[0] - 1
folded = 0*self
for ii in range(ns+1):
for jj in range(ns+1):
for kk in range(ns+1):
if self.mask[ii,jj,kk]:
continue
p = ii + jj
q = ii + kk
if p > ns/2 and q > ns/2:
# Switch A/a and B/b, so AB becomes ab, Ab becomes aB, etc
folded[ns-ii-jj-kk,kk,jj] = self[ns-ii-jj-kk,kk,jj] + self[ii,jj,kk]
folded.mask[ii,jj,kk] = True
elif p > ns/2:
# Switch A/a, so AB -> aB, Ab -> ab, aB -> AB, and ab -> Ab
folded[kk,ns-ii-jj-kk,ii] = self[kk,ns-ii-jj-kk,ii] + self[ii,jj,kk]
folded.mask[ii,jj,kk] = True
elif q > ns/2:
# Switch B/b, so AB -> Ab, Ab -> AB, aB -> ab, and ab -> aB
folded[jj,ii,ns-ii-jj-kk] = self[jj,ii,ns-ii-jj-kk] + self[ii,jj,kk]
folded.mask[ii,jj,kk] = True
folded.folded = True
folded.extrap_x = self.extrap_x
folded.extrap_t = self.extrap_t
return folded
# Ensures that when arithmetic is done with TLSpectrum objects,
# attributes are preserved. For details, see similar code in
# dadi.Spectrum_mod
for method in ['__add__','__radd__','__sub__','__rsub__','__mul__',
'__rmul__','__div__','__rdiv__','__truediv__','__rtruediv__',
'__floordiv__','__rfloordiv__','__rpow__','__pow__']:
exec("""
def %(method)s(self, other):
self._check_other_folding(other)
if isinstance(other, np.ma.masked_array):
newdata = self.data.%(method)s (other.data)
newmask = np.ma.mask_or(self.mask, other.mask)
else:
newdata = self.data.%(method)s (other)
newmask = self.mask
if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
extrap_x = None
else:
extrap_x = self.extrap_x
if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
extrap_t = None
else:
extrap_t = self.extrap_t
outfs = self.__class__.__new__(self.__class__, newdata, newmask,
mask_infeasible=False,
data_folded=self.folded,
extrap_x=extrap_x, extrap_t=extrap_t)
return outfs
""" % {'method':method})
# Methods that modify the Spectrum in-place.
for method in ['__iadd__','__isub__','__imul__','__idiv__',
'__itruediv__','__ifloordiv__','__ipow__']:
exec("""
def %(method)s(self, other):
self._check_other_folding(other)
if isinstance(other, np.ma.masked_array):
self.data.%(method)s (other.data)
self.mask = np.ma.mask_or(self.mask, other.mask)
else:
self.data.%(method)s (other)
if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x:
self.extrap_x = None
if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t:
self.extrap_t = None
return self
""" % {'method':method})
def _check_other_folding(self, other):
"""
Ensure other Spectrum has same .folded status
"""
if isinstance(other, self.__class__)\
and (other.folded != self.folded):
raise ValueError('Cannot operate with a folded Spectrum and an '
'unfolded one.')
# Allow TLSpectrum objects to be pickled.
# See http://effbot.org/librarybook/copy-reg.htm
try:
import copyreg
except:
# For Python 2.x compatibility
import copy_reg as copyreg
def TLSpectrum_pickler(fs):
# Collect all the info necessary to save the state of a TLSpectrum
return TLSpectrum_unpickler, (fs.data, fs.mask, fs.folded,
fs.extrap_x, fs.extrap_t)
def TLSpectrum_unpickler(data, mask, folded,
extrap_x, extrap_t):
# Use that info to recreate the TLSpectrum
return TLSpectrum(data, mask, mask_infeasible=False,
data_folded=folded,
extrap_x=extrap_x, extrap_t=extrap_t)
copyreg.pickle(TLSpectrum, TLSpectrum_pickler, TLSpectrum_unpickler)
Functions
def TLSpectrum_pickler(fs)
-
Expand source code
def TLSpectrum_pickler(fs): # Collect all the info necessary to save the state of a TLSpectrum return TLSpectrum_unpickler, (fs.data, fs.mask, fs.folded, fs.extrap_x, fs.extrap_t)
def TLSpectrum_unpickler(data, mask, folded, extrap_x, extrap_t)
-
Expand source code
def TLSpectrum_unpickler(data, mask, folded, extrap_x, extrap_t): # Use that info to recreate the TLSpectrum return TLSpectrum(data, mask, mask_infeasible=False, data_folded=folded, extrap_x=extrap_x, extrap_t=extrap_t)
Classes
class TLSpectrum (data, mask=False, mask_infeasible=True, data_folded=None, check_folding=True, dtype=builtins.float, copy=True, fill_value=nan, keep_mask=True, shrink=True, extrap_x=None, extrap_t=None)
-
Represents a two-locus frequency spectrum.
The constructor has the format: fs = dadi.Triallele.TLSpectrum(data, mask, mask_infeasible, data_folded, extrap_x, extrap_t)
data: The triallelic frequency spectrum data mask: An optional array of the same size as data, similar to dadi.Spectrum data_folded: If True, it is assumed that the input data is folded check_folding: If True and data_folded=True, the data and mask will be checked to ensure they are consistent extrap_x: Optional floating point value specifying x value to use in extrapolation. extrap_t: Optional floating point value specifying t value to use in extrapolation.
Expand source code
class TLSpectrum(np.ma.masked_array): """ Represents a two-locus frequency spectrum. The constructor has the format: fs = dadi.Triallele.TLSpectrum(data, mask, mask_infeasible, data_folded, extrap_x, extrap_t) data: The triallelic frequency spectrum data mask: An optional array of the same size as data, similar to dadi.Spectrum data_folded: If True, it is assumed that the input data is folded check_folding: If True and data_folded=True, the data and mask will be checked to ensure they are consistent extrap_x: Optional floating point value specifying x value to use in extrapolation. extrap_t: Optional floating point value specifying t value to use in extrapolation. """ def __new__(subtype, data, mask=np.ma.nomask, mask_infeasible=True, data_folded=None, check_folding=True, dtype=float, copy=True, fill_value=np.nan, keep_mask=True, shrink=True, extrap_x=None, extrap_t=None): data = np.asanyarray(data) if mask is np.ma.nomask: mask = np.ma.make_mask_none(data.shape) subarr = np.ma.masked_array(data, mask=mask, dtype=dtype, copy=copy, fill_value=fill_value, keep_mask=True, shrink=True) subarr = subarr.view(subtype) if hasattr(data, 'folded'): if data_folded is None or data_folded == data.folded: subarr.folded = data.folded elif data_folded != data.folded: raise ValueError('Data does not have same folding status as ' 'was called for in TLSpectrum constructor.') elif data_folded is not None: subarr.folded = data_folded else: subarr.folded = False ### XXX To do: ensure that all goes well when creating the TLSpectrum, come ### back to this # Check that if we're declaring that the input data is folded, it actually is, # and the mask reflects this. if mask_infeasible: subarr.mask_infeasible() subarr.extrap_x = extrap_x subarr.extrap_t = extrap_t return subarr # See https://scipy.github.io/old-wiki/pages/Subclasses.html for information on # __array_finalize__ and __array_wrap__ methods. # # We need these methods to ensure extra attributes get copied along when # we do arithmetic on the FS. def __array_finalize__(self, obj): if obj is None: return np.ma.masked_array.__array_finalize__(self, obj) self.folded = getattr(obj, 'folded', 'unspecified') self.extrap_x = getattr(obj, 'extrap_x', None) self.extrap_t = getattr(obj, 'extrap_t', None) def __array_wrap__(self, obj, context=None): result = obj.view(type(self)) result = np.ma.masked_array.__array_wrap__(self, obj, context=context) result.folded = self.folded result.extrap_t = self.extrap_t return result def _update_from(self, obj): np.ma.masked_array._update_from(self, obj) if hasattr(obj, 'folded'): self.folded = obj.folded if hasattr(obj, 'extrap_x'): self.extrap_x = obj.extrap_x if hasattr(obj, 'extrap_t'): self.extrap_t = obj.extrap_t # masked_array has priority 15. __array_priority__ = 20 def __repr__(self): return 'TLSpectrum(%s, folded=%s)'\ % (str(self), str(self.folded)) def mask_infeasible(self): """ Mask any infeasible entries. """ ns = len(self)-1 self.mask[0,0,0] = True self.mask[0,:,0] = True self.mask[0,0,:] = True for ii in range(len(self)): for jj in range(len(self)): for kk in range(len(self)): if ii+jj+kk > ns: self.mask[ii,jj,kk] = True for ii in range(len(self)): self.mask[ii,ns-ii,0] = True self.mask[ii,0,ns-ii] = True return self def unfold(self): if not self.folded: raise ValueError('Input Spectrum is not folded.') data = self.data unfolded = TLSpectrum(data, mask_infeasible=True) unfolded.extrap_x = self.extrap_x unfolded.extrap_t = self.extrap_t return unfolded def _get_sample_size(self): return np.asarray(self.shape)[0] - 1 sample_size = property(_get_sample_size) def _ensure_shape_and_dimension(self): """ Ensure that fs has Npop dimensions. """ pass # Make from_file a static method, so we can use it without an instance. @staticmethod def from_file(fid, mask_infeasible=True, return_comments=False): """ Read frequency spectrum from file. fid: string with file name to read from or an open file object. mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum. return_comments: If true, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s). See to_file method for details on the file format. """ newfile = False # Try to read from fid. If we can't, assume it's something that we can # use to open a file. if not hasattr(fid, 'read'): newfile = True fid = open(fid, 'r') line = fid.readline() # Strip out the comments comments = [] while line.startswith('#'): comments.append(line[1:].strip()) line = fid.readline() # Read the shape of the data shape,folded,extrap_x,extrap_t = line.split() shape = [int(shape)+1,int(shape)+1,int(shape)+1] data = np.fromstring(fid.readline().strip(), count=np.product(shape), sep=' ') # fromfile returns a 1-d array. Reshape it to the proper form. data = data.reshape(*shape) maskline = fid.readline().strip() mask = np.fromstring(maskline, count=np.product(shape), sep=' ') mask = mask.reshape(*shape) if folded == 'folded': folded = True else: folded = False if extrap_x == 'None': extrap_x = None else: extrap_x = float(extrap_x) if extrap_t == 'None': extrap_t = None else: extrap_t = float(extrap_t) # If we opened a new file, clean it up. if newfile: fid.close() fs = TLSpectrum(data, mask, mask_infeasible, data_folded=folded) fs.extrap_x = extrap_x fs.extrap_t = extrap_t if not return_comments: return fs else: return fs,comments def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True): """ Write frequency spectrum to file. fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.<p>g, where <p> is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0. The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) On the *same line*, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked. """ # Open the file object. newfile = False if not hasattr(fid, 'write'): newfile = True fid = open(fid, 'w') # Write comments for line in comment_lines: fid.write('# ') fid.write(line.strip()) fid.write(os.linesep) # Write out the shape of the fs fid.write('{0} '.format(self.sample_size)) if foldmaskinfo: if not self.folded: fid.write('unfolded ') else: fid.write('folded ') if extrapinfo: if not self.extrap_x: fid.write('None ') else: fid.write('{0} '.format(self.extrap_x)) if not self.extrap_t: fid.write('None') else: fid.write('{0}'.format(self.extrap_t)) fid.write(os.linesep) # Write the data to the file self.data.tofile(fid, ' ', '%%.%ig' % precision) fid.write(os.linesep) if foldmaskinfo: # Write the mask to the file np.asarray(self.mask,int).tofile(fid, ' ') fid.write(os.linesep) # Close file if newfile: fid.close() tofile = to_file def marginalA(self): """ Marginal 1D frequency spectrum for A locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for fAb in range(ns-fAB): marg[fAB+fAb] += self[fAB,fAb,:].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg def marginalB(self): """ Marginal 1D frequency spectrum for B locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for faB in range(ns-fAB): marg[fAB+faB] += self[fAB,:,faB].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg def mean_r2(self): """ Mean of normalized squared correlation coefficient between A and B loci. """ from . import numerics ns = self.shape[0] - 1 norm = self.sum() Dbin, r2bin = numerics.LD_per_bin(ns) return (self*r2bin).sum()/self.sum() def fold(self): if self.folded: raise ValueError('Input Spectrum is already folded.') ns = self.shape[0] - 1 folded = 0*self for ii in range(ns+1): for jj in range(ns+1): for kk in range(ns+1): if self.mask[ii,jj,kk]: continue p = ii + jj q = ii + kk if p > ns/2 and q > ns/2: # Switch A/a and B/b, so AB becomes ab, Ab becomes aB, etc folded[ns-ii-jj-kk,kk,jj] = self[ns-ii-jj-kk,kk,jj] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif p > ns/2: # Switch A/a, so AB -> aB, Ab -> ab, aB -> AB, and ab -> Ab folded[kk,ns-ii-jj-kk,ii] = self[kk,ns-ii-jj-kk,ii] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif q > ns/2: # Switch B/b, so AB -> Ab, Ab -> AB, aB -> ab, and ab -> aB folded[jj,ii,ns-ii-jj-kk] = self[jj,ii,ns-ii-jj-kk] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True folded.folded = True folded.extrap_x = self.extrap_x folded.extrap_t = self.extrap_t return folded # Ensures that when arithmetic is done with TLSpectrum objects, # attributes are preserved. For details, see similar code in # dadi.Spectrum_mod for method in ['__add__','__radd__','__sub__','__rsub__','__mul__', '__rmul__','__div__','__rdiv__','__truediv__','__rtruediv__', '__floordiv__','__rfloordiv__','__rpow__','__pow__']: exec(""" def %(method)s(self, other): self._check_other_folding(other) if isinstance(other, np.ma.masked_array): newdata = self.data.%(method)s (other.data) newmask = np.ma.mask_or(self.mask, other.mask) else: newdata = self.data.%(method)s (other) newmask = self.mask if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x: extrap_x = None else: extrap_x = self.extrap_x if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t: extrap_t = None else: extrap_t = self.extrap_t outfs = self.__class__.__new__(self.__class__, newdata, newmask, mask_infeasible=False, data_folded=self.folded, extrap_x=extrap_x, extrap_t=extrap_t) return outfs """ % {'method':method}) # Methods that modify the Spectrum in-place. for method in ['__iadd__','__isub__','__imul__','__idiv__', '__itruediv__','__ifloordiv__','__ipow__']: exec(""" def %(method)s(self, other): self._check_other_folding(other) if isinstance(other, np.ma.masked_array): self.data.%(method)s (other.data) self.mask = np.ma.mask_or(self.mask, other.mask) else: self.data.%(method)s (other) if hasattr(other, 'extrap_x') and self.extrap_x != other.extrap_x: self.extrap_x = None if hasattr(other, 'extrap_t') and self.extrap_t != other.extrap_t: self.extrap_t = None return self """ % {'method':method}) def _check_other_folding(self, other): """ Ensure other Spectrum has same .folded status """ if isinstance(other, self.__class__)\ and (other.folded != self.folded): raise ValueError('Cannot operate with a folded Spectrum and an ' 'unfolded one.')
Ancestors
- numpy.ma.core.MaskedArray
- numpy.ndarray
Class variables
var method
Static methods
def from_file(fid, mask_infeasible=True, return_comments=False)
-
Read frequency spectrum from file.
fid: string with file name to read from or an open file object. mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum. return_comments: If true, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s).
See to_file method for details on the file format.
Expand source code
@staticmethod def from_file(fid, mask_infeasible=True, return_comments=False): """ Read frequency spectrum from file. fid: string with file name to read from or an open file object. mask_infeasible: If True, mask the infeasible entries in the triallelic spectrum. return_comments: If true, the return value is (fs, comments), where comments is a list of strings containing the comments from the file (without #'s). See to_file method for details on the file format. """ newfile = False # Try to read from fid. If we can't, assume it's something that we can # use to open a file. if not hasattr(fid, 'read'): newfile = True fid = open(fid, 'r') line = fid.readline() # Strip out the comments comments = [] while line.startswith('#'): comments.append(line[1:].strip()) line = fid.readline() # Read the shape of the data shape,folded,extrap_x,extrap_t = line.split() shape = [int(shape)+1,int(shape)+1,int(shape)+1] data = np.fromstring(fid.readline().strip(), count=np.product(shape), sep=' ') # fromfile returns a 1-d array. Reshape it to the proper form. data = data.reshape(*shape) maskline = fid.readline().strip() mask = np.fromstring(maskline, count=np.product(shape), sep=' ') mask = mask.reshape(*shape) if folded == 'folded': folded = True else: folded = False if extrap_x == 'None': extrap_x = None else: extrap_x = float(extrap_x) if extrap_t == 'None': extrap_t = None else: extrap_t = float(extrap_t) # If we opened a new file, clean it up. if newfile: fid.close() fs = TLSpectrum(data, mask, mask_infeasible, data_folded=folded) fs.extrap_x = extrap_x fs.extrap_t = extrap_t if not return_comments: return fs else: return fs,comments
Instance variables
var sample_size
-
Expand source code
def _get_sample_size(self): return np.asarray(self.shape)[0] - 1
Methods
def fold(self)
-
Expand source code
def fold(self): if self.folded: raise ValueError('Input Spectrum is already folded.') ns = self.shape[0] - 1 folded = 0*self for ii in range(ns+1): for jj in range(ns+1): for kk in range(ns+1): if self.mask[ii,jj,kk]: continue p = ii + jj q = ii + kk if p > ns/2 and q > ns/2: # Switch A/a and B/b, so AB becomes ab, Ab becomes aB, etc folded[ns-ii-jj-kk,kk,jj] = self[ns-ii-jj-kk,kk,jj] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif p > ns/2: # Switch A/a, so AB -> aB, Ab -> ab, aB -> AB, and ab -> Ab folded[kk,ns-ii-jj-kk,ii] = self[kk,ns-ii-jj-kk,ii] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True elif q > ns/2: # Switch B/b, so AB -> Ab, Ab -> AB, aB -> ab, and ab -> aB folded[jj,ii,ns-ii-jj-kk] = self[jj,ii,ns-ii-jj-kk] + self[ii,jj,kk] folded.mask[ii,jj,kk] = True folded.folded = True folded.extrap_x = self.extrap_x folded.extrap_t = self.extrap_t return folded
def marginalA(self)
-
Marginal 1D frequency spectrum for A locus.
Expand source code
def marginalA(self): """ Marginal 1D frequency spectrum for A locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for fAb in range(ns-fAB): marg[fAB+fAb] += self[fAB,fAb,:].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg
def marginalB(self)
-
Marginal 1D frequency spectrum for B locus.
Expand source code
def marginalB(self): """ Marginal 1D frequency spectrum for B locus. """ ns = self.shape[0] - 1 marg = dadi.Spectrum(np.zeros(ns+1)) for fAB in range(ns): for faB in range(ns-fAB): marg[fAB+faB] += self[fAB,:,faB].sum() marg.extrap_x = self.extrap_x marg.extrap_t = self.extrap_t return marg
def mask_infeasible(self)
-
Mask any infeasible entries.
Expand source code
def mask_infeasible(self): """ Mask any infeasible entries. """ ns = len(self)-1 self.mask[0,0,0] = True self.mask[0,:,0] = True self.mask[0,0,:] = True for ii in range(len(self)): for jj in range(len(self)): for kk in range(len(self)): if ii+jj+kk > ns: self.mask[ii,jj,kk] = True for ii in range(len(self)): self.mask[ii,ns-ii,0] = True self.mask[ii,0,ns-ii] = True return self
def mean_r2(self)
-
Mean of normalized squared correlation coefficient between A and B loci.
Expand source code
def mean_r2(self): """ Mean of normalized squared correlation coefficient between A and B loci. """ from . import numerics ns = self.shape[0] - 1 norm = self.sum() Dbin, r2bin = numerics.LD_per_bin(ns) return (self*r2bin).sum()/self.sum()
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True)
-
Write frequency spectrum to file.
fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.
g, where
is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0.
The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) On the same line, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] … A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked.
Expand source code
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True): """ Write frequency spectrum to file. fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.<p>g, where <p> is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0. The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) On the *same line*, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked. """ # Open the file object. newfile = False if not hasattr(fid, 'write'): newfile = True fid = open(fid, 'w') # Write comments for line in comment_lines: fid.write('# ') fid.write(line.strip()) fid.write(os.linesep) # Write out the shape of the fs fid.write('{0} '.format(self.sample_size)) if foldmaskinfo: if not self.folded: fid.write('unfolded ') else: fid.write('folded ') if extrapinfo: if not self.extrap_x: fid.write('None ') else: fid.write('{0} '.format(self.extrap_x)) if not self.extrap_t: fid.write('None') else: fid.write('{0}'.format(self.extrap_t)) fid.write(os.linesep) # Write the data to the file self.data.tofile(fid, ' ', '%%.%ig' % precision) fid.write(os.linesep) if foldmaskinfo: # Write the mask to the file np.asarray(self.mask,int).tofile(fid, ' ') fid.write(os.linesep) # Close file if newfile: fid.close()
def tofile(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True)
-
Write frequency spectrum to file.
fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.
g, where
is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0.
The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 samples.) On the same line, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] … fs[0,1,0] fs[0,1,1] … A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked.
Expand source code
def to_file(self, fid, precision=16, comment_lines=[], foldmaskinfo=True, extrapinfo=True): """ Write frequency spectrum to file. fid: string with file name to write to or an open file object. precision: precision with which to write out entries of the SFS. (They are formated via %.<p>g, where <p> is the precision.) comment lines: list of strings to be used as comment lines in the header of the output file. foldmaskinfo: If False, folding and mask and population label information will not be saved. This conforms to the file format for dadi versions prior to 1.3.0. The file format is: # Any number of comment lines beginning with a '#' A single line containing N integers giving the dimensions of the fs array. So this line would be '5 5 3' for an SFS that was 5x5x3. (That would be 4x4x2 *samples*.) On the *same line*, the string 'folded' or 'unfolded' denoting the folding status of the array A single line giving the array elements. The order of elements is e.g.: fs[0,0,0] fs[0,0,1] fs[0,0,2] ... fs[0,1,0] fs[0,1,1] ... A single line giving the elements of the mask in the same order as the data line. '1' indicates masked, '0' indicates unmasked. """ # Open the file object. newfile = False if not hasattr(fid, 'write'): newfile = True fid = open(fid, 'w') # Write comments for line in comment_lines: fid.write('# ') fid.write(line.strip()) fid.write(os.linesep) # Write out the shape of the fs fid.write('{0} '.format(self.sample_size)) if foldmaskinfo: if not self.folded: fid.write('unfolded ') else: fid.write('folded ') if extrapinfo: if not self.extrap_x: fid.write('None ') else: fid.write('{0} '.format(self.extrap_x)) if not self.extrap_t: fid.write('None') else: fid.write('{0}'.format(self.extrap_t)) fid.write(os.linesep) # Write the data to the file self.data.tofile(fid, ' ', '%%.%ig' % precision) fid.write(os.linesep) if foldmaskinfo: # Write the mask to the file np.asarray(self.mask,int).tofile(fid, ' ') fid.write(os.linesep) # Close file if newfile: fid.close()
def unfold(self)
-
Expand source code
def unfold(self): if not self.folded: raise ValueError('Input Spectrum is not folded.') data = self.data unfolded = TLSpectrum(data, mask_infeasible=True) unfolded.extrap_x = self.extrap_x unfolded.extrap_t = self.extrap_t return unfolded