导入各种包,准备工作

In [1]:
import numpy as np
from astropy.io import fits
from astropy import time, coordinates as coord, units as u
from matplotlib import pyplot as plt
import sep
import os
import datetime
WARNING: IERSStaleWarning: leap-second file is expired. [astropy.utils.iers.iers]
In [ ]:
# 禁止下载地球自转数据
from astropy.utils import iers
iers.conf.auto_download = False
In [2]:
# 数据处理的根目录,实际工作中一般会分为原始数据目录和处理结果目录
rawdir = "PiA2021raw/"
reddir = "PiA2021red/"
In [3]:
# 调用操作系统命令生成列表,这里使用了多行命令(用\在行末换行),一句命令写多条,循环等shell编程技巧
# 可以直接在终端窗口一条条写
# 另外,这里用一个!开头,实际上新开了一个进程,不会影响实际当前路径
! cd PiA2021raw; \
ls bias_*.fit > bias.lst; \
for b in U B V R I; do ls flat_${b}_*.fit    > flat_${b}.lst;    done; \
for b in B V R; do ls V398UMa_${b}_*.fit > V398UMa_${b}.lst; done;
In [4]:
# 一个小函数,生成当前UTC时间的字符串形式,用于在header中加注数据处理时间
def utcnowstr():
    return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S")

加载各种文件清单

使用了列表生成器等语法,该语法也可以生成元组Tuple、字典Dict

In [5]:
# 本底
bias_lst = open(rawdir + "bias.lst").read().split("\n")
bias_lst
Out[5]:
['bias_001.fit',
 'bias_002.fit',
 'bias_003.fit',
 'bias_004.fit',
 'bias_005.fit',
 'bias_006.fit',
 'bias_007.fit',
 'bias_008.fit',
 'bias_009.fit',
 'bias_010.fit']
In [ ]:
# 平场。注意平场比数据的波段多,如果不想处理数据未用到的平场也是可以的
flat_band = "UBVRI"
# flat_lst = {b: open(rawdir + "flat_"+b+".lst").read().split() for b in flat_band}
b = "B"
flat_lst = open(rawdir + "flat_"+b+".lst").read().split("\n")
flat_lst
In [7]:
# 科学数据。这里因为只有一个目标,实际上有些步骤是多余的
obj_band = "BVR"
objs = ["V398UMa"]
obj_bands = [(o, b) for b in obj_band for o in objs]
obj_bands
# 本格可以忽略
Out[7]:
[('V398UMa', 'B'), ('V398UMa', 'V'), ('V398UMa', 'R')]
In [ ]:
o, b = "V398UMa", "B"
obj_lst = open(rawdir + o+"_"+b+".lst").read().split("\n")
obj_lst

读取图像的大小,便于后面处理

In [9]:
nx = fits.getval(rawdir + bias_lst[0], "NAXIS1")
ny = fits.getval(rawdir + bias_lst[0], "NAXIS2")
nx, ny
WARNING: File may have been truncated: actual file length (2100032) is smaller than the expected size (2102400) [astropy.io.fits.file]
WARNING: The following header keyword is invalid or follows an unrecognized non-standard convention:
FILTER                                                                           [astropy.io.fits.card]
Out[9]:
(1024, 1024)

本底合并

先读入数据,然后进行合并,合并使用中值。最后生成新文件

In [10]:
nbias = len(bias_lst)
In [11]:
# 创建空数组,准备接收数据,注意下标顺序是ny nx,数据类型是16位无符号整数
# 此外注意这里用到是empty,而不是ones或者zeros,不会对数组进行初始化,节约时间
bias_cube = np.empty((nbias, ny, nx), np.uint16)
In [12]:
# 读入数据。对三维数组,只给出第一个下标,表示后面是全部都用到,相当于 [f,:,:]
for f in range(nbias):
    bias_cube[f] = fits.getdata(rawdir + bias_lst[f])
In [13]:
# 求中值,注意axis=0,表示按照0维度进行求中值
# 即对于[y,x],计算方式是求median([:, y, x])
# 还有就是,需要把数据类型转换成float32,默认是64位,精度太高,浪费空间
bias_data = np.float32(np.median(bias_cube, axis=0))
bias_data.dtype, bias_data.shape
Out[13]:
(dtype('float32'), (1024, 1024))
In [14]:
# 读第0个bias文件的头,适当改造作为最终的头
hdr = fits.getheader(rawdir + bias_lst[0])
In [15]:
# 在头里加上处理时间
# 其他字段这里没有处理。数据类型会被自动根据实际情况更正,不需要干预
hdr.append(("PROCTIME", utcnowstr(), "DateTime of processing"))
hdr
Out[15]:
SIMPLE  = T                                                                     
BITPIX  = 16 / Data Type                                                        
NAXIS   = 2                                                                     
NAXIS1  = 1024                                                                  
NAXIS2  = 1024                                                                  
BSCALE  = 1.000000                                                              
BZERO   = 32768.000000                                                          
OBJ_NAME= ''                                                                    
CCDTYPE = 'ZERO'                                                                
TIME-TEL= ''                                                                    
RA      = ''                                                                    
DEC     = ''                                                                    
EPOCH   = ''                                                                    
EXPTIME = 0.000010                                                              
FILTER                                                                          
DATE-OBS= '2018-05-23'                                                          
TIME-OBS= '12:02:25.672'                                                        
PROCTIME= '2021-05-19T05:25:52' / DateTime of processing                        
In [16]:
# 创建一个新文件,每个fits文件在astropy中表现为一个列表,哪怕是单图像,也是单元素的列表
# 每个单元是一个HDU(Header-Data-Unit),包括header头部和data数据部
hdul_new = fits.HDUList([
    fits.PrimaryHDU(header=hdr, data=bias_data)
])
# 保存文件。如果文件已经存在,需要加上overwrite=True
hdul_new.writeto(reddir + "BIAS.fits")

绘制本底

In [17]:
bias_med = np.median(bias_data)
bias_std = np.std(bias_data)
In [18]:
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(bias_data, vmin=bias_med-3*bias_std, vmax=bias_med+3*bias_std, cmap="rainbow")
Out[18]:
<matplotlib.image.AxesImage at 0x7fdb3112b198>

平场合并

In [19]:
# 注意,如果是像课程示例中这样连续处理,这一步是多余的
# 但是实践中一般本底合并、平场合并、科学数据改正等各步骤是独立的,所以必须有读入过程
bias_data = fits.getdata(reddir + "BIAS.fits")
In [20]:
# 这里不做循环,只演示一个波段,所以看到的是直接读取波段字符串的某一个
# 如果要全处理,那么接下来的几个步骤要放到一个for里,并且在一个格子内完成
# 对于测试程序而言,这样逐个波段比较方便
# for b in flat_band:
b = "B"
In [21]:
# 基本上和bias合并相同,但是注意这里,是float32,因为待会要减去本底,而合并后的本底是float32
nflat = len(flat_lst[b])
flat_cube = np.empty((nflat, ny, nx), np.float32)
In [22]:
for f in range(nflat):
    # 读平场数据,注意这里的数组形式,[b]是针对字典的,而[f]是针对列表的
    dd = fits.getdata(rawdir + flat_lst[b][f])
    # 减本底
    dd = dd - bias_data
    # 计算中值,并利用中值进行归一化
    dmed = np.median(dd)
    dd /= dmed
    # 保存到最终数组中
    flat_cube[f] = dd
In [23]:
flat_data = np.median(flat_cube, axis=0)
flat_data.dtype, flat_data.shape
Out[23]:
(dtype('float32'), (1024, 1024))
In [24]:
hdr = fits.getheader(rawdir + flat_lst[b][0])
hdr.append(("PROCTIME", utcnowstr(), "DateTime of processing"))
In [25]:
hdul_new = fits.HDUList([
    fits.PrimaryHDU(header=hdr, data=flat_data)
])
# 这里尽可能用字符串格式化的方式去拼接文件名的各部分,因为这个方式系统开销较小
hdul_new.writeto(reddir + "FLAT_{b}.fits".format(b=b))

绘制平场

In [26]:
flat_med = np.median(flat_data)
flat_std = np.std(flat_data)
In [27]:
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(flat_data, vmin=flat_med-3*flat_std, vmax=flat_med+3*flat_std, cmap="hot")
Out[27]:
<matplotlib.image.AxesImage at 0x7fdb2f30c5c0>

科学数据改正

和平场合并类似,但是这次是逐个处理,不需要把多幅叠加起来。此外,公共数据只需要读一次就行

In [28]:
# 老规矩。但是这里注意,是放在选择波段和目标之前的,也就是不管处理几个波段,只读入一次
bias_data = fits.getdata(reddir + "BIAS.fits")
In [29]:
# 同样的,从一组obj_bands中选一个来做演示
o, b = obj_bands[0]
In [30]:
# 读该波段的平场数据。注意这里是在科学数据的循环之前的,也就是对每个波段也是只读入一次
flat_data = fits.getdata(reddir + "FLAT_{b}.fits".format(b=b))
In [31]:
# 对该波段的目标数据,逐个进行循环
# for f in range(len()):
f = 0
# 这里加这么一步,不是必须的,但是这样后续做处理的时候省点事,因为还得写输出文件,每次写这么复杂很累
objf = obj_lst[f]
In [32]:
# 打开文件
obj_hdu = fits.open(rawdir + objf)
# 读出原始数据,注意后面改正之后变量名不同
ori_data = obj_hdu[0].data
# 读出数据头
obj_hdr = obj_hdu[0].header
# 这里指定变量只是为了后续编程省事,不这么做也行
In [33]:
# 生成观测站地理坐标对象
site = coord.EarthLocation(lat=40.4 * u.deg, lon=117.6 * u.deg, height=900 * u.m)
# 生成观测目标天球对象
obj_coord = coord.SkyCoord(obj_hdr["RA"], obj_hdr["DEC"], unit=(u.hour, u.deg), frame="icrs")
In [34]:
# 减本底,除平场
obj_data = (ori_data - bias_data) / flat_data
In [35]:
# 找出头中的观测日期时间
obs_dt = obj_hdr["DATE-OBS"] + "T" + obj_hdr["TIME-OBS"]
# 计算JD
obs_jd = time.Time(obs_dt, format='isot', scale='utc', location=site)
# 计算地球和太阳系质心之间的光程差
ltt_bary = obs_jd.light_travel_time(obj_coord)
# 计算BJD
obs_bjd = (obs_jd.tdb + ltt_bary).jd
obs_dt, obs_jd.jd, ltt_bary, obs_bjd
WARNING: failed to download ftp://cddis.gsfc.nasa.gov/pub/products/iers/finals2000A.all and https://datacenter.iers.org/data/9/finals2000A.all, using local IERS-B: <urlopen error Unable to open any source! Exceptions were {'ftp://cddis.gsfc.nasa.gov/pub/products/iers/finals2000A.all': URLError("ftp error: timeout('timed out')"), 'https://datacenter.iers.org/data/9/finals2000A.all': URLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self signed certificate in certificate chain (_ssl.c:1056)'))}> [astropy.utils.iers.iers]
WARNING: Tried to get polar motions for times after IERS data is valid. Defaulting to polar motion from the 50-yr mean for those. This may affect precision at the 10s of arcsec level [astropy.coordinates.builtin_frames.utils]
WARNING: (some) times are outside of range covered by IERS table. Assuming UT1-UTC=0 for coordinate transformations. [astropy.coordinates.builtin_frames.utils]
Out[35]:
('2018-05-23T12:50:54.836',
 2458262.0353568983,
 <TimeDelta object: scale='tdb' format='jd' value=-0.00022392500621064792>,
 2458262.0359337265)
In [36]:
# 使用update去更新header,一次性多个,如果已经存在则更新,不存在就添加
obj_hdr.update({
    "PROCTIME": (utcnowstr(), "DateTime of processing"),
    'JD': (obs_jd.jd, "JD of observation"),
    'BJD': (obs_bjd, "Barycentric Julian Day")
})
obj_hdr
WARNING: VerifyWarning: Verification reported errors: [astropy.io.fits.verify]
WARNING: VerifyWarning: Card 'FILTER' is not FITS standard (invalid value string: 'B').  Fixed 'FILTER' card to meet the FITS standard. [astropy.io.fits.verify]
WARNING: VerifyWarning: Note: astropy.io.fits uses zero-based indexing.
 [astropy.io.fits.verify]
Out[36]:
SIMPLE  = T                                                                     
BITPIX  = 16 / Data Type                                                        
NAXIS   = 2                                                                     
NAXIS1  = 1024                                                                  
NAXIS2  = 1024                                                                  
BSCALE  = 1.000000                                                              
BZERO   = 32768.000000                                                          
OBJ_NAME= ' '                                                                   
CCDTYPE = 'OBJECT'                                                              
TIME-TEL= ''                                                                    
RA      = '11:48:42'                                                            
DEC     = '+54:43:07'                                                           
EPOCH   = '2000'                                                                
EXPTIME = 35.000000                                                             
FILTER  = 'B       '                                                            
DATE-OBS= '2018-05-23'                                                          
TIME-OBS= '12:50:54.836'                                                        
PROCTIME= '2021-05-19T05:26:14' / DateTime of processing                        
JD      =    2458262.035356898 / JD of observation                              
BJD     =    2458262.035933726 / Barycentric Julian Day                         
In [37]:
hdul_new = fits.HDUList([
    fits.PrimaryHDU(header=obj_hdr, data=obj_data)
])
# 在原始文件名基础上加上后缀,表示经过本底(bias)平场(flat)改正的结果
hdul_new.writeto(reddir + os.path.splitext(objf)[0] + "_bf.fits")

绘制观测图像

注意和绘制平场、本底时的差异

In [38]:
obj_med, obj_std = np.median(obj_data), np.std(obj_data)
In [39]:
obj_med, obj_std
Out[39]:
(779.303, 592.6469)
In [40]:
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(obj_data, vmin=obj_med-1*50, vmax=obj_med+5*50, cmap="cool")
Out[40]:
<matplotlib.image.AxesImage at 0x7fdb185b3748>

找星、测光等处理

这里就不写循环,以及读取改正后数据等步骤,直接开始处理

In [41]:
# 计算背景
bkg = sep.Background(obj_data)
In [42]:
# 背景的全局中值和标准差,注意和图像的标准差的区别
# 图像的标准差受到星像的影响,而这里实际上是剔除了星像之后的
bkg.globalback, bkg.globalrms
Out[42]:
(778.7703857421875, 38.02693557739258)
In [43]:
# 背景及其误差
bkg_image = bkg.back()
bkg_rms = bkg.rms()
In [44]:
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
ax[0].imshow(bkg_image)
ax[1].imshow(bkg_rms);
In [45]:
# 图像扣减背景
data_sub = obj_data - bkg
In [46]:
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(data_sub, vmin=-1*30, vmax=+5*30, cmap="cool")
Out[46]:
<matplotlib.image.AxesImage at 0x7fdb1c8cfa58>
In [47]:
# 找源
stars = sep.extract(data_sub, 1.5, err=bkg.globalrms)
stars.shape, stars.dtype
Out[47]:
((51,),
 dtype([('thresh', '<f8'), ('npix', '<i8'), ('tnpix', '<i8'), ('xmin', '<i8'), ('xmax', '<i8'), ('ymin', '<i8'), ('ymax', '<i8'), ('x', '<f8'), ('y', '<f8'), ('x2', '<f8'), ('y2', '<f8'), ('xy', '<f8'), ('errx2', '<f8'), ('erry2', '<f8'), ('errxy', '<f8'), ('a', '<f8'), ('b', '<f8'), ('theta', '<f8'), ('cxx', '<f8'), ('cyy', '<f8'), ('cxy', '<f8'), ('cflux', '<f8'), ('flux', '<f8'), ('cpeak', '<f8'), ('peak', '<f8'), ('xcpeak', '<i8'), ('ycpeak', '<i8'), ('xpeak', '<i8'), ('ypeak', '<i8'), ('flag', '<i8')]))
In [48]:
from matplotlib.patches import Ellipse

# 画观测图像
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
im = ax.imshow(data_sub, interpolation='nearest', cmap='gray',
               vmin=-1*30, vmax=+5*30, origin='lower')

# 挨个目标画椭圆
for i in range(len(stars)):
    e = Ellipse(xy=(stars['x'][i], stars['y'][i]),
                width=15*stars['a'][i],
                height=15*stars['b'][i],
                angle=np.deg2rad(stars['theta'][i]),)
    e.set_facecolor('none')
    e.set_edgecolor('red')
    ax.add_artist(e)
In [49]:
stars["a"], stars["b"]
Out[49]:
(array([1.27491343, 1.01281226, 0.97928321, 1.76752913, 2.06216383,
        2.3710444 , 1.16401315, 1.79871261, 1.45357203, 1.39474797,
        1.44445014, 0.94173837, 1.41447234, 1.24905741, 1.69859254,
        0.77692348, 0.7494297 , 1.67597377, 0.91054791, 0.91560316,
        1.92234254, 1.37144649, 1.99279857, 0.73611385, 2.16399884,
        0.80327523, 1.33087218, 1.33360541, 1.33636785, 1.53233981,
        0.74949938, 1.16270125, 1.36636829, 0.74526751, 0.99247164,
        1.00844312, 1.49565506, 0.95424831, 1.105358  , 1.27833819,
        1.31499493, 1.39230156, 1.87875867, 1.18946528, 1.37493265,
        0.76078796, 1.25878751, 1.97350335, 0.93825614, 1.14569914,
        1.11167586]),
 array([1.12573719, 0.82078314, 0.862526  , 1.67944968, 1.50700188,
        2.05940485, 0.97575533, 1.69448876, 1.29150915, 1.30440366,
        1.35990083, 0.74306858, 1.30549407, 1.09916031, 1.63373423,
        0.44383407, 0.44728458, 1.62495768, 0.75017434, 0.72534609,
        1.79244578, 1.2624011 , 1.88000619, 0.67972773, 2.00714731,
        0.68042648, 1.15500712, 0.4932074 , 1.24605072, 1.49533474,
        0.45020291, 0.85633558, 0.79278195, 0.56073779, 0.65976173,
        0.99181694, 1.48338389, 0.93327332, 0.89956385, 1.1307528 ,
        1.28406596, 0.98609799, 1.72087705, 0.95584428, 1.07163489,
        0.44711083, 0.71561742, 1.8330245 , 0.68940008, 0.66393965,
        0.46547633]))
In [50]:
# 简单孔径测光,孔径=5.0像素
flux1, fluxerr1, flag1 = sep.sum_circle(data_sub, stars['x'], stars['y'],
                                     5.0, err=bkg.globalrms, gain=1.0)
# 流量转星等,误差分析公式略
mag1 = 25.0 - 2.5 * np.log10(flux1)
magerr1 = 2.5 / np.log(10) * fluxerr1 / flux1
In [51]:
# 不用全局统一的误差,而是用每个像素的局部误差
flux2, fluxerr2, flag2 = sep.sum_circle(data_sub, stars['x'], stars['y'], 5.0,
                                     err=bkg_rms, gain=1.0)
# 流量转星等,误差分析公式略
mag2 = 25.0 - 2.5 * np.log10(flux2)
magerr2 = 2.5 / np.log(10) * fluxerr2 / flux2
In [52]:
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.plot(mag1, magerr1, "x")
ax.set_xlabel("Mag $_1$")
ax.set_ylabel("Error $_1$")
Out[52]:
Text(0, 0.5, 'Error $_1$')
In [53]:
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.plot(mag2, magerr2, "x")
ax.set_xlabel("Mag $_2$")
ax.set_ylabel("Error $_2$")
Out[53]:
Text(0, 0.5, 'Error $_2$')
In [54]:
# 计算Source-Extractor的AUTO流量/星等
kronrad, krflag = sep.kron_radius(data_sub, stars["x"], stars["y"], 
                                  stars["a"], stars["b"], stars["theta"], 6.0)
fluxa, fluxerra, flaga = sep.sum_ellipse(data_sub, stars["x"], stars["y"], 
                                      stars["a"], stars["b"], stars["theta"], 2.5*kronrad,
                                      err=bkg_rms, subpix=1)
flaga |= krflag
maga = 25.0 - 2.5 * np.log10(fluxa)
magerra = 2.5 / np.log(10) * fluxerra / fluxa
In [55]:
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
ax.plot(maga, magerra, "x")
ax.set_xlabel("Mag $_{auto}$")
ax.set_ylabel("Error $_{auto}$")
Out[55]:
Text(0, 0.5, 'Error $_{auto}$')
In [56]:
# 构建星表
dt_cat = [
    ("x", float),
    ("y", float),
    ("a", float),
    ("b", float),
    ("theta", float),
    ("flux_aper", float),
    ("fluxerr_aper", float),
    ("mag_aper", float),
    ("magerr_aper", float),
    ("flag_aper", int),
    ("flux_auto", float),
    ("fluxerr_auto", float),
    ("mag_auto", float),
    ("magerr_auto", float),
    ("flag_auto", int),
]
# 创建空数组,xxx_like表示数组的维度大小和参考数组一致
cat = np.empty_like(stars, dt_cat)
# 逐个字段填充数据
cat["x"] = stars["x"]
cat["y"] = stars["y"]
cat["a"] = stars["a"]
cat["b"] = stars["b"]
cat["theta"] = stars["theta"]
cat["flux_aper"] = flux2
cat["fluxerr_aper"] = fluxerr2
cat["mag_aper"] = mag2
cat["magerr_aper"] = magerr2
cat["flag_aper"] = flag2
cat["flux_auto"] = flaga
cat["fluxerr_auto"] = fluxerra
cat["mag_auto"] = maga
cat["magerr_auto"] = magerra
cat["flag_auto"] = flaga
In [57]:
# 创建一个带表格的fits文件,这时候,[0]的HDU必须是空的,只能有heaader,不能有数据
hdul_new = fits.HDUList([
    fits.PrimaryHDU(header=obj_hdr, ),
    fits.BinTableHDU(data=cat)
])
# 在原始文件名基础上加上后缀,表示这是星表
hdul_new.writeto(reddir + os.path.splitext(objf)[0] + "_cat.fits")
# 文本格式的数据输出略,网上有很多案例
In [ ]: