## Forward modelling with the Metropolis-Hastings algorithm
import os

import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st
from scipy.interpolate import interp1d

import emcee
import corner

import cosmology as cosmo


def metropolis_hastings(p, x0, sigma, *args, nsamp=1000):
    ## p: target density (need not be normalised)
    ## x0: initial guess
    ## sigma: proposal step sizes, one per parameter
    ## args: data passed through to p
    ndim = len(x0)
    samples = np.zeros((nsamp, ndim))
    x = np.array(x0)
    sigma = np.array(sigma)
    # the position and step-size arrays had better be the same shape
    assert x.shape == sigma.shape, 'Shape of x and shape of sigma must be the same'
    n = 0
    for i in range(nsamp):
        # propose a new position by perturbing the current one
        x_prime = x + sigma*np.random.randn(ndim)
        p_new = p(*x_prime, *args)
        p_old = p(*x, *args)
        # accept with probability min(1, p_new/p_old); if the current density
        # is zero (e.g. outside the prior support), accept the proposal so
        # the chain cannot get stuck on a 0/0 comparison
        if p_old == 0. or np.random.rand() < p_new/p_old:
            x = x_prime
            n = n + 1
        # save the (possibly repeated) sample to the chain
        samples[i] = x
    print('acceptance rate', n/nsamp)
    return samples


def LogPrior(alpha, beta, sca_sqr, pi1, pi2, pi3, mu1, mu2, mu3,
             tausqr1, tausqr2, tausqr3):
    theta = (alpha, beta, sca_sqr, pi1, pi2, pi3, mu1, mu2, mu3,
             tausqr1, tausqr2, tausqr3)
    alpha, beta, sca_sqr = theta[0:3]
    nk = int((len(theta)-3)/3)
    pi = np.abs(theta[3:3+nk])/np.sum(np.abs(theta[3:3+nk]))
    mu = np.array(theta[3+nk:3+2*nk])
    tau_sqr = np.array(theta[3+2*nk:3+3*nk])
    if (alpha < 5. or alpha > 10.) or (beta < 0. or beta > 3.) \
            or (sca_sqr < 0.0001 or sca_sqr > 1) \
            or np.any(mu < 0.) or np.any(pi > 1.) or np.any(tau_sqr < 0.):
        return -np.inf
    '''
    p_alpha = st.norm.pdf(alpha, loc=7.2, scale=2.)
    p_beta = st.norm.pdf(beta, loc=1.2, scale=0.5)
    p_scasqr = st.norm.pdf(sca_sqr, loc=0.1, scale=0.2)
    '''
    p_alpha = st.uniform.pdf(alpha, loc=5., scale=5.)        ## uniform prior on 5-10
    p_beta = st.uniform.pdf(beta, loc=0., scale=3.0)         ## uniform prior on 0-3
    p_scasqr = st.uniform.pdf(sca_sqr, loc=0.0001, scale=1)  ## uniform prior on 0.0001-1 (sca 0.01-1)
    prior = p_alpha*p_beta*p_scasqr
    lnprior = np.log(prior)
    return lnprior
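## Optional sanity check, not called in the main run: a minimal sketch that
## samples a 1-D standard normal with metropolis_hastings. The target
## _demo_gauss and the helper _sanity_check_mh are illustrative additions,
## not part of the original analysis.
def _demo_gauss(x):
    ## unnormalised standard-normal density; MH only needs density ratios
    return np.exp(-0.5*x**2)

def _sanity_check_mh(nsamp=5000):
    ## start at x=0 with unit step size; no extra data args are needed here
    chain = metropolis_hastings(_demo_gauss, [0.], [1.], nsamp=nsamp)
    ## the sample mean should be ~0 and the standard deviation ~1
    print('demo mean, std:', chain[:, 0].mean(), chain[:, 0].std())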
def LogLikelihood(alpha, beta, sca_sqr, pi1, pi2, pi3, mu1, mu2, mu3,
                  tausqr1, tausqr2, tausqr3, x, y, z, xerr, yerr):
    theta = (alpha, beta, sca_sqr, pi1, pi2, pi3, mu1, mu2, mu3,
             tausqr1, tausqr2, tausqr3)
    ## best fit from linmix:
    #theta = 7.35, 2, 0.11, 0.30239357, 0.32041853, 0.25445745, 0.23774759, 0.24707381, 0.24495498, 0.06585905, 0.07556199, 0.07331773
    ## Eq 16-18 of Kelly07: linear regression with a Gaussian mixture model
    ## for the distribution of the independent variable
    alpha, beta, sca_sqr = theta[0:3]
    nk = int((len(theta)-3)/3)
    pi = np.abs(theta[3:3+nk])/np.sum(np.abs(theta[3:3+nk]))
    mu = theta[3+nk:3+2*nk]
    tau_sqr = theta[3+2*nk:3+3*nk]
    loglike_i = np.zeros(len(x))
    for i in range(len(x)):
        tmp = 0.
        z_i = np.array([y[i], x[i]])
        for k in range(len(pi)):
            ## loop over and add the Gaussian components
            zeta_k = np.array([alpha+beta*mu[k], mu[k]])
            v_ki = np.array([[beta**2*tau_sqr[k]+sca_sqr+yerr[i]**2, beta*tau_sqr[k]],
                             [beta*tau_sqr[k], tau_sqr[k]+xerr[i]**2]])
            det = np.linalg.det(v_ki)
            if det > 0:
                r = z_i - zeta_k
                tmp = tmp + pi[k]/2./np.pi/det**0.5 * np.exp(-0.5*(r @ np.linalg.inv(v_ki) @ r))
        loglike_i[i] = np.log(tmp)

    ## Eq 41 of Kelly07: the selection function, evaluated on a grid using the
    ## median xerr, yerr; the constant grid-cell area is dropped since it only
    ## shifts the log-likelihood by an additive constant
    xgrid = np.linspace(-3, 3, num=30)
    ygrid = np.linspace(5, 11, num=30)
    p_grid_all = np.zeros((len(xgrid), len(ygrid)))
    p_select_all = np.zeros((len(xgrid), len(ygrid)))
    yerr_med = np.median(yerr)
    xerr_med = np.median(xerr)
    for ix, xx in enumerate(xgrid):
        for iy, yy in enumerate(ygrid):
            z_i = np.array([yy, xx])
            for k in range(len(pi)):
                ## loop over and add the Gaussian components
                zeta_k = np.array([alpha+beta*mu[k], mu[k]])
                v_ki = np.array([[beta**2*tau_sqr[k]+sca_sqr+yerr_med**2, beta*tau_sqr[k]],
                                 [beta*tau_sqr[k], tau_sqr[k]+xerr_med**2]])
                det = np.linalg.det(v_ki)
                if det > 0:
                    r = z_i - zeta_k
                    p_grid_all[ix, iy] = p_grid_all[ix, iy] + \
                        pi[k]/2./np.pi/det**0.5 * np.exp(-0.5*(r @ np.linalg.inv(v_ki) @ r))
    p_det = np.zeros((len(xgrid), len(ygrid)))
    for i in range(len(x)):
        for iy, yy in enumerate(ygrid):
            p_det[:, iy] = selection(yy, z[i])
        p_select_all = p_select_all + p_det*p_grid_all
    p_det = np.sum(p_select_all)
    loglike = -float(len(x))*np.log(p_det) + np.sum(loglike_i)
    return loglike


def Posterior(alpha, beta, sca_sqr, pi1, pi2, pi3, mu1, mu2, mu3,
              tausqr1, tausqr2, tausqr3, x, y, z, xerr, yerr):
    lnlike = LogLikelihood(alpha, beta, sca_sqr, pi1, pi2, pi3, mu1, mu2, mu3,
                           tausqr1, tausqr2, tausqr3, x, y, z, xerr, yerr)
    lnprior = LogPrior(alpha, beta, sca_sqr, pi1, pi2, pi3, mu1, mu2, mu3,
                       tausqr1, tausqr2, tausqr3)
    lnposterior = lnlike + lnprior
    ## metropolis_hastings works with the density itself, so exponentiate
    return np.exp(lnposterior)


def get_imag(logL5100, z1, omegal=0.7, omega0=0.3, h100=0.7):
    ## convert the 5100 A luminosity to 2500 A assuming a power-law continuum
    logL2500 = logL5100 + 0.5*np.log10(5100./2500.)
    d10pc = 3.08e19  ## 10 pc in cm
    mi_z2 = (logL2500 - np.log10(3e18/2500.) - np.log10(4.*np.pi*d10pc**2))/(-0.4) \
            - 2.5*np.log10(3.) - 48.6
    dm = cosmo.dist_modulus(np.array([z1]))[0]
    imag = mi_z2 + dm + K_z(z1)
    return imag


def selection(y, z):
    ## probability that a source with log(M_BH)=y at redshift z enters the
    ## i<=21.7 flux-limited sample; note that a new Eddington ratio is drawn
    ## on every call, so the selection probability is stochastic
    lum_edd = 1.26e38*10**(y)
    redd_mean = -1.
    redd_sigma = 0.3
    logedd = np.random.normal(redd_mean, redd_sigma, 1)
    ## L5100 from the bolometric luminosity, assuming a bolometric correction of 10
    logL5100 = np.log10(lum_edd*10**(logedd)/10.)
    imag = get_imag(logL5100, z)
    return 1. if imag <= 21.7 else 0.
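## Optional cross-check, not called in the main run: the hand-coded bivariate
## Gaussian used in LogLikelihood compared against
## scipy.stats.multivariate_normal for a single mixture component. All
## parameter values below are made up for illustration.
def _check_bivariate_density():
    from scipy.stats import multivariate_normal
    alpha, beta, sca_sqr = 7.4, 1.2, 0.15      ## hypothetical regression parameters
    mu_k, tausqr_k = 0.3, 0.05                 ## hypothetical mixture component
    xerr_i, yerr_i = 0.1, 0.2                  ## hypothetical measurement errors
    z_i = np.array([7.5, 0.4])                 ## one (y, x) point
    zeta_k = np.array([alpha + beta*mu_k, mu_k])
    v_k = np.array([[beta**2*tausqr_k + sca_sqr + yerr_i**2, beta*tausqr_k],
                    [beta*tausqr_k, tausqr_k + xerr_i**2]])
    r = z_i - zeta_k
    by_hand = np.exp(-0.5*(r @ np.linalg.inv(v_k) @ r))/(2.*np.pi*np.sqrt(np.linalg.det(v_k)))
    by_scipy = multivariate_normal(mean=zeta_k, cov=v_k).pdf(z_i)
    ## the two numbers should agree to machine precision
    print('hand-coded vs scipy:', by_hand, by_scipy)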
np.random.seed(123)
comp = 'bulge'
K_corr_file = './Richards_K_corr.txt'
dir_mcmc = '/Users/jennili/Dropbox/HST_hostgal/fit_emcee/'

z_lis, K_corr = np.loadtxt(K_corr_file, skiprows=22).T
K_z = interp1d(z_lis, K_corr, kind='cubic')
Mgal_all, Mgal_err_all, Mbh_all, Mbh_err_all, z_all = np.loadtxt('mbh_ms_'+comp+'.txt').T

## linmix results
if comp == 'tot':
    alpha_linmix = 7.34
    alpha_linmix_err = [-0.17, 0.14]
    beta_linmix = 1.36
    beta_linmix_err = [-0.4, 0.59]
    scasqr_linmix = 0.34
    scasqr_linmix_err = [-0.11, 0.11]
if comp == 'bulge':
    alpha_linmix = 7.44
    alpha_linmix_err = [-0.16, 0.13]
    beta_linmix = 1.18
    beta_linmix_err = [-0.52, 0.76]
    scasqr_linmix = 0.39
    scasqr_linmix_err = [-0.11, 0.11]

## set up the MH run
rng = np.random.RandomState()
nchain = 32
nstep = 1600
nburn = 800
labels = ['alpha', 'beta', 'sca_sqr'] + ['pi']*3 + ['mu']*3 + ['tausqr']*3
ndim = len(labels)
outfile = './fit_MH/samples_selection_nc'+str(nchain)+'_nw'+str(nstep)+'_uniprior_'+comp+'_v4.txt'

if not os.path.exists(outfile):
    ## set up initial guesses around the linmix results
    K = 3
    alpha_prior = alpha_linmix
    beta_prior = beta_linmix
    scasqr_prior = scasqr_linmix**2.
    wsqr = np.max([np.var(Mgal_all-10., ddof=1)-np.median(Mgal_err_all),
                   0.01*np.var(Mgal_all-10., ddof=1)])
    pi_prior = np.random.default_rng().dirichlet(np.ones(K), 1)[0]
    usqr0 = np.abs(np.random.normal(loc=0, scale=wsqr, size=K))
    usqr0[np.where(usqr0 > np.var(Mgal_all-10., ddof=1)*1.5)] = np.var(Mgal_all-10., ddof=1)*1.5
    '''
    tausqr0 = [0.4,0.4,0.2]
    mu_prior = [0.3,0.3,0.3]
    tausqr_prior = [0.01,0.01,0.01]
    '''
    mu0 = np.random.uniform(np.min(Mgal_all-10.), np.max(Mgal_all-10.))
    mu_prior = np.random.normal(loc=mu0, scale=usqr0, size=K)
    tausqr_prior = 0.5 * wsqr * nchain / rng.chisquare(nchain, size=K)
    guess = np.array([alpha_prior, beta_prior, scasqr_prior]
                     + list(pi_prior) + list(mu_prior) + list(tausqr_prior))

    ## run MH-MCMC
    #sigmas = [0.2,0.2,0.1,5e-2,5e-2,5e-2,5e-2,5e-2,5e-2,5e-2,5e-2,5e-2]
    sigmas = [0.5, 0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]
    print(guess)
    print(sigmas)
    args = Mgal_all-10., Mbh_all, z_all, Mgal_err_all, Mbh_err_all
    samples = np.zeros((nchain, nstep, len(guess)))
    for i in range(nchain):
        print('chain', i)
        ## jitter the starting point a little for each chain
        guess = guess + 0.01*np.array(sigmas)*np.random.randn(len(guess))
        samples[i, :, :] = metropolis_hastings(Posterior, guess, np.array(sigmas),
                                               *args, nsamp=nstep)
    np.savetxt(outfile, samples.flatten())

samples = np.loadtxt(outfile)
samples = samples.reshape([nchain, nstep, ndim])
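## Optional convergence diagnostic, a sketch rather than part of the original
## run: a simple Gelman-Rubin R-hat over the post-burn-in chains. The helper
## name _gelman_rubin is an illustrative addition.
def _gelman_rubin(chains):
    ## chains: (nchain, nstep, ndim) array of post-burn-in samples
    m, n = chains.shape[0], chains.shape[1]
    chain_means = chains.mean(axis=1)          ## (m, ndim)
    chain_vars = chains.var(axis=1, ddof=1)    ## (m, ndim)
    W = chain_vars.mean(axis=0)                ## within-chain variance
    B = n*chain_means.var(axis=0, ddof=1)      ## between-chain variance
    var_hat = (n - 1.)/n*W + B/n
    return np.sqrt(var_hat/W)                  ## R-hat per parameter, ~1 when converged
## e.g. print(_gelman_rubin(samples[:, nburn:, :3])) should be close to 1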
## check burn-in
'''
for frac in [0.1,0.3,0.5,0.7,0.9]:
    print('frac', frac, int(frac*nstep))
    for i in range(3):
        mcmc = np.percentile(samples[:,int(frac*nstep):int(frac*nstep)+150,i], [16, 50, 84])
        q = np.diff(mcmc)
        print('{0:.2f} + {1:.2f} - {2:.2f}'.format(mcmc[1], q[1], q[0]))
'''

## trace plots and marginalised constraints for alpha, beta and the scatter
fig, axes = plt.subplots(3, figsize=(6, 6), sharex=True)
print('MCMC')
for i in range(3):
    mcmc = np.percentile(samples[:, nburn:, i], [16, 50, 84])
    q = np.diff(mcmc)
    if i != 2:
        print(labels[i]+' = {0:.2f} + {1:.2f} - {2:.2f}'.format(mcmc[1], q[1], q[0]))
    else:
        ## quote the scatter sigma rather than sigma^2
        print('sigma = {0:.2f} + {1:.2f} - {2:.2f}'.format(
            np.sqrt(mcmc[1]),
            np.sqrt(mcmc[1]+q[1])-np.sqrt(mcmc[1]),
            np.sqrt(mcmc[1])-np.sqrt(mcmc[1]-q[0])))
    ax = axes[i]
    ax.plot(samples[:, :, i].T, "k", alpha=0.2)
    ax.set_xlim(0, nstep)
    ax.set_ylabel(labels[i])
axes[-1].set_xlabel("step number")
plt.savefig(outfile[:-4]+'_chain.pdf', bbox_inches='tight')

###
### corner plot
###
## plot the scatter sigma instead of sca_sqr; note that scasqr_linmix already
## holds the linmix scatter sigma (it is squared above when used as a guess)
samples[:, :, 2] = np.sqrt(samples[:, :, 2])
fig = corner.corner(samples[:, nburn:, :3].reshape([int((nstep-nburn)*nchain), 3]),
                    plot_contours=True, quantiles=[0.16, 0.5, 0.84], show_titles=True,
                    smooth=2, labels=['a', 'b', r'$\sigma$'],
                    truths=[alpha_linmix, beta_linmix, scasqr_linmix],
                    truth_color='b', label_kwargs={"fontsize": 16})
plt.savefig(outfile[:-4]+'_corner.pdf', bbox_inches='tight')
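## Optional chain-length check, a sketch: the integrated autocorrelation time
## from emcee's autocorr module (assuming emcee >= 3). integrated_time expects
## the time axis first, i.e. (nstep, nchain, ndim), so the axes are swapped;
## quiet=True warns instead of raising on chains that are too short.
def _autocorr_check(samples):
    tau = emcee.autocorr.integrated_time(np.swapaxes(samples, 0, 1), quiet=True)
    print('integrated autocorrelation time per parameter:', tau)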
###
### M_BH vs M_* plot: data, bias-corrected fit, original linmix fit, and KH13
###
fig3, ax3 = plt.subplots(figsize=(8, 8))
#ax3.errorbar(Mgal_all-10., Mbh_all, xerr=Mgal_err_all, yerr=Mbh_err_all, fmt=".k", capsize=0)
ax3.errorbar(10**Mgal_all, 10**Mbh_all,
             yerr=[10**Mbh_all-10**(Mbh_all-Mbh_err_all), 10**(Mbh_all+Mbh_err_all)-10**Mbh_all],
             xerr=[10**Mgal_all-10**(Mgal_all-Mgal_err_all), 10**(Mgal_all+Mgal_err_all)-10**Mgal_all],
             fmt='o', color='k', zorder=100, ms=8, alpha=0.8)

x0 = np.linspace(-2, 3, 51)
ax3.plot(10**(x0+10),
         10**(x0*np.median(samples[:, nburn:, 1].flatten())+np.median(samples[:, nburn:, 0].flatten())),
         "--k", label="Bias-Corrected", lw=2)
flat_alpha = samples[:, nburn:, 0].flatten()
flat_beta = samples[:, nburn:, 1].flatten()
mstar_plot = 10**(x0+10)
## 16th-84th percentile envelope of the bias-corrected fit
fity1 = 10.**(np.percentile(flat_alpha, 84)+np.log10(mstar_plot/1e10)*np.percentile(flat_beta, 16))
fity2 = 10.**(np.percentile(flat_alpha, 16)+np.log10(mstar_plot/1e10)*np.percentile(flat_beta, 16))
fity3 = 10.**(np.percentile(flat_alpha, 84)+np.log10(mstar_plot/1e10)*np.percentile(flat_beta, 84))
fity4 = 10.**(np.percentile(flat_alpha, 16)+np.log10(mstar_plot/1e10)*np.percentile(flat_beta, 84))
plt.fill_between(mstar_plot[np.where(mstar_plot <= 1e10)],
                 y1=fity4[np.where(mstar_plot <= 1e10)], y2=fity1[np.where(mstar_plot <= 1e10)],
                 color='k', alpha=0.1, zorder=0)
plt.fill_between(mstar_plot[np.where(mstar_plot >= 1e10)],
                 y1=fity2[np.where(mstar_plot >= 1e10)], y2=fity3[np.where(mstar_plot >= 1e10)],
                 color='k', alpha=0.1, zorder=0)
'''
inds = np.random.randint(len(flat_alpha), size=100)
for i in inds:
    ax3.plot(10**(x0+10), 10**(x0*flat_beta[i]+flat_alpha[i]), "-b", alpha=0.1)
'''

##############
### linmix ###
##############
ax3.plot(10**(x0+10), 10**(x0*beta_linmix+alpha_linmix), "--b", label="Original", lw=2)
fity1 = 10.**((alpha_linmix+alpha_linmix_err[1])+np.log10(mstar_plot/1e10)*(beta_linmix+beta_linmix_err[0]))
fity2 = 10.**((alpha_linmix+alpha_linmix_err[0])+np.log10(mstar_plot/1e10)*(beta_linmix+beta_linmix_err[0]))
fity3 = 10.**((alpha_linmix+alpha_linmix_err[1])+np.log10(mstar_plot/1e10)*(beta_linmix+beta_linmix_err[1]))
fity4 = 10.**((alpha_linmix+alpha_linmix_err[0])+np.log10(mstar_plot/1e10)*(beta_linmix+beta_linmix_err[1]))
plt.fill_between(mstar_plot[np.where(mstar_plot <= 1e10)],
                 y1=fity4[np.where(mstar_plot <= 1e10)], y2=fity1[np.where(mstar_plot <= 1e10)],
                 color='b', alpha=0.1, zorder=0)
plt.fill_between(mstar_plot[np.where(mstar_plot >= 1e10)],
                 y1=fity2[np.where(mstar_plot >= 1e10)], y2=fity3[np.where(mstar_plot >= 1e10)],
                 color='b', alpha=0.1, zorder=0)

## Kormendy & Ho (2013) relation
mbh_kh13 = 1e9*(0.49*((10**(x0+10)/1e11)**1.17))
ax3.plot(10**(x0+10), mbh_kh13, "-", color='tab:red', label="KH13", lw=1.5)

if comp == 'tot':
    ax3.set_xlabel(r'$M_{\rm *, Host}$ [M$_{\rm \odot}$]', fontsize=20)
if comp == 'bulge':
    ax3.set_xlabel(r'$M_{\rm *, Bulge}$ [M$_{\rm \odot}$]', fontsize=20)
ax3.set_ylabel(r'$M_{\rm BH}$ [M$_{\rm \odot}$]', fontsize=20)
ax3.set_xscale('log')
ax3.set_yscale('log')
ax3.legend(fontsize=16, loc='lower right')
ax3.set_aspect('equal')
ax3.set_ylim([1e6, 3e9])
ax3.set_xlim([4e8, 3e12])
plt.tick_params(which='both', labelsize=18, top=True, right=True, direction='in')
#ax3.set_title(comp)
plt.savefig(outfile[:-4]+'_mcmc.pdf', bbox_inches='tight')
#plt.show()