diff --git a/.gitignore b/.gitignore index 457d7bb..5f26292 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,10 @@ build* ext-* +# python +pafi/__pycache__ +*.egg-info + # test data example/pathway_test/* !example/pathway_test/run.sh diff --git a/pafi/__init__.py b/pafi/__init__.py new file mode 100644 index 0000000..4f271b2 --- /dev/null +++ b/pafi/__init__.py @@ -0,0 +1,7 @@ +import pafi.parser +import pafi.plots +from pafi import pafi + +def cli(): + args = pafi.parser.parse_cli_args() + args.func(args) diff --git a/pafi/pafi.py b/pafi/pafi.py new file mode 100644 index 0000000..bf4680f --- /dev/null +++ b/pafi/pafi.py @@ -0,0 +1,352 @@ +""" +raw_ensemble_output has i + 4*nWorker colums. i = 2 since commit 3ca8f884 (20/09/21). For older versions of PAFI, i=1. + +Column 0 : r +Column 1 : Number of workers with max_jump < user threshold at run time +-> nWorker+i : av(dF), the gradient +-> 2*nWorker+i : std(dF), the *worker* variance, *not* ensemble error, should not be zero even in infinite time limit! +-> 3*nWorker+i : av(Psi) = av(dX).dX_0/|dX_0|^2, the tangent projection +-> 4*nWorker+i : the maximum per-atom jump following the MD +""" + +import io +import numpy as np +import matplotlib.pyplot as plt +from scipy.integrate import cumtrapz +from scipy.interpolate import interp1d +import pandas as pd +import pathlib +from scipy.interpolate import UnivariateSpline + +kb = 8.617e-5 + +class Profile(): + """Class for handling (free) energy profiles. + + Parameters + ---------- + + data : np.ndarray + A Nx3 shaped array that contains [reaction coordinate, free energy, and error] as columns. + + Attributes + ---------- + + data : np.ndarray + The Nx3 array passed as argument. + barrier : float + The maximum Free Energy value. + error : float + error = error(R=0)+error(R=Rm), where Rm is the reaction corrdinate at which the Free Energy is maximal. + """ + + def __init__(self, data, neb_rcoord=None): + self.data = data + self.barrier = data[:,1].max() - data[:data[:,1].argmax()].min() + self.error = data[0][2] + data[data[:,1].argmax()][2] + self.neb_rcoord = neb_rcoord + +class PafiResult(): + """Class for manipulating PAFI simulation results. + + Parameters + ---------- + + file : pathlib.Path object or str + The path to the pafi raw_ensemble file to extract data from. + Should have a name of the form "raw_ensemble_output_*K_*", unless optional temperature parameter is given. + temperature : int or float + The temperature of the simulation. + + Attributes + ---------- + + temperature : str + The standard output passed by PreciSo during its execution. + file : raw_results file + discrete_profile : pafi.Profile + The free energy profile as a pafi.Profile object. Contains the discrete data. + splined_profile : pafi.Profile + The free energy profile as a pafi.Profile object. Rediscretized in order to have a smooth profile curve. + bar : np.array + A convenient array that contains commonly used data : [temperature, discrete_profile.barrier, discrete_profile.error, splined_profile.barrier, splined_profile.error] + See documentation of pafi.Profile for details on .barrier and .error attributes. + + Examples + -------- + + >>> import pafi + >>> import matplotlib.pyplot as plt + >>> p = pafi.PafiResult('raw_ensemble_output_0K_0') + >>> ax = p.plot() + >>> plt.show() + """ + + def __init__(self, file, logfile=None, temperature=None, neb_csv=None, full_path=False): + assert (isinstance(file, str)) or (isinstance(file, pathlib.Path)) ,"`file` must be str or pathlib.Path object." + + self.file = file + if neb_csv is not None: + self.neb_rcoord = self.get_r_from_neb_csv(neb_csv) + else: + self.neb_rcoord = None + + self.r_coord = None + if logfile is not None: + self.r_coord = self.log_parser(logfile) + + if temperature is not None: + self.temperature = temperature + else: + self.temperature = self.get_temperature() + + raw, self.hist_data, self.std = self.raw_parser() + + self.discrete_profile = Profile(PafiResult.integrate(raw, full_path=full_path), self.neb_rcoord) + + remeshed = PafiResult.remesh(raw) + self.splined_profile = Profile(PafiResult.integrate(remeshed, full_path=full_path), self.neb_rcoord) + + self.bar = self.get_bar() + + def get_r_from_neb_csv(self, file): + # a = np.genfromtxt(file) + # a = a[np.isfinite(a).any(axis=1)][:,0] + # return (a-a[0])/a[-1] + + return np.loadtxt(file)[:,0] + + def raw_parser(self, disp_thresh=1.0, mask_outliers=False): + """Parse the raw data file""" + try: + f = open(self.file,'r') + except IOError: + print("raw data file %s not found" % self.file) + return np.zeros((1,3)) + + count_data = count_valid = 0 + r_dFave_dFstd = [] + dF_raw = [] + std_raw = [] + + + if PafiResult.has_old_data_format(self.file): + if self.r_coord is not None: + # Use rcoord from logfile + print('Use rcoord from logfile') + for line, r_c in zip(f.readlines(), self.r_coord): + if line[0] != "#": + fields = np.loadtxt(io.StringIO(line.strip())) + n_data = (fields.size-1)//4 + n_valid = (fields[-n_data:] < disp_thresh).sum() + + count_data += n_data + if n_valid>0: + count_valid += n_valid + + r_dFave_dFstd += [[r_c, + fields[1:n_valid+1].mean(), + fields[1:n_valid+1].std()/np.sqrt(n_valid)]] + # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100))) + r_dFave_dFstd = np.r_[r_dFave_dFstd] + r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1 + return r_dFave_dFstd, len(r_dFave_dFstd) + else: + # reaction coordinate is interpolated (may cause deviations at large T) + # needed for backward compatibility + print('reaction coordinate is interpolated (may cause deviations at high T)') + r = 0.0 + for line in f.readlines(): + if line[0] != "#": + fields = np.loadtxt(io.StringIO(line.strip())) + n_data = (fields.size-1)//4 + n_valid = (fields[-n_data:] < disp_thresh).sum() + + count_data += n_data + if n_valid>0: + count_valid += n_valid + + r_dFave_dFstd += [[r, + fields[1:n_valid+1].mean(), + fields[1:n_valid+1].std()/np.sqrt(n_valid)]] + r += 1.0 # always increment even if n_valid==0 + # print(self.file, " data :", count_data, " valid : ", count_valid, " ({}%)".format(int(count_valid/count_data*100))) + r_dFave_dFstd = np.r_[r_dFave_dFstd] + r_dFave_dFstd[:,0]/=r_dFave_dFstd[-1][0] # r : 0 -> 1 + return r_dFave_dFstd, len(r_dFave_dFstd) + else: + # reaction coordinate is read from file + print("Reaction coordinate is read from raw file") + outliers = 0.5 + for line in f.readlines(): + if line[0] != "#": + fields = np.loadtxt(io.StringIO(line.strip())) + n_data = (fields.size-2)//4 + n_valid = (fields[-n_data:] < disp_thresh).sum() + + count_data += n_data + if n_valid>0: + count_valid += n_valid + + dF = np.array(fields[2:n_valid+2]) + low = np.outer(np.percentile(dF, outliers, axis=0),np.ones(dF.shape[0])) + high = np.outer(np.percentile(dF, 100-outliers, axis=0),np.ones(dF.shape[0])) + mask = (dF>low) * (dF0: + idata = idata[run_min == run_min_shift,:] + idata[:,1]-=idata[0][1] + return idata + + def plot(self, ax=None, color=None, label_prepend="", use_neb_rcoord=False): + + if ax is None: + fig, ax = plt.subplots() + if color is None: + color="C0" + if use_neb_rcoord: + if self.neb_rcoord is None: + raise ValueError("Provide neb_csv file with NEB r coordinate as 1st column") + discrete = self.discrete_profile.data + splined = self.splined_profile.data + + for i, profile in enumerate([self.discrete_profile.data, self.splined_profile.data]): + + style = ["o", "-"][i] + label = [label_prepend + f'{self.temperature}K', ''][i] + + if use_neb_rcoord: + x = np.linspace(0, 1, self.neb_rcoord.shape[0]) + spl = UnivariateSpline(x, self.neb_rcoord, s=0) + r = spl(np.linspace(0,1, profile[:,1].shape[0])) + print("using rcorrdinate from NEB file") + else: + r = profile[:,0] + + ax.plot(r, + profile[:,1], + style, + color=color, + label=label) + if i==1: # only errorbars of splined profile + ax.fill_between(r, + profile[:,1]-profile[:,2], + profile[:,1]+profile[:,2], + facecolor='0.8') + + ax.set_xlabel("Reaction Coordinate") + ax.set_ylabel("Gibbs energy profile (eV)") + ax.legend(loc="best") + return ax + +def free_energy_vs_temperature(flist, ax=None, fit_harmonic=True, harmonic_until_T=100, ymin=-0.05, label_prepend="", + start_color_at_index=0, return_poly=False, add_pts=None, full_path=False): + + if (ax is None) or len(ax)!=2: + fig, ax = plt.subplots(1,2, figsize=(9,5),dpi=144,sharey=True) + + data = [PafiResult(file, full_path=full_path) for file in flist] + data.sort(key=lambda x: x.temperature) + bar = np.array([x.bar for x in data]) + + # to add arbitrary pts to the right panel plot + if add_pts: + pts = [[pt[0], pt[1], 1e-4, pt[1], 1e-4] for pt in add_pts] + bar = np.concatenate((np.array(pts), bar)) + for i, r in enumerate(data): + a = r.plot(ax[0], color=f'C{i+start_color_at_index}', label_prepend=label_prepend) + + # bar = bar[np.argsort(bar[:, 0])] + bar = bar.astype(float) + + if fit_harmonic: + # Fit a linear regression on the domain below harmonic_until_T K + harmonic_regime = bar[np.where((bar[:,0]<=harmonic_until_T))] + p = np.polyfit(harmonic_regime[:,0], harmonic_regime[:,1], 1) + + ax[1].plot(bar[:,0], bar[:,3], "-o", color=f'C{start_color_at_index}', label=label_prepend) + ax[1].fill_between(bar[:,0], bar[:,1]-bar[:,2], bar[:,1]+bar[:,2],facecolor='0.93') + ax[1].fill_between(bar[:,0], bar[:,3]-bar[:,4], bar[:,3]+bar[:,4],facecolor='0.93') + if fit_harmonic: + # harmonic domain + ax[1].plot(bar[:,0],p[1]+p[0]*bar[:,0],'--', color=f'C{start_color_at_index}', label=label_prepend + r'$\Delta U_0=%2.3geV,\Delta S_0=%2.3g{\rm k_B}$' % (p[1], -p[0]/kb)) + ax[1].set_xlabel("Temperature (K)") + ax[1].set_ylabel("Gibbs energy of activation (eV)") + ax[1].legend(loc='best') + # ax[1].set_ylim(ymin=ymin) + plt.subplots_adjust(wspace=0) + plt.tight_layout() + + if return_poly: return ax, p + else: return ax diff --git a/pafi/parser.py b/pafi/parser.py new file mode 100644 index 0000000..86e5bf7 --- /dev/null +++ b/pafi/parser.py @@ -0,0 +1,60 @@ +import argparse +import pafi.plots as plots + +def parse_cli_args(): + parser = argparse.ArgumentParser() + + subparsers = parser.add_subparsers(help='', required=True, title='Available modes',) + + ## NEB MODE + parser_NEB = subparsers.add_parser('neb', aliases=["NEB"], help='Plot the energy profile from a NEB log.lammps file.') + parser_NEB.add_argument('log_lammps', + help="log.lammps file from a NEB run.") + parser_NEB.add_argument('-f', '--final', action="store_true", + help="Only display the final stage of the relaxation. Otherwise, profiles at the initial and pre-climb stages are also shown if possible.") + parser_NEB.add_argument('-C', '--cut-at-image', default=-1, metavar="N", + help="If N is specified, only the first N images will be shown.") + parser_NEB.add_argument('-n', '--normalize-r', action="store_true", + help="Normalize r such that its maximum value is 1.0, even if -C is used.") + parser_NEB.add_argument('-N', '--image-number', action="store_true", + help="Use image number as xlabel instead of reaction coordinate.") + parser_NEB.add_argument('-E', '--energy_diff', action="store_true", + help="Add a subplot showing the spacing of points on the Energy axis. Useful to spot large gaps in energy that would cause a large error for PAFI integration.") + parser_NEB.add_argument('-R', '--rcdiff', action="store_true", + help="Add a subplot showing the spacing of points on the r axis. Useful to spot large gaps in reaction coordinate that could cause a large integration error.") + parser_NEB.add_argument('--print-profile', action="store_true", + help="Print NEB profile raw data to terminal.") + parser_NEB.set_defaults(func=plots.plot_neb_results) + + ## PAFI MODE + parser_pafi = subparsers.add_parser('pafi', aliases=["PAFI"], help='Plot the free energy profile from a PAFI run using a raw_ensemble_output file.') + parser_pafi.add_argument('file', + help="A PAFI raw_ensemble_output file, or a glob expression that matches a list of raw_ensemble_output files.") + parser_pafi.add_argument('--mode', choices="sm", + help="To either plot a single or multiple raw files. Default behaviour is to try to guess, as glob patterns most often include the * asterisk character.") + parser_pafi.add_argument('--H0', '--zero', action="store", + help="A value for the 0 Kelvin activation enthalpy, e.g obtained from NEB.") + parser_pafi.add_argument('--harmonic-limit', default=100, type=float, + help="The upper temperature limit above which the entropy is considered non-linear. Defaults to 100K. System dependant.") + parser_pafi.add_argument('--no-fit', action="store_false", + help="Disable fitting the entropy on the harmonic domain. Default is to fit it up to 100K.") + parser_pafi.add_argument('--hist', action="store_true", + help="Plot an histogram of dF to see its distribution.") + parser_pafi.add_argument('--std', action="store_true", + help="Plot std of dF for each hyperplane.") + parser_pafi.add_argument('--full-path', '--fp', action="store_true", + help="Show all points instead of cutting the part before the initial minimum (which is the default).") + parser_pafi.set_defaults(func=plots.plot_pafi_results) + + # COMMON TO ALL MODES + for p in (parser_pafi, parser_NEB): + p.add_argument('-t', '--terminal', action="store_true", + help="Experimental: use plotext as backend for display in terminal.") + p.add_argument('-s', '--save', + help='Name of a file destination (e.g. "plot.pdf", "plot.png")') + p.add_argument('-q', '--quiet', action="store_true", + help="Do not open matplotlib popup nor display plots in terminal.") + p.add_argument('--size', default=4, + help="Size of the figure passed to matplotlib (default is 4).") + + return parser.parse_args() \ No newline at end of file diff --git a/pafi/plots.py b/pafi/plots.py new file mode 100644 index 0000000..c4b1ec1 --- /dev/null +++ b/pafi/plots.py @@ -0,0 +1,197 @@ +import numpy as np +from glob import glob +from pafi import pafi +import matplotlib.pyplot as plt +import matplotlib.pyplot as plt + +def plot_neb_results(args): + """ + Utility to plot the results of a NEB calculation from a log.lammps file. + """ + if args.terminal: + import plotext as plx + + d = np.genfromtxt(args.log_lammps, skip_header=3, invalid_raise=False) + + if len(np.where(np.isnan(d[:, 0]))[0])>0: + # seems needed for some LAMMPS versions + i_before_climb = np.where(np.isnan(d[:, 0]))[0][0]-1 + else: + i_before_climb = None + + d = d[np.isfinite(d).any(axis=1)] + a = d[:, 9:].reshape((d.shape[0], int((d.shape[1]-9)/2), 2)) + if int(args.cut_at_image) > -1: + a = a[:, :int(args.cut_at_image), :] + + cols = 1 + if args.energy_diff: + cols += 1 + if args.rcdiff: + cols += 1 + + fig, ax = plt.subplots(1, cols, figsize=(cols*int(args.size), int(args.size))) + if cols == 1: ax = [ax] + E_ref = a[-1, 0, 1] + + for i, (p, l) in enumerate(zip([0, i_before_climb, -1], ("Initial", "Before Climb", "Final "))): + if p is None: continue + if (args.final) and ("Final" not in l): continue + + norm = a[p, :, 0].max() if args.normalize_r else 1 + n_img = a[p, :, 0].shape[0] + + if args.image_number: + r = np.arange(len(a[p, :,1]))+1 + for axi in ax: + axi.set_xlim(xmin=r[0]-0.2, xmax=r[-1]+0.2) + else: + r = a[p, :, 0]/norm + + # 1st panel: energy profile + l += f" max. {(a[p, :, 1]-E_ref).max():.3f} eV" + ax[0].plot(r, a[p, :, 1]-E_ref, ".-", label=l, color=f"C{i}") + if "Final" in l: + ax[0].fill_between(r, (a[p, :, 1]-E_ref).min(), a[p, :, 1]-E_ref, color=f"C{i}", alpha=0.1) + # ax[0].annotate(f"{l} max: {(a[p, :, 1]-E_ref).max():.3f} eV", (5, 200-12*i), xycoords="axes points") + if args.energy_diff: + # 2nd panel: energy spacing + ax[1].step(np.arange(n_img)+1, + np.abs(np.diff(a[p, :, 1], prepend=a[p, 0, 1])), color=f"C{i}") + ax[1].fill_between(np.arange(n_img)+1, + 0, np.abs(np.diff(a[p, :, 1], prepend=a[p, 0, 1])), + label=l, color=f"C{i}", step="pre", alpha=0.1) + + ax[1].set_title("Energy spacing") + ax[1].set_xlabel("Image Number") + ax[1].set_ylabel("Energy difference (eV)") + + if args.print_profile: + print(l, "\n", f"{a[p, :, 0]/norm}", a[p, :, 1]-a[p, 0, 1]) + if args.rcdiff: + # 3rd panel with rc spacing + ax[-1].step(np.arange(n_img)+1, np.abs(np.diff(a[p, :, 0], prepend=np.diff(a[p, :, 0])[0])), color=f"C{i}") + ax[-1].fill_between(np.arange(n_img)+1, 0, np.abs(np.diff(a[p, :, 0], prepend=np.diff(a[p, :, 0])[0])), + step="pre", color=f"C{i}", alpha=0.1) + ax[-1].set_title("Reaction Coordinate spacing") + ax[-1].set_xlabel("Image Number") + ax[-1].set_ylabel("Reaction coordinate") + + ax[0].set_title("Energy profile") + if args.image_number: + ax[0].set_xlabel("Image Number") + else: + ax[0].set_xlabel("Reaction Coordinate") + ax[0].set_ylabel("Energy (eV)") + ax[0].legend() + + fig.tight_layout() + if args.save is not None: + try: + plt.savefig(args.save) + except: + print(f"failed saving plot to {args.f}") + if not args.quiet: + if args.terminal: + plx.from_matplotlib(fig) + plx.show() + else: + plt.show() + + +def guess_mode(x): + if (not isinstance(x, str)) or ("*" in x): + return "m" + else : + return "s" + + +def plot_profile(args): + if args.terminal: + import plotext as plx + + cols = 1 + if args.hist: + cols += 1 + if args.std: + cols += 1 + + fig,axs = plt.subplots(1, cols, figsize=(cols*args.size, args.size)) + if cols == 1: axs = [axs] + + plt.sca(axs[0]) + for file in glob(args.file): + r = pafi.PafiResult(file, full_path=args.full_path) + r.plot(ax=axs[0]) + if cols>1: + try: + # will fail at 0K + if args.hist: + plt.sca(axs[1]) + plt.hist(r.hist_data, bins=30) + plt.ylabel("(dF-dFmean)/std") + + if args.std: + plt.sca(axs[-1]) + plt.plot(r.std, "o-") + plt.ylabel("std") + plt.xlabel("hyperplane") + except: pass + plt.tight_layout() + + if args.save is not None: + try: + plt.savefig(args.save) + except: + print(f"failed saving plot to {args.f}") + if not args.quiet: + if args.terminal: + plx.from_matplotlib(fig) + plx.show() + else: + plt.show() + + + +def plot_pafi_results(args): + + if args.terminal: + import plotext as plx + + + if args.mode is None: + # If not specified, try to guess from pattern + args.mode = guess_mode(args.file) + + # Plot a single profile + if args.mode == "s": + plot_profile(args) + elif args.mode == "m": + fig, ax = plt.subplots(1, 2, figsize=(args.size*2, args.size*1),dpi=144,sharey=True) + + if args.H0 is not None: + additional_pts = [(0, args.H0)] + else: additional_pts = None + + pafi.free_energy_vs_temperature(glob(args.file), + ax=ax, + fit_harmonic=args.no_fit, + add_pts=additional_pts, + harmonic_until_T=args.harmonic_limit, + full_path=args.full_path) + + fig.tight_layout() + if args.save is not None: + try: + plt.savefig(args.save) + except: + print(f"failed saving plot to {args.f}") + if not args.quiet: + if args.terminal: + plx.from_matplotlib(fig) + plx.show() + else: + plt.show() + + + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..9787c3b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..3310e8e --- /dev/null +++ b/setup.cfg @@ -0,0 +1,15 @@ +[metadata] +name = pafi +version = 0.0.1 + +[options] +packages = find: +install_requires = + numpy + matplotlib + scipy + pandas + +[options.entry_points] +console_scripts = + pafi_plot = pafi:cli \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..a4f49f9 --- /dev/null +++ b/setup.py @@ -0,0 +1,2 @@ +import setuptools +setuptools.setup()