# Source code for neorl.utils.neorlcalls

#    This file is part of NEORL.

#    Copyright (c) 2021 Exelon Corporation and MIT Nuclear Science and Engineering
#    NEORL is free software: you can redistribute it and/or modify
#    it under the terms of the MIT LICENSE

#    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#    SOFTWARE.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#"""
#Created on Wed Mar  4 11:51:22 2020
#
#@author: majdi
#"""

import numpy as np
import pandas as pd
from neorl.rl.baselines.shared.callbacks import BaseCallback
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import os
import copy

class SavePlotCallback(BaseCallback):
    """
    Callback that periodically checkpoints the model and plots the csv logger.

    Whenever ``num_timesteps`` is a multiple of ``check_freq`` (or equals
    ``total_timesteps``) the callback reads ``<log_dir>_out.csv``, saves the
    current model to ``<log_dir>_lastmodel.pkl``, additionally saves it to
    ``<log_dir>_bestmodel.pkl`` when the mean of the last ``avg_step`` rewards
    improved, and regenerates the progress plot(s).

    :param check_freq: (int) number of timesteps between two checks
    :param avg_step: (int) number of trailing csv rows averaged to rate the model;
        also the group size used for the plotted mean/std/max/min statistics
    :param log_dir: (str) path prefix; ``_out.csv``, ``_bestmodel.pkl``,
        ``_lastmodel.pkl`` and the png names are appended to it
    :param total_timesteps: (int) timestep at which a final check is made and the process exits
    :param basecall: object whose ``n_calls``, ``model`` and ``num_timesteps`` are copied
        onto this callback at construction time
    :param plot_mode: (str) ``'subplot'`` (all columns in one figure) or ``'classic'``
        (one figure per column)
    """
    def __init__(self, check_freq, avg_step, log_dir, total_timesteps, basecall, plot_mode='subplot'):
        # Initialise the base class the same way RLLogger does, so whatever
        # attributes BaseCallback sets up exist on this instance; the values
        # copied from ``basecall`` below are assigned afterwards and win.
        super(SavePlotCallback, self).__init__(1)
        self.base=basecall
        self.plot_mode=plot_mode
        self.n_calls=self.base.n_calls
        self.model=self.base.model
        self.num_timesteps=self.base.num_timesteps
        self.total_timesteps=total_timesteps
        self.verbose=1
        self.check_freq = check_freq
        self.avg_step=avg_step
        self.log_dir = log_dir
        self.best_save_path = self.log_dir + '_bestmodel.pkl'
        self.save_path = self.log_dir + '_lastmodel.pkl'
        self.best_mean_reward = -np.inf

        #avoid activating 'Agg' in the header so not to affect other classes/algs
        import matplotlib
        matplotlib.use('Agg')

    def runcall(self):
        """Rate the model from the csv logger, save it, and refresh the plots."""

        print('num_timesteps={}/{}'.format (self.num_timesteps, self.total_timesteps))

        # Read the logger once and reuse it for the reward statistics
        self.out_data=pd.read_csv(self.log_dir+'_out.csv')
        rewards=self.out_data["reward"].values
        # Mean training reward over the last ``avg_step`` logged rows
        mean_reward = np.mean(rewards[-self.avg_step:])

        print('--debug: current mean reward={}, previous best mean reward = {}'.format(np.round(mean_reward), np.round(self.best_mean_reward)))
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            #saving best model
            print('--debug: improvement in reward is observed, new best model is saved to {}'.format(self.best_save_path))
            self.model.save(self.best_save_path)    #best model found so far

        #saving current model
        print('--debug: current model model is saved to {}'.format(self.save_path))
        self.model.save(self.save_path)   #latest model

        #-------------------
        # Progress Plot
        #-------------------
        self.plot_progress()

    def _on_step(self) -> bool:
        """Run the periodic check; terminate the process at ``total_timesteps``."""

        try:
            if (self.num_timesteps % self.check_freq == 0) or (self.num_timesteps == self.total_timesteps):
                self.runcall()
        except Exception as err:
            # Best-effort: a failed plot/save must not stop training, but
            # KeyboardInterrupt/SystemExit are no longer swallowed and the
            # actual error is reported instead of being hidden.
            print('--warning: No plot is generated, NEORL tried to plot the output csv logger, but failed for some reason, you may increase `check_freq` to a large value to allow some data printed in the csv logger')
            print('--warning: the underlying error was: {!r}'.format(err))

        if self.num_timesteps == self.total_timesteps:
            # os._exit does not flush stdio buffers, so flush explicitly.
            print('system exit', flush=True)
            # NOTE(review): os._exit ends the whole process immediately with
            # status 1 and skips interpreter cleanup; kept unchanged because
            # callers may rely on it.
            os._exit(1)

        return True

    def _on_training_end(self) -> None:
        """Run a final check and terminate the process."""
        self.runcall()
        # os._exit does not flush stdio buffers, so flush explicitly.
        print('Training is finished', flush=True)
        os._exit(1)

    def calc_cumavg(self, data, N):
        """
        Split ``data`` into consecutive groups of ``N`` items (the last group
        may be shorter) and return four lists holding the mean, standard
        deviation, maximum and minimum of each group.

        :param data: sliceable sequence of numbers
        :param N: (int) group size
        :return: (list, list, list, list) means, stds, maxima, minima
        """
        # Slice the original object (not a numpy copy) so that the numpy
        # reductions act on exactly the same type as before.
        groups=[data[i:i+N] for i in range(0,len(data),N)]

        cum_aves=[np.mean(g) for g in groups]
        cum_std=[np.std(g) for g in groups]
        cum_max=[np.max(g) for g in groups]
        cum_min=[np.min(g) for g in groups]

        return cum_aves, cum_std, cum_max, cum_min

    def plot_progress(self, method_xlabel='Epoch'):
        """
        Plot every column of ``<log_dir>_out.csv`` except ``caseid``.

        In ``'classic'`` mode (or when there is a single column) one png per
        column is written to ``<log_dir>_<column>.png``; in ``'subplot'`` mode
        all columns share one figure written to ``<log_dir>_res.png``.

        :param method_xlabel: (str) x-axis label, also used in the legend text
        :raises ValueError: if ``plot_mode`` is neither ``'classic'`` nor ``'subplot'``
        """
        self.out_data=pd.read_csv(self.log_dir+'_out.csv')
        color_list=['b', 'g', 'r', 'c', 'm', 'y', 'darkorange', 'purple', 'tab:brown', 'lime']
        plot_data=self.out_data.drop(['caseid'], axis=1)  #exclude caseid, which is the first column from plotting (meaningless)

        labels=list(plot_data.columns.values)
        ny=plot_data.shape[1]

        # classic mode
        if self.plot_mode=='classic' or ny == 1:
            for i in range(ny):
                color=color_list[i % len(color_list)]   #cycle through the palette
                plt.figure()
                ravg, rstd, rmax, rmin=self.calc_cumavg(plot_data.iloc[:,i],self.avg_step)
                epochs=np.arange(1,len(ravg)+1,dtype=int)
                plt.plot(epochs, ravg,'-o', c=color, label='Average per {}'.format(method_xlabel))

                plt.fill_between(epochs, np.subtract(ravg, rstd), np.add(ravg, rstd),
                                 alpha=0.2, edgecolor=color, facecolor=color,
                                 label=r'$1-\sigma$ per {}'.format(method_xlabel))

                plt.plot(epochs, rmax,'s', c='k', label='Max per {}'.format(method_xlabel), markersize=4)
                plt.plot(epochs,rmin,'d', c='k', label='Min per {}'.format(method_xlabel), markersize=4)
                plt.legend()
                plt.xlabel(method_xlabel)
                plt.ylabel(labels[i])

                plt.tight_layout()
                plt.savefig(self.log_dir+'_'+labels[i]+'.png', format='png', dpi=150)
                plt.close()

        # subplot mode
        elif self.plot_mode=='subplot':
            # determine the subplot grid and the figure size
            if ny == 4:
                nrows, ncols, figsize = 2, 2, (12, 8)
            elif ny < 4:
                nrows, ncols, figsize = 1, max(ny, 1), (12, 4.0)
            else:
                # 3 columns up to 21 plots, 4 columns beyond that; there is no
                # upper bound, so more than 99 columns no longer fails with an
                # undefined layout.
                ncols = 3 if ny <= 21 else 4
                nrows = int(np.ceil(ny/ncols))
                adj_fac = (nrows - 2.0)*0.25 + 1
                figsize = (12 if ncols == 3 else 15, adj_fac*8)
            plt.figure(figsize=figsize)

            for i in range(ny):
                color=color_list[i % len(color_list)]   #cycle through the palette
                plt.subplot(nrows, ncols, i+1)
                ravg, rstd, rmax, rmin=self.calc_cumavg(plot_data.iloc[:,i],self.avg_step)
                epochs=np.arange(1,len(ravg)+1,dtype=int)
                plt.plot(epochs,ravg,'-o', c=color)

                plt.fill_between(epochs, np.subtract(ravg, rstd), np.add(ravg, rstd),
                                 alpha=0.2, edgecolor=color, facecolor=color)

                plt.plot(epochs,rmax,'s', c='k', markersize=4)
                plt.plot(epochs,rmin,'d', c='k', markersize=4)
                plt.xlabel(method_xlabel)
                plt.ylabel(labels[i])

            #special legend is created for all subplots to save space
            legend_elements = [Line2D([0], [0], color='k', marker='o', label='Mean ' + r'$\pm$ ' +r'$1\sigma$' + ' per {} (color changes)'.format(method_xlabel)),
                  Line2D([0], [0], color='k', marker='s', label='Max per {} (color changes)'.format(method_xlabel)),
                  Line2D([0], [0], linestyle='-.', color='k', marker='d', label='Min per {} (color changes)'.format(method_xlabel))]
            plt.figlegend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, 1.02), ncol=3)
            plt.tight_layout()
            plt.savefig(self.log_dir+'_res.png', format='png', dpi=200, bbox_inches="tight")
            plt.close()

        else:
            raise ValueError('the plot mode defined by the user does not exist')
    
class RLLogger(BaseCallback):
    """
    Callback for logging data of RL algorithms (x,y), compatible with: A2C, ACER, ACKTR, DQN, PPO

    After training, the logged data are available on the callback as
    ``r_hist`` (fitness history), ``x_hist`` (input history), ``rbest`` and
    ``xbest`` (best fitness and its input; ``xbest`` is ``None`` until the
    first logged step).

    :param check_freq: (int) logging frequency, e.g. 1 will record every time step 
    :param plot_freq: (int) frequency of plotting the fitness progress (if ``None``, plotter is deactivated)
    :param n_avg_steps: (int) if ``plot_freq`` is NOT ``None``, then this is the number of timesteps to group to draw statistics for the plotter (e.g. 10 will group every 10 time steps to estimate min, max, mean, and std).
    :param pngname: (str) name of the plot that will be saved if ``plot_freq`` is NOT ``None``.
    :param save_model: (bool) whether or not to save the RL neural network model (model is saved every ``check_freq``)
    :param model_name: (str) name of the model to be saved  if ``save_model=True``
    :param save_best_only: (bool) if ``save_model = True``, then this flag only saves the model if the fitness value improves. 
    :param verbose: (bool) print updates to the screen
    """
    def __init__(self, check_freq=1, plot_freq=None, n_avg_steps=10, pngname='history', 
                 save_model=False, model_name='bestmodel.pkl', save_best_only=True, 
                 verbose=False):
        super(RLLogger, self).__init__(verbose)
        self.check_freq = check_freq
        self.plot_freq=plot_freq
        self.pngname=pngname
        self.n_avg_steps=n_avg_steps
        self.model_name = model_name
        self.save_model=save_model
        self.verbose=verbose
        self.save_best_only=save_best_only
        self.rbest = -np.inf
        self.rbest_maxonly = -np.inf
        # defined up-front so reading it before the first logged step does
        # not raise AttributeError
        self.xbest = None
        self.r_hist=[]
        self.x_hist=[]

        if self.plot_freq:
            #avoid activating 'Agg' in the header so not to affect other classes/algs
            import matplotlib
            matplotlib.use('Agg')

    def _init_callback(self) -> None:
        """Read the optimisation ``mode`` from the training environment, defaulting to ``'max'``."""
        try:
            self.mode=self.training_env.get_attr('mode')[0]   #PPO/ACER/A2C/ACKTR
        except Exception:
            try:
                self.mode=self.training_env.mode       #DQN
            except Exception:
                print('--warning: the logger cannot find mode in the environment, it is set by default to `max`')
                self.mode='max'

        if self.mode not in ['min', 'max']:
            self.mode='max'
            print('--warning: The mode entered by user is invalid, use either `min` or `max`')

    def _on_step(self) -> bool:
        """
        Record the reward and input of the current step every ``check_freq``
        calls, track the best pair, and optionally save the model / plot.

        :return: (bool) always ``True``
        """
        if self.n_calls % self.check_freq != 0:
            return True

        if self.verbose:
            print('----------------------------------------------------------------------------------')
            print('RL callback at step {}/{}'.format(self.n_calls, self.locals['total_timesteps']))

        # The algorithms expose the reward under different local names
        try:
            rwd=self.locals['rew']   #DQN case (special dict naming)
        except KeyError:
            rwd=self.locals['rewards'][0] #A2C/PPO/ACER/ACKTR

        # ... and the info dict carrying ``x`` as well
        try:
            x=self.locals['infos'][0]['x'] #A2C/PPO/ACKTR cases
        except (KeyError, IndexError, TypeError):
            if 'mus' in self.locals:
                x=self.locals['_'][0]['x']     #ACER case (special dict naming)
            else:
                x=self.locals['info']['x']   #DQN case (special dict naming)

        if self.save_model and not self.save_best_only:
            self.model.save(self.model_name)
            if self.verbose:
                print('A new model is saved to {}'.format(self.model_name))

        # The raw reward is always maximised; the sign is flipped back for
        # the values reported in 'min' mode.
        if rwd > self.rbest_maxonly:
            self.xbest=x.copy()
            self.rbest_maxonly=rwd
            self.rbest=self.rbest_maxonly if self.mode=='max' else -self.rbest_maxonly

            if self.save_model and self.save_best_only:
                self.model.save(self.model_name)
                if self.verbose:
                    print('An improvement is observed, new model is saved to {}'.format(self.model_name))

        self.r_hist.append(rwd if self.mode=='max' else -rwd)
        self.x_hist.append(list(x))

        if self.plot_freq and self.n_calls % self.plot_freq == 0:
            self.plot_progress()

        if self.verbose:
            print('----------------------------------------------------------------------------------')
        return True

    def plot_progress(self): 
        """Plot mean, 1-sigma band, max and min of ``r_hist`` per group of ``n_avg_steps`` and save to ``<pngname>.png``."""

        plt.figure()

        ravg, rstd, rmax, rmin=self.calc_cumavg(self.r_hist,self.n_avg_steps)
        epochs=np.arange(1,len(ravg)+1,dtype=int)
        plt.plot(epochs, ravg,'-o', c='g', label='Average per epoch')

        plt.fill_between(epochs, np.subtract(ravg, rstd), np.add(ravg, rstd),
                         alpha=0.2, edgecolor='g', facecolor='g', label=r'$1-\sigma$ per epoch')

        plt.plot(epochs, rmax,'s', c='k', label='Max per epoch', markersize=4)
        plt.plot(epochs,rmin,'d', c='k', label='Min per epoch', markersize=4)
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Fitness')
        plt.savefig(self.pngname+'.png',format='png' ,dpi=300, bbox_inches="tight")
        plt.close()

    def calc_cumavg(self, data, N):
        """
        Split ``data`` into consecutive groups of ``N`` items (the last group
        may be shorter) and return four lists holding the mean, standard
        deviation, maximum and minimum of each group.

        :param data: sliceable sequence of numbers
        :param N: (int) group size
        :return: (list, list, list, list) means, stds, maxima, minima
        """
        groups=[data[i:i+N] for i in range(0,len(data),N)]

        cum_aves=[np.mean(g) for g in groups]
        cum_std=[np.std(g) for g in groups]
        cum_max=[np.max(g) for g in groups]
        cum_min=[np.min(g) for g in groups]

        return cum_aves, cum_std, cum_max, cum_min