# Source code for neorl.utils.neorlcalls

#    This file is part of NEORL.

#    Copyright (c) 2021 Exelon Corporation and MIT Nuclear Science and Engineering
#    NEORL is free software: you can redistribute it and/or modify
#    it under the terms of the MIT LICENSE

#    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#    SOFTWARE.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#"""
#Created on Wed Mar  4 11:51:22 2020
#
#@author: majdi
#"""

import numpy as np
import pandas as pd
from neorl.rl.baselines.shared.callbacks import BaseCallback
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import os
import copy

class SavePlotCallback(BaseCallback):
    """
    Callback that periodically saves the RL model and plots the csv logger.

    Every ``check_freq`` timesteps (and at the last timestep) the callback reads
    ``<log_dir>_out.csv``, averages the last ``avg_step`` entries of its ``reward``
    column, saves the current model to ``<log_dir>_lastmodel.pkl``, saves it to
    ``<log_dir>_bestmodel.pkl`` when that average improves, and redraws the
    progress plots.

    :param check_freq: (int) number of timesteps between two consecutive checks
    :param avg_step: (int) number of csv rows grouped together to compute the
        mean/std/max/min statistics (also the window used for the mean reward)
    :param log_dir: (str) path prefix of the csv logger and of all saved files
    :param total_timesteps: (int) timestep at which the final check is triggered
    :param basecall: callback object providing ``n_calls``, ``model`` and
        ``num_timesteps``, which are copied at construction time
    :param plot_mode: (str) ``'classic'`` (one png per csv column) or
        ``'subplot'`` (all columns in a single png)
    """

    # Colors cycled through, one per plotted csv column
    _COLORS = ('b', 'g', 'r', 'c', 'm', 'y', 'darkorange', 'purple', 'tab:brown', 'lime')

    def __init__(self, check_freq, avg_step, log_dir, total_timesteps, basecall, plot_mode='subplot'):
        # NOTE(review): BaseCallback.__init__ is not invoked here (kept as in the
        # original implementation); the needed state is copied from ``basecall``.
        self.base = basecall
        self.plot_mode = plot_mode
        self.n_calls = self.base.n_calls
        self.model = self.base.model
        self.num_timesteps = self.base.num_timesteps
        self.total_timesteps = total_timesteps
        self.verbose = 1
        self.check_freq = check_freq
        self.avg_step = avg_step
        self.log_dir = log_dir
        self.best_save_path = self.log_dir + '_bestmodel.pkl'
        self.save_path = self.log_dir + '_lastmodel.pkl'
        self.best_mean_reward = -np.inf

        #avoid activating 'Agg' in the header so not to affect other classes/algs
        import matplotlib
        matplotlib.use('Agg')

    def runcall(self):
        """
        Run one check: save the model(s) and refresh the progress plots.

        Reads ``<log_dir>_out.csv`` once, stores it in ``self.out_data`` and uses
        its ``reward`` column to decide whether the best model must be updated.
        """
        print('num_timesteps={}/{}'.format (self.num_timesteps, self.total_timesteps))

        # Retrieve the training rewards logged so far (single read of the csv)
        self.out_data = pd.read_csv(self.log_dir + '_out.csv')
        rewards = self.out_data["reward"].values
        # Mean training reward over the last ``avg_step`` logged entries
        mean_reward = np.mean(rewards[-self.avg_step:])

        print('--debug: current mean reward={}, previous best mean reward = {}'.format(np.round(mean_reward), np.round(self.best_mean_reward)))
        if mean_reward > self.best_mean_reward:
            self.best_mean_reward = mean_reward
            #saving best model
            print('--debug: improvement in reward is observed, new best model is saved to {}'.format(self.best_save_path))
            self.model.save(self.best_save_path)    #best model found so far

        #saving current model
        print('--debug: current model model is saved to {}'.format(self.save_path))
        self.model.save(self.save_path)   #latest model

        #-------------------
        # Progress Plot
        #-------------------
        self.plot_progress()

    def _on_step(self) -> bool:
        """
        Trigger :meth:`runcall` every ``check_freq`` timesteps and at the last one.

        A failing check is reported but never interrupts training.

        :return: (bool) always ``True``
        """
        is_last_step = self.num_timesteps == self.total_timesteps

        try:
            if (self.num_timesteps % self.check_freq == 0) or is_last_step:
                self.runcall()
        except Exception as err:
            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate to the user.
            print('--warning: No plot is generated, NEORL tried to plot the output csv logger, but failed for some reason, you may increase `check_freq` to a large value to allow some data printed in the csv logger')
            print('--warning: reported error: {!r}'.format(err))

        if is_last_step:
            print('system exit')
            # NOTE(review): os._exit terminates the process immediately with
            # status 1, even on normal completion; kept for backward compatibility.
            os._exit(1)

        return True

    def _on_training_end(self) -> None:
        """Run a final check, then terminate the process."""
        self.runcall()
        print('Training is finished')
        # NOTE(review): same hard exit with status 1 as in ``_on_step``.
        os._exit(1)

    def calc_cumavg(self, data, N):
        """
        Compute statistics over consecutive groups of ``N`` entries.

        :param data: (sequence) values to group; the last group may be shorter
        :param N: (int) group size
        :return: (tuple) four lists: means, standard deviations, maxima and
            minima, each with one entry per group
        """
        chunks = [data[start:start + N] for start in range(0, len(data), N)]

        cum_aves = [np.mean(chunk) for chunk in chunks]
        cum_std = [np.std(chunk) for chunk in chunks]
        cum_max = [np.max(chunk) for chunk in chunks]
        cum_min = [np.min(chunk) for chunk in chunks]

        return cum_aves, cum_std, cum_max, cum_min

    def _epoch_stats(self, series):
        """Return epochs, mean, mean-std, mean+std, max and min per group of ``avg_step`` rows."""
        ravg, rstd, rmax, rmin = self.calc_cumavg(series, self.avg_step)
        epochs = np.array(range(1, len(ravg) + 1), dtype=int)
        lower = [a_i - b_i for a_i, b_i in zip(ravg, rstd)]
        upper = [a_i + b_i for a_i, b_i in zip(ravg, rstd)]
        return epochs, ravg, lower, upper, rmax, rmin

    @staticmethod
    def _subplot_layout(ny):
        """Return ``(nrows, ncols, figsize)`` of the subplot grid used for ``ny`` panels."""
        if ny == 2:
            return 1, 2, (12, 4.0)
        if ny == 3:
            return 1, 3, (12, 4.0)
        if ny == 4:
            return 2, 2, (12, 8)
        if 4 < ny <= 21:
            nrows = int(np.ceil(ny / 3))
            return nrows, 3, (12, ((nrows - 2.0) * 0.25 + 1) * 8)
        if ny > 21:
            # Four columns for any larger count; the former upper limit of 99
            # left the layout undefined and ended in a NameError.
            nrows = int(np.ceil(ny / 4))
            return nrows, 4, (15, ((nrows - 2.0) * 0.25 + 1) * 8)
        # Nothing to plot: default-sized figure that only carries the legend
        return 1, 1, None

    def _plot_classic(self, series, label, color, method_xlabel):
        """Draw one csv column in its own figure and save it to ``<log_dir>_<label>.png``."""
        fig = plt.figure()
        try:
            epochs, ravg, lower, upper, rmax, rmin = self._epoch_stats(series)
            plt.plot(epochs, ravg, '-o', c=color, label='Average per {}'.format(method_xlabel))
            plt.fill_between(epochs, lower, upper, alpha=0.2, edgecolor=color, facecolor=color,
                             label=r'$1-\sigma$ per {}'.format(method_xlabel))
            plt.plot(epochs, rmax, 's', c='k', label='Max per {}'.format(method_xlabel), markersize=4)
            plt.plot(epochs, rmin, 'd', c='k', label='Min per {}'.format(method_xlabel), markersize=4)
            plt.legend()
            plt.xlabel(method_xlabel)
            plt.ylabel(label)
            plt.tight_layout()
            plt.savefig(self.log_dir + '_' + label + '.png', format='png', dpi=150)
        finally:
            # Always release the figure, even if plotting/saving failed
            plt.close(fig)

    def _plot_subplots(self, plot_data, labels, method_xlabel):
        """Draw all csv columns as panels of one figure saved to ``<log_dir>_res.png``."""
        nrows, ncols, figsize = self._subplot_layout(len(labels))
        fig = plt.figure(figsize=figsize)
        try:
            for i, label in enumerate(labels):
                color = self._COLORS[i % len(self._COLORS)]
                plt.subplot(nrows, ncols, i + 1)
                epochs, ravg, lower, upper, rmax, rmin = self._epoch_stats(plot_data.iloc[:, i])
                plt.plot(epochs, ravg, '-o', c=color)
                plt.fill_between(epochs, lower, upper, alpha=0.2, edgecolor=color, facecolor=color)
                plt.plot(epochs, rmax, 's', c='k', markersize=4)
                plt.plot(epochs, rmin, 'd', c='k', markersize=4)
                plt.xlabel(method_xlabel)
                plt.ylabel(label)

            #special legend is created for all subplots to save space
            legend_elements = [Line2D([0], [0], color='k', marker='o', label='Mean ' + r'$\pm$ ' +r'$1\sigma$' + ' per {} (color changes)'.format(method_xlabel)),
                  Line2D([0], [0], color='k', marker='s', label='Max per {} (color changes)'.format(method_xlabel)),
                  Line2D([0], [0], linestyle='-.', color='k', marker='d', label='Min per {} (color changes)'.format(method_xlabel))]
            plt.figlegend(handles=legend_elements, loc='upper center', bbox_to_anchor=(0.5, 1.02), ncol=3)
            plt.tight_layout()
            plt.savefig(self.log_dir + '_res.png', format='png', dpi=200, bbox_inches="tight")
        finally:
            # Always release the figure, even if plotting/saving failed
            plt.close(fig)

    def plot_progress(self, method_xlabel='Epoch'):
        """
        Plot every column of ``<log_dir>_out.csv`` except ``caseid``.

        :param method_xlabel: (str) label of the x-axis, also used in the legends
        :raises Exception: if ``plot_mode`` is neither ``'classic'`` nor ``'subplot'``
        """
        self.out_data = pd.read_csv(self.log_dir + '_out.csv')
        plot_data = self.out_data.drop(['caseid'], axis=1)  #exclude caseid, which is the first column from plotting (meaningless)
        labels = list(plot_data.columns.values)
        ny = plot_data.shape[1]

        # classic mode (also used whenever there is a single column to plot)
        if self.plot_mode == 'classic' or ny == 1:
            for i, label in enumerate(labels):
                color = self._COLORS[i % len(self._COLORS)]
                self._plot_classic(plot_data.iloc[:, i], label, color, method_xlabel)

        # subplot mode
        elif self.plot_mode == 'subplot':
            self._plot_subplots(plot_data, labels, method_xlabel)

        else:
            raise Exception ('the plot mode defined by the user does not exist')
class RLLogger(BaseCallback):
    """
    Callback for logging data of RL algorithms (x,y), compatible with: A2C, ACER, ACKTR, DQN, PPO

    :param check_freq: (int) logging frequency, e.g. 1 will record every time step
    :param plot_freq: (int) frequency of plotting the fitness progress (if ``None``, plotter is deactivated)
    :param n_avg_steps: (int) if ``plot_freq`` is NOT ``None``, then this is the number of timesteps to group to draw statistics for the plotter (e.g. 10 will group every 10 time steps to estimate min, max, mean, and std).
    :param pngname: (str) name of the plot that will be saved if ``plot_freq`` is NOT ``None``.
    :param save_model: (bool) whether or not to save the RL neural network model (model is saved every ``check_freq``)
    :param model_name: (str) name of the model to be saved if ``save_model=True``
    :param save_best_only: (bool) if ``save_model = True``, then this flag only saves the model if the fitness value improves.
    :param verbose: (bool) print updates to the screen
    """
    def __init__(self, check_freq=1, plot_freq=None, n_avg_steps=10, pngname='history',
                 save_model=False, model_name='bestmodel.pkl', save_best_only=True, verbose=False):
        super(RLLogger, self).__init__(verbose)
        self.check_freq = check_freq
        self.plot_freq = plot_freq
        self.pngname = pngname
        self.n_avg_steps = n_avg_steps
        self.model_name = model_name
        self.save_model = save_model
        self.verbose = verbose
        self.save_best_only = save_best_only
        self.rbest = -np.inf            # best reward, sign-corrected according to ``mode``
        self.rbest_maxonly = -np.inf    # best raw reward seen so far (always maximized)
        self.r_hist = []                # logged rewards, sign-corrected according to ``mode``
        self.x_hist = []                # logged ``x`` entries, one list per logged step

        if self.plot_freq:
            #avoid activating 'Agg' in the header so not to affect other classes/algs
            import matplotlib
            matplotlib.use('Agg')

    def _init_callback(self) -> None:
        """Read the optimization ``mode`` from the training environment (default ``'max'``)."""
        try:
            self.mode = self.training_env.get_attr('mode')[0]   #PPO/ACER/A2C/ACKTR
        except Exception:
            try:
                self.mode = self.training_env.mode   #DQN
            except Exception:
                print('--warning: the logger cannot find mode in the environment, it is set by default to `max`')
                self.mode = 'max'

        if self.mode not in ['min', 'max']:
            self.mode = 'max'
            print('--warning: The mode entered by user is invalid, use either `min` or `max`')

    def _on_step(self) -> bool:
        """
        Log the current reward and ``x`` every ``check_freq`` calls.

        Optionally saves the model and refreshes the progress plot.

        :return: (bool) always ``True``
        """
        if self.n_calls % self.check_freq == 0:

            if self.verbose:
                print('----------------------------------------------------------------------------------')
                print('RL callback at step {}/{}'.format(self.n_calls, self.locals['total_timesteps']))

            # ``Exception`` (not a bare except) in the fallbacks below so that
            # KeyboardInterrupt and SystemExit are never swallowed.
            try:
                rwd = self.locals['rew']   #DQN case (special dict naming)
            except Exception:
                rwd = self.locals['rewards'][0]  #A2C/PPO/ACER/ACKTR

            try:
                x = self.locals['infos'][0]['x']    #A2C/PPO/ACKTR cases
            except Exception:
                if 'mus' in list(self.locals.keys()):
                    x = self.locals['_'][0]['x']   #ACER case (special dict naming)
                else:
                    x = self.locals['info']['x']    #DQN case (special dict naming)

            if self.save_model and not self.save_best_only:
                self.model.save(self.model_name)
                if self.verbose:
                    print('A new model is saved to {}'.format(self.model_name))

            if rwd > self.rbest_maxonly:
                self.xbest = x.copy()
                self.rbest_maxonly = rwd

                # the raw reward is sign-flipped when the mode is `min`
                if self.mode == 'max':
                    self.rbest = self.rbest_maxonly
                else:
                    self.rbest = -self.rbest_maxonly

                if self.save_model and self.save_best_only:
                    self.model.save(self.model_name)
                    if self.verbose:
                        print('An improvement is observed, new model is saved to {}'.format(self.model_name))

            if self.mode == 'max':
                self.r_hist.append(rwd)
            else:
                self.r_hist.append(-rwd)

            self.x_hist.append(list(x))

            if self.plot_freq:
                if self.n_calls % self.plot_freq == 0:
                    self.plot_progress()

            if self.verbose:
                print('----------------------------------------------------------------------------------')

        return True

    def plot_progress(self):
        """Plot mean, 1-sigma band, max and min of ``r_hist`` per group of ``n_avg_steps`` and save to ``<pngname>.png``."""
        fig = plt.figure()
        try:
            ravg, rstd, rmax, rmin = self.calc_cumavg(self.r_hist, self.n_avg_steps)
            epochs = np.array(range(1, len(ravg) + 1), dtype=int)
            plt.plot(epochs, ravg, '-o', c='g', label='Average per epoch')

            plt.fill_between(epochs, [a_i - b_i for a_i, b_i in zip(ravg, rstd)], [a_i + b_i for a_i, b_i in zip(ravg, rstd)],
                             alpha=0.2, edgecolor='g', facecolor='g', label=r'$1-\sigma$ per epoch')

            plt.plot(epochs, rmax, 's', c='k', label='Max per epoch', markersize=4)
            plt.plot(epochs, rmin, 'd', c='k', label='Min per epoch', markersize=4)
            plt.legend()
            plt.xlabel('Epoch')
            plt.ylabel('Fitness')
            plt.savefig(self.pngname + '.png', format='png', dpi=300, bbox_inches="tight")
        finally:
            # Always release the figure, even if plotting/saving failed
            plt.close(fig)

    def calc_cumavg(self, data, N):
        """
        Compute statistics over consecutive groups of ``N`` entries.

        :param data: (sequence) values to group; the last group may be shorter
        :param N: (int) group size
        :return: (tuple) four lists: means, standard deviations, maxima and
            minima, each with one entry per group
        """
        chunks = [data[start:start + N] for start in range(0, len(data), N)]

        cum_aves = [np.mean(chunk) for chunk in chunks]
        cum_std = [np.std(chunk) for chunk in chunks]
        cum_max = [np.max(chunk) for chunk in chunks]
        cum_min = [np.min(chunk) for chunk in chunks]

        return cum_aves, cum_std, cum_max, cum_min