Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
6bb7ac2
Got MAML working with multiple workers and iteration steps and increa…
Demosthen Apr 2, 2021
61a1f36
Added MAML validation
Demosthen Apr 6, 2021
2f8ee14
merging main into maml branch
lucas-spangher Apr 6, 2021
9488985
updating params for Savio
lucas-spangher Apr 6, 2021
c85465b
added capability to validate from a saved model checkpoint
Demosthen Apr 7, 2021
7a491a3
forgot to commit socialgame_env
Demosthen Apr 7, 2021
96f6766
Merge branch 'austin_maml' of https://github.com/Aphoh/temp_tc into a…
Demosthen Apr 7, 2021
9159ad1
removing cvxpy
lucas-spangher Apr 7, 2021
143c136
Merge branch 'austin_maml' of https://github.com/Aphoh/temp_tc into a…
lucas-spangher Apr 7, 2021
7849172
singularity trial
lucas-spangher Apr 7, 2021
4d1b5d6
make MAML evaluation faster and wandb compatible
Demosthen Apr 8, 2021
2aa958c
Merge branch 'austin_maml' of https://github.com/Aphoh/temp_tc into a…
Demosthen Apr 8, 2021
5bb6819
run some MAML validation runs
Demosthen Apr 9, 2021
80f0d3a
adding singularity_example to merge
lucas-spangher Apr 9, 2021
daae578
Add more training and validation runs
Demosthen Apr 9, 2021
9a4471b
Add some comments to maml_run_analysis.py
Demosthen Apr 9, 2021
0f78745
Merge branch 'austin_maml' of https://github.com/Aphoh/temp_tc into a…
Demosthen Apr 9, 2021
d366040
warm sac
Demosthen May 1, 2021
c2d3167
update StableBaselines.py
Demosthen May 1, 2021
f8013ed
add ppo checkpoints
Demosthen May 3, 2021
1649d9f
test
lucas-spangher May 3, 2021
61a68d5
fdmkdmkdf
lucas-spangher May 3, 2021
51a8a4f
offline experiments
lucas-spangher May 3, 2021
7d050c8
trying with PPO and prop
lucas-spangher May 4, 2021
4f777b5
adding output simulation data
lucas-spangher May 4, 2021
bf14453
batch 2
lucas-spangher May 4, 2021
adbfad2
savio edits
lucas-spangher May 4, 2021
3dec001
gym microgrid
lucas-spangher May 4, 2021
a6e144f
adding comma
lucas-spangher May 4, 2021
1617edc
typo
lucas-spangher May 4, 2021
a685117
Ran PPO with multiple different environments
Demosthen May 4, 2021
20e3787
Fix merge conflict
Demosthen May 4, 2021
8ae08f7
Added offline pretrained SAC checkpoints
Demosthen May 7, 2021
4a2340c
Add dataset building bash script
Demosthen May 10, 2021
661bae3
Don't push a dataset to git
Demosthen May 10, 2021
94f71ad
Don't push a dataset to git
Demosthen May 10, 2021
3033817
Add additional arguments to StableBaselines.py
Demosthen May 11, 2021
e3e9e1a
Add changes to environment
Demosthen May 11, 2021
fe491b7
Add tensorflow probabilities
Demosthen May 11, 2021
aeeb91a
Change dataset shell script to generate DeterministicFn Datasets
Demosthen May 19, 2021
e1e08b1
Forgot to add dataset script in last commit
Demosthen May 19, 2021
1a7c64e
untrack datasets in git and add to dvc
Demosthen May 24, 2021
27a284f
Merging with main
Demosthen May 26, 2021
fb0634c
Finish merging main into branch
Demosthen May 28, 2021
08464f1
Merge environment files from main into branch
Demosthen May 28, 2021
e33c9be
Fixed small bug in checkpointing for ppo
Demosthen May 28, 2021
5212e7e
Add new MAML checkpoints
Demosthen May 28, 2021
01052f6
Merge remote to local Merge branch 'austin_maml' of https://github.co…
Demosthen May 28, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 5 additions & 12 deletions example_run.sh
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
# PPO experiments: each command launches rl_algos/StableBaselines.py with
# --algo=ppo --library=rllib for 300000 steps, using --one_day=15,
# --energy_in_state=T and --price_in_state=F.
# The first run (baseline_ppo) passes no --smirl_weight; the remaining runs
# pass --smirl_weight values from 3.00 down to 0.001, with the value encoded
# in --exp_name.
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=baseline_ppo --algo=ppo --library=rllib --one_day=15 --energy_in_state=T --price_in_state=F
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=smirl_300_ppo --algo=ppo --library=rllib --one_day=15 --smirl_weight=3.00 --energy_in_state=T --price_in_state=F
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=smirl_050_ppo --algo=ppo --library=rllib --one_day=15 --smirl_weight=0.50 --energy_in_state=T --price_in_state=F
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=smirl_010_ppo --algo=ppo --library=rllib --one_day=15 --smirl_weight=0.10 --energy_in_state=T --price_in_state=F
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=smirl_005_ppo --algo=ppo --library=rllib --one_day=15 --smirl_weight=0.05 --energy_in_state=T --price_in_state=F
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=smirl_003_ppo --algo=ppo --library=rllib --one_day=15 --smirl_weight=0.03 --energy_in_state=T --price_in_state=F
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=smirl_0003_ppo --algo=ppo --library=rllib --one_day=15 --smirl_weight=0.003 --energy_in_state=T --price_in_state=F
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=smirl_0001_ppo --algo=ppo --library=rllib --one_day=15 --smirl_weight=0.001 --energy_in_state=T --price_in_state=F
# Disabled variant: same script with --library=tune and no --num_steps.
#python3 rl_algos/StableBaselines.py --exp_name=test --algo=ppo --library=tune --one_day=15 --energy_in_state=T --price_in_state=F



# SAC experiments: each command launches rl_algos/StableBaselines.py with
# --algo=sac --library=rllib for 300000 steps, passing --offline_sampling_prop
# values .1, .3, .5, .7 and .9 (the value is repeated in --exp_name).
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=sac_offline.1 --algo=sac --library=rllib --offline_sampling_prop=.1
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=sac_offline.3 --algo=sac --library=rllib --offline_sampling_prop=.3
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=sac_offline.5 --algo=sac --library=rllib --offline_sampling_prop=.5
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=sac_offline.7 --algo=sac --library=rllib --offline_sampling_prop=.7
python3 rl_algos/StableBaselines.py --num_steps=300000 --exp_name=sac_offline.9 --algo=sac --library=rllib --offline_sampling_prop=.9
8,780 changes: 8,780 additions & 0 deletions gym-microgrid/gym_microgrid/envs/building_data.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions gym-socialgame/gym_socialgame/envs/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ class DeterministicFunctionPerson(Person):
# Initializer: forwards baseline_energy_df and points_multiplier to the parent
# initializer, then stores the response-type code on the instance.
#   baseline_energy_df -- passed through unchanged to super().__init__
#   points_multiplier  -- passed through unchanged to super().__init__ (default 1)
#   response           -- stored as self.response (default 't')
#                         NOTE(review): presumably 't' selects the threshold
#                         response function -- confirm against the dispatch code
#   **kwargs           -- accepted but not used in this initializer
def __init__(self, baseline_energy_df, points_multiplier = 1, response = 't', **kwargs):
super().__init__(baseline_energy_df, points_multiplier)
self.response = response
# Debug trace: prints the chosen response type every time an instance is constructed.
print("response type: ", response)

def threshold_response_func(self, points):
points = np.array(points) * self.points_multiplier
Expand Down
72 changes: 38 additions & 34 deletions gym-socialgame/gym_socialgame/envs/socialgame_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from gym_socialgame.envs.utils import price_signal
from gym_socialgame.envs.agents import *
from gym_socialgame.envs.reward import Reward
import wandb
from gym_socialgame.envs.buffers import (GaussianBuffer, GaussianCircularBuffer)

class SocialGameEnv(gym.Env):
Expand All @@ -24,6 +25,8 @@ def __init__(self,
reward_function = "log_cost_regularized",
bin_observation_space=False,
manual_tou_magnitude=.3,
person_type_string="c",
points_multiplier=10,
smirl_weight=None,
circ_buffer_size=None):

Expand Down Expand Up @@ -73,6 +76,8 @@ def __init__(self,
self.hours_in_day = 10
self.last_smirl_reward = None
self.last_energy_reward = None
self.person_type_string = person_type_string
self.points_multiplier=points_multiplier

self.day = 0
self.days_of_week = [0, 1, 2, 3, 4]
Expand Down Expand Up @@ -201,7 +206,10 @@ def _create_agents(self):
my_baseline_energy = pd.DataFrame(data = {"net_energy_use" : working_hour_energy})

for i in range(self.number_of_participants):
player = CurtailAndShiftPerson(my_baseline_energy, points_multiplier = 10, response = 'l')
if self.person_type_string=="c":
player = CurtailAndShiftPerson(my_baseline_energy, points_multiplier = 10, response = 'l')
elif self.person_type_string=="d":
player = DeterministicFunctionPerson(my_baseline_energy, response=self.response_type_string, points_multiplier = self.points_multiplier)
player_dict['player_{}'.format(i)] = player

return player_dict
Expand Down Expand Up @@ -379,7 +387,7 @@ def step(self, action):
self.action = action

if not self.action_space.contains(action):
print("made it within the if statement in SG_E that tests if the the action space doesn't have the action")
print("made it within the if statement in SG_E that tests if the action space doesn't have the action")
action = np.asarray(action)
if self.action_space_string == 'continuous':
action = np.clip(action, -1, 1) #TODO: check if correct
Expand Down Expand Up @@ -408,6 +416,10 @@ def step(self, action):
if self.use_smirl:
self.buffer.add(observation)

# if not self.total_iter % 10:
# print("Iteration: "+str(self.total_iter) + " reward: " + str(reward))
# wandb.log({"environment_reward":reward})

info = {}
return observation, reward, done, info

Expand Down Expand Up @@ -510,19 +522,8 @@ class SocialGameMetaEnv(SocialGameEnvRLLib):
def __init__(self,
env_config,
task = None):

# self.goal_direction = goal_direction if goal_direction else 1.0

self.task = (task if task else {
"person_type":np.random.choice([DeterministicFunctionPerson, CurtailAndShiftPerson]),
"points_multiplier":np.random.choice(range(20)),
"response":np.random.choice(['t','l', 's']),
"shiftable_load_frac":np.random.uniform(0, 1),
"curtailable_load_frac":np.random.uniform(0, 1),
"shiftByHours":np.random.choice(range(8), ),
"maxCurtailHours":np.random.choice(range(8),)
})

self.mode = env_config["mode"]
self.task = (task if task else self.sample_tasks(1)[0])
super().__init__(
env_config=env_config,
)
Expand All @@ -533,24 +534,26 @@ def sample_tasks(self, n_tasks):
"""
n_tasks will be passed in as a hyperparameter
"""
# points_multiplier = 1,
# response = 't'
# baseline_energy_df,
# points_multiplier = 1,
# shiftable_load_frac = .7,
# curtailable_load_frac = .4,
# shiftByHours = 3,
# maxCurtailHours=5,
# baseline_energy_df_variance = # add random noise to the existing?

person_type = np.random.choice([DeterministicFunctionPerson, CurtailAndShiftPerson], size = (n_tasks, ))
points_multiplier = np.random.choice(range(20), size = (n_tasks, ))
response = np.random.choice(['t','l', 's'], size = (n_tasks, ))
shiftable_load_frac = np.random.uniform(0, 1, size = (n_tasks, ))
curtailable_load_frac = np.random.uniform(0, 1, size = (n_tasks, ))
shiftByHours = np.random.choice(range(8), (n_tasks, ))
maxCurtailHours=np.random.choice(range(8), (n_tasks, ))

if self.mode == "train":
print("SAMPLING TRAIN ENVIRONMENT")
person_type = np.random.choice([DeterministicFunctionPerson], size = (n_tasks, ))
points_multiplier = [10 for i in range(n_tasks)]
response = np.random.choice(['s', 'l', 't'], size = (n_tasks, ))
shiftable_load_frac = np.random.uniform(0, 1, size = (n_tasks, ))
curtailable_load_frac = np.random.uniform(0, 1, size = (n_tasks, ))
shiftByHours = np.random.choice(range(8), (n_tasks, ))
maxCurtailHours=np.random.choice(range(8), (n_tasks, ))
elif self.mode == "test":
print("SAMPLING TEST ENVIRONMENT")
person_type = [CurtailAndShiftPerson for i in range(n_tasks)]
points_multiplier = [10 for i in range(n_tasks)]
response = ['t' for i in range(n_tasks)]
shiftable_load_frac = [0.2 for i in range(n_tasks)]
curtailable_load_frac = [0.2 for i in range(n_tasks)]
shiftByHours = [2 for i in range(n_tasks)]
maxCurtailHours=[5 for i in range(n_tasks)]
else:
raise Exception("Please specify whether this is a training or evaluation run")
task_parameters = {
"person_type":person_type,
"points_multiplier":points_multiplier,
Expand All @@ -566,7 +569,7 @@ def sample_tasks(self, n_tasks):
temp_dict = {k: v[i] for k, v in task_parameters.items()}
tasks_dicts.append(temp_dict)

return task_dicts
return tasks_dicts


def set_task(self, task):
Expand All @@ -575,6 +578,7 @@ def set_task(self, task):
task: task of the meta-learning environment
"""
self.task=task
self.player_dict = self._create_agents()
# self.person_type = task["person_type"]
# self.points_multiplier = task["points_multiplier"]
# self.response = task["response"]
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
cvxpy==1.1.7
gym==0.17.3
scikit-learn==0.23.2
wandb==0.10.25
stable-baselines3==0.11.1
tensorboard==2.3.0
tensorflow-gpu==2.3.2
tensorflow-probability==0.11.1
ray[rllib,tune]==1.2.0
higher==0.2.1
GPUtil==1.4.0
pandas==1.1.5
tables==3.6.1
3 changes: 3 additions & 0 deletions rl_algos/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
logs/
/ppo_output_sim_data
/sac_output_sim_data2
/offline_data
Loading