diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..9478db48e881240987301c71b97f727996fa6db0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+
+.idea/misc.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index ea26b4d402d793a4a3def4e8ebe7d9aa86654d0e..0000000000000000000000000000000000000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="Black">
-    <option name="sdkName" value="Python 3.11 (Ion_ai1)" />
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (Ion_ai1)" project-jdk-type="Python SDK" />
-</project>
\ No newline at end of file
diff --git a/ionen_env/__pycache__/__init__.cpython-311.pyc b/ionen_env/__pycache__/__init__.cpython-311.pyc
index 6c05e9b44f82b7361b73958ffad5dcecab3baf23..c28dd447b716b3dc3ca1a4dd89c5e5c79f158ec0 100644
Binary files a/ionen_env/__pycache__/__init__.cpython-311.pyc and b/ionen_env/__pycache__/__init__.cpython-311.pyc differ
diff --git a/ionen_env/envs/__pycache__/Ionenbeschl.cpython-311.pyc b/ionen_env/envs/__pycache__/Ionenbeschl.cpython-311.pyc
index 0adf496aceb7959d101a620716df52c6b7f47ea9..70da98ada6274a8b619b7f3b15119b45a15fec9e 100644
Binary files a/ionen_env/envs/__pycache__/Ionenbeschl.cpython-311.pyc and b/ionen_env/envs/__pycache__/Ionenbeschl.cpython-311.pyc differ
diff --git a/ionen_env/envs/__pycache__/__init__.cpython-311.pyc b/ionen_env/envs/__pycache__/__init__.cpython-311.pyc
index 97eeabc04b3ff97a363510f6aacb6a0a8d4d6918..8ba7f2d88be810cfa9c4e89c1b8bc0d86a5df78d 100644
Binary files a/ionen_env/envs/__pycache__/__init__.cpython-311.pyc and b/ionen_env/envs/__pycache__/__init__.cpython-311.pyc differ
diff --git a/main.py b/main.py
index f63c0660ae8c24d12e202a6361c6a98485f783a8..0e3077058c857e9b83b7bca8d1a276c60d2b440a 100644
--- a/main.py
+++ b/main.py
@@ -46,7 +46,7 @@ import ionen_env
 """
 Global Variables:
 """
-MODELPATH = "model.pth"
+MODELPATH = "model1.pth"
 
 # New attempt with torchrl
 def torchrlAlg1():
@@ -62,7 +62,7 @@ def torchrlAlg1():
 
     frames_per_batch = 1000
     # For a complete training, bring the number of frames up to 1M
-    total_frames = 1_000_000
+    total_frames = 10_000
 
     sub_batch_size = 64  # cardinality of the sub-samples gathered from the current data in the inner loop
     num_epochs = 10  # optimization steps per batch of data collected
@@ -259,12 +259,12 @@ def torchrlAlg1():
         # We evaluate the policy once every 10 batches of data.
         # Evaluation is rather simple: execute the policy without exploration
         # (take the expected value of the action distribution) for a given
-        # number of steps (1000, which is our ``env`` horizon).
+        # number of steps (500 here; the torchrl tutorial uses 1000, its ``env`` horizon).
         # The ``rollout`` method of the ``env`` can take a policy as argument:
         # it will then execute this policy at each step.
         with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
             # execute a rollout with the trained policy
-            eval_rollout = env.rollout(1000, policy_module)
+            eval_rollout = env.rollout(500, policy_module)
             logs["eval reward"].append(eval_rollout["next", "reward"].mean().item())
             logs["eval reward (sum)"].append(
                 eval_rollout["next", "reward"].sum().item()
@@ -282,10 +282,10 @@ def torchrlAlg1():
         # this is a nice-to-have but nothing necessary for PPO to work.
         scheduler.step()
 
-    torch.save({'policy_state_dict': policy_module.state_dict(),
-                'value_state_dict': value_module.state_dict(),
-                'loss_state_dict': loss_module.state_dict()},
-               MODELPATH)
+    # torch.save({'policy_state_dict': policy_module.state_dict(),
+    #             'value_state_dict': value_module.state_dict(),
+    #             'loss_state_dict': loss_module.state_dict()},
+    #            MODELPATH)
 
     # These modules are not good to load
     # 'collector_state_dict': collector.state_dict(),
diff --git a/model1.pth b/model1.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e6c89ce1cb516e76b7d680db6c04021aad0d914e
Binary files /dev/null and b/model1.pth differ
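
Note on the disabled checkpointing: the main.py hunk comments out the torch.save call that wrote the policy, value and loss state dicts to MODELPATH, and the committed model1.pth was produced by an earlier run. The sketch below is an illustration, not code from this repository; it assumes policy_module, value_module and loss_module are the nn.Module-based objects built in torchrlAlg1() and that path points to a writable file such as "model1.pth". The collector and replay buffer are deliberately left out, matching the "not good to load" remark in the diff.

import torch

def save_checkpoint(policy_module, value_module, loss_module, path):
    # Persist only plain nn.Module state dicts; collector / replay-buffer
    # state is intentionally excluded because it does not restore cleanly.
    torch.save(
        {
            "policy_state_dict": policy_module.state_dict(),
            "value_state_dict": value_module.state_dict(),
            "loss_state_dict": loss_module.state_dict(),
        },
        path,
    )

def load_checkpoint(policy_module, value_module, loss_module, path):
    # map_location="cpu" keeps loading independent of the training device.
    checkpoint = torch.load(path, map_location="cpu")
    policy_module.load_state_dict(checkpoint["policy_state_dict"])
    value_module.load_state_dict(checkpoint["value_state_dict"])
    loss_module.load_state_dict(checkpoint["loss_state_dict"])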
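
For completeness, a minimal sketch (under the same assumptions) of re-running the shortened 500-step evaluation rollout from the diff with a restored policy; env and policy_module are assumed to be rebuilt exactly as in torchrlAlg1() before load_checkpoint above is called.

import torch
from torchrl.envs.utils import ExplorationType, set_exploration_type

with set_exploration_type(ExplorationType.MEAN), torch.no_grad():
    # 500-step horizon, matching the value used in the diff.
    eval_rollout = env.rollout(500, policy_module)
    print("eval reward (mean):", eval_rollout["next", "reward"].mean().item())
    print("eval reward (sum):", eval_rollout["next", "reward"].sum().item())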