Implementation of DReCon in Unity 2022 LTS
forked from:
https://github.com/joanllobera/marathon-envs
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
99 lines
3.3 KiB
99 lines
3.3 KiB
9 months ago
|
using System;
|
||
|
using System.Collections;
|
||
|
using System.Collections.Generic;
|
||
|
using System.Linq;
|
||
|
using UnityEngine;
|
||
|
using Unity.MLAgents;
|
||
|
using Unity.MLAgents.Actuators;
|
||
|
using Unity.MLAgents.Sensors;
|
||
|
/// <summary>
/// Hopper agent built on <see cref="MarathonAgent"/>. At the start of every
/// episode it installs its own step-reward, termination and observation
/// callbacks and registers the "pelvis" and "foot" body parts.
/// </summary>
public class DeepMindHopperAgent : MarathonAgent
{
    /// <summary>
    /// Most recent reward components, refreshed on every reward step.
    /// Slots written by this class: [0] velocity, [1] position, [2] effort,
    /// [3] upright bonus. The list is allocated with six entries; slots 4 and 5
    /// are never written by this class.
    /// </summary>
    public List<float> RewardHackingVector;

    /// <summary>
    /// Runs the base episode setup, then wires this agent's reward, termination
    /// and observation functions and looks up the rigidbodies it tracks.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        base.OnEpisodeBegin();

        // Set this to true to show the monitor while training.
        //Monitor.SetActive(true);

        StepRewardFunction = StepRewardHopper101;
        TerminateFunction = TerminateOnNonFootHitTerrain;
        ObservationsFunction = ObservationsDefault;

        // Query the hierarchy once and reuse the result for both lookups.
        Rigidbody[] rigidbodies = GetComponentsInChildren<Rigidbody>();

        // The "pelvis" body part is backed by the rigidbody named "torso".
        BodyParts["pelvis"] = rigidbodies.FirstOrDefault(x => x.name == "torso");
        BodyParts["foot"] = rigidbodies.FirstOrDefault(x => x.name == "foot");
        SetupBodyParts();
    }

    /// <summary>
    /// Writes this agent's observations to <paramref name="sensor"/>: pelvis
    /// normalized velocity, pelvis forward and up vectors, the touch sensor
    /// state, joint rotations, joint velocities and the normalized foot height.
    /// </summary>
    /// <param name="sensor">Vector sensor that receives the observations.</param>
    private void ObservationsDefault(VectorSensor sensor)
    {
        var pelvis = BodyParts["pelvis"];
        Vector3 normalizedVelocity = this.GetNormalizedVelocity(pelvis.velocity);
        sensor.AddObservation(normalizedVelocity);
        sensor.AddObservation(pelvis.transform.forward); // gyroscope
        sensor.AddObservation(pelvis.transform.up);

        sensor.AddObservation(SensorIsInTouch);
        foreach (var rotation in JointRotations)
        {
            sensor.AddObservation(rotation);
        }
        sensor.AddObservation(JointVelocity);

        // Only the vertical component of the normalized foot position is observed.
        var foot = BodyParts["foot"];
        Vector3 normalizedFootPosition = this.GetNormalizedPosition(foot.transform.position);
        sensor.AddObservation(normalizedFootPosition.y);
    }

    /// <summary>
    /// Returns the x coordinate of the focal point's world position.
    /// </summary>
    private float GetRewardOnEpisodeComplete()
    {
        return FocalPoint.transform.position.x;
    }

    /// <summary>
    /// Recomputes the reward components and stores them in
    /// <see cref="RewardHackingVector"/>, allocating the list first when it is
    /// missing or too short to hold the four slots written here.
    /// </summary>
    private void UpdateRewardHackingVector()
    {
        const int allocatedSlots = 6;
        const int requiredSlots = 4;

        // float uprightBonus = GetForwardBonus("pelvis");
        float uprightBonus = GetDirectionBonus("pelvis", Vector3.forward, 1f);
        uprightBonus = Mathf.Clamp(uprightBonus, 0f, 1f);
        float velocity = Mathf.Clamp(GetNormalizedVelocity("pelvis").x, 0f, 1f);
        float position = Mathf.Clamp(GetNormalizedPosition("pelvis").x, 0f, 1f);
        float effort = 1f - GetEffortNormalized();

        // A null list must be handled explicitly: `RewardHackingVector?.Count == 0`
        // evaluates to false for null, which left the list unallocated and made
        // the indexed writes below throw. Lists shorter than the written range
        // are replaced for the same reason.
        if (RewardHackingVector == null || RewardHackingVector.Count < requiredSlots)
        {
            RewardHackingVector = Enumerable.Repeat(0f, allocatedSlots).ToList();
        }

        RewardHackingVector[0] = velocity;
        RewardHackingVector[1] = position;
        RewardHackingVector[2] = effort;
        RewardHackingVector[3] = uprightBonus;
    }

    /// <summary>
    /// Per-step reward: weighted forward velocity plus a small direction bonus
    /// plus an effort term that is gated by the weighted velocity.
    /// </summary>
    /// <returns>The sum of the weighted velocity, upright bonus and effort terms.</returns>
    private float StepRewardHopper101()
    {
        const float uprightWeight = 0.05f;
        const float velocityWeight = 0.7f;
        const float effortWeight = 0.25f;
        const float effortVelocityThreshold = 0.25f;

        UpdateRewardHackingVector();
        float uprightBonus = GetDirectionBonus("pelvis", Vector3.forward, 1f);
        uprightBonus = Mathf.Clamp(uprightBonus, 0f, 1f);
        float velocity = Mathf.Clamp(GetNormalizedVelocity("pelvis").x, 0f, 1f);
        // float position = Mathf.Clamp(GetNormalizedPosition("pelvis").x, 0f, 1f);
        float effort = 1f - GetEffortNormalized();

        uprightBonus *= uprightWeight;
        velocity *= velocityWeight;

        // The threshold is compared against the already-weighted velocity.
        // Below it, the effort term is scaled by that velocity, so it shrinks
        // to zero as forward velocity does.
        if (velocity >= effortVelocityThreshold)
        {
            effort *= effortWeight;
        }
        else
        {
            effort *= velocity;
        }

        var reward = velocity
                     + uprightBonus
                     + effort;
        if (ShowMonitor)
        {
            //var hist = new[] {reward, velocity, uprightBonus, effort};
            //Monitor.Log("rewardHist", hist, displayType: Monitor.DisplayType.Independent);
        }

        return reward;
    }
}
|