Implementation of DReCon in Unity 2022 LTS, forked from: https://github.com/joanllobera/marathon-envs
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

99 lines
3.3 KiB

using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
/// <summary>
/// Hopper agent built on <see cref="MarathonAgent"/>. At the start of every episode it
/// registers its reward, termination and observation callbacks with the base class and
/// maps the "torso" and "foot" rigidbodies to the "pelvis" and "foot" body-part keys.
/// </summary>
public class DeepMindHopperAgent : MarathonAgent
{
    // Size of a freshly allocated RewardHackingVector. Only the first
    // WrittenRewardSlots entries are written by this class; the remaining slots
    // stay at zero (the original allocation size of 6 is preserved).
    const int RewardHackingVectorSize = 6;
    const int WrittenRewardSlots = 4;

    // Weights used by StepRewardHopper101.
    const float UprightWeight = 0.05f;
    const float VelocityWeight = 0.7f;
    const float EffortWeight = 0.25f;
    // Weighted velocity at or above which the effort term gets the fixed EffortWeight.
    const float EffortVelocityThreshold = 0.25f;

    /// <summary>
    /// Debug view of the individual reward components, refreshed every reward step:
    /// [0] velocity, [1] position, [2] effort, [3] upright bonus.
    /// </summary>
    public List<float> RewardHackingVector;

    /// <summary>
    /// Resets the base agent, installs this agent's callbacks and resolves the
    /// body parts used by the observation and reward functions.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        base.OnEpisodeBegin();

        // set to true this to show monitor while training
        //Monitor.SetActive(true);

        StepRewardFunction = StepRewardHopper101;
        TerminateFunction = TerminateOnNonFootHitTerrain;
        ObservationsFunction = ObservationsDefault;

        // Query the hierarchy once and look both bodies up in the same result.
        // FirstOrDefault yields null when no rigidbody with that name exists.
        Rigidbody[] rigidbodies = GetComponentsInChildren<Rigidbody>();
        BodyParts["pelvis"] = rigidbodies.FirstOrDefault(x => x.name == "torso");
        BodyParts["foot"] = rigidbodies.FirstOrDefault(x => x.name == "foot");

        SetupBodyParts();
    }

    /// <summary>
    /// Writes the observation vector: normalized pelvis velocity, pelvis forward and up
    /// axes, touch sensors, joint rotations, joint velocities and the normalized foot height.
    /// The order of the AddObservation calls defines the observation layout.
    /// </summary>
    /// <param name="sensor">Sensor that receives the observations.</param>
    void ObservationsDefault(VectorSensor sensor)
    {
        var pelvis = BodyParts["pelvis"];
        Vector3 normalizedVelocity = this.GetNormalizedVelocity(pelvis.velocity);
        sensor.AddObservation(normalizedVelocity);
        sensor.AddObservation(pelvis.transform.forward); // gyroscope
        sensor.AddObservation(pelvis.transform.up);

        sensor.AddObservation(SensorIsInTouch);
        JointRotations.ForEach(x => sensor.AddObservation(x));
        sensor.AddObservation(JointVelocity);

        // Only the vertical component of the foot position is observed.
        var foot = BodyParts["foot"];
        Vector3 normalizedFootPosition = this.GetNormalizedPosition(foot.transform.position);
        sensor.AddObservation(normalizedFootPosition.y);
    }

    /// <summary>
    /// Returns the x coordinate of the focal point's world position.
    /// </summary>
    float GetRewardOnEpisodeComplete()
    {
        return FocalPoint.transform.position.x;
    }

    /// <summary>
    /// Direction bonus of the pelvis relative to <see cref="Vector3.forward"/>,
    /// clamped to [0, 1].
    /// </summary>
    float GetClampedUprightBonus()
    {
        // float uprightBonus = GetForwardBonus("pelvis");
        float uprightBonus = GetDirectionBonus("pelvis", Vector3.forward, 1f);
        return Mathf.Clamp(uprightBonus, 0f, 1f);
    }

    /// <summary>
    /// X component of the normalized pelvis velocity, clamped to [0, 1]
    /// (negative values become zero).
    /// </summary>
    float GetClampedForwardVelocity()
    {
        return Mathf.Clamp(GetNormalizedVelocity("pelvis").x, 0f, 1f);
    }

    /// <summary>
    /// Refreshes <see cref="RewardHackingVector"/> with the current, unweighted
    /// reward components.
    /// </summary>
    void UpdateRewardHackingVector()
    {
        float uprightBonus = GetClampedUprightBonus();
        float velocity = GetClampedForwardVelocity();
        float position = Mathf.Clamp(GetNormalizedPosition("pelvis").x, 0f, 1f);
        float effort = 1f - GetEffortNormalized();

        // Allocate when the list is missing or too short for the indexed writes below.
        // The previous check (`RewardHackingVector?.Count == 0`) evaluated to false for a
        // null list, so the allocation was skipped and the first write threw.
        if (RewardHackingVector == null || RewardHackingVector.Count < WrittenRewardSlots)
        {
            RewardHackingVector = Enumerable.Repeat(0f, RewardHackingVectorSize).ToList();
        }

        RewardHackingVector[0] = velocity;
        RewardHackingVector[1] = position;
        RewardHackingVector[2] = effort;
        RewardHackingVector[3] = uprightBonus;
    }

    /// <summary>
    /// Per-step reward: weighted velocity + weighted upright bonus + effort term.
    /// </summary>
    /// <returns>The sum of the three weighted components.</returns>
    float StepRewardHopper101()
    {
        UpdateRewardHackingVector();

        float uprightBonus = GetClampedUprightBonus() * UprightWeight;
        float velocity = GetClampedForwardVelocity() * VelocityWeight;
        // float position = Mathf.Clamp(GetNormalizedPosition("pelvis").x, 0f, 1f);
        float effort = 1f - GetEffortNormalized();

        // Below the threshold the effort term is scaled by the weighted velocity itself,
        // so it shrinks toward zero as the velocity does; at or above the threshold
        // it is scaled by the fixed EffortWeight.
        if (velocity >= EffortVelocityThreshold)
        {
            effort *= EffortWeight;
        }
        else
        {
            effort *= velocity;
        }

        var reward = velocity
                     + uprightBonus
                     + effort;

        if (ShowMonitor)
        {
            //var hist = new[] {reward, velocity, uprightBonus, effort};
            //Monitor.Log("rewardHist", hist, displayType: Monitor.DisplayType.Independent);
        }

        return reward;
    }
}