MATLAB代做|近似动态规划(ADP)的无限时间值函数迭代

功能描述：
联系：highspeedlogic
QQ ：1224848052
微信：HuangL1121
邮箱：1224848052@qq.com
网站：http://www.mat7lab.com/
网站：http://www.hslogic.com/
微信扫一扫：
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% This is the main program which does initializations and calls all other programs % 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
 
% Close figures, start the clock, define constants 
clear, close, tic 
global nic		% these globals are needed in atraingd 
global nh1c 
global noc 
global nia 
global nh1a 
global noa 
dsp=0;      % display or not 
batch=1;    % batch or non-batch training 
nst=4;      % how many states 
twelve=12;  % use degrees 
N=6000;     % maximum number of steps to try 
gamma=0.9;  % critic discount factor 
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% For tracking problems, define ystar here % 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
%ystar=zeros(1,N+3); for t=1:N+3, ystar(t)=sin(2*pi*(t-1)/100); end 
%for t=1:N+3, ystar(t)=(sin(2*pi*(t-1)/50)+sin(2*pi*(t-1)/30)); end 
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Define the critic network; cnet %  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
disp(' '); disp('Define the critic network'), disp(' '); 
critic_total=1; nc=[5 6 1]; nic=nc(1); nh1c=nc(2); noc=nc(3);  
%Mcp=[1.5; 10*twelve/20; 2.4; twelve/4; 1]; mcp=-Mcp; % x_dot theta_dot x theta u 
Mcp=[twelve/4; 10*twelve/20; 2.4; 1.5; 1]; mcp=-Mcp; % theta theta_dot x x_dot u 
cnet_in=[-ones(nic,1) ones(nic,1)]; 
%cnet=newff(cnet_in, [nh1c noc],{'tansig','purelin'},'ctraingd');  
cnet=newff(cnet_in, [nh1c noc],{'tansig','purelin'},'traingd');  
% ctraingd = traingd with display turned off 
cnet.trainParam.epochs = 200; 	cnet.trainParam.show = 500; 
cnet.trainParam.goal   = 1e-5; 	cnet.trainParam.lr   = 0.15;  
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Define the action network; anet % 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
disp(' '); disp('Define the action network'), disp(' '); 
action_total=200; na=[4 6 1]; nia=na(1); nh1a=na(2); noa=na(3); 
Map=Mcp(1:4); map=-Map; 
anet_in=[-ones(nia,1) ones(nia,1)]; 
anet=newff(anet_in, [nh1a noa],{'tansig','tansig'},'atraingd');  
% atraingd = traingd without updating cnet and with no displays 
anet.trainParam.epochs = action_total; 	anet.trainParam.show = 500; 
anet.trainParam.goal   = 2e-3; 		anet.trainParam.lr   = 0.2; 
if batch==0 
   cnet.trainParam.epochs = 50;		anet.trainParam.lr   = 0.1; % non-batch 
end 
anet.performFcn = 'mse';  
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Call wnet to construct a network for anet-cnet %  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
wnet 
 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
% Continuously train cnet and anet until good ones are obtained % 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%  
for run=1:1000 
 
    % Every run should use a different seed; can also assign a seed 
    rn=8241; nc=clock; nc(1)=0; nc(3)=2*nc(3); nc(4)=5*nc(4); nc(6)=60*nc(6); 
    rn=fix(516*sum(nc)+rn); if rn>1.5e9, rn=fix(rn/5e6); end, rand('seed',rn); 
    %rand('seed',1551111); % set seed 
    rseed=rand('seed'); fprintf('Random seed is %d\n\n',rseed); 
 
    % Initialize cnet and anet % do not use cnet=init(cnet); anet=init(anet); 
    cw=getx(cnet); [m,n]=size(cw); cw=2*(0.2*rand(m,n)-0.1); cnet=setx(cnet,cw);  
    cw=getx(anet); [m,n]=size(cw); cw=2*(0.2*rand(m,n)-0.1); anet=setx(anet,cw); 
    disp('Initialize cnet and anet'); disp(' ');  
    if dsp==1 
        for i=1:27 
            fprintf('---');  
        end 
        fprintf('\n');  
    end 
 
    for trial=1:100   
 
       % Empty all vectors and matrices 
       u=zeros(noa,N); xstar=zeros(nst,N); t=1; fall=0; UU=[]; 
       tarc=1.234; inputct=[]; targetct=[]; 
 
       % Set plant's initial states & calculate the initial action for t=1 
       pout=zeros(nst,1); pout(1)=(rand*twelve*2-twelve)/twelve/2;  
       xstar(1,1)=pout(1); xstar(:,1)=pout; 
       for i=1:27, fprintf('---'); end, fprintf('\n');  
       fprintf('Initial states: %g, %g, %g, %g\n',pout); 
       aino=[pout(4); pout(2); pout(3); pout(1)]; 
       ain=tramnmx(aino,map,Map);  
       ano=0.1*rand-0.05; u1=0*ano+sim(anet,ain); u(:,t)=u1; 
       cino=[ain; u1]; 
 
       % The cycle starts here  
       while fall==0 && t<=N,  
 
          % Apply action to the plant and determine the output 
          pin=pout; uin=u1;  
          pout=plant(pin,uin);  
          if abs(pout(1))>twelve || abs(pout(3))>2.4, fall=1; end  
 
          % Increment time by 1 and store states starting from t=2 
          t=t+1; xstar(:,t)=pout;    
 
          % Calculate and store the action from t=2 
          aino=[pout(4); pout(2); pout(3); pout(1)]; 
          ain=tramnmx(aino,map,Map); 
          ano=0.1*rand-0.05; u1=0*ano+sim(anet,ain); u(:,t)=u1; 
          cin=[ain; u1]; 
 
          % Calculate and store the utility function  
          U=[]; U=[U fall]; UU=[UU fall]; % U for non-batch and UU for batch 
 
          % If it is a success, save the results 
          if t>N && fall==0,  
             eval(['save all-' num2str(run) '-' num2str(trial)]) 
             fprintf('The pole has been balanced for %d steps.\n',t);  
             break 
          end       
 
          %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
          % Cnet non-batch training here 
          %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
 
          % The input to cnet starting from t=1 and t=2 
          inputc=tramnmx(cino,mcp,Mcp);  
          inputct=[inputct inputc];  
          inputc1=tramnmx(cin,mcp,Mcp); cino=cin; 
          Qold=sim(cnet,inputc); 
          Q=sim(cnet,inputc1); targetc=U+gamma*Q;  
 
          % Non-batch cnet training 
          if batch==0, 
              for c_cycle=1:critic_total 
                Q=sim(cnet,inputc1); targetc=U+gamma*Q; 
                [cnet,ctr]=train(cnet,inputc,targetc); % non-batch training 
              end  
              targetct=[targetct targetc]; tarc=[tarc targetc];  
          end 
 
          % Displays etc. 
          if dsp==1 
              if t==2 
                  fprintf(' %2d-%d-%d, f=%7.4f, tht=%8.4f\n',t-1,trial,run,uin,pin(1));  
              end 
              if t<200 
                  fprintf(' %2d-%d-%d, f=%7.4f, tht=%8.4f\n',t,trial,run,u1,pout(1));  
              end 
             %fprintf(', tarc=%6.3f, oldc=%6.3f\n',targetc,Qold); end 
          end %dsp 
 
          if t==fix(t/500)*500 
             fprintf('Trial %d has lasted for %d steps.\n',trial,t);  
          end 
 
       end % while 
 
       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
       % Training of cnet and anet here; batch mode only 
       %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 
 
       % Training only if failure and small norm 
       if fall~=0   
 
          fprintf('Trial %d was %d steps (in run %d).\n',trial,t,run); 
 
          % Weight scaling before training to prevent local minima 
          cw=getx(anet); norma=norm(cw,inf);  
          if norma>=0.9 
             norma=norm(cw,1); cw=cw/norma;  
             [m,n]=size(cw); cw=1.5*(0.2*rand(m,n)-0.1);  
             anet=setx(anet,cw); norma=norm(cw,inf); 
             cw=getx(cnet); normc=norm(cw,1); cw=cw/normc; 
             [m,n]=size(cw); cw=1.5*(0.2*rand(m,n)-0.1);  
             cnet=setx(cnet,cw); normc=norm(cw,inf); 
          else 
 
              if batch==1,  
                  % Cnet batch training 
                  inputb=[xstar(4,1:t-1); xstar(2,1:t-1); xstar(3,1:t-1); xstar(1:t-1); ... 
                    u(:,1:t-1)]; 
                  inputc=tramnmx(inputb,mcp,Mcp); 
                  inputb=[xstar(4,2:t); xstar(2,2:t); xstar(3,2:t); xstar(1,2:t); u(:,2:t)]; 
                  inputc1=tramnmx(inputb,mcp,Mcp); 
                  for c_cycle=1:critic_total 
                    Qc=sim(cnet,inputc1); targetc=UU+gamma*Qc; 
                    for ic=1:t-1, targetc(ic)=gamma^(t-ic+1); end 
                    [cnet,ctr]=train(cnet,inputc,targetc); 	 
                  end 
                  tarc=[targetc 0];  
              end 
 
              fprintf('Rescale: anet norm ='); 
              fprintf(' %6.4f, cnet = %6.4f, gamma = %g.\n',norma,normc,gamma);  
 
              % Assign cnet to part of wnet 
              for i=1:nia, wnet.iw{3,nia+i}=cnet.iw{1,1}(:,i); end 
              wnet.lw{3,2}=cnet.iw{1,1}(:,nia+1:nic); wnet.b{3}=cnet.b{1}; 
              wnet.lw{4,3}=cnet.lw{2,1};              wnet.b{4}=cnet.b{2}; 
              % Assign anet to part of wnet 
              for l=1:2, wnet.b{l}=anet.b{l}; end 
              for i=1:nia, wnet.iw{1,i}=anet.iw{1,1}(:,i); end 
              wnet.lw{2,1}=anet.lw{2,1}; 
 
              % Determine the inputs and taregts for anet training   
              xb=[xstar(4,1:t); xstar(2,1:t); xstar(3,1:t); xstar(1,1:t)]; 
              xa=tramnmx(xb,map,Map); inpa=[]; inpa=[inpa [xa; xa]];  
              inputa=mat2cell(inpa,ones(1,2*nia),t); 
              targetb=0*ones(1,t); targeta={targetb};    
 
              [wnet,wtr]=train(wnet,inputa,targeta); 
 
              % Read the anet out from wnet 
              for l=1:2, anet.b{l}=wnet.b{l}; end 
              amat=[]; for i=1:nia, amat=[amat wnet.iw{1,i}]; end 
              anet.iw{1,1}=amat; anet.lw{2,1}=wnet.lw{2,1};  
 
              cw=getx(cnet); normc=norm(cw,inf); 
              cw=getx(anet); norma=norm(cw,inf); 
              fprintf('Trained: anet norm ='); 
              fprintf(' %6.4f, cnet = %6.4f, gamma = %g.\n',norma,normc,gamma);  
 
               % Summary displays here 
               if dsp==1 && fall~=0 
 
                  inputb=[xstar(4,1:t); xstar(2,1:t); xstar(3,1:t); xstar(1,1:t); u(:,1:t)]; 
                  inputc=tramnmx(inputb,mcp,Mcp);  
                  Qc=sim(cnet,inputc); Q=[Q Qc];  
 
                  xb=[xstar(4,1:t); xstar(2,1:t); xstar(3,1:t); xstar(1,1:t)]; 
                  xa=tramnmx(xb,map,Map); inpa=[]; inpa=[inpa [xa; xa]];  
                  inputa=mat2cell(inpa,ones(1,2*nia),t); 
                  new1=sim(wnet,inputa); new2=cell2mat(new1); 
                  neww=new2(2,:);  
 
                  for i=1:t 
                     fprintf(' %2d-%d-%d, f=%4.1f, tht=%5.1f, tarc=%6.3f',... 
                        i,trial,run,u(1,i),xstar(1,i),tarc(i)); 
                     fprintf(', newc=%6.3f, neww=%6.3f\n',Q(i),neww(i)); 
                  end 
               end % dsp and fall 
           end % norma 
       end % fall  
 
       %if fall~=0, fprintf('Trial %d was %d steps.\n',trial,t); end 
 
       if t>N,  
          fprintf('The pole was balanced in trial %d.\n',trial);  
          cw=getx(anet); norma=norm(cw,inf);  
          fprintf(' .... anet norm = %g\n',norma);  
          fprintf(' .... aino = %g, %g, %g, %g\n',aino); 
          fprintf(' .... ain  = %g, %g, %g, %g\n',ain); break 
       end 
 
    end % trial 
 
    if t>N, fprintf('The pole balancing was successful in run %d.\n',run); end 
 
    fprintf('\n'); fprintf('\n'); for i=1:28, fprintf('###'); end, fprintf('\n');  
 
end % run 
 
hr=fix(toc/3600); mint=fix(toc/60)-hr*60; sec=toc-hr*3600-mint*60; 
disp(['   Time used =',int2str(hr),':',int2str(mint),':',int2str(sec)]); 
disp(['   Program finished at :',int2str(fix(clock))]);
首页

业务范围

项目展示

资源共享

业内新闻

合作说明

关于我们

关于我们

业务范围

项目展示

资源共享

联系我们