Gradient descent
=={{header|ALGOL 68}}==
{{Trans|Fortran}}
The results agree with the Fortran and Julia samples to 6 decimal places.
<langsyntaxhighlight lang="algol68">PROC steepest descent = ( REF[]LONG REAL x, LONG REAL alphap, tolerance )VOID:
BEGIN
LONG REAL alpha := alphap;
print( ( "Testing steepest descent method:", newline ) );
print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) )
END</syntaxhighlight>
{{out}}
<pre>
Testing steepest descent method:
The minimum is at x[0] = 0.107627, x[1] = -1.223260
</pre>

=={{header|ALGOL W}}==
{{Trans|ALGOL 68}} which is a {{Trans|Go}} with the gradient function from {{Trans|Fortran}}
The results agree (to 6 places) with the Fortran and Julia samples.
<langsyntaxhighlight lang="algolw">begin
procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ;
begin
long real array fi ( 0 :: 1 );
long real alpha, g0, g1, delG, b;
alpha := alphap;
g0 := g( x ); % Initial estimate of result. %
delG := 0.0;
for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
b := alpha / delG;
% Iterate until value is <= tolerance. %
delG := 0;
for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
if delG > 0 then begin
b := alpha / delG;
end steepestDescent ;
% Provides a rough calculation of gradient g(x). %
procedure gradG ( long real array z, p ( * ) ) ;
begin
long real x, y;
x := p( 0 );
y := p( 1 );
z( 0 ) := 2 * ( x - 1 ) * longExp( - ( y * y ) )
- 4 * x * longExp( -2 * ( x * x ) ) * y * ( y + 2 );
z( 1 ) := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * longExp( - y * y )
+ longExp( -2 * x * x ) * ( y + 2 )
+ longExp( -2 * x * x ) * y
end gradG ;
% Function for which minimum is to be found. %
long real procedure g ( long real array x ( * ) ) ;
( x( 0 ) - 1 ) * ( x( 0 ) - 1 ) * longExp( - x( 1 ) * x( 1 ) )
+ x( 1 ) * ( x( 1 ) + 2 ) * longExp( - 2 * x( 0 ) * x( 0 ) )
;
begin
long real alpha, tolerance;
long real array x ( 0 :: 1 );
x( 0 ) := 0.1; % Initial guess of location of minimum. %
x( 1 ) := -1;
write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) )
end
end.</syntaxhighlight>
{{out}}
<pre>
The minimum is at x(0) = 0.1076268, x(1) = -1.2232596
</pre>
 
=={{header|Common Lisp}}==
{{works with|SBCL}}
 
<syntaxhighlight lang="Lisp">
(defparameter *epsilon* (sqrt 0.0000001d0))
 
(defun myfun (x y)
(+ (* (- x 1)
(- x 1)
(exp (- (expt y 2))))
(* y (+ 2 y)
(exp (- (* 2 (expt x 2)))))))
 
(defun xd (x y)
(/ (- (myfun (* x (+ 1 *epsilon*)) y)
(myfun (* x (- 1 *epsilon*)) y))
(* 2 x *epsilon*)))
 
(defun yd (x y)
(/ (- (myfun x (* y (+ 1 *epsilon*)))
(myfun x (* y (- 1 *epsilon*))))
(* 2 y *epsilon*)))
 
(defun gd ()
(let ((x 0.1d0)
(y -1.0d0))
(dotimes (i 31)
(setf x (- x (* 0.3d0 (xd x y))))
(setf y (- y (* 0.3d0 (yd x y )))))
(list x y)))
 
</syntaxhighlight>
{{out}}
<pre>
(0.10762684380416455 -1.2232596637047382)
</pre>
 
=={{header|Forth}}==
{{works with|SwiftForth}}
{{libheader|fpmath}}
 
Approach with analytical partial derivatives GD1 and GD2:
 
<syntaxhighlight lang="forth">
 
0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!
 
: ^2 FDUP F* ;
 
\ gradient for x, analytical function
 
: GD1 ( F: x y -- dfxy)
to py to px
2e px 1e F- F* py ^2 FNEGATE FEXP F*
-4e px F* py F* py 2e F+ F* px ^2 -2e F* FEXP F*
F+
;
 
\ gradient for y, analytical function
 
: GD2 ( F: x y -- dfxy )
to py to px
px ^2 -2e F* FEXP FDUP
py F* FSWAP
py 2e F+ F* F+
py ^2 FNEGATE FEXP py F*
px 1e F- ^2 F*
-2e F*
F+
;
 
\ gradient descent
: GD ( x y n -- ) \ gradient descent, initial guess and number of iterations
FOVER FOVER y F! x F!
0 do
FOVER FOVER
GD1 0.3e F* x F@ FSWAP F- x F!
GD2 0.3e F* y F@ FSWAP F- y F!
x F@ y F@ FOVER FOVER FSWAP F. F. CR
loop
;
 
</syntaxhighlight>
 
Approach with the main function and numerical gradients xd and xy:
 
<syntaxhighlight lang="forth">
 
0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!
 
1e-6 fvalue EPSILON
 
: fep EPSILON FSQRT ;
 
 
: MYFUN ( x y -- fxy )
FDUP FROT FDUP ( y y x x )
( x ) 1e F- FDUP F* ( y y x a )
FROT ( y x a y )
( y ) FDUP F* FNEGATE FEXP F* ( y x b )
FROT ( x b y )
( y ) FDUP 2e F+ F* ( x b c )
FROT ( b c x )
( x ) FDUP F* -2e F* FEXP F*
F+
;
 
 
: xd ( x y )
to py to px
px 1e fep F+ F* py myfun px 1e fep F- F* py myfun F- 2e px F* fep F* F/
;
 
: xy ( x y )
to py to px
py 1e fep F+ F* px FSWAP myfun py 1e fep F- F* px FSWAP myfun F- 2e py F* fep F* F/
;
 
 
: GDB ( x y n -- ) \ gradient descent, initial guess and number of iterations
CR FOVER FOVER y F! x F!
0 do
FOVER FOVER
xd 0.3e F* x F@ FSWAP F- x F!
xy 0.3e F* y F@ FSWAP F- y F!
x F@ y F@ FOVER FOVER FSWAP F. F. CR
loop
;
</syntaxhighlight>
 
{{out}}
<pre>
 
0.1e -1e 20 GD
0.1810310 -1.1787894
0.1065262 -1.1965306
0.1144636 -1.2181779
0.1075002 -1.2206140
0.1082816 -1.2227594
0.1076166 -1.2230041
0.1076894 -1.2232108
0.1076261 -1.2232350
0.1076328 -1.2232549
0.1076267 -1.2232573
0.1076274 -1.2232592
0.1076268 -1.2232594
0.1076269 -1.2232596
0.1076268 -1.2232596
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
 
0.1e -1e 20 GDB
0.1810310 -1.1787893
0.1065262 -1.1965305
0.1144636 -1.2181779
0.1075002 -1.2206140
0.1082816 -1.2227594
0.1076166 -1.2230041
0.1076894 -1.2232108
0.1076261 -1.2232350
0.1076328 -1.2232549
0.1076268 -1.2232573
0.1076274 -1.2232592
0.1076268 -1.2232594
0.1076269 -1.2232596
0.1076268 -1.2232596
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
</pre>
 
=={{header|Fortran}}==
(-2*(x-1)**2*y*exp(-y**2))+exp(-2*x**2)*(y+2)+exp(-2*x**2)*y
(%o7) done
The optimization subroutine GD sets the reverse communication variable IFLAG. This allows the evaluation of the function and the gradient to be done separately by the caller; a sketch of the resulting driver loop appears after the output below.
<syntaxhighlight lang="fortran"> SUBROUTINE EVALFG (N, X, F, G)
IMPLICIT NONE
INTEGER N
DOUBLE PRECISION X(N), F, G(N)
F = (X(1) - 1.D0)**2 * EXP(-X(2)**2) +
$ X(2) * (X(2) + 2.D0) * EXP(-2.D0 * X(1)**2)
G(1) = 2.D0 * (X(1) - 1.D0) * EXP(-X(2)**2) - 4.D0 * X(1) *
$ EXP(-2.D0 * X(1)**2) * X(2) * (X(2) + 2.D0)
G(2) = (-2.D0 * (X(1) - 1.D0)**2 * X(2) * EXP(-X(2)**2)) +
$ EXP(-2.D0 * X(1)**2) * (X(2) + 2.D0) +
$ EXP(-2.D0 * X(1)**2) * X(2)
RETURN
END
*___Name______Type________In/Out___Description_________________________
* N Integer In Number of Variables.
* X(N) Double Both Variables
* G(N) Double Both Gradient
* TOL Double In Relative convergence tolerance
* IFLAG Integer Out Reverse Communication Flag
* 0 on output: done
* 1 compute G and call again
*-----------------------------------------------------------------------
SUBROUTINE GD (N, X, G, TOL, IFLAG)
IMPLICIT NONE
INTEGER N, IFLAG
DOUBLE PRECISION X(N), G(N), TOL
DOUBLE PRECISION ETA
PARAMETER (ETA = 0.33D0) ! Learning rate
INTEGER I
DOUBLE PRECISION GNORM ! norm of gradient
 
GNORM = 0.D0 ! convergence test
DO I = 1, N
GNORM = GNORM + G(I)**2
END DO
GNORM = SQRT(GNORM)
IF (GNORM .LE. TOL) THEN
IFLAG = 0
RETURN ! success
END IF
 
DO I = 1, N ! take step
X(I) = X(I) - ETA * G(I)
END DO
IFLAG = 1
RETURN ! let main program evaluate G
END ! of gd
 
Line 257 ⟶ 447:
PARAMETER (N = 2)
INTEGER ITER, J, IFLAG
DOUBLE PRECISION X(N), F, G(N), TOL
 
X(1) = -0.1D0 ! initial values
X(2) = -1.0D0
TOL = 1.D-15
CALL EVALFG (N, X, F, G)
IFLAG = 0
DO J = 1, 1 000 000
STOP 'too many iterations!'
 
50 PRINT '(A, I7, A, F19.15, A, F19.15, A, F19.15)',
$ 'After ', ITER, ' steps, found minimum at x=',
$ X(1), ' y=', X(2), ' of f=', F
STOP 'program complete'
END
</syntaxhighlight>
{{out}}
<pre>After 31 steps, found minimum at x= 0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493
STOP program complete
</pre>
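The driver loop pairs with GD's flag protocol: call GD, and while it returns with IFLAG = 1, recompute the gradient and call it again. Below is a minimal sketch of such a loop, assuming only the GD and EVALFG interfaces shown above; it is illustrative, and the exact statements of the full program may differ.
<syntaxhighlight lang="fortran">
*     Illustrative reverse-communication driver loop (assumed form).
      CALL EVALFG (N, X, F, G)         ! initial function and gradient
      IFLAG = 0
      DO J = 1, 1000000
        CALL GD (N, X, G, TOL, IFLAG)  ! take a step, or report done
        IF (IFLAG .EQ. 0) THEN         ! converged
          ITER = J
          GO TO 50
        END IF
        CALL EVALFG (N, X, F, G)       ! GD requested a fresh gradient
      END DO
</syntaxhighlight>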
=={{header|Go}}==
 
However, since it was originally written, I've substituted Fortran's gradient function for the original one (see Talk page), which now gives results that agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly.
<langsyntaxhighlight lang="go">package main
 
import (
fmt.Println("Testing steepest descent method:")
fmt.Printf("The minimum is at x = %f, y = %f for which f(x, y) = %f.\n", x[0], x[1], g(x))
}</syntaxhighlight>
 
{{out}}
<pre>
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.
</pre>
 
=={{header|J}}==
{{works with|Jsoftware|>= 9.0.1}}
{{libheader|math/calculus}}
 
In earlier versions without the calculus library, D. can be used instead of pderiv.
 
<syntaxhighlight lang="J">
load 'math/calculus'
coinsert 'jcalculus'
 
NB. initial guess
 
go=: 0.1 _1
 
NB. function
 
func=: monad define
'xp yp' =. y
((1-xp)*(1-xp) * ] ^-(yp)^2) + yp*(yp+2)* ] ^ _2 * xp^2 NB. evaluates from right to left without precedence
)
 
NB. gradient descent
 
shortygd =: monad define
go=.y
go=. go - 0.03 * func pderiv 1 ] go NB. use D. instead of pderiv for earlier versions
)
 
NB. use:
 
shortygd ^:_ go
 
</syntaxhighlight>
{{out}}
<pre>
0.107627 _1.22326
</pre>
 
=={{header|jq}}==
'''Adapted from [[#Go|Go]]'''
 
'''Works with jq and gojq, the C and Go implementations of jq'''
 
The function, g, and its gradient, gradG, are defined as 0-arity filters so that they
can be passed as formal parameters to the `steepestDescent` function.
<syntaxhighlight lang="jq">
def sq: .*.;
 
def ss(stream): reduce stream as $x (0; . + ($x|sq));
 
# Function for which minimum is to be found: input is a 2-array
def g:
. as $x
| (($x[0]-1)|sq) *
(( - ($x[1]|sq))|exp) + $x[1]*($x[1]+2) *
((-2*($x[0]|sq))|exp);
 
# Provide a rough calculation of gradient g(p):
def gradG:
. as [$x, $y]
| ($x*$x) as $x2
| [2*($x-1)*(-($y|sq)|exp) - 4 * $x * (-2*($x|sq)|exp) * $y*($y+2),
- 2*($x-1)*($x-1)*$y*((-$y*$y)|exp)
+ ((-2*$x2)|exp)*($y+2) + ((-2*$x2)|exp)*$y] ;
 
def steepestDescent(g; gradG; $x; $alpha; $tolerance):
($x|length) as $n
| {g0: ($x|g), # Initial estimate of result
alpha: $alpha,
fi: ($x|gradG), # Calculate initial gradient
x: $x,
iterations: 0
}
 
# Calculate initial norm:
| .delG = (ss( .fi[] ) | sqrt )
| .b = .alpha/.delG
 
# Iterate until value is <= $tolerance:
| until (.delG <= $tolerance;
.iterations += 1
# Calculate next value:
| reduce range(0; $n) as $i(.;
.x[$i] += (- .b * .fi[$i]) )
 
# Calculate next gradient:
| .fi = (.x|gradG)
 
# Calculate next norm:
| .delG = (ss( .fi[] ) | sqrt)
| .b = .alpha/.delG
 
# Calculate next value:
| .g1 = (.x|g)
 
# Adjust parameter:
| if .g1 > .g0
then .alpha /= 2
else .g0 = .g1
end
) ;
 
def tolerance: 0.0000006;
def alpha: 0.1;
def x: [0.1, -1]; # Initial guess of location of minimum.
 
def task:
steepestDescent(g; gradG; x; alpha; tolerance)
| "Testing the steepest descent method:",
"After \(.iterations) iterations,",
"the minimum is at x = \(.x[0]), y = \(.x[1]) for which f(x, y) = \(.x|g)." ;
 
task
</syntaxhighlight>
{{out}}
<pre>
Testing the steepest descent method:
After 24 iterations,
the minimum is at x = 0.10762682432948058, y = -1.2232596548816101 for which f(x, y) = -0.7500634205514924.
</pre>
 
=={{header|Julia}}==
<langsyntaxhighlight lang="julia">using Optim, Base.MathConstants
 
f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2)
 
println(optimize(f, [0.1, -1.0], GradientDescent()))
</syntaxhighlight>{{out}}
<pre>
Results of Optimization Algorithm
* Gradient Calls: 35
</pre>
 
=={{header|Nim}}==
{{trans|Go}}
The gradient function has been rewritten to remove a term in the partial derivative with respect to y (two terms instead of three). This doesn't change the result, which agrees with those of Go, Fortran, Julia, etc.
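The rewrite is just algebra: the last two terms of the analytic partial derivative share the factor exp(-2*x²), so exp(-2*x²)*(y+2) + exp(-2*x²)*y = exp(-2*x²)*(2*y+2) = 2*(y+1)*exp(-2*x²), which is the two-term form used in gradG below.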
 
<syntaxhighlight lang="nim">import math, strformat
 
 
func steepDescent(g: proc(x: openArray[float]): float;
gradG: proc(p: openArray[float]): seq[float];
x: var openArray[float]; alpha, tolerance: float) =
let n = x.len
var alpha = alpha
var g0 = g(x) # Initial estimate of result.
 
# Calculate initial gradient.
var fi = gradG(x)
 
# Calculate initial norm.
var delG = 0.0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
var b = alpha / delG
 
# Iterate until value is <= tolerance.
while delG > tolerance:
# Calculate next value.
for i in 0..<n:
x[i] -= b * fi[i]
 
# Calculate next gradient.
fi = gradG(x)
 
# Calculate next norm.
delG = 0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
b = alpha / delG
 
# Calculate next value.
let g1 = g(x)
 
# Adjust parameter.
if g1 > g0: alpha *= 0.5
else: g0 = g1
 
 
when isMainModule:
 
func g(x: openArray[float]): float =
## Function for which minimum is to be found.
(x[0]-1) * (x[0]-1) * exp(-x[1]*x[1]) + x[1] * (x[1]+2) * exp(-2*x[0]*x[0])
 
func gradG(p: openArray[float]): seq[float] =
## Provides a rough calculation of gradient g(p).
result = newSeq[float](p.len)
let x = p[0]
let y = p[1]
result[0] = 2 * (x-1) * exp(-y*y) - 4 * x * exp(-2*x*x) * y * (y+2)
result[1] = -2 * (x-1) * (x-1) * y * exp(-y*y) + 2 * (y+1) * exp(-2*x*x)
 
const
Tolerance = 0.0000006
Alpha = 0.1
 
var x = [0.1, -1.0] # Initial guess of location of minimum.
 
steepDescent(g, gradG, x, Alpha, Tolerance)
echo "Testing steepest descent method:"
echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"</syntaxhighlight>
 
{{out}}
<pre>Testing steepest descent method:
The minimum is at x = 0.107626824329, y = -1.223259654882 for which f(x, y) = -0.750063420551</pre>
 
=={{header|Perl}}==
Calculate with <code>bignum</code> for numerical stability.
{{trans|Raku}}
<langsyntaxhighlight lang="perl">use strict;
use warnings;
use bignum;
my @x = <0.1 -1>; # Initial guess of location of minimum.
 
printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);</langsyntaxhighlight>
{{out}}
<pre>The minimum is at x[0] = 0.107653, x[1] = -1.223370</pre>
Line 463 ⟶ 856:
=={{header|Phix}}==
{{trans|Go}}
<syntaxhighlight lang="phix">
with javascript_semantics
-- Function for which minimum is to be found.
function g(sequence x)
    atom {x0,x1} = x
    return (x0-1)*(x0-1)*exp(-x1*x1) +
           x1*(x1+2)*exp(-2*x0*x0)
end function

-- Provides a rough calculation of gradient g(x).
function gradG(sequence p)
    atom {x,y} = p,
         xm1 = x-1,
         emyy = exp(-y*y),
         em2xx = exp(-2*x*x),
         xm12emyy = xm1*2*emyy
    p = {xm12emyy - 4*x*em2xx*y*(y+2),
         -xm12emyy*xm1*y + em2xx*(2*y+2)}
    return p
end function

function steepestDescent(sequence x, atom alpha, tolerance)
    sequence fi = gradG(x)                  -- Calculate initial gradient
    atom g0 = g(x),                         -- Initial estimate of result
         delG = sqrt(sum(sq_mul(fi,fi))),   -- & norm
         b = alpha / delG
    integer icount = 0                      -- iteration limitor/sanity
    while delG>tolerance do                 -- Iterate until <= tolerance
        x = sq_sub(x,sq_mul(b,fi))          -- Calculate next value
        fi = gradG(x)                       -- next gradient
        delG = sqrt(sum(sq_mul(fi,fi)))     -- next norm
        b = alpha / delG
        atom g1 = g(x)                      -- next value
        if g1>g0 then
            alpha /= 2                      -- Adjust parameter
        else
            g0 = g1
        end if
        icount += 1
        assert(icount<100)                  -- (increase if/when necessary)
    end while
    return x
end function

constant alpha = 0.1, tolerance = 0.00000000000001
sequence x = steepestDescent({0.1,-1}, alpha, tolerance)
printf(1,"Testing steepest descent method:\n")
printf(1,"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n", {x[1], x[2], g(x)})
</syntaxhighlight>
{{out}}
Results now match (at least) Go, Fortran, Julia, and Wren [to >=6dp].
Some slightly unexpected/unusual (but I think acceptable) variance was noted when playing with different tolerances, be warned.<br>
Results on 32/64 bit Phix agree to 13dp, which I therefore choose to show in full here (but otherwise would not really trust).
<pre>
Testing steepest descent method:
The minimum is at x = 0.1076268435484, y = -1.2232596638399 for which f(x, y) = -0.750063420551493
</pre>
 
=={{header|Racket}}==
I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with <var>grad-</var> and <var>del-</var>
 
<langsyntaxhighlight lang="racket">#lang racket
 
(define (apply-vector f v)
(module+ main
(Gradient-descent))
</syntaxhighlight>
 
{{out}}
=={{header|Raku}}==
(formerly Perl 6)
{{trans|Go}}
<syntaxhighlight lang="raku" line># 20200904 Updated Raku programming solution
<lang perl6>use v6.d;
 
sub steepestDescent(@x, $alpha is copy, $h is copy) {
 
my $g0 = g |@x ; # Initial estimate of result.
 
my @fi = gradG |@x ; # Calculate initial gradient
 
my $b = $alpha / my $delG = sqrt ( sum @fi»² ) ; # Calculate initial norm.
 
while ( $delG > $h ) { # Iterate until value is <= tolerance.
 
@x «-»= $b «*« @fi; # Calculate next value.

@fi = gradG |@x ; # Calculate next gradient and next value
 
$b = $alpha / ($delG = sqrt( sum @fi»² )); # Calculate next norm.
 
my $g1 = g |@x ;
 
$g1 > $g0 ?? ( $alpha /= 2 ) !! ( $g0 = $g1 ) # Adjust parameter.
}
}
 
sub gradG(\x,\y) { # gives a rough calculation of gradient g(x).
2*(x-1)*exp(-y²) - 4*x*exp(-2*x²)*y*(y+2) , -2*(x-1)²*y*exp(-y²) + exp(-2*x²)*(2*y+2)
}
 
# Function for which minimum is to be found.
sub g(\x,\y) { (x-1)² * exp(-y²) + y*(y+2) * exp(-2*x²) }
 
 
my $tolerance = 0.0000006 ; my $alpha = 0.1;
my @x = 0.1, -1; # Initial guess of location of minimum.

steepestDescent(@x, $alpha, $tolerance);
say "Testing steepest descent method:";
say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1];
</syntaxhighlight>
{{out}}
<pre>Testing steepest descent method:
The minimum is at x[0] = 0.10762682432947938, x[1] = -1.2232596548816097
</pre>
 
=={{header|REXX}}==
The &nbsp; ''tolerance'' &nbsp; can be much smaller; &nbsp; a tolerance of &nbsp; '''1e-200''' &nbsp; was tested. &nbsp; It works, but causes the program to execute a bit slower, but still sub-second execution time.
<syntaxhighlight lang="rexx">/*REXX pgm searches for minimum values of the bi─variate function (AKA steepest descent)*/
numeric digits length( e() ) % 2
tolerance= 0.0000006
alpha= 0.1
x.0= 0.1; x.1= -1; n= 2
say center(' testing for the steepest descent method ', 79, "═")
call steepestD /* ┌──◄── # digs past dec. point ─►───┐*/
say 'The minimum is at: x[0]=' format(x.0,,9) " x[1]=" format(x.1,,9)
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
g: return (x.0-1)**2 * exp(- (x.1**2) ) + x.1 * (x.1 + 2) * exp(-2 * x.0**2)
/*──────────────────────────────────────────────────────────────────────────────────────*/
gradG: x= x.0; y= x.1 /*define X and Y from the X array.*/
xm= x-1; eny2= exp(-y*y); enx2= exp(-2 * x**2) /*shortcuts.*/
z.0= 2 * xm * eny2 - 4 * x * enx2 * y * (y+2)
z.1= -2 * xm**2 * y * eny2 + enx2 * (y+2) + enx2 * y
return /*a rough calculation of the gradient.*/
/*──────────────────────────────────────────────────────────────────────────────────────*/
steepestD: g0= g() /*the initial estimate of the result. */
call gradG /*calculate the initial gradient. */
delG= sqrt(z.0**2 + z.1**2) /* " " " norm. */
b= alpha / delG
 
do while delG>tolerance
x.0= x.0 - b * z.0; x.1= x.1 - b * z.1
call gradG
delG= sqrt(z.0**2 + z.1**2); if delG=0 then return
b= alpha / delG
g1= g() /*find minimum.*/
if g1>g0 then alpha= alpha * .5 /*adjust ALPHA.*/
else g0= g1 /* " G0. */
end /*while*/
return
/*──────────────────────────────────────────────────────────────────────────────────────*/
e: e= 2.7182818284590452353602874713526624977572470936999595749669676277240766303 || ,
535475945713821785; return e
/*──────────────────────────────────────────────────────────────────────────────────────*/
exp: procedure; parse arg x; ix= x%1; if abs(x-ix)>.5 then ix= ix + sign(x); x= x - ix
numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2
do j=0 while h>9; m.j=h; h= h % 2 + 1; end /*j*/
do k=j+5 to 0 by -1; numeric digits m.k; g=(g+x/g)*.5; end /*k*/; return g</syntaxhighlight>
{{out|output|text=&nbsp; when using the internal default inputs:}}
<pre>
═══════════════════ testing for the steepest descent method ═══════════════════
The minimum is at: x[0]= 0.107626824 x[1]= -1.223259655
</pre>
 
=={{header|Scala}}==
{{trans|Go}}
<langsyntaxhighlight lang="scala">object GradientDescent {
 
/** Steepest descent method modifying input values*/
}
}
</syntaxhighlight>
{{out}}
<pre>
</pre>

=={{header|TypeScript}}==
<br><br>
 
<syntaxhighlight lang="typescript">
// Using the steepest-descent method to search
// for minimum values of a multi-variable function
gradientDescentMain();
 
</syntaxhighlight>
{{out}}
<pre>
</pre>
* &nbsp; [Linear Regression using Gradient Descent by Adarsh Menon]
<br><br>
<syntaxhighlight lang="typescript">
let data: number[][] =
[[32.5023452694530, 31.70700584656990],
 
gradientDescentMain();
</syntaxhighlight>
 
=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./fmt" for Fmt
 
// Function for which minimum is to be found.
var g = Fn.new { |x|
return (x[0]-1)*(x[0]-1)*
(-x[1]*x[1]).exp + x[1]*(x[1]+2)*
(-2*x[0]*x[0]).exp
}
 
var x = p[0]
var y = p[1]
return [2*(x-1)*(-y*y).exp - 4*x*(-2*x*x).exp*y*(y+2),
-2*(x-1)*(x-1)*y*(-y*y).exp + (-2*x*x).exp*(y+2) + (-2*x*x).exp*y]
}
 
steepestDescent.call(x, alpha, tolerance)
System.print("Testing steepest descent method:")
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))</langsyntaxhighlight>
 
{{out}}
=={{header|zkl}}==
{{trans|Go}} with tweaked gradG
<langsyntaxhighlight lang="zkl">fcn steepestDescent(f, x,y, alpha, h){
g0:=f(x,y); # Initial estimate of result.
fix,fiy := gradG(f,x,y,h); # Calculate initial gradient
g0:=f(x,y);
return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h)
}</syntaxhighlight>
<syntaxhighlight lang="zkl">fcn f(x,y){ # Function for which minimum is to be found.
(x - 1).pow(2)*(-y.pow(2)).exp() +
y*(y + 2)*(-2.0*x.pow(2)).exp()
println("Testing steepest descent method:");
println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));</langsyntaxhighlight>
{{out}}
<pre>
</pre>