Gradient descent: Difference between revisions

From Rosetta Code
=={{header|ALGOL 68}}==
{{works with|ALGOL 68G|Any - tested with release 2.8.3.win32}}
{{Trans|Go}} modified to use the actual gradient function -
{{Trans|Fortran}}
The results agree with the Fortran sample and the Julia sample to 6 places.
<syntaxhighlight lang="algol68">PROC steepest descent = ( REF[]LONG REAL x, LONG REAL alphap, tolerance )VOID:
BEGIN
    LONG REAL alpha := alphap;
...
    print( ( "Testing steepest descent method:", newline ) );
    print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) )
END</syntaxhighlight>
{{out}}
<pre>
Testing steepest descent method:
The minimum is at x[0] = 0.107627, x[1] = -1.223260
</pre>

=={{header|ALGOL W}}==
{{Trans|ALGOL 68}} which is a {{Trans|Go}} with the gradient function from {{Trans|Fortran}}
The results agree (to 6 places) with the Fortran and Julia samples.
<syntaxhighlight lang="algolw">begin
procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ;
begin
long real array fi ( 0 :: 1 );
long real alpha, g0, g1, delG, b;
alpha := alphap;
g0 := g( x ); % Initial estimate of result. %
% Calculate initial gradient. %
gradG( fi, x );
% Calculate initial norm. %
delG := 0.0;
for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
b := alpha / delG;
% Iterate until value is <= tolerance. %
while delG > tolerance do begin
% Calculate next value. %
for i := 0 until 1 do x(i) := x( i ) - ( b * fi(i) );
% Calculate next gradient. %
gradG( fi, x );
% Calculate next norm. %
delG := 0;
for i := 0 until 1 do delG := delg + ( fi( i ) * fi( i ) );
delG := longSqrt( delG );
if delG > 0 then begin
b := alpha / delG;
% Calculate next value. %
g1 := g( x );
% Adjust parameter. %
if g1 > g0
then alpha := alpha / 2
else g0 := g1
end if_delG_gt_0
end while_delG_gt_tolerance
end steepestDescent ;
% Provides a rough calculation of gradient g(x). %
procedure gradG ( long real array z, p ( * ) ) ;
begin
long real x, y;
x := p( 0 );
y := p( 1 );
z( 0 ) := 2 * ( x - 1 ) * longExp( - ( y * y ) )
- 4 * x * longExp( -2 * ( x * x ) ) * y * ( y + 2 );
z( 1 ) := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * longExp( - y * y )
+ longExp( -2 * x * x ) * ( y + 2 )
+ longExp( -2 * x * x ) * y
end gradG ;
% Function for which minimum is to be found. %
long real procedure g ( long real array x ( * ) ) ;
( x( 0 ) - 1 ) * ( x( 0 ) - 1 ) * longExp( - x( 1 ) * x( 1 ) )
+ x( 1 ) * ( x( 1 ) + 2 ) * longExp( - 2 * x( 0 ) * x( 0 ) )
;
begin
long real alpha, tolerance;
long real array x ( 0 :: 1 );
x( 0 ) := 0.1; % Initial guess of location of minimum. %
x( 1 ) := -1;
tolerance := 0.0000006;
alpha := 0.1;
steepestDescent( x, alpha, tolerance );
r_format := "A"; r_w := 11; r_d := 7; s_w := 0; % output formatting %
write( "Testing steepest descent method:" );
write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) )
end
end.</syntaxhighlight>
{{out}}
<pre>
Testing steepest descent method:
The minimum is at x(0) = 0.1076268, x(1) = -1.2232596
</pre>




=={{header|Common Lisp}}==
{{works with|SBCL}}

<syntaxhighlight lang="Lisp">
(defparameter *epsilon* (sqrt 0.0000001d0))

(defun myfun (x y)
(+ (* (- x 1)
(- x 1)
(exp (- (expt y 2))))
(* y (+ 2 y)
(exp (- (* 2 (expt x 2)))))))

(defun xd (x y)
(/ (- (myfun (* x (+ 1 *epsilon*)) y)
(myfun (* x (- 1 *epsilon*)) y))
(* 2 x *epsilon*)))

(defun yd (x y)
(/ (- (myfun x (* y (+ 1 *epsilon*)))
(myfun x (* y (- 1 *epsilon*))))
(* 2 y *epsilon*)))

(defun gd ()
(let ((x 0.1d0)
(y -1.0d0))
(dotimes (i 31)
(setf x (- x (* 0.3d0 (xd x y))))
(setf y (- y (* 0.3d0 (yd x y )))))
(list x y)))



</syntaxhighlight>
{{out}}
<pre>
(0.10762684380416455 -1.2232596637047382)
</pre>


=={{header|Forth}}==
{{works with|SwiftForth}}
{{libheader|fpmath}}


Approach with partial differential equations GD1, GD2

<syntaxhighlight lang="forth">

0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!

: ^2 FDUP F* ;

\ gradient for x, analytical function

: GD1 ( F: x y -- dfxy)
to py to px
2e px 1e F- F* py ^2 FNEGATE FEXP F*
-4e px F* py F* py 2e F+ F* px ^2 -2e F* FEXP F*
F+
;

\ gradient for y, analytical function

: GD2 ( F: x y -- dfxy )
to py to px
px ^2 -2e F* FEXP FDUP
py F* FSWAP
py 2e F+ F* F+
py ^2 FNEGATE FEXP py F*
px 1e F- ^2 F*
-2e F*
F+
;

\ gradient descent
: GD ( x y n -- ) \ gradient descent, initial guess and number of iterations
FOVER FOVER y F! x F!
0 do
FOVER FOVER
GD1 0.3e F* x F@ FSWAP F- x F!
GD2 0.3e F* y F@ FSWAP F- y F!
x F@ y F@ FOVER FOVER FSWAP F. F. CR
loop
;

</syntaxhighlight>

Approach with main function and numerical gradients xd xy:

<syntaxhighlight lang="forth">

0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!

1e-6 fvalue EPSILON

: fep EPSILON FSQRT ;


: MYFUN ( x y -- fxy )
FDUP FROT FDUP ( y y x x )
( x ) 1e F- FDUP F* ( y y x a )
FROT ( y x a y )
( y ) FDUP F* FNEGATE FEXP F* ( y x b )
FROT ( x b y )
( y ) FDUP 2e F+ F* ( x b c )
FROT ( b c x )
( x ) FDUP F* -2e F* FEXP F*
F+
;


: xd ( x y )
to py to px
px 1e fep F+ F* py myfun px 1e fep F- F* py myfun F- 2e px F* fep F* F/
;

: xy ( x y )
to py to px
py 1e fep F+ F* px FSWAP myfun py 1e fep F- F* px FSWAP myfun F- 2e py F* fep F* F/
;


: GDB ( x y n -- ) \ gradient descent, initial guess and number of iterations
CR FOVER FOVER y F! x F!
0 do
FOVER FOVER
xd 0.3e F* x F@ FSWAP F- x F!
xy 0.3e F* y F@ FSWAP F- y F!
x F@ y F@ FOVER FOVER FSWAP F. F. CR
loop
;
</syntaxhighlight>

{{out}}
<pre>

0.1e -1e 20 GD
0.1810310 -1.1787894
0.1065262 -1.1965306
0.1144636 -1.2181779
0.1075002 -1.2206140
0.1082816 -1.2227594
0.1076166 -1.2230041
0.1076894 -1.2232108
0.1076261 -1.2232350
0.1076328 -1.2232549
0.1076267 -1.2232573
0.1076274 -1.2232592
0.1076268 -1.2232594
0.1076269 -1.2232596
0.1076268 -1.2232596
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597

0.1e -1e 20 GDB
0.1810310 -1.1787893
0.1065262 -1.1965305
0.1144636 -1.2181779
0.1075002 -1.2206140
0.1082816 -1.2227594
0.1076166 -1.2230041
0.1076894 -1.2232108
0.1076261 -1.2232350
0.1076328 -1.2232549
0.1076268 -1.2232573
0.1076274 -1.2232592
0.1076268 -1.2232594
0.1076269 -1.2232596
0.1076268 -1.2232596
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
0.1076268 -1.2232597
</pre>





=={{header|Fortran}}==
...
(%i7) fortran(%o5);
      (-2*(x-1)**2*y*exp(-y**2))+exp(-2*x**2)*(y+2)+exp(-2*x**2)*y
(%o7)                                done
The optimization subroutine GD sets the reverse communication variable IFLAG. This allows the evaluation of the function and the gradient to be done separately.
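For readers unfamiliar with the reverse-communication pattern, here is a rough Python sketch of the same driver/optimizer split. It is an illustrative sketch only, not one of the task entries; the names <code>evalfg</code>, <code>gd</code>, <code>eta</code> and <code>tol</code> simply mirror EVALFG, GD, ETA and TOL in the Fortran listing below, and the exact form of the convergence test is an assumption.
<syntaxhighlight lang="python"># Illustrative only: the optimizer never evaluates the objective itself; it returns
# a flag asking the caller to recompute the gradient and call back (cf. IFLAG below).
import math

def evalfg(x):                        # objective and gradient (same formulas as EVALFG)
    f = (x[0]-1)**2*math.exp(-x[1]**2) + x[1]*(x[1]+2)*math.exp(-2*x[0]**2)
    g = [2*(x[0]-1)*math.exp(-x[1]**2) - 4*x[0]*math.exp(-2*x[0]**2)*x[1]*(x[1]+2),
         -2*(x[0]-1)**2*x[1]*math.exp(-x[1]**2)
         + math.exp(-2*x[0]**2)*(x[1]+2) + math.exp(-2*x[0]**2)*x[1]]
    return f, g

def gd(x, g, eta=0.3, tol=1e-15):     # one reverse-communication call
    if sum(gi*gi for gi in g) < tol:  # assumed convergence test on the squared norm
        return 0, x                   # 0: done
    return 1, [xi - eta*gi for xi, gi in zip(x, g)]  # 1: caller must re-evaluate g

x = [-0.1, -1.0]
f, g = evalfg(x)                      # evaluation happens in the driver, not in gd()
for _ in range(1_000_000):
    iflag, x = gd(x, g)
    if iflag == 0:
        break
    f, g = evalfg(x)
print(x, f)                           # roughly [0.107627, -1.223260], f about -0.750063</syntaxhighlight>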
<syntaxhighlight lang="fortran">      SUBROUTINE EVALFG (N, X, F, G)
       IMPLICIT NONE
       INTEGER N
       DOUBLE PRECISION X(N), F, G(N)
       F = (X(1) - 1.D0)**2 * EXP(-X(2)**2) +
     $      X(2) * (X(2) + 2.D0) * EXP(-2.D0 * X(1)**2)
       G(1) = 2.D0 * (X(1) - 1.D0) * EXP(-X(2)**2) - 4.D0 * X(1) *
     $        EXP(-2.D0 * X(1)**2) * X(2) * (X(2) + 2.D0)
       G(2) = (-2.D0 * (X(1) - 1.D0)**2 * X(2) * EXP(-X(2)**2)) +
     $        EXP(-2.D0 * X(1)**2) * (X(2) + 2.D0) +
     $        EXP(-2.D0 * X(1)**2) * X(2)
       RETURN
       END
...
*___Name______Type________In/Out___Description_________________________
*   N         Integer     In       Number of Variables.
*   X(N)      Double      Both     Variables
*   G(N)      Double      Both     Gradient
*   TOL       Double      In       Relative convergence tolerance
*   IFLAG     Integer     Out      Reverse Communication Flag
*                                    0 on output: done
*                                    1 compute G
*-----------------------------------------------------------------------
      SUBROUTINE GD (N, X, G, TOL, IFLAG)
       IMPLICIT NONE
       INTEGER N, IFLAG
       DOUBLE PRECISION X(N), G(N), TOL
       DOUBLE PRECISION ETA
       PARAMETER (ETA = 0.3D0)          ! Learning rate
       INTEGER I
       DOUBLE PRECISION GNORM           ! norm of gradient

       GNORM = 0.D0                     ! convergence test
       DO I = 1, N
         GNORM = GNORM + G(I)**2
...
         RETURN                         ! success
       END IF

       DO I = 1, N                      ! take step
         X(I) = X(I) - ETA * G(I)
       END DO
       IFLAG = 1
       RETURN                           ! let main program evaluate G
       END ! of gd

...
       PARAMETER (N = 2)
       INTEGER ITER, J, IFLAG
       DOUBLE PRECISION X(N), F, G(N), TOL

       X(1) = -0.1D0                    ! initial values
       X(2) = -1.0D0
       TOL = 1.D-15
       CALL EVALFG (N, X, F, G)
       IFLAG = 0
       DO J = 1, 1 000 000
...
       STOP 'too many iterations!'

  50   PRINT '(A, I7, A, F19.15, A, F19.15, A, F19.15)',
     $   'After ', ITER, ' steps, found minimum at x=',
     $   X(1), ' y=', X(2), ' of f=', F
       STOP 'program complete'
       END
</syntaxhighlight>
{{out}}
<pre>After 31 steps, found minimum at x= 0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493
STOP program complete
</pre>
=={{header|Go}}==
This is a translation of the C# code in the book excerpt linked to above and hence also of the first Typescript example below.

However, since it was originally written, I've substituted Fortran's gradient function for the original one (see Talk page) which now gives results which agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly.
<syntaxhighlight lang="go">package main

import (
    "fmt"
    "math"
)

func steepestDescent(x []float64, alpha, tolerance float64) {
    n := len(x)
    g0 := g(x) // Initial estimate of result.

    // Calculate initial gradient.
    fi := gradG(x)

    // Calculate initial norm.
...
            x[i] -= b * fi[i]
        }

        // Calculate next gradient.
        fi = gradG(x)

        // Calculate next norm.
...
}

// Provides a rough calculation of gradient g(p).
func gradG(p []float64) []float64 {
    z := make([]float64, len(p))
    x := p[0]
    y := p[1]
    z[0] = 2*(x-1)*math.Exp(-y*y) - 4*x*math.Exp(-2*x*x)*y*(y+2)
    z[1] = -2*(x-1)*(x-1)*y*math.Exp(-y*y) + math.Exp(-2*x*x)*(y+2) + math.Exp(-2*x*x)*y
    return z
}
...
    steepestDescent(x, alpha, tolerance)
    fmt.Println("Testing steepest descent method:")
    fmt.Printf("The minimum is at x = %f, y = %f for which f(x, y) = %f.\n", x[0], x[1], g(x))
}</syntaxhighlight>

{{out}}
<pre>
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.
</pre>

=={{header|J}}==
{{works with|Jsoftware|>= 9.0.1}}
{{libheader|math/calculus}}

In earlier versions without calculus library, D. can be used instead of pderiv

<syntaxhighlight lang="J">


load 'math/calculus'
coinsert 'jcalculus'

NB. initial guess

go=: 0.1 _1

NB. function

func=: monad define
'xp yp' =. y
((1-xp)*(1-xp) * ] ^-(yp)^2) + yp*(yp+2)* ] ^ _2 * xp^2 NB. evaluates from right to left without precedence
)

NB. gradient descent

shortygd =: monad define
go=.y
go=. go - 0.03 * func pderiv 1 ] go NB. use D. instead of pderiv for earlier versions
)

NB. use:

shortygd ^:_ go

</syntaxhighlight>
{{out}}
<pre>
0.107627 _1.22326
</pre>




=={{header|jq}}==
'''Adapted from [[#Go|Go]]'''

'''Works with jq and gojq, the C and Go implementations of jq'''

The function, g, and its gradient, gradG, are defined as 0-arity filters so that they
can be passed as formal parameters to the `steepestDescent` function.
<syntaxhighlight lang=jq>
def sq: .*.;

def ss(stream): reduce stream as $x (0; . + ($x|sq));

# Function for which minimum is to be found: input is a 2-array
def g:
. as $x
| (($x[0]-1)|sq) *
(( - ($x[1]|sq))|exp) + $x[1]*($x[1]+2) *
((-2*($x[0]|sq))|exp);

# Provide a rough calculation of gradient g(p):
def gradG:
. as [$x, $y]
| ($x*$x) as $x2
| [2*($x-1)*(-($y|sq)|exp) - 4 * $x * (-2*($x|sq)|exp) * $y*($y+2),
- 2*($x-1)*($x-1)*$y*((-$y*$y)|exp)
+ ((-2*$x2)|exp)*($y+2) + ((-2*$x2)|exp)*$y] ;

def steepestDescent(g; gradG; $x; $alpha; $tolerance):
($x|length) as $n
| {g0 : ($x|g), # Initial estimate of result
alpha: $alpha,
fi: ($x|gradG), # Calculate initial gradient
x: $x,
iterations: 0
}

# Calculate initial norm:
| .delG = (ss( .fi[] ) | sqrt )
| .b = .alpha/.delG

# Iterate until value is <= $tolerance:
| until (.delG <= $tolerance;
.iterations += 1
# Calculate next value:
| reduce range(0; $n) as $i(.;
.x[$i] += (- .b * .fi[$i]) )

# Calculate next gradient:
| .fi = (.x|gradG)

# Calculate next norm:
| .delG = (ss( .fi[] ) | sqrt)
| .b = .alpha/.delG

# Calculate next value:
| .g1 = (.x|g)

# Adjust parameter:
| if .g1 > .g0
then .alpha /= 2
else .g0 = .g1
end
) ;

def tolerance: 0.0000006;
def alpha: 0.1;
def x: [0.1, -1]; # Initial guess of location of minimum.

def task:
steepestDescent(g; gradG; x; alpha; tolerance)
| "Testing the steepest descent method:",
"After \(.iterations) iterations,",
"the minimum is at x = \(.x[0]), y = \(.x[1]) for which f(x, y) = \(.x|g)." ;

task
</syntaxhighlight>
{{output}}
<pre>
Testing the steepest descent method:
After 24 iterations,
the minimum is at x = 0.10762682432948058, y = -1.2232596548816101 for which f(x, y) = -0.7500634205514924.
</pre>


=={{header|Julia}}==
<syntaxhighlight lang="julia">using Optim, Base.MathConstants

f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2)

println(optimize(f, [0.1, -1.0], GradientDescent()))
</syntaxhighlight>{{out}}
<pre>
Results of Optimization Algorithm
...
 * Gradient Calls: 35
</pre>

=={{header|Nim}}==
{{trans|Go}}
The gradient function has been rewritten to remove a term in the partial derivative with respect to y (two terms instead of three). This doesn’t change the result which agrees with those of Go, Fortran, Julia, etc.
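For reference, the two forms are algebraically identical: e^(-2x^2)*(y+2) + e^(-2x^2)*y = e^(-2x^2)*(2y+2) = 2*(y+1)*e^(-2x^2), which is the factored form used in <code>gradG</code> below.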

<syntaxhighlight lang="nim">import math, strformat


func steepDescent(g: proc(x: openArray[float]): float;
gradG: proc(p: openArray[float]): seq[float];
x: var openArray[float]; alpha, tolerance: float) =
let n = x.len
var alpha = alpha
var g0 = g(x) # Initial estimate of result.

# Calculate initial gradient.
var fi = gradG(x)

# Calculate initial norm.
var delG = 0.0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
var b = alpha / delG

# Iterate until value is <= tolerance.
while delG > tolerance:
# Calculate next value.
for i in 0..<n:
x[i] -= b * fi[i]

# Calculate next gradient.
fi = gradG(x)

# Calculate next norm.
delG = 0
for i in 0..<n:
delG += fi[i] * fi[i]
delG = sqrt(delG)
b = alpha / delG

# Calculate next value.
let g1 = g(x)

# Adjust parameter.
if g1 > g0: alpha *= 0.5
else: g0 = g1


when isMainModule:

func g(x: openArray[float]): float =
## Function for which minimum is to be found.
(x[0]-1) * (x[0]-1) * exp(-x[1]*x[1]) + x[1] * (x[1]+2) * exp(-2*x[0]*x[0])

func gradG(p: openArray[float]): seq[float] =
## Provides a rough calculation of gradient g(p).
result = newSeq[float](p.len)
let x = p[0]
let y = p[1]
result[0] = 2 * (x-1) * exp(-y*y) - 4 * x * exp(-2*x*x) * y * (y+2)
result[1] = -2 * (x-1) * (x-1) * y * exp(-y*y) + 2 * (y+1) * exp(-2*x*x)

const
Tolerance = 0.0000006
Alpha = 0.1

var x = [0.1, -1.0] # Initial guess of location of minimum.

steepDescent(g, gradG, x, Alpha, Tolerance)
echo "Testing steepest descent method:"
echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"</syntaxhighlight>

{{out}}
<pre>Testing steepest descent method:
The minimum is at x = 0.107626824329, y = -1.223259654882 for which f(x, y) = -0.750063420551</pre>


=={{header|Perl}}==
Calculate with <code>bignum</code> for numerical stability.
{{trans|Raku}}
<syntaxhighlight lang="perl">use strict;
use warnings;
use bignum;
...
my @x = <0.1 -1>; # Initial guess of location of minimum.

printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);</syntaxhighlight>
{{out}}
<pre>The minimum is at x[0] = 0.107653, x[1] = -1.223370</pre>
=={{header|Phix}}==
{{trans|Go}}
<syntaxhighlight lang="phix">with javascript_semantics
-- Function for which minimum is to be found.
function g(sequence x)
    atom {x0,x1} = x
    return (x0-1)*(x0-1)*exp(-x1*x1) +
           x1*(x1+2)*exp(-2*x0*x0)
end function

-- Provides a rough calculation of gradient g(x).
function gradG(sequence p)
    atom {x,y} = p,
         xm1 = x-1,
         emyy = exp(-y*y),
         em2xx = exp(-2*x*x),
         xm12emyy = xm1*2*emyy
    p = {xm12emyy - 4*x*em2xx*y*(y+2),
         -xm12emyy*xm1*y + em2xx*(2*y+2)}
    return p
end function

function steepestDescent(sequence x, atom alpha, tolerance)
    sequence fi = gradG(x)                  -- Calculate initial gradient
    atom g0 = g(x),                         -- Initial estimate of result
         delG = sqrt(sum(sq_mul(fi,fi))),   -- & norm
         b = alpha / delG
    integer icount = 0                      -- iteration limitor/sanity
    while delG>tolerance do                 -- Iterate until <= tolerance
        x = sq_sub(x,sq_mul(b,fi))          -- Calculate next value
        fi = gradG(x)                       -- next gradient
        delG = sqrt(sum(sq_mul(fi,fi)))     -- next norm
        b = alpha / delG
        atom g1 = g(x)                      -- next value
        if g1>g0 then
            alpha /= 2                      -- Adjust parameter
        else
            g0 = g1
        end if
        icount += 1
        assert(icount<100)                  -- (increase if/when necessary)
    end while
    return x
end function

constant alpha = 0.1, tolerance = 0.00000000000001
sequence x = steepestDescent({0.1,-1}, alpha, tolerance)
printf(1,"Testing steepest descent method:\n")
printf(1,"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n", {x[1], x[2], g(x)})</syntaxhighlight>
{{out}}
Results match Fortran, most others to 6 or 7dp<br>
Some slightly unexpected/unusual (but I think acceptable) variance was noted when playing with different tolerances, be warned.<br>
Results on 32/64 bit Phix agree to 13dp, which I therefore choose to show in full here (but otherwise would not really trust).
<pre>
Testing steepest descent method:
The minimum is at x = 0.1076268435484, y = -1.2232596638399 for which f(x, y) = -0.750063420551493
</pre>


=={{header|Racket}}==
I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with <var>grad-</var> and <var>del-</var>

<syntaxhighlight lang="racket">#lang racket

(define (apply-vector f v)
...
(module+ main
  (Gradient-descent))
</syntaxhighlight>

{{out}}
=={{header|Raku}}==
(formerly Perl 6)
{{trans|Go}}
<syntaxhighlight lang="raku" line># 20200904 Updated Raku programming solution

sub steepestDescent(@x, $alpha is copy, $h) {

   my $g0 = g |@x ; # Initial estimate of result.

   my @fi = gradG |@x ; # Calculate initial gradient

   my $b = $alpha / my $delG = sqrt ( sum @fi»² ) ; # Calculate initial norm.

   while ( $delG > $h ) {   # Iterate until value is <= tolerance.

      @x «-»= $b «*« @fi; # Calculate next value.

      @fi = gradG |@x ; # Calculate next gradient and next value

      $b = $alpha / ($delG = sqrt( sum @fi»² )); # Calculate next norm.

      my $g1 = g |@x ;

      $g1 > $g0 ?? ( $alpha /= 2 ) !! ( $g0 = $g1 ) # Adjust parameter.
...
}

sub gradG(\x,\y) { # gives a rough calculation of gradient g(x).
   2*(x-1)*exp(-y²) - 4*x*exp(-2*x²)*y*(y+2) , -2*(x-1)²*y*exp(-y²) + exp(-2*x²)*(2*y+2)
}

# Function for which minimum is to be found.
sub g(\x,\y) { (x-1)² * exp(-y²) + y*(y+2) * exp(-2*x²) }

my $tolerance = 0.0000006 ; my $alpha = 0.1;
...
say "Testing steepest descent method:";
say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1];
</syntaxhighlight>
{{out}}
<pre>Testing steepest descent method:
The minimum is at x[0] = 0.10762682432947938, x[1] = -1.2232596548816097
</pre>


=={{header|REXX}}==
The &nbsp; ''tolerance'' &nbsp; can be much smaller; &nbsp; a tolerance of &nbsp; '''1e-200''' &nbsp; was tested. &nbsp; It works, but causes the program to execute a bit slower, but still sub-second execution time.
<syntaxhighlight lang="rexx">/*REXX pgm searches for minimum values of the bi─variate function (AKA steepest descent)*/
numeric digits (length( e() ) - length(.) ) % 2  /*use half of number decimal digs in E.*/
tolerance= 1e-30                                 /*use a much smaller tolerance for REXX*/
alpha= 0.1
x.0= 0.1;    x.1= -1
say center(' testing for the steepest descent method ', 79, "═")
call steepestD                                   /* ┌──◄── # digs past dec. point ─►───┐*/
say 'The minimum is at:   x[0]=' format(x.0,,9)  "   x[1]=" format(x.1,,9)
exit                                             /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
g: return (x.0-1)**2 * exp(- (x.1**2) ) + x.1 * (x.1 + 2) * exp(-2 * x.0**2)
/*──────────────────────────────────────────────────────────────────────────────────────*/
gradG: x= x.0;   y= x.1                          /*define  X  and  Y  from the X array. */
       xm= x-1;  eny2= exp(-y*y);  enx2= exp(-2 * x**2)                     /*shortcuts.*/
       z.0=  2 * xm    * eny2  -  4 * x * enx2 * y * (y+2)
       z.1= -2 * xm**2 * y * eny2   +   enx2 * (y+2)   +   enx2 * y
       return                                    /*a rough calculation of the gradient. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
steepestD: g0= g()                               /*the initial estimate of the result.  */
           call gradG                            /*calculate the initial gradient.      */
           delG= sqrt(z.0**2 + z.1**2)           /*    "      "     "    norm.          */
           b= alpha / delG

           do while delG>tolerance
              x.0= x.0 - b * z.0;   x.1= x.1 - b * z.1
              call gradG
              delG= sqrt(z.0**2 + z.1**2);   if delG=0 then return
              b= alpha / delG
              g1= g()                            /*find minimum.*/
              if g1>g0 then alpha= alpha * .5    /*adjust ALPHA.*/
                       else g0= g1               /*   "    G0.  */
           end   /*while*/
           return
/*──────────────────────────────────────────────────────────────────────────────────────*/
e: e= 2.7182818284590452353602874713526624977572470936999595749669676277240766303 || ,
      535475945713821785;             return e
/*──────────────────────────────────────────────────────────────────────────────────────*/
exp: procedure; parse arg x; ix= x%1; if abs(x-ix)>.5 then ix= ix + sign(x); x= x - ix
...
     numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2
       do j=0 while h>9;      m.j=h;               h= h % 2 + 1;   end /*j*/
       do k=j+5 to 0 by -1;   numeric digits m.k;  g=(g+x/g)*.5;   end /*k*/;   return g</syntaxhighlight>
{{out|output|text=&nbsp; when using the internal default inputs:}}
<pre>
═══════════════════ testing for the steepest descent method ═══════════════════
The minimum is at:   x[0]= 0.107626824    x[1]= -1.223259655
</pre>


=={{header|Scala}}==
{{trans|Go}}
<syntaxhighlight lang="scala">object GradientDescent {

  /** Steepest descent method modifying input values*/
...
  }
}
</syntaxhighlight>
{{out}}
<pre>
...
</pre>
<br><br>

<syntaxhighlight lang="typescript">
// Using the steepest-descent method to search
// for minimum values of a multi-variable function
...
gradientDescentMain();

</syntaxhighlight>
{{out}}
<pre>
...
</pre>
* &nbsp; [Linear Regression using Gradient Descent by Adarsh Menon]
<br><br>
<syntaxhighlight lang="typescript">
let data: number[][] =
[[32.5023452694530, 31.70700584656990],
...

gradientDescentMain();
</syntaxhighlight>


=={{header|Wren}}==
{{trans|Go}}
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./fmt" for Fmt

// Function for which minimum is to be found.
var g = Fn.new { |x|
    return (x[0]-1)*(x[0]-1)*
        (-x[1]*x[1]).exp + x[1]*(x[1]+2)*
        (-2*x[0]*x[0]).exp
}

// Provides a rough calculation of gradient g(p).
var gradG = Fn.new { |p|
    var x = p[0]
    var y = p[1]
    return [2*(x-1)*(-y*y).exp - 4*x*(-2*x*x).exp*y*(y+2),
        -2*(x-1)*(x-1)*y*(-y*y).exp + (-2*x*x).exp*(y+2) + (-2*x*x).exp*y]
}

var steepestDescent = Fn.new { |x, alpha, tolerance|
    var n = x.count
    var g0 = g.call(x) // Initial estimate of result.

    // Calculate initial gradient.
    var fi = gradG.call(x)

    // Calculate initial norm.
    var delG = 0
    for (i in 0...n) delG = delG + fi[i]*fi[i]
    delG = delG.sqrt
    var b = alpha/delG

    // Iterate until value is <= tolerance.
    while (delG > tolerance) {
        // Calculate next value.
        for (i in 0...n) x[i] = x[i] - b*fi[i]

        // Calculate next gradient.
        fi = gradG.call(x)

        // Calculate next norm.
        delG = 0
        for (i in 0...n) delG = delG + fi[i]*fi[i]
        delG = delG.sqrt
        b = alpha/delG

        // Calculate next value.
        var g1 = g.call(x)

        // Adjust parameter.
        if (g1 > g0) {
            alpha = alpha / 2
...
        }
    }
}

var tolerance = 0.0000006
var alpha = 0.1
var x = [0.1, -1] // Initial guess of location of minimum.

steepestDescent.call(x, alpha, tolerance)
System.print("Testing steepest descent method:")
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))</syntaxhighlight>

{{out}}
<pre>
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.
</pre>


=={{header|zkl}}==
{{trans|Go}} with tweaked gradG
<syntaxhighlight lang="zkl">fcn steepestDescent(f, x,y, alpha, h){
   g0:=f(x,y);  # Initial estimate of result.
   fix,fiy := gradG(f,x,y,h);  # Calculate initial gradient
...
   g0:=f(x,y);
   return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h)
}</syntaxhighlight>
<syntaxhighlight lang="zkl">fcn f(x,y){  # Function for which minimum is to be found.
   (x - 1).pow(2)*(-y.pow(2)).exp() +
   y*(y + 2)*(-2.0*x.pow(2)).exp()
...
println("Testing steepest descent method:");
println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));</syntaxhighlight>
{{out}}
<pre>
...
</pre>

Latest revision as of 17:20, 7 December 2023

Gradient descent is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Gradient descent (also known as steepest descent) is a first-order iterative optimization algorithm for finding the minimum of a function which is described in this Wikipedia article.

Task

Use this algorithm to search for minimum values of the bi-variate function:

  f(x, y) = (x - 1)(x - 1)e^(-y^2) + y(y+2)e^(-2x^2)

around x = 0.1 and y = -1.

This book excerpt shows sample C# code for solving this task.
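For reference, several of the entries use the closed-form gradient of f (quoted in the Fortran and ALGOL comments) rather than a numerical approximation:

  ∂f/∂x = 2(x - 1)e^(-y^2) - 4x·e^(-2x^2)·y(y + 2)
  ∂f/∂y = -2(x - 1)^2·y·e^(-y^2) + e^(-2x^2)(y + 2) + e^(-2x^2)·y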

ALGOL 68

Works with: ALGOL 68G version Any - tested with release 2.8.3.win32
Translation of: Go

modified to use the actual gradient function -

Translation of: Fortran

The results agree with the Fortran sample and the Julia sample to 6 places.

PROC steepest descent = ( REF[]LONG REAL x, LONG REAL alphap, tolerance )VOID:
BEGIN
    LONG REAL alpha := alphap;
    LONG REAL g0    := g( x ); # Initial estimate of result. #
    # Calculate initial gradient. #
    [ LWB x : UPB x ]LONG REAL fi  := grad g( x );
    # Calculate initial norm. #
    LONG REAL del g := 0.0;
    FOR i FROM LWB x TO UPB x DO
        del g +:= fi[ i ] * fi[ i ]
    OD;
    del g       := long sqrt( del g );
    LONG REAL b := alpha / del g;
    # Iterate until value is <= tolerance. #
    WHILE del g > tolerance DO
        # Calculate next value. #
        FOR i FROM LWB x TO UPB x DO
            x[i] -:= b * fi[i]
        OD;
        # Calculate next gradient. #
        fi := grad g( x );
        # Calculate next norm. #
        del g := 0;
        FOR i FROM LWB x TO UPB x DO
            del g +:= fi[ i ] * fi[ i ]
        OD;
        del g := long sqrt( del g );
        IF del g > 0 THEN
            b     := alpha / del g; 
            # Calculate next value. #
            LONG REAL g1 := g( x );
            # Adjust parameter. #
            IF g1 > g0 THEN
                alpha /:= 2
            ELSE
                g0 := g1
            FI
        FI
    OD
END # steepest descent # ;
# calculates the gradient of g(p).                                                     #
# The derivitives wrt x and y are (as in the Fortran sample ):                         #
# g' wrt x = 2( x - 1 )e^( - ( y^2 ) ) - 4xe^( -2( x^2) )y( y + 2 )                    # 
# g' wrt y = ( -2( x-1 )^2ye^( - (y^2) )) + e^(-2( x^2 ) )( y + 2 ) + e^( -2( x^2 ) )y #
PROC grad g = ( []LONG REAL p )[]LONG REAL:
BEGIN
    [ LWB p : UPB p ]LONG REAL z;
    LONG REAL x = p[ 0 ];
    LONG REAL y = p[ 1 ];
    z[ 0 ] := 2 * ( x - 1 ) * long exp( - ( y * y ) )
            - 4 * x * long exp( -2 * ( x * x ) ) * y * ( y + 2 );
    z[ 1 ] := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * long exp( - y * y )
            + long exp( -2 * x * x ) * ( y + 2 )
            + long exp( -2 * x * x ) * y;
    z
END # grad g # ;
# Function for which minimum is to be found. #
# g( x, y ) = ( ( x - 1 )^2 )e^( - ( x^2 ) ) + y( y + 2 )e^( - 2(x^2)) #
PROC g = ( []LONG REAL x )LONG REAL:
    ( x[ 0 ] - 1 )
  * ( x[ 0 ] - 1 )
  * long exp( - x[ 1 ] * x[ 1 ] ) + x[ 1 ] * ( x[ 1 ] + 2 )
  * long exp( - 2 * x[ 0 ] * x[ 0 ] )
  ;
BEGIN
    LONG REAL          tolerance := 0.0000006;
    LONG REAL          alpha     := 0.1;
    [ 0 : 1 ]LONG REAL x         := ( []LONG REAL( 0.1, -1 ) )[ AT 0 ]; # Initial guess of location of minimum. #
    steepest descent( x, alpha, tolerance );
    print( ( "Testing steepest descent method:", newline ) );
    print( ( "The minimum is at x[0] = ", fixed( x[ 0 ], -10, 6 ), ", x[1] = ", fixed( x[ 1 ], -10, 6 ), newline ) )
END
Output:
Testing steepest descent method:
The minimum is at x[0] =   0.107627, x[1] =  -1.223260

ALGOL W

Translation of: ALGOL 68

which is a

Translation of: Go

with the gradient function from

Translation of: Fortran

The results agree (to 6 places) with the Fortran and Julia samples.

begin
    procedure steepestDescent ( long real array x ( * ); long real value alphap, tolerance ) ;
    begin
        long real array fi ( 0 :: 1 );
        long real alpha, g0, g1, delG, b;
        alpha := alphap;
        g0    := g( x ); % Initial estimate of result. %
        % Calculate initial gradient. %
        gradG( fi, x );
        % Calculate initial norm. %
        delG  := 0.0;
        for i := 0 until 1 do delG := delG + ( fi( i ) * fi( i ) );
        delG  := longSqrt( delG );
        b     := alpha / delG;
        % Iterate until value is <= tolerance. %
        while delG > tolerance do begin
            % Calculate next value. %
            for i := 0 until 1 do x(i) := x( i ) - ( b * fi(i) );
            % Calculate next gradient. %
            gradG( fi, x );
            % Calculate next norm. %
            delG  := 0;
            for i := 0 until 1 do delG := delg + ( fi( i ) * fi( i ) );
            delG  := longSqrt( delG );
            if delG > 0 then begin
                b     := alpha / delG; 
                % Calculate next value. %
                g1 := g( x );
                % Adjust parameter. %
                if g1 > g0
                then alpha := alpha / 2
                else g0 := g1
            end if_delG_gt_0
        end while_delG_gt_tolerance
    end steepestDescent ;
    % Provides a rough calculation of gradient g(x). %
    procedure gradG ( long real array z, p ( * ) ) ;
    begin
        long real x, y;
        x := p( 0 );
        y := p( 1 );
        z( 0 ) := 2 * ( x - 1 ) * longExp( - ( y * y ) )
                - 4 * x * longExp( -2 * ( x * x ) ) * y * ( y + 2 );
        z( 1 ) := ( -2 * ( x - 1 ) * ( x - 1 ) * y ) * longExp( - y * y )
                + longExp( -2 * x * x ) * ( y + 2 )
                + longExp( -2 * x * x ) * y
    end gradG ;
    % Function for which minimum is to be found. %
    long real procedure g ( long real array x ( * ) ) ;
          ( x( 0 ) - 1 ) * ( x( 0 ) - 1 ) * longExp( - x( 1 ) * x( 1 ) )
        + x( 1 ) * ( x( 1 ) + 2 ) * longExp( - 2 * x( 0 ) * x( 0 ) )
        ;
    begin
        long real alpha, tolerance;
        long real array x ( 0 :: 1 );
        x( 0 ) :=  0.1; % Initial guess of location of minimum. %
        x( 1 ) := -1;       
        tolerance := 0.0000006;
        alpha     := 0.1;
        steepestDescent( x, alpha, tolerance );
        r_format := "A"; r_w := 11; r_d := 7; s_w := 0; % output formatting %
        write( "Testing steepest descent method:" );
        write( "The minimum is at x(0) = ", x( 0 ), ", x(1) = ", x( 1 ) )
    end
end.
Output:
Testing steepest descent method:
The minimum is at x(0) =   0.1076268, x(1) =  -1.2232596



Common Lisp

Works with: SBCL
(defparameter *epsilon* (sqrt 0.0000001d0))

(defun myfun (x y)
  (+  (* (- x 1)
	 (- x 1)
	 (exp (- (expt y 2))))
      (* y (+ 2 y)
	 (exp (- (* 2 (expt x 2)))))))

(defun xd (x y)
  (/ (-  (myfun (* x (+ 1 *epsilon*)) y)
	 (myfun (* x (- 1 *epsilon*)) y))
     (* 2 x *epsilon*)))

(defun yd (x y)
  (/ (-  (myfun x (* y (+ 1 *epsilon*)))
	 (myfun x (* y (- 1 *epsilon*))))
     (* 2 y *epsilon*)))

(defun gd ()
  (let ((x 0.1d0)
	(y -1.0d0))
    (dotimes (i 31)
      (setf x (- x  (* 0.3d0 (xd x y))))
      (setf y (- y   (* 0.3d0 (yd x y )))))
    (list x y)))
Output:
(0.10762684380416455 -1.2232596637047382)


Forth

Works with: SwiftForth
Library: fpmath


Approach with partial differential equations GD1, GD2

0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!

: ^2 FDUP F* ;

\ gradient for x, analytical function

: GD1 ( F: x y  -- dfxy)
    to py to px
    2e px 1e F- F* py ^2 FNEGATE FEXP F*
    -4e px F* py  F* py 2e F+ F* px ^2 -2e F* FEXP F*
    F+
;

\ gradient for y, analytical function

: GD2 ( F: x y -- dfxy )
    to py to px
    px ^2 -2e F* FEXP FDUP
    py F* FSWAP
    py 2e F+ F* F+
    py ^2 FNEGATE FEXP py F*
    px 1e F- ^2 F*
    -2e F*
    F+
;

\ gradient descent  
    
: GD ( x y n  -- ) \ gradient descent, initial guess and number of iterations
    FOVER FOVER y F! x F!
    0 do
	FOVER FOVER
	GD1 0.3e F* x F@ FSWAP F- x F!
	GD2 0.3e F* y F@ FSWAP  F- y F!
	x F@ y F@ FOVER FOVER FSWAP F. F.  CR
    loop
;

Approach with main function and numerical gradients xd xy:

0e fvalue px
0e fvalue py
fvariable x 0.1e x F!
fvariable y -1e y F!

1e-6 fvalue EPSILON

: fep EPSILON FSQRT ;


: MYFUN   ( x y  -- fxy ) 
    FDUP FROT FDUP ( y y x x ) 
    ( x ) 1e F- FDUP F*   ( y y x a ) 
    FROT ( y x a y )
    ( y ) FDUP F* FNEGATE FEXP F* ( y x b ) 
    FROT ( x b y ) 
    ( y ) FDUP 2e F+ F* (  x b c )  
    FROT ( b c x ) 
    ( x ) FDUP F* -2e F* FEXP F* 
    F+
;


: xd ( x y )
    to py to px
    px 1e  fep F+ F* py  myfun px 1e fep F- F*  py myfun F- 2e px F* fep F* F/
;

: xy ( x y )
    to py to px
    py 1e  fep F+ F* px FSWAP myfun py 1e fep F- F*  px FSWAP myfun F- 2e py F* fep F* F/
;


: GDB ( x y n -- ) \ gradient descent, initial guess and number of iterations
       CR FOVER FOVER y F! x F!
    0 do
	FOVER FOVER
	 xd 0.3e F* x F@ FSWAP F- x F!
	 xy 0.3e F* y F@ FSWAP F- y F!
	x F@ y F@ FOVER FOVER FSWAP F. F.  CR
    loop
;
Output:

0.1e -1e 20 GD                                                                 
0.1810310 -1.1787894 
0.1065262 -1.1965306 
0.1144636 -1.2181779 
0.1075002 -1.2206140 
0.1082816 -1.2227594 
0.1076166 -1.2230041 
0.1076894 -1.2232108 
0.1076261 -1.2232350 
0.1076328 -1.2232549 
0.1076267 -1.2232573 
0.1076274 -1.2232592 
0.1076268 -1.2232594 
0.1076269 -1.2232596 
0.1076268 -1.2232596 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 

 
0.1e -1e 20 GDB                                                                                                                                             
0.1810310 -1.1787893 
0.1065262 -1.1965305 
0.1144636 -1.2181779 
0.1075002 -1.2206140 
0.1082816 -1.2227594 
0.1076166 -1.2230041 
0.1076894 -1.2232108 
0.1076261 -1.2232350 
0.1076328 -1.2232549 
0.1076268 -1.2232573 
0.1076274 -1.2232592 
0.1076268 -1.2232594 
0.1076269 -1.2232596 
0.1076268 -1.2232596 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 
0.1076268 -1.2232597 



Fortran

Compiler: gfortran 8.3.0
The way a Fortran programmer would do this is to differentiate the function symbolically using the diff command in Maxima:

(%i3) (x-1)*(x-1)*exp(-y^2)+y*(y+2)*exp(-2*x^2);
                                   2          2
                            2   - y      - 2 x
(%o3)                (x - 1)  %e     + %e       y (y + 2)
(%i4) diff(%o3,x);
                                  2              2
                               - y          - 2 x
(%o4)              2 (x - 1) %e     - 4 x %e       y (y + 2)
(%i5) diff(%o3,y);
                                 2           2                  2
                        2     - y       - 2 x              - 2 x
(%o5)       (- 2 (x - 1)  y %e    ) + %e       (y + 2) + %e       y

and then have them automatically turned into Fortran statements with the fortran command:

(%i6) fortran(%o4);
      2*(x-1)*exp(-y**2)-4*x*exp(-2*x**2)*y*(y+2)
(%o6)                                done
(%i7) fortran(%o5);
      (-2*(x-1)**2*y*exp(-y**2))+exp(-2*x**2)*(y+2)+exp(-2*x**2)*y
(%o7)                                done
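
In conventional notation, the derivatives Maxima produced above are

\[
\frac{\partial f}{\partial x} = 2(x-1)\,e^{-y^{2}} - 4x\,y\,(y+2)\,e^{-2x^{2}},
\qquad
\frac{\partial f}{\partial y} = -2(x-1)^{2}\,y\,e^{-y^{2}} + (y+2)\,e^{-2x^{2}} + y\,e^{-2x^{2}}
\]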

The optimization subroutine GD sets the reverse-communication variable IFLAG, which lets the gradient be evaluated separately by the caller between calls (a rough sketch of this calling pattern is given below).
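
For readers unfamiliar with reverse communication, here is a rough Go sketch of the same calling pattern (an illustration only: the names gdStep and gradG are invented for the sketch, while the step size 0.3, the tolerance and the analytic gradient mirror the Fortran code below).

package main

import (
    "fmt"
    "math"
)

// gradG is the analytic gradient taken from the Maxima output above.
func gradG(x []float64) []float64 {
    X, Y := x[0], x[1]
    return []float64{
        2*(X-1)*math.Exp(-Y*Y) - 4*X*math.Exp(-2*X*X)*Y*(Y+2),
        -2*(X-1)*(X-1)*Y*math.Exp(-Y*Y) + math.Exp(-2*X*X)*(Y+2) + math.Exp(-2*X*X)*Y,
    }
}

// gdStep plays the role of GD: it takes one step and returns true while the
// caller should recompute the gradient and call again (the analogue of
// IFLAG = 1), or false once the gradient norm is below tol (IFLAG = 0).
func gdStep(x, g []float64, eta, tol float64) bool {
    norm := 0.0
    for _, gi := range g {
        norm += gi * gi
    }
    if math.Sqrt(norm) < tol {
        return false // converged
    }
    for i := range x {
        x[i] -= eta * g[i]
    }
    return true
}

func main() {
    x := []float64{0.1, -1}
    g := gradG(x) // the caller owns the gradient evaluation, as with EVALFG
    for iter := 0; iter < 1000000 && gdStep(x, g, 0.3, 1e-15); iter++ {
        g = gradG(x)
    }
    fmt.Println(x)
}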

      SUBROUTINE EVALFG (N, X, F, G)
       IMPLICIT NONE
       INTEGER N
       DOUBLE PRECISION X(N), F, G(N)
       F = (X(1) - 1.D0)**2 * EXP(-X(2)**2) + 
     $      X(2) * (X(2) + 2.D0) * EXP(-2.D0 * X(1)**2)
       G(1) = 2.D0 * (X(1) - 1.D0) * EXP(-X(2)**2) - 4.D0 * X(1) * 
     $        EXP(-2.D0 * X(1)**2) * X(2) * (X(2) + 2.D0)
       G(2) = (-2.D0 * (X(1) - 1.D0)**2 * X(2) * EXP(-X(2)**2)) + 
     $        EXP(-2.D0 * X(1)**2) * (X(2) + 2.D0) + 
     $        EXP(-2.D0 * X(1)**2) * X(2)
       RETURN
      END

*-----------------------------------------------------------------------
* gd - Gradient descent
* G must be set correctly at the initial point X.
*
*___Name______Type________In/Out___Description_________________________
*   N         Integer     In       Number of Variables.
*   X(N)      Double      Both     Variables
*   G(N)      Double      Both     Gradient
*   TOL       Double      In       Relative convergence tolerance
*   IFLAG     Integer     Out      Reverse Communication Flag
*                                    on output:  0  done
*                                                1  compute G and call again
*-----------------------------------------------------------------------
      SUBROUTINE GD (N, X, G, TOL, IFLAG)
       IMPLICIT NONE
       INTEGER N, IFLAG
       DOUBLE PRECISION X(N), G(N), TOL
       DOUBLE PRECISION ETA
       PARAMETER (ETA = 0.3D0)     ! Learning rate
       INTEGER I
       DOUBLE PRECISION GNORM      ! norm of gradient

       GNORM = 0.D0                ! convergence test
       DO I = 1, N
         GNORM = GNORM + G(I)**2
       END DO
       GNORM = SQRT(GNORM)
       IF (GNORM < TOL) THEN
         IFLAG = 0
         RETURN                     ! success
       END IF

       DO I = 1, N                 ! take step
         X(I) = X(I) - ETA * G(I)
       END DO
       IFLAG = 1
       RETURN                      ! let main program evaluate G
      END  ! of gd

      PROGRAM GDDEMO
       IMPLICIT NONE
       INTEGER N
       PARAMETER (N = 2)
       INTEGER ITER, J, IFLAG
       DOUBLE PRECISION X(N), F, G(N), TOL

       X(1) = -0.1D0         ! initial values
       X(2) = -1.0D0
       TOL = 1.D-15
       CALL EVALFG (N, X, F, G)
       IFLAG = 0
       DO J = 1, 1 000 000
         CALL GD (N, X, G, TOL, IFLAG)
         IF (IFLAG .EQ. 1) THEN
           CALL EVALFG (N, X, F, G)
         ELSE
           ITER = J
           GO TO 50
         END IF
       END DO
       STOP 'too many iterations!'

  50   PRINT '(A, I7, A, F19.15, A, F19.15, A, F19.15)', 
     $          'After ', ITER, ' steps, found minimum at x=', 
     $           X(1), ' y=', X(2), ' of f=', F
       STOP 'program complete'
      END
Output:
After      31 steps, found minimum at x=  0.107626843548372 y= -1.223259663839920 of f= -0.750063420551493
STOP program complete

Go

This is a translation of the C# code in the book excerpt linked to above and hence also of the first TypeScript example below.

However, since it was originally written, I've substituted Fortran's gradient function for the original one (see the Talk page), which now gives results that agree (to 6 decimal places) with those of the Fortran, Julia, Algol 68 and Algol W solutions. As a number of other solutions are based on this one, I suggest their authors update them accordingly.

package main

import (
    "fmt"
    "math"
)

func steepestDescent(x []float64, alpha, tolerance float64) {
    n := len(x)
    g0 := g(x) // Initial estimate of result.

    // Calculate initial gradient.
    fi := gradG(x)

    // Calculate initial norm.
    delG := 0.0
    for i := 0; i < n; i++ {
        delG += fi[i] * fi[i]
    }
    delG = math.Sqrt(delG)
    b := alpha / delG

    // Iterate until value is <= tolerance.
    for delG > tolerance {
        // Calculate next value.
        for i := 0; i < n; i++ {
            x[i] -= b * fi[i]
        }

        // Calculate next gradient.
        fi = gradG(x)

        // Calculate next norm.
        delG = 0
        for i := 0; i < n; i++ {
            delG += fi[i] * fi[i]
        }
        delG = math.Sqrt(delG)
        b = alpha / delG

        // Calculate next value.
        g1 := g(x)

        // Adjust parameter.
        if g1 > g0 {
            alpha /= 2
        } else {
            g0 = g1
        }
    }
}

// Provides a rough calculation of gradient g(p).
func gradG(p []float64) []float64 {
    z := make([]float64, len(p))
    x := p[0]
    y := p[1]
    z[0] = 2*(x-1)*math.Exp(-y*y) - 4*x*math.Exp(-2*x*x)*y*(y+2)
    z[1] = -2*(x-1)*(x-1)*y*math.Exp(-y*y) + math.Exp(-2*x*x)*(y+2) + math.Exp(-2*x*x)*y
    return z
}

// Function for which minimum is to be found.
func g(x []float64) float64 {
    return (x[0]-1)*(x[0]-1)*
        math.Exp(-x[1]*x[1]) + x[1]*(x[1]+2)*
        math.Exp(-2*x[0]*x[0])
}

func main() {
    tolerance := 0.0000006
    alpha := 0.1
    x := []float64{0.1, -1} // Initial guess of location of minimum.

    steepestDescent(x, alpha, tolerance)
    fmt.Println("Testing steepest descent method:")
    fmt.Printf("The minimum is at x = %f, y = %f for which f(x, y) = %f.\n", x[0], x[1], g(x))
}
Output:
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.

J

Works with: Jsoftware
Library: math/calculus

In earlier versions without the math/calculus library, D. can be used instead of pderiv.

load 'math/calculus'
coinsert 'jcalculus'

NB. initial guess

go=: 0.1 _1

NB. function

func=: monad define
'xp yp' =. y
((1-xp)*(1-xp) * ]  ^-(yp)^2) + yp*(yp+2)* ] ^ _2 * xp^2   NB. evaluates from right to left without precedence
)

NB. gradient descent

shortygd =: monad define
go=.y
go=. go - 0.03 * func pderiv 1 ] go   NB. use D. instead of pderiv for earlier versions
)

NB. use: 

shortygd ^:_ go
Output:
0.107627 _1.22326



jq

Adapted from Go

Works with: jq and gojq, the C and Go implementations of jq

The function, g, and its gradient, gradG, are defined as 0-arity filters so that they can be passed as formal parameters to the `steepestDescent` function.

def sq: .*.;

def ss(stream): reduce stream as $x (0; . + ($x|sq));

# Function for which minimum is to be found: input is a 2-array
def g:
  . as $x
  | (($x[0]-1)|sq) *
        (( - ($x[1]|sq))|exp) + $x[1]*($x[1]+2) *
        ((-2*($x[0]|sq))|exp);

# Provide a rough calculation of gradient g(p):
def gradG:
  . as [$x, $y]
  | ($x*$x) as $x2
  | [2*($x-1)*(-($y|sq)|exp) - 4 * $x * (-2*($x|sq)|exp) * $y*($y+2),
     - 2*($x-1)*($x-1)*$y*((-$y*$y)|exp)
	    + ((-2*$x2)|exp)*($y+2) + ((-2*$x2)|exp)*$y] ;

def steepestDescent(g; gradG; $x; $alpha; $tolerance):
  ($x|length) as $n
  | {g0   : ($x|g),         # Initial estimate of result
     alpha: $alpha,
        fi: ($x|gradG),      # Calculate initial gradient
	 x: $x,
     iterations: 0
    }

   # Calculate initial norm:
   | .delG = (ss( .fi[] ) | sqrt )
   | .b = .alpha/.delG

   # Iterate until value is <= $tolerance:
   | until (.delG <= $tolerance;
        .iterations += 1 
        # Calculate next value:
        | reduce range(0; $n) as $i(.;
	    .x[$i] +=  (- .b * .fi[$i]) )

        # Calculate next gradient:
        | .fi = (.x|gradG)

        # Calculate next norm:
        | .delG = (ss( .fi[] ) | sqrt)
        | .b = .alpha/.delG

        # Calculate next value:
        | .g1 = (.x|g)

        # Adjust parameter:
        | if .g1 > .g0
          then .alpha /= 2
          else .g0 = .g1
          end
       ) ;

def tolerance: 0.0000006;
def alpha: 0.1;
def x: [0.1, -1]; # Initial guess of location of minimum.

def task:
  steepestDescent(g; gradG; x; alpha; tolerance)
  | "Testing the steepest descent method:",
    "After \(.iterations) iterations,",
    "the minimum is at x = \(.x[0]), y = \(.x[1]) for which f(x, y) = \(.x|g)." ;

task
Output:
Testing the steepest descent method:
After 24 iterations,
the minimum is at x = 0.10762682432948058, y = -1.2232596548816101 for which f(x, y) = -0.7500634205514924.

Julia

using Optim, Base.MathConstants

f(x) = (x[1] - 1) * (x[1] - 1) * e^(-x[2]^2) + x[2] * (x[2] + 2) * e^(-2 * x[1]^2)

println(optimize(f, [0.1, -1.0], GradientDescent()))
Output:
Results of Optimization Algorithm
 * Algorithm: Gradient Descent
 * Starting Point: [0.1,-1.0]
 * Minimizer: [0.107626844383003,-1.2232596628723371]
 * Minimum: -7.500634e-01
 * Iterations: 14
 * Convergence: true
   * |x - x'| ≤ 0.0e+00: false
     |x - x'| = 2.97e-09
   * |f(x) - f(x')| ≤ 0.0e+00 |f(x)|: true
     |f(x) - f(x')| = 0.00e+00 |f(x)|
   * |g(x)| ≤ 1.0e-08: true
     |g(x)| = 2.54e-09
   * Stopped by an increasing objective: false
   * Reached Maximum Number of Iterations: false
 * Objective Calls: 35
 * Gradient Calls: 35

Nim

Translation of: Go

The gradient function has been rewritten to combine two of the terms in the partial derivative with respect to y (two terms instead of three, as shown below). This doesn't change the result, which agrees with those of Go, Fortran, Julia, etc.
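
Concretely, the two exponential terms in the partial derivative with respect to y combine as

\[
\frac{\partial f}{\partial y}
= -2(x-1)^{2}\,y\,e^{-y^{2}} + (y+2)\,e^{-2x^{2}} + y\,e^{-2x^{2}}
= -2(x-1)^{2}\,y\,e^{-y^{2}} + 2(y+1)\,e^{-2x^{2}}
\]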

import math, strformat


func steepDescent(g: proc(x: openArray[float]): float;
                  gradG: proc(p: openArray[float]): seq[float];
                  x: var openArray[float]; alpha, tolerance: float) =
  let n = x.len
  var alpha = alpha
  var g0 = g(x)   # Initial estimate of result.

  # Calculate initial gradient.
  var fi = gradG(x)

  # Calculate initial norm.
  var delG = 0.0
  for i in 0..<n:
    delG += fi[i] * fi[i]
  delG = sqrt(delG)
  var b = alpha / delG

  # Iterate until value is <= tolerance.
  while delG > tolerance:
    # Calculate next value.
    for i in 0..<n:
      x[i] -= b * fi[i]

    # Calculate next gradient.
    fi = gradG(x)

    # Calculate next norm.
    delG = 0
    for i in 0..<n:
      delG += fi[i] * fi[i]
    delG = sqrt(delG)
    b = alpha / delG

    # Calculate next value.
    let g1 = g(x)

    # Adjust parameter.
    if g1 > g0: alpha *= 0.5
    else: g0 = g1


when isMainModule:

  func g(x: openArray[float]): float =
    ## Function for which minimum is to be found.
    (x[0]-1) * (x[0]-1) * exp(-x[1]*x[1]) + x[1] * (x[1]+2) * exp(-2*x[0]*x[0])

  func gradG(p: openArray[float]): seq[float] =
    ## Provides a rough calculation of gradient g(p).
    result = newSeq[float](p.len)
    let x = p[0]
    let y = p[1]
    result[0] = 2 * (x-1) * exp(-y*y) - 4 * x * exp(-2*x*x) * y * (y+2)
    result[1] = -2 * (x-1) * (x-1) * y * exp(-y*y) + 2 * (y+1) * exp(-2*x*x)

  const
    Tolerance = 0.0000006
    Alpha = 0.1

  var x = [0.1, -1.0]   # Initial guess of location of minimum.

  steepDescent(g, gradG, x, Alpha, Tolerance)
  echo "Testing steepest descent method:"
  echo &"The minimum is at x = {x[0]:.12f}, y = {x[1]:.12f} for which f(x, y) = {g(x):.12f}"
Output:
Testing steepest descent method:
The minimum is at x = 0.107626824329, y = -1.223259654882 for which f(x, y) = -0.750063420551

Perl

Calculate with bignum for numerical stability.

Translation of: Raku
use strict;
use warnings;
use bignum;

sub steepestDescent {
    my($alpha, $tolerance, @x) = @_;
    my $N = @x;
    my $h = $tolerance;
    my $g0 = g(@x) ;    # Initial estimate of result.

    my @fi = gradG($h, @x) ;    #  Calculate initial gradient

    # Calculate initial norm.
    my $delG = 0;
    for (0..$N-1) { $delG += $fi[$_]**2 }
    my $b = $alpha / sqrt($delG);

    while ( $delG > $tolerance ) {   # Iterate until value is <= tolerance.
       #  Calculate next value.
       for (0..$N-1) { $x[$_] -= $b * $fi[$_] }
       $h /= 2;

       @fi = gradG($h, @x);    # Calculate next gradient.
       # Calculate next norm.
       $delG = 0;
       for (0..$N-1) { $delG += $fi[$_]**2 }
       $b = $alpha / sqrt($delG);

       my $g1 = g(@x);   # Calculate next value.

       $g1 > $g0 ? ($alpha /= 2) : ($g0 = $g1);  # Adjust parameter.
    }
    @x
}

# Provides a rough calculation of gradient g(x).
sub gradG {
    my($h, @x) = @_;
    my $N = @x;
    my @y = @x;
    my $g0 = g(@x);
    my @z;
    for (0..$N-1) { $y[$_] += $h ; $z[$_] = (g(@y) - $g0) / $h }
    return @z
}

# Function for which minimum is to be found.
sub g { my(@x) = @_; ($x[0]-1)**2 * exp(-$x[1]**2) + $x[1]*($x[1]+2) * exp(-2*$x[0]**2) };

my $tolerance = 0.0000001;
my $alpha     = 0.01;
my @x = <0.1 -1>; # Initial guess of location of minimum.

printf "The minimum is at x[0] = %.6f, x[1] = %.6f", steepestDescent($alpha, $tolerance, @x);
Output:
The minimum is at x[0] = 0.107653, x[1] = -1.223370

Phix

Translation of: Go
with javascript_semantics
-- Function for which minimum is to be found.
function g(sequence x)
    atom {x0,x1} = x
    return (x0-1)*(x0-1)*exp(-x1*x1) + 
               x1*(x1+2)*exp(-2*x0*x0)
end function
 
-- Provides a rough calculation of gradient g(x).
function gradG(sequence p)
    atom {x,y} = p,
         xm1 = x-1,
         emyy = exp(-y*y),
         em2xx = exp(-2*x*x),
         xm12emyy = xm1*2*emyy
    p = {xm12emyy   - 4*x*em2xx*y*(y+2),
        -xm12emyy*xm1*y + em2xx*(2*y+2)}
    return p
end function
 
function steepestDescent(sequence x, atom alpha, tolerance)
    sequence fi = gradG(x)  -- Calculate initial gradient
    atom g0 = g(x),         -- Initial estimate of result
         delG = sqrt(sum(sq_mul(fi,fi))),       -- & norm
         b = alpha / delG
    integer icount = 0      -- iteration limitor/sanity
    while delG>tolerance do     -- Iterate until <= tolerance
        x = sq_sub(x,sq_mul(b,fi))  -- Calculate next value
        fi = gradG(x)               --           next gradient
        delG = sqrt(sum(sq_mul(fi,fi))) --       next norm
        b = alpha / delG
        atom g1 = g(x)                  --       next value
        if g1>g0 then
            alpha /= 2                  -- Adjust parameter
        else
            g0 = g1
        end if
        icount += 1
        assert(icount<100)  -- (increase if/when necessary)
    end while
    return x
end function
 
constant alpha = 0.1, tolerance = 0.00000000000001
sequence x = steepestDescent({0.1,-1}, alpha, tolerance)
printf(1,"Testing steepest descent method:\n")
printf(1,"The minimum is at x = %.13f, y = %.13f for which f(x, y) = %.15f\n", {x[1], x[2], g(x)})
Output:

Results match Fortran; most others agree to 6 or 7 dp.
Some slightly unexpected (but I think acceptable) variance was noted when playing with different tolerances, so be warned.
Results on 32-bit and 64-bit Phix agree to 13 dp, which I therefore choose to show in full here (but would not otherwise really trust).

Testing steepest descent method:
The minimum is at x = 0.1076268435484, y = -1.2232596638399 for which f(x, y) = -0.750063420551493

Racket

Translation of: Go

Note the different implementation of grad: I believe that the perturbed vector should be reset after each coordinate, so that only the partial derivative in that particular dimension is used. For this reason, I get yet another result (see the sketch below)!

I could have used ∇ and Δ in the variable names, but it looked too confusing, so I've gone with grad- and del-.
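
To make the difference concrete, here is a small Go sketch (the helper names gradReset and gradCumulative are invented for this illustration) contrasting the "reset each coordinate" forward difference used by grad/del below with the cumulative perturbation that arises when the work vector aliases the input, as in the Scala and TypeScript helpers:

package main

import (
    "fmt"
    "math"
)

// f is the bivariate function from the task.
func f(x, y float64) float64 {
    return (x-1)*(x-1)*math.Exp(-y*y) + y*(y+2)*math.Exp(-2*x*x)
}

// gradReset perturbs one coordinate at a time and restores it before moving
// on -- the behaviour of grad/del below.
func gradReset(x, y, h float64) (float64, float64) {
    f0 := f(x, y)
    return (f(x+h, y) - f0) / h, (f(x, y+h) - f0) / h
}

// gradCumulative leaves the x-perturbation in place while y is perturbed, so
// its second quotient approximates df/dx + df/dy rather than df/dy alone.
func gradCumulative(x, y, h float64) (float64, float64) {
    f0 := f(x, y)
    dx := (f(x+h, y) - f0) / h
    dy := (f(x+h, y+h) - f0) / h
    return dx, dy
}

func main() {
    h := 0.0000006
    fmt.Println(gradReset(0.1, -1, h))      // close to the analytic gradient
    fmt.Println(gradCumulative(0.1, -1, h)) // second value mixes both partials
}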

#lang racket

(define (apply-vector f v)
  (apply f (vector->list v)))

;; Provides a rough calculation of gradient g(v).
(define ((grad/del f) v δ #:fv (fv (apply-vector f v)))
  (define dim (vector-length v))
  (define tmp (vector-copy v))
  (define grad (for/vector #:length dim ((i dim)
                            (v_i v))
              (vector-set! tmp i (+ v_i δ))
              (define ∂f/∂v_i (/ (- (apply-vector f tmp) fv) δ))
              (vector-set! tmp i v_i)
              ∂f/∂v_i))
  (values grad (sqrt (for/sum ((∂_i grad)) (sqr ∂_i)))))

(define (steepest-descent g x α tolerance)
  (define grad/del-g (grad/del g))

  (define (loop x δ α gx grad-gx del-gx b)
    (cond
      [(<= del-gx tolerance) x]
      [else
        (define δ´ (/ δ 2))
        (define x´ (vector-map + (vector-map (curry * (- b)) grad-gx) x))
        (define gx´ (apply-vector g x´))
        (define-values (grad-gx´ del-gx´) (grad/del-g x´ δ´ #:fv gx´))
        (define b´ (/ α del-gx´))
        (if (> gx´ gx)
            (loop x´ δ´ (/ α 2) gx  grad-gx´ del-gx´ b´)
            (loop x´ δ´ α       gx´ grad-gx´ del-gx´ b´))]))

  (define gx (apply-vector g x))
  (define δ tolerance)
  (define-values (grad-gx del-gx) (grad/del-g x δ #:fv gx))
  (loop x δ α gx grad-gx del-gx (/ α del-gx)))

(define (Gradient-descent)
  (steepest-descent
    (λ (x y)
       (+ (* (- x 1) (- x 1) (exp (- (sqr y))))
        (* y (+ y 2) (exp (- (* 2 (sqr x)))))))
    #(0.1 -1.) 0.1 0.0000006))

(module+ main
  (Gradient-descent))
Output:
'#(0.10760797905122492 -1.2232993981966753)

Raku

(formerly Perl 6)

Translation of: Go
# 20200904 Updated Raku programming solution

sub steepestDescent(@x, $alpha is copy, $h) {

   my $g0 = g |@x ; # Initial estimate of result.

   my @fi = gradG |@x ; #  Calculate initial gradient

   my $b = $alpha / my $delG = sqrt ( sum @fi»² ) ;  # Calculate initial norm.

   while ( $delG > $h ) {   # Iterate until value is <= tolerance.

      @x «-»= $b «*« @fi; #  Calculate next value.

      @fi = gradG |@x ; # Calculate next gradient and next value

      $b = $alpha / ($delG = sqrt( sum @fi»² ));  # Calculate next norm.

      my $g1 = g |@x ;

      $g1 > $g0 ?? ( $alpha /= 2 ) !! ( $g0 = $g1 )   # Adjust parameter.
   }
}

sub gradG(\x,\y) { # gives a rough calculation of gradient g(x).
   2*(x-1)*exp(-y²) - 4*x*exp(-2*x²)*y*(y+2) , -2*(x-1)²*y*exp(-y²) + exp(-2*x²)*(2*y+2)
}

# Function for which minimum is to be found.
sub g(\x,\y) { (x-1)² * exp(-y²) + y*(y+2) * exp(-2*x²) }

my $tolerance = 0.0000006 ; my $alpha = 0.1;

my @x = 0.1, -1; # Initial guess of location of minimum.

steepestDescent(@x, $alpha, $tolerance);

say "Testing steepest descent method:";
say "The minimum is at x[0] = ", @x[0], ", x[1] = ", @x[1];
Output:
Testing steepest descent method:
The minimum is at x[0] = 0.10762682432947938, x[1] = -1.2232596548816097

REXX

The tolerance can be much smaller; a tolerance of 1e-200 was tested. It works, but causes the program to execute a bit more slowly, though still with sub-second execution time.

/*REXX pgm searches for minimum values of the bi─variate function (AKA steepest descent)*/
numeric digits (length( e() ) - length(.) ) % 2  /*use half of number decimal digs in E.*/
tolerance=  1e-30                                /*use a much smaller tolerance for REXX*/ 
    alpha=  0.1
      x.0=  0.1;     x.1= -1
say center(' testing for the steepest descent method ', 79, "═")
call steepestD                                   /* ┌──◄── # digs past dec. point ─►───┐*/
say 'The minimum is at:     x[0]='      format(x.0,,9)    "     x[1]="     format(x.1,,9)
exit                                             /*stick a fork in it,  we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
e:     return 2.718281828459045235360287471352662497757247093699959574966967627724
g:     return (x.0-1)**2  *  exp(- (x.1**2) )    +    x.1 * (x.1 + 2)  *  exp(-2 * x.0**2)
/*──────────────────────────────────────────────────────────────────────────────────────*/
gradG: x= x.0;               y= x.1              /*define X and Y  from the  X  array.  */
       xm= x-1;              eny2= exp(-y*y);   enx2= exp(-2 * x**2)        /*shortcuts.*/
       z.0=  2 * xm        * eny2   -   4 * x * enx2 * y * (y+2)
       z.1= -2 * xm**2 * y * eny2   +           enx2     * (y+2)   +   enx2 * y
       return                                    /*a rough calculation of the gradient. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
steepestD: g0= g()                               /*the initial estimate of the result.  */
           call gradG                            /*calculate the initial gradient.      */
           delG= sqrt(z.0**2  +  z.1**2)         /*    "      "     "    norm.          */
           b= alpha / delG
                             do while delG>tolerance
                             x.0= x.0   -   b * z.0;               x.1= x.1   -   b * z.1
                             call gradG
                             delG= sqrt(z.0**2  +  z.1**2);        if delG=0  then return
                             b= alpha / delG
                             g1= g()                                     /*find minimum.*/
                             if g1>g0  then alpha= alpha * .5            /*adjust ALPHA.*/
                                       else    g0= g1                    /*   "   G0.   */
                             end   /*while*/
           return
/*──────────────────────────────────────────────────────────────────────────────────────*/
exp:  procedure; parse arg x; ix= x%1;  if abs(x-ix)>.5  then ix= ix + sign(x);  x= x - ix
      z=1;  _=1;  w=z;   do j=1;  _= _*x/j;  z= (z+_)/1;  if z==w  then leave;  w= z;  end
      if z\==0  then z= z * e() ** ix;                                          return z/1
/*──────────────────────────────────────────────────────────────────────────────────────*/
sqrt: procedure; parse arg x; if x=0  then return 0; d= digits();  numeric digits;  h= d+6
      numeric form; m.=9; parse value format(x,2,1,,0) 'E0' with g "E" _ .; g=g *.5'e'_ %2
        do j=0  while h>9;      m.j=h;                h= h % 2 + 1;   end  /*j*/
        do k=j+5  to 0  by -1;  numeric digits m.k;   g=(g+x/g)*.5;   end  /*k*/; return g
output   when using the internal default inputs:
═══════════════════ testing for the steepest descent method ═══════════════════
The minimum is at:     x[0]= 0.107626824      x[1]= -1.223259655

Scala

Translation of: Go
object GradientDescent {

  /** Steepest descent method modifying input values*/
  def steepestDescent(x : Array[Double], learningRate : Double, tolerance : Double) = {
    val n = x.size
    var h = tolerance
    var alpha = learningRate
    var g0 = g(x) // Initial estimate of result.

    // Calculate initial gradient.
    var fi = gradG(x,h)

    // Calculate initial norm.
    var delG = 0.0
    for (i <- 0 until n by 1)  delG += fi(i) * fi(i)
    delG = math.sqrt(delG)
    var b = alpha / delG

    // Iterate until value is <= tolerance.
    while(delG > tolerance){
      // Calculate next value.
      for (i <- 0 until n by 1) x(i) -= b * fi(i)
      h /= 2

      // Calculate next gradient.
      fi = gradG(x,h)

      // Calculate next norm.
      delG = 0.0
      for (i <- 0 until n by 1) delG += fi(i) * fi(i)
      delG = math.sqrt(delG)
      b = alpha / delG

      // Calculate next value.
      var g1 = g(x)

      // Adjust parameter.
      if(g1 > g0) alpha = alpha / 2
      else g0 = g1
    }

  }

  /** Gradient of the input function given in the task*/
  def gradG(x : Array[Double], h : Double) : Array[Double] = {
    val n = x.size
    val z : Array[Double] = Array.fill(n){0}
    val y = x // NB: aliases x, so the h-perturbations below are cumulative and persist in x
    val g0 = g(x)

    for(i <- 0 until n by 1){
      y(i) += h
      z(i) = (g(y) - g0) / h
    }

    z

  }

  /** Bivariate function given in the task*/
  def g( x : Array[Double]) : Double = {
    ( (x(0)-1) * (x(0)-1) * math.exp( -x(1)*x(1) ) + x(1) * (x(1)+2) * math.exp( -2*x(0)*x(0) ) )
  }

  def main(args: Array[String]): Unit = {
    val tolerance = 0.0000006
    val learningRate = 0.1
    val x =  Array(0.1, -1) // Initial guess of location of minimum.

    steepestDescent(x, learningRate, tolerance)
    println("Testing steepest descent method")
    println("The minimum is at x : " + x(0) + ", y : " + x(1))
  }
}
Output:
Testing steepest descent method
The minimum is at x : 0.10756393294495799, y : -1.2234116852966237

TypeScript

Translation of
  •   [Numerical Methods, Algorithms and Tools in C# by Waldemar Dos Passos (18.2 Gradient Descent Method)]



// Using the steepest-descent method to search
// for minimum values of a multi-variable function
export const steepestDescent = (x: number[], alpha: number, tolerance: number) => {

    let n: number = x.length; // size of input array
    let h: number = 0.0000006; //Tolerance factor
    let g0: number = g(x); //Initial estimate of result

    //Calculate initial gradient
    let fi: number[] = GradG(x, h);
    // console.log("fi:"+fi);

    //Calculate initial norm
    let DelG: number = 0.0;

    for (let i: number = 0; i < n; ++i) {
        DelG += fi[i] * fi[i];
    }
    DelG = Math.sqrt(DelG);
    let b: number = alpha / DelG;

    //Iterate until value is <= tolerance limit
    while (DelG > tolerance) {
        //Calculate next value
        for (let i = 0; i < n; ++i) {
            x[i] -= b * fi[i];
        }
        h /= 2;

        //Calculate next gradient
        fi = GradG(x, h);
        //Calculate next norm
        DelG = 0;
        for (let i: number = 0; i < n; ++i) {
            DelG += fi[i] * fi[i];
        }

        DelG = Math.sqrt(DelG);
        b = alpha / DelG;

        //Calculate next value
        let g1: number = g(x);

        //Adjust parameter
        if (g1 > g0) alpha /= 2;
        else g0 = g1;
    }
}

// Provides a rough calculation of gradient g(x).
export const GradG = (x: number[], h: number) => {

    let n: number = x.length;
    let z: number[] = [n];
    let y: number[] = x; // NB: aliases x, so the h-perturbations below persist in x
    let g0: number = g(x);

    // console.log("y:" + y);

    for (let i = 0; i < n; ++i) {
        y[i] += h;
        z[i] = (g(y) - g0) / h;
    }
    // console.log("z:"+z);
    return z;
}

// Method to provide function g(x).
export const g = (x: number[]) => {
    return (x[0] - 1) * (x[0] - 1)
        * Math.exp(-x[1] * x[1]) + x[1] * (x[1] + 2)
        * Math.exp(-2 * x[0] * x[0]);
}

export const gradientDescentMain = () => {
    let tolerance: number = 0.0000006;
    let alpha: number = 0.1;
    let x: number[] = [2];

    //Initial guesses
    x[0] = 0.1;
    //of location of minimums 
    x[1] = -1;
    steepestDescent(x, alpha, tolerance);

    console.log("Testing steepest descent method");
    console.log("The minimum is at x[0] = " + x[0]
        + ", x[1] = " + x[1]);
    // console.log("");
}

gradientDescentMain();
Output:
Testing steepest descent method
The minimum is at x[0] = 0.10768224291553158, x[1] = -1.2233090211217854

Linear Regression

Translation of
  •   [Linear Regression using Gradient Descent by Adarsh Menon]
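
For reference (an addition here, not part of the linked article), the model fitted below is ŷ = m·x + c under the mean-squared-error loss, whose partial derivatives are the D_m and D_c computed in lossFunction:

\[
L(m,c) = \frac{1}{n}\sum_{i=1}^{n}\bigl(y_i - (m x_i + c)\bigr)^{2},
\qquad
\frac{\partial L}{\partial m} = -\frac{2}{n}\sum_{i=1}^{n} x_i\,(y_i - \hat{y}_i),
\qquad
\frac{\partial L}{\partial c} = -\frac{2}{n}\sum_{i=1}^{n} (y_i - \hat{y}_i)
\]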



let data: number[][] =
    [[32.5023452694530, 31.70700584656990],
    [53.4268040332750, 68.77759598163890],
    [61.5303580256364, 62.56238229794580],
    [47.4756396347860, 71.54663223356770],
    [59.8132078695123, 87.23092513368730],
    [55.1421884139438, 78.21151827079920],
    [52.2117966922140, 79.64197304980870],
    [39.2995666943170, 59.17148932186950],
    [48.1050416917682, 75.33124229706300],
    [52.5500144427338, 71.30087988685030],
    [45.4197301449737, 55.16567714595910],
    [54.3516348812289, 82.47884675749790],
    [44.1640494967733, 62.00892324572580],
    [58.1684707168577, 75.39287042599490],
    [56.7272080570966, 81.43619215887860],
    [48.9558885660937, 60.72360244067390],
    [44.6871962314809, 82.89250373145370],
    [60.2973268513334, 97.37989686216600],
    [45.6186437729558, 48.84715331735500],
    [38.8168175374456, 56.87721318626850],
    [66.1898166067526, 83.87856466460270],
    [65.4160517451340, 118.59121730252200],
    [47.4812086078678, 57.25181946226890],
    [41.5756426174870, 51.39174407983230],
    [51.8451869056394, 75.38065166531230],
    [59.3708220110895, 74.76556403215130],
    [57.3100034383480, 95.45505292257470],
    [63.6155612514533, 95.22936601755530],
    [46.7376194079769, 79.05240616956550],
    [50.5567601485477, 83.43207142132370],
    [52.2239960855530, 63.35879031749780],
    [35.5678300477466, 41.41288530370050],
    [42.4364769440556, 76.61734128007400],
    [58.1645401101928, 96.76956642610810],
    [57.5044476153417, 74.08413011660250],
    [45.4405307253199, 66.58814441422850],
    [61.8962226802912, 77.76848241779300],
    [33.0938317361639, 50.71958891231200],
    [36.4360095113868, 62.12457081807170],
    [37.6756548608507, 60.81024664990220],
    [44.5556083832753, 52.68298336638770],
    [43.3182826318657, 58.56982471769280],
    [50.0731456322890, 82.90598148507050],
    [43.8706126452183, 61.42470980433910],
    [62.9974807475530, 115.24415280079500],
    [32.6690437634671, 45.57058882337600],
    [40.1668990087037, 54.08405479622360],
    [53.5750775316736, 87.99445275811040],
    [33.8642149717782, 52.72549437590040],
    [64.7071386661212, 93.57611869265820],
    [38.1198240268228, 80.16627544737090],
    [44.5025380646451, 65.10171157056030],
    [40.5995383845523, 65.56230126040030],
    [41.7206763563412, 65.28088692082280],
    [51.0886346783367, 73.43464154632430],
    [55.0780959049232, 71.13972785861890],
    [41.3777265348952, 79.10282968354980],
    [62.4946974272697, 86.52053844034710],
    [49.2038875408260, 84.74269780782620],
    [41.1026851873496, 59.35885024862490],
    [41.1820161051698, 61.68403752483360],
    [50.1863894948806, 69.84760415824910],
    [52.3784462192362, 86.09829120577410],
    [50.1354854862861, 59.10883926769960],
    [33.6447060061917, 69.89968164362760],
    [39.5579012229068, 44.86249071116430],
    [56.1303888168754, 85.49806777884020],
    [57.3620521332382, 95.53668684646720],
    [60.2692143939979, 70.25193441977150],
    [35.6780938894107, 52.72173496477490],
    [31.5881169981328, 50.39267013507980],
    [53.6609322616730, 63.64239877565770],
    [46.6822286494719, 72.24725106866230],
    [43.1078202191024, 57.81251297618140],
    [70.3460756150493, 104.25710158543800],
    [44.4928558808540, 86.64202031882200],
    [57.5045333032684, 91.48677800011010],
    [36.9300766091918, 55.23166088621280],
    [55.8057333579427, 79.55043667850760],
    [38.9547690733770, 44.84712424246760],
    [56.9012147022470, 80.20752313968270],
    [56.8689006613840, 83.14274979204340],
    [34.3331247042160, 55.72348926054390],
    [59.0497412146668, 77.63418251167780],
    [57.7882239932306, 99.05141484174820],
    [54.2823287059674, 79.12064627468000],
    [51.0887198989791, 69.58889785111840],
    [50.2828363482307, 69.51050331149430],
    [44.2117417520901, 73.68756431831720],
    [38.0054880080606, 61.36690453724010],
    [32.9404799426182, 67.17065576899510],
    [53.6916395710700, 85.66820314500150],
    [68.7657342696216, 114.85387123391300],
    [46.2309664983102, 90.12357206996740],
    [68.3193608182553, 97.91982103524280],
    [50.0301743403121, 81.53699078301500],
    [49.2397653427537, 72.11183246961560],
    [50.0395759398759, 85.23200734232560],
    [48.1498588910288, 66.22495788805460],
    [25.1284846477723, 53.45439421485050]];

// Computes the gradients D_m and D_c of the mean-squared-error loss.
function lossFunction(arr0: number[], arr1: number[], arr2: number[]) {

    let n: number = arr0.length; // Number of elements in X

    //D_m = (-2/n) * sum(X * (Y - Y_pred))  # Derivative wrt m
    let a: number = (-2 / n) * (arr0.map((a, i) => a * (arr1[i] - arr2[i]))).reduce((sum, current) => sum + current);
    //D_c = (-2/n) * sum(Y - Y_pred)  # Derivative wrt c
    let b: number = (-2 / n) * (arr1.map((a, i) => (a - arr2[i]))).reduce((sum, current) => sum + current);
    return [a, b];
}

export const gradientDescentMain = () => {

    // Building the model
    let m: number = 0;
    let c: number = 0;
    let X_arr: number[];
    let Y_arr: number[];
    let Y_pred_arr: number[];
    let D_m: number = 0;
    let D_c: number = 0;

    let L: number = 0.00000001;  // The learning Rate
    let epochs: number = 10000000;  // The number of iterations to perform gradient descent

    //Initial guesses
    for (let i = 0; i < epochs; i++) {
        X_arr = data.map(function (value, index) { return value[0]; });
        Y_arr = data.map(function (value, index) { return value[1]; });

        // The current predicted value of Y
        Y_pred_arr = X_arr.map((a) => ((m * a) + c));

        let all = lossFunction(X_arr, Y_arr, Y_pred_arr);
        D_m = all[0];
        D_c = all[1];

        m = m - L * D_m;  // Update m
        c = c - L * D_c;  // Update c
    }

    console.log("m: " + m + " c: " + c);
}

gradientDescentMain();

Wren

Translation of: Go
Library: Wren-fmt
import "./fmt" for Fmt

// Function for which minimum is to be found.
var g = Fn.new { |x|
    return (x[0]-1)*(x[0]-1)*
        (-x[1]*x[1]).exp + x[1]*(x[1]+2)*
        (-2*x[0]*x[0]).exp
}

// Provides a rough calculation of gradient g(p).
var gradG = Fn.new { |p|
    var x = p[0]
    var y = p[1]
    return [2*(x-1)*(-y*y).exp - 4*x*(-2*x*x).exp*y*(y+2),
            -2*(x-1)*(x-1)*y*(-y*y).exp + (-2*x*x).exp*(y+2) + (-2*x*x).exp*y]
}

var steepestDescent = Fn.new { |x, alpha, tolerance|
    var n = x.count
    var g0 = g.call(x) // Initial estimate of result.

    // Calculate initial gradient.
    var fi = gradG.call(x)

    // Calculate initial norm.
    var delG = 0
    for (i in 0...n) delG = delG + fi[i]*fi[i]
    delG = delG.sqrt
    var b = alpha/delG

    // Iterate until value is <= tolerance.
    while (delG > tolerance) {
        // Calculate next value.
        for (i in 0...n) x[i] = x[i] - b*fi[i]

        // Calculate next gradient.
        fi = gradG.call(x)

        // Calculate next norm.
        delG = 0
        for (i in 0...n) delG = delG + fi[i]*fi[i]
        delG = delG.sqrt
        b = alpha/delG

        // Calculate next value.
        var g1 = g.call(x)

        // Adjust parameter.
        if (g1 > g0) {
            alpha = alpha / 2
        } else {
            g0 = g1
        }
    }
}

var tolerance = 0.0000006
var alpha = 0.1
var x = [0.1, -1] // Initial guess of location of minimum.

steepestDescent.call(x, alpha, tolerance)
System.print("Testing steepest descent method:")
Fmt.print("The minimum is at x = $f, y = $f for which f(x, y) = $f.", x[0], x[1], g.call(x))
Output:
Testing steepest descent method:
The minimum is at x = 0.107627, y = -1.223260 for which f(x, y) = -0.750063.

zkl

Translation of: Go

with a tweaked gradG (a one-sided numerical approximation of the gradient; see below)
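
The tweak replaces the analytic gradient with one-sided finite differences, with h halved on each iteration:

\[
\frac{\partial f}{\partial x} \approx \frac{f(x+h,\,y)-f(x,\,y)}{h},
\qquad
\frac{\partial f}{\partial y} \approx \frac{f(x,\,y+h)-f(x,\,y)}{h}
\]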

fcn steepestDescent(f, x,y, alpha, h){
   g0:=f(x,y);	# Initial estimate of result.
   fix,fiy := gradG(f,x,y,h);	# Calculate initial gradient
 
   # Calculate initial norm.
   b:=alpha / (delG := (fix*fix + fiy*fiy).sqrt());
   while(delG > h){	# Iterate until value is <= tolerance.
      x,y = x - b*fix, y - b*fiy;
      # Calculate next gradient and next value
      fix,fiy = gradG(f,x,y, h/=2);
      b=alpha / (delG = (fix*fix + fiy*fiy).sqrt());	# Calculate next norm.
      if((g1:=f(x,y)) > g0) alpha/=2 else g0 = g1;	# Adjust parameter.
   }
   return(x,y)
}
 
fcn gradG(f,x,y,h){	# gives a rough calculation of gradient f(x,y).
   g0:=f(x,y);
   return((f(x + h, y) - g0)/h, (f(x, y + h) - g0)/h)
}
fcn f(x,y){	# Function for which minimum is to be found.
   (x - 1).pow(2)*(-y.pow(2)).exp() + 
   y*(y + 2)*(-2.0*x.pow(2)).exp()
}

tolerance,alpha := 0.0000006, 0.1;
 
x,y := 0.1, -1.0;	# Initial guess of location of minimum.
x,y = steepestDescent(f,x,y,alpha,tolerance);
 
println("Testing steepest descent method:");
println("The minimum is at (x,y) = (%f,%f). f(x,y) = %f".fmt(x,y,f(x,y)));
Output:
Testing steepest descent method:
The minimum is at (x,y) = (0.107608,-1.223299). f(x,y) = -0.750063