Linear2d layer (#197)
* linear2d_layer forward implementation

* implement backward

* introduce concurrency, outtroduce stupidity

* fix style

* add parameters api to linear2d_layer

* add constructor for linear2d_layer

* add integration for linear2d layer

* set usage rules for linear2d_layer

* add linear2d_layer to public api

* update tests for linear2d layer

* remove extra comment

* remove rubbish

* move linear2d layer logic into submodule

* update cmake for linear2d_layer

* update tests for linear2d_layer

* update linear2d_layer tests

* update linear2d_layer tests for batch last

* make linear2d_layer with batch as last dimension (performance)

* linear2d_layer: fix gradient updates

* linear2d_layer: make it 2d

* linear2d_layer: forgot a file

* linear2d_layer: temporarily remove api

* Don't expose the concrete layer type via nf

* Report success to stdout

* Include linear2d test in cmake

* Add Linear2d to README

* Plumbing of linear2d with input2d and linear2d

* linear2d_layer: add flatten2d layer

* linear2d_layer: make linear2d layer work with input2d and flatten2d

* update cmake

* linear2d_layer: use flatten layer instead of flatten2d

* linear2d_layer: remove flatten2d layer

* linear2d_layer: remove public api

* linear2d_layer: update cmakelists

* linear2d_layer: workaround cpu imprecision to make ci happy

* Add linear2d example

* linear2d_layer: remove redundant constructor args

* linear2d_layer: make example converge

* linear2d_layer: make weights init with normal distribution

* linear2d_layer: add loss stopping and more iterations

* linear2d_layer: update tests

* Tidy up

* Require passing only out_features to linear2d(); tidy up

* Remove linear2d example

---------

Co-authored-by: milancurcic <[email protected]>
OneAdder and milancurcic authored Feb 17, 2025
1 parent 4ad75bc commit c316ee1
Showing 11 changed files with 476 additions and 10 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -38,6 +38,8 @@ add_library(neural-fortran
src/nf/nf_layer_constructors_submodule.f90
src/nf/nf_layer.f90
src/nf/nf_layer_submodule.f90
src/nf/nf_linear2d_layer.f90
src/nf/nf_linear2d_layer_submodule.f90
src/nf/nf_loss.f90
src/nf/nf_loss_submodule.f90
src/nf/nf_maxpool2d_layer.f90
1 change: 1 addition & 0 deletions README.md
@@ -34,6 +34,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
| Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
| Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
| Linear (2-d) | `linear2d` | `input2d` | 2 | ✅ | ✅ |
| Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |

(*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset.
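For orientation, here is a minimal usage sketch of the new layer (not part of this diff; the shapes are illustrative, and it assumes the generic `input` constructor with two dimensions produces the `input2d` layer that `linear2d` requires upstream):

```fortran
program linear2d_usage
  use nf, only: network, input, linear2d
  implicit none
  type(network) :: net

  ! A sequence of 12 steps with 4 features each, projected to
  ! 8 features per step; the layer output has shape (12, 8).
  net = network([ &
    input(12, 4), &
    linear2d(8) &
  ])
  call net % print_info()
end program linear2d_usage
```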
2 changes: 1 addition & 1 deletion src/nf.f90
@@ -3,7 +3,7 @@ module nf
use nf_datasets_mnist, only: label_digits, load_mnist
use nf_layer, only: layer
use nf_layer_constructors, only: &
conv2d, dense, flatten, input, maxpool2d, reshape, linear2d
use nf_loss, only: mse, quadratic
use nf_metrics, only: corr, maxabs
use nf_network, only: network
12 changes: 11 additions & 1 deletion src/nf/nf_layer_constructors.f90
@@ -8,7 +8,7 @@ module nf_layer_constructors
implicit none

private
public :: conv2d, dense, flatten, input, maxpool2d, reshape, linear2d

interface input

@@ -185,6 +185,16 @@ module function reshape(output_shape) result(res)
!! Resulting layer instance
end function reshape

module function linear2d(out_features) result(res)
!! Rank-2 (sequence_length, out_features) linear layer constructor.
!! sequence_length is determined at layer initialization, based on the
!! output shape of the previous layer.
integer, intent(in) :: out_features
!! Number of output features
type(layer) :: res
!! Resulting layer instance
end function linear2d

end interface

end module nf_layer_constructors
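Only `out_features` is supplied by the caller; `sequence_length` and `in_features` are picked up from the upstream layer when the network initializes the layer. A small sketch of what the constructor alone gives you (illustrative, with a hypothetical variable name):

```fortran
program construct_linear2d
  use nf, only: linear2d
  use nf_layer, only: layer
  implicit none
  type(layer) :: projection

  ! Only the output width is fixed at construction time; the
  ! (sequence_length, 8) output shape is completed when the network
  ! initializes this layer against its upstream layer.
  projection = linear2d(8)
end program construct_linear2d
```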
12 changes: 12 additions & 0 deletions src/nf/nf_layer_constructors_submodule.f90
@@ -9,6 +9,7 @@
use nf_input3d_layer, only: input3d_layer
use nf_maxpool2d_layer, only: maxpool2d_layer
use nf_reshape_layer, only: reshape3d_layer
use nf_linear2d_layer, only: linear2d_layer
use nf_activation, only: activation_function, relu, sigmoid

implicit none
@@ -71,6 +72,7 @@ module function flatten() result(res)
end function flatten



module function input1d(layer_size) result(res)
integer, intent(in) :: layer_size
type(layer) :: res
@@ -148,4 +150,14 @@ module function reshape(output_shape) result(res)

end function reshape


module function linear2d(out_features) result(res)
integer, intent(in) :: out_features
type(layer) :: res

res % name = 'linear2d'
allocate(res % p, source=linear2d_layer(out_features))

end function linear2d

end submodule nf_layer_constructors_submodule
53 changes: 46 additions & 7 deletions src/nf/nf_layer_submodule.f90
@@ -9,6 +9,7 @@
use nf_input3d_layer, only: input3d_layer
use nf_maxpool2d_layer, only: maxpool2d_layer
use nf_reshape_layer, only: reshape3d_layer
use nf_linear2d_layer, only: linear2d_layer
use nf_optimizers, only: optimizer_base_type

contains
@@ -47,6 +48,8 @@ pure module subroutine backward_1d(self, previous, gradient)
call this_layer % backward(prev_layer % output, gradient)
type is(maxpool2d_layer)
call this_layer % backward(prev_layer % output, gradient)
type is(linear2d_layer)
call this_layer % backward(prev_layer % output, gradient)
end select

end select
@@ -60,9 +63,19 @@ pure module subroutine backward_2d(self, previous, gradient)
class(layer), intent(in) :: previous
real, intent(in) :: gradient(:,:)

! Backward pass from a 2-d layer downstream currently implemented
! only for linear2d layers.
select type(this_layer => self % p)

type is(linear2d_layer)

select type(prev_layer => previous % p)
type is(input2d_layer)
call this_layer % backward(prev_layer % output, gradient)
type is(linear2d_layer)
call this_layer % backward(prev_layer % output, gradient)
end select

end select

end subroutine backward_2d


@@ -182,6 +195,8 @@ pure module subroutine forward(self, input)
call this_layer % forward(prev_layer % output)
type is(reshape3d_layer)
call this_layer % forward(prev_layer % output)
type is(linear2d_layer)
call this_layer % forward(prev_layer % output)
end select

type is(reshape3d_layer)
Expand All @@ -196,6 +211,16 @@ pure module subroutine forward(self, input)
call this_layer % forward(prev_layer % output)
end select

type is(linear2d_layer)

! Upstream layers permitted: input2d, linear2d
select type(prev_layer => input % p)
type is(input2d_layer)
call this_layer % forward(prev_layer % output)
type is(linear2d_layer)
call this_layer % forward(prev_layer % output)
end select

end select

end subroutine forward
@@ -231,8 +256,10 @@ pure module subroutine get_output_2d(self, output)

type is(input2d_layer)
allocate(output, source=this_layer % output)
type is(linear2d_layer)
allocate(output, source=this_layer % output)
class default
error stop '2-d output can only be read from an input2d or linear2d layer.'

end select

@@ -274,7 +301,7 @@ impure elemental module subroutine init(self, input)
call this_layer % init(input % layer_shape)
end select

! The shape of linear2d, conv2d, maxpool2d, or flatten layers is not known
! until we receive an input layer.
select type(this_layer => self % p)
type is(conv2d_layer)
@@ -283,9 +310,11 @@
self % layer_shape = shape(this_layer % output)
type is(flatten_layer)
self % layer_shape = shape(this_layer % output)
type is(linear2d_layer)
self % layer_shape = shape(this_layer % output)
end select

self % input_layer_shape = input % layer_shape
self % initialized = .true.

end subroutine init
@@ -328,6 +357,8 @@ elemental module function get_num_params(self) result(num_params)
num_params = 0
type is (reshape3d_layer)
num_params = 0
type is (linear2d_layer)
num_params = this_layer % get_num_params()
class default
error stop 'Unknown layer type.'
end select
@@ -355,6 +386,8 @@ module function get_params(self) result(params)
! No parameters to get.
type is (reshape3d_layer)
! No parameters to get.
type is (linear2d_layer)
params = this_layer % get_params()
class default
error stop 'Unknown layer type.'
end select
@@ -379,9 +412,11 @@ module function get_gradients(self) result(gradients)
type is (maxpool2d_layer)
! No gradients to get.
type is (flatten_layer)
! No gradients to get.
type is (reshape3d_layer)
! No gradients to get.
type is (linear2d_layer)
gradients = this_layer % get_gradients()
class default
error stop 'Unknown layer type.'
end select
@@ -429,6 +464,9 @@ module subroutine set_params(self, params)
type is (conv2d_layer)
call this_layer % set_params(params)

type is (linear2d_layer)
call this_layer % set_params(params)

type is (maxpool2d_layer)
! No parameters to set.
write(stderr, '(a)') 'Warning: calling set_params() ' &
@@ -446,6 +484,7 @@ module subroutine set_params(self, params)

class default
error stop 'Unknown layer type.'

end select

end subroutine set_params
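The `get_params`/`set_params` pair exchanges a flat rank-1 array with the optimizer. A sketch of the expected round trip for `linear2d`; the flat layout (weights ahead of biases) and the standalone `init` call are assumptions here, since the submodule defining them is not expanded on this page:

```fortran
program params_roundtrip
  use nf, only: input, linear2d
  use nf_layer, only: layer
  implicit none
  type(layer) :: inp, lin
  real, allocatable :: params(:)

  inp = input(12, 4)    ! sequence_length = 12, in_features = 4
  lin = linear2d(8)
  call lin % init(inp)  ! shapes now known: weights(4, 8), biases(8)

  params = lin % get_params()
  print *, size(params)          ! expected: 4*8 + 8 = 40
  call lin % set_params(params)  ! identity round trip
end program params_roundtrip
```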
77 changes: 77 additions & 0 deletions src/nf/nf_linear2d_layer.f90
@@ -0,0 +1,77 @@
module nf_linear2d_layer

use nf_activation, only: activation_function
use nf_base_layer, only: base_layer

implicit none

private
public :: linear2d_layer

type, extends(base_layer) :: linear2d_layer
integer :: sequence_length, in_features, out_features, batch_size

real, allocatable :: weights(:,:)
real, allocatable :: biases(:)
real, allocatable :: output(:,:)
real, allocatable :: gradient(:,:) ! input gradient
real, allocatable :: dw(:,:) ! weight gradients
real, allocatable :: db(:) ! bias gradients

contains

procedure :: backward
procedure :: forward
procedure :: init
procedure :: get_num_params
procedure :: get_params
procedure :: get_gradients
procedure :: set_params

end type linear2d_layer

interface linear2d_layer
module function linear2d_layer_cons(out_features) result(res)
integer, intent(in) :: out_features
type(linear2d_layer) :: res
end function linear2d_layer_cons
end interface linear2d_layer

interface
pure module subroutine forward(self, input)
class(linear2d_layer), intent(in out) :: self
real, intent(in) :: input(:,:)
end subroutine forward

pure module subroutine backward(self, input, gradient)
class(linear2d_layer), intent(in out) :: self
real, intent(in) :: input(:,:)
real, intent(in) :: gradient(:,:)
end subroutine backward

module subroutine init(self, input_shape)
class(linear2d_layer), intent(in out) :: self
integer, intent(in) :: input_shape(:)
end subroutine init

pure module function get_num_params(self) result(num_params)
class(linear2d_layer), intent(in) :: self
integer :: num_params
end function get_num_params

module function get_params(self) result(params)
class(linear2d_layer), intent(in), target :: self
real, allocatable :: params(:)
end function get_params

module function get_gradients(self) result(gradients)
class(linear2d_layer), intent(in), target :: self
real, allocatable :: gradients(:)
end function get_gradients

module subroutine set_params(self, params)
class(linear2d_layer), intent(in out) :: self
real, intent(in), target :: params(:)
end subroutine set_params
end interface
end module nf_linear2d_layer
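The implementation submodule, `src/nf/nf_linear2d_layer_submodule.f90` (added to the CMake build above), is among the changed files not expanded on this page. For a layer computing $y = xW + b$, where $x$ has shape (sequence_length, in_features), the stored quantities correspond to

$$
\frac{\partial L}{\partial W} = x^\top \frac{\partial L}{\partial y}, \qquad
\frac{\partial L}{\partial b} = \sum_{t=1}^{T} \left( \frac{\partial L}{\partial y} \right)_{t,:}, \qquad
\frac{\partial L}{\partial x} = \frac{\partial L}{\partial y}\, W^\top,
$$

with $T$ the sequence length; `dw` and `db` hold the first two, and `gradient` holds the last. A minimal sketch consistent with the interfaces above, but not the library's actual code (the remaining module procedures are omitted, and the submodule name is hypothetical):

```fortran
submodule(nf_linear2d_layer) nf_linear2d_layer_sketch

  implicit none

contains

  pure module subroutine forward(self, input)
    class(linear2d_layer), intent(in out) :: self
    real, intent(in) :: input(:,:)
    integer :: t
    ! y = x W + b, with the bias broadcast over sequence steps
    self % output = matmul(input, self % weights)
    do concurrent (t = 1:self % sequence_length)
      self % output(t,:) = self % output(t,:) + self % biases
    end do
  end subroutine forward

  pure module subroutine backward(self, input, gradient)
    class(linear2d_layer), intent(in out) :: self
    real, intent(in) :: input(:,:)
    real, intent(in) :: gradient(:,:)
    ! dW = x^T dy; db = dy summed over the sequence; dx = dy W^T.
    ! Accumulating into dw and db assumes they are zeroed after each
    ! optimizer update -- an assumption, not shown in this diff.
    self % dw = self % dw + matmul(transpose(input), gradient)
    self % db = self % db + sum(gradient, dim=1)
    self % gradient = matmul(gradient, transpose(self % weights))
  end subroutine backward

end submodule nf_linear2d_layer_sketch
```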