for i, (img, label) in enumerate(train_loader):
    model_params = model.trainable_variables
    with tf.GradientTape() as tape:  # in PyTorch, autograd tracks the tensors during the forward pass
        out = model(img)             # in TF, think of the ops as being recorded onto the tape
        loss = loss_fn(out, label)
    grads = tape.gradient(loss, model_params)            # compute the gradients -- loss.backward()
    optimizer.apply_gradients(zip(grads, model_params))  # optimizer.step()
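For reference, the same loop on the PyTorch side looks roughly like this (a sketch; model, loss_fn, and optimizer are assumed to be the torch counterparts):
# PyTorch-side equivalent (sketch)
for i, (img, label) in enumerate(train_loader):
    optimizer.zero_grad()
    out = model(img)           # autograd records the graph during forward
    loss = loss_fn(out, label)
    loss.backward()            # fills .grad on the parameters (like tape.gradient)
    optimizer.step()           # applies the update (like optimizer.apply_gradients)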
import tensorflow as tf
from tensorflow.keras.layers import *
cifar = tf.random.uniform([1, 32, 32, 3], maxval=1)
out1 = Conv2DTranspose(filters=6,
                       kernel_size=3,
                       strides=1,
                       padding='valid')(cifar)
out2 = Conv2DTranspose(filters=6,
                       kernel_size=3,
                       strides=1,
                       padding='same')(cifar)
out3 = Conv2DTranspose(filters=6,
                       kernel_size=3,
                       strides=2,
                       padding='valid')(cifar)
out4 = Conv2DTranspose(filters=6,
                       kernel_size=3,
                       strides=2,
                       padding='same')(cifar)
print(out1.shape) # (1, 34, 34, 6)
print(out2.shape) # (1, 32, 32, 6)
print(out3.shape) # (1, 65, 65, 6)
print(out4.shape) # (1, 64, 64, 6)
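The four shapes above follow the usual transposed-convolution output-size rule (with no output_padding): for padding='valid' the spatial size becomes (in - 1) * stride + kernel, and for padding='same' it becomes in * stride. A quick check (hypothetical helper, not part of the original notes):
def deconv_out_size(in_size, kernel, stride, padding):
    # transposed-conv output size, assuming no output_padding / dilation
    if padding == 'valid':
        return (in_size - 1) * stride + kernel
    return in_size * stride  # 'same'

for k, s, p in [(3, 1, 'valid'), (3, 1, 'same'), (3, 2, 'valid'), (3, 2, 'same')]:
    print(deconv_out_size(32, k, s, p))  # 34, 32, 65, 64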
@tf.function  # compiles the decorated Python function into a TF graph, so repeated calls run faster
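In practice the decorator goes on the step function, e.g. on the tape-based train step above (a sketch; train_step is a name introduced here for illustration):
@tf.function
def train_step(img, label):
    with tf.GradientTape() as tape:
        out = model(img, training=True)
        loss = loss_fn(out, label)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss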
checkpoint_path = "checkpoint/model_epoch_1.ckpt"
model.load_weights(checkpoint_path)  # load the epoch-1 weights
checkpoint_dir = "checkpoint/"
latest = tf.train.latest_checkpoint(checkpoint_dir)  # path of the most recent checkpoint in the directory
model.load_weights(latest)           # load the latest checkpoint
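These files come from saving the weights during training, roughly like this (sketch; the per-epoch filename pattern is an assumption matching the path above):
# inside the epoch loop (sketch)
model.save_weights("checkpoint/model_epoch_{}.ckpt".format(epoch))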
tf.debugging.set_log_device_placement(True)  # log which device (CPU/GPU) each op is placed on, to see what is going on
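Placement can also be forced explicitly; a minimal sketch:
with tf.device('/GPU:0'):    # or '/CPU:0'
    a = tf.random.uniform([2, 3])
    b = tf.random.uniform([3, 4])
    c = tf.matmul(a, b)      # with logging enabled, the placement of this op is printed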
See the Keras RNN API guide for details about the usage of the RNN API.
Based on available runtime hardware and constraints, this layer will choose different implementations (cuDNN-based or pure-TensorFlow)
to maximize performance.
If a GPU is available and all the arguments to the layer meet the requirements of the cuDNN kernel (see below for details),
the layer will use a fast cuDNN implementation (a minimal example follows the requirements list).
The requirements to use the cuDNN implementation are:
- activation == tanh
- recurrent_activation == sigmoid
- recurrent_dropout == 0
- unroll is False
- use_bias is True
- Inputs, if masking is used, are strictly right-padded.
- Eager execution is enabled in the outermost context.
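A layer built with the default arguments already satisfies these conditions, so it can take the cuDNN path on GPU (a minimal sketch):
lstm = tf.keras.layers.LSTM(64)        # activation='tanh', recurrent_activation='sigmoid',
                                       # recurrent_dropout=0, unroll=False, use_bias=True by default
seq = tf.random.uniform([8, 10, 16])   # (batch, time, features)
out = lstm(seq)
print(out.shape)                       # (8, 64)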
class Model(tf.keras.Model):
    def __init__(self):
        super().__init__()
        ...
        self.trainable = tf.constant(True)  # from what I found, the graph is not rebuilt this way, so it is faster
        self.training = tf.constant(True)
        self.batchnorm = BatchNormalization(trainable=self.trainable)
        self.dropout = Dropout(rate=self.rate)  # `training` is not a constructor argument; pass it when the layer is called
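    # (sketch) in call(), the stored flag is then passed to the layers explicitly:
    def call(self, x):
        x = self.batchnorm(x, training=self.training)
        x = self.dropout(x, training=self.training)
        return x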
for epoch in range(epochs):
    for i, (img, label) in enumerate(train_loader):
        model.trainable = tf.constant(True)   # training mode
        model.training = tf.constant(True)    # training mode
        model_params = model.trainable_variables
    for j, (val_img, val_label) in enumerate(valid_loader):
        model.trainable = tf.constant(False)  # evaluation mode
        model.training = tf.constant(False)   # evaluation mode
dataset = tf.data.Dataset.from_tensor_slices((x_data, y_data))
data_loader = dataset.map(preprocess).shuffle(60000, reshuffle_each_iteration=True).batch(32, drop_remainder=False)
# map: the preprocessing function (write a preprocess function that handles a single example)
### for example, like this
def preprocess(x, y):
    image = tf.reshape(x, [32, 32, 3])
    image = tf.cast(image, tf.float32) / 255.0
    label = tf.one_hot(y, depth=10)
    label = tf.squeeze(label)  # [1, 10] -> [10]
    return image, label
###
# shuffle: shuffle with a buffer as long as the dataset; with reshuffle_each_iteration=False the same shuffle order is repeated every epoch
# batch: with drop_remainder=True, a final batch smaller than batch_size is dropped
for (x_batch, y_batch) in data_loader:
    ...  # x_batch: (32, 32, 32, 3), y_batch: (32, 10)
class Model(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.block = tf.keras.Sequential([ ... ])  # nesting a Sequential inside a subclassed model became possible as the TF version went up (tf.keras.models.Sequential is the same class)
class Block(tf.keras.layers.Layer):  # resolved: declare the block as its own Layer subclass
    def __init__(self):
        super(Block, self).__init__()
        ...
    def call(self, x):
        ...
        return out
out = tf.keras.layers.Concatenate(axis=-1)([in1, in2]) # default: axis=-1
out = tf.keras.layers.concatenate([in1, in2])
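For example, concatenating along the channel axis (a quick sketch):
in1 = tf.random.uniform([1, 32, 32, 3])
in2 = tf.random.uniform([1, 32, 32, 6])
out = tf.keras.layers.Concatenate(axis=-1)([in1, in2])
print(out.shape)  # (1, 32, 32, 9)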
If you build a model with the subclassing API, model.summary() does not work directly. A workaround is to add a helper method to the subclassed model that wraps it as a functional model:
    def model(self):
        inputs = tf.keras.Input(shape=(32, 32, 3))
        outputs = self.call(inputs)
        return tf.keras.Model(inputs=inputs, outputs=outputs)
model = Model()
model.model().summary()
plot_model(model.model(), to_file='model.png', show_shapes=True, show_layer_names=False)  # requires: from tensorflow.keras.utils import plot_model
import tensorflow as tf
import os
import random
import numpy as np

SEED = 42  # any fixed value; SEED was referenced but not defined in the original snippet

def set_seeds(seed=SEED):
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)
def set_global_determinism(seed=SEED, fast_n_close=False):
    """
    Enable 100% reproducibility on operations related to tensors and randomness.
    Parameters:
      seed (int): seed value for global randomness
      fast_n_close (bool): whether to favor speed at the cost of determinism/reproducibility
    """
    set_seeds(seed=seed)
    if fast_n_close:
        return
    """
    logging.warning("*******************************************************************************")
    logging.warning("*** set_global_determinism is called, setting full determinism, will be slow ***")
    logging.warning("*******************************************************************************")
    """
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    # https://www.tensorflow.org/api_docs/python/tf/config/threading/set_inter_op_parallelism_threads
    tf.config.threading.set_inter_op_parallelism_threads(1)
    tf.config.threading.set_intra_op_parallelism_threads(1)

from tfdeterminism import patch  # from the third-party tensorflow-determinism package
patch()
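Usage is just calling it once at the top of the script (sketch):
set_global_determinism(seed=SEED)  # call once before building the dataset / model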
import tensorflow as tf
# matmul (with the second operand transposed: "ij, kj->ik" is x @ y^T)
x = tf.random.uniform([2, 3])
y = tf.random.uniform([4, 3])
z = tf.einsum("ij, kj->ik", x, y)
print(z.shape)  # (2, 4)
# Fully connected layer
a = tf.random.uniform([32, 3, 228, 228])   # NCHW (PyTorch-style layout)
b = tf.random.uniform([32, 228, 228, 3])   # NHWC (TensorFlow-style layout)
w1 = tf.random.uniform([10, 3 * 228 * 228])
w2 = tf.random.uniform([228 * 228 * 3, 10])
y1 = tf.einsum("nchw, kchw->nk", a, tf.reshape(w1, [10, 3, 228, 228]))   # PyTorch layout
y2 = tf.einsum("nhwc, hwck->nk", b, tf.reshape(w2, [228, 228, 3, 10]))   # TensorFlow layout
print(y1.shape)  # (32, 10)
print(y2.shape)  # (32, 10)
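As a sanity check, the NHWC version is the same thing as flattening the input and doing a plain matmul (sketch):
y2_ref = tf.reshape(b, [32, -1]) @ w2       # flatten to (32, 228*228*3), then matmul
print(tf.reduce_max(tf.abs(y2 - y2_ref)))   # ~0 (up to float rounding)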
import tensorflow.keras.backend as K
# default: channels_last
print(K.image_data_format())   # channels_last
K.set_image_data_format('channels_first')
print(K.image_data_format())   # channels_first
K.set_image_data_format('channels_last')
print(K.image_data_format())   # channels_last
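This global setting is what convolution/pooling layers fall back to when data_format is not passed explicitly; a quick check (sketch):
K.set_image_data_format('channels_first')
print(tf.keras.layers.Conv2D(4, 3).data_format)   # channels_first
K.set_image_data_format('channels_last')
print(tf.keras.layers.Conv2D(4, 3).data_format)   # channels_last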