Commit

update transformer
sunlanchang committed Jun 14, 2020
1 parent befde29 commit 8180cba
Showing 5 changed files with 143 additions and 51 deletions.
101 changes: 73 additions & 28 deletions LSTM_age_multi_input.py
@@ -1,19 +1,20 @@
# %%
# Generate the word-embedding file
from tqdm import tqdm
import os
import tensorflow as tf
import numpy as np
import pandas as pd
from tqdm import tqdm
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler
from gensim.models import Word2Vec, KeyedVectors
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Dropout, concatenate
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, LSTM, Bidirectional, Embedding, Dense, Dropout, concatenate
from tensorflow.keras.models import Model, Sequential
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from gensim.models import Word2Vec, KeyedVectors
from mymail import mail
import os
from keras.utils import to_categorical

import argparse
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# %%
@@ -23,8 +24,31 @@
# current_line_len = len(line.strip().split(' '))
# LEN_creative_id = max(LEN_creative_id, current_line_len)
# f.close()


# %%
parser = argparse.ArgumentParser()
parser.add_argument('--load_from_npy', action='store_true',
help='load data from cached npy files',
default=False)
parser.add_argument('--not_train_embedding', action='store_false',
help='freeze the pretrained embedding layers (do not fine-tune them)',
default=True)

parser.add_argument('--epoch', type=int,
help='number of training epochs',
default=5)
parser.add_argument('--batch_size', type=int,
help='batch size',
default=256)
parser.add_argument('--examples', type=int,
help='number of training examples; defaults to the training set only, excluding the validation set',
default=810000)


parser.add_argument('--num_lstm', type=int,
help='number of stacked Bi-LSTM layers',
default=1)

args = parser.parse_args()
# %%
NUM_creative_id = 2481135+1
NUM_ad_id = 2264190+1
@@ -161,41 +185,41 @@ def get_age_model(creative_id_emb, ad_id_emb, product_id_emb):
x1 = Embedding(input_dim=NUM_creative_id,
output_dim=128,
weights=[creative_id_emb],
trainable=True,
trainable=args.not_train_embedding,
input_length=LEN_creative_id,
mask_zero=True)(input_creative_id)
x1 = LSTM(1024, return_sequences=True)(x1)
x1 = LSTM(512, return_sequences=True)(x1)
x1 = LSTM(256, return_sequences=False)(x1)
for _ in range(args.num_lstm):
x1 = Bidirectional(LSTM(256, return_sequences=True))(x1)
x1 = layers.GlobalMaxPooling1D()(x1)

# second input
input_ad_id = Input(shape=(None,), name='ad_id')
x2 = Embedding(input_dim=NUM_ad_id,
output_dim=128,
weights=[ad_id_emb],
trainable=True,
trainable=args.not_train_embedding,
input_length=LEN_ad_id,
mask_zero=True)(input_ad_id)
x2 = LSTM(1024, return_sequences=True)(x2)
x2 = LSTM(512, return_sequences=True)(x2)
x2 = LSTM(256, return_sequences=False)(x2)
for _ in range(args.num_lstm):
x2 = Bidirectional(LSTM(256, return_sequences=True))(x2)
x2 = layers.GlobalMaxPooling1D()(x2)

# third input
input_product_id = Input(shape=(None,), name='product_id')
x3 = Embedding(input_dim=NUM_product_id,
output_dim=128,
weights=[product_id_emb],
trainable=True,
trainable=args.not_train_embedding,
input_length=LEN_product_id,
mask_zero=True)(input_product_id)
x3 = LSTM(1024, return_sequences=True)(x3)
x3 = LSTM(512, return_sequences=True)(x3)
x3 = LSTM(256, return_sequences=False)(x3)
for _ in range(args.num_lstm):
x3 = Bidirectional(LSTM(256, return_sequences=True))(x3)
x3 = layers.GlobalMaxPooling1D()(x3)

# concat x1 x2 x3
x = concatenate([x1, x2, x3])
x = Dense(128)(x)
x = Dropout(0.1)(x)
# x = Dense(128)(x)
# x = Dropout(0.1)(x)
output_y = Dense(10, activation='softmax')(x)

model = Model([input_creative_id, input_ad_id, input_product_id], output_y)
@@ -207,10 +231,31 @@ def get_age_model(creative_id_emb, ad_id_emb, product_id_emb):


# %%
mail('start getting train data')
x1_train, x1_val, x2_train, x2_val, x3_train, x3_val, y_train, y_val, creative_id_emb, ad_id_emb, product_id_emb = get_train_val()
mail('get train data done.')
if not args.load_from_npy:
mail('start getting train data')
x1_train, x1_val, x2_train, x2_val, x3_train, x3_val, y_train, y_val, creative_id_emb, ad_id_emb, product_id_emb = get_train_val()
mail('get train data done.')

def save_data(datas):
for i, data in enumerate(datas):
np.save(f'tmp/transformer_input_{i}.npy', data)
datas = [x1_train, x1_val, x2_train, x2_val, x3_train, x3_val,
y_train, y_val, creative_id_emb, ad_id_emb, product_id_emb]
save_data(datas)
else:
x1_train = np.load('tmp/transformer_input_0.npy', allow_pickle=True)
x1_val = np.load('tmp/transformer_input_1.npy', allow_pickle=True)
x2_train = np.load('tmp/transformer_input_2.npy', allow_pickle=True)
x2_val = np.load('tmp/transformer_input_3.npy', allow_pickle=True)
x3_train = np.load('tmp/transformer_input_4.npy', allow_pickle=True)
x3_val = np.load('tmp/transformer_input_5.npy', allow_pickle=True)
y_train = np.load('tmp/transformer_input_6.npy', allow_pickle=True)
y_val = np.load('tmp/transformer_input_7.npy', allow_pickle=True)
creative_id_emb = np.load('tmp/transformer_input_8.npy', allow_pickle=True)
ad_id_emb = np.load('tmp/transformer_input_9.npy', allow_pickle=True)
product_id_emb = np.load('tmp/transformer_input_10.npy', allow_pickle=True)

# %%
model = get_age_model(creative_id_emb, ad_id_emb, product_id_emb)
# %%
# %%
@@ -240,8 +285,8 @@ def get_age_model(creative_id_emb, ad_id_emb, product_id_emb):
{'creative_id': x1_train, 'ad_id': x2_train, 'product_id': x3_train},
y_train,
validation_data=([x1_val, x2_val, x3_val], y_val),
epochs=3,
batch_size=256,
epochs=args.epoch,
batch_size=args.batch_size,
callbacks=[checkpoint],
)
mail('train lstm done!!!')
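Note: the main addition above is an optional npy cache: the first run writes the preprocessed arrays to tmp/, and --load_from_npy skips get_train_val() on later runs. A minimal standalone sketch of the same pattern follows; the helper names save_arrays/load_arrays are illustrative and not part of the repository, while the file naming and allow_pickle=True follow the diff.

import os
import numpy as np

def save_arrays(arrays, prefix='tmp/transformer_input'):
    # Persist each array under an index-based name so load order matches save order.
    os.makedirs(os.path.dirname(prefix), exist_ok=True)
    for i, arr in enumerate(arrays):
        np.save(f'{prefix}_{i}.npy', arr)

def load_arrays(count, prefix='tmp/transformer_input'):
    # allow_pickle=True mirrors the diff: some cached arrays are object arrays
    # (variable-length id sequences) rather than plain numeric matrices.
    return [np.load(f'{prefix}_{i}.npy', allow_pickle=True) for i in range(count)]

# Usage (mirrors the diff): save_arrays(datas) right after get_train_val(), then
# x1_train, x1_val, ..., product_id_emb = load_arrays(11) when --load_from_npy is set.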
7 changes: 3 additions & 4 deletions Transformer_multi_input.py → Transformer_keras.py
@@ -41,10 +41,10 @@
default=1)
parser.add_argument('--num_lstm', type=int,
help='number of stacked Bi-LSTM layers',
default=0)
default=1)
parser.add_argument('--examples', type=int,
help='number of examples',
default=900000)
help='number of training examples; defaults to the training set only, excluding the validation set',
default=810000)
args = parser.parse_args()
# %%

@@ -230,7 +230,6 @@ def get_age_model(creative_id_emb, ad_id_emb, product_id_emb):

for _ in range(args.num_lstm):
x1 = Bidirectional(LSTM(256, return_sequences=True))(x1)
# x1 = Bidirectional(LSTM(256, return_sequences=False))(x1)
x1 = layers.GlobalMaxPooling1D()(x1)

# second input
File renamed without changes.
38 changes: 19 additions & 19 deletions run.sh
@@ -1,19 +1,19 @@
python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 3 --head_transformer 4 --num_lstm 3 --not_train_embeddin --examples 300000 > tmp/log_1.txt 2>&1
sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 6 --head_transformer 4 --num_lstm 3 --not_train_embeddin --examples 300000 > tmp/log_2.txt 2>&1
sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 9 --head_transformer 4 --num_lstm 3 --not_train_embeddin --examples 300000 > tmp/log_3.txt 2>&1
sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 12 --head_transformer 4 --num_lstm 3 --not_train_embeddin --examples 300000 > tmp/log_4.txt 2>&1
sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 3 --head_transformer 4 --num_lstm 6 --not_train_embeddin --examples 300000 > tmp/log_5.txt 2>&1
sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 3 --head_transformer 4 --num_lstm 9 --not_train_embeddin --examples 300000 > tmp/log_6.txt 2>&1
sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 3 --head_transformer 4 --num_lstm 12 --not_train_embeddin --examples 300000 > tmp/log_7.txt 2>&1
sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 3 --head_transformer 4 --num_lstm 3 --examples 300000 > tmp/log_8.txt 2>&1
sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 12 --head_transformer 4 --num_lstm 3 --examples 300000 > tmp/log_9.txt 2>&1
sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 10 --batch_size 256 --num_transformer 3 --head_transformer 4 --num_lstm 12 --examples 300000 > tmp/log_10.txt 2>&1
# python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 3 --head_transformer 4 --num_lstm 3 --not_train_embedding --examples 300000 > tmp/log_1.txt 2>&1
# sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 6 --head_transformer 4 --num_lstm 3 --not_train_embedding --examples 300000 > tmp/log_2.txt 2>&1
# sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 9 --head_transformer 4 --num_lstm 3 --not_train_embedding --examples 300000 > tmp/log_3.txt 2>&1
# sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 12 --head_transformer 4 --num_lstm 3 --not_train_embedding --examples 300000 > tmp/log_4.txt 2>&1
# sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 1 --head_transformer 4 --num_lstm 6 --not_train_embedding --examples 100000 > tmp/log_5.txt 2>&1
sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 1 --head_transformer 4 --num_lstm 9 --not_train_embedding --examples 100000 > tmp/log_6.txt 2>&1
sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 1 --head_transformer 4 --num_lstm 12 --not_train_embedding --examples 300000 > tmp/log_7.txt 2>&1
# sleep 60
python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 1 --head_transformer 4 --num_lstm 3 --examples 100000 > tmp/log_8.txt 2>&1
sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 12 --head_transformer 4 --num_lstm 3 --examples 300000 > tmp/log_9.txt 2>&1
# sleep 60
# python Transformer_multi_input.py --load_from_npy --epoch 3 --batch_size 256 --num_transformer 1 --head_transformer 4 --num_lstm 12 --examples 300000 > tmp/log_10.txt 2>&1
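The updated run.sh writes out each sweep configuration by hand. A hypothetical Python equivalent (not part of the repository) that builds the same kind of command lines is sketched below; the flag names and values are taken from the active lines of run.sh, while the sweep structure and log naming are illustrative.

import subprocess

# Hypothetical sweep generator; (num_transformer, num_lstm, examples) triples follow run.sh.
SWEEP = [
    (1, 6, 100000),
    (1, 9, 100000),
    (1, 3, 100000),
]

for i, (num_transformer, num_lstm, examples) in enumerate(SWEEP, start=1):
    cmd = [
        'python', 'Transformer_multi_input.py',
        '--load_from_npy',
        '--epoch', '3',
        '--batch_size', '256',
        '--num_transformer', str(num_transformer),
        '--head_transformer', '4',
        '--num_lstm', str(num_lstm),
        '--not_train_embedding',
        '--examples', str(examples),
    ]
    with open(f'tmp/log_{i}.txt', 'w') as log:
        # Mirror "> tmp/log_i.txt 2>&1" by sending stdout and stderr to the log file.
        subprocess.run(cmd, stdout=log, stderr=subprocess.STDOUT)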
48 changes: 48 additions & 0 deletions tmp.py
@@ -0,0 +1,48 @@
def get_age_model(creative_id_emb, ad_id_emb, product_id_emb):
embed_dim = 128 # Embedding size for each token
num_heads = 1 # Number of attention heads
ff_dim = 256 # Hidden layer size in feed forward network inside transformer

# shape: (sequence length,)
# first input
input_creative_id = Input(shape=(None,), name='creative_id')
x1 = TokenAndPositionEmbedding(
maxlen, NUM_creative_id, embed_dim, creative_id_emb)(input_creative_id)
for _ in range(args.num_transformer):
x1 = TransformerBlock(embed_dim, num_heads, ff_dim)(x1)
for _ in range(args.num_lstm):
x1 = Bidirectional(LSTM(256, return_sequences=True))(x1)
x1 = layers.GlobalMaxPooling1D()(x1)

# second input
input_ad_id = Input(shape=(None,), name='ad_id')
x2 = TokenAndPositionEmbedding(
maxlen, NUM_ad_id, embed_dim, ad_id_emb)(input_ad_id)
for _ in range(args.num_transformer):
x2 = TransformerBlock(embed_dim, num_heads, ff_dim)(x2)
for _ in range(args.num_lstm):
x2 = Bidirectional(LSTM(256, return_sequences=True))(x2)
x2 = layers.GlobalMaxPooling1D()(x2)

# third input
input_product_id = Input(shape=(None,), name='product_id')
x3 = TokenAndPositionEmbedding(
maxlen, NUM_product_id, embed_dim, product_id_emb)(input_product_id)
for _ in range(args.num_transformer):
x3 = TransformerBlock(embed_dim, num_heads, ff_dim)(x3)
for _ in range(args.num_lstm):
x3 = Bidirectional(LSTM(256, return_sequences=True))(x3)
x3 = layers.GlobalMaxPooling1D()(x3)

# concat x1 x2 x3
x = concatenate([x1, x2, x3])
# x = x1 + x2 + x3
x = Dense(20)(x)
output_y = Dense(10, activation='softmax')(x)

model = Model([input_creative_id, input_ad_id, input_product_id], output_y)
model.compile(loss='categorical_crossentropy',
optimizer='adam', metrics=['accuracy'])
model.summary()

return model
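tmp.py uses TransformerBlock and TokenAndPositionEmbedding without defining them. For reference, a sketch of one common definition, adapted from the standard Keras text-classification-with-Transformer example: it assumes layers.MultiHeadAttention is available (TF 2.4+) and that the fourth TokenAndPositionEmbedding argument is a pretrained token-embedding matrix. The repository's own versions may differ.

import tensorflow as tf
from tensorflow.keras import layers

class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation='relu'), layers.Dense(embed_dim)])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=False):
        # Self-attention followed by a position-wise feed-forward block,
        # each with a residual connection and layer normalization.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim, emb_weights=None):
        super().__init__()
        # The token embedding can be initialised from the pretrained Word2Vec matrix
        # passed in by get_age_model (e.g. creative_id_emb); this kwarg is an assumption.
        if emb_weights is not None:
            self.token_emb = layers.Embedding(
                input_dim=vocab_size, output_dim=embed_dim, weights=[emb_weights])
        else:
            self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        # Add a learned positional embedding to the token embedding.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        return self.token_emb(x) + self.pos_emb(positions)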
