From 5a8f57c56ae3dc62a1a2da7a89ebef39078e2de2 Mon Sep 17 00:00:00 2001 From: sunlanchang Date: Sat, 20 Jun 2020 04:33:13 +0800 Subject: [PATCH] update lstm --- .vscode/settings.json | 2 +- LSTM_age_gender.py | 7 ++++--- process_data.ipynb | 44 +++++++++++++++++++++---------------------- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 4da6104..14bd17e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,3 +1,3 @@ { - "python.pythonPath": "/Users/sunlanchang/anaconda3/envs/dev/bin/python" + "python.pythonPath": "/root/anaconda3/envs/python377/bin/python" } \ No newline at end of file diff --git a/LSTM_age_gender.py b/LSTM_age_gender.py index f3cbc24..1e175ca 100644 --- a/LSTM_age_gender.py +++ b/LSTM_age_gender.py @@ -95,7 +95,8 @@ def get_embedding(feature_name, tokenizer): # 从序列文件提取array格式数据 def get_train(feature_name, vocab_size, len_feature): f = open(f'word2vec_new/{feature_name}.txt') - tokenizer = Tokenizer(num_words=vocab_size) + # tokenizer = Tokenizer(num_words=vocab_size) + tokenizer = Tokenizer(num_words=None) tokenizer.fit_on_texts(f) f.close() @@ -284,7 +285,7 @@ def get_tail_concat_model(DATA, predict_age=True, predict_gender=False): x = layers.Concatenate(axis=2)([x1, x2, x3, x4, x5, x6]) x = layers.GlobalMaxPooling1D()(x) - + if predict_age and predict_gender: output_gender = Dense(2, activation='softmax', name='gender')(x) output_age = Dense(10, activation='softmax', name='age')(x) @@ -611,7 +612,7 @@ def merge_age_gender_csv(): ans['predicted_gender'] = gender.predicted_gender ans['predicted_age'] = age.predicted_age ans.to_csv('data/ans/LSTM.csv', header=True, index=False, - columns=['user_id', 'predicted_age', 'predicted_gender']) + columns=['user_id', 'predicted_age', 'predicted_gender']) # %% # mail('save ans to csv done!') # %% diff --git a/process_data.ipynb b/process_data.ipynb index 8348bde..5f26c1c 100644 --- a/process_data.ipynb +++ b/process_data.ipynb @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -69,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -78,7 +78,7 @@ "text/plain": "(63668283, 4)" }, "metadata": {}, - "execution_count": 21 + "execution_count": 6 } ], "source": [ @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -107,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -139,7 +139,7 @@ "text/plain": "(5099294, 6)" }, "metadata": {}, - "execution_count": 25 + "execution_count": 10 } ], "source": [ @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -157,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -166,7 +166,7 @@ "text/plain": "(3412772, 6)" }, "metadata": {}, - "execution_count": 27 + "execution_count": 12 } ], "source": [ @@ -175,7 +175,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -185,7 +185,7 @@ "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
timeuser_idcreative_idclick_timesad_idproduct_idproduct_categoryadvertiser_idindustry
0930920567330150442330673332638319
165309203072255126423001261267836
256309202361327120359181261267836
36309204325532129252327081332066242
4593092042746730123622080181468288
\n
" }, "metadata": {}, - "execution_count": 28 + "execution_count": 13 } ], "source": [ @@ -195,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -204,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -213,7 +213,7 @@ "text/plain": "(63668283, 9)" }, "metadata": {}, - "execution_count": 30 + "execution_count": 15 } ], "source": [ @@ -222,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -232,7 +232,7 @@ "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
timeuser_idcreative_idclick_timesad_idproduct_idproduct_categoryadvertiser_idindustry
261965132018213961724607057293326
261965142012097781188507136297026
2619651520187746817734450529455106
261965173911683713114588780514668326
26196524401122032110995913342114110
\n
" }, "metadata": {}, - "execution_count": 34 + "execution_count": 16 } ], "source": [ @@ -241,7 +241,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -260,12 +260,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.5-final" + "version": "3.7.7-final" }, "orig_nbformat": 2, "kernelspec": { - "name": "python36564bit792083a9d155497086f5b8bc917c01d5", - "display_name": "Python 3.6.5 64-bit" + "name": "python3", + "display_name": "Python 3" } }, "nbformat": 4,