Skip to content

Commit

Permalink
update lstm
Browse files (browse the repository at this point in the history)
  • Loading branch information
sunlanchang committed Jun 19, 2020
1 parent 1833bef commit 5a8f57c
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 26 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
- "python.pythonPath": "/Users/sunlanchang/anaconda3/envs/dev/bin/python"
+ "python.pythonPath": "/root/anaconda3/envs/python377/bin/python"
}
7 changes: 4 additions & 3 deletions LSTM_age_gender.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ def get_embedding(feature_name, tokenizer):
# 从序列文件提取array格式数据
def get_train(feature_name, vocab_size, len_feature):
f = open(f'word2vec_new/{feature_name}.txt')
- tokenizer = Tokenizer(num_words=vocab_size)
+ # tokenizer = Tokenizer(num_words=vocab_size)
+ tokenizer = Tokenizer(num_words=None)
tokenizer.fit_on_texts(f)
f.close()

Expand Down Expand Up @@ -284,7 +285,7 @@ def get_tail_concat_model(DATA, predict_age=True, predict_gender=False):

x = layers.Concatenate(axis=2)([x1, x2, x3, x4, x5, x6])
x = layers.GlobalMaxPooling1D()(x)

if predict_age and predict_gender:
output_gender = Dense(2, activation='softmax', name='gender')(x)
output_age = Dense(10, activation='softmax', name='age')(x)
Expand Down Expand Up @@ -611,7 +612,7 @@ def merge_age_gender_csv():
ans['predicted_gender'] = gender.predicted_gender
ans['predicted_age'] = age.predicted_age
ans.to_csv('data/ans/LSTM.csv', header=True, index=False,
columns=['user_id', 'predicted_age', 'predicted_gender'])
columns=['user_id', 'predicted_age', 'predicted_gender'])
# %%
# mail('save ans to csv done!')
# %%
44 changes: 22 additions & 22 deletions process_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -69,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -78,7 +78,7 @@
"text/plain": "(63668283, 4)"
},
"metadata": {},
"execution_count": 21
"execution_count": 6
}
],
"source": [
Expand All @@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -107,7 +107,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -120,7 +120,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -130,7 +130,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand All @@ -139,7 +139,7 @@
"text/plain": "(5099294, 6)"
},
"metadata": {},
"execution_count": 25
"execution_count": 10
}
],
"source": [
Expand All @@ -148,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -157,7 +157,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 12,
"metadata": {},
"outputs": [
{
Expand All @@ -166,7 +166,7 @@
"text/plain": "(3412772, 6)"
},
"metadata": {},
"execution_count": 27
"execution_count": 12
}
],
"source": [
Expand All @@ -175,7 +175,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 13,
"metadata": {},
"outputs": [
{
Expand All @@ -185,7 +185,7 @@
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>time</th>\n <th>user_id</th>\n <th>creative_id</th>\n <th>click_times</th>\n <th>ad_id</th>\n <th>product_id</th>\n <th>product_category</th>\n <th>advertiser_id</th>\n <th>industry</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>9</td>\n <td>30920</td>\n <td>567330</td>\n <td>1</td>\n <td>504423</td>\n <td>30673</td>\n <td>3</td>\n <td>32638</td>\n <td>319</td>\n </tr>\n <tr>\n <th>1</th>\n <td>65</td>\n <td>30920</td>\n <td>3072255</td>\n <td>1</td>\n <td>2642300</td>\n <td>1261</td>\n <td>2</td>\n <td>6783</td>\n <td>6</td>\n </tr>\n <tr>\n <th>2</th>\n <td>56</td>\n <td>30920</td>\n <td>2361327</td>\n <td>1</td>\n <td>2035918</td>\n <td>1261</td>\n <td>2</td>\n <td>6783</td>\n <td>6</td>\n </tr>\n <tr>\n <th>3</th>\n <td>6</td>\n <td>309204</td>\n <td>325532</td>\n <td>1</td>\n <td>292523</td>\n <td>27081</td>\n <td>3</td>\n <td>32066</td>\n <td>242</td>\n </tr>\n <tr>\n <th>4</th>\n <td>59</td>\n <td>309204</td>\n <td>2746730</td>\n <td>1</td>\n <td>2362208</td>\n <td>0</td>\n <td>18</td>\n <td>14682</td>\n <td>88</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 28
"execution_count": 13
}
],
"source": [
Expand All @@ -195,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -204,7 +204,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 15,
"metadata": {},
"outputs": [
{
Expand All @@ -213,7 +213,7 @@
"text/plain": "(63668283, 9)"
},
"metadata": {},
"execution_count": 30
"execution_count": 15
}
],
"source": [
Expand All @@ -222,7 +222,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 16,
"metadata": {},
"outputs": [
{
Expand All @@ -232,7 +232,7 @@
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>time</th>\n <th>user_id</th>\n <th>creative_id</th>\n <th>click_times</th>\n <th>ad_id</th>\n <th>product_id</th>\n <th>product_category</th>\n <th>advertiser_id</th>\n <th>industry</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>26196513</th>\n <td>20</td>\n <td>1</td>\n <td>821396</td>\n <td>1</td>\n <td>724607</td>\n <td>0</td>\n <td>5</td>\n <td>7293</td>\n <td>326</td>\n </tr>\n <tr>\n <th>26196514</th>\n <td>20</td>\n <td>1</td>\n <td>209778</td>\n <td>1</td>\n <td>188507</td>\n <td>136</td>\n <td>2</td>\n <td>9702</td>\n <td>6</td>\n </tr>\n <tr>\n <th>26196515</th>\n <td>20</td>\n <td>1</td>\n <td>877468</td>\n <td>1</td>\n <td>773445</td>\n <td>0</td>\n <td>5</td>\n <td>29455</td>\n <td>106</td>\n </tr>\n <tr>\n <th>26196517</th>\n <td>39</td>\n <td>1</td>\n <td>1683713</td>\n <td>1</td>\n <td>1458878</td>\n <td>0</td>\n <td>5</td>\n <td>14668</td>\n <td>326</td>\n </tr>\n <tr>\n <th>26196524</th>\n <td>40</td>\n <td>1</td>\n <td>122032</td>\n <td>1</td>\n <td>109959</td>\n <td>1334</td>\n <td>2</td>\n <td>11411</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"metadata": {},
"execution_count": 34
"execution_count": 16
}
],
"source": [
Expand All @@ -241,7 +241,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -260,12 +260,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5-final"
"version": "3.7.7-final"
},
"orig_nbformat": 2,
"kernelspec": {
"name": "python36564bit792083a9d155497086f5b8bc917c01d5",
"display_name": "Python 3.6.5 64-bit"
"name": "python3",
"display_name": "Python 3"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 5a8f57c

Please sign in to comment.