Skip to content

Commit

Permalink
新的分数
Browse files: browse the repository at this point in the history
  • Loading branch information
sunlanchang committed May 27, 2020
1 parent 0ec05a7 commit 90d9c2c
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 8 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
- [ ] +RNN等序列模型
- [ ] +LightGBM
- [ ] TF-IDF
- [ ] +LightGBM
- [ ] +LightGBM (**accuracy: 1.26**)
- [ ] +Dense
- [ ] DeepFM、DeepFFM等
- [ ] 集成学习:比赛最后阶段使用上分
Expand Down
Empty file added stacking.py
Empty file.
21 changes: 14 additions & 7 deletions tf_idf.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# %%
user = pd.read_csv(
'data/train_preliminary/user.csv').sort_values(['user_id'], ascending=(True,))
# %%
Y_train_gender = user.gender
Y_train_age = user.age
corpus = []
Expand All @@ -31,16 +30,21 @@
Y_train_gender = Y_train_gender.iloc[:train_examples]-1
Y_train_age = Y_train_age.iloc[:train_examples]-1
# %%
min_df = 30
max_df = 0.001
vectorizer = TfidfVectorizer(
token_pattern=r"(?u)\b\w+\b",
min_df=100,
max_df=0.1,
min_df=min_df,
# max_df=max_df,
# max_features=128,
dtype=np.float32,
)
all_data = vectorizer.fit_transform(corpus)
print('(examples, features)', all_data.shape)
mail('train tfidf done!')
print('train tfidf done! min_df={}, max_df={} shape is {}'.format(
min_df, max_df, all_data.shape[1]))
mail('train tfidf done! min_df={}, max_df={} shape is {}'.format(
min_df, max_df, all_data.shape[1]))
# %%
train_val = all_data[:train_examples, :]
# %%
Expand Down Expand Up @@ -95,7 +99,7 @@ def LGBM_gender(epoch, early_stopping_rounds):
print('training done!')
print('Saving model...')
# save model to file
gbm.save_model('tmp/model_gender.txt')
gbm.save_model('tmp/model_gender_dfmin_30.txt')
print('save model done!')
return gbm
# %%
Expand Down Expand Up @@ -131,13 +135,16 @@ def LGBM_age(epoch, early_stopping_rounds):
)
print('Saving model...')
# save model to file
gbm.save_model('tmp/model_age.txt')
gbm.save_model('tmp/model_age_dfmin_30.txt')
print('save model done!')
return gbm


# %%
gbm_gender = LGBM_gender(epoch=1500, early_stopping_rounds=500)
# gbm_gender = lgb.Booster(model_file='tmp/model_gender.txt')
# gbm_age = lgb.Booster(model_file='tmp/model_age.txt')
# %%
gbm_gender = LGBM_gender(epoch=2000, early_stopping_rounds=500)
# %%
mail('train gender done!')
gbm_age = LGBM_age(epoch=2000, early_stopping_rounds=500)
Expand Down

0 comments on commit 90d9c2c

Please sign in to comment.