Skip to content

Commit

Permalink
Merge pull request #4 from labdac/last_fm_dataset
Browse files Browse the repository at this point in the history
last_fm dataset analysis and sessions generation
  • Loading branch information
Roj authored Mar 8, 2019
2 parents 1dfa8b7 + 0376ea7 commit 3dbc47d
Show file tree
Hide file tree
Showing 5 changed files with 676 additions and 0 deletions.
611 changes: 611 additions & 0 deletions Lasf FM exploracion inicial + preprocesamiento.ipynb

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions setup.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
#!/usr/bin/env bash
#
# Download and unpack the datasets used by the project.
# Run from the directory that contains data/; the directory must already exist.

# Abort when data/ is missing so the archives are never downloaded and
# extracted into whatever directory the script was started from.
cd data || exit 1

# Million Song subset.
# -O pins the output name so a re-run overwrites the archive instead of saving
# "millionsongsubset_full.tar.gz.1" and re-extracting the stale copy.
wget http://static.echonest.com/millionsongsubset_full.tar.gz -O millionsongsubset_full.tar.gz
tar -xvzf millionsongsubset_full.tar.gz

# ThirtyMusic archive.
wget https://crossnox.sytes.net/owncloud/index.php/s/edt1ULkBOeQacul/download -O ThirtyMusic.tar.gz
tar -xvzf ThirtyMusic.tar.gz

# Currently disabled: Last.fm train/test archives from the Million Song site.
#wget http://labrosa.ee.columbia.edu/millionsong/sites/default/files/lastfm/lastfm_train.zip -O lastfm_train.zip
#unzip -q lastfm_train.zip

#wget http://labrosa.ee.columbia.edu/millionsong/sites/default/files/lastfm/lastfm_test.zip
#unzip -q lastfm_test.zip

# Last.fm 1K archive.
wget http://mtg.upf.edu/static/datasets/last.fm/lastfm-dataset-1K.tar.gz -O lastfm-dataset-1K.tar.gz
tar -xvzf lastfm-dataset-1K.tar.gz
12 changes: 12 additions & 0 deletions src/utils/files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import pickle


def save_to_pickle(a, file_name: str):
    """Serialize ``a`` with pickle and write it to ``file_name``.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten. The highest pickle protocol supported by the
    running interpreter is used.

    Args:
        a: Any picklable object.
        file_name: Path of the file to write.
    """
    with open(file_name, 'wb') as sink:
        writer = pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL)
        writer.dump(a)


def read_from_pickle(file_name: str):
    """Load and return the object pickled in ``file_name``.

    WARNING: unpickling can execute arbitrary code. Only call this on
    files that come from a trusted source.

    Args:
        file_name: Path of the pickle file to read.

    Returns:
        The first object stored in the file.
    """
    with open(file_name, 'rb') as source:
        reader = pickle.Unpickler(source)
        return reader.load()

28 changes: 28 additions & 0 deletions src/utils/jupyter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Jupyter notebook helpers: widen the page, toggle code-cell visibility, render Markdown
from IPython.display import display, Markdown, HTML


def showToggleCodeButton(default_hide=False):
    """Display layout tweaks and a button that toggles code visibility.

    Three HTML snippets are displayed, in order:

    1. a style rule setting ``.container`` to 98% width,
    2. a style rule setting ``.output.output_scroll`` height to ``auto``,
    3. a script plus a form button; each press calls ``code_toggle`` which
       alternately hides and shows ``div.input`` elements (when hiding, code
       cells without an output area are hidden as well).

    ``code_toggle`` is also run once when the document is ready, so with
    ``default_hide`` truthy the code starts hidden; otherwise it starts
    visible and the first button press hides it.

    Args:
        default_hide: Whether the code should be hidden initially.
    """
    # Rendered into the script as a JavaScript boolean literal.
    initial_state = 'true' if default_hide else 'false'

    # NOTE(review): the script uses `$`, presumably jQuery provided by the
    # notebook front end -- confirm it is available where this is rendered.
    toggle_markup = '''<script>
code_show=%s;
function code_toggle() {
if (code_show) {
$('div.input').hide();
$('div.cell.code_cell:not(:has( .output_wrapper .output_area))').hide();
} else {
$('div.input').show();
$('div.cell.code_cell').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" class="btn btn-primary" value="Click here to toggle on/off the code."></form>''' % initial_state

    snippets = (
        "<style>.container { width:98% !important; }</style>",
        "<style>.output.output_scroll { height: auto; }</style>",
        toggle_markup,
    )
    for snippet in snippets:
        display(HTML(snippet))

def display_mk(mk: str):
    """Render ``mk`` as Markdown in the notebook output.

    Args:
        mk: Markdown source text.
    """
    rendered = Markdown(mk)
    display(rendered)
15 changes: 15 additions & 0 deletions src/utils/pandas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pandas as pd
import copy

class DataFrameWithName(pd.DataFrame):
    """A ``pandas.DataFrame`` subclass that can carry a name.

    The name lives in the ``metadata_df_name`` attribute. It has no default:
    it exists on an instance only once it has been assigned.
    """

    # Attribute names registered with pandas as frame-level metadata
    # ("normal properties"), as opposed to column names.
    _metadata = ['metadata_df_name']

    @property
    def _constructor(self):
        """Return the class pandas should use when it builds a new frame."""
        return DataFrameWithName

def put_df_name(df, name):
    """Wrap ``df`` in a ``DataFrameWithName`` and attach ``name`` to it.

    Args:
        df: Data accepted by the ``DataFrameWithName`` constructor.
        name: Value stored in the result's ``metadata_df_name`` attribute.

    Returns:
        The new ``DataFrameWithName`` instance; the name is set on this
        wrapper, not on the ``df`` argument.
    """
    named = DataFrameWithName(df)
    named.metadata_df_name = name
    return named

0 comments on commit 3dbc47d

Please sign in to comment.