Skip to content

Commit

Permalink
Merge pull request #46 from solegalli/recreate_load_boston_function
Browse files Browse the repository at this point in the history
[MRG] replace boston import from sklearn
  • Loading branch information
lopuhin authored Dec 8, 2024
2 parents 03ec379 + 45791c2 commit a3a420a
Show file tree
Hide file tree
Showing 3 changed files with 543 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
*.DS_Store
*.pyc
*.joblib
*egg-info
Expand Down
35 changes: 34 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# -*- coding: utf-8 -*-
from os.path import dirname
from os.path import join
import csv
import pytest
import numpy as np
from sklearn.datasets import fetch_20newsgroups, load_boston, load_iris
from sklearn.datasets import fetch_20newsgroups, load_iris
from sklearn.utils import shuffle

NEWSGROUPS_CATEGORIES = [
Expand Down Expand Up @@ -48,6 +51,36 @@ def newsgroups_train_big():
def newsgroups_train_binary_big():
    """Delegate to ``_get_newsgroups`` with ``binary=True``,
    ``remove_chrome=True`` and ``size=1000``."""
    options = dict(binary=True, remove_chrome=True, size=1000)
    return _get_newsgroups(**options)

class Bunch(dict):
    """Dictionary subclass whose items are also reachable as attributes.

    Keyword arguments given to the constructor become the initial items,
    so ``Bunch(a=1).a`` and ``Bunch(a=1)["a"]`` both evaluate to ``1``.
    """

    def __init__(self, **kwargs):
        super(Bunch, self).__init__(**kwargs)
        # Use the mapping itself as the instance namespace: attribute
        # access and item access then share one underlying storage.
        self.__dict__ = self

def load_boston():
    """Load ``data/boston_house_prices.csv`` located next to this module.

    The file is read as follows:

    * row 1: the number of samples and the number of features in its
      first two columns;
    * row 2: the column names, the last one naming the target column;
    * remaining rows: numeric values, the last column being the target.

    Blank rows are ignored.

    Returns
    -------
    Bunch
        ``data`` is a float array of shape ``(n_samples, n_features)``,
        ``target`` is a float array of shape ``(n_samples,)`` and
        ``feature_names`` holds the column names without the target column.

    Raises
    ------
    ValueError
        If the file contains fewer data rows than its first row declares.
    """
    module_path = dirname(__file__)
    data_file_name = join(module_path, 'data', 'boston_house_prices.csv')

    with open(data_file_name) as f:
        reader = csv.reader(f)

        header = next(reader)
        n_samples = int(header[0])
        n_features = int(header[1])

        # names of features (the last entry names the target column)
        feature_names = np.array(next(reader))

        data = np.empty((n_samples, n_features))
        target = np.empty((n_samples,))

        n_rows = 0
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line
                # (for example a trailing newline at the end of the file).
                continue
            # More rows than declared still fails with IndexError here,
            # exactly as it did before.
            data[n_rows] = np.asarray(row[:-1], dtype=float)
            target[n_rows] = np.asarray(row[-1], dtype=float)
            n_rows += 1

    # np.empty does not initialise its memory, so a short file would
    # otherwise hand back arbitrary values in the unfilled rows.
    if n_rows != n_samples:
        raise ValueError(
            "%s declares %d samples but contains %d data rows"
            % (data_file_name, n_samples, n_rows)
        )

    return Bunch(data=data,
                 target=target,
                 # last column is target value
                 feature_names=feature_names[:-1])

@pytest.fixture(scope="session")
def boston_train(size=SIZE):
Expand Down
Loading

0 comments on commit a3a420a

Please sign in to comment.