-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathblocking.py
96 lines (85 loc) · 3.09 KB
/
blocking.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# ---
# jupyter:
# jupytext_format_version: '1.2'
# kernelspec:
# display_name: Python [default]
# language: python
# name: python3
# language_info:
# codemirror_mode:
# name: ipython
# version: 3
# file_extension: .py
# mimetype: text/x-python
# name: python
# nbconvert_exporter: python
# pygments_lexer: ipython3
# version: 3.6.4
# ---
import numpy as np
from functools import reduce
# https://stackoverflow.com/questions/6800193/what-is-the-most-efficient-way-of-finding-all-the-factors-of-a-number-in-python
# THis is good.
def fctors(n):
return sorted(
list(
set(
reduce(
list.__add__,
([i, n // i] for i in range(1, int(n ** 0.5) + 1) if n % i == 0),
)
)
)
)
def get_nearest_max(n):
"""
Return the number with the largest number of factors between n-100 and n.
"""
max_factors = 0
if n % 2 == 0:
beg = n - 100
end = n
else:
beg = n - 101
end = n - 1
if beg < 0:
beg = 0
for i in range(beg, end + 2, 2):
num_factors = len(fctors(i))
if num_factors >= max_factors:
max_factors = num_factors
most_factors = i
return most_factors
def get_block_sem(data_array):
"""
Compute the standard error of the mean (SEM) for a data_array using the blocking method."
"""
# Get the integer factors for the number of data points. These
# are equivalent to the block sizes we will check.
block_sizes = fctors(len(data_array))
# An array to store means for each block ... make it bigger than we need.
block_means = np.zeros([block_sizes[-1]], np.float64)
# Store the SEM for each block size, except the last two size for which
# there will only be two or one blocks total and thus very noisy.
sems = np.zeros([len(block_sizes) - 2], np.float64)
sems_error = np.zeros([len(block_sizes) - 2], np.float64)
# Check each block size except the last two.
for size_idx in range(len(block_sizes) - 2):
# Check each block, the number of which is conveniently found as
# the other number of the factor pair in block_sizes
num_blocks = block_sizes[-size_idx - 1]
for blk_idx in range(num_blocks):
# Find the index for beg and end of data points for each block
data_beg_idx = blk_idx * block_sizes[size_idx]
data_end_idx = (blk_idx + 1) * block_sizes[size_idx]
# Compute the mean of this block and store in array
block_means[blk_idx] = np.mean(data_array[data_beg_idx:data_end_idx])
# Compute the standard deviation across all blocks, devide by num_blocks-1
# for SEM
sems[size_idx] = np.std(block_means[0:num_blocks], ddof=0) / np.sqrt(
num_blocks - 1
)
# Hmm or should ddof=1? I think 0, see Flyvbjerg -----^
sems_error[size_idx] = sems[size_idx] / np.sqrt(2 * (num_blocks - 1))
# Return the max SEM found ... this is a conservative approach.
return np.max(sems)