# normalize_integrations_C_protocol.py
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.cm import get_cmap
import matplotlib as mpl
# Select Matplotlib's SVG backend and keep text as text in SVG output.
mpl.use('SVG')
mpl.rcParams['svg.fonttype'] = 'none'  # Do not convert fonts to paths

# CSV file to process; '...' is presumably a placeholder -- set the real path.
input_file = '...'
# Results go into a sub-folder next to the input CSV.
input_folder = os.path.dirname(input_file)
output_folder = os.path.join(input_folder, 'Integrations Water Normalized Method')
# exist_ok=True creates the folder if needed and is a no-op when it already
# exists, so no separate existence check is required.
os.makedirs(output_folder, exist_ok=True)

# Load the data and drop the last row of the file.
# NOTE(review): presumably the last row is a footer/incomplete sample -- confirm.
df = pd.read_csv(input_file).iloc[:-1]

# Columns holding the integrated band intensities that are normalized below.
peaks = ['Mean 3680.00 cm⁻¹', 'Mean 3520.00 cm⁻¹', 'Mean 3360.00 cm⁻¹',
         'Mean 3210.00 cm⁻¹', 'Mean 3100.00 cm⁻¹', 'Mean 2870.00 cm⁻¹']
columns_of_interest = ['Time (s)'] + peaks
df_filtered = df[columns_of_interest]
#%% normalization steps following C´s protocol
# Work on the peak columns only; the time column is re-attached later.
peak_data = df_filtered[peaks]
# 1: smallest and largest value of each peak column
min_per_peak = peak_data.min()
max_per_peak = peak_data.max()
# 2: smallest and largest value over all peak columns
absolute_min = min_per_peak.min()
absolute_max = max_per_peak.max()
# 3: span (max - min) of each peak
magnitude_per_peak = max_per_peak.sub(min_per_peak)
# 4: absolute value of each span
absolute_magnitude_per_peak = magnitude_per_peak.abs()
# 5: sum of the absolute spans
absolute_magnitude = absolute_magnitude_per_peak.sum()
# 6: share of the summed span contributed by each peak
fraction_per_peak = absolute_magnitude_per_peak.div(absolute_magnitude)
# 7: add |global minimum| to every value
# NOTE(review): this shifts the data to start at zero only when the global
# minimum is negative -- confirm that is what the protocol intends.
adjusted_values = peak_data.add(abs(absolute_min))
# 8: weight each peak column by its fraction from step 6
scaled_values = adjusted_values.mul(fraction_per_peak, axis=1)
# 9: express each row as percentages of that row's total
row_totals = scaled_values.sum(axis=1)
percentage_values = scaled_values.div(row_totals, axis=0).mul(100)
#%%
# Put the time axis back next to the normalized percentages.
df_percentage = pd.concat(
    (df_filtered['Time (s)'], percentage_values),
    axis=1,
)
time_col = df_percentage.loc[:, 'Time (s)']

# Bookkeeping for cutting the series into consecutive cycles.
cycle_duration = 100  # seconds per cycle, since 0.01 V/s
start_index, cycle_count = 0, 1
all_cycles_data = list()

# One colour per peak, sampled from magma while skipping its darkest end.
# NOTE(review): matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and
# removed in 3.9 -- confirm the installed version, or switch to plt.get_cmap.
cmap = get_cmap('magma')
colors = list(map(cmap, np.linspace(0.15, 1, len(peaks))))
# Cut the series into consecutive cycles of `cycle_duration` seconds, collect
# each cycle for the combined outputs, and save one stacked-area plot per cycle.
time_values = time_col.to_numpy()  # positional access, independent of index labels
total_rows = len(time_values)
while start_index < total_rows:
    # A cycle ends at the first later row whose time is at least
    # `cycle_duration` after the cycle's first row.
    cycle_end_time = time_values[start_index] + cycle_duration
    # Only rows after the start are searched, so end_index is always greater
    # than start_index: the loop always advances and no cycle is ever empty,
    # even if the time column is not strictly increasing.
    later_hits = np.flatnonzero(time_values[start_index + 1:] >= cycle_end_time)
    if later_hits.size:
        end_index = start_index + 1 + int(later_hits[0])
    else:
        end_index = total_rows  # Last cycle may be incomplete, take until the end

    # Slice the DataFrame for this cycle and keep it for the combined plot/CSV
    cycle_df = df_percentage.iloc[start_index:end_index]
    all_cycles_data.append(cycle_df)

    # Stacked-area plot of the percentage values for this cycle
    cycle_time = cycle_df['Time (s)']
    cycle_percentages = [cycle_df[peak] for peak in peaks]
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.stackplot(cycle_time, cycle_percentages, labels=peaks, colors=colors)
    ax.legend(loc='upper right', fontsize='small', title='Peaks')
    ax.set_title(f'Percentage Values Over Time - Cycle {cycle_count}', fontsize=14)
    ax.set_xlabel('Time (s)', fontsize=12)
    ax.set_ylabel('Percentage (%)', fontsize=12)
    ax.set_ylim(0, 100)  # percentages, so the stack tops out at 100
    ax.set_xlim(cycle_time.min(), cycle_time.max())

    # Save the plot for the current cycle in both formats
    plot_filename_png = os.path.join(output_folder, f'cycle_{cycle_count}_percentage_plot.png')
    plot_filename_svg = os.path.join(output_folder, f'cycle_{cycle_count}_percentage_plot.svg')
    fig.tight_layout()
    fig.savefig(plot_filename_png)
    fig.savefig(plot_filename_svg)
    plt.close(fig)  # release the figure before the next cycle

    # Move to the next cycle
    start_index = end_index
    cycle_count += 1
# Plot all cycles together using the same consistent colors
fig, ax = plt.subplots(figsize=(12, 8))
for cycle_data in all_cycles_data:
    # Each cycle is drawn as its own stack with the shared per-peak colours.
    ax.stackplot(
        cycle_data['Time (s)'],
        [cycle_data[peak] for peak in peaks],
        colors=colors,
    )
# Labels are given once here rather than on every stackplot call, so the
# legend shows one entry per peak instead of one per peak per cycle.
ax.legend(peaks, loc='upper right', fontsize='small', title='Peaks')
ax.set_title('Percentage Values Over Time - All Cycles Combined', fontsize=14)
ax.set_xlabel('Time (s)', fontsize=12)
ax.set_ylabel('Percentage (%)', fontsize=12)
ax.set_ylim(0, 100)

# x-limit based on the combined data to avoid blank space
combined_time_min = min(cycle['Time (s)'].min() for cycle in all_cycles_data)
combined_time_max = max(cycle['Time (s)'].max() for cycle in all_cycles_data)
ax.set_xlim(combined_time_min, combined_time_max)

# Save the combined plot
combined_plot_filename_png = os.path.join(output_folder, 'all_cycles_percentage_plot.png')
combined_plot_filename_svg = os.path.join(output_folder, 'all_cycles_percentage_plot.svg')
fig.tight_layout()
fig.savefig(combined_plot_filename_png)  # Save as PNG
fig.savefig(combined_plot_filename_svg)  # Save as SVG
plt.close(fig)
# Stack every cycle's rows into one DataFrame with a fresh 0..n-1 index
combined_cycles_df = pd.concat(all_cycles_data, axis=0, ignore_index=True)
#%% Save the combined data into a CSV file
combined_csv_name = 'combined_cycles_percentage_data.csv'
csv_filename = os.path.join(output_folder, combined_csv_name)
combined_cycles_file = os.path.join(output_folder, combined_csv_name)
combined_cycles_df.to_csv(csv_filename, index=False)
# The statistics below are computed on the data as read back from that file.
df_combined = pd.read_csv(combined_cycles_file)
# Function to calculate stats for custom time ranges
def calculate_stats_for_ranges(df, time_col, ranges):
    """Return mean, std and median of the rows whose time lies in any range.

    Args:
        df: DataFrame containing the time column and the value columns.
        time_col: Label of the column in ``df`` that is compared to ``ranges``.
        ranges: Iterable of ``(lower, upper)`` pairs; both bounds are
            inclusive. A row is selected when its time falls inside at least
            one pair.

    Returns:
        Tuple ``(mean_values, std_values, median_values)``: the results of
        ``DataFrame.mean()``, ``DataFrame.std()`` and ``DataFrame.median()``
        on the selected rows. All columns of ``df`` take part, including the
        time column itself.
    """
    times = df[time_col]
    # Build the row mask with vectorized comparisons. Starting from an
    # explicitly boolean mask keeps the selection row-wise even when ``df``
    # or ``ranges`` is empty (no row is selected in that case).
    selected = pd.Series(False, index=times.index, dtype=bool)
    for lower, upper in ranges:
        selected = selected | ((times >= lower) & (times <= upper))
    filtered_df = df[selected.to_numpy()]
    mean_values = filtered_df.mean()
    std_values = filtered_df.std()
    median_values = filtered_df.median()
    return mean_values, std_values, median_values
# Time ranges (seconds, bounds inclusive) for the two groups of statistics.
# ranges_101_199 feeds the 'High overpotentials' rows and ranges_201_299 the
# 'Low overpotentials' rows of the table built below.
# NOTE(review): the windows are hard-coded up to 999 s, and times between
# windows (e.g. exactly 100 or 200) belong to neither group -- confirm this
# matches the measurement.
ranges_101_199 = [(101, 199), (301, 399), (501, 599), (701, 799), (901, 999)]
ranges_201_299 = [(1, 99), (201, 299), (401, 499), (601, 699), (801, 899)]

# Statistics over all rows
mean_all = df_combined[peaks].mean()
std_all = df_combined[peaks].std()
median_all = df_combined[peaks].median()
# Statistics for time ranges 101-199, 301-399, etc.
mean_101_199, std_101_199, median_101_199 = calculate_stats_for_ranges(
    df_combined, 'Time (s)', ranges_101_199)
# Statistics for time ranges 1-99, 201-299, 401-499, etc.
mean_201_299, std_201_299, median_201_299 = calculate_stats_for_ranges(
    df_combined, 'Time (s)', ranges_201_299)

# One (label, (mean, std, median)) entry per group, in output row order.
stat_groups = [
    ('All Data', (mean_all, std_all, median_all)),
    ('High overpotentials', (mean_101_199, std_101_199, median_101_199)),
    ('Low overpotentials', (mean_201_299, std_201_299, median_201_299)),
]
stat_names = ['Mean', 'Std', 'Median']

# The group label goes into the 'Time (s)' column so the statistics rows line
# up under the data columns when appended to the data.
stats_df = pd.DataFrame({
    'Time (s)': [label for label, _stats in stat_groups for _name in stat_names],
    'Statistic': stat_names * len(stat_groups),
})
# Add peak statistics
for peak in peaks:
    stats_df[peak] = [stat[peak] for _label, stats in stat_groups for stat in stats]

# Concatenate the original data with the new statistics
df_with_stats = pd.concat([df_combined, stats_df], ignore_index=True)
# Overwrite the combined CSV file with the data plus the appended statistics
df_with_stats.to_csv(combined_cycles_file, index=False)