Long-time lurker and Python novice here. Thanks in advance for your help.
I have CSV files containing 2+ columns; column 1 holds the X (time-series) data, and columns 2+ contain fluorescence values in which I want to find several peaks. Using the code below (adapted from here), I can find the peaks in a single column. However, I can't figure out how to loop over the columns. The CSV files may have anywhere from 2 to several dozen columns, so I don't want to manually change which column is read and analyzed.
In short: I can't loop through an XYYYY+ CSV file with each XY result written out to a CSV or Excel file.
I have tried many iterations, most recently using `for column in data.T:`.
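To make the goal concrete, here is roughly the structure I'm after (a sketch only; the loop body and the per-column output filename are placeholders for the real analysis below):

import numpy as np

data = np.loadtxt('Python_test_GABARx.csv', delimiter=',')  # same file as below
time = data[:, 0]
for i in range(1, data.shape[1]):  # skip the time column
    f_signal = data[:, i]
    # ...run find_peaks and build the results table here...
    # table.to_csv(f'CaStuffdata_col{i}.csv')  # hypothetical per-column output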
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 14:40:52 2023
@author: klinedd
"""
#import pyabf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec
#import scipy
#from scipy import signal
from scipy.signal import find_peaks
from scipy.optimize import curve_fit
import csv
my_file = r'C:/David Documents/Python/_CaImaging_python/Python_test_GABARx.csv'
# Identify number of columns, or time series rois, in the CSV file
with open(my_file) as f:
    reader = csv.reader(f, delimiter=',')
    ncol = len(next(reader))  # Read the first line and count columns
print("The number of columns in this file is:", ncol)
# Load the recordings from the CSV file; column 1 is time, the rest are traces
data = np.loadtxt(my_file, delimiter=",")
time = data[:, 0]
f_signal = data[:, 1]       # first fluorescence trace
f_signal_all = data[:, 1:]  # all fluorescence traces
#-------------
# Trying this. https://stackoverflow.com/questions/10148818/numpy-how-to-iterate-over-columns-of-array
#-------------
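# The pattern from that answer, as I understand it (sketch only, not yet
# wired into the analysis below): transposing makes each iteration yield
# one column, i.e. one ROI trace.
#     for column in f_signal_all.T:
#         peaks, peaks_dict = find_peaks(column, ...)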
# Determine interval and frequency
interval = np.diff(time).min() #in sec
fs = 1/interval #in hertz
print('Images acquired every', interval, 'seconds or', fs, 'Hz')
# If sampling frequency is manually added, replace number
# fs = x # Sampling frequency, change x if needed
# Plot the full trace
fig1 = plt.figure(figsize=(12, 12))
ax1 = fig1.add_subplot(211)
ax1.set_title("F data")
ax1.plot(time, f_signal_all, linewidth=0.5)
ax1.set_ylabel("Fluoro")
ax1.set_xlabel("Time (s)")
fig1.tight_layout()
# Set variables for the peak analysis window
pretrigger_window = (2000 * fs) / 1000    # Pre-event window: 2000 ms expressed in samples
posttrigger_window = (15000 * fs) / 1000  # Post-event window: 15000 ms expressed in samples
event_no = 0 # Event viewer: 0 is the first event
window_length = posttrigger_window - pretrigger_window # Determine the minimal length of the analysis window (in samples)
print('Analysis window length is', window_length / fs, 'sec')
# Set parameters of the Find peaks function
thresh_min = 10
thresh_max = 1200
thresh_prominence = 16
thresh_min_width = 0.9 * (fs/1000)
peaks, peaks_dict = find_peaks(f_signal,
                               height=(thresh_min, thresh_max),  # Min and max thresholds to detect peaks
                               threshold=None,  # Min and max vertical distance to neighboring samples
                               distance=None,   # Min horizontal distance between peaks
                               prominence=thresh_prominence,  # Vertical distance between the peak and its lowest contour line
                               width=thresh_min_width,  # Min required width (in bins); e.g. at 10 kHz, 10 bins = 1 ms
                               wlen=None,       # Window length used to calculate prominence
                               rel_height=0.5,  # Relative height at which the peak width is measured
                               plateau_size=None)
for column in data.T:  # NOTE: `column` is not used below — this is where I'm stuck
    # Create table with results
    table = pd.DataFrame(columns=['event', 'peak_position',
                                  'peak_position_s',
                                  'event_start', 'event_end',
                                  'Peak_Amp', 'Width_ms',
                                  'inst_freq', 'isi_s',
                                  'Area_pA/ms', 'log_decay',
                                  'tau_exp'])
    table.event = np.arange(1, len(peaks) + 1)
    table.peak_position = peaks
    table.peak_position_s = peaks / fs  # Divide by fs to convert to seconds
    table.event_start = peaks_dict['left_ips'] - pretrigger_window
    table.event_end = peaks_dict['right_ips'] + posttrigger_window
    table.Peak_Amp = peaks_dict['peak_heights']  # Requires the height parameter
    table.Width_ms = peaks_dict['widths'] / (fs / 1000)  # Width (ms) at half height
    # Optional parameters (remember to add the column names to the table)
    # table.rise_half_amp_ms = (peaks - peaks_dict['left_ips']) / (fs / 1000)
    # table.decay_half_amp_ms = (peaks_dict['right_ips'] - peaks) / (fs / 1000)
    # Calculations based on the parameters above
    table.inst_freq = np.append(1 / (np.array(table.peak_position[1:]) -
                                     np.array(peaks_dict['left_ips'][:-1])) * fs, np.nan)
    table.isi_s = np.diff(peaks, axis=0, prepend=peaks[0]) / fs
    # Area
    for i, event in table.iterrows():
        individual_event = f_signal[int(event.event_start):int(event.event_end)]
        table.loc[i, 'Area_pA/ms'] = np.round(individual_event.sum(), 1) / (fs / 1000)
    # Decay tau from a log-linear fit
    for i, event in table.iterrows():
        decay_tau = abs(f_signal[int(event.peak_position):int(event.event_end)])
        log_decay_tau = np.log(decay_tau)
        decay_width = len(decay_tau)
        decay_width_array = list(range(decay_width))
        slope, _ = np.polyfit(decay_width_array, log_decay_tau, 1)
        tau = -1 / slope
        table.loc[i, 'log_decay'] = tau / (fs / 1000)
    # Decay tau from monoexponential fitting
    for i, event in table.iterrows():
        decay_tau = f_signal[int(event.peak_position):int(event.event_end)]
        decay_width = len(decay_tau)
        decay_width_array = list(range(decay_width))
        a_initial = 200
        b_initial = 0.1
        # popt: optimal parameter values; pcov: estimated covariance of popt
        popt, pcov = curve_fit(lambda t, a, b: a * np.exp(b * t),
                               decay_width_array, decay_tau,
                               p0=(a_initial, b_initial),
                               maxfev=1000)  # maxfev: max number of function evaluations
        a = popt[0]
        b = popt[1]
        table.loc[i, 'tau_exp'] = abs((1 / b) / (fs / 1000))
    # Plot the event detection in the trace
    fig2 = plt.figure(figsize=(12, 4))
    gs = fig2.add_gridspec(ncols=2, nrows=1, width_ratios=[2, 1])  # renamed so it doesn't shadow the gridspec import
    ax1 = fig2.add_subplot(gs[0])
    ax1.set_title("Event detection")
    ax1.plot(f_signal)
    ax1.plot(peaks, f_signal[peaks], "r.")
    for i, txt in enumerate(table.event):
        ax1.annotate(table.event[i], (peaks[i], f_signal[peaks][i]))
    ax1.set_xlabel("Sample bin")
    ax1.set_ylabel("Fluoro")
    # ax1.axes.set_xlim(4000, 10000)  # Change the x-axis view
    # Plot a single event
    ax2 = fig2.add_subplot(gs[1])  # gridspec specifies the ratio between plots
    ax2.set_title("Event viewer")
    ax2.plot(f_signal, "gray")
    ax2.plot(peaks, f_signal[peaks], "r.")
    ax2.set_xlabel("Sample bin")
    ax2.set_ylabel("Fluoro")
    # Event time window
    ax2.set_xlim(table.event_start[event_no], table.event_end[event_no])
    # Label the event
    line, = ax2.plot(peaks, f_signal[peaks], "r.")
    line.set_label(table.event[event_no])
    ax2.legend()
    # Show graph and table
    plt.show()
    table  # displays the table in a notebook
    table.to_csv('C:/David Documents/Python/_CaImaging_python/CaStuffdata2.csv')  # NOTE: overwrites the same file on every pass
Not a real answer, but perhaps a starting point toward a more complete solution.
The CSV file:
cat column_test.csv
x,y1,y2,y3
1,6,7,8
2,3,2,6
3,8,9,3
Simple Python code that iterates over the data and pairs the first column with each of the subsequent columns:
import csv

with open('column_test.csv', 'r', newline='') as csv_file:
    cReader = csv.reader(csv_file, delimiter=',')
    next(cReader)  # Skip the header row
    data_list = []
    for row in cReader:
        row_list = []
        for col in row[1:]:  # Slice out the first column
            row_list.append([row[0], col])
        data_list.append(row_list)
data_list
[[['1', '6'], ['1', '7'], ['1', '8']],
[['2', '3'], ['2', '2'], ['2', '6']],
[['3', '8'], ['3', '9'], ['3', '3']]]
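Building on that idea, a minimal sketch of a possible next step (assuming the same column_test.csv layout; the height=5 threshold and output filenames are placeholders): load the file with numpy, run find_peaks on each Y column, and write one result file per column.

import numpy as np
from scipy.signal import find_peaks

data = np.loadtxt('column_test.csv', delimiter=',', skiprows=1)  # skip the header row
x = data[:, 0]
for i in range(1, data.shape[1]):       # columns y1, y2, y3, ...
    y = data[:, i]
    peaks, _ = find_peaks(y, height=5)  # placeholder threshold
    out = np.column_stack((x[peaks], y[peaks]))
    np.savetxt(f'peaks_col{i}.csv', out, delimiter=',',
               header='x,peak_value', comments='')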