别再只调包了!手把手教你用Python从CWRU轴承数据集中提取SKF620/6303的故障特征
从振动信号到特征矩阵Python实战CWRU轴承故障诊断全流程轴承故障诊断是工业设备预测性维护的核心场景。当拿到CWRU数据集时许多初学者会陷入调包侠的困境——直接使用封装好的特征数据却对原始振动信号的处理过程一无所知。本文将带你用Python从零处理.mat文件针对SKF6205和SKF6303轴承完整实现从时域到频域的特征提取全流程。1. 理解CWRU数据集的结构与物理意义CWRU轴承数据集的.mat文件就像一座未经开采的矿山每个振动信号都蕴含着设备状态的密码。以典型的105.mat文件为例加载后我们会看到四个关键通道import scipy.io as sio data sio.loadmat(105.mat) print(data.keys()) # 输出[__header__, __version__, __globals__, X105_DE_time, X105_FE_time, X105_BA_time, X105_RPM]三个加速度计通道的物理含义需要特别关注DEDrive End电机驱动端轴承振动信号故障特征最明显FEFan End风扇端轴承振动信号BABase基座振动信号常作为环境噪声参考采样率差异直接影响频域分析DE数据12kHz或48kHz采样FE数据统一12kHz采样轴承型号故障直径(mils)对应品牌SKF62057, 14, 21SKFSKF63037, 14, 21SKF等效型号28, 40NTN2. 数据预处理与可视化探索原始振动信号往往包含噪声和干扰合理的预处理能提升特征质量。我们先定义一个数据加载函数import numpy as np import pandas as pd from scipy import signal def load_bearing_data(filepath, channelDE, length1024): mat_data sio.loadmat(filepath) time_series mat_data[fX{filepath.stem}_{channel}_time].flatten() return pd.Series(time_series[:length], indexnp.arange(length)/12000, # 假设12kHz采样 namef{filepath.stem}_{channel})时域波形可视化能直观展现故障特征import matplotlib.pyplot as plt de_data load_bearing_data(105.mat) plt.figure(figsize(12,4)) de_data.plot(titleSKF6205 DE端原始振动信号7mils故障) plt.xlabel(时间(s)); plt.ylabel(加速度(g)) plt.grid(True)典型预处理步骤包括去趋势消除信号基线漂移带通滤波保留轴承特征频率范围通常500Hz-5kHz重采样统一不同采样率的数据# 巴特沃斯带通滤波示例 sos signal.butter(4, [500, 5000], bandpass, fs12000, outputsos) filtered signal.sosfilt(sos, de_data.values)3. 时域特征工程实战时域特征是计算效率最高的故障指标。对于SKF6205轴承我们计算6个核心指标特征类型计算公式物理意义RMS值$\sqrt{\frac{1}{N}\sum_{i1}^N x_i^2}$振动能量水平峰值$\max(x峰峰值$\max(x) - \min(x)$振动幅度范围峭度$\frac{\frac{1}{N}\sum(x_i-\mu)^4}{\sigma^4}$冲击成分度量波形指标RMS/均值绝对值波形复杂度脉冲指标峰值/均值绝对值冲击特性Python实现代码def time_domain_features(signal): features {} features[RMS] np.sqrt(np.mean(signal**2)) features[Peak] np.max(np.abs(signal)) features[Kurtosis] stats.kurtosis(signal) features[CrestFactor] features[Peak]/features[RMS] return pd.DataFrame(features, index[0])不同负载下的特征对比揭示重要规律hp0_features time_domain_features(load_bearing_data(97.mat)) # 0HP hp3_features time_domain_features(load_bearing_data(105.mat)) # 3HP pd.concat([hp0_features, hp3_features], keys[0HP, 3HP]).round(4)负载(HP)RMS峰值峭度00.1420.563.2130.3181.895.474. 频域特征提取与故障频率计算频域分析能识别轴承各部件的缺陷特征频率。对于SKF6205轴承我们需要先计算其几何参数def bearing_frequencies(rpm, bore_diameter, pitch_diameter, n_balls, contact_angle0): rps rpm / 60 ball_diameter (pitch_diameter - bore_diameter)/2 FTF rps * (1 - (ball_diameter/pitch_diameter)*np.cos(contact_angle))/2 BPFO n_balls * rps * (1 - (ball_diameter/pitch_diameter)*np.cos(contact_angle))/2 BPFI n_balls * rps * (1 (ball_diameter/pitch_diameter)*np.cos(contact_angle))/2 BSF pitch_diameter * rps * (1 - (ball_diameter/pitch_diameter)**2 * np.cos(contact_angle)**2)/(2*ball_diameter) return {FTF:FTF, BPFO:BPFO, BPFI:BPFI, BSF:BSF} skf6205_params {bore_diameter:25.4, pitch_diameter:39.04, n_balls:9} freqs bearing_frequencies(1772, **skf6205_params) # 105.mat对应转速FFT频谱分析代码实现def frequency_analysis(signal, fs, bearing_freqs): n len(signal) fft_vals np.fft.rfft(signal * np.hanning(n)) freqs np.fft.rfftfreq(n, 1/fs) plt.figure(figsize(12,4)) plt.plot(freqs, np.abs(fft_vals)) for name, freq in bearing_freqs.items(): plt.axvline(freq, colorr, linestyle--, alpha0.5) plt.text(freq, np.max(np.abs(fft_vals))*0.9, name) plt.xlim(0, 2000) # 聚焦轴承特征频率范围关键频域特征包括故障频率幅值BPFO、BPFI等频率处的频谱幅值边带分析故障频率周围的调制现象谐波成分故障频率的整数倍频成分频带能量划分多个频段计算能量占比def envelope_analysis(signal, fs, bpfo): analytic_signal signal 1j * signal envelope np.abs(analytic_signal) envelope_fft np.fft.rfft(envelope) return np.max(envelope_fft[np.where(np.fft.rfftfreq(len(signal), 1/fs) bpfo)])5. 特征工程完整流程与模型对接构建最终特征矩阵的完整Pipelinefrom sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler class BearingFeatureExtractor: def __init__(self, fs, bearing_params): self.fs fs self.bearing_freqs bearing_frequencies(fs, **bearing_params) def transform(self, X): features time_domain_features(X) fft np.abs(np.fft.rfft(X)) freqs np.fft.rfftfreq(len(X), 1/self.fs) for name, freq in self.bearing_freqs.items(): idx np.argmin(np.abs(freqs - freq)) features[f{name}_amp] fft[idx] features[Envelope_BPFO] envelope_analysis(X, self.fs, self.bearing_freqs[BPFO]) return features pipeline Pipeline([ (features, BearingFeatureExtractor(fs12000, bearing_paramsskf6205_params)), (scaler, StandardScaler()) ])实际项目中我们会批量处理多个文件构建特征数据集def process_directory(file_pattern): all_features [] for filepath in glob.glob(file_pattern): data load_bearing_data(filepath) features pipeline.named_steps[features].transform(data) features[file] filepath.stem all_features.append(features) return pd.concat(all_features).set_index(file) feature_matrix process_directory(*.mat)处理不同型号轴承时只需调整bearing_params参数。对于SKF6303轴承skf6303_params {bore_diameter:17.0, pitch_diameter:35.5, n_balls:8} extractor BearingFeatureExtractor(fs12000, bearing_paramsskf6303_params)在工业现场部署时建议将特征提取逻辑封装为实时处理模块class RealTimeMonitor: def __init__(self, pipeline, threshold3.0): self.pipeline pipeline self.threshold threshold self.buffer [] def update(self, new_samples): self.buffer.extend(new_samples) if len(self.buffer) 1024: # 处理固定长度窗口 features self.pipeline.transform(np.array(self.buffer[:1024])) self.buffer self.buffer[512:] # 50%重叠 return self._check_anomaly(features) return None def _check_anomaly(self, features): return features[Kurtosis] self.threshold