RuntimeWarning: overflow encountered in double_scalars —— 出错行:diff += X_values * (y_values - ((X_values * m) + b))


import math, copy 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the honey-production data and average total production per year.
df = pd.read_csv("D:/Code/Vscode/test1/honeyproduction (3).csv")
prod_per_year = df.groupby('year').totalprod.mean().reset_index()
# BUG FIX: fill missing values BEFORE extracting X/y. The original called
# fillna() after X and y were already copied out, so any NaN leaked into
# the arrays used for training. (fillna(method=...) is deprecated; use ffill().)
prod_per_year = prod_per_year.ffill()
X = prod_per_year["year"].values.reshape(-1,)
y = prod_per_year["totalprod"].values.reshape(-1,)
# BUG FIX: use a separate scaler per variable. The original re-fit one
# scaler on X and then on y, discarding X's min/max. `scaler` is kept as
# the y-scaler so the later inverse_transform of predictions still works.
x_scaler = MinMaxScaler()
X_scaled = x_scaler.fit_transform(X.reshape(-1, 1))
scaler = MinMaxScaler()
y_scaled = scaler.fit_transform(y.reshape(-1, 1))

def get_gradient_at_b(X, y, b, m):
    """Gradient of the mean-squared-error loss with respect to the intercept b.

    X, y -- equal-length sequences of inputs and targets.
    b, m -- current intercept and slope.
    Returns -(2/N) * sum(residuals).
    """
    n = len(X)
    residual_sum = sum(y[i] - (m * X[i] + b) for i in range(n))
    return -(2 / n) * residual_sum
def get_gradient_at_m(X, y, b, m):
    """Gradient of the mean-squared-error loss with respect to the slope m.

    X, y -- equal-length sequences of inputs and targets.
    b, m -- current intercept and slope.
    Returns -(2/N) * sum(x_i * residual_i).
    """
    n = len(X)
    weighted_residual_sum = sum(X[i] * (y[i] - (m * X[i] + b)) for i in range(n))
    return -(2 / n) * weighted_residual_sum
def step_gradient(X, y, learning_rate, b_current, m_current):
    """Perform one gradient-descent update on (b, m) and return [b, m]."""
    grad_b = get_gradient_at_b(X, y, b_current, m_current)
    grad_m = get_gradient_at_m(X, y, b_current, m_current)
    new_b = b_current - learning_rate * grad_b
    new_m = m_current - learning_rate * grad_m
    return [new_b, new_m]
def gradient_descent(X, y, learning_rate, num_iterations):
    """Fit y ≈ m*X + b by iterating step_gradient from (b, m) = (0, 0)."""
    b, m = 0, 0
    for _ in range(num_iterations):
        b, m = step_gradient(X, y, learning_rate, b, m)
    return b, m
# BUG FIX (this is what caused the NaN / "invalid value encountered in
# double_scalars"): gradient descent was run on the RAW X, y. Years (~2000)
# and production values (~1e6) make the gradients explode and overflow
# float64. Train on the scaled arrays instead.
b, m = gradient_descent(X_scaled.ravel(), y_scaled.ravel(), 0.004, 1000)
# Predict in scaled space, then map predictions back to original units.
# NOTE: `scaler` must be the scaler that was fit on y.
y_pred_scaled = m * X_scaled + b
y_pred = scaler.inverse_transform(y_pred_scaled.reshape(-1, 1))
X = X.tolist()
y = y.tolist()
print(X)
print(np.shape(X))
print(y)
print(y_pred)
print(np.shape(y_pred))
plt.scatter(X, y)
plt.plot(X, y_pred, color='red')
plt.show()

我尝试缩放 X 和 y,也尝试修复缺失值,但打印出的 y_pred 仍然全是 NaN。

我还收到运行时警告 RuntimeWarning: invalid value encountered in double_scalars,出错行是 m = m_current - (m_gradient * learning_rate)。
所以我现在正尝试降低学习率或增加迭代次数,看看能否提升模型的表现。

有人能帮我检查一下代码吗?谢谢大家!

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
def get_gradients(X, y, b, m):
    """Vectorized MSE gradients for the line y = m*X + b.

    Returns (db, dm): the partial derivatives of the mean squared error
    with respect to the intercept and the slope.
    """
    n = len(X)
    residual = y - (m * X + b)
    db = -(2 / n) * np.sum(residual)
    dm = -(2 / n) * np.sum(X * residual)
    return db, dm
def gradient_descent(X, y, learning_rate, num_iterations):
    """Fit y ≈ m*X + b via vectorized gradient descent from (0, 0)."""
    b = 0
    m = 0
    for _ in range(num_iterations):
        db, dm = get_gradients(X, y, b, m)
        b = b - learning_rate * db
        m = m - learning_rate * dm
    return b, m
# Load data, average total production per year, and fill gaps BEFORE
# extracting the training arrays.
df = pd.read_csv("D:/Code/Vscode/test1/honeyproduction (3).csv")
prod_per_year = df.groupby('year').totalprod.mean().reset_index()
# fillna(method='ffill') is deprecated in modern pandas; ffill() is equivalent.
prod_per_year = prod_per_year.ffill()
X = prod_per_year["year"].values
y = prod_per_year["totalprod"].values
# Normalize the input data. One scaler per variable: reusing a single
# scaler only worked because its LAST fit happened to be on y — fragile.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
X_scaled = x_scaler.fit_transform(X.reshape(-1, 1))
y_scaled = y_scaler.fit_transform(y.reshape(-1, 1))
b, m = gradient_descent(X_scaled, y_scaled, learning_rate=0.1, num_iterations=1000)
# Inverse transform the predictions to obtain the original scale.
y_pred_scaled = m * X_scaled + b
y_pred = y_scaler.inverse_transform(y_pred_scaled)
plt.scatter(X, y)
plt.plot(X, y_pred)
plt.show()

以上是修改后的代码。