机器学习题目 | 马克图布

无标签

机器学习

发布日期: 2022-10-01

文章字数: 847

阅读时长: 5 分

阅读次数:

机器学习

简单线性回归

题目1

代码：

import pandas
import numpy as np
import matplotlib.pyplot as plt
# Load the temperature dataset into a plain array. Column 0 is used as the
# regression target and columns 1-4 as the input features (see slices below).
df = pandas.read_csv('temperature_dataset.csv')
data = np.array(df)

# Hyperparameters and split sizes.
lr = 0.0001
epoch = 1000
total_size = len(data)
train_size = 3000
test_size = total_size - train_size

# Split by row position. The boundary is derived from train_size (instead of
# repeating the literal 3000) so the slices always agree with the sample count
# that the training loop uses to average its gradients.
train_set = data[:train_size, 1:5]
train_target = data[:train_size, 0]

test_set = data[train_size:, 1:5]
test_target = data[train_size:, 0]

# Batch gradient descent on the mean squared error of a linear model.
# w, y and y_hat are all kept as column vectors.
w = np.zeros((4, 1))
b = 0
step = 2 * lr  # the factor 2 comes from differentiating the squared error
y = train_target.reshape((-1, 1))
train_loss = []
for _ in range(epoch):
    y_hat = np.dot(train_set, w) + b
    residual = y_hat - y
    # Both updates use the residual of the same forward pass.
    b = b - step * residual.sum().item(0) / train_size
    w = w - step * np.dot(train_set.T, residual) / train_size
    # The recorded curve is the mean absolute error, not the optimised MSE.
    train_loss.append(np.abs(residual).sum().item(0) / train_size)

# Loss curve over the epochs.
plt.plot(train_loss)

# Training RMSE; e holds the residuals y_hat - y, so e.T @ e is the squared
# error sum and train_rmse ends up as a 1x1 array.
e = np.dot(train_set, w) + b - y
train_rmse = np.sqrt(np.dot(e.T, e) / train_size)



# Same linear regression as above, now trained on min-max scaled features.
w = np.zeros(4).reshape((-1, 1))
b = 0
lr = 0.1  # the learning rate has to be re-tuned once the features are scaled

# Min-max feature scaling: every feature column is mapped to [0, 1] using its
# own minimum and maximum (axis=0). Taking the min/max over the whole matrix
# would leave columns with different ranges on different scales.
x_max = train_set.max(axis=0)
x_min = train_set.min(axis=0)
x_range = x_max - x_min
# A constant column has zero range; divide by 1 there to avoid NaNs.
x_range = np.where(x_range == 0, 1, x_range)
train_set = (train_set - x_min) / x_range

train_loss = []
for _ in range(epoch):
    y_hat = np.dot(train_set, w) + b
    y = train_target.reshape((-1, 1))
    residual = y_hat - y
    # Gradient-descent step on the mean squared error.
    b = b - 2*lr*(residual.sum().item(0))/train_size
    w = w - 2*lr*np.dot(train_set.T, residual)/train_size
    # Track the mean absolute error per epoch.
    train_loss.append((np.abs(residual)).sum().item(0)/train_size)

# Training RMSE on the scaled features; e holds the residuals y_hat - y.
e = (np.dot(train_set, w) + b - train_target.reshape((-1, 1)))
train_rmse = np.sqrt(np.dot(e.T, e)/train_size)

题目2

import numpy as np
import matplotlib.pyplot as plt

# Index of the training dataset to use (1, 2 or 3).
dataset = 1

# Training data as row vectors of shape (1, m): x is the single input feature
# and y the binary label.
if dataset == 1:  # balanced dataset
    x_train = np.array([50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]).reshape((1, -1))
    y_train = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]).reshape((1, -1))
elif dataset == 2:  # unbalanced dataset 1
    x_train = np.array([0, 5, 10, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70]).reshape((1, -1))
    y_train = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]).reshape((1, -1))
elif dataset == 3:  # unbalanced dataset 2
    x_train = np.array([0, 5, 10, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73]).reshape((1, -1))
    y_train = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]).reshape((1, -1))
else:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(f"unknown dataset index {dataset!r}; expected 1, 2 or 3")
m_train = x_train.size  # number of training examples





# Hyperparameters: number of gradient steps and learning rate.
epoch = 200000
lr = 0.002

# Transpose the (1, m) row vectors into (m, 1) column vectors for training.
x, y = x_train.T, y_train.T
# Scalar weight and bias, both starting at zero.
w, b = 0, 0
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Accepts a scalar or an array-like and returns a result of the same shape
    (a scalar for scalar input). The value is computed from exp(-|x|) only,
    so large negative inputs no longer overflow inside np.exp.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1], which keeps both branches below finite.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; arrays pass through.
    return result[()]
# Gradient descent on the mean squared error of sigmoid(w*x + b).
train_loss = []
step = 2 * lr  # the factor 2 comes from differentiating the squared error
for _ in range(epoch):
    y_hat = sigmoid(w * x + b)
    # Factor shared by both gradients: sigmoid'(z) * (y_hat - y).
    delta = y_hat * (1 - y_hat) * (y_hat - y)
    b = b - step * delta.sum() / m_train
    # np.dot of (1, m) with (m, 1) yields a 1x1 array, so w becomes 1x1 too.
    w = w - step * np.dot(x.T, delta) / m_train
    # Track the mean absolute error per epoch.
    train_loss.append(np.abs(y_hat - y).sum() / m_train)

逻辑回归

题目1

import pandas
import numpy as np
import matplotlib.pyplot as plt

# Load the alcohol dataset into a plain array.
df = pandas.read_csv('alcohol_dataset.csv')
data = np.array(df)

# Shuffle the rows with a fixed seed so the train/test split is repeatable.
rng = np.random.default_rng(1)
data = rng.permutation(data)

# Optional global min-max normalisation (currently disabled):
# data = (data - np.amin(data))/(np.amax(data) - np.amin(data))

# The first m_train shuffled rows are used for training, the rest for testing.
# Columns 0-4 are the features and column 5 is the label.
m_train = 250
m_test = data.shape[0] - m_train
train_rows, test_rows = data[:m_train], data[m_train:]

train_data, train_label = train_rows[:, :5], train_rows[:, 5]
test_data, test_label = test_rows[:, :5], test_rows[:, 5]

# Number of gradient-descent steps.
epoch = 200000

# Initial parameters drawn from a standard normal distribution: w is a (5, 1)
# column vector and b a scalar. They come from the seeded generator `rng`
# that already shuffles the data, so the whole run is reproducible rather
# than depending on the unseeded global np.random state.
w = rng.standard_normal(5).reshape((-1, 1))
b = rng.standard_normal()


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Accepts a scalar or an array-like and returns a result of the same shape
    (a scalar for scalar input). The value is computed from exp(-|x|) only,
    so large negative inputs no longer overflow inside np.exp.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) lies in (0, 1], which keeps both branches below finite.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- the same function.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar; arrays pass through.
    return result[()]


# Per-epoch history; train_recall is declared but never filled below.
train_recall = []
train_accuracy = []
lr = 0.001
x = train_data
y = train_label.reshape((-1, 1))
for _ in range(epoch):
    y_hat = sigmoid(np.dot(x, w) + b)
    residual = y_hat - y
    # Gradient step; both updates use the residual of the same forward pass.
    b = b - lr * residual.sum() / m_train
    w = w - lr * np.dot(x.T, residual) / m_train

    # Threshold the pre-update probabilities at 0.5 to get class labels.
    y_hat = np.where(y_hat >= 0.5, 1, 0)
    pred_pos, pred_neg = y_hat == 1, y_hat == 0
    true_pos, true_neg = y == 1, y == 0

    # Confusion-matrix counts.
    tp = (pred_pos & true_pos).sum()
    fp = (pred_pos & true_neg).sum()
    tn = (pred_neg & true_neg).sum()
    fn = (pred_neg & true_pos).sum()

    accuracy = (tp + tn) / (tp + tn + fp + fn)  # fraction classified correctly
    train_accuracy.append(accuracy)


# Number of misclassified training samples.
y_hat = np.where(sigmoid(np.dot(x, w) + b) >= 0.5, 1, 0)
print(m_train - (y_hat == y).sum())

# Number of misclassified test samples.
x = test_data
y = test_label.reshape((-1, 1))
y_hat = np.where(sigmoid(np.dot(x, w) + b) >= 0.5, 1, 0)
print(m_test - (y_hat == y).sum())

# Training accuracy over the epochs.
plt.plot(train_accuracy)
plt.show()

马克图布

https://risingauroras.github.io/posts/53f5/

本博客所有文章除特别声明外，均采用 CC BY 4.0 许可协议。转载请注明来源：马克图布！

无标签

矩阵理论-1范数、2范数、无穷范数的通俗理解

2022-10-02 数学基础

线性代数

2022-09-29 数学基础