import numpy as np
import operator


# Maps the textual class names used in the data file to numeric class codes.
_LABEL_CODES = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}


def classify0(test_data: np.ndarray, training_data: np.ndarray, labels: list, k: int):
    """Classify one sample with the k-nearest-neighbours rule.

    :param test_data: feature vector of the sample to classify
    :param training_data: training feature matrix, one sample per row
    :param labels: class label of every training row, in row order
    :param k: number of nearest neighbours that vote
    :return: the label that occurs most often among the k nearest training
        rows (same type as the elements of ``labels``); ties go to the label
        encountered first when walking the neighbours from nearest to farthest
    :raises ValueError: if ``k`` is not between 1 and the number of training rows
    """
    training_data = np.asarray(training_data)
    num_samples = training_data.shape[0]
    if not 1 <= k <= num_samples:
        raise ValueError(
            f"k must be between 1 and the number of training samples "
            f"({num_samples}), got {k}"
        )

    # Broadcasting subtracts the sample from every training row, so no
    # explicit np.tile copy of the sample is needed.
    distances = np.sqrt(np.sum((training_data - test_data) ** 2, axis=1))
    nearest_indices = distances.argsort()[:k]

    class_count = {}
    for index in nearest_indices:
        label = labels[index]
        class_count[label] = class_count.get(label, 0) + 1

    # max() returns the first entry with the highest count, which matches a
    # stable descending sort without sorting the whole dictionary.
    return max(class_count.items(), key=operator.itemgetter(1))[0]


def file2matrix(filename: str) -> tuple[np.ndarray, list]:
    """Parse a tab-separated data file into a feature matrix and label list.

    Every non-blank line must hold three numeric features followed by a class
    name. Class names are encoded as 1 (didntLike), 2 (smallDoses) and
    3 (largeDoses).

    :param filename: path of the data file
    :return: ``(feature_matrix, label_vector)``; the matrix has one row of
        three floats per sample. If the file does not exist, a message is
        printed and ``(np.array([]), [])`` is returned.
    :raises ValueError: if a line has too few fields, a non-numeric feature,
        or an unknown class name
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            lines = file.readlines()
    except FileNotFoundError:
        print(f"错误:未找到文件 {filename}")
        return np.array([]), []

    features = []
    label_vector = []
    for line_number, line in enumerate(lines, start=1):
        line = line.strip()
        if not line:
            # Blank lines (e.g. a trailing newline) carry no sample.
            continue
        data = line.split('\t')
        if len(data) < 4:
            raise ValueError(
                f"{filename}:{line_number}: expected 3 features and a label, "
                f"got {len(data)} field(s)"
            )
        if data[-1] not in _LABEL_CODES:
            # Skipping only the label would leave features and labels
            # misaligned, so an unknown class name is reported instead.
            raise ValueError(
                f"{filename}:{line_number}: unknown label {data[-1]!r}"
            )
        features.append([float(value) for value in data[0:3]])
        label_vector.append(_LABEL_CODES[data[-1]])

    # reshape keeps the (n, 3) shape even when the file has no samples.
    feature_matrix = np.array(features, dtype=float).reshape(-1, 3)
    return feature_matrix, label_vector


def autoNorm(data_set: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Min-max normalise every feature column.

    :param data_set: feature matrix, one sample per row
    :return: ``(norm_data_set, ranges, min_vals)`` where
        ``norm_data_set = (data_set - min_vals) / ranges``. For a column whose
        values are all equal the range is reported as 1 instead of 0, so the
        column normalises to 0 and callers can divide by ``ranges`` safely.
    """
    min_vals = data_set.min(0)
    max_vals = data_set.max(0)
    ranges = max_vals - min_vals
    # A constant column has range 0; dividing by it would yield NaN/inf.
    ranges = np.where(ranges == 0, 1.0, ranges)
    norm_data_set = (data_set - min_vals) / ranges
    return norm_data_set, ranges, min_vals


def classifyPerson():
    """Read three features from the user and print the predicted class.

    The training data is loaded from ``datingTestSet.txt``. Nothing is
    classified when the user input is not numeric or the file yields no data.
    """
    result_list = ['讨厌', '有些喜欢', '非常喜欢']

    # Only the numeric conversion of user input is guarded here, so errors in
    # the data file are not misreported as bad user input.
    try:
        percent_tats = float(input("玩视频游戏所耗时间百分比:"))
        ff_miles = float(input("每年获得的飞行常客里程数:"))
        ice_cream = float(input("每周消费的冰激淋公升数:"))
    except ValueError:
        print("错误:输入必须为有效的数字")
        return

    filename = "datingTestSet.txt"
    dating_data_matrix, dating_labels = file2matrix(filename)
    if dating_data_matrix.size == 0:
        return

    norm_matrix, ranges, min_vals = autoNorm(dating_data_matrix)

    # The feature vector must follow the column order of the training file.
    # NOTE(review): the standard datingTestSet.txt stores the columns as
    # (flyer miles, game-time %, ice cream) -- confirm against the local copy.
    test_array = np.array([ff_miles, percent_tats, ice_cream])
    norm_test_array = (test_array - min_vals) / ranges

    # Never ask for more neighbours than there are training samples.
    k = min(3, len(dating_labels))
    classifier_result = classify0(norm_test_array, norm_matrix, dating_labels, k)
    print(f"你可能{result_list[classifier_result - 1]}这个人")


if __name__ == '__main__':
    classifyPerson()
# Sample output (depends on the values entered): 你可能有些喜欢这个人
import numpy as np
# Sample table: one record per row, five fields per record.
# np.array is used instead of np.mat, which was removed in NumPy 2.0;
# the mixed int/float/bool values are all stored as floats.
data = np.array([
    [1, 200, 105, 3, False],
    [2, 165, 80, 2, False],
    [3, 184.5, 120, 2, False],
    [4, 116, 70, 8, False],
    [5, 270, 150, 4, True],
])

# The number of rows is the first dimension; no counting loop is needed.
row = data.shape[0]
print(row)
# size is the total number of elements (rows * columns).
print(data.size)
# Output:
# 5
# 25
import numpy as np
# Sample table: one record per row, five fields per record.
# np.array is used instead of np.mat, which was removed in NumPy 2.0.
data = np.array([
    [1, 200, 105, 3, False],
    [2, 165, 80, 2, False],
    [3, 184.5, 120, 2, False],
    [4, 116, 70, 8, False],
    [5, 270, 150, 4, True],
])

# Elements are addressed as [row, column]. The array has a single float
# dtype, so the integer 3 prints as 3.0 and the boolean False as 0.0.
# Each value is printed once; wrapping print() in another print() would
# additionally print the inner call's return value, None.
print(data[0, 3])
print(data[0, 4])
# Output:
# 3.0
# 0.0
import numpy as np
# Sample table: one record per row, five fields per record.
# np.array is used instead of np.mat, which was removed in NumPy 2.0.
data = np.array([
    [1, 200, 105, 3, False],
    [2, 165, 80, 2, False],
    [3, 184.5, 120, 2, False],
    [4, 116, 70, 8, False],
    [5, 270, 150, 4, True],
])

# Take the second column (index 1) with a slice instead of appending the
# values one by one in a loop.
coll = data[:, 1]

print(np.sum(coll))
print(np.mean(coll))
# std and var use NumPy's default ddof=0, i.e. the population formulas.
print(np.std(coll))
print(np.var(coll))
# Output:
# 935.5
# 187.1
# 50.17808286493218
# 2517.84
import numpy as np
import pylab
import scipy.stats as stats

# Sample table: one record per row, five fields per record.
# np.array is used instead of np.mat, which was removed in NumPy 2.0.
data = np.array([
    [1, 200, 105, 3, False],
    [2, 165, 80, 2, False],
    [3, 184.5, 120, 2, False],
    [4, 116, 70, 8, False],
    [5, 270, 150, 4, True],
])

# Take the second column (index 1) with a slice instead of a manual loop.
coll = data[:, 1]

# Probability (Q-Q) plot of the column against the normal distribution,
# which is probplot's default; the plot is drawn through pylab.
stats.probplot(coll, plot=pylab)
pylab.show()
import pandas as pd
import matplotlib.pyplot as plot

# Sample table: one record per row, five fields per record.
rocksVmines = pd.DataFrame([
    [1, 200, 105, 3, False],
    [2, 165, 80, 2, False],
    [3, 184.5, 120, 2, False],
    [4, 116, 70, 8, False],
    [5, 270, 150, 4, True],
])

# Each "datarow" holds the first three fields of one record (row index 1, 2, 3).
datarow1 = rocksVmines.iloc[1, 0:3]
datarow2 = rocksVmines.iloc[2, 0:3]

# First figure: record 1 plotted against record 2.
# NOTE(review): the axis labels say "Attribute", but the plotted values are
# rows of the table, not columns -- the labels are kept as written.
plot.scatter(datarow1, datarow2)
plot.xlabel("Attribute1")
plot.ylabel("Attribute2")
plot.show()

# Second figure: record 2 plotted against record 3.
datarow3 = rocksVmines.iloc[3, 0:3]
plot.scatter(datarow2, datarow3)
plot.xlabel("Attribute2")
plot.ylabel("Attribute3")
plot.show()
# Repeat of the second figure (record 2 against record 3). The axis labels
# use plain ASCII double quotes; typographic quotes are not valid string
# delimiters in Python.
plot.scatter(datarow2, datarow3)
plot.xlabel("Attribute2")
plot.ylabel("Attribute3")
plot.show()
# [Two figures (the scatter plots produced above) were embedded here in the original article; the external images were not captured in this copy.]