1. Merge the feature values from three CSV files into a single file, appending the corresponding label to each row.
# -*- coding: utf-8 -*-

label1 = '1'
label2 = '2'
label3 = '3'

# Write the header line once.
header = "feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10,label" + "\n"
with open("./dataset/dataTime2.csv", 'a') as rfile:
    rfile.write(header)

# Append every row of f02.csv with label 1.
with open("./dataset/f02.csv", 'r') as file:
    a = file.readline().strip()
    while a:
        a = a + ',' + label1 + "\n"
        # a = label1 + ',' + a + "\n"   # use this line instead to put the label first
        with open("./dataset/dataTime2.csv", 'a') as rfile:
            rfile.write(a)
        a = file.readline().strip()

# Append every row of g03.csv with label 2.
with open("./dataset/g03.csv", 'r') as file:
    a = file.readline().strip()
    while a:
        a = a + ',' + label2 + "\n"
        # a = label2 + ',' + a + "\n"
        with open("./dataset/dataTime2.csv", 'a') as rfile:
            rfile.write(a)
        a = file.readline().strip()

# Append every row of normal05.csv with label 3.
with open("./dataset/normal05.csv", 'r') as file:
    a = file.readline().strip()
    while a:
        a = a + ',' + label3 + "\n"
        # a = label3 + ',' + a + "\n"
        with open("./dataset/dataTime2.csv", 'a') as rfile:
            rfile.write(a)
        a = file.readline().strip()
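If pandas is available, the same merge can be written more compactly. This is only a minimal alternative sketch, assuming the three source files have no header row and exactly ten feature columns (the feature_names and sources variables are mine, not from the original):

import pandas as pd

feature_names = ["feature%d" % i for i in range(1, 11)]
sources = [("./dataset/f02.csv", 1), ("./dataset/g03.csv", 2), ("./dataset/normal05.csv", 3)]

frames = []
for path, label in sources:
    df = pd.read_csv(path, header=None, names=feature_names)
    df["label"] = label                      # add the label column for this source file
    frames.append(df)

pd.concat(frames, ignore_index=True).to_csv("./dataset/dataTime2.csv", index=False)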
2. Extract a single column from a CSV file. The code below collects every value in the column whose header is label.
filename = "./dataset/dataTime2.csv"list1 = []with open(filename, 'r') as file:reader = csv.DictReader(file)column = [row['label'] for row in reader]
3. Extract several columns from a CSV file. The code below collects every column except the one headed label.
import pandas as pd

odata = pd.read_csv(filename)
y = odata['label']                    # the label column
x = odata.drop(['label'], axis=1)     # all feature values except the label column
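From here the DataFrame can be handed straight to scikit-learn. A minimal sketch, assuming scikit-learn is installed; train_test_split, the 0.2 test ratio, and the fixed seed are illustrative choices of mine, not part of the original:

from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)   # 80/20 split, fixed seed for reproducibility

print(x_train.shape, x_test.shape)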
4. The data can also be processed into a list of np.array rows.
filename = "./dataset/dataTime2.csv"list1 = []with open(filename, 'r') as file:a = file.readline()while a:c = np.array(a.strip("\n").split(","))list1.append(c)
5. The data can also be turned into a tensor-format dataset.
# -*- coding: utf-8 -*-
import tensorflow as tf

# The first line of the file is the header, so skip it when reading.
filename = tf.train.string_input_producer(["./dataset/dataTime.csv"])
reader = tf.TextLineReader(skip_header_lines=1)
key, value = reader.read(filename)
record_defaults = [[1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.], [1.],
                   tf.constant([], dtype=tf.int32)]
col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11 = tf.decode_csv(
    value, record_defaults=record_defaults)
features = tf.stack([col1, col2, col3, col4, col5, col6, col7, col8, col9, col10])

with tf.Session() as sess:
    # Start populating the filename queue.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    trainx = []
    trainy = []
    for i in range(81000):
        # Retrieve a single instance:
        example, label = sess.run([features, col11])
        trainx.append(example)
        trainy.append(label)
    coord.request_stop()
    coord.join(threads)
    # trainx ends up holding 81000 examples, each with 10 features.
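The queue-based pipeline above only exists in TensorFlow 1.x. On TensorFlow 2.x the same file can be read with the tf.data API instead. A minimal sketch, assuming the CSV has a header row, ten float feature columns and one integer label column; the use of CsvDataset and the batch size of 32 are my choices for illustration, not part of the original:

import tensorflow as tf

record_defaults = [tf.float32] * 10 + [tf.int32]

dataset = tf.data.experimental.CsvDataset(
    "./dataset/dataTime.csv", record_defaults, header=True)

# Pack the ten feature scalars into one vector per example, keep the label separate.
dataset = dataset.map(lambda *cols: (tf.stack(cols[:10]), cols[10]))
dataset = dataset.batch(32)

for features, labels in dataset.take(1):
    print(features.shape, labels.shape)   # (32, 10) (32,)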