The first method: fit
import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Read the data
x_train = np.load("D:\\machineTest\\testmulPE_win7\\data_sprase.npy")[()]
y_train = np.load("D:\\machineTest\\testmulPE_win7\\lable_sprase.npy")

# Get the total number of classification categories
classes = len(np.unique(y_train))

# One-hot encode the labels (required by categorical_crossentropy)
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(y_train)
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
y_train = onehot_encoder.fit_transform(integer_encoded)

# Shuffle and split
X_train, X_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.3, random_state=0)

model = Sequential()
model.add(Dense(units=1000, activation='relu', input_dim=784))
model.add(Dense(units=classes, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, batch_size=128)
score = model.evaluate(X_test, y_test, batch_size=128)

# fit parameter details
# model.fit(
#   x=None,               # training data
#   y=None,               # training data labels
#   batch_size=None,      # update the weights after this many samples; default 32
#   epochs=1,             # number of training rounds
#   verbose=1,            # 0: no log output to stdout, 1: progress bar, 2: one line per epoch
#   callbacks=None,       # list of callback objects that will be invoked during training
#   validation_split=0.,  # float in [0, 1); fraction of the training set held out as a validation set (note: an explicit validation_data overrides this setting)
#   validation_data=None, # validation set
#   shuffle=True,         # boolean or string; if boolean, whether to shuffle the sample order before each epoch; "batch" is for processing HDF5 data
#   class_weight=None,    # dict; in classification, raise the weight of classes that need extra attention so that misclassifying them costs more in the loss
#   sample_weight=None,   # array of the same length as the input samples, one weight per sample; for temporal data, use a matrix of shape (samples, sequence_length)
#   initial_epoch=0,      # if the model was trained before, resume training from this epoch
#   steps_per_epoch=None, # how many batches one epoch is split into, e.g. steps_per_epoch=10 splits the training set into 10 batches; cannot be used together with batch_size
#   validation_steps=None,# only relevant when steps_per_epoch is set; number of validation batches
#   **kwargs              # for interacting with the backend
# )
# Returns a History object; the training process (loss values, etc.) can be inspected through it.
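As the last comment notes, fit returns a History object. A minimal sketch of reading it back, reusing model, X_train and y_train from the example above (the validation_split value here is just an illustration):

history = model.fit(X_train, y_train, epochs=50, batch_size=128,
                    validation_split=0.1)  # hold out 10% of the training data

# history.history is a dict mapping metric names to per-epoch lists
print(history.history['loss'])      # training loss for each epoch
print(history.history['val_loss'])  # validation loss (present because a validation set was used)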
The second method: fit_generator (saves memory)
# The second method saves memory.
'''
Created on 2018-4-11
fit_generate.txt: the last two columns are the label, already one-hot encoded
1 2 0 1
2 3 1 0
1 3 0 1
1 4 0 1
2 4 1 0
2 5 1 0
'''
import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np

count = 1

def generate_arrays_from_file(path):
    global count
    while 1:
        datas = np.loadtxt(path, delimiter=' ', dtype="int")
        x = datas[:, :2]
        y = datas[:, 2:]
        print("count:" + str(count))
        count = count + 1
        yield (x, y)

x_valid = np.array([[1, 2], [2, 3]])
y_valid = np.array([[0, 1], [1, 0]])

model = Sequential()
model.add(Dense(units=1000, activation='relu', input_dim=2))
model.add(Dense(units=2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit_generator(generate_arrays_from_file("D:\\fit_generate.txt"), steps_per_epoch=10,
                    epochs=2, max_queue_size=1, validation_data=(x_valid, y_valid), workers=1)

# steps_per_epoch: the generate_arrays_from_file generator is drawn from once per step.
# max_queue_size: batches produced by the generator can be cached in a queue of this size.

# The output is as follows:
# Epoch 1/2
# count:1
# count:2
#
#  1/10 [==>...........................] - ETA: 2s - loss: 0.7145 - acc: 0.3333count:3
# count:4
# count:5
# count:6
# count:7
#
#  7/10 [====================>.........] - ETA: 0s - loss: 0.7001 - acc: 0.4286count:8
# count:9
# count:10
# count:11
#
# 10/10 [==============================] - 0s 36ms/step - loss: 0.6960 - acc: 0.4500 - val_loss: 0.6794 - val_acc: 0.5000
# Epoch 2/2
#
#  1/10 [==>...........................] - ETA: 0s - loss: 0.6829 - acc: 0.5000count:12
# count:13
# count:14
# count:15
#
#  5/10 [==============>...............] - ETA: 0s - loss: 0.6800 - acc: 0.5000count:16
# count:17
# count:18
# count:19
# count:20
#
# 10/10 [==============================] - 0s 11ms/step - loss: 0.6766 - acc: 0.5000 - val_loss: 0.6662 - val_acc: 0.5000
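Note that the generator above reloads the entire file on every yield, so it demonstrates the calling convention rather than actual memory savings. A hypothetical variant (the function name and batch size are my own, not from the original) that streams the file line by line and yields one batch at a time could look like this:

import numpy as np

def generate_batches_from_file(path, batch_size=2):
    # Stream the file so the whole dataset never sits in memory at once.
    while 1:
        xs, ys = [], []
        with open(path) as f:
            for line in f:
                values = [int(v) for v in line.split()]
                xs.append(values[:2])   # first two columns: features
                ys.append(values[2:])   # last two columns: one-hot label
                if len(xs) == batch_size:
                    yield (np.array(xs), np.array(ys))
                    xs, ys = [], []
        # any leftover rows smaller than batch_size are dropped for simplicity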
Additional knowledge:
You can also write your own data-generation class by inheritance:
A Keras data generator implemented by inheriting keras.utils.Sequence, combined with fit_generator for memory-saving training
#coding=utf-8
'''
Created on 2018-7-10
'''
import keras
import math
import os
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

class DataGenerator(keras.utils.Sequence):

    def __init__(self, datas, batch_size=1, shuffle=True):
        self.batch_size = batch_size
        self.datas = datas
        self.indexes = np.arange(len(self.datas))
        self.shuffle = shuffle

    def __len__(self):
        # Number of iterations (batches) per epoch
        return math.ceil(len(self.datas) / float(self.batch_size))

    def __getitem__(self, index):
        # Generate one batch of data; how you read the data here is up to you
        # Generate batch_size indexes
        batch_indexs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Fetch the samples from the dataset based on those indexes
        batch_datas = [self.datas[k] for k in batch_indexs]
        # Generate the data
        X, y = self.data_generation(batch_datas)
        return X, y

    def on_epoch_end(self):
        # At the end of each epoch, optionally re-shuffle the indexes
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def data_generation(self, batch_datas):
        images = []
        labels = []
        # Generate data
        for i, data in enumerate(batch_datas):
            # x_train data
            image = cv2.imread(data)
            image = list(image)
            images.append(image)
            # y_train data: the class name is the parent directory in the path
            right = data.rfind("\\", 0)
            left = data.rfind("\\", 0, right) + 1
            class_name = data[left:right]
            if class_name == "dog":
                labels.append([0, 1])
            else:
                labels.append([1, 0])
        # For a multi-output model, the format of y changes slightly:
        # a list of numpy arrays, i.e. [numpy_out1, numpy_out2, numpy_out3]
        return np.array(images), np.array(labels)

# Read the sample names, then read the data based on the sample names
class_num = 0
train_datas = []
for file in os.listdir("D:/xxx"):
    file_path = os.path.join("D:/xxx", file)
    if os.path.isdir(file_path):
        class_num = class_num + 1
        for sub_file in os.listdir(file_path):
            train_datas.append(os.path.join(file_path, sub_file))

# Data generator
training_generator = DataGenerator(train_datas)

# Build the network
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=784))
model.add(Dense(units=2, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])
model.fit_generator(training_generator, epochs=50, max_queue_size=10, workers=1)
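One advantage of the Sequence-based class that the example above does not show: because batches are fetched by index through __getitem__ rather than pulled from a single generator, it can safely feed several worker processes in parallel. A sketch reusing training_generator and model from above (the worker count of 4 is an arbitrary assumption):

model.fit_generator(training_generator,
                    epochs=50,
                    max_queue_size=10,
                    workers=4,                # assumption: 4 data-loading workers
                    use_multiprocessing=True) # safe because DataGenerator is a keras.utils.Sequence;
                                              # a plain Python generator shared across processes
                                              # risks yielding duplicated batches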
This concludes the detailed walkthrough of Keras's two ways of training a model, fit and fit_generator (which saves memory). I hope it serves as a useful reference, and I appreciate your continued support.