
Two ways to train a Keras model in detail: fit and fit_generator (saving memory)

The first way: fit

import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Read the data
x_train = np.load("D:\\machineTest\\testmulPE_win7\\data_sprase.npy")[()]
y_train = np.load("D:\\machineTest\\testmulPE_win7\\lable_sprase.npy")

# Get the total number of classification categories
classes = len(np.unique(y_train))

# One-hot encode the labels (required)
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(y_train)
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
y_train = onehot_encoder.fit_transform(integer_encoded)

# Shuffle and split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(x_train, y_train, test_size=0.3, random_state=0)

model = Sequential()
model.add(Dense(units=1000, activation='relu', input_dim=784))
model.add(Dense(units=classes, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=50, batch_size=128)
score = model.evaluate(X_test, y_test, batch_size=128)
# fit parameter details
# model.fit(
#  self,
#  x=None,  # training data
#  y=None,  # training data labels
#  batch_size=None,  # update the weights after this many samples; default 32
#  epochs=1,  # number of training epochs
#  verbose=1,  # 0: no logging to the standard output stream, 1: progress bar, 2: one line per epoch
#  callbacks=None,  # list of keras.callbacks.Callback objects; the callbacks in the list are invoked during training
#  validation_split=0.,  # float in [0, 1]; this fraction of the training set is held out as the validation set, in which case validation_data below is ignored
#  validation_data=None,  # validation set
#  shuffle=True,  # boolean or string; a boolean says whether to shuffle the order of the input samples before each epoch, and the string "batch" is for handling HDF5 data
#  class_weight=None,  # dict; in classification problems some classes may need extra attention, so their weight is set higher and mistakes on them are penalized more heavily in the loss function
#  sample_weight=None,  # array of the same length as the input samples, giving each input sample a weight in the loss; for temporal data, use a matrix of shape (samples, sequence_length)
#  initial_epoch=0,  # if the model was trained before, training can resume from the specified epoch
#  steps_per_epoch=None,  # how many batches (steps) one epoch is split into, e.g. steps_per_epoch=10 splits the training set into 10 parts; cannot be used together with batch_size
#  validation_steps=None,  # only useful when steps_per_epoch is set; the number of validation batches
#  **kwargs  # for interacting with the backend
# )
#
# Returns a History object, through which the training process (loss values, etc.) can be inspected.
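
For example, the per-epoch metrics live in the History object's history dict. A minimal sketch of inspecting it, reusing the model above (the 'acc' key assumes an older Keras; newer versions use 'accuracy'):

history = model.fit(X_train, y_train, epochs=50, batch_size=128,
                    validation_data=(X_test, y_test))
# history.history maps each metric name to a list with one value per epoch
print(history.history.keys())    # e.g. dict_keys(['loss', 'acc', 'val_loss', 'val_acc'])
print(history.history['loss'])   # training loss for each of the 50 epochs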

The second way: fit_generator (saves memory)

# The second way: it saves memory
'''
Created on 2018-4-11
fit_generate.txt: the last two columns are the label, already one-hot encoded
1 2 0 1
2 3 1 0
1 3 0 1
1 4 0 1
2 4 1 0
2 5 1 0

'''
import keras
from keras.models import Sequential
from keras.layers import Dense
import numpy as np
from sklearn.model_selection import train_test_split

count = 1
def generate_arrays_from_file(path):
    global count
    while 1:
        datas = np.loadtxt(path, delimiter=' ', dtype="int")
        x = datas[:, :2]
        y = datas[:, 2:]
        print("count:" + str(count))
        count = count + 1
        yield (x, y)

x_valid = np.array([[1, 2], [2, 3]])
y_valid = np.array([[0, 1], [1, 0]])
model = Sequential()
model.add(Dense(units=1000, activation='relu', input_dim=2))
model.add(Dense(units=2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit_generator(generate_arrays_from_file("D:\\fit_generate.txt"), steps_per_epoch=10, epochs=2, max_queue_size=1, validation_data=(x_valid, y_valid), workers=1)
# steps_per_epoch: each epoch draws this many batches from the generator, one per step.
# max_queue_size: batches produced by the generator can be prefetched into a queue of this size.
# The output is as follows.
# Epoch 1/2
# count:1
# count:2
# 
# 1/10 [==>...........................] - ETA: 2s - loss: 0.7145 - acc: 0.3333count:3
# count:4
# count:5
# count:6
# count:7
# 
# 7/10 [====================>.........] - ETA: 0s - loss: 0.7001 - acc: 0.4286count:8
# count:9
# count:10
# count:11
# 
# 10/10 [==============================] - 0s 36ms/step - loss: 0.6960 - acc: 0.4500 - val_loss: 0.6794 - val_acc: 0.5000
# Epoch 2/2
# 
# 1/10 [==>...........................] - ETA: 0s - loss: 0.6829 - acc: 0.5000count:12
# count:13
# count:14
# count:15
# 
# 5/10 [==============>...............] - ETA: 0s - loss: 0.6800 - acc: 0.5000count:16
# count:17
# count:18
# count:19
# count:20
# 
# 10/10 [==============================] - 0s 11ms/step - loss: 0.6766 - acc: 0.5000 - val_loss: 0.6662 - val_acc: 0.5000
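
One caveat: the generator above reloads the whole file on every yield, so it demonstrates the calling convention rather than the memory saving. To actually bound memory use, the generator should read and yield one batch at a time. A minimal sketch, reusing numpy as np from the imports above and assuming the same space-separated four-column format as fit_generate.txt (the function name and batch_size parameter are illustrative, not from the original code):

def generate_batches_from_file(path, batch_size=2):
    while 1:
        with open(path) as f:
            xs, ys = [], []
            for line in f:
                row = [int(v) for v in line.split()]
                xs.append(row[:2])   # first two columns: features
                ys.append(row[2:])   # last two columns: one-hot label
                if len(xs) == batch_size:
                    yield (np.array(xs), np.array(ys))
                    xs, ys = [], []
            if xs:  # yield any leftover rows as a final, smaller batch
                yield (np.array(xs), np.array(ys))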

Additional knowledge:

The data generator can also be written by inheritance: subclass keras.utils.Sequence and implement your own data-generating class.

A Keras data auto-generator built by inheriting keras.utils.Sequence, combined with fit_generator for memory-saving training:

#coding=utf-8
'''
Created on 2018-7-10
'''
import keras
import math
import os
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

class DataGenerator(keras.utils.Sequence):

    def __init__(self, datas, batch_size=1, shuffle=True):
        self.batch_size = batch_size
        self.datas = datas
        self.indexes = np.arange(len(self.datas))
        self.shuffle = shuffle

    def __len__(self):
        # Number of batches (iterations) per epoch
        return math.ceil(len(self.datas) / float(self.batch_size))

    def __getitem__(self, index):
        # Generate one batch of data; how the data is read is up to you
        # Generate batch_size indexes
        batch_indexs = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Fetch the samples from the dataset according to those indexes
        batch_datas = [self.datas[k] for k in batch_indexs]

        # Generate the data
        X, y = self.data_generation(batch_datas)

        return X, y

    def on_epoch_end(self):
        # Optionally reshuffle the indexes at the end of each epoch
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def data_generation(self, batch_datas):
        images = []
        labels = []

        # Generate the data
        for i, data in enumerate(batch_datas):
            # x_train data
            image = cv2.imread(data)
            image = list(image)
            images.append(image)
            # y_train data: the class name is the parent directory in the path
            right = data.rfind("\\", 0)
            left = data.rfind("\\", 0, right) + 1
            class_name = data[left:right]
            if class_name == "dog":
                labels.append([0, 1])
            else:
                labels.append([1, 0])
        # For a multi-output model, the format of Y has to be changed a bit: an outer list wrapping numpy arrays, i.e. list[numpy_out1, numpy_out2, numpy_out3]
        return np.array(images), np.array(labels)
 
# Read the sample names (paths); the actual data is read later from these paths
class_num = 0
train_datas = []
for file in os.listdir("D:/xxx"):
    file_path = os.path.join("D:/xxx", file)
    if os.path.isdir(file_path):
        class_num = class_num + 1
        for sub_file in os.listdir(file_path):
            train_datas.append(os.path.join(file_path, sub_file))

# Data generator
training_generator = DataGenerator(train_datas)

# Build the network
model = Sequential()
model.add(Dense(units=64, activation='relu', input_dim=784))
model.add(Dense(units=2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

model.fit_generator(training_generator, epochs=50, max_queue_size=10, workers=1)
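
Because DataGenerator inherits from keras.utils.Sequence, Keras can also pull batches from it with several worker processes in parallel without duplicating or dropping batches, which a plain generator function cannot guarantee. A sketch of that variant (the worker count is illustrative):

model.fit_generator(training_generator,
                    epochs=50,
                    max_queue_size=10,
                    workers=4,                # illustrative worker count
                    use_multiprocessing=True) # safe only because this is a Sequence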

That is all there is to the two ways of training a Keras model in detail, fit and fit_generator (which saves memory). I hope it gives you a useful reference, and I hope you will keep supporting me.