I'll cut to the chase and get right to the code.
```python
# -*- coding: utf-8 -*-
# keras==2.0.5
# tensorflow==1.1.0

import os
import sys
import cv2
import numpy as np
from keras.models import Model
from keras.layers import (Input, Dense, Dropout, Reshape, Lambda,
                          Convolution2D, MaxPooling2D, GRU, add, concatenate)
from keras.callbacks import Callback, EarlyStopping
from keras import backend as K
# from .visualize_util import plot
from visual_callbacks import AccLossPlotter

plotter = AccLossPlotter(graphs=['acc', 'loss'], save_graph=True,
                         save_graph_path=sys.path[0])

# Character set to recognize
char_ocr = '0123456789'
# Maximum length of the recognized string
seq_len = 8
# Size of the result set: digits 0-9 plus one extra class for CTC
label_count = len(char_ocr) + 1


def get_label(filepath):
    # The label is the digit string after the last '_' in the file name,
    # padded to seq_len with label_count
    lab = []
    for num in str(os.path.split(filepath)[-1]).split('.')[0].split('_')[-1]:
        lab.append(int(char_ocr.find(num)))
    if len(lab) < seq_len:
        cur_seq_len = len(lab)
        for i in range(seq_len - cur_seq_len):
            lab.append(label_count)
    return lab


def gen_image_data(dir=r'data\train', file_list=[]):
    dir_path = dir
    for rt, dirs, files in os.walk(dir_path):
        for filename in files:
            if filename.find('.') >= 0:
                (shotname, extension) = os.path.splitext(filename)
                if extension == '.tif':  # or extension == '.png'
                    file_list.append('%s\\%s' % (rt, filename))
    print(len(file_list))

    index = 0
    X = []
    Y = []
    for file in file_list:
        index += 1
        img = cv2.imread(file, 0)  # read as grayscale
        img = cv2.resize(img, (150, 50), interpolation=cv2.INTER_CUBIC)
        img = cv2.transpose(img)   # (50, 150) -> (150, 50)
        img = cv2.flip(img, 1)
        img = (255 - img) / 256    # invert colors and normalize
        X.append([img])
        Y.append(get_label(file))

    X = np.transpose(X, (0, 2, 3, 1))  # -> (samples, height, width, 1)
    X = np.array(X)
    Y = np.array(Y)
    return X, Y


# the actual loss calc occurs here despite it not being
# an internal Keras loss function
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    # y_pred = y_pred[:, 2:, :]  # tested: seems to have no effect
    y_pred = y_pred[:, :, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)


if __name__ == '__main__':
    height = 150
    width = 50
    input_tensor = Input((height, width, 1))
    x = input_tensor
    for i in range(3):
        x = Convolution2D(32 * 2 ** i, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    conv_shape = x.get_shape()  # (batch, time_steps, h, channels)
    x = Reshape(target_shape=(int(conv_shape[1]),
                              int(conv_shape[2] * conv_shape[3])))(x)
    x = Dense(32, activation='relu')(x)

    # two stacked bidirectional GRU layers
    gru_1 = GRU(32, return_sequences=True, kernel_initializer='he_normal',
                name='gru1')(x)
    gru_1b = GRU(32, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru1_b')(x)
    gru1_merged = add([gru_1, gru_1b])

    gru_2 = GRU(32, return_sequences=True, kernel_initializer='he_normal',
                name='gru2')(gru1_merged)
    gru_2b = GRU(32, return_sequences=True, go_backwards=True,
                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
    x = concatenate([gru_2, gru_2b])

    x = Dropout(0.25)(x)
    x = Dense(label_count, kernel_initializer='he_normal',
              activation='softmax')(x)
    base_model = Model(inputs=input_tensor, outputs=x)

    labels = Input(name='the_labels', shape=[seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,),
                      name='ctc')([x, labels, input_length, label_length])

    model = Model(inputs=[input_tensor, labels, input_length, label_length],
                  outputs=[loss_out])
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                  optimizer='adadelta')
    model.summary()

    def test(base_model):
        file_list = []
        X, Y = gen_image_data(r'data\test', file_list)
        y_pred = base_model.predict(X)
        shape = y_pred[:, :, :].shape
        out = K.get_value(K.ctc_decode(
            y_pred[:, :, :],
            input_length=np.ones(shape[0]) * shape[1])[0][0])[:, :seq_len]
        print()
        error_count = 0
        for i in range(len(X)):
            print(file_list[i])
            str_src = str(os.path.split(file_list[i])[-1]).split('.')[0].split('_')[-1]
            print(out[i])
            str_out = ''.join([str(x) for x in out[i] if x != -1])
            print(str_src, str_out)
            if str_src != str_out:
                error_count += 1
                print('################################', error_count)

    class LossHistory(Callback):
        def on_train_begin(self, logs={}):
            self.losses = []

        def on_epoch_end(self, epoch, logs=None):
            model.save_weights('model_1018.w')
            base_model.save_weights('base_model_1018.w')
            test(base_model)

        def on_batch_end(self, batch, logs={}):
            self.losses.append(logs.get('loss'))

    # checkpointer = ModelCheckpoint(filepath="keras_seq2seq_1018.hdf5",
    #                                verbose=1, save_best_only=True)
    history = LossHistory()
    # base_model.load_weights('base_model_1018.w')
    # model.load_weights('model_1018.w')

    X, Y = gen_image_data()
    maxin = 4900
    subseq_size = 100
    batch_size = 10
    result = model.fit([X[:maxin], Y[:maxin],
                        np.array(np.ones(len(X)) * int(conv_shape[1]))[:maxin],
                        np.array(np.ones(len(X)) * seq_len)[:maxin]],
                       Y[:maxin],
                       batch_size=20,
                       epochs=1000,
                       callbacks=[history, plotter, EarlyStopping(patience=10)],
                       validation_data=([X[maxin:], Y[maxin:],
                                         np.array(np.ones(len(X)) * int(conv_shape[1]))[maxin:],
                                         np.array(np.ones(len(X)) * seq_len)[maxin:]],
                                        Y[maxin:]))

    test(base_model)
    K.clear_session()
```
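To make the file-name labeling convention concrete, here is a quick check of `get_label()`; the path below is made up purely for illustration (only the digit string after the last underscore matters):

```python
# Hypothetical file name: the digits after the last '_' are the label.
path = r'data\train\img_0123.tif'
print(get_label(path))
# -> [0, 1, 2, 3, 11, 11, 11, 11]   (11 == label_count is the padding value)
```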
Additional knowledge: common pitfalls with the K.ctc_batch_cost() parameters
The `InvalidArgumentError: sequence_length(0) <= 30` error
The vast majority of articles on the web about the K.ctc_batch_cost() function give the following code:
```python
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # the 2 is critical here since the first couple outputs of the RNN
    # tend to be garbage:
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
```
Notice the line `y_pred = y_pred[:, 2:, :]`, which removes the first two columns of the second dimension of y_pred; in plain terms, it cuts 2 steps off the sequence that is fed into the LSTM. I later happened to read an article saying this 2-step cut exists because Keras used to lose 2 time steps automatically when the feature map was fed into the LSTM, hence this idiom. I believe this was a bug in old versions that has since been fixed; if you still write it as above, you will get the following error:
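To see where the number after the '<=' comes from, here is a minimal sketch; the 32-step figure and batch size are assumptions for illustration, not anything fixed by Keras:

```python
import numpy as np

# Illustration only: suppose the CNN hands the RNN/CTC T = 32 time steps.
T = 32
batch_size = 10
input_length = np.ones((batch_size, 1)) * T  # what we tell ctc_batch_cost

# After y_pred = y_pred[:, 2:, :], y_pred really has T - 2 = 30 steps,
# while input_length still claims 32, so TensorFlow raises:
#   InvalidArgumentError: sequence_length(0) <= 30
# Either drop the slice (newer Keras), or pass input_length = T - 2.
```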
```
InvalidArgumentError: sequence_length(0) <= 30
```
The value after the '<=' is the last output (time) dimension of your CNN minus 2. I hunted this error for a long time without understanding where the 30 came from; checking the code line by line finally made this spot look suspicious, and rewriting it in the following form resolved the error.
```python
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
```
During training, `ctc_loss_calculator.cc:144] No valid path found` appears, possibly together with `loss: inf`
If you're familiar with the CTC algorithm, you know what this means: CTC could not find a valid path, so something must be wrong between the labels and the input. The input-side error was resolved above, so the labels are the suspects. Of the four parameters of ctc_batch_cost(), that points at labels and label_length. labels holds the integer-coded label sequences with shape [batch, max_labelLength], where max_labelLength is the maximum length of the predicted character string; label_length is the actual character length of each label. Carrying over a habit from tf.ctc_loss, I had set label_length to the maximum length everywhere, which is what triggered the error.
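As a concrete sketch of those two parameters (my reading of the documented shapes, with a made-up two-sample batch):

```python
import numpy as np

char_ocr = '0123456789'
max_label_length = 8  # the most characters one image may contain

def encode(s):
    # Integer-code the characters and pad to max_label_length.
    codes = [char_ocr.index(c) for c in s]
    length = len(codes)
    codes += [len(char_ocr)] * (max_label_length - length)  # pad past the end
    return codes, length

pairs = [encode(s) for s in ['123', '45678']]
labels = np.array([p[0] for p in pairs])          # shape (batch, max_label_length)
label_length = np.array([[p[1]] for p in pairs])  # shape (batch, 1): true lengths
```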
For the labels parameter, max_labelLength is the maximum character length that can be predicted. This value is tied to the second dimension of the feature map fed into the LSTM, i.e. the max_step of the feature sequence. On the surface it seems enough that max_labelLength < max_step, but if the margin is too small the error above still appears. How much smaller does it need to be? The answer comes from the CTC algorithm itself: CTC inserts a blank after each character of the label, so that extra length must be accounted for, giving max_labelLength < max_step // 2. I have not studied the implementation details of Keras's ctc_batch_cost(), so the above is my guess; if anyone has a definitive answer, please let me know, thanks in advance!
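If you would rather fail fast than debug an inf loss, a simple guard based on this heuristic (my own addition, not part of the Keras API) can be dropped into the training script once conv_shape is known:

```python
# Guard derived from the max_labelLength < max_step // 2 heuristic above:
# CTC may insert a blank between characters, so a label of length L can
# need roughly 2 * L time steps.
max_step = int(conv_shape[1])  # time steps the CNN feeds to the RNN/CTC
assert seq_len <= max_step // 2, (
    'seq_len=%d needs more than the %d available time steps; '
    'expect "No valid path found"' % (seq_len, max_step))
```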
Incorrect code:
```python
batch_label_length = np.ones(batch_size) * max_labelLength  # wrong: every label claims the max length
```
The correct way:
```python
batch_x, batch_y = [], []
batch_input_length = np.ones(batch_size) * (max_img_width // 8)  # 3 poolings shrink the time dimension by 8
batch_label_length = []
for j in range(i, i + batch_size):
    x, y = self.get_img_data(index_all[j])
    batch_x.append(x)
    batch_y.append(y)
    batch_label_length.append(self.label_length[j])  # true length of each label
```
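For completeness, a sketch of how the end of that generator loop might hand the batch to the four-input CTC model; the reshapes and the all-zeros dummy target are my assumptions (the ctc Lambda layer already returns the loss, so the target values are ignored):

```python
# Continuing inside the generator loop above: package the batch in the
# [inputs, targets] form the four-input CTC model expects.
yield ([np.array(batch_x), np.array(batch_y),
        batch_input_length.reshape(-1, 1),
        np.array(batch_label_length).reshape(-1, 1)],
       np.zeros(batch_size))  # dummy target; the ctc Lambda is the loss
```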
A structure diagram of my CRNN is attached at the end.
That covers using the Keras framework with a CNN + ctc_loss to recognize images of variable-length character strings, and it is everything I wanted to share. I hope it gives you a useful reference, and I hope you'll continue to support me.