SoFunction
Updated on 2024-11-21

Recognizing variable-length character strings in images with Keras (CNN + CTC loss)

I'll cut to the chase, so let's get right to the code~

# -*- coding: utf-8 -*-
#keras==2.0.5
#tensorflow==1.1.0

import json
import logging
import multiprocessing
import os
import string
import sys
import time

import cv2
import numpy as np
from sklearn.model_selection import train_test_split

# NOTE(review): the module names on the keras lines below were lost when this
# listing was extracted.  They are reconstructed from the names the script uses
# (Model, Input/Convolution2D/GRU/..., Callback/EarlyStopping); `optimizers` is
# an assumption for the remaining star import -- confirm against the original.
import keras
import keras.backend as K
from keras.datasets import mnist
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras.callbacks import *
from keras import backend as K
# from keras.utils.visualize_util import plot
from visual_callbacks import AccLossPlotter
# Keras callback that plots accuracy and loss during training.
# NOTE(review): `sys.path[0]` (the directory of the running script) is restored
# here; the listing showed only `[0]` -- confirm this is the intended save path.
plotter = AccLossPlotter(graphs=['acc', 'loss'], save_graph=True, save_graph_path=sys.path[0])

# Characters the recogniser can output; a character's index in this string is
# the class id used in the label vectors.
char_ocr = '0123456789'
# Maximum length of a recognised string; shorter labels are padded to this length.
seq_len = 8
# Number of output classes: one per character plus one extra class
# (used by the CTC loss as the blank).
label_count = len(char_ocr) + 1


def get_label(filepath):
    """Return the label vector encoded in an image's file name.

    The ground-truth text is the part of the base name after the last ``_``
    and before the first ``.`` (``img_123.tif`` -> ``"123"``).

    Args:
        filepath: Path of the image file.

    Returns:
        A list of class ids, one per character (``char_ocr.find`` yields ``-1``
        for a character that is not in ``char_ocr``).  Lists shorter than
        ``seq_len`` are right-padded with ``label_count``; longer ones are
        returned unpadded and untruncated.
    """
    file_name = os.path.basename(filepath)
    text = file_name.split('.')[0].split('_')[-1]
    lab = [char_ocr.find(ch) for ch in text]
    # NOTE(review): the pad value equals label_count, which is one past the
    # largest softmax index; confirm the loss ignores padded positions.
    lab.extend([label_count] * (seq_len - len(lab)))
    return lab

def gen_image_data(dir=r'data\train', file_list=None):
    """Load every ``.tif`` image under *dir* and build the model input arrays.

    Args:
        dir: Root directory that is walked recursively for ``.tif`` files.
        file_list: Optional list that the discovered file paths are appended
            to, so the caller can map rows of the result back to files.  Any
            paths already in the list are loaded as well.  A fresh list is
            used when omitted.

    Returns:
        ``(X, Y)`` where ``X`` has shape ``(n, 150, 50, 1)`` (colour-inverted
        grayscale scaled by 1/256) and ``Y`` holds one ``get_label`` vector
        per image, in the same order as ``file_list``.

    Raises:
        IOError: If OpenCV cannot read one of the files.
    """
    # A mutable default would be shared between calls and keep growing.
    if file_list is None:
        file_list = []

    for root, _dirs, files in os.walk(dir):
        for filename in files:
            if os.path.splitext(filename)[1] == '.tif':
                # os.path.join instead of a hard-coded backslash so the paths
                # are valid on every platform.
                file_list.append(os.path.join(root, filename))

    print(len(file_list))

    images = []
    labels = []
    for path in file_list:
        img = cv2.imread(path, 0)  # flag 0: load as single-channel grayscale
        if img is None:
            # imread signals failure by returning None; fail with the file name
            # rather than an opaque error inside cv2.resize.
            raise IOError('cannot read image: %s' % path)
        # dsize is (width, height): the result is a 50-row by 150-column array.
        img = cv2.resize(img, (150, 50), interpolation=cv2.INTER_CUBIC)
        # Transpose to 150 rows by 50 columns, then mirror around the vertical axis.
        img = cv2.transpose(img)
        img = cv2.flip(img, 1)
        # Reverse colour processing; 256.0 forces float division on Python 2 too.
        images.append((255 - img) / 256.0)
        labels.append(get_label(path))

    # Append a trailing channel axis.  reshape(-1, ...) also gives a correctly
    # shaped empty array when no images were found.
    X = np.asarray(images, dtype=np.float64).reshape(-1, 150, 50, 1)
    Y = np.array(labels)
    return X, Y

# the actual loss calc occurs here despite it not being
# an internal Keras loss function

def ctc_lambda_func(args):
    """Compute the CTC batch cost; intended as the function of a Lambda layer.

    Args:
        args: Four-item sequence ``(y_pred, labels, input_length, label_length)``.

    Returns:
        The result of ``K.ctc_batch_cost`` for the batch.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    # Every time step is kept.  Many examples drop the first two with
    # ``[:, 2:, :]``; the author's testing suggested that has no effect here.
    predictions = predictions[:, :, :]
    return K.ctc_batch_cost(targets, predictions, prediction_lengths, target_lengths)

if __name__ == '__main__':
    # Input images are (height, width, channels) = (150, 50, 1), the layout
    # produced by gen_image_data().
    height = 150
    width = 50
    input_tensor = Input((height, width, 1))

    # Three conv + max-pool stages with 32, 64 and 128 filters.
    x = input_tensor
    for i in range(3):
        x = Convolution2D(32 * 2 ** i, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    conv_shape = x.get_shape()
    # Merge the last two axes so the recurrent layers receive a sequence of
    # int(conv_shape[1]) steps.
    x = Reshape(target_shape=(int(conv_shape[1]), int(conv_shape[2] * conv_shape[3])))(x)

    x = Dense(32, activation='relu')(x)

    # First recurrent stage: forward and backward GRU outputs are summed.
    gru_1 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru1')(x)
    gru_1b = GRU(32, return_sequences=True, go_backwards=True, kernel_initializer='he_normal',
                 name='gru1_b')(x)
    gru1_merged = add([gru_1, gru_1b])

    # Second recurrent stage: forward and backward GRU outputs are concatenated.
    gru_2 = GRU(32, return_sequences=True, kernel_initializer='he_normal', name='gru2')(gru1_merged)
    gru_2b = GRU(32, return_sequences=True, go_backwards=True, kernel_initializer='he_normal',
                 name='gru2_b')(gru1_merged)
    x = concatenate([gru_2, gru_2b])
    x = Dropout(0.25)(x)
    x = Dense(label_count, kernel_initializer='he_normal', activation='softmax')(x)
    # Prediction-only model (image -> per-step class probabilities).
    base_model = Model(inputs=input_tensor, outputs=x)

    # Extra inputs needed only to compute the CTC loss during training.
    labels = Input(name='the_labels', shape=[seq_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [x, labels, input_length, label_length])

    model = Model(inputs=[input_tensor, labels, input_length, label_length], outputs=[loss_out])
    # The loss is already computed by the 'ctc' Lambda layer, so the Keras-level
    # loss function simply passes that output through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta')
    model.summary()

    def test(base_model):
        """Decode every image under data\\test and print mismatches with the file-name label."""
        file_list = []
        X, _ = gen_image_data(r'data\test', file_list)
        y_pred = base_model.predict(X)
        shape = y_pred.shape
        # Every sample uses the full number of output time steps.
        decoded = K.ctc_decode(y_pred, input_length=np.ones(shape[0]) * shape[1])[0][0]
        out = K.get_value(decoded)[:, :seq_len]
        print()
        error_count = 0
        for path, row in zip(file_list, out):
            print(path)
            str_src = os.path.basename(path).split('.')[0].split('_')[-1]
            print(row)
            # -1 marks unused positions in the decoded row.  Mapping through
            # char_ocr (rather than str(k)) stays correct for non-digit alphabets.
            str_out = ''.join(char_ocr[int(k)] for k in row if k != -1)
            print(str_src, str_out)
            if str_src != str_out:
                error_count += 1
                print('################################', error_count)

    class LossHistory(Callback):
        """Record per-batch loss; after each epoch save weights and run test()."""

        def on_train_begin(self, logs=None):
            self.losses = []

        def on_epoch_end(self, epoch, logs=None):
            model.save_weights('model_1018.w')
            base_model.save_weights('base_model_1018.w')
            test(base_model)

        def on_batch_end(self, batch, logs=None):
            self.losses.append((logs or {}).get('loss'))

    # checkpointer = ModelCheckpoint(filepath="keras_seq2seq_1018.hdf5", verbose=1, save_best_only=True, )
    history = LossHistory()

    # To resume from saved weights:
    # base_model.load_weights('base_model_1018.w')
    # model.load_weights('model_1018.w')

    X, Y = gen_image_data()
    # The first `maxin` samples are used for training, the rest for validation.
    maxin = 4900
    batch_size = 20
    # The same sequence length is passed for every sample: all output time
    # steps for the input, and the padded label length for the labels.
    input_lengths = np.ones(len(X)) * int(conv_shape[1])
    label_lengths = np.ones(len(X)) * seq_len
    result = model.fit(
        [X[:maxin], Y[:maxin], input_lengths[:maxin], label_lengths[:maxin]],
        Y[:maxin],
        batch_size=batch_size,
        epochs=1000,
        callbacks=[history, plotter, EarlyStopping(patience=10)],  # checkpointer, history,
        validation_data=(
            [X[maxin:], Y[maxin:], input_lengths[maxin:], label_lengths[maxin:]],
            Y[maxin:],
        ),
    )

    test(base_model)

    K.clear_session()

Additional notes: common pitfalls with the parameters of K.ctc_batch_cost

Pitfall 1: the error "InvalidArgumentError sequence_length(0) <=30"

The vast majority of articles on the web that show how to use the K.ctc_batch_cost() function give the following code:

def ctc_lambda_func(args):
 # Widely copied variant: drops the first two time steps of y_pred before
 # passing it to the CTC cost.
 y_pred, labels, input_length, label_length = args
 # the 2 is critical here since the first couple outputs of the RNN
 # tend to be garbage:
 y_pred = y_pred[:, 2:, :]
 return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

Notice the statement y_pred = y_pred[:, 2:, :], which removes the first two entries along the second dimension of y_pred. In plain terms, it drops 2 time steps from the sequence that goes through the LSTM. I later read an article claiming that the reason for dropping 2 steps is that the feature automatically loses 2 steps when it is fed into the Keras LSTM, which is why the code is written this way. I think this was a bug in older versions that newer versions have fixed. If you still write the code as above, you will get the following error:

InvalidArgumentError sequence_length(0) <=30

The value after '<=' equals the last output dimension of your CNN minus 2. I chased this error for a long time without understanding where the 30 came from. Only when checking the code line by line did this slice look very suspicious, and changing the function to the following form resolved the error.

def ctc_lambda_func(args):
 # Corrected variant: y_pred is passed to the CTC cost unmodified, so no time
 # steps are dropped.
 y_pred, labels, input_length, label_length = args 
 return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

Pitfall 2: during training you see "ctc_loss_calculator.cc:144] No valid path found", or the loss is reported as "loss: inf"

If you are familiar with the CTC algorithm, you will know this message means CTC could not find a valid path. Since no valid path was found, something must be inconsistent between the labels and the input. The input-related error has already been resolved, so the problem must be in the labels. Of the four parameters of ctc_batch_cost(), labels and label_length are the suspects. labels must be integer-encoded (class indices rather than one-hot vectors) with shape [batch, max_labelLength], where max_labelLength is the maximum number of characters that can be predicted; label_length is the actual number of characters in each individual label. Out of habit from using tf.ctc_loss, I had set label_length to the maximum length for every sample, and that is why the error was reported.

For the labels parameter, max_labelLength is the maximum number of characters that can be predicted. This value is tied to the second dimension of the feature fed into the LSTM, i.e. the max_step of the feature sequence. On the surface max_labelLength < max_step should be enough, but if it is only slightly smaller the error above still appears. How much smaller it needs to be follows from the CTC algorithm itself: CTC inserts a blank after each character of the label, so that extra length must be taken into account, which gives max_labelLength < max_step // 2. I have not studied the implementation details of ctc_batch_cost() in Keras carefully, so the above is only my guess. If anyone has a definitive answer, please let me know. Thanks in advance!

Incorrect code:

# Wrong: every sample in the batch is given the maximum label length.
batch_label_length = np.ones(batch_size) * max_labelLength

Correct code:

batch_x, batch_y = [], []
# Every sample is given the same input length.
# NOTE(review): `// 8` presumably reflects the CNN's downsampling of the image
# width -- confirm against the model definition.
batch_input_length = np.ones(batch_size) * (max_img_weigth // 8)
# Collect each sample's own label length instead of one shared maximum.
batch_label_length = []
for j in range(i, i + batch_size):
    x, y = self.get_img_data(index_all[j])
    batch_x.append(x)
    batch_y.append(y)
    batch_label_length.append(self.label_length[j])

Finally, here is a diagram of my CRNN model:

That is everything I have to share about using the Keras framework with CNN + ctc_loss to recognize variable-length character strings in images. I hope it serves as a useful reference, and thank you for your support.