Folks, we are trying to deploy an OCR model on a t2.xlarge EC2 instance (4 cores, 16 GB). The problem is that in spite of following examples and going through multiple iterations, the inference still takes N*4 seconds instead of N seconds when running 4 processes. I am certain it's because I don't fully understand what's going on here (in terms of creating multiple sessions), so please do shed some light on what I am missing. Code below:
    ocr_model = None
    decoderType = DecoderType.BestPath

    def init_worker():
        global ocr_model
        ocr_graph = tf.Graph()
        decoderType = DecoderType.BestPath
        ocr_session = tf.Session(graph=ocr_graph)
        print('*********** The graph launched in this session:', ocr_session.graph, ocr_session.sess_str)
        ocr_model = OcrModel(open(FilePaths.fnCharList).read(), FilePaths.ocr_model_file, ocr_graph,
                             ocr_session, decoderType, mustRestore=True)
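One thing worth checking (this is my assumption, not something confirmed above): by default each tf.Session creates intra-op and inter-op thread pools sized to the machine's core count, so 4 worker processes on a 4-core box can end up oversubscribing the same cores and serializing each other. A minimal sketch of init_worker with the thread pools capped to one thread per process, everything else unchanged:

    import tensorflow as tf

    def init_worker():
        global ocr_model
        ocr_graph = tf.Graph()
        # cap TF's internal thread pools so 4 worker processes do not
        # oversubscribe the 4 available cores (one compute thread each)
        config = tf.ConfigProto(intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        ocr_session = tf.Session(graph=ocr_graph, config=config)
        ocr_model = OcrModel(open(FilePaths.fnCharList).read(), FilePaths.ocr_model_file,
                             ocr_graph, ocr_session, DecoderType.BestPath, mustRestore=True)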
    def startParallel(inp_file_list):
        import multiprocessing
        process_pool = multiprocessing.Pool(processes=4, initializer=init_worker)
        pool_output = process_pool.map(model_extract, inp_file_list)
        return pool_output
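Related to that (again an assumption on my part): on Linux, multiprocessing forks by default, so the workers inherit whatever TensorFlow state the parent process has already initialized, and TF is known not to be fork-safe. A sketch of startParallel using a spawn context so each worker starts from a clean interpreter and builds its own graph and session:

    import multiprocessing

    def startParallel(inp_file_list):
        # 'spawn' starts each worker in a fresh interpreter instead of forking,
        # so no TF graphs, sessions or thread pools are inherited from the parent
        ctx = multiprocessing.get_context('spawn')
        with ctx.Pool(processes=4, initializer=init_worker) as process_pool:
            return process_pool.map(model_extract, inp_file_list)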
    def model_extract(img_path):
        # .. whole bunch of image pre-processing and then call the data loader
        loader = DataLoader(text_blocks, 20, ocr_model.imgSize, detect_text_res.oriented_orig_gray.copy())
        print("Total samples:", len(loader.samples))
        ## it behaves like a proper multi-process run up to here
        start_time = time.time()
        while loader.hasNext():
            iterInfo = loader.getIteratorInfo()
            batch = loader.getNext()
            print('Batch:', iterInfo[0], '/', iterInfo[1], time.time() - start_time)
            (recognized, _) = ocr_model.inferBatch(batch=batch)
            ## this time is always num_processes * 10 seconds, while I would
            ## expect all processes to finish within 10 seconds, NOT 10 * num_processes
            print('Time taken for recog ->', time.time() - start_time)
            # print(recognized)
            for id, text in zip(batch.ids, recognized):
                texts_obj[id] = text
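To verify whether the workers actually overlap in time, a small hypothetical wrapper around model_extract that logs the worker pid and per-file wall time (timed_extract is my name, not part of the original code; pass it to pool.map instead of model_extract):

    import os
    import time

    def timed_extract(img_path):
        # if the logged intervals overlap across different pids,
        # the pool really is running the inference in parallel
        t0 = time.time()
        result = model_extract(img_path)
        print('pid %d finished %s in %.2fs' % (os.getpid(), img_path, time.time() - t0))
        return result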
    ### code for OcrModel.inferBatch
    def inferBatch(self, batch, calcProbability=False, probabilityOfGT=False):
        "feed a batch into the NN to recognize the texts"
        # decode, optionally save RNN output
        numBatchElements = len(batch.imgs)
        evalRnnOutput = self.dump or calcProbability
        evalList = [self.decoder] + self.cnnKernels + ([self.ctcIn3dTBC] if evalRnnOutput else [])
        feedDict = {self.inputImgs: batch.imgs, self.seqLen: [OcrModel.maxTextLen] * numBatchElements,
                    self.is_train: False}
        print('ABOUT TO begin ->', batch)
        evalRes = self.sess.run(evalList, feedDict)
        decoded = evalRes[0]
        filters = evalRes[1]
        print('DONEBRO')
        # vis(filters)
        texts = self.decoderOutputToText(decoded, numBatchElements)

        # feed RNN output and recognized text into CTC loss to compute labeling probability
        probs = None
        if calcProbability:
            sparse = self.toSparse(batch.gtTexts) if probabilityOfGT else self.toSparse(texts)
            ctcInput = evalRes[2]
            evalList = self.lossPerElement
            feedDict = {self.savedCtcInput: ctcInput, self.gtTexts: sparse,
                        self.seqLen: [OcrModel.maxTextLen] * numBatchElements, self.is_train: False}
            lossVals = self.sess.run(evalList, feedDict)
            probs = np.exp(-lossVals)

        # dump the output of the NN to CSV file(s)
        if self.dump:
            self.dumpNNOutput(evalRes[2])

        return (texts, probs)
I am pretty sure this issue is due to my lack of understanding of how to use session.run with multiple processes, but I would assume that if I am creating 4 different session objects, they should each be able to run in their own process and hence run in parallel.
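For what it's worth, here is a minimal self-contained repro I would use to separate plain session-per-process behaviour from the OCR code (all names are made up for the sketch; the workload is a dummy matmul loop). If the four per-worker times come out close to the single-process time, sessions do parallelize across processes and the slowdown lives in the model or loader:

    import multiprocessing
    import time

    def _worker(idx):
        # each process builds its own graph and session; thread pools are
        # capped so four workers share four cores without oversubscription
        import tensorflow as tf
        g = tf.Graph()
        with g.as_default():
            a = tf.random_normal([1000, 1000])
            op = tf.matmul(a, a)
        config = tf.ConfigProto(intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        with tf.Session(graph=g, config=config) as sess:
            t0 = time.time()
            for _ in range(20):
                sess.run(op)
            return (idx, time.time() - t0)

    if __name__ == '__main__':
        ctx = multiprocessing.get_context('spawn')
        with ctx.Pool(processes=4) as pool:
            print(pool.map(_worker, range(4)))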
question from:
https://stackoverflow.com/questions/65902047/parallelization-of-session-run-tensorflow-using-python-multiprocess-for-infe