暑假实习,做语音识别的项目,由于要用到 TCP 进行数据的传输,因而向同事学习了如何用 Python 实现服务端和客户端的数据传输。记下来以便之后使用。
话不多说,直接上代码:
(注:只是用来实现语音数据的一个简单传输,仅供参考。)
client(客户端):
import socket
import argparse

HOST = '192.168.1.100'
PORT = 88


def parse_args(argv=None):
    """Parse command-line options for the client.

    Args:
        argv: Optional list of argument strings; defaults to ``sys.argv[1:]``
            (passing a list makes the function testable without touching
            the real command line — backward-compatible generalization).

    Returns:
        argparse.Namespace with a ``file`` attribute: path to the wav file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--file', type=str, required=True,
                        help='path to wav file')
    return parser.parse_args(argv)


def main(args):
    """Stream the wav file to the server and print the recognition result."""
    # ``with`` guarantees the socket is closed on every path — the original
    # never closed it at all (resource leak, especially on exceptions).
    with socket.socket() as s:
        s.connect((HOST, PORT))
        print('connected with server')
        with open(args.file, 'rb') as f:
            print('sending file')
            # Stream in 1 KiB chunks so large files need no extra memory.
            data = f.read(1024)
            while data:
                # sendall retries until the whole chunk is on the wire;
                # plain send() may transmit only a prefix.
                s.sendall(data)
                data = f.read(1024)
        # SHUT_WR closes only the write half: the server sees EOF (recv
        # returns b'') while this side can still read the response.
        s.shutdown(socket.SHUT_WR)
        print('done sending data...waiting for response')
        print('识别结果:' + s.recv(1024).decode('utf-8'))


if __name__ == '__main__':
    main(parse_args())
server(服务端):
import socket

import numpy as np
import tensorflow as tf
# scipy.io.wavfile.read returns a tuple: (sample rate, numpy array of samples).
import scipy.io.wavfile as wav
import speechpy

import utils
from model_helper import las_model_fn

VOCAB_DIR = 'output/tfdata/vocab.table'
MODEL_DIR = 'output/model'
SAVE_PATH = 'bin/'
BEAM_WIDTH = 3
PORT = 8888


def input_fn(dataset_filename, vocab_filename, num_channels=39,
             batch_size=8, num_epochs=1):
    """Build the tf.data pipeline fed to the estimator at inference time.

    Args:
        dataset_filename: path to a TFRecord file of input features.
        vocab_filename: path to the vocabulary table file.
        num_channels: feature dimension per frame (39 = MFCC + deltas).
        batch_size: inference batch size.
        num_epochs: number of passes over the data (1 for inference).

    Returns:
        A tf.data.Dataset configured for inference (is_infer=True).
    """
    dataset = utils.read_dataset(dataset_filename, num_channels)
    vocab_table = utils.create_vocab_table(vocab_filename)
    dataset = utils.process_dataset(
        dataset, vocab_table, utils.SOS, utils.EOS,
        batch_size, num_epochs, is_infer=True)
    return dataset


def extract_mfcc(filename):
    """Extract 39-dim MFCC features (CMVN + derivatives) from a wav file.

    Args:
        filename: path to a '.wav' file.

    Returns:
        (basename without extension, features of shape (frames, 39)),
        or (None, None) when the filename does not end in '.wav'.
    """
    if not filename.endswith('.wav'):
        return None, None
    fs, signal = wav.read(filename)
    print('frequency={0}'.format(fs))
    # NOTE(review): the 1-in-3 decimation assumes 48 kHz input
    # (48000 / 3 == 16000) — confirm upstream recordings match;
    # the original had an `assert fs == 48000` commented out.
    signal = signal[::3]
    fs = 16000
    mfcc = speechpy.feature.mfcc(signal, fs)
    # Global cepstral mean and variance normalization over the utterance.
    mfcc_cmvn = speechpy.processing.cmvn(mfcc, True)
    # Append time-derivative (delta / delta-delta) features -> 39 channels.
    mfcc_39 = speechpy.feature.extract_derivative_feature(mfcc_cmvn)
    return filename[:-4], mfcc_39.reshape(-1, 39)


def predict(args, model):
    """Run the estimator on args.data and return the decoded string.

    Relies on the module-level ``vocab_list`` built in the main block.
    """
    predictions = model.predict(
        input_fn=lambda: input_fn(
            args.data, args.vocab,
            num_channels=args.num_channels,
            batch_size=args.batch_size,
            num_epochs=1),
        predict_keys='sample_ids')
    if args.beam_width > 0:
        # Beam search output: keep only the best (first) beam per step.
        predictions = [
            vocab_list[y['sample_ids'][:, 0]].tolist() + [utils.EOS]
            for y in predictions]
    else:
        predictions = [
            vocab_list[y['sample_ids']].tolist() + [utils.EOS]
            for y in predictions]
    # Truncate each hypothesis at its first end-of-sentence token
    # (EOS was appended above so index() always succeeds).
    predictions = [y[:y.index(utils.EOS)] for y in predictions]
    predictions = predictions[0]
    result = ''
    for c in predictions:
        print('c=', c)
        result += c
    return result


class Args:
    """Plain configuration holder mirroring the training CLI options."""

    def __init__(self):
        self.vocab = VOCAB_DIR
        self.model_dir = MODEL_DIR
        self.beam_width = BEAM_WIDTH
        self.save = SAVE_PATH
        self.batch_size = 8
        self.num_channels = 39


if __name__ == '__main__':
    print("initializing LAS model...")
    args = Args()
    vocab_list = np.array(utils.load_vocab(args.vocab))
    vocab_size = len(vocab_list)
    config = tf.estimator.RunConfig(model_dir=args.model_dir)
    hparams = utils.create_hparams(
        args, vocab_size, utils.SOS_ID, utils.EOS_ID)
    hparams.decoder.set_hparam('beam_width', args.beam_width)
    model = tf.estimator.Estimator(
        model_fn=las_model_fn, config=config, params=hparams)

    # --- socket setup -----------------------------------------------------
    skt = socket.socket()
    # Empty host string binds on all interfaces.
    skt.bind(('', PORT))
    # backlog defaults to a reasonable value in Python 3's listen().
    skt.listen()
    print('server listening on port ', PORT)
    while True:
        # accept() blocks until a client connects.
        conn, addr = skt.accept()
        with conn:  # the context manager closes conn; no manual close needed
            print('client {} connected...'.format(addr))
            with open('bin/received.wav', 'wb') as wav_file:
                while True:
                    print('receiving data...')
                    data = conn.recv(1024)
                    if not data:
                        # Client called shutdown(SHUT_WR): upload finished.
                        print('no data...')
                        break
                    print('writing data...')
                    wav_file.write(data)
            # Finished file transfer; now extract features and infer.
            print('processing data...')
            try:
                name, feature = extract_mfcc('bin/received.wav')
            except Exception:
                # Narrowed from a bare ``except:`` so KeyboardInterrupt /
                # SystemExit still propagate; client gets a readable error.
                conn.sendall('ERROR:语音文件格式不匹配'.encode('utf-8'))
                continue
            feature_lists = tf.train.FeatureLists(feature_list={
                'labels': tf.train.FeatureList(feature=[
                    tf.train.Feature(bytes_list=tf.train.BytesList(
                        value=[''.encode()]))
                ]),
                'inputs': tf.train.FeatureList(feature=[
                    # `frame` (not `f`) avoids shadowing a file handle.
                    tf.train.Feature(
                        float_list=tf.train.FloatList(value=frame))
                    for frame in feature
                ])
            })
            with tf.python_io.TFRecordWriter(
                    '{0}.tfrecord'.format(name)) as writer:
                writer.write(tf.train.SequenceExample(
                    feature_lists=feature_lists).SerializeToString())
            args.data = '{0}.tfrecord'.format(name)
            print('inferring...')
            s = predict(args, model)
            # sendall (not send) guarantees the whole response is written;
            # the original also left a dangling, unimported ``os.system``
            # expression here — removed.
            conn.sendall(s.encode('utf-8'))
路很长,要加油哦~~~