diff --git a/docker/infer.sh b/docker/infer.sh new file mode 100755 index 00000000000..1e8de03482d --- /dev/null +++ b/docker/infer.sh @@ -0,0 +1,163 @@ +#!/bin/bash + +docker_gpu=0 +docker_egs= +docker_folders= +docker_cuda=10.0 +docker_user=true +docker_env= +docker_cmd= +docker_os=u18 + +while test $# -gt 0 +do + case "$1" in + -h) echo "Usage: `basename $0` [-h] docker_gpu docker_egs docker_folders options" + exit 0;; + --help) echo "Usage: `basename $0` [-h] ] docker_gpu docker_egs docker_folders options" + exit 0;; + --docker*) ext=${1#--} + frombreak=true + for i in _ {a..z} {A..Z}; do + for var in `eval echo "\\${!${i}@}"`; do + if [ "$var" == "$ext" ]; then + eval ${ext}=$2 + frombreak=false + break 2 + fi + done + done + if ${frombreak} ; then + echo "bad option $1" + exit 1 + fi + ;; + --*) break + ;; + esac + shift + shift +done + +if [ -z "${docker_egs}" ]; then + echo "Select an example to work with from the egs folder." + exit 1 +fi + +from_tag="cpu" +if [ ! "${docker_gpu}" == "-1" ]; then + if [ -z "${docker_cuda}" ]; then + # If the docker_cuda is not set, the program will automatically + # search the installed version with default configurations (apt) + docker_cuda=$( nvcc -V | grep release ) + docker_cuda=${docker_cuda#*"release "} + docker_cuda=${docker_cuda%,*} + fi + # After search for your cuda version, if the variable docker_cuda is empty the program will raise an error + if [ -z "${docker_cuda}" ]; then + echo "CUDA was not found in your system. Use CPU image or install NVIDIA-DOCKER, CUDA and NVCC for GPU image." + exit 1 + else + from_tag="gpu-cuda${docker_cuda}-cudnn7" + fi +fi + +if [ ! -z "${docker_os}" ]; then + from_tag="${from_tag}-${docker_os}" +fi + +# Check if image exists in the system and download if required +docker_image=$( docker images -q espnet/espnet:${from_tag} ) +if ! [[ -n ${docker_image} ]]; then + docker pull espnet/espnet:${from_tag} +fi + +if [ ${docker_user} = true ]; then + # Build a container with the user account + container_tag="${from_tag}-user-${HOME##*/}" + docker_image=$( docker images -q espnet/espnet:${container_tag} ) + if ! [[ -n ${docker_image} ]]; then + echo "Building docker image..." + build_args="--build-arg FROM_TAG=${from_tag}" + build_args="${build_args} --build-arg THIS_USER=${HOME##*/}" + build_args="${build_args} --build-arg THIS_UID=${UID}" + + echo "Now running docker build ${build_args} -f prebuilt/Dockerfile -t espnet/espnet:${container_tag} ." + (docker build ${build_args} -f prebuilt/Dockerfile -t espnet/espnet:${container_tag} .) || exit 1 + fi +else + container_tag=${from_tag} +fi + +echo "Using image espnet/espnet:${container_tag}." + +this_time="$(date '+%Y%m%dT%H%M')" +if [ "${docker_gpu}" == "-1" ]; then + cmd0="docker" + container_name="espnet_cpu_${this_time}" +else + # --rm erase the container when the training is finished. + cmd0="NV_GPU='${docker_gpu}' nvidia-docker" + container_name="espnet_gpu${docker_gpu//,/_}_${this_time}" +fi + +cd .. + +vols="-v /data/mfs:/data -v ${PWD}/egs:/espnet/egs -v ${PWD}/espnet:/espnet/espnet -v ${PWD}/test:/espnet/test -v ${PWD}/utils:/espnet/utils" +if [ ! -z "${docker_folders}" ]; then + docker_folders=$(echo ${docker_folders} | tr "," "\n") + for i in ${docker_folders[@]} + do + vols=${vols}" -v $i:$i"; + done +fi + +cmd1="cd /espnet/egs/${docker_egs}" +if [ ! 
-z "${docker_cmd}" ]; then
+  cmd2="./${docker_cmd} $@"
+else
+  cmd2="./run.sh $@"
+fi
+
+if [ ${docker_user} = false ]; then
+  # Required to access the folder as root once the training is finished
+  cmd2="${cmd2}; chmod -R 777 /espnet/egs/${docker_egs}"
+fi
+
+cmd="${cmd1}; ${cmd2}"
+this_env=""
+if [ ! -z "${docker_env}" ]; then
+  docker_env=$(echo ${docker_env} | tr "," "\n")
+  for i in ${docker_env[@]}
+  do
+    this_env="-e $i ${this_env}"
+  done
+fi
+
+if [ ! -z "${HTTP_PROXY}" ]; then
+  this_env="${this_env} -e 'HTTP_PROXY=${HTTP_PROXY}'"
+fi
+
+if [ ! -z "${http_proxy}" ]; then
+  this_env="${this_env} -e 'http_proxy=${http_proxy}'"
+fi
+
+cmd="${cmd0} run -i -v /data/mfs:/data/mfs --rm ${this_env} --name ${container_name} ${vols} espnet/espnet:${container_tag} /bin/bash -c '${cmd}'"
+
+
+trap ctrl_c INT
+
+function ctrl_c() {
+        echo "** Kill docker container ${container_name}"
+        docker rm -f ${container_name}
+}
+
+echo "Executing application in Docker"
+echo ${cmd}
+eval ${cmd} &
+PROC_ID=$!
+
+while kill -0 "$PROC_ID" 2> /dev/null; do
+    sleep 1
+done
+echo "`basename $0` done."
diff --git a/egs/aishell/asr1/asr_pipeline.py b/egs/aishell/asr1/asr_pipeline.py
new file mode 100644
index 00000000000..408c8043b48
--- /dev/null
+++ b/egs/aishell/asr1/asr_pipeline.py
@@ -0,0 +1,393 @@
+# -*- coding: utf-8 -*-
+
+import sys
+
+if sys.version[0] == '2':
+    reload(sys)
+    sys.setdefaultencoding("utf-8")
+
+import os
+import re
+import time
+import json
+import redis
+import codecs
+import shutil
+import hashlib
+import tempfile
+import argparse
+import subprocess
+from collections import defaultdict
+from functools import partial
+from concurrent.futures import ProcessPoolExecutor
+from multiprocessing import cpu_count
+from pydub import AudioSegment
+from kafka import KafkaConsumer, KafkaProducer
+from kafka.errors import CommitFailedError
+
+
+def norm_aishell_data(indir, outdir):
+    os.system("mkdir -p %s" %outdir)
+    for path in os.listdir(indir):
+        fname = path.split(".")[0]
+        os.system("mkdir -p %s/data_aishell/wav/infer/%s" %(outdir, fname))
+        cmd = "cp %s/%s %s/data_aishell/wav/infer/%s/." %(indir, path, outdir, fname)
+        os.system(cmd)
+
+    os.system("mkdir -p %s/data_aishell/transcript" %outdir)
+    outfile = "%s/data_aishell/transcript/aishell_transcript.txt" %outdir
+    fout = codecs.open(outfile, "w")
+    for path in os.listdir(indir):
+        fname = path.split(".")[0]
+        fout.write("%s %s\n" %(fname, "哈哈"))  # placeholder transcript; only the wav list matters for inference
+
+
+def gen_sp_wav_and_get_path_one(wav_temppath, audio_id, sound, item, k):
+    cut_wav_name = "%s_%s_%s_%s.wav" % (audio_id, item['start'], item['end'], k)
+    save_cut_path = os.path.join(wav_temppath, cut_wav_name)
+    sp_wav = sound[int(item['start']):int(item['end'])]
+    if sp_wav.frame_rate != 8000:
+        sp_wav = sp_wav.set_frame_rate(8000)
+    sp_wav.export(save_cut_path, format="wav")
+    return save_cut_path
+
+
+def load_hyp_file(path):
+    merge_dict = {}
+    with codecs.open(path) as f:
+        for line in f:
+            l = line.split("(")
+            if len(l) == 2:
+                _text, _id = l
+                text = _text.strip().replace(" ", "")
+                path = _id.split("-")[0] + ".wav"
+                merge_dict[path] = text
+    return merge_dict
+
+
+def get_parser():
+    parser = argparse.ArgumentParser(description='Arguments for the ASR pipeline')
+    parser.add_argument("-kh", "--kafka-host",
+                        default="192.168.40.22:9090,192.168.40.19:9090,192.168.40.59:9090,192.168.40.60:9090,192.168.40.61:9090",
+                        required=True,
+                        help="kafka host:port cluster")
+
+    parser.add_argument("-sct", "--seg-consumer-topics",
+                        default="sp_vad_topic1",
+                        help="input kafka topic to consume")
+
+    parser.add_argument("-ikg", "--seg-consumer-groupid",
+                        default="asr_group1",
+                        help="kafka consumer group id")
+
+    parser.add_argument("-stm", "--session-timeout-ms",
+                        default=60000,
+                        type=int,
+                        help="kafka consumer session (heartbeat) timeout in ms")
+
+    parser.add_argument("-saor", "--seg-auto-offset-reset",
+                        default="largest",
+                        help="offset reset policy: earliest moves to the oldest available message, latest to the newest; default is largest, i.e. latest. Consumer mapping: {'smallest': 'earliest', 'largest': 'latest'}")
+
+    parser.add_argument("-apt", "--asr-producer-topics",
+                        default="asr_topic1",
+                        help="output kafka topic for the ASR results")
+
+    parser.add_argument("-fp", "--father-path",
+                        default="/data/mfs/k8s/speech_pipeline/sad",
+                        help="parent directory of the segmented source audio")
+
+    parser.add_argument("-wp", "--hkust-path",
+                        default="/home/app/asr_pipline/kaldi/egs/hkust/s5_iqianjin",
+                        help="absolute path of the hkust recipe, i.e. the kaldi hkust directory")
+
+    parser.add_argument("-nj", "--num-job",
+                        type=int, default=10,
+                        help="espnet decode num job, default 10")
+
+    parser.add_argument("-nw", "--num-workers",
+                        type=int, default=18,
+                        help="multiprocess number of workers")
+
+    parser.add_argument("-cg", "--consumer-gap",
+                        type=int, default=10,
+                        help="max number of kafka messages consumed per poll")
+
+    parser.add_argument("-ptm", "--poll-timeout-ms",
+                        type=int, default=60000,
+                        help="kafka poll timeout in ms")
+
+    args = parser.parse_args()
+    return args
+
+
+class DetectAlarmKeyword(object):
+    def __init__(self):
+        kws = ["狗杂种", "操你妈", "傻逼", "他妈的", "你妈逼",
+               "狗日的", "王八蛋", "妈了个逼", "婊子", "去你妈",
+               "我操", "我草", "贱人", "被车撞死", "搞死",
+               "密码给我", "老赖", "曝通讯录", "所有联系人", "不要脸",
+               "去死", "要不要脸", "打爆你",
+               ]
+        self.p_kw = re.compile("|".join(kws))
+
+    def process(self, text):
+        rst = self.p_kw.findall(text)
+        return rst
+
+
+class ASR(object):
+    def __init__(self, kafka_servers, seg_consumer_topics, seg_consumer_groupid,
+                 session_timeout_ms=60000, seg_auto_offset_reset="largest",
+                 asr_producer_topics="asr_topic1", num_job=10,
+                 poll_timeout_ms=60000, consumer_gap=None, num_workers=cpu_count()):
+        """
+        :param kafka_servers: kafka host:port
+        :param seg_consumer_topics: consumer topics carrying the segmentation results
+        :param seg_consumer_groupid: consumer group id
+        :param session_timeout_ms: session (heartbeat) timeout
+        :param seg_auto_offset_reset: offset reset policy; earliest moves to the oldest available message, latest to the newest; default is largest, i.e. latest.
+                                      Mapping in the client source: {'smallest': 'earliest', 'largest': 'latest'}
+        :param asr_producer_topics: producer topic for the ASR results, default: asr_topic1
+        :param num_job: number of ASR decoding jobs
+        """
+        self.kafka_servers = kafka_servers
+        self.seg_consumer_groupid = seg_consumer_groupid
+        self.session_timeout_ms = session_timeout_ms
+        self.seg_auto_offset_reset = seg_auto_offset_reset
+        self.seg_consumer_topics = seg_consumer_topics
+        self.num_job = num_job
+        self.poll_timeout_ms = poll_timeout_ms
+        self.consumer_gap = consumer_gap
+        self.num_workers = num_workers
+
+        self._get_from_client()
+
+        # kafka producer for the ASR results
+        self.to_client = KafkaProducer(bootstrap_servers=kafka_servers,  # kafka host:port
+                                       compression_type="gzip",
+                                       max_request_size=1024 * 1024 * 20)
+        self.asr_producer_topics = asr_producer_topics  # ASR producer topic
+        self.redis_client = redis.Redis(host='192.168.192.202', port=40029, db=0, password="Q8TYmIwQSHNFbLJ2")
+        self.kw_client = DetectAlarmKeyword()
+
+    def _get_from_client(self):
+        # kafka consumer for the segmented audio
+        self.from_client = KafkaConsumer(bootstrap_servers=self.kafka_servers,  # kafka host:port
+                                         group_id=self.seg_consumer_groupid,  # consumer group id
+                                         session_timeout_ms=self.session_timeout_ms,  # session (heartbeat) timeout
+                                         enable_auto_commit=False,  # disable auto commit
+                                         auto_offset_reset=self.seg_auto_offset_reset)  # offset reset policy
+        self.from_client.subscribe(self.seg_consumer_topics)  # segmentation consumer topics
+
+    def check_alarm_keyword(self, merge_dict):
+        rst = None
+        if merge_dict:
+            text = ";".join(merge_dict.values())
+            kw_rsp = self.kw_client.process(text)
+            if kw_rsp:
+                rst = kw_rsp
+        return rst
+
+    def asr_pipline_from_kafka(self, father_path):
+        """
+        Consume the kafka stream, then recognize, merge, punctuate and store the results.
+        :param father_path: parent directory of the segmented source audio
+        :return:
+        """
+        while True:
+            if not self.from_client:
+                self._get_from_client()
+
+            tp_msgs = self.from_client.poll(timeout_ms=self.poll_timeout_ms,
+                                            max_records=self.consumer_gap)
+            msgs = []
+            for tp, _msgs in tp_msgs.items():
+                msgs.extend(_msgs)
+
+            self.batch_asr_pipline(father_path, msgs)
+
+
+    def batch_asr_pipline(self, father_path, msgs):
+        flag = False  # whether ASR succeeded and the result was pushed to kafka
+        wav_temppath = tempfile.mkdtemp(prefix="asr_")
+        wav_normpath = wav_temppath + "_norm"
+
+        batch_wav_lst = []
+        # all messages
+        batch_voice_data = {}
+        # messages that contain alarm keywords
+        batch_voice_data_imp = {}
+        batch_merge_dict = None
+
+        try:
+            for msg in msgs:
+                if msg is not None:
+                    audio_id = json.loads(msg.value).get('audio_id', '')
+                    if not self.redis_client.get('asr_espnet_' + str(audio_id)):
+                        voice_data = json.loads(msg.value)
+                        batch_voice_data[audio_id] = voice_data
+                        if self.check_alarm_keyword(voice_data.get("merge_dict")):
+                            voice_data['start_asr_espnet'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                            batch_voice_data_imp[audio_id] = voice_data
+
+            if batch_voice_data_imp:
+                batch_wav_lst = self.gen_sp_wav_and_get_path_mp(father_path, wav_temppath, batch_voice_data_imp)
+                norm_aishell_data(wav_temppath, wav_normpath)
+                merge_dict = self._asr_cmd(wav_normpath) if batch_wav_lst else {}
+                batch_merge_dict = self.split_merge_dict(merge_dict)
+        except Exception as e:
+            print("asr cmd error log: %s, msg: %s" % (e, ""))
+        finally:
+            # remove the temporary audio folders and the temporary ASR result files
+            os.system("rm -rf %s" %wav_temppath)
+            os.system("rm -rf %s" %wav_normpath)
+
+        for audio_id, voice_data in batch_voice_data.items():
+            try:
+                if batch_merge_dict and audio_id in batch_merge_dict:
+                    voice_data["merge_dict_espnet"] = batch_merge_dict[audio_id]
+                    voice_data['end_asr_espnet'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
+                    _alarm_rsp = self.check_alarm_keyword(batch_merge_dict[audio_id])
+                    if _alarm_rsp:
+                        voice_data["has_alarm_keyword"] = True
+                        voice_data["alarm_keywords"] = json.dumps(_alarm_rsp)
+                voice_data["asr_model"] = "espnet_20191030"
+                flag = self._kafka_producers(voice_data)
+            except Exception as e:
+                print("kafka producer error log: %s, msg: %s" % (e, ""))
+        try:
+            self.from_client.commit()
+        except CommitFailedError:
+            print('commit error!')
+        for audio_id, _ in batch_voice_data.items():
+            self.redis_client.set('asr_espnet_' + str(audio_id), 1, ex=24 * 3600)
+
+        return flag
+
+    def split_merge_dict(self, merge_dict):
+        """
+        Split merge_dict by audio id.
+        :param merge_dict: {"A_00*_**.wav": "123",
+                            "A_**_***.wav": "321",
+                            "B_**_***.wav": "423"}
+        """
+        split_dict = defaultdict(dict)
+        for wav_file, text in merge_dict.items():
+            audio_id = wav_file.split("_")[0]
+            split_dict[audio_id][wav_file] = text
+        return split_dict
+
+    def gen_sp_wav_and_get_path_mp(self, father_path, wav_temppath, batch_voice_data):
+        """
+        multiprocess generator
+        step2-1: locate the source audio from the msg info and cut it into wav segments
+                 using the provided start/end time of each segment
+        :param father_path: directory of the source audio
+        :param wav_temppath: temporary directory where the cut wav segments are written
+        :return: wav_lst: list of paths of the cut wav files
+        """
+        p = ProcessPoolExecutor(max_workers=self.num_workers)  # defaults to the number of CPUs if not set
+        result = []
+        wav_lst = []  # cut wav paths
+        for _, voice_data in batch_voice_data.items():
+            source = voice_data["source"]
+            code = None if source != 'infobird' else 'pcm_s16le'
+            date = voice_data["date"]
+            audio_id = voice_data["audio_id"]
+            wav_father_path = os.path.join(father_path, source, date)  # /data/mfs/k8s/speech_pipeline/raw/{source}/{date}
+            for k, v in voice_data['segments'].items():
+                if k in ["gk", "kf"] and len(v) > 0:
+                    wav_name = "%s_%s.wav" % (audio_id, k)
+                    raw_wav_path = os.path.join(wav_father_path, wav_name)
+                    sound = AudioSegment.from_file(raw_wav_path, codec=code, format="wav")
+                    for item in v:
+                        # print(item, type(item))
+                        obj = p.submit(partial(gen_sp_wav_and_get_path_one, wav_temppath, audio_id, sound, item, k))
+                        result.append(obj)
+        p.shutdown()
+        res = [obj.result() for obj in result]
+        return res
+
+    def _asr_cmd(self, wav_path):
+        """
+        step3: run ASR and collect the merged text
+        :param wav_path: directory of the audio to be recognized
+        :return:
+        """
+        flag = wav_path.split("/")[-1].replace("asr", "").replace("norm", "")
+        decode_cmd = "./infer.sh {} {} {}".format(wav_path, flag, self.num_job)
+        os.system(decode_cmd)
+        decode_path = "exp/train_sp_pytorch_train/decode_infer_%s_decode_lm/hyp.trn" %flag
+        merge_dict = load_hyp_file(decode_path)
+
+        ## remove the temporary directories
+        data_dir = "data/infer_%s" %flag
+        fbank_dir = "fbank_%s" %flag
+        dump_dir = "dump/infer_%s" %flag
+        decode_dir = "exp/train_sp_pytorch_train/decode_infer_%s_decode_lm" %flag
+        dump_feats_dir = "exp/dump_feats/recog/infer_%s" %flag
+        make_fbank_dir = "exp/make_fbank/infer_%s" %flag
+
+        for _dir in [data_dir, fbank_dir, dump_dir, dump_feats_dir, make_fbank_dir, decode_dir]:
+            os.system("rm -rf %s" %_dir)
+
+        return merge_dict
+
+    def _kafka_producers(self, voice_data):
+        """
+        step6: push the finished ASR results into kafka
+        :param voice_data: the source kafka message merged with the ASR results
+        :return:
+        """
+        flag = False
+        try:
+            audio_id = voice_data.get("audio_id", "")
+            asr_model = voice_data.get("asr_model", "")
+            k = self._create_id_by_input(audio_id + asr_model).encode("utf8")
+            v = json.dumps(voice_data).encode("utf8")
+            self.to_client.send(topic=self.asr_producer_topics, key=k, value=v)
+            flag = True
+        except Exception as e:
+            # logger.error("error: %s, voice_data: %s" % (e, voice_data))
+            print("error: %s, voice_data: %s" % (e, json.dumps(voice_data,
+                                                               ensure_ascii=True)))
+        finally:
+            return flag
+
+    def _create_id_by_input(self, id=""):
+        """
+        Generate a unique md5 id from the input.
+        :param id:
+        :return:
+        """
+        x = str(id).encode("utf8")
+        m = hashlib.md5(x)
+        return m.hexdigest()
+
+
+if __name__ == '__main__':
+    args = get_parser()
+    kafka_host, session_timeout_ms = args.kafka_host, args.session_timeout_ms
+    seg_consumer_topics, seg_auto_offset_reset = args.seg_consumer_topics, args.seg_auto_offset_reset  # consumer params for the segmentation results
+    seg_consumer_groupid, asr_producer_topics = args.seg_consumer_groupid, args.asr_producer_topics  # producer params for the ASR results
+    father_path, hkust_path = args.father_path, args.hkust_path  # audio and model related params
+    num_job = args.num_job  # number of decoding jobs
+    poll_timeout_ms = args.poll_timeout_ms
+    consumer_gap = args.consumer_gap
+    num_workers = args.num_workers
+
+    # python2 asr_pipeline.py --kafka-host 192.168.40.22:9090,192.168.40.19:9090,192.168.40.59:9090,192.168.40.60:9090,192.168.40.61:9090 \
+    #                         --seg-consumer-topics sp_vad_topic1 \
+    #                         --seg-consumer-groupid asr_group1 \
+    #                         --seg-auto-offset-reset largest \
+    #                         --father-path /data/mfs/k8s/speech_pipeline/sad \
+    #                         --hkust-path /home/app/asr_pipline/kaldi/egs/hkust/s5_iqianjin
+
+    if type(seg_consumer_topics) is str:
+        seg_consumer_topics = [seg_consumer_topics, ]
+    asr = ASR(kafka_servers=kafka_host, seg_consumer_topics=seg_consumer_topics, session_timeout_ms=session_timeout_ms,
+              seg_consumer_groupid=seg_consumer_groupid, seg_auto_offset_reset=seg_auto_offset_reset,
+              asr_producer_topics=asr_producer_topics, num_job=num_job,
+              poll_timeout_ms=poll_timeout_ms, consumer_gap=consumer_gap, num_workers=num_workers)
+
+    asr.asr_pipline_from_kafka(father_path)
diff --git a/egs/aishell/asr1/asr_pipeline.sh b/egs/aishell/asr1/asr_pipeline.sh
new file mode 100644
index 00000000000..56b85396a9e
--- /dev/null
+++ b/egs/aishell/asr1/asr_pipeline.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+#vim /etc/hosts
+## Entries added by HostAliases.
+#192.168.176.181 daasoffline1.kafka.dc.puhuifinance.com
+#192.168.176.182 daasoffline2.kafka.dc.puhuifinance.com
+#192.168.176.183 daasoffline3.kafka.dc.puhuifinance.com
+#192.168.176.184 daasoffline4.kafka.dc.puhuifinance.com
+#192.168.176.185 daasoffline5.kafka.dc.puhuifinance.com
+
+# test
+# for testing, the producer kafka host needs to be changed inside the code
+/espnet/tools/venv/bin/python asr_pipeline.py \
+    --kafka-host daasoffline1.kafka.dc.puhuifinance.com:6667,daasoffline2.kafka.dc.puhuifinance.com:6667,daasoffline3.kafka.dc.puhuifinance.com:6667,daasoffline4.kafka.dc.puhuifinance.com:6667,daasoffline5.kafka.dc.puhuifinance.com:6667 \
+    --seg-consumer-topics sp_asr_topic \
+    --seg-consumer-groupid sp_sad_asr_group_np_20191028_v3 \
+    --session-timeout-ms 30000 \
+    --seg-auto-offset-reset smallest \
+    --asr-producer-topics asr_topic1_t1 \
+    --father-path /data/mfs/k8s/speech_pipeline/raw \
+    --hkust-path /home/app/hkust/kaldi/egs/hkust/s5_daihou \
+    --num-job 10
+
+
+## online
+#python2 -u asr_pipline.py --kafka-host daasoffline1.kafka.dc.puhuifinance.com:6667,daasoffline2.kafka.dc.puhuifinance.com:6667,daasoffline3.kafka.dc.puhuifinance.com:6667,daasoffline4.kafka.dc.puhuifinance.com:6667,daasoffline5.kafka.dc.puhuifinance.com:6667 \
+#    --seg-consumer-topics sp_sad_topic \
+#    --seg-consumer-groupid sp_sad_asr_group \
+#    --session-timeout-ms 60000 \
+#    --seg-auto-offset-reset largest \
+#    --asr-producer-topics sp_asr_topic \
+#    --father-path /data/mfs/k8s/speech_pipeline/sad \
+#    --hkust-path /home/app/asr_pipline/kaldi/egs/hkust/s5_daihou \
+#    --num-job 10
diff --git a/egs/aishell/asr1/conf/fbank.conf b/egs/aishell/asr1/conf/fbank.conf
index 82ac7bd0dbc..1ad20614eef 100644
--- a/egs/aishell/asr1/conf/fbank.conf
+++ b/egs/aishell/asr1/conf/fbank.conf
@@ -1,2 +1,2 @@
---sample-frequency=16000
+--sample-frequency=8000
 --num-mel-bins=80
diff --git a/egs/aishell/asr1/conf/pitch.conf b/egs/aishell/asr1/conf/pitch.conf
index e959a19d5b8..926bcfca92a 100644
--- a/egs/aishell/asr1/conf/pitch.conf
+++ b/egs/aishell/asr1/conf/pitch.conf
@@ -1 +1 @@
---sample-frequency=16000
+--sample-frequency=8000
diff --git a/egs/aishell/asr1/infer.sh b/egs/aishell/asr1/infer.sh
new file mode 100755
index 00000000000..3274b4b92df
--- /dev/null
+++ b/egs/aishell/asr1/infer.sh
@@ -0,0 +1,266 @@
+#!/bin/bash
+
+# Copyright 2017 Johns Hopkins University (Shinji Watanabe)
+#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+
+. ./path.sh || exit 1;
+. ./cmd.sh || exit 1;
+
+if [ $# != 3 ]; then
+    echo "Usage: $0 <data-dir> <flag> <nj-decoder>"
+    exit 1;
+else
+    data=$1        # input data folder
+    flag=$2        # unique id to keep output files from being overwritten
+    nj_decoder=$3  # decode workers
+fi
+
+
+# general configuration
+backend=pytorch
+stage=0        # start from 0 if you need to start from data preparation
+stop_stage=100
+ngpu=0         # number of gpus ("0" uses cpu, otherwise use gpu)
+debugmode=1
+dumpdir=dump   # directory to dump full features
+N=0            # number of minibatches to be used (mainly for debugging). "0" uses all minibatches.
+verbose=0 # verbose option +resume= # Resume the training from snapshot + +# feature configuration +do_delta=false + +train_config=conf/train.yaml +lm_config=conf/lm.yaml +decode_config=conf/decode.yaml + +# rnnlm related +lm_resume= # specify a snapshot file to resume LM training +lmtag= # tag for managing LMs + +# decoding parameter +recog_model=model.acc.best # set a model to be used for decoding: 'model.acc.best' or 'model.loss.best' +n_average=10 + +# data +#data=/export/a05/xna/data +#data_url=www.openslr.org/resources/33 + +# data=/data/nipeng/2019-10-07-aishell +# data=/data/nipeng/2019-1001-1020-aishell + +# exp tag +tag="" # tag for managing experiments. + +. utils/parse_options.sh || exit 1; + +# Set bash to 'debug' mode, it will exit on : +# -e 'error', -u 'undefined variable', -o ... 'error in pipeline', -x 'print commands', +set -e +set -u +set -o pipefail + +train_set=train_sp +train_dev=dev +#recog_set="dev test" +recog_set=infer_${flag} + +# if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then +# echo "stage -1: Data Download" +# local/download_and_untar.sh ${data} ${data_url} data_aishell +# local/download_and_untar.sh ${data} ${data_url} resource_aishell +# fi + +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + ### Task dependent. You have to make data the following preparation part by yourself. + ### But you can utilize Kaldi recipes in most cases + echo "stage 0: Data preparation" + local/aishell_data_prep_infer.sh ${data}/data_aishell/wav ${data}/data_aishell/transcript ${flag} + # remove space in text + for x in ${recog_set}; do + cp data/${x}/text data/${x}/text.org + paste -d " " <(cut -f 1 -d" " data/${x}/text.org) <(cut -f 2- -d" " data/${x}/text.org | tr -d " ") \ + > data/${x}/text + rm data/${x}/text.org + done +fi + +feat_tr_dir=${dumpdir}/${train_set}/delta${do_delta}; mkdir -p ${feat_tr_dir} +feat_dt_dir=${dumpdir}/${train_dev}/delta${do_delta}; mkdir -p ${feat_dt_dir} +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + ### Task dependent. You have to design training and dev sets by yourself. 
+ ### But you can utilize Kaldi recipes in most cases + echo "stage 1: Feature Generation" + fbankdir=fbank_${flag} + # Generate the fbank features; by default 80-dimensional fbanks with pitch on each frame + # steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 32 --write_utt2num_frames true \ + # data/train exp/make_fbank/train ${fbankdir} + # utils/fix_data_dir.sh data/train + # steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 10 --write_utt2num_frames true \ + # data/dev exp/make_fbank/dev ${fbankdir} + # utils/fix_data_dir.sh data/dev + steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 10 --write_utt2num_frames true \ + data/${recog_set} exp/make_fbank/${recog_set} ${fbankdir} + utils/fix_data_dir.sh data/${recog_set} + + # # speed-perturbed + # utils/perturb_data_dir_speed.sh 0.9 data/train data/temp1 + # utils/perturb_data_dir_speed.sh 1.0 data/train data/temp2 + # utils/perturb_data_dir_speed.sh 1.1 data/train data/temp3 + # utils/combine_data.sh --extra-files utt2uniq data/${train_set} data/temp1 data/temp2 data/temp3 + # rm -r data/temp1 data/temp2 data/temp3 + # steps/make_fbank_pitch.sh --cmd "$train_cmd" --nj 32 --write_utt2num_frames true \ + # data/${train_set} exp/make_fbank/${train_set} ${fbankdir} + # utils/fix_data_dir.sh data/${train_set} + + # # compute global CMVN + # compute-cmvn-stats scp:data/${train_set}/feats.scp data/${train_set}/cmvn.ark + + # # dump features for training + # split_dir=$(echo $PWD | awk -F "/" '{print $NF "/" $(NF-1)}') + # if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_tr_dir}/storage ]; then + # utils/create_split_dir.pl \ + # /export/a{11,12,13,14}/${USER}/espnet-data/egs/${split_dir}/dump/${train_set}/delta${do_delta}/storage \ + # ${feat_tr_dir}/storage + # fi + # if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d ${feat_dt_dir}/storage ]; then + # utils/create_split_dir.pl \ + # /export/a{11,12,13,14}/${USER}/espnet-data/egs/${split_dir}/dump/${train_dev}/delta${do_delta}/storage \ + # ${feat_dt_dir}/storage + # fi + # dump.sh --cmd "$train_cmd" --nj 32 --do_delta ${do_delta} \ + # data/${train_set}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/train ${feat_tr_dir} + for rtask in ${recog_set}; do + feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta}; mkdir -p ${feat_recog_dir} + dump.sh --cmd "$train_cmd" --nj 10 --do_delta ${do_delta} \ + data/${rtask}/feats.scp data/${train_set}/cmvn.ark exp/dump_feats/recog/${rtask} \ + ${feat_recog_dir} + done +fi + +dict=data/lang_1char/${train_set}_units.txt +echo "dictionary: ${dict}" +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + ### Task dependent. You have to check non-linguistic symbols used in the corpus. 
+ echo "stage 2: Dictionary and Json Data Preparation" + mkdir -p data/lang_1char/ + + # echo "make a dictionary" + # echo " 1" > ${dict} # must be 1, 0 will be used for "blank" in CTC + # text2token.py -s 1 -n 1 data/${train_set}/text | cut -f 2- -d" " | tr " " "\n" \ + # | sort | uniq | grep -v -e '^\s*$' | awk '{print $0 " " NR+1}' >> ${dict} + # wc -l ${dict} + + # echo "make json files" + # data2json.sh --feat ${feat_tr_dir}/feats.scp \ + # data/${train_set} ${dict} > ${feat_tr_dir}/data.json + for rtask in ${recog_set}; do + feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta} + data2json.sh --feat ${feat_recog_dir}/feats.scp \ + data/${rtask} ${dict} > ${feat_recog_dir}/data.json + done +fi + +# you can skip this and remove --rnnlm option in the recognition (stage 5) +if [ -z ${lmtag} ]; then + lmtag=$(basename ${lm_config%.*}) +fi +lmexpname=train_rnnlm_${backend}_${lmtag} +lmexpdir=exp/${lmexpname} +# mkdir -p ${lmexpdir} + +# if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then +# echo "stage 3: LM Preparation" +# lmdatadir=data/local/lm_train +# mkdir -p ${lmdatadir} +# text2token.py -s 1 -n 1 data/train/text | cut -f 2- -d" " \ +# > ${lmdatadir}/train.txt +# text2token.py -s 1 -n 1 data/${train_dev}/text | cut -f 2- -d" " \ +# > ${lmdatadir}/valid.txt + +# ${cuda_cmd} --gpu ${ngpu} ${lmexpdir}/train.log \ +# lm_train.py \ +# --config ${lm_config} \ +# --ngpu ${ngpu} \ +# --backend ${backend} \ +# --verbose 1 \ +# --outdir ${lmexpdir} \ +# --tensorboard-dir tensorboard/${lmexpname} \ +# --train-label ${lmdatadir}/train.txt \ +# --valid-label ${lmdatadir}/valid.txt \ +# --resume ${lm_resume} \ +# --dict ${dict} +# fi + +if [ -z ${tag} ]; then + expname=${train_set}_${backend}_$(basename ${train_config%.*}) + if ${do_delta}; then + expname=${expname}_delta + fi +else + expname=${train_set}_${backend}_${tag} +fi +expdir=exp/${expname} +# mkdir -p ${expdir} + +# if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then +# echo "stage 4: Network Training" +# ${cuda_cmd} --gpu ${ngpu} ${expdir}/train.log \ +# asr_train.py \ +# --config ${train_config} \ +# --ngpu ${ngpu} \ +# --backend ${backend} \ +# --outdir ${expdir}/results \ +# --tensorboard-dir tensorboard/${expname} \ +# --debugmode ${debugmode} \ +# --dict ${dict} \ +# --debugdir ${expdir} \ +# --minibatches ${N} \ +# --verbose ${verbose} \ +# --resume ${resume} \ +# --train-json ${feat_tr_dir}/data.json \ +# --valid-json ${feat_dt_dir}/data.json +# fi + +if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then + echo "stage 5: Decoding" + nj=${nj_decoder} + if [[ $(get_yaml.py ${train_config} model-module) = *transformer* ]]; then + recog_model=model.last${n_average}.avg.best + # average_checkpoints.py --backend ${backend} \ + # --snapshots ${expdir}/results/snapshot.ep.* \ + # --out ${expdir}/results/${recog_model} \ + # --num ${n_average} + fi + pids=() # initialize pids + for rtask in ${recog_set}; do + ( + decode_dir=decode_${rtask}_$(basename ${decode_config%.*})_${lmtag} + feat_recog_dir=${dumpdir}/${rtask}/delta${do_delta} + + # split data + splitjson.py --parts ${nj} ${feat_recog_dir}/data.json + + #### use CPU for decoding + ngpu=0 + + ${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \ + asr_recog.py \ + --config ${decode_config} \ + --ngpu ${ngpu} \ + --backend ${backend} \ + --batchsize 0 \ + --recog-json ${feat_recog_dir}/split${nj}utt/data.JOB.json \ + --result-label ${expdir}/${decode_dir}/data.JOB.json \ + --model ${expdir}/results/${recog_model} \ + --rnnlm ${lmexpdir}/rnnlm.model.best + + 
score_sclite.sh ${expdir}/${decode_dir} ${dict}
+
+    ) &
+    pids+=($!) # store background pids
+    done
+    i=0; for pid in "${pids[@]}"; do wait ${pid} || ((++i)); done
+    [ ${i} -gt 0 ] && echo "$0: ${i} background jobs are failed." && false
+    echo "Finished"
+fi
diff --git a/egs/aishell/asr1/local/aishell_data_prep.sh b/egs/aishell/asr1/local/aishell_data_prep.sh
index 4747e4f4d82..a4e411f175b 100755
--- a/egs/aishell/asr1/local/aishell_data_prep.sh
+++ b/egs/aishell/asr1/local/aishell_data_prep.sh
@@ -12,7 +12,7 @@ if [ $# != 2 ]; then
 fi
 
 aishell_audio_dir=$1
-aishell_text=$2/aishell_transcript_v0.8.txt
+aishell_text=$2/aishell_transcript_v0.8.txt.add10w.20190918.tune_high_medium_lv.v2
 
 train_dir=data/local/train
 dev_dir=data/local/dev
diff --git a/egs/aishell/asr1/local/aishell_data_prep_infer.sh b/egs/aishell/asr1/local/aishell_data_prep_infer.sh
new file mode 100755
index 00000000000..224c56ce33f
--- /dev/null
+++ b/egs/aishell/asr1/local/aishell_data_prep_infer.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Copyright 2017 Xingyu Na
+# Apache 2.0
+
+. ./path.sh || exit 1;
+
+if [ $# != 3 ]; then
+  echo "Usage: $0 <audio-path> <text-path> <flag>"
+  echo " e.g.: $0 /export/a05/xna/data/data_aishell/wav /export/a05/xna/data/data_aishell/transcript uuid"
+  exit 1;
+fi
+
+aishell_audio_dir=$1
+aishell_text=$2/aishell_transcript.txt
+flag=$3
+
+#train_dir=data/local/train
+#dev_dir=data/local/dev
+#test_dir=data/local/test
+raw_infer_dir=data/infer_${flag}
+infer_dir=data/local/infer_${flag}
+tmp_dir=data/local/tmp_${flag}
+
+#mkdir -p $train_dir
+#mkdir -p $dev_dir
+#mkdir -p $test_dir
+mkdir -p $raw_infer_dir
+mkdir -p $infer_dir
+mkdir -p $tmp_dir
+
+# data directory check
+if [ ! -d $aishell_audio_dir ] || [ ! -f $aishell_text ]; then
+  echo "Error: $0 requires an audio directory and a transcript file"
+  echo ${aishell_audio_dir}, ${aishell_text}
+  exit 1;
+fi
+
+# find wav audio files for the infer set
+find $aishell_audio_dir -iname "*.wav" > $tmp_dir/wav.flist +n=`cat $tmp_dir/wav.flist | wc -l` +# [ $n -ne 141925 ] && \ +# echo Warning: expected 141925 data data files, found $n + +grep -i "wav/infer" $tmp_dir/wav.flist > $infer_dir/wav.flist || exit 1; + +rm -r $tmp_dir + +# Transcriptions preparation +for dir in $infer_dir; do + echo Preparing $dir transcriptions + sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{print $NF}' > $dir/utt.list + sed -e 's/\.wav//' $dir/wav.flist | awk -F '/' '{i=NF-1;printf("%s %s\n",$NF,$i)}' > $dir/utt2spk_all + paste -d' ' $dir/utt.list $dir/wav.flist > $dir/wav.scp_all + utils/filter_scp.pl -f 1 $dir/utt.list $aishell_text > $dir/transcripts.txt + awk '{print $1}' $dir/transcripts.txt > $dir/utt.list + utils/filter_scp.pl -f 1 $dir/utt.list $dir/utt2spk_all | sort -u > $dir/utt2spk + utils/filter_scp.pl -f 1 $dir/utt.list $dir/wav.scp_all | sort -u > $dir/wav.scp + sort -u $dir/transcripts.txt > $dir/text + utils/utt2spk_to_spk2utt.pl $dir/utt2spk > $dir/spk2utt +done + +## mkdir -p data/infer_${flag} + +for f in spk2utt utt2spk wav.scp text; do + cp $infer_dir/$f ${raw_infer_dir}/$f || exit 1; +done + +echo "$0: AISHELL data preparation succeeded" +exit 0; diff --git a/egs/aishell/asr1/run.sh b/egs/aishell/asr1/run.sh index d406715d5d2..77119cf5694 100755 --- a/egs/aishell/asr1/run.sh +++ b/egs/aishell/asr1/run.sh @@ -10,7 +10,8 @@ backend=pytorch stage=0 # start from 0 if you need to start from data preparation stop_stage=100 -ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu) +# export CUDA_VISIBLE_DEVICES=0,2,3 +ngpu=4 # number of gpus ("0" uses cpu, otherwise use gpu) debugmode=1 dumpdir=dump # directory to dump full features N=0 # number of minibatches to be used (mainly for debugging). "0" uses all minibatches. @@ -33,8 +34,10 @@ recog_model=model.acc.best # set a model to be used for decoding: 'model.acc.bes n_average=10 # data -data=/export/a05/xna/data -data_url=www.openslr.org/resources/33 +#data=/export/a05/xna/data +#data_url=www.openslr.org/resources/33 + +data=/data/chenghengzhe/kf/dataset/fakes/fake_merged_v2 # exp tag tag="" # tag for managing experiments. @@ -51,11 +54,11 @@ train_set=train_sp train_dev=dev recog_set="dev test" -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - local/download_and_untar.sh ${data} ${data_url} data_aishell - local/download_and_untar.sh ${data} ${data_url} resource_aishell -fi +# if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then +# echo "stage -1: Data Download" +# local/download_and_untar.sh ${data} ${data_url} data_aishell +# local/download_and_untar.sh ${data} ${data_url} resource_aishell +# fi if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ### Task dependent. You have to make data the following preparation part by yourself. 
@@ -210,7 +213,7 @@ fi if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then echo "stage 5: Decoding" - nj=32 + nj=40 if [[ $(get_yaml.py ${train_config} model-module) = *transformer* ]]; then recog_model=model.last${n_average}.avg.best average_checkpoints.py --backend ${backend} \ diff --git a/egs/aishell/asr1/test.py b/egs/aishell/asr1/test.py new file mode 100644 index 00000000000..336c1c7b4af --- /dev/null +++ b/egs/aishell/asr1/test.py @@ -0,0 +1,29 @@ +# -*- coding: utf-8 -*- + + +import codecs + + +def get_infer_result(): + path = "/data/nipeng/TTS/espnet/egs/aishell/asr1/exp/train_sp_pytorch_train/decode_infer_decode_lm/hyp.trn" + fout = codecs.open("infer.txt", "w") + with codecs.open(path) as f: + for line in f: + t, a = line.split("(") + text = t.replace(" ", "") + aid = a.split("-")[0] + aname = aid + ".wav" + fout.write("%s\t%s\n" %(aname, text)) + +def test2(): + from asr_pipeline import DetectAlarmKeyword + kw_client = DetectAlarmKeyword() + text = "你的是傻逼滚按时发大水发老赖法萨芬" + rst = kw_client.process(text) + print(rst) + + +if __name__ == "__main__": + print("ok") + #get_infer_result() + test2() diff --git a/egs/ljspeech/tts1/conf/tuning/train_fastspeech.v1.yaml b/egs/ljspeech/tts1/conf/tuning/train_fastspeech.v1.yaml index f3be22236ab..196449301f6 100644 --- a/egs/ljspeech/tts1/conf/tuning/train_fastspeech.v1.yaml +++ b/egs/ljspeech/tts1/conf/tuning/train_fastspeech.v1.yaml @@ -52,7 +52,7 @@ grad-clip: 1.0 weight-decay: 0.0 patience: 0 epochs: 1000 # 1,000 epochs * 330 batches / 2 accum-grad = 165,000 iters -teacher-model: exp/train_no_dev_pytorch_train_transformer.v1/results/model.last1.avg.best +teacher-model: exp/train_no_dev_pytorch_train_pytorch_transformer.v1/results/model.last1.avg.best # you can download pretrained teacher model from google drive # https://drive.google.com/open?id=1arZAxZOLep-1W5ByQMD1lCX2tEASnw7p diff --git a/egs/ljspeech/tts1/conf/tuning/train_pytorch_transformer.v1.yaml b/egs/ljspeech/tts1/conf/tuning/train_pytorch_transformer.v1.yaml index 7c5512ad253..c7be653b50f 100644 --- a/egs/ljspeech/tts1/conf/tuning/train_pytorch_transformer.v1.yaml +++ b/egs/ljspeech/tts1/conf/tuning/train_pytorch_transformer.v1.yaml @@ -62,7 +62,7 @@ accum-grad: 2 grad-clip: 1.0 weight-decay: 0.0 patience: 0 -epochs: 1000 # 1,000 epochs * 330 batches / 2 accum-grad = 165,000 iters +epochs: 500 # 1,000 epochs * 330 batches / 2 accum-grad = 165,000 iters # other save-interval-epoch: 10 diff --git a/egs/ljspeech/tts1/local/text/cleaners.py b/egs/ljspeech/tts1/local/text/cleaners.py index b99f6eefde1..7fb49245b5f 100644 --- a/egs/ljspeech/tts1/local/text/cleaners.py +++ b/egs/ljspeech/tts1/local/text/cleaners.py @@ -106,8 +106,8 @@ def english_cleaners(text): '''Pipeline for English text, including number and abbreviation expansion.''' text = convert_to_ascii(text) text = lowercase(text) - text = expand_numbers(text) - text = expand_abbreviations(text) + #text = expand_numbers(text) + #text = expand_abbreviations(text) text = expand_symbols(text) text = remove_unnecessary_symbols(text) text = uppercase(text) diff --git a/egs/ljspeech/tts1/run.sh b/egs/ljspeech/tts1/run.sh index eaa2084fe57..5dfd68d6e85 100755 --- a/egs/ljspeech/tts1/run.sh +++ b/egs/ljspeech/tts1/run.sh @@ -8,9 +8,9 @@ # general configuration backend=pytorch -stage=-1 +stage=4 stop_stage=100 -ngpu=1 # number of gpus ("0" uses cpu, otherwise use gpu) +ngpu=4 # number of gpus ("0" uses cpu, otherwise use gpu) nj=32 # numebr of parallel jobs dumpdir=dump # directory to dump full features verbose=0 # verbose option 
(if set > 0, get more log) @@ -28,9 +28,12 @@ n_shift=256 # number of shift points win_length="" # window length # config files -train_config=conf/train_pytorch_tacotron2.yaml # you can select from conf or conf/tuning. - # now we support tacotron2, transformer, and fastspeech - # see more info in the header of each config. +# you can select from conf or conf/tuning. +# now we support tacotron2, transformer, and fastspeech +# see more info in the header of each config. +train_config=conf/tuning/train_pytorch_transformer.v1.yaml +# train_config=conf/tuning/train_fastspeech.v1.yaml + decode_config=conf/decode.yaml # decoding related @@ -52,20 +55,21 @@ set -e set -u set -o pipefail +corpus_path="/data/nipeng/TTS/data/fake_lj_nosil" train_set="train_no_dev" dev_set="dev" eval_set="eval" -if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then - echo "stage -1: Data Download" - local/download.sh ${db_root} -fi +# if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then +# echo "stage -1: Data Download" +# local/download.sh ${db_root} +# fi if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then ### Task dependent. You have to make data the following preparation part by yourself. ### But you can utilize Kaldi recipes in most cases echo "stage 0: Data preparation" - local/data_prep.sh ${db_root}/LJSpeech-1.1 data/train + local/data_prep.sh ${corpus_path} data/train utils/validate_data_dir.sh --no-feats data/train fi diff --git a/tools/Makefile b/tools/Makefile index 8c88f218678..8bc37784713 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,14 +1,16 @@ # If a Python interpreter is specified, then creates a virtualenv from it # PYTHON := /usr/bin/python3.7 +# PYTHON := /root/anaconda3/envs/np-py36-tts/bin/python3 PYTHON := # The python version installed in the conda setup # NOTE(kan-bayashi): Use 3.7.3 to avoid sentencepiece installation error PYTHON_VERSION := 3.7.3 CUPY_VERSION := 6.0.0 -CUDA_VERSION := 10.0 +CUDA_VERSION := 9.0 # PyTorch version: 0.4.1 or 1.0.0 or 1.0.1 -TH_VERSION := 1.0.1 +TH_VERSION := 1.0.0 # Use a prebuild Kaldi to omit the installation +# KALDI := /data/nipeng/kaldi KALDI := WGET := wget --tries=3 @@ -18,13 +20,13 @@ CONDA_URL := https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64. GCC_VERSION := $(shell gcc -dumpversion) -ifneq ($(shell which nvidia-smi),) # 'nvcc' found -CONDA_PYTORCH := pytorch=$(TH_VERSION) cudatoolkit=$(CUDA_VERSION) -CUDA_DEPS := cupy.done -else +# ifneq ($(shell which nvidia-smi),) # 'nvcc' found +# CONDA_PYTORCH := pytorch=$(TH_VERSION) cudatoolkit=$(CUDA_VERSION) +# CUDA_DEPS := cupy.done +# else CONDA_PYTORCH := pytorch-cpu=$(TH_VERSION) CUDA_DEPS := -endif +# endif .PHONY: all clean @@ -52,9 +54,12 @@ ifneq ($(strip $(PYTHON)),) venv: test -d venv || virtualenv -p $(PYTHON) venv espnet.done: venv + . venv/bin/activate; pip install -U pip . venv/bin/activate; pip install pip --upgrade . venv/bin/activate; pip install -e .. - . venv/bin/activate; pip install torch==$(TH_VERSION) + . venv/bin/activate; pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple/ + . venv/bin/activate; pip install --index https://pypi.mirrors.ustc.edu.cn/simple/ torch==$(TH_VERSION) + . venv/bin/activate; pip install --index https://pypi.mirrors.ustc.edu.cn/simple/ redis kafka-python pydub touch espnet.done else miniconda.sh: @@ -62,11 +67,28 @@ miniconda.sh: venv: miniconda.sh test -d $(PWD)/venv || bash miniconda.sh -b -p $(PWD)/venv . venv/bin/activate && conda update -y conda + . 
venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ + . venv/bin/activate && conda config --add channels https://mirror.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ + . venv/bin/activate && conda config --set show_channel_urls yes . venv/bin/activate && conda install -y python=$(PYTHON_VERSION) . venv/bin/activate && conda info -a espnet.done: venv - . venv/bin/activate && conda install -y $(CONDA_PYTORCH) -c pytorch + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/msys2/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ + . venv/bin/activate && conda config --add channels https://mirror.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/menpo/ + . venv/bin/activate && conda config --add channels https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ + . venv/bin/activate && conda config --set show_channel_urls yes + . venv/bin/activate && conda install -y $(CONDA_PYTORCH) pytorch . venv/bin/activate && pip install -e .. + . venv/bin/activate; pip install --index https://pypi.mirrors.ustc.edu.cn/simple/ redis kafka-python pydub touch espnet.done endif @@ -106,7 +128,7 @@ warp-transducer.done: espnet.done chainer_ctc.done: espnet.done rm -rf chainer_ctc git clone https://github.com/jheymann85/chainer_ctc.git - . venv/bin/activate; pip install cython + . venv/bin/activate; pip install --index https://pypi.mirrors.ustc.edu.cn/simple/ cython . venv/bin/activate; cd chainer_ctc && chmod +x install_warp-ctc.sh && ./install_warp-ctc.sh ; true . venv/bin/activate; cd chainer_ctc && pip install . touch chainer_ctc.done @@ -125,7 +147,7 @@ mecab.done: espnet.done cd mecab/mecab-ipadic && ./configure --with-charset=utf8 --with-mecab-config=$(PWD)/mecab/bin/mecab-config --prefix=$(PWD)/mecab && $(MAKE) && $(MAKE) install cd mecab && git clone --depth 1 https://github.com/neologd/mecab-ipadic-neologd.git cd mecab/mecab-ipadic-neologd && export PATH=$(PWD)/mecab/bin:$(PATH) && ./bin/install-mecab-ipadic-neologd -n -y -p $(PWD)/mecab/mecab-ipadic-neologd - . venv/bin/activate; pip install mojimoji pykakasi + . venv/bin/activate; pip install --index https://pypi.mirrors.ustc.edu.cn/simple/ mojimoji pykakasi . venv/bin/activate; if [ `python --version | cut -c 8` -eq 3 ]; then \ if [ ! -e swig.done ]; then \ rm -rf swig; \