Spaces:

stormXT
/

TalkShow

Configuration error

TalkShow / data_utils /dataset_preprocess.py

mvreddy13

Adding new Folders

f0c7f08 over 1 year ago

5.28 kB

	import os
	import pickle
	from tqdm import tqdm
	import shutil
	import torch
	import numpy as np
	import librosa
	import random

	# Speaker sub-directories of data_root that the processing loops iterate over.
	speakers = ['seth', 'conan', 'oliver', 'chemistry']
	# Dataset root; a relative path, so it resolves against the current working directory.
	data_root = "../ExpressiveWholeBodyDatasetv1.0/"
	# Only referenced from commented-out code in the visible part of this file.
	split = 'train'



	def split_list(full_list, shuffle=False, ratio=0.2):
	    """Split a list into three consecutive parts.

	    With ``n = len(full_list)`` the parts are ``items[:int(n * ratio)]``,
	    ``items[int(n * ratio):int(n * ratio * 2)]`` and the remainder.

	    Args:
	        full_list: Items to split. The input list itself is never modified.
	        shuffle: When true, the items are shuffled (on a copy) with the
	            global ``random`` generator before splitting.
	        ratio: Fraction of the items assigned to each of the first two parts.

	    Returns:
	        A 3-tuple of lists ``(first, second, rest)``. When the input is empty
	        or too small for the first two parts to hold anything, the first two
	        lists are empty and every item lands in ``rest``.
	    """
	    # Work on a copy so shuffling does not reorder the caller's list.
	    items = list(full_list)
	    n_total = len(items)
	    offset_0 = int(n_total * ratio)
	    offset_1 = int(n_total * ratio * 2)
	    if n_total == 0 or offset_1 < 1:
	        # Always return three lists so callers can unpack the result
	        # uniformly; a 2-tuple here would raise ValueError on unpacking.
	        return [], [], items
	    if shuffle:
	        random.shuffle(items)
	    return items[:offset_0], items[offset_0:offset_1], items[offset_1:]


	def moveto(list, file):
	    """Move each directory into a sibling sub-folder named ``file``.

	    A path ``a/b/clip`` is relocated to ``a/b/<file>/clip``; the ``<file>``
	    folder is created when it does not exist yet.

	    Args:
	        list: Iterable of directory paths to relocate. (The name shadows the
	            builtin; it is kept so existing callers keep working.)
	        file: Name of the sub-folder, created next to each source directory,
	            that receives it (the script uses 'train', 'val' and 'test').

	    Returns:
	        None.

	    Raises:
	        FileExistsError: If the destination path already exists.
	    """
	    for src in list:
	        # normpath drops a trailing separator; without that the basename
	        # would be empty and the directory would be copied into itself.
	        parent, name = os.path.split(os.path.normpath(src))
	        target_dir = os.path.join(parent, file)
	        new_path = os.path.join(target_dir, name)

	        # shutil.move would silently nest the source inside an existing
	        # destination directory, so refuse explicitly instead.
	        if os.path.exists(new_path):
	            raise FileExistsError(
	                "destination already exists: {}".format(new_path))

	        os.makedirs(target_dir, exist_ok=True)
	        # Transfer to the new directory and remove it from the old location
	        # in one step (a plain rename when both are on the same filesystem).
	        shutil.move(src, new_path)
	    return None


	def read_pkl(data):
	    """Flag a loaded motion record as unusable.

	    The pose arrays and the expression array found in ``data`` are joined
	    along axis 1; the record is rejected when the joined array has fewer
	    than 90 rows or contains any NaN.

	    Args:
	        data: Mapping with the keys 'betas', 'jaw_pose', 'leye_pose',
	            'reye_pose', 'global_orient', 'body_pose_axis',
	            'left_hand_pose', 'right_hand_pose' and 'expression'.

	    Returns:
	        1 when the record should be discarded, 0 when it is acceptable.
	    """
	    # 'betas' is read (so a missing key still raises) but takes no part in
	    # the check below.
	    _ = np.array(data['betas'])

	    pose_keys = (
	        'jaw_pose',
	        'leye_pose',
	        'reye_pose',
	        'global_orient',
	        'body_pose_axis',
	        'left_hand_pose',
	        'right_hand_pose',
	    )
	    pose_parts = []
	    for key in pose_keys:
	        part = np.array(data[key])
	        if key == 'global_orient':
	            # Size-1 axes are dropped before this array is joined.
	            part = part.squeeze()
	        pose_parts.append(part)

	    motion = np.concatenate(pose_parts, axis=1)
	    motion = np.concatenate((motion, np.array(data['expression'])), axis=1)

	    # Too few rows: reject without running the NaN scan.
	    if motion.shape[0] < 90:
	        return 1
	    if torch.isnan(torch.from_numpy(motion)).sum() > 0:
	        return 1
	    return 0


	# Folder names created inside each video directory by the later
	# train/val/test split. They are not clips, so the cleaning pass must not
	# treat them as clips without audio and delete them when the script is re-run.
	_SPLIT_DIRS = ('train', 'val', 'test')


	def _clip_is_usable(clip_dir, clip_name):
	    """Return True when the clip has loadable audio and an acceptable motion file.

	    Checks ``<clip_dir>/<clip_name>.wav`` with ``librosa.load`` and
	    ``<clip_dir>/<clip_name>.pkl`` with ``read_pkl``.
	    """
	    audio_fname = os.path.join(clip_dir, '%s.wav' % clip_name)
	    if not os.path.isfile(audio_fname):
	        return False
	    try:
	        librosa.load(audio_fname)
	    except Exception:
	        # The decoder's failure types depend on the audio backend, so catch
	        # Exception -- but not a bare except, which would also swallow
	        # KeyboardInterrupt and get a valid clip deleted on Ctrl-C.
	        return False

	    motion_fname = os.path.join(clip_dir, '%s.pkl' % clip_name)
	    try:
	        # Read-only mode: the file is only inspected, and 'rb+' would reject
	        # write-protected files. The context manager closes the handle even
	        # when unpickling fails.
	        # NOTE(review): pickle.load executes arbitrary code from the file;
	        # only run this on dataset files from a trusted source.
	        with open(motion_fname, 'rb') as f:
	            data = pickle.load(f)
	    except (OSError, pickle.UnpicklingError, EOFError):
	        return False

	    # read_pkl returns 1 for a bad record and 0 for a good one.
	    return read_pkl(data) == 0


	# First pass: delete every clip whose audio or motion data is unusable.
	for speaker_name in speakers:
	    speaker_root = os.path.join(data_root, speaker_name)

	    videos = os.listdir(speaker_root)
	    print(videos)

	    # Counters use pinyin names: haode = good clips, huaide = bad clips.
	    haode = huaide = 0
	    total_seqs = []

	    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
	        vid_pth = os.path.join(speaker_root, vid)
	        try:
	            seqs = os.listdir(vid_pth)
	        except OSError:
	            # Not a directory, or not readable: nothing to clean here.
	            continue

	        for s in seqs:
	            seq_root = os.path.join(vid_pth, s)  # correspond to clip******
	            # Skip split folders and stray files; rmtree on a plain file
	            # would raise and abort the whole run.
	            if s in _SPLIT_DIRS or not os.path.isdir(seq_root):
	                continue
	            total_seqs.append(seq_root)

	            if _clip_is_usable(seq_root, s):
	                haode = haode + 1
	            else:
	                # Delete the data without audio, with unreadable audio, or
	                # with a missing/invalid motion file.
	                shutil.rmtree(seq_root)
	                huaide = huaide + 1

	    print("huaide:{}, haode:{}, total_seqs:{}".format(huaide, haode, len(total_seqs)))

	# Second pass: collect the remaining clips of each speaker and distribute
	# them over train/val/test sub-folders inside their video directories.
	for speaker_name in speakers:
	    speaker_root = os.path.join(data_root, speaker_name)

	    videos = os.listdir(speaker_root)
	    print(videos)

	    total_seqs = []

	    for vid in tqdm(videos, desc="Processing training data of {}......".format(speaker_name)):
	        vid_pth = os.path.join(speaker_root, vid)
	        try:
	            seqs = os.listdir(vid_pth)
	        except OSError:
	            # Not a directory, or not readable: no clips to collect.
	            continue
	        for s in seqs:
	            seq_root = os.path.join(vid_pth, s)
	            # Only clip directories are split. Skipping existing
	            # train/val/test folders keeps a re-run from nesting them
	            # inside each other; stray files cannot be moved as trees.
	            if s in ('train', 'val', 'test') or not os.path.isdir(seq_root):
	                continue
	            total_seqs.append(seq_root)
	    print("total_seqs:{}".format(len(total_seqs)))

	    # split the dataset: ratio 0.1 puts about 10% of the shuffled clips in
	    # test, about 10% in val, and the remainder in train.
	    test_list, val_list, train_list = split_list(total_seqs, True, 0.1)
	    print(len(test_list), len(val_list), len(train_list))
	    moveto(train_list, 'train')
	    moveto(test_list, 'test')
	    moveto(val_list, 'val')