-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathload_ali3.m
139 lines (120 loc) · 3.92 KB
/
load_ali3.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
function [Uid,Basic,Pdf,Phone,Phone_seq] = load_ali3(basic_ali,pdf_ali,phone_ali,phone_seq)
% LOAD_ALI3 Load Kaldi alignments from four parallel text files.
%
% Loads a Kaldi alignment given in integer basic (transition ID), pdf, and
% phone formats, plus a phone sequence with per-phone frame counts.
% The argument files can be made with prepare_ali.sh.
% Kaldi programs are not called locally.
%
% Inputs (file names):
%   basic_ali  one line per utterance: key followed by transition IDs
%   pdf_ali    one line per utterance: key followed by pdf IDs
%   phone_ali  one line per utterance: key followed by phone IDs
%   phone_seq  one line per utterance: key followed by "phone count ;" pairs,
%              e.g.  ahh05_st0556_trn 1 11 ; 182 9 ; 16 6 ; 159 3 ; 90 11 ;
% When fewer than two arguments are given, built-in demo paths are used.
%
% Outputs (cell arrays indexed by accepted utterance), for example:
%   Uid{10}       = 103-1240-0012-T
%   Basic{10}     1 x 1517 vector of transition IDs, the basic alignment.
%   Pdf{10}       1 x 1517 vector of pdf IDs
%   Phone{10}     1 x 1517 vector of phone IDs
%   Phone_seq{10} 2 x 178 matrix of phone IDs, and in second row frame
%                 counts.
%
% The four files are read in lockstep, line by line.  An utterance is kept
% only when the key on the current line is identical in all four files;
% otherwise that line is skipped.
%
% Errors:
%   load_ali3:notEnoughInputs  two or three arguments were supplied
%   load_ali3:cannotOpen       one of the files could not be opened
%   load_ali3:lengthMismatch   a companion file ended before basic_ali did
%
% Example:
%   [Uid,Basic] = load_ali3(b,p,h,s);
%   Uid{129}
%   Basic{129}(10:20)

% Demo arguments.
if nargin < 2
    expbase = '/projects/speech/sys/kaldi-trunk/egs/librispeech/s5_word/exp/tri3b_ali_clean_100_CAN';
    basic_ali = [expbase '/' 'ali.all.t'];
    pdf_ali = [expbase '/' 'pdf_ali'];
    phone_ali = [expbase '/' 'phone_ali'];
    phone_seq = [expbase '/' 'phone_seq'];
elseif nargin < 4
    % Previously this fell through and failed later on an undefined variable.
    error('load_ali3:notEnoughInputs', ...
        'Expected four alignment file names, got %d.', nargin);
end

% Open input streams for alignments in the various formats.  Each stream
% gets an onCleanup guard so it is closed on normal return and on error.
% Basic alignment with transition IDs.
basic_stream = open_for_reading(basic_ali);
basic_guard = onCleanup(@() fclose(basic_stream)); %#ok<NASGU>
% Probability density ids: for each frame, the id of the pdf used for it.
pdf_stream = open_for_reading(pdf_ali);
pdf_guard = onCleanup(@() fclose(pdf_stream)); %#ok<NASGU>
% Phones: for each frame, the numerical phone it is in.
phone_stream = open_for_reading(phone_ali);
phone_guard = onCleanup(@() fclose(phone_stream)); %#ok<NASGU>
% Sequence of numerical phones transcribing the utterance, with lengths.
phone_seq_stream = open_for_reading(phone_seq);
phone_seq_guard = onCleanup(@() fclose(phone_seq_stream)); %#ok<NASGU>

% Initialize the output cell arrays.
Uid = {}; Basic = {}; Pdf = {}; Phone = {}; Phone_seq = {};
% Index of the last accepted utterance.
j = 0;
% Line number in the input files, used only for error messages.
lineno = 0;

% Iterate through the lines of the alignments; basic_ali drives the loop.
line_basic = fgetl(basic_stream);
while ischar(line_basic)
    lineno = lineno + 1;
    line_pdf = fgetl(pdf_stream);
    line_phone = fgetl(phone_stream);
    line_phone_seq = fgetl(phone_seq_stream);

    % fgetl returns -1 (not char) at end of file.
    if ~(ischar(line_pdf) && ischar(line_phone) && ischar(line_phone_seq))
        error('load_ali3:lengthMismatch', ...
            'A companion alignment file ended before ''%s'' at line %d.', ...
            basic_ali, lineno);
    end

    [key,ab] = parse_alignment(line_basic);
    [keyp,ap] = parse_alignment(line_pdf);
    [keyh,ah] = parse_alignment(line_phone);
    [keys,as] = parse_alignment_with_length(line_phone_seq);

    % Keep the utterance only if all four keys are the same.
    if strcmp(key,keyp) && strcmp(key,keyh) && strcmp(key,keys)
        j = j+1;
        Uid{j} = key; %#ok<AGROW>
        Basic{j} = ab; %#ok<AGROW>
        Pdf{j} = ap; %#ok<AGROW>
        Phone{j} = ah; %#ok<AGROW>
        Phone_seq{j} = as; %#ok<AGROW>
    end

    line_basic = fgetl(basic_stream);
end
% The streams are closed by the onCleanup guards when this function exits.
end

% Open a file for reading, raising a descriptive error on failure.
function fid = open_for_reading(filename)
[fid,msg] = fopen(filename,'r');
if fid < 0
    error('load_ali3:cannotOpen','Cannot open ''%s'': %s',filename,msg);
end
end

% Split a line into its leading key and the remainder of the line.
% strtok skips leading whitespace, so the remainder is correct even when
% the key does not start in the first column.
function [key,rest] = split_key(line)
[key,rest] = strtok(line);
end

% Parse a line into a key and a row vector of int.
function [key,a] = parse_alignment(line)
[key,rest] = split_key(line);
a = sscanf(rest,'%d')';
end

% Parse a line into a key and a 2 x N matrix of int.
% The input line looks like this.
% bns04_st1921_trn 1 12 ; 6 7 ; 143 3 ; 50 8 ; 60 3 ; 143 4 ; 146 13
function [key,A] = parse_alignment_with_length(line)
[key,rest] = split_key(line);
% Now we have this:
% 1 12 ; 6 7 ; 143 3 ; 50 8 ; 60 3 ; 143 4 ; 146 13
A = sscanf(rest,'%d %d %*[;]',[2,Inf]);
% A has numerical phones in the first row, and
% length in frames in the second row.
end