Wataru committed on
Commit
a68a7b4
·
verified ·
1 Parent(s): 034a073

Initial demo upload

Browse files
.gitattributes CHANGED
@@ -33,3 +33,40 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ nakata26sigdial_dialoguesidon.pdf filter=lfs diff=lfs merge=lfs -text
37
+ wav/cf/dialoguesidon/deu_1082.wav filter=lfs diff=lfs merge=lfs -text
38
+ wav/cf/dialoguesidon/eng-n_4708.wav filter=lfs diff=lfs merge=lfs -text
39
+ wav/cf/dialoguesidon/fra-q_5110.wav filter=lfs diff=lfs merge=lfs -text
40
+ wav/cf/dialoguesidon/jpn_0921.wav filter=lfs diff=lfs merge=lfs -text
41
+ wav/cf/dialoguesidon/spa_1469.wav filter=lfs diff=lfs merge=lfs -text
42
+ wav/cf/dialoguesidon/zho-m_0941.wav filter=lfs diff=lfs merge=lfs -text
43
+ wav/cf/geneses/deu_1082.wav filter=lfs diff=lfs merge=lfs -text
44
+ wav/cf/geneses/eng-n_4708.wav filter=lfs diff=lfs merge=lfs -text
45
+ wav/cf/geneses/fra-q_5110.wav filter=lfs diff=lfs merge=lfs -text
46
+ wav/cf/geneses/jpn_0921.wav filter=lfs diff=lfs merge=lfs -text
47
+ wav/cf/geneses/spa_1469.wav filter=lfs diff=lfs merge=lfs -text
48
+ wav/cf/geneses/zho-m_0941.wav filter=lfs diff=lfs merge=lfs -text
49
+ wav/cf/noisy/deu_1082.wav filter=lfs diff=lfs merge=lfs -text
50
+ wav/cf/noisy/eng-n_4708.wav filter=lfs diff=lfs merge=lfs -text
51
+ wav/cf/noisy/fra-q_5110.wav filter=lfs diff=lfs merge=lfs -text
52
+ wav/cf/noisy/jpn_0921.wav filter=lfs diff=lfs merge=lfs -text
53
+ wav/cf/noisy/spa_1469.wav filter=lfs diff=lfs merge=lfs -text
54
+ wav/cf/noisy/zho-m_0941.wav filter=lfs diff=lfs merge=lfs -text
55
+ wav/od/dialoguesidon/example_1.wav filter=lfs diff=lfs merge=lfs -text
56
+ wav/od/dialoguesidon/example_2.wav filter=lfs diff=lfs merge=lfs -text
57
+ wav/od/dialoguesidon/example_3.wav filter=lfs diff=lfs merge=lfs -text
58
+ wav/od/geneses/example_1.wav filter=lfs diff=lfs merge=lfs -text
59
+ wav/od/geneses/example_2.wav filter=lfs diff=lfs merge=lfs -text
60
+ wav/od/geneses/example_3.wav filter=lfs diff=lfs merge=lfs -text
61
+ wav/od/noisy/example_1.wav filter=lfs diff=lfs merge=lfs -text
62
+ wav/od/noisy/example_2.wav filter=lfs diff=lfs merge=lfs -text
63
+ wav/od/noisy/example_3.wav filter=lfs diff=lfs merge=lfs -text
64
+ wav/swb/dialoguesidon/sw02007.wav filter=lfs diff=lfs merge=lfs -text
65
+ wav/swb/dialoguesidon/sw02093.wav filter=lfs diff=lfs merge=lfs -text
66
+ wav/swb/dialoguesidon/sw02157.wav filter=lfs diff=lfs merge=lfs -text
67
+ wav/swb/geneses/sw02007.wav filter=lfs diff=lfs merge=lfs -text
68
+ wav/swb/geneses/sw02093.wav filter=lfs diff=lfs merge=lfs -text
69
+ wav/swb/geneses/sw02157.wav filter=lfs diff=lfs merge=lfs -text
70
+ wav/swb/noisy/sw02007.wav filter=lfs diff=lfs merge=lfs -text
71
+ wav/swb/noisy/sw02093.wav filter=lfs diff=lfs merge=lfs -text
72
+ wav/swb/noisy/sw02157.wav filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
  title: DialogueSidon Demo
3
- emoji:
4
- colorFrom: purple
5
- colorTo: gray
6
  sdk: static
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
  title: DialogueSidon Demo
3
+ emoji: 🗣️
4
+ colorFrom: indigo
5
+ colorTo: blue
6
  sdk: static
7
  pinned: false
8
+ short_description: Audio demo for the DialogueSidon dialogue restoration paper.
9
  ---
10
 
11
+ # DialogueSidon Demo
12
+
13
+ Demo page for *DialogueSidon: Recovering Full-Duplex Dialogue Tracks from In-the-Wild Two-speaker Dialogue Audio* (SIGDIAL 2026, under review).
index.html CHANGED
@@ -1,19 +1,204 @@
1
  <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
  <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>DialogueSidon: Recovering Full-Duplex Dialogue Tracks from In-the-Wild Two-speaker Dialogue Audio</title>
7
+ <meta name="description" content="Demo page for DialogueSidon: joint restoration and separation of degraded two-speaker dialogue audio via an SSL-VAE latent space and a diffusion-based latent predictor." />
8
+ <link rel="stylesheet" href="style.css" />
9
+ </head>
10
+ <body>
11
+ <header class="hero">
12
+ <div class="container">
13
+ <p class="venue">SIGDIAL 2026 &mdash; Under Review</p>
14
+ <h1>DialogueSidon: Recovering Full-Duplex Dialogue Tracks from In-the-Wild Two-speaker Dialogue Audio</h1>
15
+
16
+ <p class="authors">
17
+ Wataru Nakata<sup>1,2</sup>,
18
+ Yuki Saito<sup>1,2</sup>,
19
+ Kazuki Yamauchi<sup>1</sup>,
20
+ Emiru Tsunoo<sup>1</sup>,
21
+ Hiroshi Saruwatari<sup>1</sup>
22
+ </p>
23
+ <p class="affiliation">
24
+ <sup>1</sup>The University of Tokyo, Tokyo, Japan&nbsp;&nbsp;
25
+ <sup>2</sup>National Institute of Advanced Industrial Science and Technology (AIST), Tokyo, Japan
26
+ </p>
27
+
28
+ <nav class="actions">
29
+ <a class="btn" href="nakata26sigdial_dialoguesidon.pdf">Paper (PDF)</a>
30
+ <a class="btn" href="https://huggingface.co/spaces/sarulab-speech/DialogueSidon-demo" target="_blank" rel="noopener">Live Demo</a>
31
+ </nav>
32
+ </div>
33
+ </header>
34
+
35
+ <main class="container">
36
+
37
+ <section id="abstract">
38
+ <h2>Abstract</h2>
39
+ <p>
40
+ Full-duplex dialogue audio, in which each speaker is recorded on a separate track,
41
+ is an important resource for spoken dialogue research, but is difficult to collect at
42
+ scale. Most in-the-wild two-speaker dialogue is available only as degraded monaural
43
+ mixtures, which is unsuitable for systems requiring clean speaker-wise signals.
44
+ We propose <em>DialogueSidon</em>, a model for joint restoration and separation of
45
+ degraded two-speaker dialogue audio. DialogueSidon combines an SSL-VAE&mdash;which
46
+ compresses self-supervised speech features into a compact latent space&mdash;with a
47
+ diffusion-based latent predictor that recovers speaker-wise latent representations
48
+ from the degraded mixture. Experiments on English, multilingual, and in-the-wild
49
+ dialogue datasets show that DialogueSidon substantially improves intelligibility and
50
+ separation quality over a baseline, while also achieving much faster inference.
51
+ </p>
52
+ </section>
53
+
54
+ <section id="samples">
55
+ <h2>Audio Samples</h2>
56
+ <p class="note">
57
+ Each row plays the same utterance through three systems. The <strong>noisy</strong>
58
+ column is the raw monaural input given to every model. <strong>GENESES</strong> is the
59
+ baseline. <strong>DialogueSidon</strong> is ours (D = 32). Separated outputs are encoded
60
+ as stereo: speaker 1 on the left channel, speaker 2 on the right.
61
+ </p>
62
+
63
+ <h3>English &mdash; Switchboard</h3>
64
+ <div class="sample-table-wrapper">
65
+ <table class="sample-table">
66
+ <thead>
67
+ <tr>
68
+ <th>Example</th>
69
+ <th>Noisy mixture</th>
70
+ <th>GENESES</th>
71
+ <th>DialogueSidon (ours)</th>
72
+ </tr>
73
+ </thead>
74
+ <tbody>
75
+ <tr>
76
+ <td>sw02007</td>
77
+ <td><div class="waveform" data-src="wav/swb/noisy/sw02007.wav"></div></td>
78
+ <td><div class="waveform" data-src="wav/swb/geneses/sw02007.wav"></div></td>
79
+ <td><div class="waveform" data-src="wav/swb/dialoguesidon/sw02007.wav"></div></td>
80
+ </tr>
81
+ <tr>
82
+ <td>sw02093</td>
83
+ <td><div class="waveform" data-src="wav/swb/noisy/sw02093.wav"></div></td>
84
+ <td><div class="waveform" data-src="wav/swb/geneses/sw02093.wav"></div></td>
85
+ <td><div class="waveform" data-src="wav/swb/dialoguesidon/sw02093.wav"></div></td>
86
+ </tr>
87
+ <tr>
88
+ <td>sw02157</td>
89
+ <td><div class="waveform" data-src="wav/swb/noisy/sw02157.wav"></div></td>
90
+ <td><div class="waveform" data-src="wav/swb/geneses/sw02157.wav"></div></td>
91
+ <td><div class="waveform" data-src="wav/swb/dialoguesidon/sw02157.wav"></div></td>
92
+ </tr>
93
+ </tbody>
94
+ </table>
95
+ </div>
96
+
97
+ <h3>Multilingual &mdash; CallFriend</h3>
98
+ <div class="sample-table-wrapper">
99
+ <table class="sample-table">
100
+ <thead>
101
+ <tr>
102
+ <th>Language</th>
103
+ <th>Noisy mixture</th>
104
+ <th>GENESES</th>
105
+ <th>DialogueSidon (ours)</th>
106
+ </tr>
107
+ </thead>
108
+ <tbody>
109
+ <tr>
110
+ <td>German</td>
111
+ <td><div class="waveform" data-src="wav/cf/noisy/deu_1082.wav"></div></td>
112
+ <td><div class="waveform" data-src="wav/cf/geneses/deu_1082.wav"></div></td>
113
+ <td><div class="waveform" data-src="wav/cf/dialoguesidon/deu_1082.wav"></div></td>
114
+ </tr>
115
+ <tr>
116
+ <td>English</td>
117
+ <td><div class="waveform" data-src="wav/cf/noisy/eng-n_4708.wav"></div></td>
118
+ <td><div class="waveform" data-src="wav/cf/geneses/eng-n_4708.wav"></div></td>
119
+ <td><div class="waveform" data-src="wav/cf/dialoguesidon/eng-n_4708.wav"></div></td>
120
+ </tr>
121
+ <tr>
122
+ <td>French</td>
123
+ <td><div class="waveform" data-src="wav/cf/noisy/fra-q_5110.wav"></div></td>
124
+ <td><div class="waveform" data-src="wav/cf/geneses/fra-q_5110.wav"></div></td>
125
+ <td><div class="waveform" data-src="wav/cf/dialoguesidon/fra-q_5110.wav"></div></td>
126
+ </tr>
127
+ <tr>
128
+ <td>Japanese</td>
129
+ <td><div class="waveform" data-src="wav/cf/noisy/jpn_0921.wav"></div></td>
130
+ <td><div class="waveform" data-src="wav/cf/geneses/jpn_0921.wav"></div></td>
131
+ <td><div class="waveform" data-src="wav/cf/dialoguesidon/jpn_0921.wav"></div></td>
132
+ </tr>
133
+ <tr>
134
+ <td>Spanish</td>
135
+ <td><div class="waveform" data-src="wav/cf/noisy/spa_1469.wav"></div></td>
136
+ <td><div class="waveform" data-src="wav/cf/geneses/spa_1469.wav"></div></td>
137
+ <td><div class="waveform" data-src="wav/cf/dialoguesidon/spa_1469.wav"></div></td>
138
+ </tr>
139
+ <tr>
140
+ <td>Mandarin</td>
141
+ <td><div class="waveform" data-src="wav/cf/noisy/zho-m_0941.wav"></div></td>
142
+ <td><div class="waveform" data-src="wav/cf/geneses/zho-m_0941.wav"></div></td>
143
+ <td><div class="waveform" data-src="wav/cf/dialoguesidon/zho-m_0941.wav"></div></td>
144
+ </tr>
145
+ </tbody>
146
+ </table>
147
+ </div>
148
+
149
+ <h3>In-the-Wild &mdash; OpenDialog</h3>
150
+ <p class="note">
151
+ Real internet dialogue recordings with realistic, unknown degradations.
152
+ No clean reference exists for these clips.
153
+ </p>
154
+ <div class="sample-table-wrapper">
155
+ <table class="sample-table">
156
+ <thead>
157
+ <tr>
158
+ <th>Example</th>
159
+ <th>Noisy mixture</th>
160
+ <th>GENESES</th>
161
+ <th>DialogueSidon (ours)</th>
162
+ </tr>
163
+ </thead>
164
+ <tbody>
165
+ <tr>
166
+ <td>Example 1</td>
167
+ <td><div class="waveform" data-src="wav/od/noisy/example_1.wav"></div></td>
168
+ <td><div class="waveform" data-src="wav/od/geneses/example_1.wav"></div></td>
169
+ <td><div class="waveform" data-src="wav/od/dialoguesidon/example_1.wav"></div></td>
170
+ </tr>
171
+ <tr>
172
+ <td>Example 2</td>
173
+ <td><div class="waveform" data-src="wav/od/noisy/example_2.wav"></div></td>
174
+ <td><div class="waveform" data-src="wav/od/geneses/example_2.wav"></div></td>
175
+ <td><div class="waveform" data-src="wav/od/dialoguesidon/example_2.wav"></div></td>
176
+ </tr>
177
+ <tr>
178
+ <td>Example 3</td>
179
+ <td><div class="waveform" data-src="wav/od/noisy/example_3.wav"></div></td>
180
+ <td><div class="waveform" data-src="wav/od/geneses/example_3.wav"></div></td>
181
+ <td><div class="waveform" data-src="wav/od/dialoguesidon/example_3.wav"></div></td>
182
+ </tr>
183
+ </tbody>
184
+ </table>
185
+ </div>
186
+ </section>
187
+
188
+ <section id="bibtex">
189
+ <h2>Citation</h2>
190
+ <pre class="bibtex"><code>[BibTeX entry will be provided upon publication.]</code></pre>
191
+ </section>
192
+
193
+ </main>
194
+
195
+ <footer>
196
+ <div class="container">
197
+ <p>Demo page accompanying the SIGDIAL 2026 submission.
198
+ Code will be released upon publication.</p>
199
+ </div>
200
+ </footer>
201
+
202
+ <script type="module" src="script.js"></script>
203
+ </body>
204
  </html>
nakata26sigdial_dialoguesidon.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e04e3f772703dfd197d7da80c994f880e97fc6ff7b0122b41764f158d3fd80
3
+ size 3587492
script.js ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import WaveSurfer from "https://cdn.jsdelivr.net/npm/wavesurfer.js@7/dist/wavesurfer.esm.js";
2
+
3
+ // Per-channel render config: speaker 1 (blue) on top, speaker 2 (maroon) on
4
+ // bottom for stereo separated outputs. Mono noisy mixtures only use the first
5
+ // (blue) entry — wavesurfer ignores unused channel configs.
6
+ const CH_HEIGHT = 40;
7
+ const SPK1 = {
8
+ waveColor: "#8a9bbf",
9
+ progressColor: "#1f3a78",
10
+ height: CH_HEIGHT,
11
+ };
12
+ const SPK2 = {
13
+ waveColor: "#bf8a9b",
14
+ progressColor: "#8b1a3a",
15
+ height: CH_HEIGHT,
16
+ };
17
+
18
+ let currentWs = null;
19
+ let currentBtn = null;
20
+
21
+ function setIdle(btn) {
22
+ btn.textContent = "▶";
23
+ btn.classList.remove("playing");
24
+ btn.setAttribute("aria-label", "Play");
25
+ }
26
+ function setPlaying(btn) {
27
+ btn.textContent = "⏸";
28
+ btn.classList.add("playing");
29
+ btn.setAttribute("aria-label", "Pause");
30
+ }
31
+
32
+ function initWaveform(container) {
33
+ if (container.dataset.initialized) return;
34
+ container.dataset.initialized = "1";
35
+
36
+ // Wrap [button | waveform] inside the original parent cell.
37
+ const parent = container.parentNode;
38
+ const wrapper = document.createElement("div");
39
+ wrapper.className = "player";
40
+ const btn = document.createElement("button");
41
+ btn.type = "button";
42
+ btn.className = "play-btn";
43
+ setIdle(btn);
44
+
45
+ parent.insertBefore(wrapper, container);
46
+ wrapper.appendChild(btn);
47
+ wrapper.appendChild(container);
48
+
49
+ const ws = WaveSurfer.create({
50
+ container,
51
+ height: CH_HEIGHT,
52
+ barWidth: 2,
53
+ barGap: 1,
54
+ barRadius: 1,
55
+ cursorColor: "#111111",
56
+ cursorWidth: 1,
57
+ normalize: true,
58
+ interact: true,
59
+ splitChannels: [SPK1, SPK2],
60
+ url: container.dataset.src,
61
+ });
62
+
63
+ ws.on("decode", () => {
64
+ const data = ws.getDecodedData();
65
+ if (data && data.numberOfChannels >= 2) {
66
+ container.classList.add("stereo");
67
+ if (!container.querySelector(".ch-label")) {
68
+ const l1 = document.createElement("span");
69
+ l1.className = "ch-label ch-label-1";
70
+ l1.textContent = "S1";
71
+ const l2 = document.createElement("span");
72
+ l2.className = "ch-label ch-label-2";
73
+ l2.textContent = "S2";
74
+ container.appendChild(l1);
75
+ container.appendChild(l2);
76
+ }
77
+ } else {
78
+ container.classList.add("mono");
79
+ if (!container.querySelector(".ch-label")) {
80
+ const l = document.createElement("span");
81
+ l.className = "ch-label ch-label-mono";
82
+ l.textContent = "MIX";
83
+ container.appendChild(l);
84
+ }
85
+ }
86
+ });
87
+
88
+ btn.addEventListener("click", () => {
89
+ if (currentWs && currentWs !== ws) currentWs.pause();
90
+ ws.playPause();
91
+ });
92
+
93
+ ws.on("play", () => {
94
+ if (currentWs && currentWs !== ws) {
95
+ currentWs.pause();
96
+ if (currentBtn) setIdle(currentBtn);
97
+ }
98
+ currentWs = ws;
99
+ currentBtn = btn;
100
+ setPlaying(btn);
101
+ });
102
+ ws.on("pause", () => setIdle(btn));
103
+ ws.on("finish", () => setIdle(btn));
104
+ ws.on("error", () => {
105
+ btn.disabled = true;
106
+ btn.title = "Failed to load";
107
+ });
108
+ }
109
+
110
+ // Lazy init: only decode files as their row scrolls into view.
111
+ const observer = new IntersectionObserver(
112
+ (entries) => {
113
+ for (const e of entries) {
114
+ if (e.isIntersecting) {
115
+ initWaveform(e.target);
116
+ observer.unobserve(e.target);
117
+ }
118
+ }
119
+ },
120
+ { rootMargin: "300px" }
121
+ );
122
+
123
+ document.querySelectorAll(".waveform").forEach((el) => observer.observe(el));
style.css CHANGED
@@ -1,28 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  body {
2
- padding: 2rem;
3
- font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
 
 
 
 
 
 
4
  }
5
 
6
- h1 {
7
- font-size: 16px;
8
- margin-top: 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  }
10
 
11
- p {
12
- color: rgb(107, 114, 128);
13
- font-size: 15px;
14
- margin-bottom: 10px;
15
- margin-top: 5px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  }
17
 
18
- .card {
19
- max-width: 620px;
20
- margin: 0 auto;
21
- padding: 16px;
22
- border: 1px solid lightgray;
23
- border-radius: 16px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  }
25
 
26
- .card p:last-child {
27
- margin-bottom: 0;
 
28
  }
 
1
+ /* DialogueSidon demo page — formal academic styling */
2
+ :root {
3
+ --bg: #ffffff;
4
+ --text: #111111;
5
+ --muted: #555555;
6
+ --rule: #222222;
7
+ --rule-faint: #cccccc;
8
+ --accent: #1f3a78; /* dark navy — speaker 1 */
9
+ --accent-2: #8b1a3a; /* dark maroon — speaker 2 */
10
+ --shade: #f5f5f5;
11
+ --serif: "Charter", "Iowan Old Style", "Source Serif Pro", "Cambria", Georgia, "Times New Roman", serif;
12
+ --sans: -apple-system, BlinkMacSystemFont, "Helvetica Neue", "Segoe UI", Arial, sans-serif;
13
+ --mono: ui-monospace, "SFMono-Regular", Menlo, Consolas, "Liberation Mono", monospace;
14
+ }
15
+
16
+ * { box-sizing: border-box; }
17
+ html { -webkit-text-size-adjust: 100%; }
18
  body {
19
+ margin: 0;
20
+ font-family: var(--serif);
21
+ color: var(--text);
22
+ background: var(--bg);
23
+ line-height: 1.55;
24
+ font-size: 17px;
25
+ -webkit-font-smoothing: antialiased;
26
+ -moz-osx-font-smoothing: grayscale;
27
  }
28
 
29
+ .container {
30
+ width: min(880px, 92%);
31
+ margin: 0 auto;
32
+ }
33
+
34
+ /* ---------- Hero / title block ---------- */
35
+ .hero {
36
+ padding: 64px 0 36px;
37
+ text-align: center;
38
+ border-bottom: 1px solid var(--rule-faint);
39
+ }
40
+ .hero .venue {
41
+ font-family: var(--sans);
42
+ font-size: 12px;
43
+ text-transform: uppercase;
44
+ letter-spacing: 0.18em;
45
+ color: var(--muted);
46
+ margin: 0 0 18px;
47
+ }
48
+ .hero h1 {
49
+ font-family: var(--serif);
50
+ font-size: clamp(26px, 3.4vw, 36px);
51
+ font-weight: 700;
52
+ line-height: 1.25;
53
+ margin: 0 auto 24px;
54
+ max-width: 760px;
55
+ letter-spacing: -0.005em;
56
+ color: var(--text);
57
+ }
58
+ .hero .authors {
59
+ font-family: var(--serif);
60
+ font-size: 17px;
61
+ margin: 0 0 4px;
62
+ color: var(--text);
63
+ }
64
+ .hero .affiliation {
65
+ font-family: var(--serif);
66
+ font-style: italic;
67
+ font-size: 15px;
68
+ color: var(--muted);
69
+ margin: 0 0 24px;
70
+ }
71
+ .actions {
72
+ display: flex;
73
+ flex-wrap: wrap;
74
+ gap: 12px;
75
+ justify-content: center;
76
+ }
77
+ .btn {
78
+ display: inline-block;
79
+ font-family: var(--sans);
80
+ font-size: 13px;
81
+ font-weight: 500;
82
+ padding: 8px 18px;
83
+ background: #ffffff;
84
+ color: var(--text);
85
+ text-decoration: none;
86
+ border: 1px solid var(--rule);
87
+ border-radius: 2px;
88
+ letter-spacing: 0.02em;
89
+ transition: background 0.12s, color 0.12s;
90
+ }
91
+ .btn:hover {
92
+ background: var(--text);
93
+ color: #ffffff;
94
  }
95
 
96
+ /* ---------- Sections ---------- */
97
+ main { padding: 48px 0 32px; }
98
+ section { margin: 0 0 48px; scroll-margin-top: 24px; }
99
+ section h2 {
100
+ font-family: var(--serif);
101
+ font-size: 22px;
102
+ font-weight: 700;
103
+ margin: 0 0 14px;
104
+ padding-bottom: 6px;
105
+ border-bottom: 1px solid var(--rule-faint);
106
+ letter-spacing: 0;
107
+ }
108
+ section h3 {
109
+ font-family: var(--serif);
110
+ font-size: 17px;
111
+ font-weight: 700;
112
+ font-style: italic;
113
+ margin: 32px 0 10px;
114
+ color: var(--text);
115
+ }
116
+ section p { color: var(--text); margin: 0 0 12px; }
117
+ #abstract p {
118
+ font-size: 16px;
119
+ text-align: justify;
120
+ hyphens: auto;
121
+ }
122
+ .note {
123
+ font-size: 14px;
124
+ color: var(--muted);
125
+ font-style: italic;
126
  }
127
 
128
+ /* ---------- Sample tables ---------- */
129
+ .sample-table-wrapper {
130
+ overflow-x: auto;
131
+ margin: 8px 0 4px;
132
+ }
133
+ .sample-table {
134
+ width: 100%;
135
+ border-collapse: collapse;
136
+ font-family: var(--sans);
137
+ font-size: 14px;
138
+ }
139
+ /* Booktabs-style: top + bottom rules, no vertical lines, no row striping */
140
+ .sample-table thead th {
141
+ text-align: left;
142
+ font-weight: 600;
143
+ padding: 10px 12px;
144
+ border-top: 1.5px solid var(--rule);
145
+ border-bottom: 1px solid var(--rule);
146
+ white-space: nowrap;
147
+ background: transparent;
148
+ color: var(--text);
149
+ }
150
+ .sample-table tbody td {
151
+ padding: 12px 12px;
152
+ border-bottom: 1px solid var(--rule-faint);
153
+ vertical-align: middle;
154
+ background: transparent;
155
+ }
156
+ .sample-table tbody tr:last-child td {
157
+ border-bottom: 1.5px solid var(--rule);
158
+ }
159
+ .sample-table td:first-child {
160
+ font-family: var(--serif);
161
+ font-style: italic;
162
+ font-weight: 400;
163
+ white-space: nowrap;
164
+ max-width: 200px;
165
+ overflow: hidden;
166
+ text-overflow: ellipsis;
167
+ color: var(--text);
168
+ }
169
+
170
+ /* ---------- Waveform players (wavesurfer.js) ---------- */
171
+ .player {
172
+ display: flex;
173
+ align-items: center;
174
+ gap: 10px;
175
+ width: 100%;
176
+ min-width: 220px;
177
+ }
178
+ .play-btn {
179
+ flex: 0 0 auto;
180
+ width: 30px;
181
+ height: 30px;
182
+ border-radius: 50%;
183
+ border: 1px solid var(--rule);
184
+ background: #ffffff;
185
+ color: var(--text);
186
+ cursor: pointer;
187
+ font-size: 11px;
188
+ line-height: 1;
189
+ display: inline-flex;
190
+ align-items: center;
191
+ justify-content: center;
192
+ padding: 0;
193
+ font-family: var(--sans);
194
+ transition: background 0.12s, color 0.12s, border-color 0.12s;
195
+ }
196
+ .play-btn:hover {
197
+ background: var(--text);
198
+ color: #ffffff;
199
+ }
200
+ .play-btn.playing {
201
+ background: var(--accent);
202
+ border-color: var(--accent);
203
+ color: #ffffff;
204
+ }
205
+ .play-btn:disabled { opacity: 0.4; cursor: not-allowed; }
206
+
207
+ .waveform {
208
+ flex: 1 1 auto;
209
+ position: relative;
210
+ min-width: 160px;
211
+ background: transparent;
212
+ }
213
+ /* Reserve consistent row heights so wavesurfer rendering doesn't shift the
214
+ layout. Noisy column = mono (single channel); the model output columns =
215
+ stereo (two stacked channels). */
216
+ .sample-table td:nth-child(2) .waveform { min-height: 40px; }
217
+ .sample-table td:nth-child(3) .waveform,
218
+ .sample-table td:nth-child(4) .waveform { min-height: 80px; }
219
+
220
+ /* Per-channel labels */
221
+ .ch-label {
222
+ position: absolute;
223
+ left: 4px;
224
+ font-family: var(--mono);
225
+ font-size: 9px;
226
+ font-weight: 600;
227
+ letter-spacing: 0.06em;
228
+ background: rgba(255, 255, 255, 0.92);
229
+ padding: 1px 4px;
230
+ border-radius: 2px;
231
+ pointer-events: none;
232
+ }
233
+ .ch-label-1 { top: 1px; color: var(--accent); }
234
+ .ch-label-2 { bottom: 1px; color: var(--accent-2); }
235
+ .ch-label-mono { top: 50%; transform: translateY(-50%); color: var(--muted); }
236
+
237
+ .waveform.stereo::after {
238
+ content: "";
239
+ position: absolute;
240
+ left: 0;
241
+ right: 0;
242
+ top: 50%;
243
+ height: 1px;
244
+ background: var(--rule-faint);
245
+ pointer-events: none;
246
+ }
247
+
248
+ /* ---------- Citation ---------- */
249
+ .bibtex {
250
+ background: var(--shade);
251
+ color: var(--text);
252
+ padding: 14px 16px;
253
+ border: 1px solid var(--rule-faint);
254
+ border-radius: 2px;
255
+ font-family: var(--mono);
256
+ font-size: 13px;
257
+ overflow-x: auto;
258
+ line-height: 1.55;
259
+ margin: 0;
260
+ }
261
+ .bibtex code { background: transparent; padding: 0; font-family: inherit; }
262
+
263
+ /* ---------- Footer ---------- */
264
+ footer {
265
+ border-top: 1px solid var(--rule-faint);
266
+ padding: 22px 0 36px;
267
+ text-align: center;
268
+ }
269
+ footer p {
270
+ font-family: var(--sans);
271
+ font-size: 12px;
272
+ color: var(--muted);
273
+ margin: 0;
274
  }
275
 
276
+ @media (max-width: 720px) {
277
+ .hero { padding: 44px 0 28px; }
278
+ body { font-size: 16px; }
279
  }
wav/cf/dialoguesidon/deu_1082.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d53db182574dc9db856ae577ee35c528e7bec11d5d2c4c485662d37fc2c52da8
3
+ size 1920026
wav/cf/dialoguesidon/eng-n_4708.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f63801183886caed955d644fd7973935e9186051d1aaeaed204a3a554a520906
3
+ size 1920026
wav/cf/dialoguesidon/fra-q_5110.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e96a12a8893406b178f71a85b77ab424fab1a251a2f00cf59d8ee56296384895
3
+ size 1920026
wav/cf/dialoguesidon/jpn_0921.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0ddf5cd9ab6e31142a84051460e1423d5b4b904266dfa21cd0c425c0b5c7aa
3
+ size 1920026
wav/cf/dialoguesidon/spa_1469.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dea9cfbe7d448a4e5a4f191e55913ad1ba5cc96b5ba6ca4baf0e9aad4edaac78
3
+ size 1920026
wav/cf/dialoguesidon/zho-m_0941.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4583c736f4123a5f18942e83ec217d8889c6c06fa3352ceca7e1f7397912db9
3
+ size 1920026
wav/cf/geneses/deu_1082.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c056f6186f2d39541bbb73e6bb49640f4eb5f69f91471d305c0cf43ee115e6
3
+ size 1919974
wav/cf/geneses/eng-n_4708.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dc74886835903da18d0f6457544032ba052dd4302782babdf23ca7e23404822
3
+ size 1919974
wav/cf/geneses/fra-q_5110.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc2489925ab3f97e0ec2d77ee8a485acf9d52b50c5a1d8bb2c1c9ef95a39c49
3
+ size 1919974
wav/cf/geneses/jpn_0921.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c55d5a959f54d7a698070739453122513ca73fd0f33753a1ebdb622c54b2f13
3
+ size 1919974
wav/cf/geneses/spa_1469.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e6fa6e5de4f55cefa2ba52db6b544eda6a122bde127ce7b79d8eae485c6b40d
3
+ size 1919974
wav/cf/geneses/zho-m_0941.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b05f112964e3680670ef5b3bf961886b64d9c833f529bd9f6257ed2c2f2e2b64
3
+ size 1919974
wav/cf/noisy/deu_1082.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded27b7702aaf5aefe27fa18883250801cd34d56a516960229f6b7764789285c
3
+ size 320078
wav/cf/noisy/eng-n_4708.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e798125b74800e256c7e3b952cc26850ba7be944922e20de9291747ff5f92fa8
3
+ size 320078
wav/cf/noisy/fra-q_5110.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b45573f3de6909dd97f8227fa64085c692675a319caeb4c92ddae77efc9d550
3
+ size 320078
wav/cf/noisy/jpn_0921.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcfd1738ffe439939fe9f05320a23af9e54be297e4a0377e5cff9fb91de69f2e
3
+ size 320078
wav/cf/noisy/spa_1469.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950e15e2229764622b10a6698d4da2b9f086be98bef8253f80dde68f510bfa73
3
+ size 320078
wav/cf/noisy/zho-m_0941.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cadea6972d5e2c6604b69743f5d2c8d6a19162f366d1fa2adcc0fe22012fbef
3
+ size 320078
wav/od/dialoguesidon/example_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90948744bd0813183b6f6261fd704f981dcba8a7dd03536f4af94d61c091b8ca
3
+ size 1969946
wav/od/dialoguesidon/example_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b6af3d0ac00ac17661cb13153b9ee414d02a484a993e223bec0b6a58f8f8e84
3
+ size 2286746
wav/od/dialoguesidon/example_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68f8bf1fd7e2e191e5db46ea1c16d7678d636f074a08ab8b4f98df2d3db60f8a
3
+ size 2712986
wav/od/geneses/example_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b49a4633593fae7c4918e4b3cc6c21ed8162422e2fa81658836a3ffdf7e04c4
3
+ size 1969998
wav/od/geneses/example_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6980e19fa0ad7e5fd411b5c66e3df2868f8bcdc1a6ce80f3150a07f4b1de7e
3
+ size 2286798
wav/od/geneses/example_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e42c2251702bdb789801ff0b2d3c14cf6da1261b21f34f10bfb7ac87facead6
3
+ size 2713038
wav/od/noisy/example_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3554a0931a82012f927ebe1706eb294b899003aa67d31becb4ee6ee50aabead
3
+ size 656718
wav/od/noisy/example_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bc09e4361b20f18e30e610587aec2a94de37d04f49df2fda17114311fec417a
3
+ size 762318
wav/od/noisy/example_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6508d5570301fbd42f84608646204a736307ca0123baf00ee07c8651c7e2d90c
3
+ size 904398
wav/swb/dialoguesidon/sw02007.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78dd224a39cf4652f92806072f23f7b3bbf56ea5e79383f9a2b5ce8d1ec4673a
3
+ size 1920026
wav/swb/dialoguesidon/sw02093.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a4d2007fb55e6e0d2083bb7477807a13bf83ad6dd0e1f7e74ef4c8e03c19e8
3
+ size 1920026
wav/swb/dialoguesidon/sw02157.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dcb89b3eeefdc108845650d2cb4a2262cf9a030fa1774898c7131e01004f9c8
3
+ size 1920026
wav/swb/geneses/sw02007.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be351318367b62713efab42dba315549e8b34e123e0ca197584ae091a94827e8
3
+ size 1919974
wav/swb/geneses/sw02093.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1748b94c69af09529df4f32e039efa043763f353dfc6d74dc81505be489d9e54
3
+ size 1919974
wav/swb/geneses/sw02157.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed7e23fbf275cac08ff761f7ae828acd062c715cbfea6df0eaa440159fad11b
3
+ size 1919974
wav/swb/noisy/sw02007.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a39bdac9f16ff2168c7a6bbd4bc36b10b9fcec0b2bb313c947ab0e1f5855445
3
+ size 320078
wav/swb/noisy/sw02093.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb772c418e93d9a8a495523618b514daa291c60ed5f2d26d008e81085a476a2
3
+ size 320078
wav/swb/noisy/sw02157.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab9516c1672092facfb64b404687e82408d6edcd80f1c5a693c7886106f491c
3
+ size 320078