1$nyquist plug-in
2$version 4
3$type process
4$preview linear
5$name (_ "Vocal Reduction and Isolation")
6$manpage "Vocal_Reduction_and_Isolation"
7$action (_ "Applying Action...")
8$author (_ "Robert Haenggi")
9$release 3.0.1
10$copyright (_ "Released under terms of the GNU General Public License version 2")
11
12
13;; vocrediso.ny, based on rjh-stereo-tool.ny
14;; Released under terms of the GNU General Public License version 2:
15;; http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
16;;
17;; Plug-in version 1.7, May  2017
18;; added legacy Vocal Remover since V. 1.56, 06-2015
19;; Requires Audacity 2.1.1  or later, developed under Audacity 2.2.0 Alpha
20;; requires Audacity 2.2.0 for embedded help (button)
21;;
22;; For information about writing and modifying Nyquist plug-ins:
23;; https://wiki.audacityteam.org/wiki/Nyquist_Plug-ins_Reference
24
25$control action (_ "Action") choice (
26    ("RemoveToMono" (_ "Remove Vocals: to mono"))
27    ("Remove" (_ "Remove Vocals"))
28    ("Isolate" (_ "Isolate Vocals"))
29    ("IsolateInvert" (_ "Isolate Vocals and Invert"))
30    ("RemoveCenterToMono" (_ "Remove Center: to mono"))
31    ("RemoveCenter" (_ "Remove Center"))
32    ("IsolateCenter" (_ "Isolate Center"))
33    ("IsolateCenterInvert" (_ "Isolate Center and Invert"))
34    (_ "Analyze")) 0
35$control strength (_ "Strength") real "" 1.0 0.0 50.0
36$control low-transition (_ "Low Cut for Vocals (Hz)") real "" 120 1 24000
37$control high-transition (_ "High Cut for Vocals (Hz)") real "" 9000 1 24000
38
39
40(setf bignum 1000000000)
41
42;;control rotation "Rotation (Degrees)" real "" 0 -180 180
43(setf rotation 0.0)
44
45;; make aref shorter
46(defmacro  : (array index) (backquote (aref ,array ,index)))
47;;
48;; input corrections
49(defmacro limit (symbol lower upper)
50  (backquote (min ,upper (max ,lower ,symbol))))
51
52
53;;; Some statistical functions
54;;
55;; Running Sum
56(defun sigma (sig)
57  (snd-biquad sig 1 0 0 1 0 0 0))
58
59
60;; Compares two sounds (Y = A + B * X)
61(defun least-squares-xy (x y &key show)
62  (let* ((n (float (min (snd-length x ny:all)
63                        (snd-length y ny:all))))
64         (t-n (/ (1- n) (snd-srate x)))
65         (bar-x (* (/ n) (snd-sref (sigma x) t-n)))
66         (bar-y (* (/ n) (snd-sref (sigma y) t-n)))
67         (x (diff x bar-x))
68         (y (diff y bar-y))
69         (pos-x (max (snd-sref (sigma (s-abs x)) t-n) 1e-17))
70         (pos-y (max (snd-sref (sigma (s-abs y)) t-n) 1e-17))
71         (pos-xy (cond
72            ((>  pos-x pos-y) (- (/  pos-y pos-x) 1))
73            (t (- 1 (/ pos-x pos-y)))))
74         (s-xy (* (/ n) (snd-sref (sigma (prod x y)) t-n)))
75         (s-x2 (* (/ n) (snd-sref (sigma (prod x x)) t-n)))
76         (s-y2 (* (/ n) (snd-sref (sigma (prod y y)) t-n)))
77         (r (/ s-xy (max 1e-17 (sqrt (* s-x2 s-y2)))))
78         (r2 (expt r 2.0))
79         (a1 (cond
80               ((= s-x2 0) 0)
81               ((= s-xy 0) 0)
82               (t (/ s-xy s-x2))))
83         (a0 (- bar-y (* a1 bar-x))))
84    (if show
85        (format t
86                (_ "Average x: ~a, y: ~a
87                    Covariance x y: ~a
88                    Average variance x: ~a, y: ~a
89                    Standard deviation x: ~a, y: ~a
90                    Coefficient of correlation: ~a
91                    Coefficient of determination: ~a
92                    Variation of residuals: ~a
93                    y equals ~a plus ~a times x~%")
94                bar-x   bar-y
95                s-xy
96                s-x2  s-y2
97                (sqrt s-x2) (sqrt s-y2)
98                r
99                r2
100                (* s-y2 (- 1 r2))
101                a0  a1))
102    (list r r2 pos-xy  a0 a1)))
103
104
105(defun between (val low high)
106  (and (> val low) (<= val high)))
107
108
109;; Summary for "Analyse", fed with coeff. of correlation
110(defun summary (analysis &aux (corr (car analysis)) (pan-position (third analysis)))
111  (format nil (_ "Pan position: ~a~%The left and right channels are correlated by about ~a %. This means:~%~a~%")
112          pan-position
113          (round (* corr 100))
114          (cond
115           ((between corr 0.97 1.1)
116            (_ " - The two channels are identical, i.e. dual mono.
117                The center can't be removed.
118                Any remaining difference may be caused by lossy encoding."))
119           ((between corr 0.9 0.97)
120            (_ " - The two Channels are strongly related, i.e. nearly mono or extremely panned.
121                Most likely, the center extraction will be poor."))
122           ((between corr 0.5 0.9)
123            (_ " - A fairly good value, at least stereo in average and not too wide spread."))
124           ((between corr 0.2 0.5)
125            (_ " - An ideal value for Stereo.
126                However, the center extraction depends also on the used reverb."))
127           ((between  corr -0.2 0.2)
128            (_ " - The two channels are almost not related.
129                Either you have only noise or the piece is mastered in a unbalanced manner.
130                The center extraction can still be good though."))
131           ((between corr -0.8 -0.2)
132            (_ " - Although the Track is stereo, the field is obviously extra wide.
133                This can cause strange effects.
134                Especially when played by only one speaker."))
135           (t (_ " - The two channels are nearly identical.
136                  Obviously, a pseudo stereo effect has been used
137                  to spread the signal over the physical distance between the speakers.
138                  Don't expect good results from a center removal.")))))
139
140
141;;; FFT Functionality
142;;
143;; different windows
144(defun fft-window (fs type hop zeros)
145  (cond
146    ; Bartlett, but first value > 0
147    ((= type 0)
148        (if (= zeros 0)
149            (snd-pwl 0 fs
150                     (list 0 (/ (float hop)) (1- hop) 1.0 (1- fs) 0.0 fs))
151            (progn (setf cut (truncate (- fs zeros 1)))
152                   (snd-pwl 0 fs
153                            (list 0 (/ (float hop))(- cut hop) 1.0 cut 0.0 fs 0.0 fs)))))
154    ; Hann
155    ((= type 1)
156        (seq (cue (control-srate-abs fs
157                        (mult 0.5
158                              (sum 1
159                                   (lfo (/ fs (* 2.0 hop)) (/ (- fs zeros) (get-duration fs)) *table* 270)))))
160              (cue (snd-const 0 0 fs (/ (float zeros) fs)))))
161    ; rectangle
162    (t  (if (= fs hop)
163            (snd-pwl 0 fs (list 0 1.0 fs 1.0 fs))
164        (snd-pwl 0 fs (list 0 1.0 (1- hop) 1.0 hop 0.0 fs 0.0 fs))))))
165
166
167;; objects and classes
168(setf fft-class (send class :new
169                 '(sound length skip window function argument2 wt-max)))
170
171(send fft-class :answer :next '() '(
172    (if argument2
173        (funcall function (snd-fft sound length skip window) argument2)
174        (funcall function  (snd-fft sound length skip window)))))
175
176(send fft-class :answer :isnew '(snd len skp win fn arg2) '(
177    (setf wt-max 0.0)
178    (setf sound snd)
179    (setf length len)
180    (setf skip skp)
181    (setf window win)
182    (setf function fn)
183    (setf argument2 arg2)))
184
185
186;;; Short Time Fourier Transform
187(defun stft (sound length skip window
188             &optional (function #'(lambda (fr) fr)) (argument2 nil))
189  (send fft-class :new sound length skip window function argument2))
190
191
192;; Power spectrum calculated from fft (as sound)
193(defun power-spectrum (frame size sr)
194  (let* ((snd (scale    (/ (sqrt 8.0) *win-sigma*) (snd-from-array 0 sr frame)))
195         (zero (snd-from-array 0 sr #(0))))
196     (s-log  (scale 2 (snd-avg (seq (cue zero) (cue (prod    snd snd))) 2 2 op-average)))))
197
198
199;; Make a weighted center (mono)
200;; that can be subtracted from L&R
201(defun steer (side obj &aux (mid (send obj :next)))
202  (cond
203    ((and mid side)
204        (let* ((power-sum  (power-spectrum mid fs 2))
205               (power-dif (power-spectrum side fs 2))
206               (wt-exp (s-exp   (scale strength    (diff power-dif power-sum))))
207               (weight (shape wt-exp *map* 0))
208               ;(weight (shape (db-to-linear power-dif)  (s-exp  (mult 2 (s-log *map2*))) 1))
209               (weight (snd-samples weight bignum)))  ;Fix for bug 2706
210          (do ((i low-transition (+ i 2)))
211              ((>= i high-transition))
212            (setf (: out i) (: weight (/ (1+ i) 2)))
213            (setf (: out (1+ i)) (: weight (/ (1+ i) 2))))
214          (snd-samples  (mult (snd-from-array 0 1 mid) (snd-from-array 0 1 out)) fs)))
215    (t nil)))
216
217
218;;; Sound Pre-processing
219;;
220;; rotate the stereo field around the center point
221;; between the two speakers
222(defun transform  (snd &optional (cosine (cos (abs rotation))) (sine (sin (abs rotation))))
223  (let* ((direction (/  (+ 1e-15 rotation) (abs  (+ 1e-15 rotation))))
224         (fft-offset (s-rest (if (< action 8) (/ hop (get-duration *sr*)) 0)))
225         (L (seq (cue fft-offset) (cue (: snd 0))))
226         (R (seq (cue fft-offset) (cue (: snd 1)))))
227    (vector (sum (mult cosine  L) (mult (- direction) sine R))
228            (sum (mult direction  sine L) (mult cosine R)))))
229
230
231;;; main procedure
232(defun catalog  (&aux  snd (original-len (/ (+ len hop) *sr*)) (dur (get-duration 1)))
233  (if (soundp *track*)
234      (return-from catalog  (_ "This plug-in works only with stereo tracks."))
235      (setf snd (vector (snd-copy (: *track* 0)) (snd-copy (: *track* 1)))))
236  (cond
237    ((= action 8)
238        (return-from catalog (summary (least-squares-xy (: snd 0) (: snd 1) :show nil))))
239    ((= action 0)
240        (display "" low-transition high-transition) ;values are quantized to bins
241        (return-from catalog
242                     (sum (: snd 0)
243                          (mult -1 (: snd 1))
244                          (lowpass8 (: snd 1) low-transition)
245                          (highpass8 (diff (: snd 1) (lowpass8 (: snd 1) low-transition))
246                                     high-transition))))
247    ((= action 4)
248        (return-from catalog (diff (: snd 0) (: snd 1))))
249    (t  ;For everything that involves center isolation
250        (setf snd  (transform  snd))
251        (setf analyze-win (s-sqrt (fft-window fs type hop zs)))
252        (setf synthesis-win analyze-win)
253        (unless double-win
254            (setf analyze-win (fft-window fs type hop zs))
255            (setf synthesis-win nil))
256        (setf *win-sigma* (* fs (peak (integrate analyze-win) ny:all)))
257        (setf sum-fft (stft (sum (: snd 0) (: snd 1)) fs hop analyze-win))
258        (setf dif-fft (stft (diff (: snd 0) (: snd 1)) fs hop analyze-win 'steer sum-fft))
259        (setf c (snd-ifft 0 *sr* dif-fft hop  synthesis-win))
260        (cond
261          ((member action '(1 5))
262              (setf output (vector (extract-abs  (/ hop *sr*) original-len (diff (: snd 0) c))
263                                   (extract-abs  (/ hop *sr*) original-len (diff (: snd 1)  c)))))
264          ((member action '(2 6))
265              (setf strength (recip strength))
266              (setf output (extract-abs (/ hop *sr*) original-len c)))
267          ((member action '(3 7))
268              (setf strength (recip strength))
269              (setf output (extract-abs  (/ hop *sr*) original-len (mult -1 c)))))))
270    (if (soundp output)
271        (setf output (vector output output)))
272    (mult *norm* output))
273
274
275;;;; Main
276
277*track* ;Return original audio if something goes wrong
278
279;;;  we start with some variable assignments
280(setf *sr* *sound-srate*)
281
282;; hard coded STFT parameters
283;; Change for experimental purposes
284(setf type 1); -1 = square 0 =triangle 1 = Han
285(setf double-win t); t = windows before and after
286(setf fs (* 16 512)); fft-frame-size
287(setf hop (* 7 512)); Hop (step size to advance)
288(setf zs (- fs (* 2 hop))); zero-padding
289
290;; Some input corrections
291(setf strength  (expt (limit strength 0.02 50.0) 2.0))
292
293; bins to be ignored (bass and treble)
294(if (> action 3)
295    (psetq low-transition 0.0 high-transition 24000.0))
296
297(let* ((ltrans (logior (truncate (/ (* 2 (1- fs) (limit low-transition 1 (/ *sr* 2.0))) *sr*)) 1))
298       (htrans (logior  (limit (truncate (/ (* 2 fs  high-transition) *sr*)) 1 (1- fs)) 1)))
299  (psetq low-transition (min ltrans htrans)
300         high-transition (max ltrans htrans)))
301
302; back to real frequencies for the classic Vocal Remover
303; Note: Fqs are quantized as if FFT would be used
304; ca. 2.6 Hz bin-distance @ 44.1 kHz
305(when (= action 0)
306  (setq bin-distance (/ *sr* 2.0 fs))
307  (psetq low-transition (* low-transition bin-distance)
308         high-transition (* high-transition bin-distance)))
309
310(setf out (snd-samples (snd-const 0.0 0 fs fs) fs)); holds the left/right weights (removal)
311(setf *map* (snd-pwl 0 10000 (list 0 0.5 10000 0.0 20000 -0.5 20001)))
312(setf *norm* 1.0)
313(expand 120); remove for lower efficiency/more conservative  memory management
314(catalog)
315