1$nyquist plug-in 2$version 4 3$type process 4$preview linear 5$name (_ "Vocal Reduction and Isolation") 6$manpage "Vocal_Reduction_and_Isolation" 7$action (_ "Applying Action...") 8$author (_ "Robert Haenggi") 9$release 3.0.1 10$copyright (_ "Released under terms of the GNU General Public License version 2") 11 12 13;; vocrediso.ny, based on rjh-stereo-tool.ny 14;; Released under terms of the GNU General Public License version 2: 15;; http://www.gnu.org/licenses/old-licenses/gpl-2.0.html 16;; 17;; Plug-in version 1.7, May 2017 18;; added legacy Vocal Remover since V. 1.56, 06-2015 19;; Requires Audacity 2.1.1 or later, developed under Audacity 2.2.0 Alpha 20;; requires Audacity 2.2.0 for embedded help (button) 21;; 22;; For information about writing and modifying Nyquist plug-ins: 23;; https://wiki.audacityteam.org/wiki/Nyquist_Plug-ins_Reference 24 25$control action (_ "Action") choice ( 26 ("RemoveToMono" (_ "Remove Vocals: to mono")) 27 ("Remove" (_ "Remove Vocals")) 28 ("Isolate" (_ "Isolate Vocals")) 29 ("IsolateInvert" (_ "Isolate Vocals and Invert")) 30 ("RemoveCenterToMono" (_ "Remove Center: to mono")) 31 ("RemoveCenter" (_ "Remove Center")) 32 ("IsolateCenter" (_ "Isolate Center")) 33 ("IsolateCenterInvert" (_ "Isolate Center and Invert")) 34 (_ "Analyze")) 0 35$control strength (_ "Strength") real "" 1.0 0.0 50.0 36$control low-transition (_ "Low Cut for Vocals (Hz)") real "" 120 1 24000 37$control high-transition (_ "High Cut for Vocals (Hz)") real "" 9000 1 24000 38 39 40(setf bignum 1000000000) 41 42;;control rotation "Rotation (Degrees)" real "" 0 -180 180 43(setf rotation 0.0) 44 45;; make aref shorter 46(defmacro : (array index) (backquote (aref ,array ,index))) 47;; 48;; input corrections 49(defmacro limit (symbol lower upper) 50 (backquote (min ,upper (max ,lower ,symbol)))) 51 52 53;;; Some statistical functions 54;; 55;; Running Sum 56(defun sigma (sig) 57 (snd-biquad sig 1 0 0 1 0 0 0)) 58 59 60;; Compares two sounds (Y = A + B * X) 61(defun least-squares-xy (x y &key show) 62 (let* ((n (float (min (snd-length x ny:all) 63 (snd-length y ny:all)))) 64 (t-n (/ (1- n) (snd-srate x))) 65 (bar-x (* (/ n) (snd-sref (sigma x) t-n))) 66 (bar-y (* (/ n) (snd-sref (sigma y) t-n))) 67 (x (diff x bar-x)) 68 (y (diff y bar-y)) 69 (pos-x (max (snd-sref (sigma (s-abs x)) t-n) 1e-17)) 70 (pos-y (max (snd-sref (sigma (s-abs y)) t-n) 1e-17)) 71 (pos-xy (cond 72 ((> pos-x pos-y) (- (/ pos-y pos-x) 1)) 73 (t (- 1 (/ pos-x pos-y))))) 74 (s-xy (* (/ n) (snd-sref (sigma (prod x y)) t-n))) 75 (s-x2 (* (/ n) (snd-sref (sigma (prod x x)) t-n))) 76 (s-y2 (* (/ n) (snd-sref (sigma (prod y y)) t-n))) 77 (r (/ s-xy (max 1e-17 (sqrt (* s-x2 s-y2))))) 78 (r2 (expt r 2.0)) 79 (a1 (cond 80 ((= s-x2 0) 0) 81 ((= s-xy 0) 0) 82 (t (/ s-xy s-x2)))) 83 (a0 (- bar-y (* a1 bar-x)))) 84 (if show 85 (format t 86 (_ "Average x: ~a, y: ~a 87 Covariance x y: ~a 88 Average variance x: ~a, y: ~a 89 Standard deviation x: ~a, y: ~a 90 Coefficient of correlation: ~a 91 Coefficient of determination: ~a 92 Variation of residuals: ~a 93 y equals ~a plus ~a times x~%") 94 bar-x bar-y 95 s-xy 96 s-x2 s-y2 97 (sqrt s-x2) (sqrt s-y2) 98 r 99 r2 100 (* s-y2 (- 1 r2)) 101 a0 a1)) 102 (list r r2 pos-xy a0 a1))) 103 104 105(defun between (val low high) 106 (and (> val low) (<= val high))) 107 108 109;; Summary for "Analyse", fed with coeff. of correlation 110(defun summary (analysis &aux (corr (car analysis)) (pan-position (third analysis))) 111 (format nil (_ "Pan position: ~a~%The left and right channels are correlated by about ~a %. This means:~%~a~%") 112 pan-position 113 (round (* corr 100)) 114 (cond 115 ((between corr 0.97 1.1) 116 (_ " - The two channels are identical, i.e. dual mono. 117 The center can't be removed. 118 Any remaining difference may be caused by lossy encoding.")) 119 ((between corr 0.9 0.97) 120 (_ " - The two Channels are strongly related, i.e. nearly mono or extremely panned. 121 Most likely, the center extraction will be poor.")) 122 ((between corr 0.5 0.9) 123 (_ " - A fairly good value, at least stereo in average and not too wide spread.")) 124 ((between corr 0.2 0.5) 125 (_ " - An ideal value for Stereo. 126 However, the center extraction depends also on the used reverb.")) 127 ((between corr -0.2 0.2) 128 (_ " - The two channels are almost not related. 129 Either you have only noise or the piece is mastered in a unbalanced manner. 130 The center extraction can still be good though.")) 131 ((between corr -0.8 -0.2) 132 (_ " - Although the Track is stereo, the field is obviously extra wide. 133 This can cause strange effects. 134 Especially when played by only one speaker.")) 135 (t (_ " - The two channels are nearly identical. 136 Obviously, a pseudo stereo effect has been used 137 to spread the signal over the physical distance between the speakers. 138 Don't expect good results from a center removal."))))) 139 140 141;;; FFT Functionality 142;; 143;; different windows 144(defun fft-window (fs type hop zeros) 145 (cond 146 ; Bartlett, but first value > 0 147 ((= type 0) 148 (if (= zeros 0) 149 (snd-pwl 0 fs 150 (list 0 (/ (float hop)) (1- hop) 1.0 (1- fs) 0.0 fs)) 151 (progn (setf cut (truncate (- fs zeros 1))) 152 (snd-pwl 0 fs 153 (list 0 (/ (float hop))(- cut hop) 1.0 cut 0.0 fs 0.0 fs))))) 154 ; Hann 155 ((= type 1) 156 (seq (cue (control-srate-abs fs 157 (mult 0.5 158 (sum 1 159 (lfo (/ fs (* 2.0 hop)) (/ (- fs zeros) (get-duration fs)) *table* 270))))) 160 (cue (snd-const 0 0 fs (/ (float zeros) fs))))) 161 ; rectangle 162 (t (if (= fs hop) 163 (snd-pwl 0 fs (list 0 1.0 fs 1.0 fs)) 164 (snd-pwl 0 fs (list 0 1.0 (1- hop) 1.0 hop 0.0 fs 0.0 fs)))))) 165 166 167;; objects and classes 168(setf fft-class (send class :new 169 '(sound length skip window function argument2 wt-max))) 170 171(send fft-class :answer :next '() '( 172 (if argument2 173 (funcall function (snd-fft sound length skip window) argument2) 174 (funcall function (snd-fft sound length skip window))))) 175 176(send fft-class :answer :isnew '(snd len skp win fn arg2) '( 177 (setf wt-max 0.0) 178 (setf sound snd) 179 (setf length len) 180 (setf skip skp) 181 (setf window win) 182 (setf function fn) 183 (setf argument2 arg2))) 184 185 186;;; Short Time Fourier Transform 187(defun stft (sound length skip window 188 &optional (function #'(lambda (fr) fr)) (argument2 nil)) 189 (send fft-class :new sound length skip window function argument2)) 190 191 192;; Power spectrum calculated from fft (as sound) 193(defun power-spectrum (frame size sr) 194 (let* ((snd (scale (/ (sqrt 8.0) *win-sigma*) (snd-from-array 0 sr frame))) 195 (zero (snd-from-array 0 sr #(0)))) 196 (s-log (scale 2 (snd-avg (seq (cue zero) (cue (prod snd snd))) 2 2 op-average))))) 197 198 199;; Make a weighted center (mono) 200;; that can be subtracted from L&R 201(defun steer (side obj &aux (mid (send obj :next))) 202 (cond 203 ((and mid side) 204 (let* ((power-sum (power-spectrum mid fs 2)) 205 (power-dif (power-spectrum side fs 2)) 206 (wt-exp (s-exp (scale strength (diff power-dif power-sum)))) 207 (weight (shape wt-exp *map* 0)) 208 ;(weight (shape (db-to-linear power-dif) (s-exp (mult 2 (s-log *map2*))) 1)) 209 (weight (snd-samples weight bignum))) ;Fix for bug 2706 210 (do ((i low-transition (+ i 2))) 211 ((>= i high-transition)) 212 (setf (: out i) (: weight (/ (1+ i) 2))) 213 (setf (: out (1+ i)) (: weight (/ (1+ i) 2)))) 214 (snd-samples (mult (snd-from-array 0 1 mid) (snd-from-array 0 1 out)) fs))) 215 (t nil))) 216 217 218;;; Sound Pre-processing 219;; 220;; rotate the stereo field around the center point 221;; between the two speakers 222(defun transform (snd &optional (cosine (cos (abs rotation))) (sine (sin (abs rotation)))) 223 (let* ((direction (/ (+ 1e-15 rotation) (abs (+ 1e-15 rotation)))) 224 (fft-offset (s-rest (if (< action 8) (/ hop (get-duration *sr*)) 0))) 225 (L (seq (cue fft-offset) (cue (: snd 0)))) 226 (R (seq (cue fft-offset) (cue (: snd 1))))) 227 (vector (sum (mult cosine L) (mult (- direction) sine R)) 228 (sum (mult direction sine L) (mult cosine R))))) 229 230 231;;; main procedure 232(defun catalog (&aux snd (original-len (/ (+ len hop) *sr*)) (dur (get-duration 1))) 233 (if (soundp *track*) 234 (return-from catalog (_ "This plug-in works only with stereo tracks.")) 235 (setf snd (vector (snd-copy (: *track* 0)) (snd-copy (: *track* 1))))) 236 (cond 237 ((= action 8) 238 (return-from catalog (summary (least-squares-xy (: snd 0) (: snd 1) :show nil)))) 239 ((= action 0) 240 (display "" low-transition high-transition) ;values are quantized to bins 241 (return-from catalog 242 (sum (: snd 0) 243 (mult -1 (: snd 1)) 244 (lowpass8 (: snd 1) low-transition) 245 (highpass8 (diff (: snd 1) (lowpass8 (: snd 1) low-transition)) 246 high-transition)))) 247 ((= action 4) 248 (return-from catalog (diff (: snd 0) (: snd 1)))) 249 (t ;For everything that involves center isolation 250 (setf snd (transform snd)) 251 (setf analyze-win (s-sqrt (fft-window fs type hop zs))) 252 (setf synthesis-win analyze-win) 253 (unless double-win 254 (setf analyze-win (fft-window fs type hop zs)) 255 (setf synthesis-win nil)) 256 (setf *win-sigma* (* fs (peak (integrate analyze-win) ny:all))) 257 (setf sum-fft (stft (sum (: snd 0) (: snd 1)) fs hop analyze-win)) 258 (setf dif-fft (stft (diff (: snd 0) (: snd 1)) fs hop analyze-win 'steer sum-fft)) 259 (setf c (snd-ifft 0 *sr* dif-fft hop synthesis-win)) 260 (cond 261 ((member action '(1 5)) 262 (setf output (vector (extract-abs (/ hop *sr*) original-len (diff (: snd 0) c)) 263 (extract-abs (/ hop *sr*) original-len (diff (: snd 1) c))))) 264 ((member action '(2 6)) 265 (setf strength (recip strength)) 266 (setf output (extract-abs (/ hop *sr*) original-len c))) 267 ((member action '(3 7)) 268 (setf strength (recip strength)) 269 (setf output (extract-abs (/ hop *sr*) original-len (mult -1 c))))))) 270 (if (soundp output) 271 (setf output (vector output output))) 272 (mult *norm* output)) 273 274 275;;;; Main 276 277*track* ;Return original audio if something goes wrong 278 279;;; we start with some variable assignments 280(setf *sr* *sound-srate*) 281 282;; hard coded STFT parameters 283;; Change for experimental purposes 284(setf type 1); -1 = square 0 =triangle 1 = Han 285(setf double-win t); t = windows before and after 286(setf fs (* 16 512)); fft-frame-size 287(setf hop (* 7 512)); Hop (step size to advance) 288(setf zs (- fs (* 2 hop))); zero-padding 289 290;; Some input corrections 291(setf strength (expt (limit strength 0.02 50.0) 2.0)) 292 293; bins to be ignored (bass and treble) 294(if (> action 3) 295 (psetq low-transition 0.0 high-transition 24000.0)) 296 297(let* ((ltrans (logior (truncate (/ (* 2 (1- fs) (limit low-transition 1 (/ *sr* 2.0))) *sr*)) 1)) 298 (htrans (logior (limit (truncate (/ (* 2 fs high-transition) *sr*)) 1 (1- fs)) 1))) 299 (psetq low-transition (min ltrans htrans) 300 high-transition (max ltrans htrans))) 301 302; back to real frequencies for the classic Vocal Remover 303; Note: Fqs are quantized as if FFT would be used 304; ca. 2.6 Hz bin-distance @ 44.1 kHz 305(when (= action 0) 306 (setq bin-distance (/ *sr* 2.0 fs)) 307 (psetq low-transition (* low-transition bin-distance) 308 high-transition (* high-transition bin-distance))) 309 310(setf out (snd-samples (snd-const 0.0 0 fs fs) fs)); holds the left/right weights (removal) 311(setf *map* (snd-pwl 0 10000 (list 0 0.5 10000 0.0 20000 -0.5 20001))) 312(setf *norm* 1.0) 313(expand 120); remove for lower efficiency/more conservative memory management 314(catalog) 315