1% fsk_lib.m 2% David Rowe Oct 2015 - present 3% 4% mFSK modem, started out life as RTTY demodulator for Project 5% Horus High Altitude Ballon (HAB) telemetry, also used for: 6% 7% FreeDV 2400A: 4FSK UHF/UHF digital voice 8% Wenet.......: 100 kbit/s HAB High Def image telemetry 9% 10% Handles frequency offsets, performance right on ideal, C implementation 11% in codec2/src 12 131; 14 15function states = fsk_init(Fs, Rs, M=2, P=8, nsym=50) 16 states.M = M; 17 states.bitspersymbol = log2(M); 18 states.Fs = Fs; 19 states.Rs = Rs; 20 21 states.nsym = nsym; % Number of symbols processed by demodulator in each call, also 22 % the timing estimator window 23 Ts = states.Ts = Fs/Rs; % number of samples per symbol 24 assert(Ts == floor(Ts), "Fs/Rs must be an integer"); 25 26 N = states.N = Ts*states.nsym; % processing buffer size, nice big window for timing est 27 bin_width_Hz = 0.1*Rs; % we want enough DFT bins to get within 10% of the tones centre 28 Ndft = Fs/bin_width_Hz; 29 states.Ndft = 2.^ceil(log2(Ndft)); % round to nearest power of 2 for efficent FFT 30 states.Sf = zeros(states.Ndft,1); % current memory of dft mag samples 31 states.tc = 0.1; % average DFT over longtime window, accurate at low Eb/No, but slow 32 33 states.nbit = states.nsym*states.bitspersymbol; % number of bits per processing frame 34 Nmem = states.Nmem = N+2*Ts; % two symbol memory in down converted signals to allow for timing adj 35 36 states.f_dc = zeros(M,Nmem); 37 states.P = P; % oversample rate out of filter 38 assert(Ts/states.P == floor(Ts/states.P), "Ts/P must be an integer"); 39 40 states.tx_tone_separation = 2*Rs; 41 states.nin = N; % can be N +/- Ts/P samples to adjust for sample clock offsets 42 states.verbose = 0; 43 states.phi = zeros(1, M); % keep down converter osc phase continuous 44 45 % BER stats 46 47 states.ber_state = 0; 48 states.ber_valid_thresh = 0.05; states.ber_invalid_thresh = 0.1; 49 states.Tbits = 0; 50 states.Terrs = 0; 51 states.nerr_log = 0; 52 53 % extra simulation parameters 54 55 states.tx_real = 1; 56 states.dA(1:M) = 1; 57 states.df(1:M) = 0; 58 states.f(1:M) = 0; 59 states.norm_rx_timing = 0; 60 states.ppm = 0; 61 states.prev_pkt = []; 62 63 % Freq. estimator limits 64 states.fest_fmax = Fs; 65 states.fest_fmin = 0; 66 states.fest_min_spacing = 0.75*Rs; 67 states.freq_est_type = 'peak'; 68 69 %printf("Octave: M: %d Fs: %d Rs: %d Ts: %d nsym: %d nbit: %d N: %d Ndft: %d fmin: %d fmax: %d\n", 70 % states.M, states.Fs, states.Rs, states.Ts, states.nsym, states.nbit, states.N, states.Ndft, states.fest_fmin, states.fest_fmax); 71 72endfunction 73 74 75% modulator function 76 77function tx = fsk_mod(states, tx_bits) 78 79 M = states.M; 80 Ts = states.Ts; 81 Fs = states.Fs; 82 ftx = states.ftx; 83 df = states.df; % tone freq change in Hz/s 84 dA = states.dA; % amplitude of each tone 85 86 num_bits = length(tx_bits); 87 num_symbols = num_bits/states.bitspersymbol; 88 tx = zeros(states.Ts*num_symbols,1); 89 tx_phase = 0; 90 s = 1; 91 92 for i=1:states.bitspersymbol:num_bits 93 94 % map bits to FSK symbol (tone number) 95 96 K = states.bitspersymbol; 97 tone = tx_bits(i:i+(K-1)) * (2.^(K-1:-1:0))' + 1; 98 99 tx_phase_vec = tx_phase + (1:Ts)*2*pi*ftx(tone)/Fs; 100 tx_phase = tx_phase_vec(Ts) - floor(tx_phase_vec(Ts)/(2*pi))*2*pi; 101 if states.tx_real 102 tx((s-1)*Ts+1:s*Ts) = dA(tone)*2.0*cos(tx_phase_vec); 103 else 104 tx((s-1)*Ts+1:s*Ts) = dA(tone)*exp(j*tx_phase_vec); 105 end 106 s++; 107 108 % freq drift 109 110 ftx += df*Ts/Fs; 111 end 112 states.ftx = ftx; 113endfunction 114 115 116% Estimate the frequency of the FSK tones. In some applications (such 117% as balloon telemetry) these may not be well controlled by the 118% transmitter, so we have to try to estimate them. 119 120function states = est_freq(states, sf, ntones) 121 N = states.N; 122 Ndft = states.Ndft; 123 Fs = states.Fs; 124 125 % This assumption is OK for balloon telemetry but may not be true in 126 % general 127 128 min_tone_spacing = states.fest_min_spacing; 129 130 % set some limits to search range, which will mean some manual re-tuning 131 132 fmin = states.fest_fmin; 133 fmax = states.fest_fmax; 134 % note 0 Hz is mapped to Ndft/2+1 via fftshift 135 st = floor(fmin*Ndft/Fs) + Ndft/2; st = max(1,st); 136 en = floor(fmax*Ndft/Fs) + Ndft/2; en = min(Ndft,en); 137 138 #printf("Fs: %f Ndft: %d fmin: %f fmax: %f st: %d en: %d\n",Fs, Ndft, fmin, fmax, st, en) 139 140 % Update mag DFT --------------------------------------------- 141 142 % we break up input buffer to a series of overlapping Ndft sequences 143 numffts = floor(length(sf)/(Ndft/2)) - 1; 144 h = hanning(Ndft); 145 for i=1:numffts 146 a = (i-1)*Ndft/2+1; b = a + Ndft - 1; 147 Sf = abs(fftshift(fft(sf(a:b) .* h, Ndft))); 148 149 % Smooth DFT mag spectrum, slower to respond to changes but more 150 % accurate. Single order IIR filter is an exponentially weighted 151 % moving average. This means the freq est window is wider than 152 % timing est window 153 tc = states.tc; states.Sf = (1-tc)*states.Sf + tc*Sf; 154 end 155 156 % Search for each tone method 1 - peak pick each tone location ---------------------------------- 157 158 f = []; a = []; 159 Sf = states.Sf; 160 for m=1:ntones 161 [tone_amp tone_index] = max(Sf(st:en)); 162 tone_index += st - 1; 163 164 f = [f (tone_index-1-Ndft/2)*Fs/Ndft]; 165 a = [a tone_amp]; 166 167 % zero out region min_tone_spacing either side of max so we can find next highest peak 168 % closest spacing for non-coh mFSK is Rs 169 170 stz = tone_index - floor((min_tone_spacing)*Ndft/Fs); 171 stz = max(1,stz); 172 enz = tone_index + floor((min_tone_spacing)*Ndft/Fs); 173 enz = min(Ndft,enz); 174 Sf(stz:enz) = 0; 175 end 176 177 states.f = sort(f); 178 179 % Search for each tone method 2 - correlate with mask with non-zero entries at tone spacings ----- 180 181 % Create a mask with non-zero entries at tone spacing. Might be 182 % smarter to use the DFT of a hanning window as mask 183 184 mask = zeros(1,Ndft); 185 mask(1:3) = 1; 186 for m=1:ntones-1 187 bin = round(m*states.tx_tone_separation*Ndft/Fs); 188 mask(bin:bin+2) = 1; 189 end 190 mask = mask(1:bin+2); 191 states.mask = mask; 192 193 % drag mask over Sf, looking for peak in correlation 194 b_max = st; corr_max = 0; 195 Sf = states.Sf; corr_log = []; 196 for b=st:en-length(mask) 197 corr = mask * Sf(b:b+length(mask)-1); 198 corr_log = [corr_log corr]; 199 if corr > corr_max 200 corr_max = corr; 201 b_max = b; 202 end 203 end 204 foff = ((b_max-1)-Ndft/2)*Fs/Ndft; 205 206 if bitand(states.verbose, 0x8) 207 % enable this to single step through frames 208 figure(1); clf; subplot(211); plot(Sf,'b;sf;'); 209 hold on; plot(max(Sf)*[zeros(1,b_max) mask],'g;mask;'); hold off; 210 subplot(212); plot(corr_log); ylabel('corr against f'); 211 printf("foff: %4.0f\n", foff); 212 kbhit; 213 end 214 states.f2 = foff + (0:ntones-1)*states.tx_tone_separation; 215end 216 217 218% ------------------------------------------------------------------------------------ 219% Given a buffer of nin input Rs baud FSK samples, returns nsym bits. 220% 221% nin is the number of input samples required by demodulator. This is 222% time varying. It will nominally be N (8000), and occasionally N +/- 223% Ts/2 (e.g. 8080 or 7920). This is how we compensate for differences between the 224% remote tx sample clock and our sample clock. This function always returns 225% N/Ts (e.g. 50) demodulated bits. Variable number of input samples, constant number 226% of output bits. 227 228function [rx_bits states] = fsk_demod(states, sf) 229 M = states.M; 230 N = states.N; 231 Ndft = states.Ndft; 232 Fs = states.Fs; 233 Rs = states.Rs; 234 Ts = states.Ts; 235 nsym = states.nsym; 236 P = states.P; 237 nin = states.nin; 238 verbose = states.verbose; 239 Nmem = states.Nmem; 240 f = states.f; 241 242 assert(length(sf) == nin); 243 244 % down convert and filter at rate P ------------------------------ 245 246 % update filter (integrator) memory by shifting in nin samples 247 248 nold = Nmem-nin; % number of old samples we retain 249 250 f_dc = states.f_dc; 251 f_dc(:,1:nold) = f_dc(:,Nmem-nold+1:Nmem); 252 253 % freq shift down to around DC, ensuring continuous phase from last frame, as nin may vary 254 for m=1:M 255 phi_vec = states.phi(m) + (1:nin)*2*pi*f(m)/Fs; 256 f_dc(m,nold+1:Nmem) = sf .* exp(j*phi_vec)'; 257 states.phi(m) = phi_vec(nin); 258 states.phi(m) -= 2*pi*floor(states.phi(m)/(2*pi)); 259 end 260 % save filter (integrator) memory for next time 261 states.f_dc = f_dc; 262 263 % integrate over symbol period, which is effectively a LPF, removing 264 % the -2Fc frequency image. Can also be interpreted as an ideal 265 % integrate and dump, non-coherent demod. We run the integrator at 266 % rate P*Rs (1/P symbol offsets) to get outputs at a range of 267 % different fine timing offsets. We calculate integrator output 268 % over nsym+1 symbols so we have extra samples for the fine timing 269 % re-sampler at either end of the array. 270 271 f_int = zeros(M,(nsym+1)*P); 272 for i=1:(nsym+1)*P 273 st = 1 + (i-1)*Ts/P; 274 en = st+Ts-1; 275 for m=1:M 276 f_int(m,i) = sum(f_dc(m,st:en)); 277 end 278 end 279 states.f_int = f_int; 280 281 % fine timing estimation ----------------------------------------------- 282 283 % Non linearity has a spectral line at Rs, with a phase 284 % related to the fine timing offset. See: 285 % http://www.rowetel.com/blog/?p=3573 286 % We have sampled the integrator output at Fs=P samples/symbol, so 287 % lets do a single point DFT at w = 2*pi*f/Fs = 2*pi*Rs/(P*Rs) 288 % 289 % Note timing non-linearity derived by experiment. Not quite sure what I'm doing here..... 290 % but it gives 0dB impl loss for 2FSK Eb/No=9dB, testmode 1: 291 % Fs: 8000 Rs: 50 Ts: 160 nsym: 50 292 % frames: 200 Tbits: 9700 Terrs: 93 BER 0.010 293 294 Np = length(f_int(1,:)); 295 w = 2*pi*(Rs)/(P*Rs); 296 timing_nl = sum(abs(f_int(:,:)).^2); 297 x = timing_nl * exp(-j*w*(0:Np-1))'; 298 norm_rx_timing = angle(x)/(2*pi); 299 rx_timing = norm_rx_timing*P; 300 301 states.x = x; 302 states.timing_nl = timing_nl; 303 states.rx_timing = rx_timing; 304 prev_norm_rx_timing = states.norm_rx_timing; 305 states.norm_rx_timing = norm_rx_timing; 306 307 % estimate sample clock offset in ppm 308 % d_norm_timing is fraction of symbol period shift over nsym symbols 309 310 d_norm_rx_timing = norm_rx_timing - prev_norm_rx_timing; 311 312 % filter out big jumps due to nin changes 313 314 if abs(d_norm_rx_timing) < 0.2 315 appm = 1E6*d_norm_rx_timing/nsym; 316 states.ppm = 0.9*states.ppm + 0.1*appm; 317 end 318 319 % work out how many input samples we need on the next call. The aim 320 % is to keep angle(x) away from the -pi/pi (+/- 0.5 fine timing 321 % offset) discontinuity. The side effect is to track sample clock 322 % offsets 323 324 next_nin = N; 325 if norm_rx_timing > 0.25 326 next_nin += Ts/4; 327 end 328 if norm_rx_timing < -0.25; 329 next_nin -= Ts/4; 330 end 331 states.nin = next_nin; 332 333 % Now we know the correct fine timing offset, Re-sample integrator 334 % outputs using fine timing estimate and linear interpolation, then 335 % extract the demodulated bits 336 337 low_sample = floor(rx_timing); 338 fract = rx_timing - low_sample; 339 high_sample = ceil(rx_timing); 340 341 if bitand(verbose,0x2) 342 printf("rx_timing: %3.2f low_sample: %d high_sample: %d fract: %3.3f nin_next: %d\n", rx_timing, low_sample, high_sample, fract, next_nin); 343 end 344 345 f_int_resample = zeros(M,nsym); 346 rx_bits = zeros(1,nsym*states.bitspersymbol); 347 tone_max = zeros(1,nsym); 348 rx_nse_pow = 1E-12; rx_sig_pow = 0.0; 349 350 for i=1:nsym 351 st = i*P+1; 352 f_int_resample(:,i) = f_int(:,st+low_sample)*(1-fract) + f_int(:,st+high_sample)*fract; 353 354 % Hard decision decoding, Largest amplitude tone is the winner. 355 % Map this FSK "symbol" back to bits, depending on M 356 357 [tone_max(i) tone_index] = max(f_int_resample(:,i)); 358 st = (i-1)*states.bitspersymbol + 1; 359 en = st + states.bitspersymbol-1; 360 arx_bits = dec2bin(tone_index - 1, states.bitspersymbol) - '0'; 361 rx_bits(st:en) = arx_bits; 362 363 % each filter is the DFT of a chunk of spectrum. If there is no tone in the 364 % filter it can be considered an estimate of noise in that bandwidth 365 rx_pows = f_int_resample(:,i) .* conj(f_int_resample(:,i)); 366 rx_sig_pow += rx_pows(tone_index); 367 rx_nse_pow += (sum(rx_pows) - rx_pows(tone_index))/(M-1); 368 end 369 370 states.f_int_resample = f_int_resample; 371 372 % Eb/No estimation (todo: this needs some work, like calibration, low Eb/No perf, work for all M) 373 tone_max = abs(tone_max); 374 states.EbNodB = -6 + 20*log10(1E-6+mean(tone_max)/(1E-6+std(tone_max))); 375 376 % Estimators for LDPC decoder, might be a bit rough if nsym is small 377 rx_sig_pow = rx_sig_pow/nsym; 378 rx_nse_pow = rx_nse_pow/nsym; 379 states.v_est = sqrt(rx_sig_pow-rx_nse_pow); 380 states.SNRest = rx_sig_pow/rx_nse_pow; 381endfunction 382 383 384% BER counter and test frame sync logic ------------------------------------------- 385% We look for test_frame in rx_bits_buf, rx_bits_buf must be twice as long as test_frame 386 387function states = ber_counter(states, test_frame, rx_bits_buf) 388 nbit = length(test_frame); 389 assert (length(rx_bits_buf) == 2*nbit); 390 state = states.ber_state; 391 next_state = state; 392 393 if state == 0 394 395 % try to sync up with test frame 396 397 nerrs_min = nbit; 398 for i=1:nbit 399 error_positions = xor(rx_bits_buf(i:nbit+i-1), test_frame); 400 nerrs = sum(error_positions); 401 if nerrs < nerrs_min 402 nerrs_min = nerrs; 403 states.coarse_offset = i; 404 end 405 end 406 if nerrs_min/nbit < states.ber_valid_thresh 407 next_state = 1; 408 end 409 if bitand(states.verbose,0x4) 410 printf("coarse offset: %d nerrs_min: %d next_state: %d\n", states.coarse_offset, nerrs_min, next_state); 411 end 412 states.nerr = nerrs_min; 413 end 414 415 if state == 1 416 417 % we're synced up, lets measure bit errors 418 419 error_positions = xor(rx_bits_buf(states.coarse_offset:states.coarse_offset+nbit-1), test_frame); 420 nerrs = sum(error_positions); 421 if nerrs/nbit > states.ber_invalid_thresh 422 next_state = 0; 423 if bitand(states.verbose,0x4) 424 printf("coarse offset: %d nerrs: %d next_state: %d\n", states.coarse_offset, nerrs, next_state); 425 end 426 else 427 states.Terrs += nerrs; 428 states.Tbits += nbit; 429 states.nerr_log = [states.nerr_log nerrs]; 430 end 431 states.nerr = nerrs; 432 end 433 434 states.ber_state = next_state; 435endfunction 436 437 438% Alternative stateless BER counter that works on packets that may have gaps between them 439 440function states = ber_counter_packet(states, test_frame, rx_bits_buf) 441 ntestframebits = states.ntestframebits; 442 nbit = states.nbit; 443 444 % look for offset with min errors 445 446 nerrs_min = ntestframebits; coarse_offset = 1; 447 for i=1:nbit 448 error_positions = xor(rx_bits_buf(i:ntestframebits+i-1), test_frame); 449 nerrs = sum(error_positions); 450 %printf("i: %d nerrs: %d\n", i, nerrs); 451 if nerrs < nerrs_min 452 nerrs_min = nerrs; 453 coarse_offset = i; 454 end 455 end 456 457 % if less than threshold count errors 458 459 if nerrs_min/ntestframebits < 0.05 460 states.Terrs += nerrs_min; 461 states.Tbits += ntestframebits; 462 states.nerr_log = [states.nerr_log nerrs_min]; 463 if bitand(states.verbose, 0x4) 464 printf("coarse_offset: %d nerrs_min: %d\n", coarse_offset, nerrs_min); 465 end 466 end 467endfunction 468 469 470