1% fsk_lib.m
2% David Rowe Oct 2015 - present
3%
4% mFSK modem, started out life as RTTY demodulator for Project
5% Horus High Altitude Ballon (HAB) telemetry, also used for:
6%
7% FreeDV 2400A: 4FSK UHF/UHF digital voice
8% Wenet.......: 100 kbit/s HAB High Def image telemetry
9%
10% Handles frequency offsets, performance right on ideal, C implementation
11% in codec2/src
12
131;
14
15function states = fsk_init(Fs, Rs, M=2, P=8, nsym=50)
16  states.M = M;
17  states.bitspersymbol = log2(M);
18  states.Fs = Fs;
19  states.Rs = Rs;
20
21  states.nsym = nsym;                             % Number of symbols processed by demodulator in each call, also
22                                                  % the timing estimator window
23  Ts = states.Ts = Fs/Rs;                         % number of samples per symbol
24  assert(Ts == floor(Ts), "Fs/Rs must be an integer");
25
26  N = states.N = Ts*states.nsym;                  % processing buffer size, nice big window for timing est
27  bin_width_Hz = 0.1*Rs;                          % we want enough DFT bins to get within 10% of the tones centre
28  Ndft = Fs/bin_width_Hz;
29  states.Ndft = 2.^ceil(log2(Ndft));              % round to nearest power of 2 for efficent FFT
30  states.Sf = zeros(states.Ndft,1);               % current memory of dft mag samples
31  states.tc = 0.1;                                % average DFT over longtime window, accurate at low Eb/No, but slow
32
33  states.nbit = states.nsym*states.bitspersymbol; % number of bits per processing frame
34  Nmem = states.Nmem  = N+2*Ts;                   % two symbol memory in down converted signals to allow for timing adj
35
36  states.f_dc = zeros(M,Nmem);
37  states.P = P;                                   % oversample rate out of filter
38  assert(Ts/states.P == floor(Ts/states.P), "Ts/P must be an integer");
39
40  states.tx_tone_separation = 2*Rs;
41  states.nin = N;                                 % can be N +/- Ts/P samples to adjust for sample clock offsets
42  states.verbose = 0;
43  states.phi = zeros(1, M);                       % keep down converter osc phase continuous
44
45  % BER stats
46
47  states.ber_state = 0;
48  states.ber_valid_thresh = 0.05;  states.ber_invalid_thresh = 0.1;
49  states.Tbits = 0;
50  states.Terrs = 0;
51  states.nerr_log = 0;
52
53  % extra simulation parameters
54
55  states.tx_real = 1;
56  states.dA(1:M) = 1;
57  states.df(1:M) = 0;
58  states.f(1:M) = 0;
59  states.norm_rx_timing = 0;
60  states.ppm = 0;
61  states.prev_pkt = [];
62
63  % Freq. estimator limits
64  states.fest_fmax = Fs;
65  states.fest_fmin = 0;
66  states.fest_min_spacing = 0.75*Rs;
67  states.freq_est_type = 'peak';
68
69  %printf("Octave: M: %d Fs: %d Rs: %d Ts: %d nsym: %d nbit: %d N: %d Ndft: %d fmin: %d fmax: %d\n",
70  %       states.M, states.Fs, states.Rs, states.Ts, states.nsym, states.nbit, states.N, states.Ndft, states.fest_fmin, states.fest_fmax);
71
72endfunction
73
74
75% modulator function
76
77function tx = fsk_mod(states, tx_bits)
78
79    M  = states.M;
80    Ts = states.Ts;
81    Fs = states.Fs;
82    ftx  = states.ftx;
83    df = states.df; % tone freq change in Hz/s
84    dA = states.dA; % amplitude of each tone
85
86    num_bits = length(tx_bits);
87    num_symbols = num_bits/states.bitspersymbol;
88    tx = zeros(states.Ts*num_symbols,1);
89    tx_phase = 0;
90    s = 1;
91
92    for i=1:states.bitspersymbol:num_bits
93
94      % map bits to FSK symbol (tone number)
95
96      K = states.bitspersymbol;
97      tone = tx_bits(i:i+(K-1)) * (2.^(K-1:-1:0))' + 1;
98
99      tx_phase_vec = tx_phase + (1:Ts)*2*pi*ftx(tone)/Fs;
100      tx_phase = tx_phase_vec(Ts) - floor(tx_phase_vec(Ts)/(2*pi))*2*pi;
101      if states.tx_real
102        tx((s-1)*Ts+1:s*Ts) = dA(tone)*2.0*cos(tx_phase_vec);
103      else
104        tx((s-1)*Ts+1:s*Ts) = dA(tone)*exp(j*tx_phase_vec);
105      end
106      s++;
107
108      % freq drift
109
110      ftx += df*Ts/Fs;
111    end
112    states.ftx = ftx;
113endfunction
114
115
116% Estimate the frequency of the FSK tones.  In some applications (such
117% as balloon telemetry) these may not be well controlled by the
118% transmitter, so we have to try to estimate them.
119
120function states = est_freq(states, sf, ntones)
121  N = states.N;
122  Ndft = states.Ndft;
123  Fs = states.Fs;
124
125  % This assumption is OK for balloon telemetry but may not be true in
126  % general
127
128  min_tone_spacing = states.fest_min_spacing;
129
130  % set some limits to search range, which will mean some manual re-tuning
131
132  fmin = states.fest_fmin;
133  fmax = states.fest_fmax;
134  % note 0 Hz is mapped to Ndft/2+1 via fftshift
135  st = floor(fmin*Ndft/Fs) + Ndft/2;  st = max(1,st);
136  en = floor(fmax*Ndft/Fs) + Ndft/2;  en = min(Ndft,en);
137
138  #printf("Fs: %f Ndft: %d fmin: %f fmax: %f st: %d en: %d\n",Fs, Ndft,  fmin, fmax, st, en)
139
140  % Update mag DFT  ---------------------------------------------
141
142  % we break up input buffer to a series of overlapping Ndft sequences
143  numffts = floor(length(sf)/(Ndft/2)) - 1;
144  h = hanning(Ndft);
145  for i=1:numffts
146    a = (i-1)*Ndft/2+1; b = a + Ndft - 1;
147    Sf = abs(fftshift(fft(sf(a:b) .* h, Ndft)));
148
149    % Smooth DFT mag spectrum, slower to respond to changes but more
150    % accurate.  Single order IIR filter is an exponentially weighted
151    % moving average.  This means the freq est window is wider than
152    % timing est window
153    tc = states.tc; states.Sf = (1-tc)*states.Sf + tc*Sf;
154  end
155
156  % Search for each tone method 1 - peak pick each tone location ----------------------------------
157
158  f = []; a = [];
159  Sf = states.Sf;
160  for m=1:ntones
161    [tone_amp tone_index] = max(Sf(st:en));
162    tone_index += st - 1;
163
164    f = [f (tone_index-1-Ndft/2)*Fs/Ndft];
165    a = [a tone_amp];
166
167    % zero out region min_tone_spacing either side of max so we can find next highest peak
168    % closest spacing for non-coh mFSK is Rs
169
170    stz = tone_index - floor((min_tone_spacing)*Ndft/Fs);
171    stz = max(1,stz);
172    enz = tone_index + floor((min_tone_spacing)*Ndft/Fs);
173    enz = min(Ndft,enz);
174    Sf(stz:enz) = 0;
175  end
176
177  states.f = sort(f);
178
179  % Search for each tone method 2 - correlate with mask with non-zero entries at tone spacings -----
180
181  % Create a mask with non-zero entries at tone spacing.  Might be
182  % smarter to use the DFT of a hanning window as mask
183
184  mask = zeros(1,Ndft);
185  mask(1:3) = 1;
186  for m=1:ntones-1
187    bin = round(m*states.tx_tone_separation*Ndft/Fs);
188    mask(bin:bin+2) = 1;
189  end
190  mask = mask(1:bin+2);
191  states.mask = mask;
192
193  % drag mask over Sf, looking for peak in correlation
194  b_max = st; corr_max = 0;
195  Sf = states.Sf; corr_log = [];
196  for b=st:en-length(mask)
197    corr = mask * Sf(b:b+length(mask)-1);
198    corr_log = [corr_log corr];
199    if corr > corr_max
200      corr_max = corr;
201      b_max = b;
202    end
203  end
204  foff = ((b_max-1)-Ndft/2)*Fs/Ndft;
205
206  if bitand(states.verbose, 0x8)
207    % enable this to single step through frames
208    figure(1); clf; subplot(211); plot(Sf,'b;sf;');
209    hold on; plot(max(Sf)*[zeros(1,b_max) mask],'g;mask;'); hold off;
210    subplot(212); plot(corr_log); ylabel('corr against f');
211    printf("foff: %4.0f\n", foff);
212    kbhit;
213  end
214  states.f2 = foff + (0:ntones-1)*states.tx_tone_separation;
215end
216
217
218% ------------------------------------------------------------------------------------
219% Given a buffer of nin input Rs baud FSK samples, returns nsym bits.
220%
221% nin is the number of input samples required by demodulator.  This is
222% time varying.  It will nominally be N (8000), and occasionally N +/-
223% Ts/2 (e.g. 8080 or 7920).  This is how we compensate for differences between the
224% remote tx sample clock and our sample clock.  This function always returns
225% N/Ts (e.g. 50) demodulated bits.  Variable number of input samples, constant number
226% of output bits.
227
228function [rx_bits states] = fsk_demod(states, sf)
229  M = states.M;
230  N = states.N;
231  Ndft = states.Ndft;
232  Fs = states.Fs;
233  Rs = states.Rs;
234  Ts = states.Ts;
235  nsym = states.nsym;
236  P = states.P;
237  nin = states.nin;
238  verbose = states.verbose;
239  Nmem = states.Nmem;
240  f = states.f;
241
242  assert(length(sf) == nin);
243
244  % down convert and filter at rate P ------------------------------
245
246  % update filter (integrator) memory by shifting in nin samples
247
248  nold = Nmem-nin; % number of old samples we retain
249
250  f_dc = states.f_dc;
251  f_dc(:,1:nold) = f_dc(:,Nmem-nold+1:Nmem);
252
253  % freq shift down to around DC, ensuring continuous phase from last frame, as nin may vary
254  for m=1:M
255    phi_vec = states.phi(m) + (1:nin)*2*pi*f(m)/Fs;
256    f_dc(m,nold+1:Nmem) = sf .* exp(j*phi_vec)';
257    states.phi(m)  = phi_vec(nin);
258    states.phi(m) -= 2*pi*floor(states.phi(m)/(2*pi));
259  end
260  % save filter (integrator) memory for next time
261  states.f_dc = f_dc;
262
263  % integrate over symbol period, which is effectively a LPF, removing
264  % the -2Fc frequency image.  Can also be interpreted as an ideal
265  % integrate and dump, non-coherent demod.  We run the integrator at
266  % rate P*Rs (1/P symbol offsets) to get outputs at a range of
267  % different fine timing offsets.  We calculate integrator output
268  % over nsym+1 symbols so we have extra samples for the fine timing
269  % re-sampler at either end of the array.
270
271  f_int = zeros(M,(nsym+1)*P);
272  for i=1:(nsym+1)*P
273    st = 1 + (i-1)*Ts/P;
274    en = st+Ts-1;
275    for m=1:M
276      f_int(m,i) = sum(f_dc(m,st:en));
277    end
278  end
279  states.f_int = f_int;
280
281  % fine timing estimation -----------------------------------------------
282
283  % Non linearity has a spectral line at Rs, with a phase
284  % related to the fine timing offset.  See:
285  %   http://www.rowetel.com/blog/?p=3573
286  % We have sampled the integrator output at Fs=P samples/symbol, so
287  % lets do a single point DFT at w = 2*pi*f/Fs = 2*pi*Rs/(P*Rs)
288  %
289  % Note timing non-linearity derived by experiment.  Not quite sure what I'm doing here.....
290  % but it gives 0dB impl loss for 2FSK Eb/No=9dB, testmode 1:
291  %   Fs: 8000 Rs: 50 Ts: 160 nsym: 50
292  %   frames: 200 Tbits: 9700 Terrs: 93 BER 0.010
293
294  Np = length(f_int(1,:));
295  w = 2*pi*(Rs)/(P*Rs);
296  timing_nl = sum(abs(f_int(:,:)).^2);
297  x = timing_nl * exp(-j*w*(0:Np-1))';
298  norm_rx_timing = angle(x)/(2*pi);
299  rx_timing = norm_rx_timing*P;
300
301  states.x = x;
302  states.timing_nl = timing_nl;
303  states.rx_timing = rx_timing;
304  prev_norm_rx_timing = states.norm_rx_timing;
305  states.norm_rx_timing = norm_rx_timing;
306
307  % estimate sample clock offset in ppm
308  % d_norm_timing is fraction of symbol period shift over nsym symbols
309
310  d_norm_rx_timing = norm_rx_timing - prev_norm_rx_timing;
311
312  % filter out big jumps due to nin changes
313
314  if abs(d_norm_rx_timing) < 0.2
315    appm = 1E6*d_norm_rx_timing/nsym;
316    states.ppm = 0.9*states.ppm + 0.1*appm;
317  end
318
319  % work out how many input samples we need on the next call. The aim
320  % is to keep angle(x) away from the -pi/pi (+/- 0.5 fine timing
321  % offset) discontinuity.  The side effect is to track sample clock
322  % offsets
323
324  next_nin = N;
325  if norm_rx_timing > 0.25
326     next_nin += Ts/4;
327  end
328  if norm_rx_timing < -0.25;
329     next_nin -= Ts/4;
330  end
331  states.nin = next_nin;
332
333  % Now we know the correct fine timing offset, Re-sample integrator
334  % outputs using fine timing estimate and linear interpolation, then
335  % extract the demodulated bits
336
337  low_sample = floor(rx_timing);
338  fract = rx_timing - low_sample;
339  high_sample = ceil(rx_timing);
340
341  if bitand(verbose,0x2)
342    printf("rx_timing: %3.2f low_sample: %d high_sample: %d fract: %3.3f nin_next: %d\n", rx_timing, low_sample, high_sample, fract, next_nin);
343  end
344
345  f_int_resample = zeros(M,nsym);
346  rx_bits = zeros(1,nsym*states.bitspersymbol);
347  tone_max = zeros(1,nsym);
348  rx_nse_pow = 1E-12; rx_sig_pow = 0.0;
349
350  for i=1:nsym
351    st = i*P+1;
352    f_int_resample(:,i) = f_int(:,st+low_sample)*(1-fract) + f_int(:,st+high_sample)*fract;
353
354    % Hard decision decoding, Largest amplitude tone is the winner.
355    % Map this FSK "symbol" back to bits, depending on M
356
357    [tone_max(i) tone_index] = max(f_int_resample(:,i));
358    st = (i-1)*states.bitspersymbol + 1;
359    en = st + states.bitspersymbol-1;
360    arx_bits = dec2bin(tone_index - 1, states.bitspersymbol) - '0';
361    rx_bits(st:en) = arx_bits;
362
363    % each filter is the DFT of a chunk of spectrum.  If there is no tone in the
364    % filter it can be considered an estimate of noise in that bandwidth
365    rx_pows = f_int_resample(:,i) .* conj(f_int_resample(:,i));
366    rx_sig_pow += rx_pows(tone_index);
367    rx_nse_pow += (sum(rx_pows) - rx_pows(tone_index))/(M-1);
368  end
369
370  states.f_int_resample = f_int_resample;
371
372  % Eb/No estimation (todo: this needs some work, like calibration, low Eb/No perf, work for all M)
373  tone_max = abs(tone_max);
374  states.EbNodB = -6 + 20*log10(1E-6+mean(tone_max)/(1E-6+std(tone_max)));
375
376  % Estimators for LDPC decoder, might be a bit rough if nsym is small
377  rx_sig_pow = rx_sig_pow/nsym;
378  rx_nse_pow = rx_nse_pow/nsym;
379  states.v_est = sqrt(rx_sig_pow-rx_nse_pow);
380  states.SNRest = rx_sig_pow/rx_nse_pow;
381endfunction
382
383
384% BER counter and test frame sync logic -------------------------------------------
385% We look for test_frame in rx_bits_buf,  rx_bits_buf must be twice as long as test_frame
386
387function states = ber_counter(states, test_frame, rx_bits_buf)
388  nbit = length(test_frame);
389  assert (length(rx_bits_buf) == 2*nbit);
390  state = states.ber_state;
391  next_state = state;
392
393  if state == 0
394
395    % try to sync up with test frame
396
397    nerrs_min = nbit;
398    for i=1:nbit
399      error_positions = xor(rx_bits_buf(i:nbit+i-1), test_frame);
400      nerrs = sum(error_positions);
401      if nerrs < nerrs_min
402        nerrs_min = nerrs;
403        states.coarse_offset = i;
404      end
405    end
406    if nerrs_min/nbit < states.ber_valid_thresh
407      next_state = 1;
408    end
409    if bitand(states.verbose,0x4)
410      printf("coarse offset: %d nerrs_min: %d next_state: %d\n", states.coarse_offset, nerrs_min, next_state);
411    end
412    states.nerr = nerrs_min;
413  end
414
415  if state == 1
416
417    % we're synced up, lets measure bit errors
418
419    error_positions = xor(rx_bits_buf(states.coarse_offset:states.coarse_offset+nbit-1), test_frame);
420    nerrs = sum(error_positions);
421    if nerrs/nbit > states.ber_invalid_thresh
422      next_state = 0;
423      if bitand(states.verbose,0x4)
424        printf("coarse offset: %d nerrs: %d next_state: %d\n", states.coarse_offset, nerrs, next_state);
425      end
426    else
427      states.Terrs += nerrs;
428      states.Tbits += nbit;
429      states.nerr_log = [states.nerr_log nerrs];
430    end
431    states.nerr = nerrs;
432  end
433
434  states.ber_state = next_state;
435endfunction
436
437
438% Alternative stateless BER counter that works on packets that may have gaps between them
439
440function states = ber_counter_packet(states, test_frame, rx_bits_buf)
441  ntestframebits = states.ntestframebits;
442  nbit = states.nbit;
443
444  % look for offset with min errors
445
446  nerrs_min = ntestframebits; coarse_offset = 1;
447  for i=1:nbit
448    error_positions = xor(rx_bits_buf(i:ntestframebits+i-1), test_frame);
449    nerrs = sum(error_positions);
450    %printf("i: %d nerrs: %d\n", i, nerrs);
451    if nerrs < nerrs_min
452      nerrs_min = nerrs;
453      coarse_offset = i;
454    end
455  end
456
457  % if less than threshold count errors
458
459  if nerrs_min/ntestframebits < 0.05
460    states.Terrs += nerrs_min;
461    states.Tbits += ntestframebits;
462    states.nerr_log = [states.nerr_log nerrs_min];
463    if bitand(states.verbose, 0x4)
464      printf("coarse_offset: %d nerrs_min: %d\n", coarse_offset, nerrs_min);
465    end
466  end
467endfunction
468
469
470