1 /* Copyright 2013-2016 IBM Corp.
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12 * implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
/*
 * This code retrieves error logs from the FSP into Sapphire, one at a time
 * and in sequence.
 * The FSP sends the next log only after Sapphire has sent a new-log
 * notification response to the FSP. On completion of reading a log from the
 * FSP, OPAL_EVENT_ERROR_LOG_AVAIL is signaled. It remains raised until a call
 * to opal_elog_read() is made and OPAL_SUCCESS is returned, at which point the
 * operation is complete and the event is cleared. This is the READ action
 * from the FSP.
 */
27
/*
 * Design of READ error log:
 * When we receive a new error log entry notification from the FSP, we queue
 * it onto the "pending" list. If the "pending" list is not empty and no fetch
 * is already in progress, we start fetching the log at its head from the FSP.
 *
 * When Linux reads a log entry, we dequeue it from the "pending" list and
 * enqueue it onto another "processed" list. At this point, if the "pending"
 * list is not empty, we continue to fetch the next log.
 *
 * When Linux calls opal_resend_pending_logs(), we move every entry on the
 * "processed" list back to the head of the "pending" list, so that logs which
 * were read but not yet acknowledged are delivered again. We then initiate an
 * error log fetch if the "pending" list is non-empty and no fetch is in
 * progress.
 *
 * When Linux acks an error log, we remove it from the "processed" list and
 * return its entry to the free list.
 */
45
46 #include <errno.h>
47 #include <fsp.h>
48 #include <fsp-elog.h>
49 #include <lock.h>
50 #include <opal-api.h>
51 #include <psi.h>
52 #include <skiboot.h>
53
/*
 * Number of struct fsp_log_entry records pre-allocated at init time.
 * This bounds how many FSP error logs can be tracked at once; when no free
 * record is left, a newly notified log is discarded.
 */
#define ELOG_READ_MAX_RECORD		128
59
60 /* Structure to maintain log-id, log-size, pending and processed list. */
61 struct fsp_log_entry {
62 uint32_t log_id;
63 size_t log_size;
64 struct list_node link;
65 };
66
67 static LIST_HEAD(elog_read_pending);
68 static LIST_HEAD(elog_read_processed);
69 static LIST_HEAD(elog_read_free);
70 /*
71 * Lock is used to protect overwriting of processed and pending list
72 * and also used while updating state of each log.
73 */
74 static struct lock elog_read_lock = LOCK_UNLOCKED;
75
76 #define ELOG_READ_BUFFER_SIZE 0x00004000
77 /* Log buffer to copy FSP log for read */
78 static void *elog_read_buffer;
79 static uint32_t elog_head_id; /* FSP entry ID */
80 static size_t elog_head_size; /* Actual FSP log size */
81 static uint32_t elog_read_retries; /* Bad response status count */
82
83 /* Initialize the state of the log */
84 static enum elog_head_state elog_read_from_fsp_head_state = ELOG_STATE_NONE;
85
86 static bool elog_enabled = false;
87
88 /* Need forward declaration because of circular dependency. */
89 static void fsp_elog_queue_fetch(void);
90
91 /*
92 * Check the response message for mbox acknowledgement
93 * command send to FSP.
94 */
fsp_elog_ack_complete(struct fsp_msg * msg)95 static void fsp_elog_ack_complete(struct fsp_msg *msg)
96 {
97 uint8_t val;
98
99 val = (msg->resp->word1 >> 8) & 0xff;
100 if (val != 0)
101 prerror("ELOG: Acknowledgement error\n");
102
103 fsp_freemsg(msg);
104 }
105
106 /* Send error log PHYP acknowledgement to FSP with entry ID. */
fsp_send_elog_ack(uint32_t log_id)107 static int64_t fsp_send_elog_ack(uint32_t log_id)
108 {
109 struct fsp_msg *ack_msg;
110
111 ack_msg = fsp_mkmsg(FSP_CMD_ERRLOG_PHYP_ACK, 1, log_id);
112 if (!ack_msg) {
113 prerror("ELOG: Failed to allocate ack message\n");
114 return OPAL_INTERNAL_ERROR;
115 }
116
117 if (fsp_queue_msg(ack_msg, fsp_elog_ack_complete)) {
118 fsp_freemsg(ack_msg);
119 ack_msg = NULL;
120 prerror("ELOG: Error queueing elog ack complete\n");
121 return OPAL_INTERNAL_ERROR;
122 }
123
124 return OPAL_SUCCESS;
125 }
126
127 /* Retrieve error log from FSP with TCE for the data transfer. */
fsp_elog_check_and_fetch_head(void)128 static void fsp_elog_check_and_fetch_head(void)
129 {
130 lock(&elog_read_lock);
131 if (elog_read_from_fsp_head_state != ELOG_STATE_NONE ||
132 list_empty(&elog_read_pending)) {
133 unlock(&elog_read_lock);
134 return;
135 }
136
137 elog_read_retries = 0;
138 /* Start fetching first entry from the pending list */
139 fsp_elog_queue_fetch();
140 unlock(&elog_read_lock);
141 }
142
elog_set_head_state(bool opal_logs,enum elog_head_state state)143 void elog_set_head_state(bool opal_logs, enum elog_head_state state)
144 {
145 static enum elog_head_state opal_logs_state = ELOG_STATE_NONE;
146 static enum elog_head_state fsp_logs_state = ELOG_STATE_NONE;
147
148 /* ELOG disabled */
149 if (!elog_enabled)
150 return;
151
152 if (opal_logs)
153 opal_logs_state = state;
154 else
155 fsp_logs_state = state;
156
157 if (fsp_logs_state == ELOG_STATE_FETCHED_DATA ||
158 opal_logs_state == ELOG_STATE_FETCHED_DATA)
159 opal_update_pending_evt(OPAL_EVENT_ERROR_LOG_AVAIL,
160 OPAL_EVENT_ERROR_LOG_AVAIL);
161 else
162 opal_update_pending_evt(OPAL_EVENT_ERROR_LOG_AVAIL, 0);
163 }
164
165 /* This function should be called with the lock held. */
fsp_elog_set_head_state(enum elog_head_state state)166 static inline void fsp_elog_set_head_state(enum elog_head_state state)
167 {
168 elog_set_head_state(false, state);
169 elog_read_from_fsp_head_state = state;
170 }
171
172 /*
173 * When, we try maximum time of fetching log from FSP
174 * we call following function to delete log from the
175 * pending list and update the state to fetch next log.
176 *
177 * This function should be called with the lock held.
178 */
fsp_elog_fetch_failure(uint8_t fsp_status)179 static void fsp_elog_fetch_failure(uint8_t fsp_status)
180 {
181 struct fsp_log_entry *log_data;
182
183 /* Read top list and delete the node */
184 log_data = list_top(&elog_read_pending, struct fsp_log_entry, link);
185 if (!log_data) {
186 /**
187 * @fwts-label ElogFetchFailureInconsistent
188 * @fwts-advice Inconsistent state between OPAL and FSP
189 * in code path for handling failure of fetching error log
190 * from FSP. Likely a bug in interaction between FSP and OPAL.
191 */
192 prlog(PR_ERR, "%s: Inconsistent internal list state !\n",
193 __func__);
194 } else {
195 list_del(&log_data->link);
196 list_add(&elog_read_free, &log_data->link);
197 prerror("ELOG: received invalid data: %x FSP status: 0x%x\n",
198 log_data->log_id, fsp_status);
199 }
200
201 fsp_elog_set_head_state(ELOG_STATE_NONE);
202 }
203
204 /* Read response value from FSP for fetch sp data mbox command */
fsp_elog_read_complete(struct fsp_msg * read_msg)205 static void fsp_elog_read_complete(struct fsp_msg *read_msg)
206 {
207 uint8_t val;
208
209 lock(&elog_read_lock);
210 val = (read_msg->resp->word1 >> 8) & 0xff;
211 fsp_freemsg(read_msg);
212 if (elog_read_from_fsp_head_state == ELOG_STATE_REJECTED) {
213 fsp_elog_set_head_state(ELOG_STATE_NONE);
214 goto elog_read_out;
215 }
216
217 switch (val) {
218 case FSP_STATUS_SUCCESS:
219 fsp_elog_set_head_state(ELOG_STATE_FETCHED_DATA);
220 break;
221
222 case FSP_STATUS_DMA_ERROR:
223 if (elog_read_retries++ < MAX_RETRIES) {
224 /*
225 * For a error response value from FSP, we try to
226 * send fetch sp data mbox command again for three
227 * times if response from FSP is still not valid
228 * we send generic error response to FSP.
229 */
230 fsp_elog_queue_fetch();
231 break;
232 }
233
234 fsp_elog_fetch_failure(val);
235 break;
236
237 default:
238 fsp_elog_fetch_failure(val);
239 }
240
241 elog_read_out:
242 unlock(&elog_read_lock);
243
244 /* Check if a new log needs fetching */
245 fsp_elog_check_and_fetch_head();
246 }
247
248 /* Read error log from FSP through mbox commands */
fsp_elog_queue_fetch(void)249 static void fsp_elog_queue_fetch(void)
250 {
251 int rc;
252 uint8_t flags = 0;
253 struct fsp_log_entry *entry;
254
255 entry = list_top(&elog_read_pending, struct fsp_log_entry, link);
256 if (!entry) {
257 /**
258 * @fwts-label ElogQueueInconsistent
259 * @fwts-advice Bug in interaction between FSP and OPAL. We
260 * expected there to be a pending read from FSP but the list
261 * was empty.
262 */
263 prlog(PR_ERR, "%s: Inconsistent internal list state !\n",
264 __func__);
265 fsp_elog_set_head_state(ELOG_STATE_NONE);
266 return;
267 }
268
269 fsp_elog_set_head_state(ELOG_STATE_FETCHING);
270 elog_head_id = entry->log_id;
271 elog_head_size = entry->log_size;
272 rc = fsp_fetch_data_queue(flags, FSP_DATASET_ERRLOG, elog_head_id,
273 0, (void *)PSI_DMA_ERRLOG_READ_BUF,
274 &elog_head_size, fsp_elog_read_complete);
275 if (rc) {
276 prerror("ELOG: failed to queue read message: %d\n", rc);
277 fsp_elog_set_head_state(ELOG_STATE_NONE);
278 }
279 }
280
281 /* OPAL interface for PowerNV to read log size and log ID from Sapphire. */
fsp_opal_elog_info(uint64_t * opal_elog_id,uint64_t * opal_elog_size,uint64_t * elog_type)282 static int64_t fsp_opal_elog_info(uint64_t *opal_elog_id,
283 uint64_t *opal_elog_size, uint64_t *elog_type)
284 {
285 struct fsp_log_entry *log_data;
286
287 /* Copy type of the error log */
288 *elog_type = ELOG_TYPE_PEL;
289
290 /* Check if any OPAL log needs to be reported to the host */
291 if (opal_elog_info(opal_elog_id, opal_elog_size))
292 return OPAL_SUCCESS;
293
294 lock(&elog_read_lock);
295 if (elog_read_from_fsp_head_state != ELOG_STATE_FETCHED_DATA) {
296 unlock(&elog_read_lock);
297 return OPAL_WRONG_STATE;
298 }
299
300 log_data = list_top(&elog_read_pending, struct fsp_log_entry, link);
301 if (!log_data) {
302 /**
303 * @fwts-label ElogInfoInconsistentState
304 * @fwts-advice We expected there to be an entry in the list
305 * of error logs for the error log we're fetching information
306 * for. There wasn't. This means there's a bug.
307 */
308 prlog(PR_ERR, "%s: Inconsistent internal list state !\n",
309 __func__);
310 fsp_elog_set_head_state(ELOG_STATE_NONE);
311 unlock(&elog_read_lock);
312 return OPAL_WRONG_STATE;
313 }
314
315 *opal_elog_id = log_data->log_id;
316 *opal_elog_size = log_data->log_size;
317 fsp_elog_set_head_state(ELOG_STATE_HOST_INFO);
318 unlock(&elog_read_lock);
319 return OPAL_SUCCESS;
320 }
321
322 /* OPAL interface for PowerNV to read log from Sapphire. */
fsp_opal_elog_read(uint64_t * buffer,uint64_t opal_elog_size,uint64_t opal_elog_id)323 static int64_t fsp_opal_elog_read(uint64_t *buffer, uint64_t opal_elog_size,
324 uint64_t opal_elog_id)
325 {
326 int size = opal_elog_size;
327 struct fsp_log_entry *log_data;
328
329 /* Check if any OPAL log needs to be reported to the PowerNV */
330 if (opal_elog_read(buffer, opal_elog_size, opal_elog_id))
331 return OPAL_SUCCESS;
332
333 /*
334 * Read top entry from list.
335 * As we know always top record of the list is fetched from FSP
336 */
337 lock(&elog_read_lock);
338 if (elog_read_from_fsp_head_state != ELOG_STATE_HOST_INFO) {
339 unlock(&elog_read_lock);
340 return OPAL_WRONG_STATE;
341 }
342
343 log_data = list_top(&elog_read_pending, struct fsp_log_entry, link);
344 if (!log_data) {
345 /**
346 * @fwts-label ElogReadInconsistentState
347 * @fwts-advice Inconsistent state while reading error log
348 * from FSP. Bug in OPAL and FSP interaction.
349 */
350 prlog(PR_ERR, "%s: Inconsistent internal list state !\n",
351 __func__);
352 fsp_elog_set_head_state(ELOG_STATE_NONE);
353 unlock(&elog_read_lock);
354 return OPAL_WRONG_STATE;
355 }
356
357 /* Check log ID and then read log from buffer */
358 if (opal_elog_id != log_data->log_id) {
359 unlock(&elog_read_lock);
360 return OPAL_PARAMETER;
361 }
362
363 /* Do not copy more than actual log size */
364 if (opal_elog_size > log_data->log_size)
365 size = log_data->log_size;
366
367 memset((void *)buffer, 0, opal_elog_size);
368 memcpy((void *)buffer, elog_read_buffer, size);
369
370 /*
371 * Once log is read from linux move record from pending
372 * to processed list and delete record from pending list
373 * and change state of the log to fetch next record.
374 */
375 list_del(&log_data->link);
376 list_add(&elog_read_processed, &log_data->link);
377 fsp_elog_set_head_state(ELOG_STATE_NONE);
378 unlock(&elog_read_lock);
379
380 /* Read error log from FSP */
381 fsp_elog_check_and_fetch_head();
382
383 return OPAL_SUCCESS;
384 }
385
386 /* Set state of the log head before fetching the log. */
elog_reject_head(void)387 static void elog_reject_head(void)
388 {
389 if (elog_read_from_fsp_head_state == ELOG_STATE_FETCHING)
390 fsp_elog_set_head_state(ELOG_STATE_REJECTED);
391 else
392 fsp_elog_set_head_state(ELOG_STATE_NONE);
393 }
394
395 /* OPAL interface for PowerNV to send ack to FSP with log ID */
fsp_opal_elog_ack(uint64_t ack_id)396 static int64_t fsp_opal_elog_ack(uint64_t ack_id)
397 {
398 int rc = 0;
399 struct fsp_log_entry *record, *next_record;
400
401 if (opal_elog_ack(ack_id))
402 return rc;
403
404 /* Send acknowledgement to FSP */
405 rc = fsp_send_elog_ack(ack_id);
406 if (rc != OPAL_SUCCESS) {
407 prerror("ELOG: failed to send acknowledgement: %d\n", rc);
408 return rc;
409 }
410
411 lock(&elog_read_lock);
412 list_for_each_safe(&elog_read_processed, record, next_record, link) {
413 if (record->log_id != ack_id)
414 continue;
415
416 list_del(&record->link);
417 list_add(&elog_read_free, &record->link);
418 unlock(&elog_read_lock);
419 return rc;
420 }
421
422 list_for_each_safe(&elog_read_pending, record, next_record, link) {
423 if (record->log_id != ack_id)
424 continue;
425 /*
426 * It means PowerNV has sent ACK without reading actual data.
427 * Because of this elog_read_from_fsp_head_state may be
428 * stuck in wrong state (ELOG_STATE_HOST_INFO) and not able
429 * to send remaining ELOGs to PowerNV. Hence reset ELOG state
430 * and start sending remaining ELOGs.
431 */
432 list_del(&record->link);
433 list_add(&elog_read_free, &record->link);
434 elog_reject_head();
435 unlock(&elog_read_lock);
436 fsp_elog_check_and_fetch_head();
437 return rc;
438 }
439
440 unlock(&elog_read_lock);
441 return OPAL_PARAMETER;
442 }
443
444 /*
445 * Once Linux kexec's it ask to resend all logs which
446 * are not acknowledged from Linux.
447 */
fsp_opal_resend_pending_logs(void)448 static void fsp_opal_resend_pending_logs(void)
449 {
450 struct fsp_log_entry *entry;
451
452 lock(&elog_read_lock);
453 elog_enabled = true;
454 unlock(&elog_read_lock);
455
456 /* Check if any Sapphire logs are pending. */
457 opal_resend_pending_logs();
458
459 lock(&elog_read_lock);
460 /*
461 * If processed list is not empty add all record from
462 * processed list to pending list at head of the list
463 * and delete records from processed list.
464 */
465 while (!list_empty(&elog_read_processed)) {
466 entry = list_pop(&elog_read_processed,
467 struct fsp_log_entry, link);
468 list_add(&elog_read_pending, &entry->link);
469 }
470
471 unlock(&elog_read_lock);
472
473 /* Read error log from FSP */
474 elog_reject_head();
475 fsp_elog_check_and_fetch_head();
476 }
477
478 /* Disable ELOG event flag until PowerNV is ready to receive event */
opal_kexec_elog_notify(void * data __unused)479 static bool opal_kexec_elog_notify(void *data __unused)
480 {
481 lock(&elog_read_lock);
482 elog_enabled = false;
483 opal_update_pending_evt(OPAL_EVENT_ERROR_LOG_AVAIL, 0);
484 unlock(&elog_read_lock);
485
486 return true;
487 }
488
489 /* FSP elog notify function */
fsp_elog_msg(uint32_t cmd_sub_mod,struct fsp_msg * msg)490 static bool fsp_elog_msg(uint32_t cmd_sub_mod, struct fsp_msg *msg)
491 {
492 int rc = 0;
493 struct fsp_log_entry *record;
494 uint32_t log_id;
495 uint32_t log_size;
496
497 if (cmd_sub_mod != FSP_CMD_ERRLOG_NOTIFICATION)
498 return false;
499
500 log_id = msg->data.words[0];
501 log_size = msg->data.words[1];
502
503 prlog(PR_TRACE, "ELOG: Notified of log 0x%08x (size: %d)\n",
504 log_id, log_size);
505
506 /* Make sure we don't cross read buffer size */
507 if (log_size > ELOG_READ_BUFFER_SIZE) {
508 log_size = ELOG_READ_BUFFER_SIZE;
509 printf("ELOG: Truncated log (0x%08x) to 0x%x\n",
510 log_id, log_size);
511 }
512
513 /* Take a lock until we take out the node from elog_read_free */
514 lock(&elog_read_lock);
515 if (!list_empty(&elog_read_free)) {
516 /* Create a new entry in the pending list. */
517 record = list_pop(&elog_read_free, struct fsp_log_entry, link);
518 record->log_id = log_id;
519 record->log_size = log_size;
520 list_add_tail(&elog_read_pending, &record->link);
521 unlock(&elog_read_lock);
522
523 /* Send response back to FSP for a new elog notify message. */
524 rc = fsp_queue_msg(fsp_mkmsg(FSP_RSP_ERRLOG_NOTIFICATION,
525 1, log_id), fsp_freemsg);
526 if (rc)
527 prerror("ELOG: Failed to queue errlog notification"
528 " response: %d\n", rc);
529
530 /* Read error log from FSP */
531 fsp_elog_check_and_fetch_head();
532
533 } else {
534 prlog(PR_TRACE, "ELOG: Log entry 0x%08x discarded\n", log_id);
535
536 /* Unlock if elog_read_free is empty. */
537 unlock(&elog_read_lock);
538
539 rc = fsp_queue_msg(fsp_mkmsg(FSP_RSP_ERRLOG_NOTIFICATION,
540 1, log_id), fsp_freemsg);
541 if (rc)
542 prerror("ELOG: Failed to queue errlog notification"
543 " response: %d\n", rc);
544
545 /*
546 * If list is full with max record then we send discarded by
547 * phyp (condition full) ack to FSP.
548 *
549 * At some point in the future, we'll get notified again.
550 * This is largely up to FSP as to when they tell us about
551 * the log again.
552 */
553 rc = fsp_queue_msg(fsp_mkmsg(FSP_CMD_ERRLOG_PHYP_ACK | 0x02,
554 1, log_id), fsp_freemsg);
555 if (rc)
556 prerror("ELOG: Failed to queue errlog ack"
557 " response: %d\n", rc);
558 }
559
560 return true;
561 }
562
563 static struct fsp_client fsp_get_elog_notify = {
564 .message = fsp_elog_msg,
565 };
566
567 /* Pre-allocate memory for reading error log from FSP */
init_elog_read_free_list(uint32_t num_entries)568 static int init_elog_read_free_list(uint32_t num_entries)
569 {
570 struct fsp_log_entry *entry;
571 int i;
572
573 entry = zalloc(sizeof(struct fsp_log_entry) * num_entries);
574 if (!entry)
575 goto out_err;
576
577 for (i = 0; i < num_entries; ++i) {
578 list_add_tail(&elog_read_free, &entry->link);
579 entry++;
580 }
581
582 return 0;
583
584 out_err:
585 return -ENOMEM;
586 }
587
588 /* FSP elog read init function */
fsp_elog_read_init(void)589 void fsp_elog_read_init(void)
590 {
591 int val = 0;
592
593 if (!fsp_present())
594 return;
595
596 elog_read_buffer = memalign(TCE_PSIZE, ELOG_READ_BUFFER_SIZE);
597 if (!elog_read_buffer) {
598 prerror("FSP: could not allocate FSP ELOG_READ_BUFFER!\n");
599 return;
600 }
601
602 /* Map TCEs */
603 fsp_tce_map(PSI_DMA_ERRLOG_READ_BUF, elog_read_buffer,
604 PSI_DMA_ERRLOG_READ_BUF_SZ);
605
606 /* Pre allocate memory for 128 record */
607 val = init_elog_read_free_list(ELOG_READ_MAX_RECORD);
608 if (val != 0)
609 return;
610
611 /* Register error log class D2 */
612 fsp_register_client(&fsp_get_elog_notify, FSP_MCLASS_ERR_LOG);
613
614 /* Register for sync on PowerNV reboot call */
615 opal_add_host_sync_notifier(opal_kexec_elog_notify, NULL);
616
617 /* Register OPAL interface */
618 opal_register(OPAL_ELOG_READ, fsp_opal_elog_read, 3);
619 opal_register(OPAL_ELOG_ACK, fsp_opal_elog_ack, 1);
620 opal_register(OPAL_ELOG_RESEND, fsp_opal_resend_pending_logs, 0);
621 opal_register(OPAL_ELOG_SIZE, fsp_opal_elog_info, 3);
622 }
623