User Tools
Writing /app/www/public/data/meta/watchdogs/watchdog_overview.meta failed
watchdogs:watchdog_overview
Differences
This shows you the differences between two versions of the page.
| Both sides previous revisionPrevious revisionNext revision | Previous revision | ||
| watchdogs:watchdog_overview [2017/04/11 11:15] – mmcc | watchdogs:watchdog_overview [2021/06/25 10:09] (current) – external edit 127.0.0.1 | ||
|---|---|---|---|
| Line 1: | Line 1: | ||
| + | ====== The Watchdog Process ====== | ||
| + | |||
| + | Author: Sophie Renshaw | ||
| + | |||
| + | |||
| + | |||
| + | - We receive a Watchdog indicating that there is an issue with one of the servers that we are monitoring. We will receive this notification via email and, if it is a critical alarm, a text message. | ||
| + | - The person on the Watchdog Rota (or the person On Call, if the notification arrives at the weekend) will click on the link in the email. This will open the watchdog ticket in the mobile ticketer, which is publicly accessible, so it can be accessed from outside the office. | ||
| + | - NOTE: DO NOT LOG IN WITH THE ADMIN ACCOUNT - PLEASE USE YOUR OWN ACCOUNT PROVIDED | ||
| + | - The person investigating the Watchdog should move the ticket to the Under Investigation state. We will then get another email, sent to developers, informing us that the ticket has been moved to Under Investigation. The login of the person who moved the ticket appears in the subject of that email. | ||
| + | - Once the investigation has been completed, reply to the Under Investigation email with any findings. | ||
| + | - We will get another email when the alarm has cleared. | ||
| + | |||
| + | Once you are finished onboarding, there is more detail relating to the Watchdogs process which you can find here: [[watchdogs: | ||
| + | |||
| + | ---- | ||
| + | |||
| + | |||
| + | **Errigal Watchdog - Resource Alarm Workflow** | ||
| + | |||
| + | {{: | ||
| + | |||
| + | |||
| + | ---- | ||
| + | |||
| + | |||
| + | **SendSMSandEmail Groovlet (ID: 233)** | ||
| + | |||
| + | <code groovy> | ||
| + | arg, defaultArg, ticket -> | ||
| + | | ||
| + | /* | ||
| + | This groovlet is used to send a ticket email using a custom email template (defined in the workflow node | ||
| + | entrance rule) to a group of users who should be notified about a ticket. | ||
| + | |||
| + | If the email sends, the ticket will enter the successInSendingEmailState state; if it fails to send, the ticket | ||
| + | will enter the failedToSendEmailState state. Both states are defined in the workflow node rule as arguments. | ||
| + | |||
| + | If the groovlet fails to fully execute, any exceptions thrown will be sent to the " | ||
| + | */ | ||
| + | |||
| + | /* | ||
| + | Defining the sendSMS closure | ||
| + | */ | ||
| + | //Save on Users needed to resolve the issue that was being seen in SUPPORT-216, | ||
| + | def ticketStatusesByUsers = ticket.statuses.statusBy | ||
| + | ticketStatusesByUsers.each(){ | ||
| + | com.errigal.ticketer.User user = it | ||
| + | user.save() | ||
| + | } | ||
| + | log.info " | ||
| + | |||
| + | def sendEmail = {email,body -> final String username = " | ||
| + | final String password = " | ||
| + | |||
| + | Properties props = new Properties() | ||
| + | props.put(" | ||
| + | props.put(" | ||
| + | props.put(" | ||
| + | props.put(" | ||
| + | |||
| + | javax.mail.Session session = javax.mail.Session.getInstance(props, | ||
| + | new javax.mail.Authenticator() { | ||
| + | protected javax.mail.PasswordAuthentication getPasswordAuthentication() { | ||
| + | return new javax.mail.PasswordAuthentication(username, | ||
| + | } | ||
| + | }) | ||
| + | |||
| + | try { | ||
| + | |||
| + | javax.mail.Message message = new javax.mail.internet.MimeMessage(session) | ||
| + | message.setFrom(new javax.mail.internet.InternetAddress(" | ||
| + | message.setRecipients(javax.mail.Message.RecipientType.TO, | ||
| + | javax.mail.internet.InternetAddress.parse(email)) | ||
| + | message.setSubject("" | ||
| + | message.setText(body) | ||
| + | |||
| + | javax.mail.Transport.send(message) | ||
| + | |||
| + | } catch (javax.mail.MessagingException e) { | ||
| + | throw new RuntimeException(e) | ||
| + | } | ||
| + | } | ||
| + | |||
| + | | ||
| + | log.info " | ||
| + | log.info " | ||
| + | log.info " | ||
| + | |||
| + | def args = arg.split("," | ||
| + | |||
| + | def stateName | ||
| + | def successInSendingEmailState | ||
| + | def failedToSendEmailState// | ||
| + | |||
| + | | ||
| + | |||
| + | | ||
| + | | ||
| + | def emailService = com.errigal.ticketer.utils.DomainUtils.getGrailsService(" | ||
| + | |||
| + | com.errigal.ticketer.Visibility ticketVisibility = com.errigal.ticketer.Visibility.findByName(ticket.visibility) | ||
| + | log.info " | ||
| + | |||
| + | com.errigal.ticketer.EmailAccount fromEmailAccount = ticketVisibility.emailAccount | ||
| + | log.info "Email Account: $fromEmailAccount" | ||
| + | |||
| + | //TODO: Support exit email too | ||
| + | def entranceEmail = ticket.workflow.nodes.find{it.name == ticket.currentStatus.status}.entranceEmail | ||
| + | |||
| + | log.info "Email to use: ${entranceEmail}" | ||
| + | log.info " | ||
| + | |||
| + | def emailVo = emailService.getCustomEmailVOWithoutSecurityCheck(ticket.id, | ||
| + | log.info "Email VO: $emailVo" | ||
| + | |||
| + | try{ | ||
| + | | ||
| + | } | ||
| + | catch(Exception e){ | ||
| + | | ||
| + | } | ||
| + | |||
| + | Calendar cal = Calendar.getInstance(); | ||
| + | cal.setTime(new Date()); // sets calendar time/date | ||
| + | Boolean hasAlarmID = false | ||
| + | int currentHour = cal.get(Calendar.HOUR_OF_DAY) | ||
| + | def acceptedAlarmIDs = [" | ||
| + | " | ||
| + | " | ||
| + | acceptedAlarmIDs.each{it -> if(ticket.summary.contains(it)){hasAlarmID = true}} | ||
| + | |||
| + | if(hasAlarmID){ | ||
| + | log.info " | ||
| + | } else { | ||
| + | log.info "Could not find any of ${acceptedAlarmIDs} in ' | ||
| + | } | ||
| + | |||
| + | |||
| + | String body = """ | ||
| + | user: | ||
| + | password:# | ||
| + | from: | ||
| + | to: | ||
| + | text: | ||
| + | |||
| + | if(currentHour >= 16 && currentHour <= 23 && hasAlarmID) | ||
| + | { | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | } | ||
| + | else if (currentHour >= 0 && currentHour <= 7 && hasAlarmID) | ||
| + | { | ||
| + | sendEmail(" | ||
| + | } | ||
| + | else if (hasAlarmID) | ||
| + | { | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | sendEmail(" | ||
| + | } | ||
| + | |||
| + | | ||
| + | // This below code is used to change state if the email has been sent (i.e. "Alarm Received" | ||
| + | if (ticket.parent) { | ||
| + | log.info " | ||
| + | } else { | ||
| + | // Change the Ticket State if it is a top-level Ticket | ||
| + | if (com.errigal.ticketer.GNode.findByWorkflowAndName(ticket.workflow, | ||
| + | log.info " | ||
| + | ticket.updateStatus(stateName) | ||
| + | def saveResult = ticket.save(failOnError: | ||
| + | if (saveResult) { | ||
| + | log.info "State change result: | ||
| + | } else { | ||
| + | def sb = new StringBuilder(" | ||
| + | if (hasErrors()) { | ||
| + | sb.append " errors: | ||
| + | errors.allErrors.each {sb.append it} | ||
| + | } else { | ||
| + | sb.append " no error messages." | ||
| + | } | ||
| + | log.info sb | ||
| + | } | ||
| + | } else { | ||
| + | log.error " | ||
| + | } | ||
| + | } | ||
| + | </ | ||
| + | |||
| + | === Creating a Watchdog === | ||
| + | -Log on to the server on which you want to create the watchdog | ||
| + | -Go to the following directory; / | ||
| + | -Add your new rule to the bottom of the ResourceConfig.groovy file in this dir, e.g.< | ||
| + | ' | ||
| + | type = ' | ||
| + | parameters { | ||
| + | driver = ' | ||
| + | host = ' | ||
| + | port = ' | ||
| + | database = ' | ||
| + | user = ' | ||
| + | password = ' | ||
| + | checkRowCountQuery = ' | ||
| + | } | ||
| + | thresholds { | ||
| + | a = [name: ' | ||
| + | } | ||
| + | } | ||
| + | </ | ||
| + | -Double-check the query against the DB to ensure that it is correct (careful, as it's production!) | ||
| + | -Once the rule is added and your query is checked, add the rule to the main method (called localSystem in the code) so that it is invoked, e.g.< | ||
| + | localSystem { | ||
| + | .... | ||
| + | dd = ' | ||
| + | ee = ' | ||
| + | } | ||
| + | </ | ||