// Code generated by smithy-go-codegen DO NOT EDIT.

package types

import (
	"time"
)

// Defines an action to be initiated by a trigger.
type Action struct {

	// The job arguments used when this trigger fires. For this job run, they replace
	// the default arguments set in the job definition itself. You can specify
	// arguments here that your own job-execution script consumes, as well as arguments
	// that AWS Glue itself consumes. For information about how to specify and consume
	// your own Job arguments, see the Calling AWS Glue APIs in Python
	// (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html)
	// topic in the developer guide. For information about the key-value pairs that AWS
	// Glue consumes to set up your job, see the Special Parameters Used by AWS Glue
	// (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html)
	// topic in the developer guide.
	Arguments map[string]string

	// The name of the crawler to be used with this action.
	CrawlerName *string

	// The name of a job to be executed.
	JobName *string

	// Specifies configuration properties of a job run notification.
	NotificationProperty *NotificationProperty

	// The name of the SecurityConfiguration structure to be used with this action.
	SecurityConfiguration *string

	// The JobRun timeout in minutes. This is the maximum time that a job run can
	// consume resources before it is terminated and enters TIMEOUT status. The default
	// is 2,880 minutes (48 hours). This overrides the timeout value set in the parent
	// job.
	Timeout *int32
}

// A list of errors that can occur when registering partition indexes for an
// existing table. These errors give the details about why an index registration
// failed and provide a limited number of partitions in the response, so that you
// can fix the partitions at fault and try registering the index again. The most
// common set of errors that can occur are categorized as follows:
//
// *
// EncryptedPartitionError: The partitions are encrypted.
//
// *
// InvalidPartitionTypeDataError: The partition value doesn't match the data type
// for that partition column.
//
// * MissingPartitionValueError: The partition value is
// missing. (NOTE(review): the upstream doc text duplicated the "partitions are
// encrypted" sentence here; corrected to match the error name.)
//
// * UnsupportedPartitionCharacterError: Characters inside the
// partition value are not supported. For example: U+0000 , U+0001, U+0002.
//
// *
// InternalError: Any error which does not belong to other error codes.
type BackfillError struct {

	// The error code for an error that occurred when registering partition indexes for
	// an existing table.
	Code BackfillErrorCode

	// A list of a limited number of partitions in the response.
	Partitions []PartitionValueList
}

// Records an error that occurred when attempting to stop a specified job run.
type BatchStopJobRunError struct {

	// Specifies details about the error that was encountered.
	ErrorDetail *ErrorDetail

	// The name of the job definition that is used in the job run in question.
	JobName *string

	// The JobRunId of the job run in question.
	JobRunId *string
}

// Records a successful request to stop a specified JobRun.
type BatchStopJobRunSuccessfulSubmission struct {

	// The name of the job definition used in the job run that was stopped.
	JobName *string

	// The JobRunId of the job run that was stopped.
	JobRunId *string
}

// Contains information about a batch update partition error.
type BatchUpdatePartitionFailureEntry struct {

	// The details about the batch update partition error.
	ErrorDetail *ErrorDetail

	// A list of values defining the partitions.
	PartitionValueList []string
}

// A structure that contains the values and structure used to update a partition.
type BatchUpdatePartitionRequestEntry struct {

	// The structure used to update a partition.
	//
	// This member is required.
	PartitionInput *PartitionInput

	// A list of values defining the partitions.
	//
	// This member is required.
	PartitionValueList []string
}

// Defines column statistics supported for bit sequence data values.
type BinaryColumnStatisticsData struct {

	// The average bit sequence length in the column.
	//
	// This member is required.
	AverageLength float64

	// The size of the longest bit sequence in the column.
	//
	// This member is required.
	MaximumLength int64

	// The number of null values in the column.
	//
	// This member is required.
	NumberOfNulls int64
}

// Defines column statistics supported for Boolean data columns.
type BooleanColumnStatisticsData struct {

	// The number of false values in the column.
	//
	// This member is required.
	NumberOfFalses int64

	// The number of null values in the column.
	//
	// This member is required.
	NumberOfNulls int64

	// The number of true values in the column.
	//
	// This member is required.
	NumberOfTrues int64
}

// Specifies a table definition in the AWS Glue Data Catalog.
type CatalogEntry struct {

	// The database in which the table metadata resides.
	//
	// This member is required.
	DatabaseName *string

	// The name of the table in question.
	//
	// This member is required.
	TableName *string
}

// A structure containing migration status information.
type CatalogImportStatus struct {

	// True if the migration has completed, or False otherwise.
	ImportCompleted bool

	// The time that the migration was started.
	ImportTime *time.Time

	// The name of the person who initiated the migration.
	ImportedBy *string
}

// Specifies an AWS Glue Data Catalog target.
type CatalogTarget struct {

	// The name of the database to be synchronized.
	//
	// This member is required.
	DatabaseName *string

	// A list of the tables to be synchronized.
	//
	// This member is required.
	Tables []string
}

// Classifiers are triggered during a crawl task. A classifier checks whether a
// given file is in a format it can handle. If it is, the classifier creates a
// schema in the form of a StructType object that matches that data format. You can
// use the standard classifiers that AWS Glue provides, or you can write your own
// classifiers to best categorize your data sources and specify the appropriate
// schemas to use for them. A classifier can be a grok classifier, an XML
// classifier, a JSON classifier, or a custom CSV classifier, as specified in one
// of the fields in the Classifier object.
type Classifier struct {

	// A classifier for comma-separated values (CSV).
	CsvClassifier *CsvClassifier

	// A classifier that uses grok.
	GrokClassifier *GrokClassifier

	// A classifier for JSON content.
	JsonClassifier *JsonClassifier

	// A classifier for XML content.
	XMLClassifier *XMLClassifier
}

// Specifies how Amazon CloudWatch data should be encrypted.
type CloudWatchEncryption struct {

	// The encryption mode to use for CloudWatch data.
	CloudWatchEncryptionMode CloudWatchEncryptionMode

	// The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.
	KmsKeyArn *string
}

// Represents a directional edge in a directed acyclic graph (DAG).
type CodeGenEdge struct {

	// The ID of the node at which the edge starts.
	//
	// This member is required.
	Source *string

	// The ID of the node at which the edge ends.
	//
	// This member is required.
	Target *string

	// The target of the edge.
	TargetParameter *string
}

// Represents a node in a directed acyclic graph (DAG)
type CodeGenNode struct {

	// Properties of the node, in the form of name-value pairs.
	//
	// This member is required.
	Args []CodeGenNodeArg

	// A node identifier that is unique within the node's graph.
	//
	// This member is required.
	Id *string

	// The type of node that this is.
	//
	// This member is required.
	NodeType *string

	// The line number of the node.
	LineNumber int32
}

// An argument or property of a node.
type CodeGenNodeArg struct {

	// The name of the argument or property.
	//
	// This member is required.
	Name *string

	// The value of the argument or property.
	//
	// This member is required.
	Value *string

	// True if the value is used as a parameter.
	Param bool
}

// A column in a Table.
type Column struct {

	// The name of the Column.
	//
	// This member is required.
	Name *string

	// A free-form text comment.
	Comment *string

	// These key-value pairs define properties associated with the column.
	Parameters map[string]string

	// The data type of the Column.
	Type *string
}

// Encapsulates a column name that failed and the reason for failure.
type ColumnError struct {

	// The name of the column that failed.
	ColumnName *string

	// An error message with the reason for the failure of an operation.
	Error *ErrorDetail
}

// A structure containing the column name and column importance score for a column.
// Column importance helps you understand how columns contribute to your model, by
// identifying which columns in your records are more important than others.
type ColumnImportance struct {

	// The name of a column.
	ColumnName *string

	// The column importance score for the column, as a decimal.
	Importance *float64
}

// Represents the generated column-level statistics for a table or partition.
type ColumnStatistics struct {

	// The timestamp of when column statistics were generated.
	//
	// This member is required.
	AnalyzedTime *time.Time

	// Name of column which statistics belong to.
	//
	// This member is required.
	ColumnName *string

	// The data type of the column.
	//
	// This member is required.
	ColumnType *string

	// A ColumnStatisticData object that contains the statistics data values.
	//
	// This member is required.
	StatisticsData *ColumnStatisticsData
}

// Contains the individual types of column statistics data. Only one data object
// should be set and indicated by the Type attribute.
type ColumnStatisticsData struct {

	// The type of column statistics data.
	//
	// This member is required.
	Type ColumnStatisticsType

	// Binary column statistics data.
	BinaryColumnStatisticsData *BinaryColumnStatisticsData

	// Boolean column statistics data.
	BooleanColumnStatisticsData *BooleanColumnStatisticsData

	// Date column statistics data.
	DateColumnStatisticsData *DateColumnStatisticsData

	// Decimal column statistics data.
	DecimalColumnStatisticsData *DecimalColumnStatisticsData

	// Double column statistics data.
	DoubleColumnStatisticsData *DoubleColumnStatisticsData

	// Long column statistics data.
	LongColumnStatisticsData *LongColumnStatisticsData

	// String column statistics data.
	StringColumnStatisticsData *StringColumnStatisticsData
}

// Encapsulates a ColumnStatistics object that failed and the reason for failure.
type ColumnStatisticsError struct {

	// The ColumnStatistics of the column.
	ColumnStatistics *ColumnStatistics

	// An error message with the reason for the failure of an operation.
	Error *ErrorDetail
}

// Defines a condition under which a trigger fires.
type Condition struct {

	// The state of the crawler to which this condition applies.
	CrawlState CrawlState

	// The name of the crawler to which this condition applies.
	CrawlerName *string

	// The name of the job whose JobRuns this condition applies to, and on which this
	// trigger waits.
	JobName *string

	// A logical operator.
	LogicalOperator LogicalOperator

	// The condition state. Currently, the only job states that a trigger can listen
	// for are SUCCEEDED, STOPPED, FAILED, and TIMEOUT. The only crawler states that a
	// trigger can listen for are SUCCEEDED, FAILED, and CANCELLED.
	State JobRunState
}

// The confusion matrix shows you what your transform is predicting accurately and
// what types of errors it is making. For more information, see Confusion matrix
// (https://en.wikipedia.org/wiki/Confusion_matrix) in Wikipedia.
type ConfusionMatrix struct {

	// The number of matches in the data that the transform didn't find, in the
	// confusion matrix for your transform.
	NumFalseNegatives *int64

	// The number of nonmatches in the data that the transform incorrectly classified
	// as a match, in the confusion matrix for your transform.
	NumFalsePositives *int64

	// The number of nonmatches in the data that the transform correctly rejected, in
	// the confusion matrix for your transform.
	NumTrueNegatives *int64

	// The number of matches in the data that the transform correctly found, in the
	// confusion matrix for your transform.
	NumTruePositives *int64
}

// Defines a connection to a data source.
type Connection struct {

	// These key-value pairs define parameters for the connection:
	//
	// * HOST - The host
	// URI: either the fully qualified domain name (FQDN) or the IPv4 address of the
	// database host.
	//
	// * PORT - The port number, between 1024 and 65535, of the port on
	// which the database host is listening for database connections.
	//
	// * USER_NAME -
	// The name under which to log in to the database. The value string for USER_NAME
	// is "USERNAME".
	//
	// * PASSWORD - A password, if one is used, for the user name.
	//
	// *
	// ENCRYPTED_PASSWORD - When you enable connection password protection by setting
	// ConnectionPasswordEncryption in the Data Catalog encryption settings, this field
	// stores the encrypted password.
	//
	// * JDBC_DRIVER_JAR_URI - The Amazon Simple
	// Storage Service (Amazon S3) path of the JAR file that contains the JDBC driver
	// to use.
	//
	// * JDBC_DRIVER_CLASS_NAME - The class name of the JDBC driver to use.
	//
	// *
	// JDBC_ENGINE - The name of the JDBC engine to use.
	//
	// * JDBC_ENGINE_VERSION - The
	// version of the JDBC engine to use.
	//
	// * CONFIG_FILES - (Reserved for future
	// use.)
	//
	// * INSTANCE_ID - The instance ID to use.
	//
	// * JDBC_CONNECTION_URL - The URL
	// for connecting to a JDBC data source.
	//
	// * JDBC_ENFORCE_SSL - A Boolean string
	// (true, false) specifying whether Secure Sockets Layer (SSL) with hostname
	// matching is enforced for the JDBC connection on the client. The default is
	// false.
	//
	// * CUSTOM_JDBC_CERT - An Amazon S3 location specifying the customer's
	// root certificate. AWS Glue uses this root certificate to validate the customer’s
	// certificate when connecting to the customer database. AWS Glue only handles
	// X.509 certificates. The certificate provided must be DER-encoded and supplied in
	// Base64 encoding PEM format.
	//
	// * SKIP_CUSTOM_JDBC_CERT_VALIDATION - By default,
	// this is false. AWS Glue validates the Signature algorithm and Subject Public Key
	// Algorithm for the customer certificate. The only permitted algorithms for the
	// Signature algorithm are SHA256withRSA, SHA384withRSA or SHA512withRSA. For the
	// Subject Public Key Algorithm, the key length must be at least 2048. You can set
	// the value of this property to true to skip AWS Glue’s validation of the customer
	// certificate.
	//
	// * CUSTOM_JDBC_CERT_STRING - A custom JDBC certificate string which
	// is used for domain match or distinguished name match to prevent a
	// man-in-the-middle attack. In Oracle database, this is used as the
	// SSL_SERVER_CERT_DN; in Microsoft SQL Server, this is used as the
	// hostNameInCertificate.
	//
	// * CONNECTION_URL - The URL for connecting to a general
	// (non-JDBC) data source.
	//
	// * KAFKA_BOOTSTRAP_SERVERS - A comma-separated list of
	// host and port pairs that are the addresses of the Apache Kafka brokers in a
	// Kafka cluster to which a Kafka client will connect to and bootstrap itself.
	//
	// *
	// KAFKA_SSL_ENABLED - Whether to enable or disable SSL on an Apache Kafka
	// connection. Default value is "true".
	//
	// * KAFKA_CUSTOM_CERT - The Amazon S3 URL
	// for the private CA cert file (.pem format). The default is an empty string.
	//
	// *
	// KAFKA_SKIP_CUSTOM_CERT_VALIDATION - Whether to skip the validation of the CA
	// cert file or not. AWS Glue validates for three algorithms: SHA256withRSA,
	// SHA384withRSA and SHA512withRSA. Default value is "false".
	//
	// * SECRET_ID - The
	// secret ID used for the secret manager of credentials.
	//
	// * CONNECTOR_URL - The
	// connector URL for a MARKETPLACE or CUSTOM connection.
	//
	// * CONNECTOR_TYPE - The
	// connector type for a MARKETPLACE or CUSTOM connection.
	//
	// * CONNECTOR_CLASS_NAME -
	// The connector class name for a MARKETPLACE or CUSTOM connection.
	//
	// *
	// KAFKA_CLIENT_KEYSTORE - The Amazon S3 location of the client keystore file for
	// Kafka client side authentication (Optional).
	//
	// * KAFKA_CLIENT_KEYSTORE_PASSWORD -
	// The password to access the provided keystore (Optional).
	//
	// *
	// KAFKA_CLIENT_KEY_PASSWORD - A keystore can consist of multiple keys, so this is
	// the password to access the client key to be used with the Kafka server side key
	// (Optional).
	//
	// * ENCRYPTED_KAFKA_CLIENT_KEYSTORE_PASSWORD - The encrypted version
	// of the Kafka client keystore password (if the user has the AWS Glue encrypt
	// passwords setting selected).
	//
	// * ENCRYPTED_KAFKA_CLIENT_KEY_PASSWORD - The
	// encrypted version of the Kafka client key password (if the user has the AWS Glue
	// encrypt passwords setting selected).
	ConnectionProperties map[string]string

	// The type of the connection. Currently, SFTP is not supported.
	ConnectionType ConnectionType

	// The time that this connection definition was created.
	CreationTime *time.Time

	// The description of the connection.
	Description *string

	// The user, group, or role that last updated this connection definition.
	LastUpdatedBy *string

	// The last time that this connection definition was updated.
	LastUpdatedTime *time.Time

	// A list of criteria that can be used in selecting this connection.
	MatchCriteria []string

	// The name of the connection definition.
	Name *string

	// A map of physical connection requirements, such as virtual private cloud (VPC)
	// and SecurityGroup, that are needed to make this connection successfully.
	PhysicalConnectionRequirements *PhysicalConnectionRequirements
}

// A structure that is used to specify a connection to create or update.
type ConnectionInput struct {

	// These key-value pairs define parameters for the connection.
	//
	// This member is required.
	ConnectionProperties map[string]string

	// The type of the connection. Currently, these types are supported:
	//
	// * JDBC -
	// Designates a connection to a database through Java Database Connectivity
	// (JDBC).
	//
	// * KAFKA - Designates a connection to an Apache Kafka streaming
	// platform.
	//
	// * MONGODB - Designates a connection to a MongoDB document
	// database.
	//
	// * NETWORK - Designates a network connection to a data source within
	// an Amazon Virtual Private Cloud environment (Amazon VPC).
	//
	// * MARKETPLACE - Uses
	// configuration settings contained in a connector purchased from AWS Marketplace
	// to read from and write to data stores that are not natively supported by AWS
	// Glue.
	//
	// * CUSTOM - Uses configuration settings contained in a custom connector to
	// read from and write to data stores that are not natively supported by AWS
	// Glue.
	//
	// SFTP is not supported.
	//
	// This member is required.
	ConnectionType ConnectionType

	// The name of the connection.
	//
	// This member is required.
	Name *string

	// The description of the connection.
	Description *string

	// A list of criteria that can be used in selecting this connection.
	MatchCriteria []string

	// A map of physical connection requirements, such as virtual private cloud (VPC)
	// and SecurityGroup, that are needed to successfully make this connection.
	PhysicalConnectionRequirements *PhysicalConnectionRequirements
}

// The data structure used by the Data Catalog to encrypt the password as part of
// CreateConnection or UpdateConnection and store it in the ENCRYPTED_PASSWORD
// field in the connection properties. You can enable catalog encryption or only
// password encryption. When a CreateConnection request arrives containing a
// password, the Data Catalog first encrypts the password using your AWS KMS key.
// It then encrypts the whole connection object again if catalog encryption is also
// enabled. This encryption requires that you set AWS KMS key permissions to enable
// or restrict access on the password key according to your security requirements.
// For example, you might want only administrators to have decrypt permission on
// the password key.
type ConnectionPasswordEncryption struct {

	// When the ReturnConnectionPasswordEncrypted flag is set to "true", passwords
	// remain encrypted in the responses of GetConnection and GetConnections. This
	// encryption takes effect independently from catalog encryption.
	//
	// This member is required.
	ReturnConnectionPasswordEncrypted bool

	// An AWS KMS key that is used to encrypt the connection password. If connection
	// password protection is enabled, the caller of CreateConnection and
	// UpdateConnection needs at least kms:Encrypt permission on the specified AWS KMS
	// key, to encrypt passwords before storing them in the Data Catalog. You can set
	// the decrypt permission to enable or restrict access on the password key
	// according to your security requirements.
	AwsKmsKeyId *string
}

// Specifies the connections used by a job.
type ConnectionsList struct {

	// A list of connections used by the job.
	Connections []string
}

// The details of a crawl in the workflow.
type Crawl struct {

	// The date and time on which the crawl completed.
	CompletedOn *time.Time

	// The error message associated with the crawl.
	ErrorMessage *string

	// The log group associated with the crawl.
	LogGroup *string

	// The log stream associated with the crawl.
	LogStream *string

	// The date and time on which the crawl started.
	StartedOn *time.Time

	// The state of the crawler.
	State CrawlState
}

// Specifies a crawler program that examines a data source and uses classifiers to
// try to determine its schema. If successful, the crawler records metadata
// concerning the data source in the AWS Glue Data Catalog.
type Crawler struct {

	// A list of UTF-8 strings that specify the custom classifiers that are associated
	// with the crawler.
	Classifiers []string

	// Crawler configuration information. This versioned JSON string allows users to
	// specify aspects of a crawler's behavior. For more information, see Configuring a
	// Crawler (https://docs.aws.amazon.com/glue/latest/dg/crawler-configuration.html).
	Configuration *string

	// If the crawler is running, contains the total time elapsed since the last crawl
	// began.
	CrawlElapsedTime int64

	// The name of the SecurityConfiguration structure to be used by this crawler.
	CrawlerSecurityConfiguration *string

	// The time that the crawler was created.
	CreationTime *time.Time

	// The name of the database in which the crawler's output is stored.
	DatabaseName *string

	// A description of the crawler.
	Description *string

	// The status of the last crawl, and potentially error information if an error
	// occurred.
	LastCrawl *LastCrawlInfo

	// The time that the crawler was last updated.
	LastUpdated *time.Time

	// A configuration that specifies whether data lineage is enabled for the crawler.
	LineageConfiguration *LineageConfiguration

	// The name of the crawler.
	Name *string

	// A policy that specifies whether to crawl the entire dataset again, or to crawl
	// only folders that were added since the last crawler run.
	RecrawlPolicy *RecrawlPolicy

	// The Amazon Resource Name (ARN) of an IAM role that's used to access customer
	// resources, such as Amazon Simple Storage Service (Amazon S3) data.
	Role *string

	// For scheduled crawlers, the schedule when the crawler runs.
	Schedule *Schedule

	// The policy that specifies update and delete behaviors for the crawler.
	SchemaChangePolicy *SchemaChangePolicy

	// Indicates whether the crawler is running, or whether a run is pending.
	State CrawlerState

	// The prefix added to the names of tables that are created.
	TablePrefix *string

	// A collection of targets to crawl.
	Targets *CrawlerTargets

	// The version of the crawler.
	Version int64
}

// Metrics for a specified crawler.
type CrawlerMetrics struct {

	// The name of the crawler.
	CrawlerName *string

	// The duration of the crawler's most recent run, in seconds.
	LastRuntimeSeconds float64

	// The median duration of this crawler's runs, in seconds.
	MedianRuntimeSeconds float64

	// True if the crawler is still estimating how long it will take to complete this
	// run.
	StillEstimating bool

	// The number of tables created by this crawler.
	TablesCreated int32

	// The number of tables deleted by this crawler.
	TablesDeleted int32

	// The number of tables updated by this crawler.
	TablesUpdated int32

	// The estimated time left to complete a running crawl.
	TimeLeftSeconds float64
}

// The details of a Crawler node present in the workflow.
type CrawlerNodeDetails struct {

	// A list of crawls represented by the crawl node.
	Crawls []Crawl
}

// Specifies data stores to crawl.
type CrawlerTargets struct {

	// Specifies AWS Glue Data Catalog targets.
	CatalogTargets []CatalogTarget

	// Specifies Amazon DynamoDB targets.
	DynamoDBTargets []DynamoDBTarget

	// Specifies JDBC targets.
	JdbcTargets []JdbcTarget

	// Specifies Amazon DocumentDB or MongoDB targets.
	MongoDBTargets []MongoDBTarget

	// Specifies Amazon Simple Storage Service (Amazon S3) targets.
	S3Targets []S3Target
}

// Specifies a custom CSV classifier for CreateClassifier to create.
type CreateCsvClassifierRequest struct {

	// The name of the classifier.
	//
	// This member is required.
	Name *string

	// Enables the processing of files that contain only one column.
	AllowSingleColumn *bool

	// Indicates whether the CSV file contains a header.
	ContainsHeader CsvHeaderOption

	// A custom symbol to denote what separates each column entry in the row.
	Delimiter *string

	// Specifies not to trim values before identifying the type of column values. The
	// default value is true.
	DisableValueTrimming *bool

	// A list of strings representing column names.
	Header []string

	// A custom symbol to denote what combines content into a single column value. Must
	// be different from the column delimiter.
	QuoteSymbol *string
}

// Specifies a grok classifier for CreateClassifier to create.
type CreateGrokClassifierRequest struct {

	// An identifier of the data format that the classifier matches, such as Twitter,
	// JSON, Omniture logs, Amazon CloudWatch Logs, and so on.
	//
	// This member is required.
	Classification *string

	// The grok pattern used by this classifier.
	//
	// This member is required.
	GrokPattern *string

	// The name of the new classifier.
	//
	// This member is required.
	Name *string

	// Optional custom grok patterns used by this classifier.
	CustomPatterns *string
}

// Specifies a JSON classifier for CreateClassifier to create.
type CreateJsonClassifierRequest struct {

	// A JsonPath string defining the JSON data for the classifier to classify. AWS
	// Glue supports a subset of JsonPath, as described in Writing JsonPath Custom
	// Classifiers
	// (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json).
	//
	// This member is required.
	JsonPath *string

	// The name of the classifier.
	//
	// This member is required.
	Name *string
}

// Specifies an XML classifier for CreateClassifier to create.
type CreateXMLClassifierRequest struct {

	// An identifier of the data format that the classifier matches.
	//
	// This member is required.
	Classification *string

	// The name of the classifier.
	//
	// This member is required.
	Name *string

	// The XML tag designating the element that contains each record in an XML document
	// being parsed. This can't identify a self-closing element (closed by />). An
	// empty row element that contains only attributes can be parsed as long as it ends
	// with a closing tag (for example, <row item_a="A" item_b="B"></row> is okay, but
	// <row item_a="A" item_b="B" /> is not).
	RowTag *string
}

// A classifier for custom CSV content.
type CsvClassifier struct {

	// The name of the classifier.
	//
	// This member is required.
	Name *string

	// Enables the processing of files that contain only one column.
	AllowSingleColumn *bool

	// Indicates whether the CSV file contains a header.
	ContainsHeader CsvHeaderOption

	// The time that this classifier was registered.
	CreationTime *time.Time

	// A custom symbol to denote what separates each column entry in the row.
	Delimiter *string

	// Specifies not to trim values before identifying the type of column values. The
	// default value is true.
	DisableValueTrimming *bool

	// A list of strings representing column names.
	Header []string

	// The time that this classifier was last updated.
	LastUpdated *time.Time

	// A custom symbol to denote what combines content into a single column value. It
	// must be different from the column delimiter.
	QuoteSymbol *string

	// The version of this classifier.
	Version int64
}

// The Database object represents a logical grouping of tables that might reside in
// a Hive metastore or an RDBMS.
type Database struct {

	// The name of the database. For Hive compatibility, this is folded to lowercase
	// when it is stored.
	//
	// This member is required.
	Name *string

	// The ID of the Data Catalog in which the database resides.
	CatalogId *string

	// Creates a set of default permissions on the table for principals.
	CreateTableDefaultPermissions []PrincipalPermissions

	// The time at which the metadata database was created in the catalog.
	CreateTime *time.Time

	// A description of the database.
	Description *string

	// The location of the database (for example, an HDFS path).
	LocationUri *string

	// These key-value pairs define parameters and properties of the database.
	Parameters map[string]string

	// A DatabaseIdentifier structure that describes a target database for resource
	// linking.
	TargetDatabase *DatabaseIdentifier
}

// A structure that describes a target database for resource linking.
type DatabaseIdentifier struct {

	// The ID of the Data Catalog in which the database resides.
	CatalogId *string

	// The name of the catalog database.
	DatabaseName *string
}

// The structure used to create or update a database.
type DatabaseInput struct {

	// The name of the database. For Hive compatibility, this is folded to lowercase
	// when it is stored.
	//
	// This member is required.
	Name *string

	// Creates a set of default permissions on the table for principals.
	CreateTableDefaultPermissions []PrincipalPermissions

	// A description of the database.
	Description *string

	// The location of the database (for example, an HDFS path).
	LocationUri *string

	// These key-value pairs define parameters and properties of the database.
	Parameters map[string]string

	// A DatabaseIdentifier structure that describes a target database for resource
	// linking.
	TargetDatabase *DatabaseIdentifier
}

// Contains configuration information for maintaining Data Catalog security.
type DataCatalogEncryptionSettings struct {

	// When connection password protection is enabled, the Data Catalog uses a
	// customer-provided key to encrypt the password as part of CreateConnection or
	// UpdateConnection and store it in the ENCRYPTED_PASSWORD field in the connection
	// properties. You can enable catalog encryption or only password encryption.
	ConnectionPasswordEncryption *ConnectionPasswordEncryption

	// Specifies the encryption-at-rest configuration for the Data Catalog.
	EncryptionAtRest *EncryptionAtRest
}

// The AWS Lake Formation principal.
type DataLakePrincipal struct {

	// An identifier for the AWS Lake Formation principal.
1033 DataLakePrincipalIdentifier *string 1034} 1035 1036// Defines column statistics supported for timestamp data columns. 1037type DateColumnStatisticsData struct { 1038 1039 // The number of distinct values in a column. 1040 // 1041 // This member is required. 1042 NumberOfDistinctValues int64 1043 1044 // The number of null values in the column. 1045 // 1046 // This member is required. 1047 NumberOfNulls int64 1048 1049 // The highest value in the column. 1050 MaximumValue *time.Time 1051 1052 // The lowest value in the column. 1053 MinimumValue *time.Time 1054} 1055 1056// Defines column statistics supported for fixed-point number data columns. 1057type DecimalColumnStatisticsData struct { 1058 1059 // The number of distinct values in a column. 1060 // 1061 // This member is required. 1062 NumberOfDistinctValues int64 1063 1064 // The number of null values in the column. 1065 // 1066 // This member is required. 1067 NumberOfNulls int64 1068 1069 // The highest value in the column. 1070 MaximumValue *DecimalNumber 1071 1072 // The lowest value in the column. 1073 MinimumValue *DecimalNumber 1074} 1075 1076// Contains a numeric value in decimal format. 1077type DecimalNumber struct { 1078 1079 // The scale that determines where the decimal point falls in the unscaled value. 1080 // 1081 // This member is required. 1082 Scale int32 1083 1084 // The unscaled numeric value. 1085 // 1086 // This member is required. 1087 UnscaledValue []byte 1088} 1089 1090// A development endpoint where a developer can remotely debug extract, transform, 1091// and load (ETL) scripts. 1092type DevEndpoint struct { 1093 1094 // A map of arguments used to configure the DevEndpoint. 
Valid arguments are: 1095 // 1096 // * 1097 // "--enable-glue-datacatalog": "" 1098 // 1099 // * "GLUE_PYTHON_VERSION": "3" 1100 // 1101 // * 1102 // "GLUE_PYTHON_VERSION": "2" 1103 // 1104 // You can specify a version of Python support for 1105 // development endpoints by using the Arguments parameter in the CreateDevEndpoint 1106 // or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to 1107 // Python 2. 1108 Arguments map[string]string 1109 1110 // The AWS Availability Zone where this DevEndpoint is located. 1111 AvailabilityZone *string 1112 1113 // The point in time at which this DevEndpoint was created. 1114 CreatedTimestamp *time.Time 1115 1116 // The name of the DevEndpoint. 1117 EndpointName *string 1118 1119 // The path to one or more Java .jar files in an S3 bucket that should be loaded in 1120 // your DevEndpoint. You can only use pure Java/Scala libraries with a DevEndpoint. 1121 ExtraJarsS3Path *string 1122 1123 // The paths to one or more Python libraries in an Amazon S3 bucket that should be 1124 // loaded in your DevEndpoint. Multiple values must be complete paths separated by 1125 // a comma. You can only use pure Python libraries with a DevEndpoint. Libraries 1126 // that rely on C extensions, such as the pandas (http://pandas.pydata.org/) Python 1127 // data analysis library, are not currently supported. 1128 ExtraPythonLibsS3Path *string 1129 1130 // The reason for a current failure in this DevEndpoint. 1131 FailureReason *string 1132 1133 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1134 // supports. The Python version indicates the version supported for running your 1135 // ETL scripts on development endpoints. For more information about the available 1136 // AWS Glue versions and corresponding Spark and Python versions, see Glue version 1137 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1138 // guide. 
Development endpoints that are created without specifying a Glue version 1139 // default to Glue 0.9. You can specify a version of Python support for development 1140 // endpoints by using the Arguments parameter in the CreateDevEndpoint or 1141 // UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to 1142 // Python 2. 1143 GlueVersion *string 1144 1145 // The point in time at which this DevEndpoint was last modified. 1146 LastModifiedTimestamp *time.Time 1147 1148 // The status of the last update. 1149 LastUpdateStatus *string 1150 1151 // The number of AWS Glue Data Processing Units (DPUs) allocated to this 1152 // DevEndpoint. 1153 NumberOfNodes int32 1154 1155 // The number of workers of a defined workerType that are allocated to the 1156 // development endpoint. The maximum number of workers you can define are 299 for 1157 // G.1X, and 149 for G.2X. 1158 NumberOfWorkers *int32 1159 1160 // A private IP address to access the DevEndpoint within a VPC if the DevEndpoint 1161 // is created within one. The PrivateAddress field is present only when you create 1162 // the DevEndpoint within your VPC. 1163 PrivateAddress *string 1164 1165 // The public IP address used by this DevEndpoint. The PublicAddress field is 1166 // present only when you create a non-virtual private cloud (VPC) DevEndpoint. 1167 PublicAddress *string 1168 1169 // The public key to be used by this DevEndpoint for authentication. This attribute 1170 // is provided for backward compatibility because the recommended attribute to use 1171 // is public keys. 1172 PublicKey *string 1173 1174 // A list of public keys to be used by the DevEndpoints for authentication. Using 1175 // this attribute is preferred over a single public key because the public keys 1176 // allow you to have a different private key per client. If you previously created 1177 // an endpoint with a public key, you must remove that key to be able to set a list 1178 // of public keys. 
Call the UpdateDevEndpoint API operation with the public key 1179 // content in the deletePublicKeys attribute, and the list of new keys in the 1180 // addPublicKeys attribute. 1181 PublicKeys []string 1182 1183 // The Amazon Resource Name (ARN) of the IAM role used in this DevEndpoint. 1184 RoleArn *string 1185 1186 // The name of the SecurityConfiguration structure to be used with this 1187 // DevEndpoint. 1188 SecurityConfiguration *string 1189 1190 // A list of security group identifiers used in this DevEndpoint. 1191 SecurityGroupIds []string 1192 1193 // The current status of this DevEndpoint. 1194 Status *string 1195 1196 // The subnet ID for this DevEndpoint. 1197 SubnetId *string 1198 1199 // The ID of the virtual private cloud (VPC) used by this DevEndpoint. 1200 VpcId *string 1201 1202 // The type of predefined worker that is allocated to the development endpoint. 1203 // Accepts a value of Standard, G.1X, or G.2X. 1204 // 1205 // * For the Standard worker type, 1206 // each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors 1207 // per worker. 1208 // 1209 // * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 1210 // GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend this 1211 // worker type for memory-intensive jobs. 1212 // 1213 // * For the G.2X worker type, each worker 1214 // maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor 1215 // per worker. We recommend this worker type for memory-intensive jobs. 1216 // 1217 // Known 1218 // issue: when a development endpoint is created with the G.2XWorkerType 1219 // configuration, the Spark drivers for the development endpoint will run on 4 1220 // vCPU, 16 GB of memory, and a 64 GB disk. 1221 WorkerType WorkerType 1222 1223 // The YARN endpoint address used by this DevEndpoint. 1224 YarnEndpointAddress *string 1225 1226 // The Apache Zeppelin port for the remote Apache Spark interpreter. 
1227 ZeppelinRemoteSparkInterpreterPort int32 1228} 1229 1230// Custom libraries to be loaded into a development endpoint. 1231type DevEndpointCustomLibraries struct { 1232 1233 // The path to one or more Java .jar files in an S3 bucket that should be loaded in 1234 // your DevEndpoint. You can only use pure Java/Scala libraries with a DevEndpoint. 1235 ExtraJarsS3Path *string 1236 1237 // The paths to one or more Python libraries in an Amazon Simple Storage Service 1238 // (Amazon S3) bucket that should be loaded in your DevEndpoint. Multiple values 1239 // must be complete paths separated by a comma. You can only use pure Python 1240 // libraries with a DevEndpoint. Libraries that rely on C extensions, such as the 1241 // pandas (http://pandas.pydata.org/) Python data analysis library, are not 1242 // currently supported. 1243 ExtraPythonLibsS3Path *string 1244} 1245 1246// Defines column statistics supported for floating-point number data columns. 1247type DoubleColumnStatisticsData struct { 1248 1249 // The number of distinct values in a column. 1250 // 1251 // This member is required. 1252 NumberOfDistinctValues int64 1253 1254 // The number of null values in the column. 1255 // 1256 // This member is required. 1257 NumberOfNulls int64 1258 1259 // The highest value in the column. 1260 MaximumValue float64 1261 1262 // The lowest value in the column. 1263 MinimumValue float64 1264} 1265 1266// Specifies an Amazon DynamoDB table to crawl. 1267type DynamoDBTarget struct { 1268 1269 // The name of the DynamoDB table to crawl. 1270 Path *string 1271 1272 // Indicates whether to scan all the records, or to sample rows from the table. 1273 // Scanning all the records can take a long time when the table is not a high 1274 // throughput table. A value of true means to scan all records, while a value of 1275 // false means to sample the records. If no value is specified, the value defaults 1276 // to true. 
	ScanAll *bool

	// The percentage of the configured read capacity units to use by the AWS Glue
	// crawler. Read capacity units is a term defined by DynamoDB, and is a numeric
	// value that acts as rate limiter for the number of reads that can be performed on
	// that table per second. The valid values are null or a value between 0.1 to 1.5.
	// A null value is used when user does not provide a value, and defaults to 0.5 of
	// the configured Read Capacity Unit (for provisioned tables), or 0.25 of the max
	// configured Read Capacity Unit (for tables using on-demand mode).
	ScanRate *float64
}

// An edge represents a directed connection between two AWS Glue components that
// are part of the workflow the edge belongs to.
type Edge struct {

	// The unique identifier of the node within the workflow where the edge ends.
	DestinationId *string

	// The unique identifier of the node within the workflow where the edge starts.
	SourceId *string
}

// Specifies the encryption-at-rest configuration for the Data Catalog.
type EncryptionAtRest struct {

	// The encryption-at-rest mode for encrypting Data Catalog data.
	//
	// This member is required.
	CatalogEncryptionMode CatalogEncryptionMode

	// The ID of the AWS KMS key to use for encryption at rest.
	SseAwsKmsKeyId *string
}

// Specifies an encryption configuration.
type EncryptionConfiguration struct {

	// The encryption configuration for Amazon CloudWatch.
	CloudWatchEncryption *CloudWatchEncryption

	// The encryption configuration for job bookmarks.
	JobBookmarksEncryption *JobBookmarksEncryption

	// The encryption configuration for Amazon Simple Storage Service (Amazon S3) data.
	S3Encryption []S3Encryption
}

// Contains details about an error.
type ErrorDetail struct {

	// The code associated with this error.
1329 ErrorCode *string 1330 1331 // A message describing the error. 1332 ErrorMessage *string 1333} 1334 1335// An object containing error details. 1336type ErrorDetails struct { 1337 1338 // The error code for an error. 1339 ErrorCode *string 1340 1341 // The error message for an error. 1342 ErrorMessage *string 1343} 1344 1345// Evaluation metrics provide an estimate of the quality of your machine learning 1346// transform. 1347type EvaluationMetrics struct { 1348 1349 // The type of machine learning transform. 1350 // 1351 // This member is required. 1352 TransformType TransformType 1353 1354 // The evaluation metrics for the find matches algorithm. 1355 FindMatchesMetrics *FindMatchesMetrics 1356} 1357 1358// An execution property of a job. 1359type ExecutionProperty struct { 1360 1361 // The maximum number of concurrent runs allowed for the job. The default is 1. An 1362 // error is returned when this threshold is reached. The maximum value you can 1363 // specify is controlled by a service limit. 1364 MaxConcurrentRuns int32 1365} 1366 1367// Specifies configuration properties for an exporting labels task run. 1368type ExportLabelsTaskRunProperties struct { 1369 1370 // The Amazon Simple Storage Service (Amazon S3) path where you will export the 1371 // labels. 1372 OutputS3Path *string 1373} 1374 1375// The evaluation metrics for the find matches algorithm. The quality of your 1376// machine learning transform is measured by getting your transform to predict some 1377// matches and comparing the results to known matches from the same dataset. The 1378// quality metrics are based on a subset of your data, so they are not precise. 1379type FindMatchesMetrics struct { 1380 1381 // The area under the precision/recall curve (AUPRC) is a single number measuring 1382 // the overall quality of the transform, that is independent of the choice made for 1383 // precision vs. recall. Higher values indicate that you have a more attractive 1384 // precision vs. 
	// recall tradeoff. For more information, see Precision and recall
	// (https://en.wikipedia.org/wiki/Precision_and_recall) in Wikipedia.
	AreaUnderPRCurve *float64

	// A list of ColumnImportance structures containing column importance metrics,
	// sorted in order of descending importance.
	ColumnImportances []ColumnImportance

	// The confusion matrix shows you what your transform is predicting accurately and
	// what types of errors it is making. For more information, see Confusion matrix
	// (https://en.wikipedia.org/wiki/Confusion_matrix) in Wikipedia.
	ConfusionMatrix *ConfusionMatrix

	// The maximum F1 metric indicates the transform's accuracy between 0 and 1, where
	// 1 is the best accuracy. For more information, see F1 score
	// (https://en.wikipedia.org/wiki/F1_score) in Wikipedia.
	F1 *float64

	// The precision metric indicates how often your transform is correct when it
	// predicts a match. Specifically, it measures how well the transform finds true
	// positives from the total true positives possible. For more information, see
	// Precision and recall (https://en.wikipedia.org/wiki/Precision_and_recall) in
	// Wikipedia.
	Precision *float64

	// The recall metric indicates that for an actual match, how often your transform
	// predicts the match. Specifically, it measures how well the transform finds true
	// positives from the total records in the source data. For more information, see
	// Precision and recall (https://en.wikipedia.org/wiki/Precision_and_recall) in
	// Wikipedia.
	Recall *float64
}

// The parameters to configure the find matches transform.
type FindMatchesParameters struct {

	// The value that is selected when tuning your transform for a balance between
	// accuracy and cost. A value of 0.5 means that the system balances accuracy and
	// cost concerns.
A value of 1.0 means a bias purely for accuracy, which typically 1423 // results in a higher cost, sometimes substantially higher. A value of 0.0 means a 1424 // bias purely for cost, which results in a less accurate FindMatches transform, 1425 // sometimes with unacceptable accuracy. Accuracy measures how well the transform 1426 // finds true positives and true negatives. Increasing accuracy requires more 1427 // machine resources and cost. But it also results in increased recall. Cost 1428 // measures how many compute resources, and thus money, are consumed to run the 1429 // transform. 1430 AccuracyCostTradeoff *float64 1431 1432 // The value to switch on or off to force the output to match the provided labels 1433 // from users. If the value is True, the find matches transform forces the output 1434 // to match the provided labels. The results override the normal conflation 1435 // results. If the value is False, the find matches transform does not ensure all 1436 // the labels provided are respected, and the results rely on the trained model. 1437 // Note that setting this value to true may increase the conflation execution time. 1438 EnforceProvidedLabels *bool 1439 1440 // The value selected when tuning your transform for a balance between precision 1441 // and recall. A value of 0.5 means no preference; a value of 1.0 means a bias 1442 // purely for precision, and a value of 0.0 means a bias for recall. Because this 1443 // is a tradeoff, choosing values close to 1.0 means very low recall, and choosing 1444 // values close to 0.0 results in very low precision. The precision metric 1445 // indicates how often your model is correct when it predicts a match. The recall 1446 // metric indicates that for an actual match, how often your model predicts the 1447 // match. 1448 PrecisionRecallTradeoff *float64 1449 1450 // The name of a column that uniquely identifies rows in the source table. Used to 1451 // help identify matching records. 
1452 PrimaryKeyColumnName *string 1453} 1454 1455// Specifies configuration properties for a Find Matches task run. 1456type FindMatchesTaskRunProperties struct { 1457 1458 // The job ID for the Find Matches task run. 1459 JobId *string 1460 1461 // The name assigned to the job for the Find Matches task run. 1462 JobName *string 1463 1464 // The job run ID for the Find Matches task run. 1465 JobRunId *string 1466} 1467 1468// Filters the connection definitions that are returned by the GetConnections API 1469// operation. 1470type GetConnectionsFilter struct { 1471 1472 // The type of connections to return. Currently, SFTP is not supported. 1473 ConnectionType ConnectionType 1474 1475 // A criteria string that must match the criteria recorded in the connection 1476 // definition for that connection definition to be returned. 1477 MatchCriteria []string 1478} 1479 1480// A structure for returning a resource policy. 1481type GluePolicy struct { 1482 1483 // The date and time at which the policy was created. 1484 CreateTime *time.Time 1485 1486 // Contains the hash value associated with this policy. 1487 PolicyHash *string 1488 1489 // Contains the requested policy document, in JSON format. 1490 PolicyInJson *string 1491 1492 // The date and time at which the policy was last updated. 1493 UpdateTime *time.Time 1494} 1495 1496// The database and table in the AWS Glue Data Catalog that is used for input or 1497// output data. 1498type GlueTable struct { 1499 1500 // A database name in the AWS Glue Data Catalog. 1501 // 1502 // This member is required. 1503 DatabaseName *string 1504 1505 // A table name in the AWS Glue Data Catalog. 1506 // 1507 // This member is required. 1508 TableName *string 1509 1510 // A unique identifier for the AWS Glue Data Catalog. 1511 CatalogId *string 1512 1513 // The name of the connection to the AWS Glue Data Catalog. 1514 ConnectionName *string 1515} 1516 1517// A classifier that uses grok patterns. 
1518type GrokClassifier struct { 1519 1520 // An identifier of the data format that the classifier matches, such as Twitter, 1521 // JSON, Omniture logs, and so on. 1522 // 1523 // This member is required. 1524 Classification *string 1525 1526 // The grok pattern applied to a data store by this classifier. For more 1527 // information, see built-in patterns in Writing Custom Classifiers 1528 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html). 1529 // 1530 // This member is required. 1531 GrokPattern *string 1532 1533 // The name of the classifier. 1534 // 1535 // This member is required. 1536 Name *string 1537 1538 // The time that this classifier was registered. 1539 CreationTime *time.Time 1540 1541 // Optional custom grok patterns defined by this classifier. For more information, 1542 // see custom patterns in Writing Custom Classifiers 1543 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html). 1544 CustomPatterns *string 1545 1546 // The time that this classifier was last updated. 1547 LastUpdated *time.Time 1548 1549 // The version of this classifier. 1550 Version int64 1551} 1552 1553// Specifies configuration properties for an importing labels task run. 1554type ImportLabelsTaskRunProperties struct { 1555 1556 // The Amazon Simple Storage Service (Amazon S3) path from where you will import 1557 // the labels. 1558 InputS3Path *string 1559 1560 // Indicates whether to overwrite your existing labels. 1561 Replace bool 1562} 1563 1564// Specifies a JDBC data store to crawl. 1565type JdbcTarget struct { 1566 1567 // The name of the connection to use to connect to the JDBC target. 1568 ConnectionName *string 1569 1570 // A list of glob patterns used to exclude from the crawl. For more information, 1571 // see Catalog Tables with a Crawler 1572 // (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html). 1573 Exclusions []string 1574 1575 // The path of the JDBC target. 
1576 Path *string 1577} 1578 1579// Specifies a job definition. 1580type Job struct { 1581 1582 // This field is deprecated. Use MaxCapacity instead. The number of AWS Glue data 1583 // processing units (DPUs) allocated to runs of this job. You can allocate from 2 1584 // to 100 DPUs; the default is 10. A DPU is a relative measure of processing power 1585 // that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more 1586 // information, see the AWS Glue pricing page 1587 // (https://aws.amazon.com/glue/pricing/). 1588 // 1589 // Deprecated: This property is deprecated, use MaxCapacity instead. 1590 AllocatedCapacity int32 1591 1592 // The JobCommand that executes this job. 1593 Command *JobCommand 1594 1595 // The connections used for this job. 1596 Connections *ConnectionsList 1597 1598 // The time and date that this job definition was created. 1599 CreatedOn *time.Time 1600 1601 // The default arguments for this job, specified as name-value pairs. You can 1602 // specify arguments here that your own job-execution script consumes, as well as 1603 // arguments that AWS Glue itself consumes. For information about how to specify 1604 // and consume your own Job arguments, see the Calling AWS Glue APIs in Python 1605 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) 1606 // topic in the developer guide. For information about the key-value pairs that AWS 1607 // Glue consumes to set up your job, see the Special Parameters Used by AWS Glue 1608 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) 1609 // topic in the developer guide. 1610 DefaultArguments map[string]string 1611 1612 // A description of the job. 1613 Description *string 1614 1615 // An ExecutionProperty specifying the maximum number of concurrent runs allowed 1616 // for this job. 
1617 ExecutionProperty *ExecutionProperty 1618 1619 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1620 // supports. The Python version indicates the version supported for jobs of type 1621 // Spark. For more information about the available AWS Glue versions and 1622 // corresponding Spark and Python versions, see Glue version 1623 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1624 // guide. Jobs that are created without specifying a Glue version default to Glue 1625 // 0.9. 1626 GlueVersion *string 1627 1628 // The last point in time when this job definition was modified. 1629 LastModifiedOn *time.Time 1630 1631 // This field is reserved for future use. 1632 LogUri *string 1633 1634 // The number of AWS Glue data processing units (DPUs) that can be allocated when 1635 // this job runs. A DPU is a relative measure of processing power that consists of 1636 // 4 vCPUs of compute capacity and 16 GB of memory. For more information, see the 1637 // AWS Glue pricing page (https://aws.amazon.com/glue/pricing/). Do not set Max 1638 // Capacity if using WorkerType and NumberOfWorkers. The value that can be 1639 // allocated for MaxCapacity depends on whether you are running a Python shell job, 1640 // an Apache Spark ETL job, or an Apache Spark streaming ETL job: 1641 // 1642 // * When you 1643 // specify a Python shell job (JobCommand.Name="pythonshell"), you can allocate 1644 // either 0.0625 or 1 DPU. The default is 0.0625 DPU. 1645 // 1646 // * When you specify an Apache 1647 // Spark ETL job (JobCommand.Name="glueetl") or Apache Spark streaming ETL job 1648 // (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The 1649 // default is 10 DPUs. This job type cannot have a fractional DPU allocation. 1650 MaxCapacity *float64 1651 1652 // The maximum number of times to retry this job after a JobRun fails. 1653 MaxRetries int32 1654 1655 // The name you assign to this job definition. 
1656 Name *string 1657 1658 // Non-overridable arguments for this job, specified as name-value pairs. 1659 NonOverridableArguments map[string]string 1660 1661 // Specifies configuration properties of a job notification. 1662 NotificationProperty *NotificationProperty 1663 1664 // The number of workers of a defined workerType that are allocated when a job 1665 // runs. The maximum number of workers you can define are 299 for G.1X, and 149 for 1666 // G.2X. 1667 NumberOfWorkers *int32 1668 1669 // The name or Amazon Resource Name (ARN) of the IAM role associated with this job. 1670 Role *string 1671 1672 // The name of the SecurityConfiguration structure to be used with this job. 1673 SecurityConfiguration *string 1674 1675 // The job timeout in minutes. This is the maximum time that a job run can consume 1676 // resources before it is terminated and enters TIMEOUT status. The default is 1677 // 2,880 minutes (48 hours). 1678 Timeout *int32 1679 1680 // The type of predefined worker that is allocated when a job runs. Accepts a value 1681 // of Standard, G.1X, or G.2X. 1682 // 1683 // * For the Standard worker type, each worker 1684 // provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. 1685 // 1686 // * 1687 // For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 1688 // GB disk), and provides 1 executor per worker. We recommend this worker type for 1689 // memory-intensive jobs. 1690 // 1691 // * For the G.2X worker type, each worker maps to 2 DPU (8 1692 // vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We 1693 // recommend this worker type for memory-intensive jobs. 1694 WorkerType WorkerType 1695} 1696 1697// Defines a point that a job can resume processing. 1698type JobBookmarkEntry struct { 1699 1700 // The attempt ID number. 1701 Attempt int32 1702 1703 // The bookmark itself. 1704 JobBookmark *string 1705 1706 // The name of the job in question. 
1707 JobName *string 1708 1709 // The unique run identifier associated with the previous job run. 1710 PreviousRunId *string 1711 1712 // The run ID number. 1713 Run int32 1714 1715 // The run ID number. 1716 RunId *string 1717 1718 // The version of the job. 1719 Version int32 1720} 1721 1722// Specifies how job bookmark data should be encrypted. 1723type JobBookmarksEncryption struct { 1724 1725 // The encryption mode to use for job bookmarks data. 1726 JobBookmarksEncryptionMode JobBookmarksEncryptionMode 1727 1728 // The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data. 1729 KmsKeyArn *string 1730} 1731 1732// Specifies code executed when a job is run. 1733type JobCommand struct { 1734 1735 // The name of the job command. For an Apache Spark ETL job, this must be glueetl. 1736 // For a Python shell job, it must be pythonshell. For an Apache Spark streaming 1737 // ETL job, this must be gluestreaming. 1738 Name *string 1739 1740 // The Python version being used to execute a Python shell job. Allowed values are 1741 // 2 or 3. 1742 PythonVersion *string 1743 1744 // Specifies the Amazon Simple Storage Service (Amazon S3) path to a script that 1745 // executes a job. 1746 ScriptLocation *string 1747} 1748 1749// The details of a Job node present in the workflow. 1750type JobNodeDetails struct { 1751 1752 // The information for the job runs represented by the job node. 1753 JobRuns []JobRun 1754} 1755 1756// Contains information about a job run. 1757type JobRun struct { 1758 1759 // This field is deprecated. Use MaxCapacity instead. The number of AWS Glue data 1760 // processing units (DPUs) allocated to this JobRun. From 2 to 100 DPUs can be 1761 // allocated; the default is 10. A DPU is a relative measure of processing power 1762 // that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more 1763 // information, see the AWS Glue pricing page 1764 // (https://aws.amazon.com/glue/pricing/). 
1765 // 1766 // Deprecated: This property is deprecated, use MaxCapacity instead. 1767 AllocatedCapacity int32 1768 1769 // The job arguments associated with this run. For this job run, they replace the 1770 // default arguments set in the job definition itself. You can specify arguments 1771 // here that your own job-execution script consumes, as well as arguments that AWS 1772 // Glue itself consumes. For information about how to specify and consume your own 1773 // job arguments, see the Calling AWS Glue APIs in Python 1774 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) 1775 // topic in the developer guide. For information about the key-value pairs that AWS 1776 // Glue consumes to set up your job, see the Special Parameters Used by AWS Glue 1777 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) 1778 // topic in the developer guide. 1779 Arguments map[string]string 1780 1781 // The number of the attempt to run this job. 1782 Attempt int32 1783 1784 // The date and time that this job run completed. 1785 CompletedOn *time.Time 1786 1787 // An error message associated with this job run. 1788 ErrorMessage *string 1789 1790 // The amount of time (in seconds) that the job run consumed resources. 1791 ExecutionTime int32 1792 1793 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1794 // supports. The Python version indicates the version supported for jobs of type 1795 // Spark. For more information about the available AWS Glue versions and 1796 // corresponding Spark and Python versions, see Glue version 1797 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1798 // guide. Jobs that are created without specifying a Glue version default to Glue 1799 // 0.9. 1800 GlueVersion *string 1801 1802 // The ID of this job run. 1803 Id *string 1804 1805 // The name of the job definition being used in this run. 
	JobName *string

	// The current state of the job run. For more information about the statuses of
	// jobs that have terminated abnormally, see AWS Glue Job Run Statuses
	// (https://docs.aws.amazon.com/glue/latest/dg/job-run-statuses.html).
	JobRunState JobRunState

	// The last time that this job run was modified.
	LastModifiedOn *time.Time

	// The name of the log group for secure logging that can be server-side encrypted
	// in Amazon CloudWatch using AWS KMS. This name can be /aws-glue/jobs/, in which
	// case the default encryption is NONE. If you add a role name and
	// SecurityConfiguration name (in other words,
	// /aws-glue/jobs-yourRoleName-yourSecurityConfigurationName/), then that security
	// configuration is used to encrypt the log group.
	LogGroupName *string

	// The number of AWS Glue data processing units (DPUs) that can be allocated when
	// this job runs. A DPU is a relative measure of processing power that consists of
	// 4 vCPUs of compute capacity and 16 GB of memory. For more information, see the
	// AWS Glue pricing page
	// (https://aws.amazon.com/glue/pricing/). Do not set
	// Max Capacity if using WorkerType and NumberOfWorkers. The value that can be
	// allocated for MaxCapacity depends on whether you are running a Python shell job
	// or an Apache Spark ETL job:
	//
	// * When you specify a Python shell job
	// (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The
	// default is 0.0625 DPU.
	//
	// * When you specify an Apache Spark ETL job
	// (JobCommand.Name="glueetl"), you can allocate from 2 to 100 DPUs. The default is
	// 10 DPUs. This job type cannot have a fractional DPU allocation.
	MaxCapacity *float64

	// Specifies configuration properties of a job run notification.
1843 NotificationProperty *NotificationProperty 1844 1845 // The number of workers of a defined workerType that are allocated when a job 1846 // runs. The maximum number of workers you can define are 299 for G.1X, and 149 for 1847 // G.2X. 1848 NumberOfWorkers *int32 1849 1850 // A list of predecessors to this job run. 1851 PredecessorRuns []Predecessor 1852 1853 // The ID of the previous run of this job. For example, the JobRunId specified in 1854 // the StartJobRun action. 1855 PreviousRunId *string 1856 1857 // The name of the SecurityConfiguration structure to be used with this job run. 1858 SecurityConfiguration *string 1859 1860 // The date and time at which this job run was started. 1861 StartedOn *time.Time 1862 1863 // The JobRun timeout in minutes. This is the maximum time that a job run can 1864 // consume resources before it is terminated and enters TIMEOUT status. The default 1865 // is 2,880 minutes (48 hours). This overrides the timeout value set in the parent 1866 // job. 1867 Timeout *int32 1868 1869 // The name of the trigger that started this job run. 1870 TriggerName *string 1871 1872 // The type of predefined worker that is allocated when a job runs. Accepts a value 1873 // of Standard, G.1X, or G.2X. 1874 // 1875 // * For the Standard worker type, each worker 1876 // provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. 1877 // 1878 // * 1879 // For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 1880 // 64GB disk, and 1 executor per worker. 1881 // 1882 // * For the G.2X worker type, each worker 1883 // provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker. 1884 WorkerType WorkerType 1885} 1886 1887// Specifies information used to update an existing job definition. The previous 1888// job definition is completely overwritten by this information. 1889type JobUpdate struct { 1890 1891 // This field is deprecated. Use MaxCapacity instead. 
The number of AWS Glue data 1892 // processing units (DPUs) to allocate to this job. You can allocate from 2 to 100 1893 // DPUs; the default is 10. A DPU is a relative measure of processing power that 1894 // consists of 4 vCPUs of compute capacity and 16 GB of memory. For more 1895 // information, see the AWS Glue pricing page 1896 // (https://aws.amazon.com/glue/pricing/). 1897 // 1898 // Deprecated: This property is deprecated, use MaxCapacity instead. 1899 AllocatedCapacity int32 1900 1901 // The JobCommand that executes this job (required). 1902 Command *JobCommand 1903 1904 // The connections used for this job. 1905 Connections *ConnectionsList 1906 1907 // The default arguments for this job. You can specify arguments here that your own 1908 // job-execution script consumes, as well as arguments that AWS Glue itself 1909 // consumes. For information about how to specify and consume your own Job 1910 // arguments, see the Calling AWS Glue APIs in Python 1911 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) 1912 // topic in the developer guide. For information about the key-value pairs that AWS 1913 // Glue consumes to set up your job, see the Special Parameters Used by AWS Glue 1914 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) 1915 // topic in the developer guide. 1916 DefaultArguments map[string]string 1917 1918 // Description of the job being defined. 1919 Description *string 1920 1921 // An ExecutionProperty specifying the maximum number of concurrent runs allowed 1922 // for this job. 1923 ExecutionProperty *ExecutionProperty 1924 1925 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1926 // supports. The Python version indicates the version supported for jobs of type 1927 // Spark. 
For more information about the available AWS Glue versions and 1928 // corresponding Spark and Python versions, see Glue version 1929 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1930 // guide. 1931 GlueVersion *string 1932 1933 // This field is reserved for future use. 1934 LogUri *string 1935 1936 // The number of AWS Glue data processing units (DPUs) that can be allocated when 1937 // this job runs. A DPU is a relative measure of processing power that consists of 1938 // 4 vCPUs of compute capacity and 16 GB of memory. For more information, see the 1939 // AWS Glue pricing page (https://aws.amazon.com/glue/pricing/). Do not set Max 1940 // Capacity if using WorkerType and NumberOfWorkers. The value that can be 1941 // allocated for MaxCapacity depends on whether you are running a Python shell job 1942 // or an Apache Spark ETL job: 1943 // 1944 // * When you specify a Python shell job 1945 // (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The 1946 // default is 0.0625 DPU. 1947 // 1948 // * When you specify an Apache Spark ETL job 1949 // (JobCommand.Name="glueetl") or Apache Spark streaming ETL job 1950 // (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The 1951 // default is 10 DPUs. This job type cannot have a fractional DPU allocation. 1952 MaxCapacity *float64 1953 1954 // The maximum number of times to retry this job if it fails. 1955 MaxRetries int32 1956 1957 // Non-overridable arguments for this job, specified as name-value pairs. 1958 NonOverridableArguments map[string]string 1959 1960 // Specifies the configuration properties of a job notification. 1961 NotificationProperty *NotificationProperty 1962 1963 // The number of workers of a defined workerType that are allocated when a job 1964 // runs. The maximum number of workers you can define are 299 for G.1X, and 149 for 1965 // G.2X. 
1966 NumberOfWorkers *int32 1967 1968 // The name or Amazon Resource Name (ARN) of the IAM role associated with this job 1969 // (required). 1970 Role *string 1971 1972 // The name of the SecurityConfiguration structure to be used with this job. 1973 SecurityConfiguration *string 1974 1975 // The job timeout in minutes. This is the maximum time that a job run can consume 1976 // resources before it is terminated and enters TIMEOUT status. The default is 1977 // 2,880 minutes (48 hours). 1978 Timeout *int32 1979 1980 // The type of predefined worker that is allocated when a job runs. Accepts a value 1981 // of Standard, G.1X, or G.2X. 1982 // 1983 // * For the Standard worker type, each worker 1984 // provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. 1985 // 1986 // * 1987 // For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 1988 // GB disk), and provides 1 executor per worker. We recommend this worker type for 1989 // memory-intensive jobs. 1990 // 1991 // * For the G.2X worker type, each worker maps to 2 DPU (8 1992 // vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We 1993 // recommend this worker type for memory-intensive jobs. 1994 WorkerType WorkerType 1995} 1996 1997// A classifier for JSON content. 1998type JsonClassifier struct { 1999 2000 // A JsonPath string defining the JSON data for the classifier to classify. AWS 2001 // Glue supports a subset of JsonPath, as described in Writing JsonPath Custom 2002 // Classifiers 2003 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json). 2004 // 2005 // This member is required. 2006 JsonPath *string 2007 2008 // The name of the classifier. 2009 // 2010 // This member is required. 2011 Name *string 2012 2013 // The time that this classifier was registered. 2014 CreationTime *time.Time 2015 2016 // The time that this classifier was last updated. 
2017 LastUpdated *time.Time 2018 2019 // The version of this classifier. 2020 Version int64 2021} 2022 2023// A partition key pair consisting of a name and a type. 2024type KeySchemaElement struct { 2025 2026 // The name of a partition key. 2027 // 2028 // This member is required. 2029 Name *string 2030 2031 // The type of a partition key. 2032 // 2033 // This member is required. 2034 Type *string 2035} 2036 2037// Specifies configuration properties for a labeling set generation task run. 2038type LabelingSetGenerationTaskRunProperties struct { 2039 2040 // The Amazon Simple Storage Service (Amazon S3) path where you will generate the 2041 // labeling set. 2042 OutputS3Path *string 2043} 2044 2045// Status and error information about the most recent crawl. 2046type LastCrawlInfo struct { 2047 2048 // If an error occurred, the error information about the last crawl. 2049 ErrorMessage *string 2050 2051 // The log group for the last crawl. 2052 LogGroup *string 2053 2054 // The log stream for the last crawl. 2055 LogStream *string 2056 2057 // The prefix for a message about this crawl. 2058 MessagePrefix *string 2059 2060 // The time at which the crawl started. 2061 StartTime *time.Time 2062 2063 // Status of the last crawl. 2064 Status LastCrawlStatus 2065} 2066 2067// Specifies data lineage configuration settings for the crawler. 2068type LineageConfiguration struct { 2069 2070 // Specifies whether data lineage is enabled for the crawler. Valid values are: 2071 // 2072 // * 2073 // ENABLE: enables data lineage for the crawler 2074 // 2075 // * DISABLE: disables data lineage 2076 // for the crawler 2077 CrawlerLineageSettings CrawlerLineageSettings 2078} 2079 2080// The location of resources. 2081type Location struct { 2082 2083 // An Amazon DynamoDB table location. 2084 DynamoDB []CodeGenNodeArg 2085 2086 // A JDBC location. 2087 Jdbc []CodeGenNodeArg 2088 2089 // An Amazon Simple Storage Service (Amazon S3) location. 
2090 S3 []CodeGenNodeArg 2091} 2092 2093// Defines column statistics supported for integer data columns. 2094type LongColumnStatisticsData struct { 2095 2096 // The number of distinct values in a column. 2097 // 2098 // This member is required. 2099 NumberOfDistinctValues int64 2100 2101 // The number of null values in the column. 2102 // 2103 // This member is required. 2104 NumberOfNulls int64 2105 2106 // The highest value in the column. 2107 MaximumValue int64 2108 2109 // The lowest value in the column. 2110 MinimumValue int64 2111} 2112 2113// Defines a mapping. 2114type MappingEntry struct { 2115 2116 // The source path. 2117 SourcePath *string 2118 2119 // The name of the source table. 2120 SourceTable *string 2121 2122 // The source type. 2123 SourceType *string 2124 2125 // The target path. 2126 TargetPath *string 2127 2128 // The target table. 2129 TargetTable *string 2130 2131 // The target type. 2132 TargetType *string 2133} 2134 2135// A structure containing metadata information for a schema version. 2136type MetadataInfo struct { 2137 2138 // The time at which the entry was created. 2139 CreatedTime *string 2140 2141 // The metadata key’s corresponding value. 2142 MetadataValue *string 2143 2144 // Other metadata belonging to the same metadata key. 2145 OtherMetadataValueList []OtherMetadataValueListItem 2146} 2147 2148// A structure containing a key value pair for metadata. 2149type MetadataKeyValuePair struct { 2150 2151 // A metadata key. 2152 MetadataKey *string 2153 2154 // A metadata key’s corresponding value. 2155 MetadataValue *string 2156} 2157 2158// A structure for a machine learning transform. 2159type MLTransform struct { 2160 2161 // A timestamp. The time and date that this machine learning transform was created. 2162 CreatedOn *time.Time 2163 2164 // A user-defined, long-form description text for the machine learning transform. 2165 // Descriptions are not guaranteed to be unique and can be changed at any time. 
2166 Description *string 2167 2168 // An EvaluationMetrics object. Evaluation metrics provide an estimate of the 2169 // quality of your machine learning transform. 2170 EvaluationMetrics *EvaluationMetrics 2171 2172 // This value determines which version of AWS Glue this machine learning transform 2173 // is compatible with. Glue 1.0 is recommended for most customers. If the value is 2174 // not set, the Glue compatibility defaults to Glue 0.9. For more information, see 2175 // AWS Glue Versions 2176 // (https://docs.aws.amazon.com/glue/latest/dg/release-notes.html#release-notes-versions) 2177 // in the developer guide. 2178 GlueVersion *string 2179 2180 // A list of AWS Glue table definitions used by the transform. 2181 InputRecordTables []GlueTable 2182 2183 // A count identifier for the labeling files generated by AWS Glue for this 2184 // transform. As you create a better transform, you can iteratively download, 2185 // label, and upload the labeling file. 2186 LabelCount int32 2187 2188 // A timestamp. The last point in time when this machine learning transform was 2189 // modified. 2190 LastModifiedOn *time.Time 2191 2192 // The number of AWS Glue data processing units (DPUs) that are allocated to task 2193 // runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. 2194 // A DPU is a relative measure of processing power that consists of 4 vCPUs of 2195 // compute capacity and 16 GB of memory. For more information, see the AWS Glue 2196 // pricing page (http://aws.amazon.com/glue/pricing/). MaxCapacity is a mutually 2197 // exclusive option with NumberOfWorkers and WorkerType. 2198 // 2199 // * If either 2200 // NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set. 2201 // 2202 // * If 2203 // MaxCapacity is set then neither NumberOfWorkers or WorkerType can be set. 2204 // 2205 // * If 2206 // WorkerType is set, then NumberOfWorkers is required (and vice versa). 
	//
	// *
	// MaxCapacity and NumberOfWorkers must both be at least 1.
	//
	// When the WorkerType
	// field is set to a value other than Standard, the MaxCapacity field is set
	// automatically and becomes read-only.
	MaxCapacity *float64

	// The maximum number of times to retry after an MLTaskRun of the machine learning
	// transform fails.
	MaxRetries *int32

	// A user-defined name for the machine learning transform. Names are not guaranteed
	// unique and can be changed at any time.
	Name *string

	// The number of workers of a defined workerType that are allocated when a task of
	// the transform runs. If WorkerType is set, then NumberOfWorkers is required (and
	// vice versa).
	NumberOfWorkers *int32

	// A TransformParameters object. You can use parameters to tune (customize) the
	// behavior of the machine learning transform by specifying what data it learns
	// from and your preference on various tradeoffs (such as precision vs. recall, or
	// accuracy vs. cost).
	Parameters *TransformParameters

	// The name or Amazon Resource Name (ARN) of the IAM role with the required
	// permissions. The required permissions include both AWS Glue service role
	// permissions to AWS Glue resources, and Amazon S3 permissions required by the
	// transform.
	//
	// * This role needs AWS Glue service role permissions to allow access
	// to resources in AWS Glue. See Attach a Policy to IAM Users That Access AWS Glue
	// (https://docs.aws.amazon.com/glue/latest/dg/attach-policy-iam-user.html).
	//
	// *
	// This role needs permission to your Amazon Simple Storage Service (Amazon S3)
	// sources, targets, temporary directory, scripts, and any libraries used by the
	// task run for this transform.
2248 Role *string 2249 2250 // A map of key-value pairs representing the columns and data types that this 2251 // transform can run against. Has an upper bound of 100 columns. 2252 Schema []SchemaColumn 2253 2254 // The current status of the machine learning transform. 2255 Status TransformStatusType 2256 2257 // The timeout in minutes of the machine learning transform. 2258 Timeout *int32 2259 2260 // The encryption-at-rest settings of the transform that apply to accessing user 2261 // data. Machine learning transforms can access user data encrypted in Amazon S3 2262 // using KMS. 2263 TransformEncryption *TransformEncryption 2264 2265 // The unique transform ID that is generated for the machine learning transform. 2266 // The ID is guaranteed to be unique and does not change. 2267 TransformId *string 2268 2269 // The type of predefined worker that is allocated when a task of this transform 2270 // runs. Accepts a value of Standard, G.1X, or G.2X. 2271 // 2272 // * For the Standard worker 2273 // type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 2274 // executors per worker. 2275 // 2276 // * For the G.1X worker type, each worker provides 4 vCPU, 2277 // 16 GB of memory and a 64GB disk, and 1 executor per worker. 2278 // 2279 // * For the G.2X 2280 // worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 2281 // 1 executor per worker. 2282 // 2283 // MaxCapacity is a mutually exclusive option with 2284 // NumberOfWorkers and WorkerType. 2285 // 2286 // * If either NumberOfWorkers or WorkerType is 2287 // set, then MaxCapacity cannot be set. 2288 // 2289 // * If MaxCapacity is set then neither 2290 // NumberOfWorkers or WorkerType can be set. 2291 // 2292 // * If WorkerType is set, then 2293 // NumberOfWorkers is required (and vice versa). 2294 // 2295 // * MaxCapacity and NumberOfWorkers 2296 // must both be at least 1. 
2297 WorkerType WorkerType 2298} 2299 2300// The encryption-at-rest settings of the transform that apply to accessing user 2301// data. 2302type MLUserDataEncryption struct { 2303 2304 // The encryption mode applied to user data. Valid values are: 2305 // 2306 // * DISABLED: 2307 // encryption is disabled 2308 // 2309 // * SSEKMS: use of server-side encryption with AWS Key 2310 // Management Service (SSE-KMS) for user data stored in Amazon S3. 2311 // 2312 // This member is required. 2313 MlUserDataEncryptionMode MLUserDataEncryptionModeString 2314 2315 // The ID for the customer-provided KMS key. 2316 KmsKeyId *string 2317} 2318 2319// Specifies an Amazon DocumentDB or MongoDB data store to crawl. 2320type MongoDBTarget struct { 2321 2322 // The name of the connection to use to connect to the Amazon DocumentDB or MongoDB 2323 // target. 2324 ConnectionName *string 2325 2326 // The path of the Amazon DocumentDB or MongoDB target (database/collection). 2327 Path *string 2328 2329 // Indicates whether to scan all the records, or to sample rows from the table. 2330 // Scanning all the records can take a long time when the table is not a high 2331 // throughput table. A value of true means to scan all records, while a value of 2332 // false means to sample the records. If no value is specified, the value defaults 2333 // to true. 2334 ScanAll *bool 2335} 2336 2337// A node represents an AWS Glue component such as a trigger, or job, etc., that is 2338// part of a workflow. 2339type Node struct { 2340 2341 // Details of the crawler when the node represents a crawler. 2342 CrawlerDetails *CrawlerNodeDetails 2343 2344 // Details of the Job when the node represents a Job. 2345 JobDetails *JobNodeDetails 2346 2347 // The name of the AWS Glue component represented by the node. 2348 Name *string 2349 2350 // Details of the Trigger when the node represents a Trigger. 2351 TriggerDetails *TriggerNodeDetails 2352 2353 // The type of AWS Glue component represented by the node. 
2354 Type NodeType 2355 2356 // The unique Id assigned to the node within the workflow. 2357 UniqueId *string 2358} 2359 2360// Specifies configuration properties of a notification. 2361type NotificationProperty struct { 2362 2363 // After a job run starts, the number of minutes to wait before sending a job run 2364 // delay notification. 2365 NotifyDelayAfter *int32 2366} 2367 2368// Specifies the sort order of a sorted column. 2369type Order struct { 2370 2371 // The name of the column. 2372 // 2373 // This member is required. 2374 Column *string 2375 2376 // Indicates that the column is sorted in ascending order (== 1), or in descending 2377 // order (==0). 2378 // 2379 // This member is required. 2380 SortOrder int32 2381} 2382 2383// A structure containing other metadata for a schema version belonging to the same 2384// metadata key. 2385type OtherMetadataValueListItem struct { 2386 2387 // The time at which the entry was created. 2388 CreatedTime *string 2389 2390 // The metadata key’s corresponding value for the other metadata belonging to the 2391 // same metadata key. 2392 MetadataValue *string 2393} 2394 2395// Represents a slice of table data. 2396type Partition struct { 2397 2398 // The ID of the Data Catalog in which the partition resides. 2399 CatalogId *string 2400 2401 // The time at which the partition was created. 2402 CreationTime *time.Time 2403 2404 // The name of the catalog database in which to create the partition. 2405 DatabaseName *string 2406 2407 // The last time at which the partition was accessed. 2408 LastAccessTime *time.Time 2409 2410 // The last time at which column statistics were computed for this partition. 2411 LastAnalyzedTime *time.Time 2412 2413 // These key-value pairs define partition parameters. 2414 Parameters map[string]string 2415 2416 // Provides information about the physical location where the partition is stored. 
2417 StorageDescriptor *StorageDescriptor 2418 2419 // The name of the database table in which to create the partition. 2420 TableName *string 2421 2422 // The values of the partition. 2423 Values []string 2424} 2425 2426// Contains information about a partition error. 2427type PartitionError struct { 2428 2429 // The details about the partition error. 2430 ErrorDetail *ErrorDetail 2431 2432 // The values that define the partition. 2433 PartitionValues []string 2434} 2435 2436// A structure for a partition index. 2437type PartitionIndex struct { 2438 2439 // The name of the partition index. 2440 // 2441 // This member is required. 2442 IndexName *string 2443 2444 // The keys for the partition index. 2445 // 2446 // This member is required. 2447 Keys []string 2448} 2449 2450// A descriptor for a partition index in a table. 2451type PartitionIndexDescriptor struct { 2452 2453 // The name of the partition index. 2454 // 2455 // This member is required. 2456 IndexName *string 2457 2458 // The status of the partition index. The possible statuses are: 2459 // 2460 // * CREATING: The 2461 // index is being created. When an index is in a CREATING state, the index or its 2462 // table cannot be deleted. 2463 // 2464 // * ACTIVE: The index creation succeeds. 2465 // 2466 // * FAILED: The 2467 // index creation fails. 2468 // 2469 // * DELETING: The index is deleted from the list of 2470 // indexes. 2471 // 2472 // This member is required. 2473 IndexStatus PartitionIndexStatus 2474 2475 // A list of one or more keys, as KeySchemaElement structures, for the partition 2476 // index. 2477 // 2478 // This member is required. 2479 Keys []KeySchemaElement 2480 2481 // A list of errors that can occur when registering partition indexes for an 2482 // existing table. 2483 BackfillErrors []BackfillError 2484} 2485 2486// The structure used to create and update a partition. 2487type PartitionInput struct { 2488 2489 // The last time at which the partition was accessed. 
2490 LastAccessTime *time.Time 2491 2492 // The last time at which column statistics were computed for this partition. 2493 LastAnalyzedTime *time.Time 2494 2495 // These key-value pairs define partition parameters. 2496 Parameters map[string]string 2497 2498 // Provides information about the physical location where the partition is stored. 2499 StorageDescriptor *StorageDescriptor 2500 2501 // The values of the partition. Although this parameter is not required by the SDK, 2502 // you must specify this parameter for a valid input. The values for the keys for 2503 // the new partition must be passed as an array of String objects that must be 2504 // ordered in the same order as the partition keys appearing in the Amazon S3 2505 // prefix. Otherwise AWS Glue will add the values to the wrong keys. 2506 Values []string 2507} 2508 2509// Contains a list of values defining partitions. 2510type PartitionValueList struct { 2511 2512 // The list of values. 2513 // 2514 // This member is required. 2515 Values []string 2516} 2517 2518// Specifies the physical requirements for a connection. 2519type PhysicalConnectionRequirements struct { 2520 2521 // The connection's Availability Zone. This field is redundant because the 2522 // specified subnet implies the Availability Zone to be used. Currently the field 2523 // must be populated, but it will be deprecated in the future. 2524 AvailabilityZone *string 2525 2526 // The security group ID list used by the connection. 2527 SecurityGroupIdList []string 2528 2529 // The subnet ID used by the connection. 2530 SubnetId *string 2531} 2532 2533// A job run that was used in the predicate of a conditional trigger that triggered 2534// this job run. 2535type Predecessor struct { 2536 2537 // The name of the job definition used by the predecessor job run. 2538 JobName *string 2539 2540 // The job-run ID of the predecessor job run. 2541 RunId *string 2542} 2543 2544// Defines the predicate of the trigger, which determines when it fires. 
2545type Predicate struct { 2546 2547 // A list of the conditions that determine when the trigger will fire. 2548 Conditions []Condition 2549 2550 // An optional field if only one condition is listed. If multiple conditions are 2551 // listed, then this field is required. 2552 Logical Logical 2553} 2554 2555// Permissions granted to a principal. 2556type PrincipalPermissions struct { 2557 2558 // The permissions that are granted to the principal. 2559 Permissions []Permission 2560 2561 // The principal who is granted permissions. 2562 Principal *DataLakePrincipal 2563} 2564 2565// Defines a property predicate. 2566type PropertyPredicate struct { 2567 2568 // The comparator used to compare this property to others. 2569 Comparator Comparator 2570 2571 // The key of the property. 2572 Key *string 2573 2574 // The value of the property. 2575 Value *string 2576} 2577 2578// When crawling an Amazon S3 data source after the first crawl is complete, 2579// specifies whether to crawl the entire dataset again or to crawl only folders 2580// that were added since the last crawler run. For more information, see 2581// Incremental Crawls in AWS Glue 2582// (https://docs.aws.amazon.com/glue/latest/dg/incremental-crawls.html) in the 2583// developer guide. 2584type RecrawlPolicy struct { 2585 2586 // Specifies whether to crawl the entire dataset again or to crawl only folders 2587 // that were added since the last crawler run. A value of CRAWL_EVERYTHING 2588 // specifies crawling the entire dataset again. A value of CRAWL_NEW_FOLDERS_ONLY 2589 // specifies crawling only folders that were added since the last crawler run. 2590 RecrawlBehavior RecrawlBehavior 2591} 2592 2593// A wrapper structure that may contain the registry name and Amazon Resource Name 2594// (ARN). 2595type RegistryId struct { 2596 2597 // Arn of the registry to be updated. One of RegistryArn or RegistryName has to be 2598 // provided. 2599 RegistryArn *string 2600 2601 // Name of the registry. 
Used only for lookup. One of RegistryArn or RegistryName
	// has to be provided.
	RegistryName *string
}

// A structure containing the details for a registry.
type RegistryListItem struct {

	// The date the registry was created.
	CreatedTime *string

	// A description of the registry.
	Description *string

	// The Amazon Resource Name (ARN) of the registry.
	RegistryArn *string

	// The name of the registry.
	RegistryName *string

	// The status of the registry.
	Status RegistryStatus

	// The date the registry was updated.
	UpdatedTime *string
}

// The URIs for function resources.
type ResourceUri struct {

	// The type of the resource.
	ResourceType ResourceType

	// The URI for accessing the resource.
	Uri *string
}

// Specifies how Amazon Simple Storage Service (Amazon S3) data should be
// encrypted.
type S3Encryption struct {

	// The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data.
	KmsKeyArn *string

	// The encryption mode to use for Amazon S3 data.
	S3EncryptionMode S3EncryptionMode
}

// Specifies a data store in Amazon Simple Storage Service (Amazon S3).
type S3Target struct {

	// The name of a connection which allows a job or crawler to access data in Amazon
	// S3 within an Amazon Virtual Private Cloud environment (Amazon VPC).
	ConnectionName *string

	// A list of glob patterns used to exclude from the crawl. For more information,
	// see Catalog Tables with a Crawler
	// (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html).
	Exclusions []string

	// The path to the Amazon S3 target.
	Path *string
}

// A scheduling object using a cron statement to schedule an event.
type Schedule struct {

	// A cron expression used to specify the schedule (see Time-Based Schedules for
	// Jobs and Crawlers
	// (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html)).
	// For example, to run something every day at 12:15 UTC, you would specify: cron(15
	// 12 * * ? *).
	ScheduleExpression *string

	// The state of the schedule.
	State ScheduleState
}

// A policy that specifies update and deletion behaviors for the crawler.
type SchemaChangePolicy struct {

	// The deletion behavior when the crawler finds a deleted object.
	DeleteBehavior DeleteBehavior

	// The update behavior when the crawler finds a changed schema.
	UpdateBehavior UpdateBehavior
}

// A key-value pair representing a column and data type that this transform can run
// against. The Schema parameter of the MLTransform may contain up to 100 of these
// structures.
type SchemaColumn struct {

	// The type of data in the column.
	DataType *string

	// The name of the column.
	Name *string
}

// The unique ID of the schema in the AWS Glue schema registry.
type SchemaId struct {

	// The name of the schema registry that contains the schema.
	RegistryName *string

	// The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has
	// to be provided.
	SchemaArn *string

	// The name of the schema. One of SchemaArn or SchemaName has to be provided.
	SchemaName *string
}

// An object that contains minimal details for a schema.
type SchemaListItem struct {

	// The date and time that a schema was created.
	CreatedTime *string

	// A description for the schema.
	Description *string

	// The name of the registry where the schema resides.
2725 RegistryName *string 2726 2727 // The Amazon Resource Name (ARN) for the schema. 2728 SchemaArn *string 2729 2730 // The name of the schema. 2731 SchemaName *string 2732 2733 // The status of the schema. 2734 SchemaStatus SchemaStatus 2735 2736 // The date and time that a schema was updated. 2737 UpdatedTime *string 2738} 2739 2740// An object that references a schema stored in the AWS Glue Schema Registry. 2741type SchemaReference struct { 2742 2743 // A structure that contains schema identity fields. Either this or the 2744 // SchemaVersionId has to be provided. 2745 SchemaId *SchemaId 2746 2747 // The unique ID assigned to a version of the schema. Either this or the SchemaId 2748 // has to be provided. 2749 SchemaVersionId *string 2750 2751 // The version number of the schema. 2752 SchemaVersionNumber int64 2753} 2754 2755// An object that contains the error details for an operation on a schema version. 2756type SchemaVersionErrorItem struct { 2757 2758 // The details of the error for the schema version. 2759 ErrorDetails *ErrorDetails 2760 2761 // The version number of the schema. 2762 VersionNumber int64 2763} 2764 2765// An object containing the details about a schema version. 2766type SchemaVersionListItem struct { 2767 2768 // The date and time the schema version was created. 2769 CreatedTime *string 2770 2771 // The Amazon Resource Name (ARN) of the schema. 2772 SchemaArn *string 2773 2774 // The unique identifier of the schema version. 2775 SchemaVersionId *string 2776 2777 // The status of the schema version. 2778 Status SchemaVersionStatus 2779 2780 // The version number of the schema. 2781 VersionNumber int64 2782} 2783 2784// A structure containing the schema version information. 2785type SchemaVersionNumber struct { 2786 2787 // The latest version available for the schema. 2788 LatestVersion bool 2789 2790 // The version number of the schema. 2791 VersionNumber int64 2792} 2793 2794// Specifies a security configuration. 
2795type SecurityConfiguration struct { 2796 2797 // The time at which this security configuration was created. 2798 CreatedTimeStamp *time.Time 2799 2800 // The encryption configuration associated with this security configuration. 2801 EncryptionConfiguration *EncryptionConfiguration 2802 2803 // The name of the security configuration. 2804 Name *string 2805} 2806 2807// Defines a non-overlapping region of a table's partitions, allowing multiple 2808// requests to be executed in parallel. 2809type Segment struct { 2810 2811 // The zero-based index number of the segment. For example, if the total number of 2812 // segments is 4, SegmentNumber values range from 0 through 3. 2813 // 2814 // This member is required. 2815 SegmentNumber int32 2816 2817 // The total number of segments. 2818 // 2819 // This member is required. 2820 TotalSegments int32 2821} 2822 2823// Information about a serialization/deserialization program (SerDe) that serves as 2824// an extractor and loader. 2825type SerDeInfo struct { 2826 2827 // Name of the SerDe. 2828 Name *string 2829 2830 // These key-value pairs define initialization parameters for the SerDe. 2831 Parameters map[string]string 2832 2833 // Usually the class that implements the SerDe. An example is 2834 // org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe. 2835 SerializationLibrary *string 2836} 2837 2838// Specifies skewed values in a table. Skewed values are those that occur with very 2839// high frequency. 2840type SkewedInfo struct { 2841 2842 // A list of names of columns that contain skewed values. 2843 SkewedColumnNames []string 2844 2845 // A mapping of skewed values to the columns that contain them. 2846 SkewedColumnValueLocationMaps map[string]string 2847 2848 // A list of values that appear so frequently as to be considered skewed. 2849 SkewedColumnValues []string 2850} 2851 2852// Specifies a field to sort by and a sort order. 2853type SortCriterion struct { 2854 2855 // The name of the field on which to sort. 
2856 FieldName *string 2857 2858 // An ascending or descending sort. 2859 Sort Sort 2860} 2861 2862// Describes the physical storage of table data. 2863type StorageDescriptor struct { 2864 2865 // A list of reducer grouping columns, clustering columns, and bucketing columns in 2866 // the table. 2867 BucketColumns []string 2868 2869 // A list of the Columns in the table. 2870 Columns []Column 2871 2872 // True if the data in the table is compressed, or False if not. 2873 Compressed bool 2874 2875 // The input format: SequenceFileInputFormat (binary), or TextInputFormat, or a 2876 // custom format. 2877 InputFormat *string 2878 2879 // The physical location of the table. By default, this takes the form of the 2880 // warehouse location, followed by the database location in the warehouse, followed 2881 // by the table name. 2882 Location *string 2883 2884 // Must be specified if the table contains any dimension columns. 2885 NumberOfBuckets int32 2886 2887 // The output format: SequenceFileOutputFormat (binary), or 2888 // IgnoreKeyTextOutputFormat, or a custom format. 2889 OutputFormat *string 2890 2891 // The user-supplied properties in key-value form. 2892 Parameters map[string]string 2893 2894 // An object that references a schema stored in the AWS Glue Schema Registry. When 2895 // creating a table, you can pass an empty list of columns for the schema, and 2896 // instead use a schema reference. 2897 SchemaReference *SchemaReference 2898 2899 // The serialization/deserialization (SerDe) information. 2900 SerdeInfo *SerDeInfo 2901 2902 // The information about values that appear frequently in a column (skewed values). 2903 SkewedInfo *SkewedInfo 2904 2905 // A list specifying the sort order of each bucket in the table. 2906 SortColumns []Order 2907 2908 // True if the table data is stored in subdirectories, or False if not. 2909 StoredAsSubDirectories bool 2910} 2911 2912// Defines column statistics supported for character sequence data values. 
2913type StringColumnStatisticsData struct { 2914 2915 // The average string length in the column. 2916 // 2917 // This member is required. 2918 AverageLength float64 2919 2920 // The size of the longest string in the column. 2921 // 2922 // This member is required. 2923 MaximumLength int64 2924 2925 // The number of distinct values in a column. 2926 // 2927 // This member is required. 2928 NumberOfDistinctValues int64 2929 2930 // The number of null values in the column. 2931 // 2932 // This member is required. 2933 NumberOfNulls int64 2934} 2935 2936// Represents a collection of related data organized in columns and rows. 2937type Table struct { 2938 2939 // The table name. For Hive compatibility, this must be entirely lowercase. 2940 // 2941 // This member is required. 2942 Name *string 2943 2944 // The ID of the Data Catalog in which the table resides. 2945 CatalogId *string 2946 2947 // The time when the table definition was created in the Data Catalog. 2948 CreateTime *time.Time 2949 2950 // The person or entity who created the table. 2951 CreatedBy *string 2952 2953 // The name of the database where the table metadata resides. For Hive 2954 // compatibility, this must be all lowercase. 2955 DatabaseName *string 2956 2957 // A description of the table. 2958 Description *string 2959 2960 // Indicates whether the table has been registered with AWS Lake Formation. 2961 IsRegisteredWithLakeFormation bool 2962 2963 // The last time that the table was accessed. This is usually taken from HDFS, and 2964 // might not be reliable. 2965 LastAccessTime *time.Time 2966 2967 // The last time that column statistics were computed for this table. 2968 LastAnalyzedTime *time.Time 2969 2970 // The owner of the table. 2971 Owner *string 2972 2973 // These key-value pairs define properties associated with the table. 2974 Parameters map[string]string 2975 2976 // A list of columns by which the table is partitioned. Only primitive types are 2977 // supported as partition keys. 
When you create a table used by Amazon Athena, and 2978 // you do not specify any partitionKeys, you must at least set the value of 2979 // partitionKeys to an empty list. For example: "PartitionKeys": [] 2980 PartitionKeys []Column 2981 2982 // The retention time for this table. 2983 Retention int32 2984 2985 // A storage descriptor containing information about the physical storage of this 2986 // table. 2987 StorageDescriptor *StorageDescriptor 2988 2989 // The type of this table (EXTERNAL_TABLE, VIRTUAL_VIEW, etc.). 2990 TableType *string 2991 2992 // A TableIdentifier structure that describes a target table for resource linking. 2993 TargetTable *TableIdentifier 2994 2995 // The last time that the table was updated. 2996 UpdateTime *time.Time 2997 2998 // If the table is a view, the expanded text of the view; otherwise null. 2999 ViewExpandedText *string 3000 3001 // If the table is a view, the original text of the view; otherwise null. 3002 ViewOriginalText *string 3003} 3004 3005// An error record for table operations. 3006type TableError struct { 3007 3008 // The details about the error. 3009 ErrorDetail *ErrorDetail 3010 3011 // The name of the table. For Hive compatibility, this must be entirely lowercase. 3012 TableName *string 3013} 3014 3015// A structure that describes a target table for resource linking. 3016type TableIdentifier struct { 3017 3018 // The ID of the Data Catalog in which the table resides. 3019 CatalogId *string 3020 3021 // The name of the catalog database that contains the target table. 3022 DatabaseName *string 3023 3024 // The name of the target table. 3025 Name *string 3026} 3027 3028// A structure used to define a table. 3029type TableInput struct { 3030 3031 // The table name. For Hive compatibility, this is folded to lowercase when it is 3032 // stored. 3033 // 3034 // This member is required. 3035 Name *string 3036 3037 // A description of the table. 
3038 Description *string 3039 3040 // The last time that the table was accessed. 3041 LastAccessTime *time.Time 3042 3043 // The last time that column statistics were computed for this table. 3044 LastAnalyzedTime *time.Time 3045 3046 // The table owner. 3047 Owner *string 3048 3049 // These key-value pairs define properties associated with the table. 3050 Parameters map[string]string 3051 3052 // A list of columns by which the table is partitioned. Only primitive types are 3053 // supported as partition keys. When you create a table used by Amazon Athena, and 3054 // you do not specify any partitionKeys, you must at least set the value of 3055 // partitionKeys to an empty list. For example: "PartitionKeys": [] 3056 PartitionKeys []Column 3057 3058 // The retention time for this table. 3059 Retention int32 3060 3061 // A storage descriptor containing information about the physical storage of this 3062 // table. 3063 StorageDescriptor *StorageDescriptor 3064 3065 // The type of this table (EXTERNAL_TABLE, VIRTUAL_VIEW, etc.). 3066 TableType *string 3067 3068 // A TableIdentifier structure that describes a target table for resource linking. 3069 TargetTable *TableIdentifier 3070 3071 // If the table is a view, the expanded text of the view; otherwise null. 3072 ViewExpandedText *string 3073 3074 // If the table is a view, the original text of the view; otherwise null. 3075 ViewOriginalText *string 3076} 3077 3078// Specifies a version of a table. 3079type TableVersion struct { 3080 3081 // The table in question. 3082 Table *Table 3083 3084 // The ID value that identifies this table version. A VersionId is a string 3085 // representation of an integer. Each version is incremented by 1. 3086 VersionId *string 3087} 3088 3089// An error record for table-version operations. 3090type TableVersionError struct { 3091 3092 // The details about the error. 3093 ErrorDetail *ErrorDetail 3094 3095 // The name of the table in question. 
3096 TableName *string 3097 3098 // The ID value of the version in question. A VersionID is a string representation 3099 // of an integer. Each version is incremented by 1. 3100 VersionId *string 3101} 3102 3103// The sampling parameters that are associated with the machine learning transform. 3104type TaskRun struct { 3105 3106 // The last point in time that the requested task run was completed. 3107 CompletedOn *time.Time 3108 3109 // The list of error strings associated with this task run. 3110 ErrorString *string 3111 3112 // The amount of time (in seconds) that the task run consumed resources. 3113 ExecutionTime int32 3114 3115 // The last point in time that the requested task run was updated. 3116 LastModifiedOn *time.Time 3117 3118 // The names of the log group for secure logging, associated with this task run. 3119 LogGroupName *string 3120 3121 // Specifies configuration properties associated with this task run. 3122 Properties *TaskRunProperties 3123 3124 // The date and time that this task run started. 3125 StartedOn *time.Time 3126 3127 // The current status of the requested task run. 3128 Status TaskStatusType 3129 3130 // The unique identifier for this task run. 3131 TaskRunId *string 3132 3133 // The unique identifier for the transform. 3134 TransformId *string 3135} 3136 3137// The criteria that are used to filter the task runs for the machine learning 3138// transform. 3139type TaskRunFilterCriteria struct { 3140 3141 // Filter on task runs started after this date. 3142 StartedAfter *time.Time 3143 3144 // Filter on task runs started before this date. 3145 StartedBefore *time.Time 3146 3147 // The current status of the task run. 3148 Status TaskStatusType 3149 3150 // The type of task run. 3151 TaskRunType TaskType 3152} 3153 3154// The configuration properties for the task run. 3155type TaskRunProperties struct { 3156 3157 // The configuration properties for an exporting labels task run. 
3158 ExportLabelsTaskRunProperties *ExportLabelsTaskRunProperties 3159 3160 // The configuration properties for a find matches task run. 3161 FindMatchesTaskRunProperties *FindMatchesTaskRunProperties 3162 3163 // The configuration properties for an importing labels task run. 3164 ImportLabelsTaskRunProperties *ImportLabelsTaskRunProperties 3165 3166 // The configuration properties for a labeling set generation task run. 3167 LabelingSetGenerationTaskRunProperties *LabelingSetGenerationTaskRunProperties 3168 3169 // The type of task run. 3170 TaskType TaskType 3171} 3172 3173// The sorting criteria that are used to sort the list of task runs for the machine 3174// learning transform. 3175type TaskRunSortCriteria struct { 3176 3177 // The column to be used to sort the list of task runs for the machine learning 3178 // transform. 3179 // 3180 // This member is required. 3181 Column TaskRunSortColumnType 3182 3183 // The sort direction to be used to sort the list of task runs for the machine 3184 // learning transform. 3185 // 3186 // This member is required. 3187 SortDirection SortDirectionType 3188} 3189 3190// The encryption-at-rest settings of the transform that apply to accessing user 3191// data. Machine learning transforms can access user data encrypted in Amazon S3 3192// using KMS. Additionally, imported labels and trained transforms can now be 3193// encrypted using a customer provided KMS key. 3194type TransformEncryption struct { 3195 3196 // An MLUserDataEncryption object containing the encryption mode and 3197 // customer-provided KMS key ID. 3198 MlUserDataEncryption *MLUserDataEncryption 3199 3200 // The name of the security configuration. 3201 TaskRunSecurityConfigurationName *string 3202} 3203 3204// The criteria used to filter the machine learning transforms. 3205type TransformFilterCriteria struct { 3206 3207 // The time and date after which the transforms were created. 
3208 CreatedAfter *time.Time 3209 3210 // The time and date before which the transforms were created. 3211 CreatedBefore *time.Time 3212 3213 // This value determines which version of AWS Glue this machine learning transform 3214 // is compatible with. Glue 1.0 is recommended for most customers. If the value is 3215 // not set, the Glue compatibility defaults to Glue 0.9. For more information, see 3216 // AWS Glue Versions 3217 // (https://docs.aws.amazon.com/glue/latest/dg/release-notes.html#release-notes-versions) 3218 // in the developer guide. 3219 GlueVersion *string 3220 3221 // Filter on transforms last modified after this date. 3222 LastModifiedAfter *time.Time 3223 3224 // Filter on transforms last modified before this date. 3225 LastModifiedBefore *time.Time 3226 3227 // A unique transform name that is used to filter the machine learning transforms. 3228 Name *string 3229 3230 // Filters on datasets with a specific schema. The Map object is an array of 3231 // key-value pairs representing the schema this transform accepts, where Column is 3232 // the name of a column, and Type is the type of the data such as an integer or 3233 // string. Has an upper bound of 100 columns. 3234 Schema []SchemaColumn 3235 3236 // Filters the list of machine learning transforms by the last known status of the 3237 // transforms (to indicate whether a transform can be used or not). One of 3238 // "NOT_READY", "READY", or "DELETING". 3239 Status TransformStatusType 3240 3241 // The type of machine learning transform that is used to filter the machine 3242 // learning transforms. 3243 TransformType TransformType 3244} 3245 3246// The algorithm-specific parameters that are associated with the machine learning 3247// transform. 3248type TransformParameters struct { 3249 3250 // The type of machine learning transform. 
For information about the types of 3251 // machine learning transforms, see Creating Machine Learning Transforms 3252 // (https://docs.aws.amazon.com/glue/latest/dg/add-job-machine-learning-transform.html). 3253 // 3254 // This member is required. 3255 TransformType TransformType 3256 3257 // The parameters for the find matches algorithm. 3258 FindMatchesParameters *FindMatchesParameters 3259} 3260 3261// The sorting criteria that are associated with the machine learning transform. 3262type TransformSortCriteria struct { 3263 3264 // The column to be used in the sorting criteria that are associated with the 3265 // machine learning transform. 3266 // 3267 // This member is required. 3268 Column TransformSortColumnType 3269 3270 // The sort direction to be used in the sorting criteria that are associated with 3271 // the machine learning transform. 3272 // 3273 // This member is required. 3274 SortDirection SortDirectionType 3275} 3276 3277// Information about a specific trigger. 3278type Trigger struct { 3279 3280 // The actions initiated by this trigger. 3281 Actions []Action 3282 3283 // A description of this trigger. 3284 Description *string 3285 3286 // Reserved for future use. 3287 Id *string 3288 3289 // The name of the trigger. 3290 Name *string 3291 3292 // The predicate of this trigger, which defines when it will fire. 3293 Predicate *Predicate 3294 3295 // A cron expression used to specify the schedule (see Time-Based Schedules for 3296 // Jobs and Crawlers 3297 // (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html). 3298 // For example, to run something every day at 12:15 UTC, you would specify: cron(15 3299 // 12 * * ? *). 3300 Schedule *string 3301 3302 // The current state of the trigger. 3303 State TriggerState 3304 3305 // The type of trigger that this is. 3306 Type TriggerType 3307 3308 // The name of the workflow associated with the trigger. 
3309 WorkflowName *string 3310} 3311 3312// The details of a Trigger node present in the workflow. 3313type TriggerNodeDetails struct { 3314 3315 // The information of the trigger represented by the trigger node. 3316 Trigger *Trigger 3317} 3318 3319// A structure used to provide information used to update a trigger. This object 3320// updates the previous trigger definition by overwriting it completely. 3321type TriggerUpdate struct { 3322 3323 // The actions initiated by this trigger. 3324 Actions []Action 3325 3326 // A description of this trigger. 3327 Description *string 3328 3329 // Reserved for future use. 3330 Name *string 3331 3332 // The predicate of this trigger, which defines when it will fire. 3333 Predicate *Predicate 3334 3335 // A cron expression used to specify the schedule (see Time-Based Schedules for 3336 // Jobs and Crawlers 3337 // (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html). 3338 // For example, to run something every day at 12:15 UTC, you would specify: cron(15 3339 // 12 * * ? *). 3340 Schedule *string 3341} 3342 3343// Specifies a custom CSV classifier to be updated. 3344type UpdateCsvClassifierRequest struct { 3345 3346 // The name of the classifier. 3347 // 3348 // This member is required. 3349 Name *string 3350 3351 // Enables the processing of files that contain only one column. 3352 AllowSingleColumn *bool 3353 3354 // Indicates whether the CSV file contains a header. 3355 ContainsHeader CsvHeaderOption 3356 3357 // A custom symbol to denote what separates each column entry in the row. 3358 Delimiter *string 3359 3360 // Specifies not to trim values before identifying the type of column values. The 3361 // default value is true. 3362 DisableValueTrimming *bool 3363 3364 // A list of strings representing column names. 3365 Header []string 3366 3367 // A custom symbol to denote what combines content into a single column value. It 3368 // must be different from the column delimiter. 
3369 QuoteSymbol *string 3370} 3371 3372// Specifies a grok classifier to update when passed to UpdateClassifier. 3373type UpdateGrokClassifierRequest struct { 3374 3375 // The name of the GrokClassifier. 3376 // 3377 // This member is required. 3378 Name *string 3379 3380 // An identifier of the data format that the classifier matches, such as Twitter, 3381 // JSON, Omniture logs, Amazon CloudWatch Logs, and so on. 3382 Classification *string 3383 3384 // Optional custom grok patterns used by this classifier. 3385 CustomPatterns *string 3386 3387 // The grok pattern used by this classifier. 3388 GrokPattern *string 3389} 3390 3391// Specifies a JSON classifier to be updated. 3392type UpdateJsonClassifierRequest struct { 3393 3394 // The name of the classifier. 3395 // 3396 // This member is required. 3397 Name *string 3398 3399 // A JsonPath string defining the JSON data for the classifier to classify. AWS 3400 // Glue supports a subset of JsonPath, as described in Writing JsonPath Custom 3401 // Classifiers 3402 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json). 3403 JsonPath *string 3404} 3405 3406// Specifies an XML classifier to be updated. 3407type UpdateXMLClassifierRequest struct { 3408 3409 // The name of the classifier. 3410 // 3411 // This member is required. 3412 Name *string 3413 3414 // An identifier of the data format that the classifier matches. 3415 Classification *string 3416 3417 // The XML tag designating the element that contains each record in an XML document 3418 // being parsed. This cannot identify a self-closing element (closed by />). An 3419 // empty row element that contains only attributes can be parsed as long as it ends 3420 // with a closing tag (for example, is okay, but is not). 3421 RowTag *string 3422} 3423 3424// Represents the equivalent of a Hive user-defined function (UDF) definition. 
3425type UserDefinedFunction struct { 3426 3427 // The ID of the Data Catalog in which the function resides. 3428 CatalogId *string 3429 3430 // The Java class that contains the function code. 3431 ClassName *string 3432 3433 // The time at which the function was created. 3434 CreateTime *time.Time 3435 3436 // The name of the catalog database that contains the function. 3437 DatabaseName *string 3438 3439 // The name of the function. 3440 FunctionName *string 3441 3442 // The owner of the function. 3443 OwnerName *string 3444 3445 // The owner type. 3446 OwnerType PrincipalType 3447 3448 // The resource URIs for the function. 3449 ResourceUris []ResourceUri 3450} 3451 3452// A structure used to create or update a user-defined function. 3453type UserDefinedFunctionInput struct { 3454 3455 // The Java class that contains the function code. 3456 ClassName *string 3457 3458 // The name of the function. 3459 FunctionName *string 3460 3461 // The owner of the function. 3462 OwnerName *string 3463 3464 // The owner type. 3465 OwnerType PrincipalType 3466 3467 // The resource URIs for the function. 3468 ResourceUris []ResourceUri 3469} 3470 3471// A workflow represents a flow in which AWS Glue components should be executed to 3472// complete a logical task. 3473type Workflow struct { 3474 3475 // The date and time when the workflow was created. 3476 CreatedOn *time.Time 3477 3478 // A collection of properties to be used as part of each execution of the workflow. 3479 DefaultRunProperties map[string]string 3480 3481 // A description of the workflow. 3482 Description *string 3483 3484 // The graph representing all the AWS Glue components that belong to the workflow 3485 // as nodes and directed connections between them as edges. 3486 Graph *WorkflowGraph 3487 3488 // The date and time when the workflow was last modified. 3489 LastModifiedOn *time.Time 3490 3491 // The information about the last execution of the workflow. 
3492 LastRun *WorkflowRun 3493 3494 // You can use this parameter to prevent unwanted multiple updates to data, to 3495 // control costs, or in some cases, to prevent exceeding the maximum number of 3496 // concurrent runs of any of the component jobs. If you leave this parameter blank, 3497 // there is no limit to the number of concurrent workflow runs. 3498 MaxConcurrentRuns *int32 3499 3500 // The name of the workflow representing the flow. 3501 Name *string 3502} 3503 3504// A workflow graph represents the complete workflow containing all the AWS Glue 3505// components present in the workflow and all the directed connections between 3506// them. 3507type WorkflowGraph struct { 3508 3509 // A list of all the directed connections between the nodes belonging to the 3510 // workflow. 3511 Edges []Edge 3512 3513 // A list of the the AWS Glue components belong to the workflow represented as 3514 // nodes. 3515 Nodes []Node 3516} 3517 3518// A workflow run is an execution of a workflow providing all the runtime 3519// information. 3520type WorkflowRun struct { 3521 3522 // The date and time when the workflow run completed. 3523 CompletedOn *time.Time 3524 3525 // This error message describes any error that may have occurred in starting the 3526 // workflow run. Currently the only error message is "Concurrent runs exceeded for 3527 // workflow: foo." 3528 ErrorMessage *string 3529 3530 // The graph representing all the AWS Glue components that belong to the workflow 3531 // as nodes and directed connections between them as edges. 3532 Graph *WorkflowGraph 3533 3534 // Name of the workflow that was executed. 3535 Name *string 3536 3537 // The ID of the previous workflow run. 3538 PreviousRunId *string 3539 3540 // The date and time when the workflow run was started. 3541 StartedOn *time.Time 3542 3543 // The statistics of the run. 3544 Statistics *WorkflowRunStatistics 3545 3546 // The status of the workflow run. 
3547 Status WorkflowRunStatus 3548 3549 // The ID of this workflow run. 3550 WorkflowRunId *string 3551 3552 // The workflow run properties which were set during the run. 3553 WorkflowRunProperties map[string]string 3554} 3555 3556// Workflow run statistics provides statistics about the workflow run. 3557type WorkflowRunStatistics struct { 3558 3559 // Total number of Actions that have failed. 3560 FailedActions int32 3561 3562 // Total number Actions in running state. 3563 RunningActions int32 3564 3565 // Total number of Actions that have stopped. 3566 StoppedActions int32 3567 3568 // Total number of Actions that have succeeded. 3569 SucceededActions int32 3570 3571 // Total number of Actions that timed out. 3572 TimeoutActions int32 3573 3574 // Total number of Actions in the workflow run. 3575 TotalActions int32 3576} 3577 3578// A classifier for XML content. 3579type XMLClassifier struct { 3580 3581 // An identifier of the data format that the classifier matches. 3582 // 3583 // This member is required. 3584 Classification *string 3585 3586 // The name of the classifier. 3587 // 3588 // This member is required. 3589 Name *string 3590 3591 // The time that this classifier was registered. 3592 CreationTime *time.Time 3593 3594 // The time that this classifier was last updated. 3595 LastUpdated *time.Time 3596 3597 // The XML tag designating the element that contains each record in an XML document 3598 // being parsed. This can't identify a self-closing element (closed by />). An 3599 // empty row element that contains only attributes can be parsed as long as it ends 3600 // with a closing tag (for example, is okay, but is not). 3601 RowTag *string 3602 3603 // The version of this classifier. 3604 Version int64 3605} 3606