1// Code generated by smithy-go-codegen DO NOT EDIT. 2 3package types 4 5import ( 6 "time" 7) 8 9// Defines an action to be initiated by a trigger. 10type Action struct { 11 12 // The job arguments used when this trigger fires. For this job run, they replace 13 // the default arguments set in the job definition itself. You can specify 14 // arguments here that your own job-execution script consumes, as well as arguments 15 // that AWS Glue itself consumes. For information about how to specify and consume 16 // your own Job arguments, see the Calling AWS Glue APIs in Python 17 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) 18 // topic in the developer guide. For information about the key-value pairs that AWS 19 // Glue consumes to set up your job, see the Special Parameters Used by AWS Glue 20 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) 21 // topic in the developer guide. 22 Arguments map[string]string 23 24 // The name of the crawler to be used with this action. 25 CrawlerName *string 26 27 // The name of a job to be executed. 28 JobName *string 29 30 // Specifies configuration properties of a job run notification. 31 NotificationProperty *NotificationProperty 32 33 // The name of the SecurityConfiguration structure to be used with this action. 34 SecurityConfiguration *string 35 36 // The JobRun timeout in minutes. This is the maximum time that a job run can 37 // consume resources before it is terminated and enters TIMEOUT status. The default 38 // is 2,880 minutes (48 hours). This overrides the timeout value set in the parent 39 // job. 40 Timeout *int32 41} 42 43// A list of errors that can occur when registering partition indexes for an 44// existing table. 
These errors give the details about why an index registration 45// failed and provide a limited number of partitions in the response, so that you 46// can fix the partitions at fault and try registering the index again. The most 47// common set of errors that can occur are categorized as follows: 48// 49// * 50// EncryptedPartitionError: The partitions are encrypted. 51// 52// * 53// InvalidPartitionTypeDataError: The partition value doesn't match the data type 54// for that partition column. 55// 56// * MissingPartitionValueError: The partitions are 57// encrypted. 58// 59// * UnsupportedPartitionCharacterError: Characters inside the 60// partition value are not supported. For example: U+0000 , U+0001, U+0002. 61// 62// * 63// InternalError: Any error which does not belong to other error codes. 64type BackfillError struct { 65 66 // The error code for an error that occurred when registering partition indexes for 67 // an existing table. 68 Code BackfillErrorCode 69 70 // A list of a limited number of partitions in the response. 71 Partitions []PartitionValueList 72} 73 74// Records an error that occurred when attempting to stop a specified job run. 75type BatchStopJobRunError struct { 76 77 // Specifies details about the error that was encountered. 78 ErrorDetail *ErrorDetail 79 80 // The name of the job definition that is used in the job run in question. 81 JobName *string 82 83 // The JobRunId of the job run in question. 84 JobRunId *string 85} 86 87// Records a successful request to stop a specified JobRun. 88type BatchStopJobRunSuccessfulSubmission struct { 89 90 // The name of the job definition used in the job run that was stopped. 91 JobName *string 92 93 // The JobRunId of the job run that was stopped. 94 JobRunId *string 95} 96 97// Contains information about a batch update partition error. 98type BatchUpdatePartitionFailureEntry struct { 99 100 // The details about the batch update partition error. 
101 ErrorDetail *ErrorDetail 102 103 // A list of values defining the partitions. 104 PartitionValueList []string 105} 106 107// A structure that contains the values and structure used to update a partition. 108type BatchUpdatePartitionRequestEntry struct { 109 110 // The structure used to update a partition. 111 // 112 // This member is required. 113 PartitionInput *PartitionInput 114 115 // A list of values defining the partitions. 116 // 117 // This member is required. 118 PartitionValueList []string 119} 120 121// Defines column statistics supported for bit sequence data values. 122type BinaryColumnStatisticsData struct { 123 124 // The average bit sequence length in the column. 125 // 126 // This member is required. 127 AverageLength float64 128 129 // The size of the longest bit sequence in the column. 130 // 131 // This member is required. 132 MaximumLength int64 133 134 // The number of null values in the column. 135 // 136 // This member is required. 137 NumberOfNulls int64 138} 139 140// Defines column statistics supported for Boolean data columns. 141type BooleanColumnStatisticsData struct { 142 143 // The number of false values in the column. 144 // 145 // This member is required. 146 NumberOfFalses int64 147 148 // The number of null values in the column. 149 // 150 // This member is required. 151 NumberOfNulls int64 152 153 // The number of true values in the column. 154 // 155 // This member is required. 156 NumberOfTrues int64 157} 158 159// Specifies a table definition in the AWS Glue Data Catalog. 160type CatalogEntry struct { 161 162 // The database in which the table metadata resides. 163 // 164 // This member is required. 165 DatabaseName *string 166 167 // The name of the table in question. 168 // 169 // This member is required. 170 TableName *string 171} 172 173// A structure containing migration status information. 174type CatalogImportStatus struct { 175 176 // True if the migration has completed, or False otherwise. 
177 ImportCompleted bool 178 179 // The time that the migration was started. 180 ImportTime *time.Time 181 182 // The name of the person who initiated the migration. 183 ImportedBy *string 184} 185 186// Specifies an AWS Glue Data Catalog target. 187type CatalogTarget struct { 188 189 // The name of the database to be synchronized. 190 // 191 // This member is required. 192 DatabaseName *string 193 194 // A list of the tables to be synchronized. 195 // 196 // This member is required. 197 Tables []string 198} 199 200// Classifiers are triggered during a crawl task. A classifier checks whether a 201// given file is in a format it can handle. If it is, the classifier creates a 202// schema in the form of a StructType object that matches that data format. You can 203// use the standard classifiers that AWS Glue provides, or you can write your own 204// classifiers to best categorize your data sources and specify the appropriate 205// schemas to use for them. A classifier can be a grok classifier, an XML 206// classifier, a JSON classifier, or a custom CSV classifier, as specified in one 207// of the fields in the Classifier object. 208type Classifier struct { 209 210 // A classifier for comma-separated values (CSV). 211 CsvClassifier *CsvClassifier 212 213 // A classifier that uses grok. 214 GrokClassifier *GrokClassifier 215 216 // A classifier for JSON content. 217 JsonClassifier *JsonClassifier 218 219 // A classifier for XML content. 220 XMLClassifier *XMLClassifier 221} 222 223// Specifies how Amazon CloudWatch data should be encrypted. 224type CloudWatchEncryption struct { 225 226 // The encryption mode to use for CloudWatch data. 227 CloudWatchEncryptionMode CloudWatchEncryptionMode 228 229 // The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data. 230 KmsKeyArn *string 231} 232 233// Represents a directional edge in a directed acyclic graph (DAG). 234type CodeGenEdge struct { 235 236 // The ID of the node at which the edge starts. 
237 // 238 // This member is required. 239 Source *string 240 241 // The ID of the node at which the edge ends. 242 // 243 // This member is required. 244 Target *string 245 246 // The target of the edge. 247 TargetParameter *string 248} 249 250// Represents a node in a directed acyclic graph (DAG) 251type CodeGenNode struct { 252 253 // Properties of the node, in the form of name-value pairs. 254 // 255 // This member is required. 256 Args []CodeGenNodeArg 257 258 // A node identifier that is unique within the node's graph. 259 // 260 // This member is required. 261 Id *string 262 263 // The type of node that this is. 264 // 265 // This member is required. 266 NodeType *string 267 268 // The line number of the node. 269 LineNumber int32 270} 271 272// An argument or property of a node. 273type CodeGenNodeArg struct { 274 275 // The name of the argument or property. 276 // 277 // This member is required. 278 Name *string 279 280 // The value of the argument or property. 281 // 282 // This member is required. 283 Value *string 284 285 // True if the value is used as a parameter. 286 Param bool 287} 288 289// A column in a Table. 290type Column struct { 291 292 // The name of the Column. 293 // 294 // This member is required. 295 Name *string 296 297 // A free-form text comment. 298 Comment *string 299 300 // These key-value pairs define properties associated with the column. 301 Parameters map[string]string 302 303 // The data type of the Column. 304 Type *string 305} 306 307// Encapsulates a column name that failed and the reason for failure. 308type ColumnError struct { 309 310 // The name of the column that failed. 311 ColumnName *string 312 313 // An error message with the reason for the failure of an operation. 314 Error *ErrorDetail 315} 316 317// A structure containing the column name and column importance score for a column. 
318// Column importance helps you understand how columns contribute to your model, by 319// identifying which columns in your records are more important than others. 320type ColumnImportance struct { 321 322 // The name of a column. 323 ColumnName *string 324 325 // The column importance score for the column, as a decimal. 326 Importance *float64 327} 328 329// Represents the generated column-level statistics for a table or partition. 330type ColumnStatistics struct { 331 332 // The timestamp of when column statistics were generated. 333 // 334 // This member is required. 335 AnalyzedTime *time.Time 336 337 // Name of column which statistics belong to. 338 // 339 // This member is required. 340 ColumnName *string 341 342 // The data type of the column. 343 // 344 // This member is required. 345 ColumnType *string 346 347 // A ColumnStatisticData object that contains the statistics data values. 348 // 349 // This member is required. 350 StatisticsData *ColumnStatisticsData 351} 352 353// Contains the individual types of column statistics data. Only one data object 354// should be set and indicated by the Type attribute. 355type ColumnStatisticsData struct { 356 357 // The type of column statistics data. 358 // 359 // This member is required. 360 Type ColumnStatisticsType 361 362 // Binary column statistics data. 363 BinaryColumnStatisticsData *BinaryColumnStatisticsData 364 365 // Boolean column statistics data. 366 BooleanColumnStatisticsData *BooleanColumnStatisticsData 367 368 // Date column statistics data. 369 DateColumnStatisticsData *DateColumnStatisticsData 370 371 // Decimal column statistics data. 372 DecimalColumnStatisticsData *DecimalColumnStatisticsData 373 374 // Double column statistics data. 375 DoubleColumnStatisticsData *DoubleColumnStatisticsData 376 377 // Long column statistics data. 378 LongColumnStatisticsData *LongColumnStatisticsData 379 380 // String column statistics data. 
381 StringColumnStatisticsData *StringColumnStatisticsData 382} 383 384// Encapsulates a ColumnStatistics object that failed and the reason for failure. 385type ColumnStatisticsError struct { 386 387 // The ColumnStatistics of the column. 388 ColumnStatistics *ColumnStatistics 389 390 // An error message with the reason for the failure of an operation. 391 Error *ErrorDetail 392} 393 394// Defines a condition under which a trigger fires. 395type Condition struct { 396 397 // The state of the crawler to which this condition applies. 398 CrawlState CrawlState 399 400 // The name of the crawler to which this condition applies. 401 CrawlerName *string 402 403 // The name of the job whose JobRuns this condition applies to, and on which this 404 // trigger waits. 405 JobName *string 406 407 // A logical operator. 408 LogicalOperator LogicalOperator 409 410 // The condition state. Currently, the only job states that a trigger can listen 411 // for are SUCCEEDED, STOPPED, FAILED, and TIMEOUT. The only crawler states that a 412 // trigger can listen for are SUCCEEDED, FAILED, and CANCELLED. 413 State JobRunState 414} 415 416// The confusion matrix shows you what your transform is predicting accurately and 417// what types of errors it is making. For more information, see Confusion matrix 418// (https://en.wikipedia.org/wiki/Confusion_matrix) in Wikipedia. 419type ConfusionMatrix struct { 420 421 // The number of matches in the data that the transform didn't find, in the 422 // confusion matrix for your transform. 423 NumFalseNegatives *int64 424 425 // The number of nonmatches in the data that the transform incorrectly classified 426 // as a match, in the confusion matrix for your transform. 427 NumFalsePositives *int64 428 429 // The number of nonmatches in the data that the transform correctly rejected, in 430 // the confusion matrix for your transform. 
431 NumTrueNegatives *int64 432 433 // The number of matches in the data that the transform correctly found, in the 434 // confusion matrix for your transform. 435 NumTruePositives *int64 436} 437 438// Defines a connection to a data source. 439type Connection struct { 440 441 // These key-value pairs define parameters for the connection: 442 // 443 // * HOST - The host 444 // URI: either the fully qualified domain name (FQDN) or the IPv4 address of the 445 // database host. 446 // 447 // * PORT - The port number, between 1024 and 65535, of the port on 448 // which the database host is listening for database connections. 449 // 450 // * USER_NAME - 451 // The name under which to log in to the database. The value string for USER_NAME 452 // is "USERNAME". 453 // 454 // * PASSWORD - A password, if one is used, for the user name. 455 // 456 // * 457 // ENCRYPTED_PASSWORD - When you enable connection password protection by setting 458 // ConnectionPasswordEncryption in the Data Catalog encryption settings, this field 459 // stores the encrypted password. 460 // 461 // * JDBC_DRIVER_JAR_URI - The Amazon Simple 462 // Storage Service (Amazon S3) path of the JAR file that contains the JDBC driver 463 // to use. 464 // 465 // * JDBC_DRIVER_CLASS_NAME - The class name of the JDBC driver to use. 466 // 467 // * 468 // JDBC_ENGINE - The name of the JDBC engine to use. 469 // 470 // * JDBC_ENGINE_VERSION - The 471 // version of the JDBC engine to use. 472 // 473 // * CONFIG_FILES - (Reserved for future 474 // use.) 475 // 476 // * INSTANCE_ID - The instance ID to use. 477 // 478 // * JDBC_CONNECTION_URL - The URL 479 // for connecting to a JDBC data source. 480 // 481 // * JDBC_ENFORCE_SSL - A Boolean string 482 // (true, false) specifying whether Secure Sockets Layer (SSL) with hostname 483 // matching is enforced for the JDBC connection on the client. The default is 484 // false. 
485 // 486 // * CUSTOM_JDBC_CERT - An Amazon S3 location specifying the customer's 487 // root certificate. AWS Glue uses this root certificate to validate the customer’s 488 // certificate when connecting to the customer database. AWS Glue only handles 489 // X.509 certificates. The certificate provided must be DER-encoded and supplied in 490 // Base64 encoding PEM format. 491 // 492 // * SKIP_CUSTOM_JDBC_CERT_VALIDATION - By default, 493 // this is false. AWS Glue validates the Signature algorithm and Subject Public Key 494 // Algorithm for the customer certificate. The only permitted algorithms for the 495 // Signature algorithm are SHA256withRSA, SHA384withRSA or SHA512withRSA. For the 496 // Subject Public Key Algorithm, the key length must be at least 2048. You can set 497 // the value of this property to true to skip AWS Glue’s validation of the customer 498 // certificate. 499 // 500 // * CUSTOM_JDBC_CERT_STRING - A custom JDBC certificate string which 501 // is used for domain match or distinguished name match to prevent a 502 // man-in-the-middle attack. In Oracle database, this is used as the 503 // SSL_SERVER_CERT_DN; in Microsoft SQL Server, this is used as the 504 // hostNameInCertificate. 505 // 506 // * CONNECTION_URL - The URL for connecting to a general 507 // (non-JDBC) data source. 508 // 509 // * KAFKA_BOOTSTRAP_SERVERS - A comma-separated list of 510 // host and port pairs that are the addresses of the Apache Kafka brokers in a 511 // Kafka cluster to which a Kafka client will connect to and bootstrap itself. 512 // 513 // * 514 // KAFKA_SSL_ENABLED - Whether to enable or disable SSL on an Apache Kafka 515 // connection. Default value is "true". 516 // 517 // * KAFKA_CUSTOM_CERT - The Amazon S3 URL 518 // for the private CA cert file (.pem format). The default is an empty string. 519 // 520 // * 521 // KAFKA_SKIP_CUSTOM_CERT_VALIDATION - Whether to skip the validation of the CA 522 // cert file or not. 
AWS Glue validates for three algorithms: SHA256withRSA, 523 // SHA384withRSA and SHA512withRSA. Default value is "false". 524 // 525 // * SECRET_ID - The 526 // secret ID used for the secret manager of credentials. 527 // 528 // * CONNECTOR_URL - The 529 // connector URL for a MARKETPLACE or CUSTOM connection. 530 // 531 // * CONNECTOR_TYPE - The 532 // connector type for a MARKETPLACE or CUSTOM connection. 533 // 534 // * CONNECTOR_CLASS_NAME - 535 // The connector class name for a MARKETPLACE or CUSTOM connection. 536 ConnectionProperties map[string]string 537 538 // The type of the connection. Currently, SFTP is not supported. 539 ConnectionType ConnectionType 540 541 // The time that this connection definition was created. 542 CreationTime *time.Time 543 544 // The description of the connection. 545 Description *string 546 547 // The user, group, or role that last updated this connection definition. 548 LastUpdatedBy *string 549 550 // The last time that this connection definition was updated. 551 LastUpdatedTime *time.Time 552 553 // A list of criteria that can be used in selecting this connection. 554 MatchCriteria []string 555 556 // The name of the connection definition. 557 Name *string 558 559 // A map of physical connection requirements, such as virtual private cloud (VPC) 560 // and SecurityGroup, that are needed to make this connection successfully. 561 PhysicalConnectionRequirements *PhysicalConnectionRequirements 562} 563 564// A structure that is used to specify a connection to create or update. 565type ConnectionInput struct { 566 567 // These key-value pairs define parameters for the connection. 568 // 569 // This member is required. 570 ConnectionProperties map[string]string 571 572 // The type of the connection. Currently, these types are supported: 573 // 574 // * JDBC - 575 // Designates a connection to a database through Java Database Connectivity 576 // (JDBC). 
577 // 578 // * KAFKA - Designates a connection to an Apache Kafka streaming 579 // platform. 580 // 581 // * MONGODB - Designates a connection to a MongoDB document 582 // database. 583 // 584 // * NETWORK - Designates a network connection to a data source within 585 // an Amazon Virtual Private Cloud environment (Amazon VPC). 586 // 587 // * MARKETPLACE - Uses 588 // configuration settings contained in a connector purchased from AWS Marketplace 589 // to read from and write to data stores that are not natively supported by AWS 590 // Glue. 591 // 592 // * CUSTOM - Uses configuration settings contained in a custom connector to 593 // read from and write to data stores that are not natively supported by AWS 594 // Glue. 595 // 596 // SFTP is not supported. 597 // 598 // This member is required. 599 ConnectionType ConnectionType 600 601 // The name of the connection. 602 // 603 // This member is required. 604 Name *string 605 606 // The description of the connection. 607 Description *string 608 609 // A list of criteria that can be used in selecting this connection. 610 MatchCriteria []string 611 612 // A map of physical connection requirements, such as virtual private cloud (VPC) 613 // and SecurityGroup, that are needed to successfully make this connection. 614 PhysicalConnectionRequirements *PhysicalConnectionRequirements 615} 616 617// The data structure used by the Data Catalog to encrypt the password as part of 618// CreateConnection or UpdateConnection and store it in the ENCRYPTED_PASSWORD 619// field in the connection properties. You can enable catalog encryption or only 620// password encryption. When a CreationConnection request arrives containing a 621// password, the Data Catalog first encrypts the password using your AWS KMS key. 622// It then encrypts the whole connection object again if catalog encryption is also 623// enabled. 
This encryption requires that you set AWS KMS key permissions to enable 624// or restrict access on the password key according to your security requirements. 625// For example, you might want only administrators to have decrypt permission on 626// the password key. 627type ConnectionPasswordEncryption struct { 628 629 // When the ReturnConnectionPasswordEncrypted flag is set to "true", passwords 630 // remain encrypted in the responses of GetConnection and GetConnections. This 631 // encryption takes effect independently from catalog encryption. 632 // 633 // This member is required. 634 ReturnConnectionPasswordEncrypted bool 635 636 // An AWS KMS key that is used to encrypt the connection password. If connection 637 // password protection is enabled, the caller of CreateConnection and 638 // UpdateConnection needs at least kms:Encrypt permission on the specified AWS KMS 639 // key, to encrypt passwords before storing them in the Data Catalog. You can set 640 // the decrypt permission to enable or restrict access on the password key 641 // according to your security requirements. 642 AwsKmsKeyId *string 643} 644 645// Specifies the connections used by a job. 646type ConnectionsList struct { 647 648 // A list of connections used by the job. 649 Connections []string 650} 651 652// The details of a crawl in the workflow. 653type Crawl struct { 654 655 // The date and time on which the crawl completed. 656 CompletedOn *time.Time 657 658 // The error message associated with the crawl. 659 ErrorMessage *string 660 661 // The log group associated with the crawl. 662 LogGroup *string 663 664 // The log stream associated with the crawl. 665 LogStream *string 666 667 // The date and time on which the crawl started. 668 StartedOn *time.Time 669 670 // The state of the crawler. 671 State CrawlState 672} 673 674// Specifies a crawler program that examines a data source and uses classifiers to 675// try to determine its schema. 
If successful, the crawler records metadata 676// concerning the data source in the AWS Glue Data Catalog. 677type Crawler struct { 678 679 // A list of UTF-8 strings that specify the custom classifiers that are associated 680 // with the crawler. 681 Classifiers []string 682 683 // Crawler configuration information. This versioned JSON string allows users to 684 // specify aspects of a crawler's behavior. For more information, see Configuring a 685 // Crawler (https://docs.aws.amazon.com/glue/latest/dg/crawler-configuration.html). 686 Configuration *string 687 688 // If the crawler is running, contains the total time elapsed since the last crawl 689 // began. 690 CrawlElapsedTime int64 691 692 // The name of the SecurityConfiguration structure to be used by this crawler. 693 CrawlerSecurityConfiguration *string 694 695 // The time that the crawler was created. 696 CreationTime *time.Time 697 698 // The name of the database in which the crawler's output is stored. 699 DatabaseName *string 700 701 // A description of the crawler. 702 Description *string 703 704 // The status of the last crawl, and potentially error information if an error 705 // occurred. 706 LastCrawl *LastCrawlInfo 707 708 // The time that the crawler was last updated. 709 LastUpdated *time.Time 710 711 // A configuration that specifies whether data lineage is enabled for the crawler. 712 LineageConfiguration *LineageConfiguration 713 714 // The name of the crawler. 715 Name *string 716 717 // A policy that specifies whether to crawl the entire dataset again, or to crawl 718 // only folders that were added since the last crawler run. 719 RecrawlPolicy *RecrawlPolicy 720 721 // The Amazon Resource Name (ARN) of an IAM role that's used to access customer 722 // resources, such as Amazon Simple Storage Service (Amazon S3) data. 723 Role *string 724 725 // For scheduled crawlers, the schedule when the crawler runs. 
726 Schedule *Schedule 727 728 // The policy that specifies update and delete behaviors for the crawler. 729 SchemaChangePolicy *SchemaChangePolicy 730 731 // Indicates whether the crawler is running, or whether a run is pending. 732 State CrawlerState 733 734 // The prefix added to the names of tables that are created. 735 TablePrefix *string 736 737 // A collection of targets to crawl. 738 Targets *CrawlerTargets 739 740 // The version of the crawler. 741 Version int64 742} 743 744// Metrics for a specified crawler. 745type CrawlerMetrics struct { 746 747 // The name of the crawler. 748 CrawlerName *string 749 750 // The duration of the crawler's most recent run, in seconds. 751 LastRuntimeSeconds float64 752 753 // The median duration of this crawler's runs, in seconds. 754 MedianRuntimeSeconds float64 755 756 // True if the crawler is still estimating how long it will take to complete this 757 // run. 758 StillEstimating bool 759 760 // The number of tables created by this crawler. 761 TablesCreated int32 762 763 // The number of tables deleted by this crawler. 764 TablesDeleted int32 765 766 // The number of tables updated by this crawler. 767 TablesUpdated int32 768 769 // The estimated time left to complete a running crawl. 770 TimeLeftSeconds float64 771} 772 773// The details of a Crawler node present in the workflow. 774type CrawlerNodeDetails struct { 775 776 // A list of crawls represented by the crawl node. 777 Crawls []Crawl 778} 779 780// Specifies data stores to crawl. 781type CrawlerTargets struct { 782 783 // Specifies AWS Glue Data Catalog targets. 784 CatalogTargets []CatalogTarget 785 786 // Specifies Amazon DynamoDB targets. 787 DynamoDBTargets []DynamoDBTarget 788 789 // Specifies JDBC targets. 790 JdbcTargets []JdbcTarget 791 792 // Specifies Amazon DocumentDB or MongoDB targets. 793 MongoDBTargets []MongoDBTarget 794 795 // Specifies Amazon Simple Storage Service (Amazon S3) targets. 
796 S3Targets []S3Target 797} 798 799// Specifies a custom CSV classifier for CreateClassifier to create. 800type CreateCsvClassifierRequest struct { 801 802 // The name of the classifier. 803 // 804 // This member is required. 805 Name *string 806 807 // Enables the processing of files that contain only one column. 808 AllowSingleColumn *bool 809 810 // Indicates whether the CSV file contains a header. 811 ContainsHeader CsvHeaderOption 812 813 // A custom symbol to denote what separates each column entry in the row. 814 Delimiter *string 815 816 // Specifies not to trim values before identifying the type of column values. The 817 // default value is true. 818 DisableValueTrimming *bool 819 820 // A list of strings representing column names. 821 Header []string 822 823 // A custom symbol to denote what combines content into a single column value. Must 824 // be different from the column delimiter. 825 QuoteSymbol *string 826} 827 828// Specifies a grok classifier for CreateClassifier to create. 829type CreateGrokClassifierRequest struct { 830 831 // An identifier of the data format that the classifier matches, such as Twitter, 832 // JSON, Omniture logs, Amazon CloudWatch Logs, and so on. 833 // 834 // This member is required. 835 Classification *string 836 837 // The grok pattern used by this classifier. 838 // 839 // This member is required. 840 GrokPattern *string 841 842 // The name of the new classifier. 843 // 844 // This member is required. 845 Name *string 846 847 // Optional custom grok patterns used by this classifier. 848 CustomPatterns *string 849} 850 851// Specifies a JSON classifier for CreateClassifier to create. 852type CreateJsonClassifierRequest struct { 853 854 // A JsonPath string defining the JSON data for the classifier to classify. AWS 855 // Glue supports a subset of JsonPath, as described in Writing JsonPath Custom 856 // Classifiers 857 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json). 
858 // 859 // This member is required. 860 JsonPath *string 861 862 // The name of the classifier. 863 // 864 // This member is required. 865 Name *string 866} 867 868// Specifies an XML classifier for CreateClassifier to create. 869type CreateXMLClassifierRequest struct { 870 871 // An identifier of the data format that the classifier matches. 872 // 873 // This member is required. 874 Classification *string 875 876 // The name of the classifier. 877 // 878 // This member is required. 879 Name *string 880 881 // The XML tag designating the element that contains each record in an XML document 882 // being parsed. This can't identify a self-closing element (closed by />). An 883 // empty row element that contains only attributes can be parsed as long as it ends 884 // with a closing tag (for example, is okay, but is not). 885 RowTag *string 886} 887 888// A classifier for custom CSV content. 889type CsvClassifier struct { 890 891 // The name of the classifier. 892 // 893 // This member is required. 894 Name *string 895 896 // Enables the processing of files that contain only one column. 897 AllowSingleColumn *bool 898 899 // Indicates whether the CSV file contains a header. 900 ContainsHeader CsvHeaderOption 901 902 // The time that this classifier was registered. 903 CreationTime *time.Time 904 905 // A custom symbol to denote what separates each column entry in the row. 906 Delimiter *string 907 908 // Specifies not to trim values before identifying the type of column values. The 909 // default value is true. 910 DisableValueTrimming *bool 911 912 // A list of strings representing column names. 913 Header []string 914 915 // The time that this classifier was last updated. 916 LastUpdated *time.Time 917 918 // A custom symbol to denote what combines content into a single column value. It 919 // must be different from the column delimiter. 920 QuoteSymbol *string 921 922 // The version of this classifier. 
923 Version int64 924} 925 926// The Database object represents a logical grouping of tables that might reside in 927// a Hive metastore or an RDBMS. 928type Database struct { 929 930 // The name of the database. For Hive compatibility, this is folded to lowercase 931 // when it is stored. 932 // 933 // This member is required. 934 Name *string 935 936 // The ID of the Data Catalog in which the database resides. 937 CatalogId *string 938 939 // Creates a set of default permissions on the table for principals. 940 CreateTableDefaultPermissions []PrincipalPermissions 941 942 // The time at which the metadata database was created in the catalog. 943 CreateTime *time.Time 944 945 // A description of the database. 946 Description *string 947 948 // The location of the database (for example, an HDFS path). 949 LocationUri *string 950 951 // These key-value pairs define parameters and properties of the database. 952 Parameters map[string]string 953 954 // A DatabaseIdentifier structure that describes a target database for resource 955 // linking. 956 TargetDatabase *DatabaseIdentifier 957} 958 959// A structure that describes a target database for resource linking. 960type DatabaseIdentifier struct { 961 962 // The ID of the Data Catalog in which the database resides. 963 CatalogId *string 964 965 // The name of the catalog database. 966 DatabaseName *string 967} 968 969// The structure used to create or update a database. 970type DatabaseInput struct { 971 972 // The name of the database. For Hive compatibility, this is folded to lowercase 973 // when it is stored. 974 // 975 // This member is required. 976 Name *string 977 978 // Creates a set of default permissions on the table for principals. 979 CreateTableDefaultPermissions []PrincipalPermissions 980 981 // A description of the database. 982 Description *string 983 984 // The location of the database (for example, an HDFS path). 
985 LocationUri *string
986
987 // These key-value pairs define parameters and properties of the
988 // database.
989 Parameters map[string]string
990
991 // A DatabaseIdentifier structure that describes a target database for resource
992 // linking.
993 TargetDatabase *DatabaseIdentifier
994}
995
996// Contains configuration information for maintaining Data Catalog security.
997type DataCatalogEncryptionSettings struct {
998
999 // When connection password protection is enabled, the Data Catalog uses a
1000 // customer-provided key to encrypt the password as part of CreateConnection or
1001 // UpdateConnection and store it in the ENCRYPTED_PASSWORD field in the connection
1002 // properties. You can enable catalog encryption or only password encryption.
1003 ConnectionPasswordEncryption *ConnectionPasswordEncryption
1004
1005 // Specifies the encryption-at-rest configuration for the Data Catalog.
1006 EncryptionAtRest *EncryptionAtRest
1007}
1008
1009// The AWS Lake Formation principal.
1010type DataLakePrincipal struct {
1011
1012 // An identifier for the AWS Lake Formation principal.
1013 DataLakePrincipalIdentifier *string
1014}
1015
1016// Defines column statistics supported for timestamp data columns.
1017type DateColumnStatisticsData struct {
1018
1019 // The number of distinct values in a column.
1020 //
1021 // This member is required.
1022 NumberOfDistinctValues int64
1023
1024 // The number of null values in the column.
1025 //
1026 // This member is required.
1027 NumberOfNulls int64
1028
1029 // The highest value in the column.
1030 MaximumValue *time.Time
1031
1032 // The lowest value in the column.
1033 MinimumValue *time.Time
1034}
1035
1036// Defines column statistics supported for fixed-point number data columns.
1037type DecimalColumnStatisticsData struct {
1038
1039 // The number of distinct values in a column.
1040 //
1041 // This member is required.
1042 NumberOfDistinctValues int64 1043 1044 // The number of null values in the column. 1045 // 1046 // This member is required. 1047 NumberOfNulls int64 1048 1049 // The highest value in the column. 1050 MaximumValue *DecimalNumber 1051 1052 // The lowest value in the column. 1053 MinimumValue *DecimalNumber 1054} 1055 1056// Contains a numeric value in decimal format. 1057type DecimalNumber struct { 1058 1059 // The scale that determines where the decimal point falls in the unscaled value. 1060 // 1061 // This member is required. 1062 Scale int32 1063 1064 // The unscaled numeric value. 1065 // 1066 // This member is required. 1067 UnscaledValue []byte 1068} 1069 1070// A development endpoint where a developer can remotely debug extract, transform, 1071// and load (ETL) scripts. 1072type DevEndpoint struct { 1073 1074 // A map of arguments used to configure the DevEndpoint. Valid arguments are: 1075 // 1076 // * 1077 // "--enable-glue-datacatalog": "" 1078 // 1079 // * "GLUE_PYTHON_VERSION": "3" 1080 // 1081 // * 1082 // "GLUE_PYTHON_VERSION": "2" 1083 // 1084 // You can specify a version of Python support for 1085 // development endpoints by using the Arguments parameter in the CreateDevEndpoint 1086 // or UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to 1087 // Python 2. 1088 Arguments map[string]string 1089 1090 // The AWS Availability Zone where this DevEndpoint is located. 1091 AvailabilityZone *string 1092 1093 // The point in time at which this DevEndpoint was created. 1094 CreatedTimestamp *time.Time 1095 1096 // The name of the DevEndpoint. 1097 EndpointName *string 1098 1099 // The path to one or more Java .jar files in an S3 bucket that should be loaded in 1100 // your DevEndpoint. You can only use pure Java/Scala libraries with a DevEndpoint. 1101 ExtraJarsS3Path *string 1102 1103 // The paths to one or more Python libraries in an Amazon S3 bucket that should be 1104 // loaded in your DevEndpoint. 
Multiple values must be complete paths separated by 1105 // a comma. You can only use pure Python libraries with a DevEndpoint. Libraries 1106 // that rely on C extensions, such as the pandas (http://pandas.pydata.org/) Python 1107 // data analysis library, are not currently supported. 1108 ExtraPythonLibsS3Path *string 1109 1110 // The reason for a current failure in this DevEndpoint. 1111 FailureReason *string 1112 1113 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1114 // supports. The Python version indicates the version supported for running your 1115 // ETL scripts on development endpoints. For more information about the available 1116 // AWS Glue versions and corresponding Spark and Python versions, see Glue version 1117 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1118 // guide. Development endpoints that are created without specifying a Glue version 1119 // default to Glue 0.9. You can specify a version of Python support for development 1120 // endpoints by using the Arguments parameter in the CreateDevEndpoint or 1121 // UpdateDevEndpoint APIs. If no arguments are provided, the version defaults to 1122 // Python 2. 1123 GlueVersion *string 1124 1125 // The point in time at which this DevEndpoint was last modified. 1126 LastModifiedTimestamp *time.Time 1127 1128 // The status of the last update. 1129 LastUpdateStatus *string 1130 1131 // The number of AWS Glue Data Processing Units (DPUs) allocated to this 1132 // DevEndpoint. 1133 NumberOfNodes int32 1134 1135 // The number of workers of a defined workerType that are allocated to the 1136 // development endpoint. The maximum number of workers you can define are 299 for 1137 // G.1X, and 149 for G.2X. 1138 NumberOfWorkers *int32 1139 1140 // A private IP address to access the DevEndpoint within a VPC if the DevEndpoint 1141 // is created within one. 
The PrivateAddress field is present only when you create 1142 // the DevEndpoint within your VPC. 1143 PrivateAddress *string 1144 1145 // The public IP address used by this DevEndpoint. The PublicAddress field is 1146 // present only when you create a non-virtual private cloud (VPC) DevEndpoint. 1147 PublicAddress *string 1148 1149 // The public key to be used by this DevEndpoint for authentication. This attribute 1150 // is provided for backward compatibility because the recommended attribute to use 1151 // is public keys. 1152 PublicKey *string 1153 1154 // A list of public keys to be used by the DevEndpoints for authentication. Using 1155 // this attribute is preferred over a single public key because the public keys 1156 // allow you to have a different private key per client. If you previously created 1157 // an endpoint with a public key, you must remove that key to be able to set a list 1158 // of public keys. Call the UpdateDevEndpoint API operation with the public key 1159 // content in the deletePublicKeys attribute, and the list of new keys in the 1160 // addPublicKeys attribute. 1161 PublicKeys []string 1162 1163 // The Amazon Resource Name (ARN) of the IAM role used in this DevEndpoint. 1164 RoleArn *string 1165 1166 // The name of the SecurityConfiguration structure to be used with this 1167 // DevEndpoint. 1168 SecurityConfiguration *string 1169 1170 // A list of security group identifiers used in this DevEndpoint. 1171 SecurityGroupIds []string 1172 1173 // The current status of this DevEndpoint. 1174 Status *string 1175 1176 // The subnet ID for this DevEndpoint. 1177 SubnetId *string 1178 1179 // The ID of the virtual private cloud (VPC) used by this DevEndpoint. 1180 VpcId *string 1181 1182 // The type of predefined worker that is allocated to the development endpoint. 1183 // Accepts a value of Standard, G.1X, or G.2X. 
1184 // 1185 // * For the Standard worker type, 1186 // each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors 1187 // per worker. 1188 // 1189 // * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 1190 // GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend this 1191 // worker type for memory-intensive jobs. 1192 // 1193 // * For the G.2X worker type, each worker 1194 // maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor 1195 // per worker. We recommend this worker type for memory-intensive jobs. 1196 // 1197 // Known 1198 // issue: when a development endpoint is created with the G.2XWorkerType 1199 // configuration, the Spark drivers for the development endpoint will run on 4 1200 // vCPU, 16 GB of memory, and a 64 GB disk. 1201 WorkerType WorkerType 1202 1203 // The YARN endpoint address used by this DevEndpoint. 1204 YarnEndpointAddress *string 1205 1206 // The Apache Zeppelin port for the remote Apache Spark interpreter. 1207 ZeppelinRemoteSparkInterpreterPort int32 1208} 1209 1210// Custom libraries to be loaded into a development endpoint. 1211type DevEndpointCustomLibraries struct { 1212 1213 // The path to one or more Java .jar files in an S3 bucket that should be loaded in 1214 // your DevEndpoint. You can only use pure Java/Scala libraries with a DevEndpoint. 1215 ExtraJarsS3Path *string 1216 1217 // The paths to one or more Python libraries in an Amazon Simple Storage Service 1218 // (Amazon S3) bucket that should be loaded in your DevEndpoint. Multiple values 1219 // must be complete paths separated by a comma. You can only use pure Python 1220 // libraries with a DevEndpoint. Libraries that rely on C extensions, such as the 1221 // pandas (http://pandas.pydata.org/) Python data analysis library, are not 1222 // currently supported. 1223 ExtraPythonLibsS3Path *string 1224} 1225 1226// Defines column statistics supported for floating-point number data columns. 
1227type DoubleColumnStatisticsData struct {
1228
1229 // The number of distinct values in a column.
1230 //
1231 // This member is required.
1232 NumberOfDistinctValues int64
1233
1234 // The number of null values in the column.
1235 //
1236 // This member is required.
1237 NumberOfNulls int64
1238
1239 // The highest value in the column.
1240 MaximumValue float64
1241
1242 // The lowest value in the column.
1243 MinimumValue float64
1244}
1245
1246// Specifies an Amazon DynamoDB table to crawl.
1247type DynamoDBTarget struct {
1248
1249 // The name of the DynamoDB table to crawl.
1250 Path *string
1251
1252 // Indicates whether to scan all the records, or to sample rows from the table.
1253 // Scanning all the records can take a long time when the table is not a high
1254 // throughput table. A value of true means to scan all records, while a value of
1255 // false means to sample the records. If no value is specified, the value defaults
1256 // to true.
1257 ScanAll *bool
1258
1259 // The percentage of the configured read capacity units to use by the AWS Glue
1260 // crawler. Read capacity units is a term defined by DynamoDB, and is a numeric
1261 // value that acts as rate limiter for the number of reads that can be performed on
1262 // that table per second. The valid values are null or a value between 0.1 and 1.5.
1263 // A null value is used when the user does not provide a value, and defaults to 0.5 of
1264 // the configured Read Capacity Unit (for provisioned tables), or 0.25 of the max
1265 // configured Read Capacity Unit (for tables using on-demand mode).
1266 ScanRate *float64
1267}
1268
1269// An edge represents a directed connection between two AWS Glue components that
1270// are part of the workflow the edge belongs to.
1271type Edge struct {
1272
1273 // The unique identifier of the node within the workflow where the edge ends.
1274 DestinationId *string
1275
1276 // The unique identifier of the node within the workflow where the edge starts.
1277 SourceId *string 1278} 1279 1280// Specifies the encryption-at-rest configuration for the Data Catalog. 1281type EncryptionAtRest struct { 1282 1283 // The encryption-at-rest mode for encrypting Data Catalog data. 1284 // 1285 // This member is required. 1286 CatalogEncryptionMode CatalogEncryptionMode 1287 1288 // The ID of the AWS KMS key to use for encryption at rest. 1289 SseAwsKmsKeyId *string 1290} 1291 1292// Specifies an encryption configuration. 1293type EncryptionConfiguration struct { 1294 1295 // The encryption configuration for Amazon CloudWatch. 1296 CloudWatchEncryption *CloudWatchEncryption 1297 1298 // The encryption configuration for job bookmarks. 1299 JobBookmarksEncryption *JobBookmarksEncryption 1300 1301 // The encryption configuration for Amazon Simple Storage Service (Amazon S3) data. 1302 S3Encryption []S3Encryption 1303} 1304 1305// Contains details about an error. 1306type ErrorDetail struct { 1307 1308 // The code associated with this error. 1309 ErrorCode *string 1310 1311 // A message describing the error. 1312 ErrorMessage *string 1313} 1314 1315// An object containing error details. 1316type ErrorDetails struct { 1317 1318 // The error code for an error. 1319 ErrorCode *string 1320 1321 // The error message for an error. 1322 ErrorMessage *string 1323} 1324 1325// Evaluation metrics provide an estimate of the quality of your machine learning 1326// transform. 1327type EvaluationMetrics struct { 1328 1329 // The type of machine learning transform. 1330 // 1331 // This member is required. 1332 TransformType TransformType 1333 1334 // The evaluation metrics for the find matches algorithm. 1335 FindMatchesMetrics *FindMatchesMetrics 1336} 1337 1338// An execution property of a job. 1339type ExecutionProperty struct { 1340 1341 // The maximum number of concurrent runs allowed for the job. The default is 1. An 1342 // error is returned when this threshold is reached. 
The maximum value you can 1343 // specify is controlled by a service limit. 1344 MaxConcurrentRuns int32 1345} 1346 1347// Specifies configuration properties for an exporting labels task run. 1348type ExportLabelsTaskRunProperties struct { 1349 1350 // The Amazon Simple Storage Service (Amazon S3) path where you will export the 1351 // labels. 1352 OutputS3Path *string 1353} 1354 1355// The evaluation metrics for the find matches algorithm. The quality of your 1356// machine learning transform is measured by getting your transform to predict some 1357// matches and comparing the results to known matches from the same dataset. The 1358// quality metrics are based on a subset of your data, so they are not precise. 1359type FindMatchesMetrics struct { 1360 1361 // The area under the precision/recall curve (AUPRC) is a single number measuring 1362 // the overall quality of the transform, that is independent of the choice made for 1363 // precision vs. recall. Higher values indicate that you have a more attractive 1364 // precision vs. recall tradeoff. For more information, see Precision and recall 1365 // (https://en.wikipedia.org/wiki/Precision_and_recall) in Wikipedia. 1366 AreaUnderPRCurve *float64 1367 1368 // A list of ColumnImportance structures containing column importance metrics, 1369 // sorted in order of descending importance. 1370 ColumnImportances []ColumnImportance 1371 1372 // The confusion matrix shows you what your transform is predicting accurately and 1373 // what types of errors it is making. For more information, see Confusion matrix 1374 // (https://en.wikipedia.org/wiki/Confusion_matrix) in Wikipedia. 1375 ConfusionMatrix *ConfusionMatrix 1376 1377 // The maximum F1 metric indicates the transform's accuracy between 0 and 1, where 1378 // 1 is the best accuracy. For more information, see F1 score 1379 // (https://en.wikipedia.org/wiki/F1_score) in Wikipedia. 
1380 F1 *float64
1381
1382 // The precision metric indicates how often your transform is correct when it
1383 // predicts a match. Specifically, it measures how well the transform finds true
1384 // positives from the total true positives possible. For more information, see
1385 // Precision and recall (https://en.wikipedia.org/wiki/Precision_and_recall) in
1386 // Wikipedia.
1387 Precision *float64
1388
1389 // The recall metric indicates that for an actual match, how often your transform
1390 // predicts the match. Specifically, it measures how well the transform finds true
1391 // positives from the total records in the source data. For more information, see
1392 // Precision and recall (https://en.wikipedia.org/wiki/Precision_and_recall) in
1393 // Wikipedia.
1394 Recall *float64
1395}
1396
1397// The parameters to configure the find matches transform.
1398type FindMatchesParameters struct {
1399
1400 // The value that is selected when tuning your transform for a balance between
1401 // accuracy and cost. A value of 0.5 means that the system balances accuracy and
1402 // cost concerns. A value of 1.0 means a bias purely for accuracy, which typically
1403 // results in a higher cost, sometimes substantially higher. A value of 0.0 means a
1404 // bias purely for cost, which results in a less accurate FindMatches transform,
1405 // sometimes with unacceptable accuracy. Accuracy measures how well the transform
1406 // finds true positives and true negatives. Increasing accuracy requires more
1407 // machine resources and cost. But it also results in increased recall. Cost
1408 // measures how many compute resources, and thus money, are consumed to run the
1409 // transform.
1410 AccuracyCostTradeoff *float64
1411
1412 // The value to switch on or off to force the output to match the provided labels
1413 // from users. If the value is True, the find matches transform forces the output
1414 // to match the provided labels.
The results override the normal conflation 1415 // results. If the value is False, the find matches transform does not ensure all 1416 // the labels provided are respected, and the results rely on the trained model. 1417 // Note that setting this value to true may increase the conflation execution time. 1418 EnforceProvidedLabels *bool 1419 1420 // The value selected when tuning your transform for a balance between precision 1421 // and recall. A value of 0.5 means no preference; a value of 1.0 means a bias 1422 // purely for precision, and a value of 0.0 means a bias for recall. Because this 1423 // is a tradeoff, choosing values close to 1.0 means very low recall, and choosing 1424 // values close to 0.0 results in very low precision. The precision metric 1425 // indicates how often your model is correct when it predicts a match. The recall 1426 // metric indicates that for an actual match, how often your model predicts the 1427 // match. 1428 PrecisionRecallTradeoff *float64 1429 1430 // The name of a column that uniquely identifies rows in the source table. Used to 1431 // help identify matching records. 1432 PrimaryKeyColumnName *string 1433} 1434 1435// Specifies configuration properties for a Find Matches task run. 1436type FindMatchesTaskRunProperties struct { 1437 1438 // The job ID for the Find Matches task run. 1439 JobId *string 1440 1441 // The name assigned to the job for the Find Matches task run. 1442 JobName *string 1443 1444 // The job run ID for the Find Matches task run. 1445 JobRunId *string 1446} 1447 1448// Filters the connection definitions that are returned by the GetConnections API 1449// operation. 1450type GetConnectionsFilter struct { 1451 1452 // The type of connections to return. Currently, SFTP is not supported. 1453 ConnectionType ConnectionType 1454 1455 // A criteria string that must match the criteria recorded in the connection 1456 // definition for that connection definition to be returned. 
1457 MatchCriteria []string 1458} 1459 1460// A structure for returning a resource policy. 1461type GluePolicy struct { 1462 1463 // The date and time at which the policy was created. 1464 CreateTime *time.Time 1465 1466 // Contains the hash value associated with this policy. 1467 PolicyHash *string 1468 1469 // Contains the requested policy document, in JSON format. 1470 PolicyInJson *string 1471 1472 // The date and time at which the policy was last updated. 1473 UpdateTime *time.Time 1474} 1475 1476// The database and table in the AWS Glue Data Catalog that is used for input or 1477// output data. 1478type GlueTable struct { 1479 1480 // A database name in the AWS Glue Data Catalog. 1481 // 1482 // This member is required. 1483 DatabaseName *string 1484 1485 // A table name in the AWS Glue Data Catalog. 1486 // 1487 // This member is required. 1488 TableName *string 1489 1490 // A unique identifier for the AWS Glue Data Catalog. 1491 CatalogId *string 1492 1493 // The name of the connection to the AWS Glue Data Catalog. 1494 ConnectionName *string 1495} 1496 1497// A classifier that uses grok patterns. 1498type GrokClassifier struct { 1499 1500 // An identifier of the data format that the classifier matches, such as Twitter, 1501 // JSON, Omniture logs, and so on. 1502 // 1503 // This member is required. 1504 Classification *string 1505 1506 // The grok pattern applied to a data store by this classifier. For more 1507 // information, see built-in patterns in Writing Custom Classifiers 1508 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html). 1509 // 1510 // This member is required. 1511 GrokPattern *string 1512 1513 // The name of the classifier. 1514 // 1515 // This member is required. 1516 Name *string 1517 1518 // The time that this classifier was registered. 1519 CreationTime *time.Time 1520 1521 // Optional custom grok patterns defined by this classifier. 
For more information, 1522 // see custom patterns in Writing Custom Classifiers 1523 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html). 1524 CustomPatterns *string 1525 1526 // The time that this classifier was last updated. 1527 LastUpdated *time.Time 1528 1529 // The version of this classifier. 1530 Version int64 1531} 1532 1533// Specifies configuration properties for an importing labels task run. 1534type ImportLabelsTaskRunProperties struct { 1535 1536 // The Amazon Simple Storage Service (Amazon S3) path from where you will import 1537 // the labels. 1538 InputS3Path *string 1539 1540 // Indicates whether to overwrite your existing labels. 1541 Replace bool 1542} 1543 1544// Specifies a JDBC data store to crawl. 1545type JdbcTarget struct { 1546 1547 // The name of the connection to use to connect to the JDBC target. 1548 ConnectionName *string 1549 1550 // A list of glob patterns used to exclude from the crawl. For more information, 1551 // see Catalog Tables with a Crawler 1552 // (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html). 1553 Exclusions []string 1554 1555 // The path of the JDBC target. 1556 Path *string 1557} 1558 1559// Specifies a job definition. 1560type Job struct { 1561 1562 // This field is deprecated. Use MaxCapacity instead. The number of AWS Glue data 1563 // processing units (DPUs) allocated to runs of this job. You can allocate from 2 1564 // to 100 DPUs; the default is 10. A DPU is a relative measure of processing power 1565 // that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more 1566 // information, see the AWS Glue pricing page 1567 // (https://aws.amazon.com/glue/pricing/). 1568 // 1569 // Deprecated: This property is deprecated, use MaxCapacity instead. 1570 AllocatedCapacity int32 1571 1572 // The JobCommand that executes this job. 1573 Command *JobCommand 1574 1575 // The connections used for this job. 
1576 Connections *ConnectionsList 1577 1578 // The time and date that this job definition was created. 1579 CreatedOn *time.Time 1580 1581 // The default arguments for this job, specified as name-value pairs. You can 1582 // specify arguments here that your own job-execution script consumes, as well as 1583 // arguments that AWS Glue itself consumes. For information about how to specify 1584 // and consume your own Job arguments, see the Calling AWS Glue APIs in Python 1585 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) 1586 // topic in the developer guide. For information about the key-value pairs that AWS 1587 // Glue consumes to set up your job, see the Special Parameters Used by AWS Glue 1588 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) 1589 // topic in the developer guide. 1590 DefaultArguments map[string]string 1591 1592 // A description of the job. 1593 Description *string 1594 1595 // An ExecutionProperty specifying the maximum number of concurrent runs allowed 1596 // for this job. 1597 ExecutionProperty *ExecutionProperty 1598 1599 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1600 // supports. The Python version indicates the version supported for jobs of type 1601 // Spark. For more information about the available AWS Glue versions and 1602 // corresponding Spark and Python versions, see Glue version 1603 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1604 // guide. Jobs that are created without specifying a Glue version default to Glue 1605 // 0.9. 1606 GlueVersion *string 1607 1608 // The last point in time when this job definition was modified. 1609 LastModifiedOn *time.Time 1610 1611 // This field is reserved for future use. 1612 LogUri *string 1613 1614 // The number of AWS Glue data processing units (DPUs) that can be allocated when 1615 // this job runs. 
A DPU is a relative measure of processing power that consists of 1616 // 4 vCPUs of compute capacity and 16 GB of memory. For more information, see the 1617 // AWS Glue pricing page (https://aws.amazon.com/glue/pricing/). Do not set Max 1618 // Capacity if using WorkerType and NumberOfWorkers. The value that can be 1619 // allocated for MaxCapacity depends on whether you are running a Python shell job, 1620 // an Apache Spark ETL job, or an Apache Spark streaming ETL job: 1621 // 1622 // * When you 1623 // specify a Python shell job (JobCommand.Name="pythonshell"), you can allocate 1624 // either 0.0625 or 1 DPU. The default is 0.0625 DPU. 1625 // 1626 // * When you specify an Apache 1627 // Spark ETL job (JobCommand.Name="glueetl") or Apache Spark streaming ETL job 1628 // (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The 1629 // default is 10 DPUs. This job type cannot have a fractional DPU allocation. 1630 MaxCapacity *float64 1631 1632 // The maximum number of times to retry this job after a JobRun fails. 1633 MaxRetries int32 1634 1635 // The name you assign to this job definition. 1636 Name *string 1637 1638 // Non-overridable arguments for this job, specified as name-value pairs. 1639 NonOverridableArguments map[string]string 1640 1641 // Specifies configuration properties of a job notification. 1642 NotificationProperty *NotificationProperty 1643 1644 // The number of workers of a defined workerType that are allocated when a job 1645 // runs. The maximum number of workers you can define are 299 for G.1X, and 149 for 1646 // G.2X. 1647 NumberOfWorkers *int32 1648 1649 // The name or Amazon Resource Name (ARN) of the IAM role associated with this job. 1650 Role *string 1651 1652 // The name of the SecurityConfiguration structure to be used with this job. 1653 SecurityConfiguration *string 1654 1655 // The job timeout in minutes. 
This is the maximum time that a job run can consume 1656 // resources before it is terminated and enters TIMEOUT status. The default is 1657 // 2,880 minutes (48 hours). 1658 Timeout *int32 1659 1660 // The type of predefined worker that is allocated when a job runs. Accepts a value 1661 // of Standard, G.1X, or G.2X. 1662 // 1663 // * For the Standard worker type, each worker 1664 // provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. 1665 // 1666 // * 1667 // For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 1668 // GB disk), and provides 1 executor per worker. We recommend this worker type for 1669 // memory-intensive jobs. 1670 // 1671 // * For the G.2X worker type, each worker maps to 2 DPU (8 1672 // vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We 1673 // recommend this worker type for memory-intensive jobs. 1674 WorkerType WorkerType 1675} 1676 1677// Defines a point that a job can resume processing. 1678type JobBookmarkEntry struct { 1679 1680 // The attempt ID number. 1681 Attempt int32 1682 1683 // The bookmark itself. 1684 JobBookmark *string 1685 1686 // The name of the job in question. 1687 JobName *string 1688 1689 // The unique run identifier associated with the previous job run. 1690 PreviousRunId *string 1691 1692 // The run ID number. 1693 Run int32 1694 1695 // The run ID number. 1696 RunId *string 1697 1698 // The version of the job. 1699 Version int32 1700} 1701 1702// Specifies how job bookmark data should be encrypted. 1703type JobBookmarksEncryption struct { 1704 1705 // The encryption mode to use for job bookmarks data. 1706 JobBookmarksEncryptionMode JobBookmarksEncryptionMode 1707 1708 // The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data. 1709 KmsKeyArn *string 1710} 1711 1712// Specifies code executed when a job is run. 1713type JobCommand struct { 1714 1715 // The name of the job command. 
For an Apache Spark ETL job, this must be glueetl. 1716 // For a Python shell job, it must be pythonshell. For an Apache Spark streaming 1717 // ETL job, this must be gluestreaming. 1718 Name *string 1719 1720 // The Python version being used to execute a Python shell job. Allowed values are 1721 // 2 or 3. 1722 PythonVersion *string 1723 1724 // Specifies the Amazon Simple Storage Service (Amazon S3) path to a script that 1725 // executes a job. 1726 ScriptLocation *string 1727} 1728 1729// The details of a Job node present in the workflow. 1730type JobNodeDetails struct { 1731 1732 // The information for the job runs represented by the job node. 1733 JobRuns []JobRun 1734} 1735 1736// Contains information about a job run. 1737type JobRun struct { 1738 1739 // This field is deprecated. Use MaxCapacity instead. The number of AWS Glue data 1740 // processing units (DPUs) allocated to this JobRun. From 2 to 100 DPUs can be 1741 // allocated; the default is 10. A DPU is a relative measure of processing power 1742 // that consists of 4 vCPUs of compute capacity and 16 GB of memory. For more 1743 // information, see the AWS Glue pricing page 1744 // (https://aws.amazon.com/glue/pricing/). 1745 // 1746 // Deprecated: This property is deprecated, use MaxCapacity instead. 1747 AllocatedCapacity int32 1748 1749 // The job arguments associated with this run. For this job run, they replace the 1750 // default arguments set in the job definition itself. You can specify arguments 1751 // here that your own job-execution script consumes, as well as arguments that AWS 1752 // Glue itself consumes. For information about how to specify and consume your own 1753 // job arguments, see the Calling AWS Glue APIs in Python 1754 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) 1755 // topic in the developer guide. 
For information about the key-value pairs that AWS 1756 // Glue consumes to set up your job, see the Special Parameters Used by AWS Glue 1757 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) 1758 // topic in the developer guide. 1759 Arguments map[string]string 1760 1761 // The number of the attempt to run this job. 1762 Attempt int32 1763 1764 // The date and time that this job run completed. 1765 CompletedOn *time.Time 1766 1767 // An error message associated with this job run. 1768 ErrorMessage *string 1769 1770 // The amount of time (in seconds) that the job run consumed resources. 1771 ExecutionTime int32 1772 1773 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1774 // supports. The Python version indicates the version supported for jobs of type 1775 // Spark. For more information about the available AWS Glue versions and 1776 // corresponding Spark and Python versions, see Glue version 1777 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1778 // guide. Jobs that are created without specifying a Glue version default to Glue 1779 // 0.9. 1780 GlueVersion *string 1781 1782 // The ID of this job run. 1783 Id *string 1784 1785 // The name of the job definition being used in this run. 1786 JobName *string 1787 1788 // The current state of the job run. For more information about the statuses of 1789 // jobs that have terminated abnormally, see AWS Glue Job Run Statuses 1790 // (https://docs.aws.amazon.com/glue/latest/dg/job-run-statuses.html). 1791 JobRunState JobRunState 1792 1793 // The last time that this job run was modified. 1794 LastModifiedOn *time.Time 1795 1796 // The name of the log group for secure logging that can be server-side encrypted 1797 // in Amazon CloudWatch using AWS KMS. This name can be /aws-glue/jobs/, in which 1798 // case the default encryption is NONE. 
If you add a role name and 1799 // SecurityConfiguration name (in other words, 1800 // /aws-glue/jobs-yourRoleName-yourSecurityConfigurationName/), then that security 1801 // configuration is used to encrypt the log group. 1802 LogGroupName *string 1803 1804 // The number of AWS Glue data processing units (DPUs) that can be allocated when 1805 // this job runs. A DPU is a relative measure of processing power that consists of 1806 // 4 vCPUs of compute capacity and 16 GB of memory. For more information, see the 1807 // AWS Glue pricing page 1808 // (https://aws.amazon.com/glue/pricing/). Do not set 1809 // Max Capacity if using WorkerType and NumberOfWorkers. The value that can be 1810 // allocated for MaxCapacity depends on whether you are running a Python shell job 1811 // or an Apache Spark ETL job: 1812 // 1813 // * When you specify a Python shell job 1814 // (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The 1815 // default is 0.0625 DPU. 1816 // 1817 // * When you specify an Apache Spark ETL job 1818 // (JobCommand.Name="glueetl"), you can allocate from 2 to 100 DPUs. The default is 1819 // 10 DPUs. This job type cannot have a fractional DPU allocation. 1820 MaxCapacity *float64 1821 1822 // Specifies configuration properties of a job run notification. 1823 NotificationProperty *NotificationProperty 1824 1825 // The number of workers of a defined workerType that are allocated when a job 1826 // runs. The maximum number of workers you can define are 299 for G.1X, and 149 for 1827 // G.2X. 1828 NumberOfWorkers *int32 1829 1830 // A list of predecessors to this job run. 1831 PredecessorRuns []Predecessor 1832 1833 // The ID of the previous run of this job. For example, the JobRunId specified in 1834 // the StartJobRun action. 1835 PreviousRunId *string 1836 1837 // The name of the SecurityConfiguration structure to be used with this job run. 
1838 SecurityConfiguration *string 1839 1840 // The date and time at which this job run was started. 1841 StartedOn *time.Time 1842 1843 // The JobRun timeout in minutes. This is the maximum time that a job run can 1844 // consume resources before it is terminated and enters TIMEOUT status. The default 1845 // is 2,880 minutes (48 hours). This overrides the timeout value set in the parent 1846 // job. 1847 Timeout *int32 1848 1849 // The name of the trigger that started this job run. 1850 TriggerName *string 1851 1852 // The type of predefined worker that is allocated when a job runs. Accepts a value 1853 // of Standard, G.1X, or G.2X. 1854 // 1855 // * For the Standard worker type, each worker 1856 // provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. 1857 // 1858 // * 1859 // For the G.1X worker type, each worker provides 4 vCPU, 16 GB of memory and a 1860 // 64GB disk, and 1 executor per worker. 1861 // 1862 // * For the G.2X worker type, each worker 1863 // provides 8 vCPU, 32 GB of memory and a 128GB disk, and 1 executor per worker. 1864 WorkerType WorkerType 1865} 1866 1867// Specifies information used to update an existing job definition. The previous 1868// job definition is completely overwritten by this information. 1869type JobUpdate struct { 1870 1871 // This field is deprecated. Use MaxCapacity instead. The number of AWS Glue data 1872 // processing units (DPUs) to allocate to this job. You can allocate from 2 to 100 1873 // DPUs; the default is 10. A DPU is a relative measure of processing power that 1874 // consists of 4 vCPUs of compute capacity and 16 GB of memory. For more 1875 // information, see the AWS Glue pricing page 1876 // (https://aws.amazon.com/glue/pricing/). 1877 // 1878 // Deprecated: This property is deprecated, use MaxCapacity instead. 1879 AllocatedCapacity int32 1880 1881 // The JobCommand that executes this job (required). 1882 Command *JobCommand 1883 1884 // The connections used for this job. 
1885 Connections *ConnectionsList 1886 1887 // The default arguments for this job. You can specify arguments here that your own 1888 // job-execution script consumes, as well as arguments that AWS Glue itself 1889 // consumes. For information about how to specify and consume your own Job 1890 // arguments, see the Calling AWS Glue APIs in Python 1891 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-python-calling.html) 1892 // topic in the developer guide. For information about the key-value pairs that AWS 1893 // Glue consumes to set up your job, see the Special Parameters Used by AWS Glue 1894 // (https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html) 1895 // topic in the developer guide. 1896 DefaultArguments map[string]string 1897 1898 // Description of the job being defined. 1899 Description *string 1900 1901 // An ExecutionProperty specifying the maximum number of concurrent runs allowed 1902 // for this job. 1903 ExecutionProperty *ExecutionProperty 1904 1905 // Glue version determines the versions of Apache Spark and Python that AWS Glue 1906 // supports. The Python version indicates the version supported for jobs of type 1907 // Spark. For more information about the available AWS Glue versions and 1908 // corresponding Spark and Python versions, see Glue version 1909 // (https://docs.aws.amazon.com/glue/latest/dg/add-job.html) in the developer 1910 // guide. 1911 GlueVersion *string 1912 1913 // This field is reserved for future use. 1914 LogUri *string 1915 1916 // The number of AWS Glue data processing units (DPUs) that can be allocated when 1917 // this job runs. A DPU is a relative measure of processing power that consists of 1918 // 4 vCPUs of compute capacity and 16 GB of memory. For more information, see the 1919 // AWS Glue pricing page (https://aws.amazon.com/glue/pricing/). Do not set Max 1920 // Capacity if using WorkerType and NumberOfWorkers. 
The value that can be 1921 // allocated for MaxCapacity depends on whether you are running a Python shell job 1922 // or an Apache Spark ETL job: 1923 // 1924 // * When you specify a Python shell job 1925 // (JobCommand.Name="pythonshell"), you can allocate either 0.0625 or 1 DPU. The 1926 // default is 0.0625 DPU. 1927 // 1928 // * When you specify an Apache Spark ETL job 1929 // (JobCommand.Name="glueetl") or Apache Spark streaming ETL job 1930 // (JobCommand.Name="gluestreaming"), you can allocate from 2 to 100 DPUs. The 1931 // default is 10 DPUs. This job type cannot have a fractional DPU allocation. 1932 MaxCapacity *float64 1933 1934 // The maximum number of times to retry this job if it fails. 1935 MaxRetries int32 1936 1937 // Non-overridable arguments for this job, specified as name-value pairs. 1938 NonOverridableArguments map[string]string 1939 1940 // Specifies the configuration properties of a job notification. 1941 NotificationProperty *NotificationProperty 1942 1943 // The number of workers of a defined workerType that are allocated when a job 1944 // runs. The maximum number of workers you can define are 299 for G.1X, and 149 for 1945 // G.2X. 1946 NumberOfWorkers *int32 1947 1948 // The name or Amazon Resource Name (ARN) of the IAM role associated with this job 1949 // (required). 1950 Role *string 1951 1952 // The name of the SecurityConfiguration structure to be used with this job. 1953 SecurityConfiguration *string 1954 1955 // The job timeout in minutes. This is the maximum time that a job run can consume 1956 // resources before it is terminated and enters TIMEOUT status. The default is 1957 // 2,880 minutes (48 hours). 1958 Timeout *int32 1959 1960 // The type of predefined worker that is allocated when a job runs. Accepts a value 1961 // of Standard, G.1X, or G.2X. 1962 // 1963 // * For the Standard worker type, each worker 1964 // provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. 
1965 // 1966 // * 1967 // For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 1968 // GB disk), and provides 1 executor per worker. We recommend this worker type for 1969 // memory-intensive jobs. 1970 // 1971 // * For the G.2X worker type, each worker maps to 2 DPU (8 1972 // vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. We 1973 // recommend this worker type for memory-intensive jobs. 1974 WorkerType WorkerType 1975} 1976 1977// A classifier for JSON content. 1978type JsonClassifier struct { 1979 1980 // A JsonPath string defining the JSON data for the classifier to classify. AWS 1981 // Glue supports a subset of JsonPath, as described in Writing JsonPath Custom 1982 // Classifiers 1983 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json). 1984 // 1985 // This member is required. 1986 JsonPath *string 1987 1988 // The name of the classifier. 1989 // 1990 // This member is required. 1991 Name *string 1992 1993 // The time that this classifier was registered. 1994 CreationTime *time.Time 1995 1996 // The time that this classifier was last updated. 1997 LastUpdated *time.Time 1998 1999 // The version of this classifier. 2000 Version int64 2001} 2002 2003// A partition key pair consisting of a name and a type. 2004type KeySchemaElement struct { 2005 2006 // The name of a partition key. 2007 // 2008 // This member is required. 2009 Name *string 2010 2011 // The type of a partition key. 2012 // 2013 // This member is required. 2014 Type *string 2015} 2016 2017// Specifies configuration properties for a labeling set generation task run. 2018type LabelingSetGenerationTaskRunProperties struct { 2019 2020 // The Amazon Simple Storage Service (Amazon S3) path where you will generate the 2021 // labeling set. 2022 OutputS3Path *string 2023} 2024 2025// Status and error information about the most recent crawl. 
2026type LastCrawlInfo struct { 2027 2028 // If an error occurred, the error information about the last crawl. 2029 ErrorMessage *string 2030 2031 // The log group for the last crawl. 2032 LogGroup *string 2033 2034 // The log stream for the last crawl. 2035 LogStream *string 2036 2037 // The prefix for a message about this crawl. 2038 MessagePrefix *string 2039 2040 // The time at which the crawl started. 2041 StartTime *time.Time 2042 2043 // Status of the last crawl. 2044 Status LastCrawlStatus 2045} 2046 2047// Specifies data lineage configuration settings for the crawler. 2048type LineageConfiguration struct { 2049 2050 // Specifies whether data lineage is enabled for the crawler. Valid values are: 2051 // 2052 // * 2053 // ENABLE: enables data lineage for the crawler 2054 // 2055 // * DISABLE: disables data lineage 2056 // for the crawler 2057 CrawlerLineageSettings CrawlerLineageSettings 2058} 2059 2060// The location of resources. 2061type Location struct { 2062 2063 // An Amazon DynamoDB table location. 2064 DynamoDB []CodeGenNodeArg 2065 2066 // A JDBC location. 2067 Jdbc []CodeGenNodeArg 2068 2069 // An Amazon Simple Storage Service (Amazon S3) location. 2070 S3 []CodeGenNodeArg 2071} 2072 2073// Defines column statistics supported for integer data columns. 2074type LongColumnStatisticsData struct { 2075 2076 // The number of distinct values in a column. 2077 // 2078 // This member is required. 2079 NumberOfDistinctValues int64 2080 2081 // The number of null values in the column. 2082 // 2083 // This member is required. 2084 NumberOfNulls int64 2085 2086 // The highest value in the column. 2087 MaximumValue int64 2088 2089 // The lowest value in the column. 2090 MinimumValue int64 2091} 2092 2093// Defines a mapping. 2094type MappingEntry struct { 2095 2096 // The source path. 2097 SourcePath *string 2098 2099 // The name of the source table. 2100 SourceTable *string 2101 2102 // The source type. 2103 SourceType *string 2104 2105 // The target path. 
2106 TargetPath *string 2107 2108 // The target table. 2109 TargetTable *string 2110 2111 // The target type. 2112 TargetType *string 2113} 2114 2115// A structure containing metadata information for a schema version. 2116type MetadataInfo struct { 2117 2118 // The time at which the entry was created. 2119 CreatedTime *string 2120 2121 // The metadata key’s corresponding value. 2122 MetadataValue *string 2123 2124 // Other metadata belonging to the same metadata key. 2125 OtherMetadataValueList []OtherMetadataValueListItem 2126} 2127 2128// A structure containing a key value pair for metadata. 2129type MetadataKeyValuePair struct { 2130 2131 // A metadata key. 2132 MetadataKey *string 2133 2134 // A metadata key’s corresponding value. 2135 MetadataValue *string 2136} 2137 2138// A structure for a machine learning transform. 2139type MLTransform struct { 2140 2141 // A timestamp. The time and date that this machine learning transform was created. 2142 CreatedOn *time.Time 2143 2144 // A user-defined, long-form description text for the machine learning transform. 2145 // Descriptions are not guaranteed to be unique and can be changed at any time. 2146 Description *string 2147 2148 // An EvaluationMetrics object. Evaluation metrics provide an estimate of the 2149 // quality of your machine learning transform. 2150 EvaluationMetrics *EvaluationMetrics 2151 2152 // This value determines which version of AWS Glue this machine learning transform 2153 // is compatible with. Glue 1.0 is recommended for most customers. If the value is 2154 // not set, the Glue compatibility defaults to Glue 0.9. For more information, see 2155 // AWS Glue Versions 2156 // (https://docs.aws.amazon.com/glue/latest/dg/release-notes.html#release-notes-versions) 2157 // in the developer guide. 2158 GlueVersion *string 2159 2160 // A list of AWS Glue table definitions used by the transform. 
2161 InputRecordTables []GlueTable 2162 2163 // A count identifier for the labeling files generated by AWS Glue for this 2164 // transform. As you create a better transform, you can iteratively download, 2165 // label, and upload the labeling file. 2166 LabelCount int32 2167 2168 // A timestamp. The last point in time when this machine learning transform was 2169 // modified. 2170 LastModifiedOn *time.Time 2171 2172 // The number of AWS Glue data processing units (DPUs) that are allocated to task 2173 // runs for this transform. You can allocate from 2 to 100 DPUs; the default is 10. 2174 // A DPU is a relative measure of processing power that consists of 4 vCPUs of 2175 // compute capacity and 16 GB of memory. For more information, see the AWS Glue 2176 // pricing page (http://aws.amazon.com/glue/pricing/). MaxCapacity is a mutually 2177 // exclusive option with NumberOfWorkers and WorkerType. 2178 // 2179 // * If either 2180 // NumberOfWorkers or WorkerType is set, then MaxCapacity cannot be set. 2181 // 2182 // * If 2183 // MaxCapacity is set then neither NumberOfWorkers or WorkerType can be set. 2184 // 2185 // * If 2186 // WorkerType is set, then NumberOfWorkers is required (and vice versa). 2187 // 2188 // * 2189 // MaxCapacity and NumberOfWorkers must both be at least 1. 2190 // 2191 // When the WorkerType 2192 // field is set to a value other than Standard, the MaxCapacity field is set 2193 // automatically and becomes read-only. 2194 MaxCapacity *float64 2195 2196 // The maximum number of times to retry after an MLTaskRun of the machine learning 2197 // transform fails. 2198 MaxRetries *int32 2199 2200 // A user-defined name for the machine learning transform. Names are not guaranteed 2201 // unique and can be changed at any time. 2202 Name *string 2203 2204 // The number of workers of a defined workerType that are allocated when a task of 2205 // the transform runs. If WorkerType is set, then NumberOfWorkers is required (and 2206 // vice versa). 
2207 NumberOfWorkers *int32 2208 2209 // A TransformParameters object. You can use parameters to tune (customize) the 2210 // behavior of the machine learning transform by specifying what data it learns 2211 // from and your preference on various tradeoffs (such as precision vs. recall, or 2212 // accuracy vs. cost). 2213 Parameters *TransformParameters 2214 2215 // The name or Amazon Resource Name (ARN) of the IAM role with the required 2216 // permissions. The required permissions include both AWS Glue service role 2217 // permissions to AWS Glue resources, and Amazon S3 permissions required by the 2218 // transform. 2219 // 2220 // * This role needs AWS Glue service role permissions to allow access 2221 // to resources in AWS Glue. See Attach a Policy to IAM Users That Access AWS Glue 2222 // (https://docs.aws.amazon.com/glue/latest/dg/attach-policy-iam-user.html). 2223 // 2224 // * 2225 // This role needs permission to your Amazon Simple Storage Service (Amazon S3) 2226 // sources, targets, temporary directory, scripts, and any libraries used by the 2227 // task run for this transform. 2228 Role *string 2229 2230 // A map of key-value pairs representing the columns and data types that this 2231 // transform can run against. Has an upper bound of 100 columns. 2232 Schema []SchemaColumn 2233 2234 // The current status of the machine learning transform. 2235 Status TransformStatusType 2236 2237 // The timeout in minutes of the machine learning transform. 2238 Timeout *int32 2239 2240 // The encryption-at-rest settings of the transform that apply to accessing user 2241 // data. Machine learning transforms can access user data encrypted in Amazon S3 2242 // using KMS. 2243 TransformEncryption *TransformEncryption 2244 2245 // The unique transform ID that is generated for the machine learning transform. 2246 // The ID is guaranteed to be unique and does not change. 
2247 TransformId *string 2248 2249 // The type of predefined worker that is allocated when a task of this transform 2250 // runs. Accepts a value of Standard, G.1X, or G.2X. 2251 // 2252 // * For the Standard worker 2253 // type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 2254 // executors per worker. 2255 // 2256 // * For the G.1X worker type, each worker provides 4 vCPU, 2257 // 16 GB of memory and a 64GB disk, and 1 executor per worker. 2258 // 2259 // * For the G.2X 2260 // worker type, each worker provides 8 vCPU, 32 GB of memory and a 128GB disk, and 2261 // 1 executor per worker. 2262 // 2263 // MaxCapacity is a mutually exclusive option with 2264 // NumberOfWorkers and WorkerType. 2265 // 2266 // * If either NumberOfWorkers or WorkerType is 2267 // set, then MaxCapacity cannot be set. 2268 // 2269 // * If MaxCapacity is set then neither 2270 // NumberOfWorkers or WorkerType can be set. 2271 // 2272 // * If WorkerType is set, then 2273 // NumberOfWorkers is required (and vice versa). 2274 // 2275 // * MaxCapacity and NumberOfWorkers 2276 // must both be at least 1. 2277 WorkerType WorkerType 2278} 2279 2280// The encryption-at-rest settings of the transform that apply to accessing user 2281// data. 2282type MLUserDataEncryption struct { 2283 2284 // The encryption mode applied to user data. Valid values are: 2285 // 2286 // * DISABLED: 2287 // encryption is disabled 2288 // 2289 // * SSEKMS: use of server-side encryption with AWS Key 2290 // Management Service (SSE-KMS) for user data stored in Amazon S3. 2291 // 2292 // This member is required. 2293 MlUserDataEncryptionMode MLUserDataEncryptionModeString 2294 2295 // The ID for the customer-provided KMS key. 2296 KmsKeyId *string 2297} 2298 2299// Specifies an Amazon DocumentDB or MongoDB data store to crawl. 2300type MongoDBTarget struct { 2301 2302 // The name of the connection to use to connect to the Amazon DocumentDB or MongoDB 2303 // target. 
2304 ConnectionName *string 2305 2306 // The path of the Amazon DocumentDB or MongoDB target (database/collection). 2307 Path *string 2308 2309 // Indicates whether to scan all the records, or to sample rows from the table. 2310 // Scanning all the records can take a long time when the table is not a high 2311 // throughput table. A value of true means to scan all records, while a value of 2312 // false means to sample the records. If no value is specified, the value defaults 2313 // to true. 2314 ScanAll *bool 2315} 2316 2317// A node represents an AWS Glue component such as a trigger, or job, etc., that is 2318// part of a workflow. 2319type Node struct { 2320 2321 // Details of the crawler when the node represents a crawler. 2322 CrawlerDetails *CrawlerNodeDetails 2323 2324 // Details of the Job when the node represents a Job. 2325 JobDetails *JobNodeDetails 2326 2327 // The name of the AWS Glue component represented by the node. 2328 Name *string 2329 2330 // Details of the Trigger when the node represents a Trigger. 2331 TriggerDetails *TriggerNodeDetails 2332 2333 // The type of AWS Glue component represented by the node. 2334 Type NodeType 2335 2336 // The unique Id assigned to the node within the workflow. 2337 UniqueId *string 2338} 2339 2340// Specifies configuration properties of a notification. 2341type NotificationProperty struct { 2342 2343 // After a job run starts, the number of minutes to wait before sending a job run 2344 // delay notification. 2345 NotifyDelayAfter *int32 2346} 2347 2348// Specifies the sort order of a sorted column. 2349type Order struct { 2350 2351 // The name of the column. 2352 // 2353 // This member is required. 2354 Column *string 2355 2356 // Indicates that the column is sorted in ascending order (== 1), or in descending 2357 // order (==0). 2358 // 2359 // This member is required. 2360 SortOrder int32 2361} 2362 2363// A structure containing other metadata for a schema version belonging to the same 2364// metadata key. 
2365type OtherMetadataValueListItem struct { 2366 2367 // The time at which the entry was created. 2368 CreatedTime *string 2369 2370 // The metadata key’s corresponding value for the other metadata belonging to the 2371 // same metadata key. 2372 MetadataValue *string 2373} 2374 2375// Represents a slice of table data. 2376type Partition struct { 2377 2378 // The ID of the Data Catalog in which the partition resides. 2379 CatalogId *string 2380 2381 // The time at which the partition was created. 2382 CreationTime *time.Time 2383 2384 // The name of the catalog database in which to create the partition. 2385 DatabaseName *string 2386 2387 // The last time at which the partition was accessed. 2388 LastAccessTime *time.Time 2389 2390 // The last time at which column statistics were computed for this partition. 2391 LastAnalyzedTime *time.Time 2392 2393 // These key-value pairs define partition parameters. 2394 Parameters map[string]string 2395 2396 // Provides information about the physical location where the partition is stored. 2397 StorageDescriptor *StorageDescriptor 2398 2399 // The name of the database table in which to create the partition. 2400 TableName *string 2401 2402 // The values of the partition. 2403 Values []string 2404} 2405 2406// Contains information about a partition error. 2407type PartitionError struct { 2408 2409 // The details about the partition error. 2410 ErrorDetail *ErrorDetail 2411 2412 // The values that define the partition. 2413 PartitionValues []string 2414} 2415 2416// A structure for a partition index. 2417type PartitionIndex struct { 2418 2419 // The name of the partition index. 2420 // 2421 // This member is required. 2422 IndexName *string 2423 2424 // The keys for the partition index. 2425 // 2426 // This member is required. 2427 Keys []string 2428} 2429 2430// A descriptor for a partition index in a table. 2431type PartitionIndexDescriptor struct { 2432 2433 // The name of the partition index. 
2434 // 2435 // This member is required. 2436 IndexName *string 2437 2438 // The status of the partition index. The possible statuses are: 2439 // 2440 // * CREATING: The 2441 // index is being created. When an index is in a CREATING state, the index or its 2442 // table cannot be deleted. 2443 // 2444 // * ACTIVE: The index creation succeeds. 2445 // 2446 // * FAILED: The 2447 // index creation fails. 2448 // 2449 // * DELETING: The index is deleted from the list of 2450 // indexes. 2451 // 2452 // This member is required. 2453 IndexStatus PartitionIndexStatus 2454 2455 // A list of one or more keys, as KeySchemaElement structures, for the partition 2456 // index. 2457 // 2458 // This member is required. 2459 Keys []KeySchemaElement 2460 2461 // A list of errors that can occur when registering partition indexes for an 2462 // existing table. 2463 BackfillErrors []BackfillError 2464} 2465 2466// The structure used to create and update a partition. 2467type PartitionInput struct { 2468 2469 // The last time at which the partition was accessed. 2470 LastAccessTime *time.Time 2471 2472 // The last time at which column statistics were computed for this partition. 2473 LastAnalyzedTime *time.Time 2474 2475 // These key-value pairs define partition parameters. 2476 Parameters map[string]string 2477 2478 // Provides information about the physical location where the partition is stored. 2479 StorageDescriptor *StorageDescriptor 2480 2481 // The values of the partition. Although this parameter is not required by the SDK, 2482 // you must specify this parameter for a valid input. The values for the keys for 2483 // the new partition must be passed as an array of String objects that must be 2484 // ordered in the same order as the partition keys appearing in the Amazon S3 2485 // prefix. Otherwise AWS Glue will add the values to the wrong keys. 2486 Values []string 2487} 2488 2489// Contains a list of values defining partitions. 
2490type PartitionValueList struct { 2491 2492 // The list of values. 2493 // 2494 // This member is required. 2495 Values []string 2496} 2497 2498// Specifies the physical requirements for a connection. 2499type PhysicalConnectionRequirements struct { 2500 2501 // The connection's Availability Zone. This field is redundant because the 2502 // specified subnet implies the Availability Zone to be used. Currently the field 2503 // must be populated, but it will be deprecated in the future. 2504 AvailabilityZone *string 2505 2506 // The security group ID list used by the connection. 2507 SecurityGroupIdList []string 2508 2509 // The subnet ID used by the connection. 2510 SubnetId *string 2511} 2512 2513// A job run that was used in the predicate of a conditional trigger that triggered 2514// this job run. 2515type Predecessor struct { 2516 2517 // The name of the job definition used by the predecessor job run. 2518 JobName *string 2519 2520 // The job-run ID of the predecessor job run. 2521 RunId *string 2522} 2523 2524// Defines the predicate of the trigger, which determines when it fires. 2525type Predicate struct { 2526 2527 // A list of the conditions that determine when the trigger will fire. 2528 Conditions []Condition 2529 2530 // An optional field if only one condition is listed. If multiple conditions are 2531 // listed, then this field is required. 2532 Logical Logical 2533} 2534 2535// Permissions granted to a principal. 2536type PrincipalPermissions struct { 2537 2538 // The permissions that are granted to the principal. 2539 Permissions []Permission 2540 2541 // The principal who is granted permissions. 2542 Principal *DataLakePrincipal 2543} 2544 2545// Defines a property predicate. 2546type PropertyPredicate struct { 2547 2548 // The comparator used to compare this property to others. 2549 Comparator Comparator 2550 2551 // The key of the property. 2552 Key *string 2553 2554 // The value of the property. 
2555 Value *string 2556} 2557 2558// When crawling an Amazon S3 data source after the first crawl is complete, 2559// specifies whether to crawl the entire dataset again or to crawl only folders 2560// that were added since the last crawler run. For more information, see 2561// Incremental Crawls in AWS Glue 2562// (https://docs.aws.amazon.com/glue/latest/dg/incremental-crawls.html) in the 2563// developer guide. 2564type RecrawlPolicy struct { 2565 2566 // Specifies whether to crawl the entire dataset again or to crawl only folders 2567 // that were added since the last crawler run. A value of CRAWL_EVERYTHING 2568 // specifies crawling the entire dataset again. A value of CRAWL_NEW_FOLDERS_ONLY 2569 // specifies crawling only folders that were added since the last crawler run. 2570 RecrawlBehavior RecrawlBehavior 2571} 2572 2573// A wrapper structure that may contain the registry name and Amazon Resource Name 2574// (ARN). 2575type RegistryId struct { 2576 2577 // Arn of the registry to be updated. One of RegistryArn or RegistryName has to be 2578 // provided. 2579 RegistryArn *string 2580 2581 // Name of the registry. Used only for lookup. One of RegistryArn or RegistryName 2582 // has to be provided. 2583 RegistryName *string 2584} 2585 2586// A structure containing the details for a registry. 2587type RegistryListItem struct { 2588 2589 // The date the registry was created. 2590 CreatedTime *string 2591 2592 // A description of the registry. 2593 Description *string 2594 2595 // The Amazon Resource Name (ARN) of the registry. 2596 RegistryArn *string 2597 2598 // The name of the registry. 2599 RegistryName *string 2600 2601 // The status of the registry. 2602 Status RegistryStatus 2603 2604 // The date the registry was updated. 2605 UpdatedTime *string 2606} 2607 2608// The URIs for function resources. 2609type ResourceUri struct { 2610 2611 // The type of the resource. 2612 ResourceType ResourceType 2613 2614 // The URI for accessing the resource. 
2615 Uri *string 2616} 2617 2618// Specifies how Amazon Simple Storage Service (Amazon S3) data should be 2619// encrypted. 2620type S3Encryption struct { 2621 2622 // The Amazon Resource Name (ARN) of the KMS key to be used to encrypt the data. 2623 KmsKeyArn *string 2624 2625 // The encryption mode to use for Amazon S3 data. 2626 S3EncryptionMode S3EncryptionMode 2627} 2628 2629// Specifies a data store in Amazon Simple Storage Service (Amazon S3). 2630type S3Target struct { 2631 2632 // The name of a connection which allows a job or crawler to access data in Amazon 2633 // S3 within an Amazon Virtual Private Cloud environment (Amazon VPC). 2634 ConnectionName *string 2635 2636 // A list of glob patterns used to exclude from the crawl. For more information, 2637 // see Catalog Tables with a Crawler 2638 // (https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html). 2639 Exclusions []string 2640 2641 // The path to the Amazon S3 target. 2642 Path *string 2643} 2644 2645// A scheduling object using a cron statement to schedule an event. 2646type Schedule struct { 2647 2648 // A cron expression used to specify the schedule (see Time-Based Schedules for 2649 // Jobs and Crawlers 2650 // (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html)). 2651 // For example, to run something every day at 12:15 UTC, you would specify: cron(15 2652 // 12 * * ? *). 2653 ScheduleExpression *string 2654 2655 // The state of the schedule. 2656 State ScheduleState 2657} 2658 2659// A policy that specifies update and deletion behaviors for the crawler. 2660type SchemaChangePolicy struct { 2661 2662 // The deletion behavior when the crawler finds a deleted object. 2663 DeleteBehavior DeleteBehavior 2664 2665 // The update behavior when the crawler finds a changed schema. 2666 UpdateBehavior UpdateBehavior 2667} 2668 2669// A key-value pair representing a column and data type that this transform can run 2670// against. 
The Schema parameter of the MLTransform may contain up to 100 of these 2671// structures. 2672type SchemaColumn struct { 2673 2674 // The type of data in the column. 2675 DataType *string 2676 2677 // The name of the column. 2678 Name *string 2679} 2680 2681// The unique ID of the schema in the AWS Glue schema registry. 2682type SchemaId struct { 2683 2684 // The name of the schema registry that contains the schema. 2685 RegistryName *string 2686 2687 // The Amazon Resource Name (ARN) of the schema. One of SchemaArn or SchemaName has 2688 // to be provided. 2689 SchemaArn *string 2690 2691 // The name of the schema. One of SchemaArn or SchemaName has to be provided. 2692 SchemaName *string 2693} 2694 2695// An object that contains minimal details for a schema. 2696type SchemaListItem struct { 2697 2698 // The date and time that a schema was created. 2699 CreatedTime *string 2700 2701 // A description for the schema. 2702 Description *string 2703 2704 // the name of the registry where the schema resides. 2705 RegistryName *string 2706 2707 // The Amazon Resource Name (ARN) for the schema. 2708 SchemaArn *string 2709 2710 // The name of the schema. 2711 SchemaName *string 2712 2713 // The status of the schema. 2714 SchemaStatus SchemaStatus 2715 2716 // The date and time that a schema was updated. 2717 UpdatedTime *string 2718} 2719 2720// An object that references a schema stored in the AWS Glue Schema Registry. 2721type SchemaReference struct { 2722 2723 // A structure that contains schema identity fields. Either this or the 2724 // SchemaVersionId has to be provided. 2725 SchemaId *SchemaId 2726 2727 // The unique ID assigned to a version of the schema. Either this or the SchemaId 2728 // has to be provided. 2729 SchemaVersionId *string 2730 2731 // The version number of the schema. 2732 SchemaVersionNumber int64 2733} 2734 2735// An object that contains the error details for an operation on a schema version. 
2736type SchemaVersionErrorItem struct { 2737 2738 // The details of the error for the schema version. 2739 ErrorDetails *ErrorDetails 2740 2741 // The version number of the schema. 2742 VersionNumber int64 2743} 2744 2745// An object containing the details about a schema version. 2746type SchemaVersionListItem struct { 2747 2748 // The date and time the schema version was created. 2749 CreatedTime *string 2750 2751 // The Amazon Resource Name (ARN) of the schema. 2752 SchemaArn *string 2753 2754 // The unique identifier of the schema version. 2755 SchemaVersionId *string 2756 2757 // The status of the schema version. 2758 Status SchemaVersionStatus 2759 2760 // The version number of the schema. 2761 VersionNumber int64 2762} 2763 2764// A structure containing the schema version information. 2765type SchemaVersionNumber struct { 2766 2767 // The latest version available for the schema. 2768 LatestVersion bool 2769 2770 // The version number of the schema. 2771 VersionNumber int64 2772} 2773 2774// Specifies a security configuration. 2775type SecurityConfiguration struct { 2776 2777 // The time at which this security configuration was created. 2778 CreatedTimeStamp *time.Time 2779 2780 // The encryption configuration associated with this security configuration. 2781 EncryptionConfiguration *EncryptionConfiguration 2782 2783 // The name of the security configuration. 2784 Name *string 2785} 2786 2787// Defines a non-overlapping region of a table's partitions, allowing multiple 2788// requests to be executed in parallel. 2789type Segment struct { 2790 2791 // The zero-based index number of the segment. For example, if the total number of 2792 // segments is 4, SegmentNumber values range from 0 through 3. 2793 // 2794 // This member is required. 2795 SegmentNumber int32 2796 2797 // The total number of segments. 2798 // 2799 // This member is required. 
2800 TotalSegments int32 2801} 2802 2803// Information about a serialization/deserialization program (SerDe) that serves as 2804// an extractor and loader. 2805type SerDeInfo struct { 2806 2807 // Name of the SerDe. 2808 Name *string 2809 2810 // These key-value pairs define initialization parameters for the SerDe. 2811 Parameters map[string]string 2812 2813 // Usually the class that implements the SerDe. An example is 2814 // org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe. 2815 SerializationLibrary *string 2816} 2817 2818// Specifies skewed values in a table. Skewed values are those that occur with very 2819// high frequency. 2820type SkewedInfo struct { 2821 2822 // A list of names of columns that contain skewed values. 2823 SkewedColumnNames []string 2824 2825 // A mapping of skewed values to the columns that contain them. 2826 SkewedColumnValueLocationMaps map[string]string 2827 2828 // A list of values that appear so frequently as to be considered skewed. 2829 SkewedColumnValues []string 2830} 2831 2832// Specifies a field to sort by and a sort order. 2833type SortCriterion struct { 2834 2835 // The name of the field on which to sort. 2836 FieldName *string 2837 2838 // An ascending or descending sort. 2839 Sort Sort 2840} 2841 2842// Describes the physical storage of table data. 2843type StorageDescriptor struct { 2844 2845 // A list of reducer grouping columns, clustering columns, and bucketing columns in 2846 // the table. 2847 BucketColumns []string 2848 2849 // A list of the Columns in the table. 2850 Columns []Column 2851 2852 // True if the data in the table is compressed, or False if not. 2853 Compressed bool 2854 2855 // The input format: SequenceFileInputFormat (binary), or TextInputFormat, or a 2856 // custom format. 2857 InputFormat *string 2858 2859 // The physical location of the table. By default, this takes the form of the 2860 // warehouse location, followed by the database location in the warehouse, followed 2861 // by the table name. 
2862 Location *string 2863 2864 // Must be specified if the table contains any dimension columns. 2865 NumberOfBuckets int32 2866 2867 // The output format: SequenceFileOutputFormat (binary), or 2868 // IgnoreKeyTextOutputFormat, or a custom format. 2869 OutputFormat *string 2870 2871 // The user-supplied properties in key-value form. 2872 Parameters map[string]string 2873 2874 // An object that references a schema stored in the AWS Glue Schema Registry. When 2875 // creating a table, you can pass an empty list of columns for the schema, and 2876 // instead use a schema reference. 2877 SchemaReference *SchemaReference 2878 2879 // The serialization/deserialization (SerDe) information. 2880 SerdeInfo *SerDeInfo 2881 2882 // The information about values that appear frequently in a column (skewed values). 2883 SkewedInfo *SkewedInfo 2884 2885 // A list specifying the sort order of each bucket in the table. 2886 SortColumns []Order 2887 2888 // True if the table data is stored in subdirectories, or False if not. 2889 StoredAsSubDirectories bool 2890} 2891 2892// Defines column statistics supported for character sequence data values. 2893type StringColumnStatisticsData struct { 2894 2895 // The average string length in the column. 2896 // 2897 // This member is required. 2898 AverageLength float64 2899 2900 // The size of the longest string in the column. 2901 // 2902 // This member is required. 2903 MaximumLength int64 2904 2905 // The number of distinct values in a column. 2906 // 2907 // This member is required. 2908 NumberOfDistinctValues int64 2909 2910 // The number of null values in the column. 2911 // 2912 // This member is required. 2913 NumberOfNulls int64 2914} 2915 2916// Represents a collection of related data organized in columns and rows. 2917type Table struct { 2918 2919 // The table name. For Hive compatibility, this must be entirely lowercase. 2920 // 2921 // This member is required. 
2922 Name *string 2923 2924 // The ID of the Data Catalog in which the table resides. 2925 CatalogId *string 2926 2927 // The time when the table definition was created in the Data Catalog. 2928 CreateTime *time.Time 2929 2930 // The person or entity who created the table. 2931 CreatedBy *string 2932 2933 // The name of the database where the table metadata resides. For Hive 2934 // compatibility, this must be all lowercase. 2935 DatabaseName *string 2936 2937 // A description of the table. 2938 Description *string 2939 2940 // Indicates whether the table has been registered with AWS Lake Formation. 2941 IsRegisteredWithLakeFormation bool 2942 2943 // The last time that the table was accessed. This is usually taken from HDFS, and 2944 // might not be reliable. 2945 LastAccessTime *time.Time 2946 2947 // The last time that column statistics were computed for this table. 2948 LastAnalyzedTime *time.Time 2949 2950 // The owner of the table. 2951 Owner *string 2952 2953 // These key-value pairs define properties associated with the table. 2954 Parameters map[string]string 2955 2956 // A list of columns by which the table is partitioned. Only primitive types are 2957 // supported as partition keys. When you create a table used by Amazon Athena, and 2958 // you do not specify any partitionKeys, you must at least set the value of 2959 // partitionKeys to an empty list. For example: "PartitionKeys": [] 2960 PartitionKeys []Column 2961 2962 // The retention time for this table. 2963 Retention int32 2964 2965 // A storage descriptor containing information about the physical storage of this 2966 // table. 2967 StorageDescriptor *StorageDescriptor 2968 2969 // The type of this table (EXTERNAL_TABLE, VIRTUAL_VIEW, etc.). 2970 TableType *string 2971 2972 // A TableIdentifier structure that describes a target table for resource linking. 2973 TargetTable *TableIdentifier 2974 2975 // The last time that the table was updated. 
2976 UpdateTime *time.Time 2977 2978 // If the table is a view, the expanded text of the view; otherwise null. 2979 ViewExpandedText *string 2980 2981 // If the table is a view, the original text of the view; otherwise null. 2982 ViewOriginalText *string 2983} 2984 2985// An error record for table operations. 2986type TableError struct { 2987 2988 // The details about the error. 2989 ErrorDetail *ErrorDetail 2990 2991 // The name of the table. For Hive compatibility, this must be entirely lowercase. 2992 TableName *string 2993} 2994 2995// A structure that describes a target table for resource linking. 2996type TableIdentifier struct { 2997 2998 // The ID of the Data Catalog in which the table resides. 2999 CatalogId *string 3000 3001 // The name of the catalog database that contains the target table. 3002 DatabaseName *string 3003 3004 // The name of the target table. 3005 Name *string 3006} 3007 3008// A structure used to define a table. 3009type TableInput struct { 3010 3011 // The table name. For Hive compatibility, this is folded to lowercase when it is 3012 // stored. 3013 // 3014 // This member is required. 3015 Name *string 3016 3017 // A description of the table. 3018 Description *string 3019 3020 // The last time that the table was accessed. 3021 LastAccessTime *time.Time 3022 3023 // The last time that column statistics were computed for this table. 3024 LastAnalyzedTime *time.Time 3025 3026 // The table owner. 3027 Owner *string 3028 3029 // These key-value pairs define properties associated with the table. 3030 Parameters map[string]string 3031 3032 // A list of columns by which the table is partitioned. Only primitive types are 3033 // supported as partition keys. When you create a table used by Amazon Athena, and 3034 // you do not specify any partitionKeys, you must at least set the value of 3035 // partitionKeys to an empty list. For example: "PartitionKeys": [] 3036 PartitionKeys []Column 3037 3038 // The retention time for this table. 
	Retention int32

	// A storage descriptor containing information about the physical storage of this
	// table.
	StorageDescriptor *StorageDescriptor

	// The type of this table (EXTERNAL_TABLE, VIRTUAL_VIEW, etc.).
	TableType *string

	// A TableIdentifier structure that describes a target table for resource linking.
	TargetTable *TableIdentifier

	// If the table is a view, the expanded text of the view; otherwise null.
	ViewExpandedText *string

	// If the table is a view, the original text of the view; otherwise null.
	ViewOriginalText *string
}

// Specifies a version of a table.
type TableVersion struct {

	// The table in question.
	Table *Table

	// The ID value that identifies this table version. A VersionId is a string
	// representation of an integer. Each version is incremented by 1.
	VersionId *string
}

// An error record for table-version operations.
type TableVersionError struct {

	// The details about the error.
	ErrorDetail *ErrorDetail

	// The name of the table in question.
	TableName *string

	// The ID value of the version in question. A VersionID is a string representation
	// of an integer. Each version is incremented by 1.
	VersionId *string
}

// Information about a specific task run of a machine learning transform.
type TaskRun struct {

	// The last point in time that the requested task run was completed.
	CompletedOn *time.Time

	// The error string associated with this task run.
	ErrorString *string

	// The amount of time (in seconds) that the task run consumed resources.
	ExecutionTime int32

	// The last point in time that the requested task run was updated.
	LastModifiedOn *time.Time

	// The name of the log group for secure logging, associated with this task run.
3099 LogGroupName *string 3100 3101 // Specifies configuration properties associated with this task run. 3102 Properties *TaskRunProperties 3103 3104 // The date and time that this task run started. 3105 StartedOn *time.Time 3106 3107 // The current status of the requested task run. 3108 Status TaskStatusType 3109 3110 // The unique identifier for this task run. 3111 TaskRunId *string 3112 3113 // The unique identifier for the transform. 3114 TransformId *string 3115} 3116 3117// The criteria that are used to filter the task runs for the machine learning 3118// transform. 3119type TaskRunFilterCriteria struct { 3120 3121 // Filter on task runs started after this date. 3122 StartedAfter *time.Time 3123 3124 // Filter on task runs started before this date. 3125 StartedBefore *time.Time 3126 3127 // The current status of the task run. 3128 Status TaskStatusType 3129 3130 // The type of task run. 3131 TaskRunType TaskType 3132} 3133 3134// The configuration properties for the task run. 3135type TaskRunProperties struct { 3136 3137 // The configuration properties for an exporting labels task run. 3138 ExportLabelsTaskRunProperties *ExportLabelsTaskRunProperties 3139 3140 // The configuration properties for a find matches task run. 3141 FindMatchesTaskRunProperties *FindMatchesTaskRunProperties 3142 3143 // The configuration properties for an importing labels task run. 3144 ImportLabelsTaskRunProperties *ImportLabelsTaskRunProperties 3145 3146 // The configuration properties for a labeling set generation task run. 3147 LabelingSetGenerationTaskRunProperties *LabelingSetGenerationTaskRunProperties 3148 3149 // The type of task run. 3150 TaskType TaskType 3151} 3152 3153// The sorting criteria that are used to sort the list of task runs for the machine 3154// learning transform. 3155type TaskRunSortCriteria struct { 3156 3157 // The column to be used to sort the list of task runs for the machine learning 3158 // transform. 3159 // 3160 // This member is required. 
3161 Column TaskRunSortColumnType 3162 3163 // The sort direction to be used to sort the list of task runs for the machine 3164 // learning transform. 3165 // 3166 // This member is required. 3167 SortDirection SortDirectionType 3168} 3169 3170// The encryption-at-rest settings of the transform that apply to accessing user 3171// data. Machine learning transforms can access user data encrypted in Amazon S3 3172// using KMS. Additionally, imported labels and trained transforms can now be 3173// encrypted using a customer provided KMS key. 3174type TransformEncryption struct { 3175 3176 // An MLUserDataEncryption object containing the encryption mode and 3177 // customer-provided KMS key ID. 3178 MlUserDataEncryption *MLUserDataEncryption 3179 3180 // The name of the security configuration. 3181 TaskRunSecurityConfigurationName *string 3182} 3183 3184// The criteria used to filter the machine learning transforms. 3185type TransformFilterCriteria struct { 3186 3187 // The time and date after which the transforms were created. 3188 CreatedAfter *time.Time 3189 3190 // The time and date before which the transforms were created. 3191 CreatedBefore *time.Time 3192 3193 // This value determines which version of AWS Glue this machine learning transform 3194 // is compatible with. Glue 1.0 is recommended for most customers. If the value is 3195 // not set, the Glue compatibility defaults to Glue 0.9. For more information, see 3196 // AWS Glue Versions 3197 // (https://docs.aws.amazon.com/glue/latest/dg/release-notes.html#release-notes-versions) 3198 // in the developer guide. 3199 GlueVersion *string 3200 3201 // Filter on transforms last modified after this date. 3202 LastModifiedAfter *time.Time 3203 3204 // Filter on transforms last modified before this date. 3205 LastModifiedBefore *time.Time 3206 3207 // A unique transform name that is used to filter the machine learning transforms. 3208 Name *string 3209 3210 // Filters on datasets with a specific schema. 
The Map object is an array of 3211 // key-value pairs representing the schema this transform accepts, where Column is 3212 // the name of a column, and Type is the type of the data such as an integer or 3213 // string. Has an upper bound of 100 columns. 3214 Schema []SchemaColumn 3215 3216 // Filters the list of machine learning transforms by the last known status of the 3217 // transforms (to indicate whether a transform can be used or not). One of 3218 // "NOT_READY", "READY", or "DELETING". 3219 Status TransformStatusType 3220 3221 // The type of machine learning transform that is used to filter the machine 3222 // learning transforms. 3223 TransformType TransformType 3224} 3225 3226// The algorithm-specific parameters that are associated with the machine learning 3227// transform. 3228type TransformParameters struct { 3229 3230 // The type of machine learning transform. For information about the types of 3231 // machine learning transforms, see Creating Machine Learning Transforms 3232 // (https://docs.aws.amazon.com/glue/latest/dg/add-job-machine-learning-transform.html). 3233 // 3234 // This member is required. 3235 TransformType TransformType 3236 3237 // The parameters for the find matches algorithm. 3238 FindMatchesParameters *FindMatchesParameters 3239} 3240 3241// The sorting criteria that are associated with the machine learning transform. 3242type TransformSortCriteria struct { 3243 3244 // The column to be used in the sorting criteria that are associated with the 3245 // machine learning transform. 3246 // 3247 // This member is required. 3248 Column TransformSortColumnType 3249 3250 // The sort direction to be used in the sorting criteria that are associated with 3251 // the machine learning transform. 3252 // 3253 // This member is required. 3254 SortDirection SortDirectionType 3255} 3256 3257// Information about a specific trigger. 3258type Trigger struct { 3259 3260 // The actions initiated by this trigger. 
3261 Actions []Action 3262 3263 // A description of this trigger. 3264 Description *string 3265 3266 // Reserved for future use. 3267 Id *string 3268 3269 // The name of the trigger. 3270 Name *string 3271 3272 // The predicate of this trigger, which defines when it will fire. 3273 Predicate *Predicate 3274 3275 // A cron expression used to specify the schedule (see Time-Based Schedules for 3276 // Jobs and Crawlers 3277 // (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html). 3278 // For example, to run something every day at 12:15 UTC, you would specify: cron(15 3279 // 12 * * ? *). 3280 Schedule *string 3281 3282 // The current state of the trigger. 3283 State TriggerState 3284 3285 // The type of trigger that this is. 3286 Type TriggerType 3287 3288 // The name of the workflow associated with the trigger. 3289 WorkflowName *string 3290} 3291 3292// The details of a Trigger node present in the workflow. 3293type TriggerNodeDetails struct { 3294 3295 // The information of the trigger represented by the trigger node. 3296 Trigger *Trigger 3297} 3298 3299// A structure used to provide information used to update a trigger. This object 3300// updates the previous trigger definition by overwriting it completely. 3301type TriggerUpdate struct { 3302 3303 // The actions initiated by this trigger. 3304 Actions []Action 3305 3306 // A description of this trigger. 3307 Description *string 3308 3309 // Reserved for future use. 3310 Name *string 3311 3312 // The predicate of this trigger, which defines when it will fire. 3313 Predicate *Predicate 3314 3315 // A cron expression used to specify the schedule (see Time-Based Schedules for 3316 // Jobs and Crawlers 3317 // (https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html). 3318 // For example, to run something every day at 12:15 UTC, you would specify: cron(15 3319 // 12 * * ? *). 3320 Schedule *string 3321} 3322 3323// Specifies a custom CSV classifier to be updated. 
3324type UpdateCsvClassifierRequest struct { 3325 3326 // The name of the classifier. 3327 // 3328 // This member is required. 3329 Name *string 3330 3331 // Enables the processing of files that contain only one column. 3332 AllowSingleColumn *bool 3333 3334 // Indicates whether the CSV file contains a header. 3335 ContainsHeader CsvHeaderOption 3336 3337 // A custom symbol to denote what separates each column entry in the row. 3338 Delimiter *string 3339 3340 // Specifies not to trim values before identifying the type of column values. The 3341 // default value is true. 3342 DisableValueTrimming *bool 3343 3344 // A list of strings representing column names. 3345 Header []string 3346 3347 // A custom symbol to denote what combines content into a single column value. It 3348 // must be different from the column delimiter. 3349 QuoteSymbol *string 3350} 3351 3352// Specifies a grok classifier to update when passed to UpdateClassifier. 3353type UpdateGrokClassifierRequest struct { 3354 3355 // The name of the GrokClassifier. 3356 // 3357 // This member is required. 3358 Name *string 3359 3360 // An identifier of the data format that the classifier matches, such as Twitter, 3361 // JSON, Omniture logs, Amazon CloudWatch Logs, and so on. 3362 Classification *string 3363 3364 // Optional custom grok patterns used by this classifier. 3365 CustomPatterns *string 3366 3367 // The grok pattern used by this classifier. 3368 GrokPattern *string 3369} 3370 3371// Specifies a JSON classifier to be updated. 3372type UpdateJsonClassifierRequest struct { 3373 3374 // The name of the classifier. 3375 // 3376 // This member is required. 3377 Name *string 3378 3379 // A JsonPath string defining the JSON data for the classifier to classify. AWS 3380 // Glue supports a subset of JsonPath, as described in Writing JsonPath Custom 3381 // Classifiers 3382 // (https://docs.aws.amazon.com/glue/latest/dg/custom-classifier.html#custom-classifier-json). 
	JsonPath *string
}

// Specifies an XML classifier to be updated.
type UpdateXMLClassifierRequest struct {

	// The name of the classifier.
	//
	// This member is required.
	Name *string

	// An identifier of the data format that the classifier matches.
	Classification *string

	// The XML tag designating the element that contains each record in an XML document
	// being parsed. This cannot identify a self-closing element (closed by />). An
	// empty row element that contains only attributes can be parsed as long as it ends
	// with a closing tag (for example, <row item_a="A" item_b="B"></row> is okay, but
	// <row item_a="A" item_b="B" /> is not).
	RowTag *string
}

// Represents the equivalent of a Hive user-defined function (UDF) definition.
type UserDefinedFunction struct {

	// The ID of the Data Catalog in which the function resides.
	CatalogId *string

	// The Java class that contains the function code.
	ClassName *string

	// The time at which the function was created.
	CreateTime *time.Time

	// The name of the catalog database that contains the function.
	DatabaseName *string

	// The name of the function.
	FunctionName *string

	// The owner of the function.
	OwnerName *string

	// The owner type.
	OwnerType PrincipalType

	// The resource URIs for the function.
	ResourceUris []ResourceUri
}

// A structure used to create or update a user-defined function.
type UserDefinedFunctionInput struct {

	// The Java class that contains the function code.
	ClassName *string

	// The name of the function.
	FunctionName *string

	// The owner of the function.
	OwnerName *string

	// The owner type.
	OwnerType PrincipalType

	// The resource URIs for the function.
	ResourceUris []ResourceUri
}

// A workflow represents a flow in which AWS Glue components should be executed to
// complete a logical task.
type Workflow struct {

	// The date and time when the workflow was created.
	CreatedOn *time.Time

	// A collection of properties to be used as part of each execution of the workflow.
	DefaultRunProperties map[string]string

	// A description of the workflow.
	Description *string

	// The graph representing all the AWS Glue components that belong to the workflow
	// as nodes and directed connections between them as edges.
	Graph *WorkflowGraph

	// The date and time when the workflow was last modified.
	LastModifiedOn *time.Time

	// The information about the last execution of the workflow.
	LastRun *WorkflowRun

	// You can use this parameter to prevent unwanted multiple updates to data, to
	// control costs, or in some cases, to prevent exceeding the maximum number of
	// concurrent runs of any of the component jobs. If you leave this parameter blank,
	// there is no limit to the number of concurrent workflow runs.
	MaxConcurrentRuns *int32

	// The name of the workflow representing the flow.
	Name *string
}

// A workflow graph represents the complete workflow containing all the AWS Glue
// components present in the workflow and all the directed connections between
// them.
type WorkflowGraph struct {

	// A list of all the directed connections between the nodes belonging to the
	// workflow.
	Edges []Edge

	// A list of the AWS Glue components that belong to the workflow, represented as
	// nodes.
	Nodes []Node
}

// A workflow run is an execution of a workflow providing all the runtime
// information.
type WorkflowRun struct {

	// The date and time when the workflow run completed.
3503 CompletedOn *time.Time 3504 3505 // This error message describes any error that may have occurred in starting the 3506 // workflow run. Currently the only error message is "Concurrent runs exceeded for 3507 // workflow: foo." 3508 ErrorMessage *string 3509 3510 // The graph representing all the AWS Glue components that belong to the workflow 3511 // as nodes and directed connections between them as edges. 3512 Graph *WorkflowGraph 3513 3514 // Name of the workflow that was executed. 3515 Name *string 3516 3517 // The ID of the previous workflow run. 3518 PreviousRunId *string 3519 3520 // The date and time when the workflow run was started. 3521 StartedOn *time.Time 3522 3523 // The statistics of the run. 3524 Statistics *WorkflowRunStatistics 3525 3526 // The status of the workflow run. 3527 Status WorkflowRunStatus 3528 3529 // The ID of this workflow run. 3530 WorkflowRunId *string 3531 3532 // The workflow run properties which were set during the run. 3533 WorkflowRunProperties map[string]string 3534} 3535 3536// Workflow run statistics provides statistics about the workflow run. 3537type WorkflowRunStatistics struct { 3538 3539 // Total number of Actions that have failed. 3540 FailedActions int32 3541 3542 // Total number Actions in running state. 3543 RunningActions int32 3544 3545 // Total number of Actions that have stopped. 3546 StoppedActions int32 3547 3548 // Total number of Actions that have succeeded. 3549 SucceededActions int32 3550 3551 // Total number of Actions that timed out. 3552 TimeoutActions int32 3553 3554 // Total number of Actions in the workflow run. 3555 TotalActions int32 3556} 3557 3558// A classifier for XML content. 3559type XMLClassifier struct { 3560 3561 // An identifier of the data format that the classifier matches. 3562 // 3563 // This member is required. 3564 Classification *string 3565 3566 // The name of the classifier. 3567 // 3568 // This member is required. 
	Name *string

	// The time that this classifier was registered.
	CreationTime *time.Time

	// The time that this classifier was last updated.
	LastUpdated *time.Time

	// The XML tag designating the element that contains each record in an XML document
	// being parsed. This can't identify a self-closing element (closed by />). An
	// empty row element that contains only attributes can be parsed as long as it ends
	// with a closing tag (for example, <row item_a="A" item_b="B"></row> is okay, but
	// <row item_a="A" item_b="B" /> is not).
	RowTag *string

	// The version of this classifier.
	Version int64
}