databrew/types/types.go

// Code generated by smithy-go-codegen DO NOT EDIT.

package types

import (
	"time"
)

// ConditionExpression represents an individual condition that evaluates to true
// or false. Conditions are used with recipe actions: the action is only
// performed for column values where the condition evaluates to true. If a recipe
// requires more than one condition, then the recipe must specify multiple
// ConditionExpression elements. Each condition is applied to the rows in a
// dataset first, before the recipe action is performed.
type ConditionExpression struct {

	// A specific condition to apply to a recipe action. For more information, see
	// Recipe structure
	// (https://docs.aws.amazon.com/databrew/latest/dg/recipe-structure.html) in the
	// AWS Glue DataBrew Developer Guide.
	//
	// This member is required.
	Condition *string

	// A column to apply this condition to, within an AWS Glue DataBrew dataset.
	//
	// This member is required.
	TargetColumn *string

	// A value that the condition must evaluate to for the condition to succeed.
	Value *string
}

// DataCatalogInputDefinition represents how metadata stored in the AWS Glue Data
// Catalog is defined in an AWS Glue DataBrew dataset.
type DataCatalogInputDefinition struct {

	// The name of a database in the Data Catalog.
	//
	// This member is required.
	DatabaseName *string

	// The name of a database table in the Data Catalog. This table corresponds to a
	// DataBrew dataset.
	//
	// This member is required.
	TableName *string

	// The unique identifier of the AWS account that holds the Data Catalog that stores
	// the data.
	CatalogId *string

	// An Amazon location that AWS Glue Data Catalog can use as a temporary directory.
	// The field is typed as an S3Location.
	TempDirectory *S3Location
}

// Dataset represents a dataset that can be processed by AWS Glue DataBrew.
type Dataset struct {

	// Information on how DataBrew can find the dataset, in either the AWS Glue Data
	// Catalog or Amazon S3.
	//
	// This member is required.
	Input *Input

	// The unique name of the dataset.
	//
	// This member is required.
	Name *string

	// The ID of the AWS account that owns the dataset.
	AccountId *string

	// The date and time that the dataset was created.
	CreateDate *time.Time

	// The identifier (the user name) of the user who created the dataset.
	CreatedBy *string

	// Options that define how DataBrew interprets the data in the dataset.
	FormatOptions *FormatOptions

	// The identifier (the user name) of the user who last modified the dataset.
	LastModifiedBy *string

	// The last modification date and time of the dataset.
	LastModifiedDate *time.Time

	// The unique Amazon Resource Name (ARN) for the dataset.
	ResourceArn *string

	// The location of the data for the dataset, either Amazon S3 or the AWS Glue Data
	// Catalog.
	Source Source

	// Metadata tags that have been applied to the dataset.
	Tags map[string]string
}

// ExcelOptions holds the options that define how DataBrew will interpret a
// Microsoft Excel file when creating a dataset from that file.
type ExcelOptions struct {

	// Specifies one or more sheet numbers in the Excel file, which will be included in
	// the dataset.
	SheetIndexes []int32

	// Specifies one or more named sheets in the Excel file, which will be included in
	// the dataset.
	SheetNames []string
}

// FormatOptions holds the format-specific options that define how input is to be
// interpreted by DataBrew. It carries one optional member per supported format:
// Microsoft Excel and JSON.
type FormatOptions struct {

	// Options that define how Excel input is to be interpreted by DataBrew.
	Excel *ExcelOptions

	// Options that define how JSON input is to be interpreted by DataBrew.
	Json *JsonOptions
}

// Input holds information on how AWS Glue DataBrew can find data, in either the
// AWS Glue Data Catalog or Amazon S3.
type Input struct {

	// The AWS Glue Data Catalog parameters for the data.
	DataCatalogInputDefinition *DataCatalogInputDefinition

	// The Amazon S3 location where the data is stored.
	S3InputDefinition *S3Location
}

// Job represents all of the attributes of an AWS Glue DataBrew job.
type Job struct {

	// The unique name of the job.
	//
	// This member is required.
	Name *string

	// The ID of the AWS account that owns the job.
	AccountId *string

	// The date and time that the job was created.
	CreateDate *time.Time

	// The identifier (the user name) of the user who created the job.
	CreatedBy *string

	// A dataset that the job is to process.
	DatasetName *string

	// The Amazon Resource Name (ARN) of an encryption key that is used to protect a
	// job.
	EncryptionKeyArn *string

	// The encryption mode for the job, which can be one of the following:
	//
	// * SSE-KMS - Server-side encryption with AWS KMS-managed keys.
	//
	// * SSE-S3 - Server-side encryption with keys managed by Amazon S3.
	EncryptionMode EncryptionMode

	// The identifier (the user name) of the user who last modified the job.
	LastModifiedBy *string

	// The modification date and time of the job.
	LastModifiedDate *time.Time

	// The current status of Amazon CloudWatch logging for the job.
	LogSubscription LogSubscription

	// The maximum number of nodes that can be consumed when the job processes data.
	MaxCapacity int32

	// The maximum number of times to retry the job after a job run fails.
	MaxRetries int32

	// One or more artifacts that represent output from running the job.
	Outputs []Output

	// The name of the project that the job is associated with.
	ProjectName *string

	// A set of steps that the job runs.
	RecipeReference *RecipeReference

	// The unique Amazon Resource Name (ARN) for the job.
	ResourceArn *string

	// The Amazon Resource Name (ARN) of the role that will be assumed for this job.
	RoleArn *string

	// Metadata tags that have been applied to the job.
	Tags map[string]string

	// The job's timeout in minutes. A job that attempts to run longer than this
	// timeout period ends with a status of TIMEOUT.
	Timeout int32

	// The job type of the job, which must be one of the following:
	//
	// * PROFILE - A job to analyze a dataset, to determine its size, data types,
	// data distribution, and more.
	//
	// * RECIPE - A job to apply one or more transformations to a dataset.
	Type JobType
}

// JobRun represents one run of an AWS Glue DataBrew job.
type JobRun struct {

	// The number of times that DataBrew has attempted to run the job.
	Attempt int32

	// The date and time when the job completed processing.
	CompletedOn *time.Time

	// The name of the dataset for the job to process.
	DatasetName *string

	// A message indicating an error (if any) that was encountered when the job ran.
	ErrorMessage *string

	// The amount of time, in seconds, during which a job run consumed resources.
	ExecutionTime int32

	// The name of the job being processed during this run.
	JobName *string

	// The name of an Amazon CloudWatch log group, where the job writes diagnostic
	// messages when it runs.
	LogGroupName *string

	// The current status of Amazon CloudWatch logging for the job run.
	LogSubscription LogSubscription

	// One or more output artifacts from a job run.
	Outputs []Output

	// The set of steps processed by the job.
	RecipeReference *RecipeReference

	// The unique identifier of the job run.
	RunId *string

	// The identifier (the user name) of the user who initiated the job run.
	StartedBy *string

	// The date and time when the job run began.
	StartedOn *time.Time

	// The current state of the job run entity itself.
	State JobRunState
}

// JsonOptions represents the JSON-specific options that define how input is to
// be interpreted by AWS Glue DataBrew.
type JsonOptions struct {

	// A value that specifies whether JSON input contains embedded new line characters.
	MultiLine bool
}

// Output represents individual output from a particular job run.
type Output struct {

	// The location in Amazon S3 where the job writes its output.
	//
	// This member is required.
	Location *S3Location

	// The compression algorithm used to compress the output text of the job.
	CompressionFormat CompressionFormat

	// The data format of the output of the job.
	Format OutputFormat

	// A value that, if true, means that any data in the location specified for output
	// is overwritten with new output.
	Overwrite bool

	// The names of one or more partition columns for the output of the job.
	PartitionColumns []string
}

// Project represents all of the attributes of an AWS Glue DataBrew project.
type Project struct {

	// The unique name of a project.
	//
	// This member is required.
	Name *string

	// The name of a recipe that will be developed during a project session.
	//
	// This member is required.
	RecipeName *string

	// The ID of the AWS account that owns the project.
	AccountId *string

	// The date and time that the project was created.
	CreateDate *time.Time

	// The identifier (the user name) of the user who created the project.
	CreatedBy *string

	// The dataset that the project is to act upon.
	DatasetName *string

	// The identifier (the user name) of the user who last modified the project.
	LastModifiedBy *string

	// The last modification date and time for the project.
	LastModifiedDate *time.Time

	// The date and time when the project was opened.
	OpenDate *time.Time

	// The identifier (the user name) of the user that opened the project for use.
	OpenedBy *string

	// The Amazon Resource Name (ARN) for the project.
	ResourceArn *string

	// The Amazon Resource Name (ARN) of the role that will be assumed for this
	// project.
	RoleArn *string

	// The sample size and sampling type to apply to the data. If this parameter isn't
	// specified, then the sample will consist of the first 500 rows from the dataset.
	Sample *Sample

	// Metadata tags that have been applied to the project.
	Tags map[string]string
}

// Recipe represents one or more actions to be performed on an AWS Glue DataBrew
// dataset.
type Recipe struct {

	// The unique name for the recipe.
	//
	// This member is required.
	Name *string

	// The date and time that the recipe was created.
	CreateDate *time.Time

	// The identifier (the user name) of the user who created the recipe.
	CreatedBy *string

	// The description of the recipe.
	Description *string

	// The identifier (the user name) of the user who last modified the recipe.
	LastModifiedBy *string

	// The last modification date and time of the recipe.
	LastModifiedDate *time.Time

	// The name of the project that the recipe is associated with.
	ProjectName *string

	// The identifier (the user name) of the user who published the recipe.
	PublishedBy *string

	// The date and time when the recipe was published.
	PublishedDate *time.Time

	// The identifier for the version for the recipe.
	RecipeVersion *string

	// The Amazon Resource Name (ARN) for the recipe.
	ResourceArn *string

	// A list of steps that are defined by the recipe.
	Steps []RecipeStep

	// Metadata tags that have been applied to the recipe.
	Tags map[string]string
}

// RecipeAction represents a transformation and associated parameters that are
// used to apply a change to an AWS Glue DataBrew dataset. For more information,
// see Recipe structure
// (https://docs.aws.amazon.com/databrew/latest/dg/recipe-structure.html) and
// Recipe actions reference
// (https://docs.aws.amazon.com/databrew/latest/dg/recipe-actions-reference.html).
type RecipeAction struct {

	// The name of a valid DataBrew transformation to be performed on the data.
	//
	// This member is required.
	Operation *string

	// Contextual parameters for the transformation.
	Parameters map[string]string
}

// RecipeReference represents all of the attributes of an AWS Glue DataBrew
// recipe. As declared here, it carries the recipe's name and an optional
// version identifier.
type RecipeReference struct {

	// The name of the recipe.
	//
	// This member is required.
	Name *string

	// The identifier for the version for the recipe.
	RecipeVersion *string
}

// RecipeStep represents a single step to be performed in an AWS Glue DataBrew
// recipe.
type RecipeStep struct {

	// The particular action to be performed in the recipe step.
	//
	// This member is required.
	Action *RecipeAction

	// One or more conditions that must be met, in order for the recipe step to
	// succeed. All of the conditions in the array must be met. In other words, all of
	// the conditions must be combined using a logical AND operation.
	ConditionExpressions []ConditionExpression
}

// RecipeVersionErrorDetail represents any errors encountered when attempting to
// delete multiple recipe versions.
type RecipeVersionErrorDetail struct {

	// The HTTP status code for the error.
	ErrorCode *string

	// The text of the error message.
	ErrorMessage *string

	// The identifier for the recipe version associated with this error.
	RecipeVersion *string
}

// S3Location is an Amazon S3 location (bucket name and object key) where
// DataBrew can read input data, or write output from a job.
type S3Location struct {

	// The S3 bucket name.
	//
	// This member is required.
	Bucket *string

	// The unique name of the object in the bucket.
	Key *string
}

// Sample represents the sample size and sampling type for AWS Glue DataBrew to
// use for interactive data analysis.
type Sample struct {

	// The way in which DataBrew obtains rows from a dataset.
	//
	// This member is required.
	Type SampleType

	// The number of rows in the sample.
	Size *int32
}

// Schedule represents one or more dates and times when a job is to run.
type Schedule struct {

	// The name of the schedule.
	//
	// This member is required.
	Name *string

	// The ID of the AWS account that owns the schedule.
	AccountId *string

	// The date and time that the schedule was created.
	CreateDate *time.Time

	// The identifier (the user name) of the user who created the schedule.
	CreatedBy *string

	// The date(s) and time(s), in cron format, when the job will run.
	CronExpression *string

	// A list of jobs to be run, according to the schedule.
	JobNames []string

	// The identifier (the user name) of the user who last modified the schedule.
	LastModifiedBy *string

	// The date and time when the schedule was last modified.
	LastModifiedDate *time.Time

	// The Amazon Resource Name (ARN) of the schedule.
	ResourceArn *string

	// Metadata tags that have been applied to the schedule.
	Tags map[string]string
}

// ViewFrame represents the data being transformed during an AWS Glue DataBrew
// project session.
type ViewFrame struct {

	// The starting index for the range of columns to return in the view frame.
	//
	// This member is required.
	StartColumnIndex *int32

	// The number of columns to include in the view frame, beginning with the
	// StartColumnIndex value and ignoring any columns in the HiddenColumns list.
	ColumnRange *int32

	// A list of columns to hide in the view frame.
	HiddenColumns []string
}