diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md
index 969164d869..8081e01251 100644
--- a/NEXT_CHANGELOG.md
+++ b/NEXT_CHANGELOG.md
@@ -8,6 +8,8 @@
 
 ### Bundles
 
+* Add interactive SQL warehouse picker to `default-sql` and `dbt-sql` bundle templates ([#4170](https://github.com/databricks/cli/pull/4170))
+
 ### Dependency updates
 
 ### API Changes
diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md
index 756a2eda4e..7a36fa56fc 100644
--- a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md
+++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/README.md
@@ -88,12 +88,12 @@ $ dbt test
 
 ## Production setup
 
-Your production dbt profiles are defined in dbt_profiles/profiles.yml.
-These profiles define the default catalog, schema, and any other
+Your production dbt profiles are defined in `dbt_profiles/profiles.yml`.
+These profiles define the default warehouse, catalog, schema, and any other
 target-specific settings. Read more about dbt profiles on Databricks at
 https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile.
 
-The target workspaces for staging and prod are defined in databricks.yml.
+The target workspaces for staging and prod are defined in `databricks.yml`.
 You can manually deploy based on these configurations (see below).
 Or you can use CI/CD to automate deployment. See
 https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation
@@ -120,7 +120,7 @@ For example, the default template would deploy a job called
 You can find that job by opening your workspace and clicking on **Workflows**.
 
 You can also deploy to your production target directly from the command-line.
-The warehouse, catalog, and schema for that target are configured in databricks.yml.
+The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`.
 When deploying to this target, note that the default job at resources/my_dbt_sql.job.yml
 has a schedule set that runs every day. The schedule is paused when deploying in
 development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md
index 67ded153f1..903869eebe 100644
--- a/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md
+++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/README.md
@@ -39,3 +39,8 @@ The 'my_default_sql' project was generated by using the default-sql template.
 7. For documentation on the Databricks Asset Bundles format used for this
    project, and for CI/CD configuration, see
    https://docs.databricks.com/dev-tools/bundles/index.html.
+
+## Changing the warehouse, catalog, or schema
+
+The default SQL warehouse, catalog, and schema are configured in `databricks.yml`.
+To change these settings, edit the `variables` section for each target (dev/prod).
diff --git a/libs/cmdio/io.go b/libs/cmdio/io.go
index 356c6f9ac8..d0bb9e6c2e 100644
--- a/libs/cmdio/io.go
+++ b/libs/cmdio/io.go
@@ -114,7 +114,7 @@ func (c *cmdIO) Select(items []Tuple, label string) (id string, err error) {
 		StartInSearchMode: true,
 		Searcher: func(input string, idx int) bool {
 			lower := strings.ToLower(items[idx].Name)
-			return strings.Contains(lower, input)
+			return strings.Contains(lower, strings.ToLower(input))
 		},
 		Templates: &promptui.SelectTemplates{
 			Active: `{{.Name | bold}} ({{.Id|faint}})`,
diff --git a/libs/databrickscfg/cfgpickers/clusters.go b/libs/databrickscfg/cfgpickers/clusters.go
index 44e4f71fb7..d1be8158a9 100644
--- a/libs/databrickscfg/cfgpickers/clusters.go
+++ b/libs/databrickscfg/cfgpickers/clusters.go
@@ -194,7 +194,7 @@ func AskForCluster(ctx context.Context, w *databricks.WorkspaceClient, filters ...clusterFilter) (string, error) {
 		Items: compatible,
 		Searcher: func(input string, idx int) bool {
 			lower := strings.ToLower(compatible[idx].ClusterName)
-			return strings.Contains(lower, input)
+			return strings.Contains(lower, strings.ToLower(input))
 		},
 		StartInSearchMode: true,
 		Templates: &promptui.SelectTemplates{
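Both searcher hunks above fix the same bug: promptui hands the callback the user's raw input, and `strings.Contains` is case-sensitive, so only the item name was being lower-cased and any capital letter in the search input could never match. A minimal, stdlib-only sketch of the before/after behavior (the item name is illustrative):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// The item name is lower-cased, as in the Searcher callbacks above.
	lower := strings.ToLower("Shared Endpoint") // "shared endpoint"

	// Before the fix: raw input with a capital letter never matches.
	fmt.Println(strings.Contains(lower, "Shared")) // false

	// After the fix: lower-casing the input makes the search case-insensitive.
	fmt.Println(strings.Contains(lower, strings.ToLower("Shared"))) // true
}
```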
diff --git a/libs/databrickscfg/cfgpickers/warehouses.go b/libs/databrickscfg/cfgpickers/warehouses.go
index 65b5f8c83c..73c60dc08e 100644
--- a/libs/databrickscfg/cfgpickers/warehouses.go
+++ b/libs/databrickscfg/cfgpickers/warehouses.go
@@ -4,11 +4,17 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"sort"
+	"strings"
 
 	"github.com/databricks/cli/libs/cmdio"
 	"github.com/databricks/databricks-sdk-go"
+	"github.com/databricks/databricks-sdk-go/apierr"
+	"github.com/databricks/databricks-sdk-go/config"
+	"github.com/databricks/databricks-sdk-go/httpclient"
 	"github.com/databricks/databricks-sdk-go/service/sql"
 	"github.com/fatih/color"
+	"github.com/manifoldco/promptui"
 )
 
 var ErrNoCompatibleWarehouses = errors.New("no compatible warehouses")
@@ -63,3 +69,171 @@ func AskForWarehouse(ctx context.Context, w *databricks.WorkspaceClient, filters ...warehouseFilter) (string, error) {
 	}
 	return cmdio.Select(ctx, names, "Choose SQL Warehouse")
 }
+
+// sortWarehousesByState sorts warehouses by state priority (running first), then alphabetically by name.
+// Deleted warehouses are filtered out.
+func sortWarehousesByState(all []sql.EndpointInfo) []sql.EndpointInfo {
+	var warehouses []sql.EndpointInfo
+	for _, wh := range all {
+		if wh.State != sql.StateDeleted && wh.State != sql.StateDeleting {
+			warehouses = append(warehouses, wh)
+		}
+	}
+
+	priorities := map[sql.State]int{
+		sql.StateRunning:  1,
+		sql.StateStarting: 2,
+		sql.StateStopped:  3,
+		sql.StateStopping: 4,
+	}
+	sort.Slice(warehouses, func(i, j int) bool {
+		pi, pj := priorities[warehouses[i].State], priorities[warehouses[j].State]
+		if pi != pj {
+			return pi < pj
+		}
+		return strings.ToLower(warehouses[i].Name) < strings.ToLower(warehouses[j].Name)
+	})
+
+	return warehouses
+}
+
+// GetDefaultWarehouse returns the default warehouse for the workspace.
+// It tries the following in order:
+// 1. The "default" warehouse via API (server-side convention, not yet fully rolled out)
+// 2. The first usable warehouse sorted by state (running first)
+func GetDefaultWarehouse(ctx context.Context, w *databricks.WorkspaceClient) (*sql.EndpointInfo, error) {
+	// Try the "default" warehouse convention first.
+	// This is a new server-side feature that may not be available everywhere yet.
+	warehouse, err := w.Warehouses.Get(ctx, sql.GetWarehouseRequest{Id: "default"})
+	if err == nil {
+		return &sql.EndpointInfo{
+			Id:    warehouse.Id,
+			Name:  warehouse.Name,
+			State: warehouse.State,
+		}, nil
+	}
+	var apiErr *apierr.APIError
+	if !errors.As(err, &apiErr) || apiErr.StatusCode >= 500 {
+		return nil, fmt.Errorf("get default warehouse: %w", err)
+	}
+
+	warehouses, err := listUsableWarehouses(ctx, w)
+	if err != nil {
+		return nil, err
+	}
+	warehouses = sortWarehousesByState(warehouses)
+	if len(warehouses) == 0 {
+		return nil, ErrNoCompatibleWarehouses
+	}
+	return &warehouses[0], nil
+}
+
+// listUsableWarehouses returns warehouses the user has permission to use.
+// This uses the skip_cannot_use=true parameter to filter out inaccessible warehouses.
+func listUsableWarehouses(ctx context.Context, w *databricks.WorkspaceClient) ([]sql.EndpointInfo, error) {
+	// The SDK doesn't expose the skip_cannot_use parameter, so we use the raw API.
+	clientCfg, err := config.HTTPClientConfigFromConfig(w.Config)
+	if err != nil {
+		return nil, fmt.Errorf("create HTTP client config: %w", err)
+	}
+	apiClient := httpclient.NewApiClient(clientCfg)
+
+	var response sql.ListWarehousesResponse
+	err = apiClient.Do(ctx, "GET", "/api/2.0/sql/warehouses?skip_cannot_use=true",
+		httpclient.WithResponseUnmarshal(&response))
+	if err != nil {
+		return nil, fmt.Errorf("list warehouses: %w", err)
+	}
+	return response.Warehouses, nil
+}
+
+// SelectWarehouse prompts the user to select a SQL warehouse and returns the warehouse ID.
+// Warehouses are sorted by state (running first) so the default selection is the best available.
+// In non-interactive mode, the first (best) warehouse is returned automatically.
+// The description parameter is shown before the picker (if non-empty).
+func SelectWarehouse(ctx context.Context, w *databricks.WorkspaceClient, description string, filters ...warehouseFilter) (string, error) {
+	all, err := w.Warehouses.ListAll(ctx, sql.ListWarehousesRequest{})
+	if err != nil {
+		return "", fmt.Errorf("list warehouses: %w", err)
+	}
+
+	warehouses := sortWarehousesByState(all)
+
+	// Apply filters
+	var filtered []sql.EndpointInfo
+	for _, wh := range warehouses {
+		skip := false
+		for _, filter := range filters {
+			if !filter(wh) {
+				skip = true
+				break
+			}
+		}
+		if !skip {
+			filtered = append(filtered, wh)
+		}
+	}
+	warehouses = filtered
+
+	if len(warehouses) == 0 {
+		return "", ErrNoCompatibleWarehouses
+	}
+
+	if len(warehouses) == 1 || !cmdio.IsPromptSupported(ctx) {
+		return warehouses[0].Id, nil
+	}
+
+	// The first warehouse (sorted by state, then alphabetically) is the default.
+	defaultId := warehouses[0].Id
+
+	// Sort by running state first, then alphabetically for display.
+	sort.Slice(warehouses, func(i, j int) bool {
+		iRunning := warehouses[i].State == sql.StateRunning
+		jRunning := warehouses[j].State == sql.StateRunning
+		if iRunning != jRunning {
+			return iRunning
+		}
+		return strings.ToLower(warehouses[i].Name) < strings.ToLower(warehouses[j].Name)
+	})
+
+	// Build options for the picker (● = running, ○ = not running)
+	var items []cmdio.Tuple
+	for _, warehouse := range warehouses {
+		var icon string
+		if warehouse.State == sql.StateRunning {
+			icon = color.GreenString("●")
+		} else {
+			icon = color.HiBlackString("○")
+		}
+
+		// Show type info in gray
+		typeInfo := strings.ToLower(string(warehouse.WarehouseType))
+		if warehouse.EnableServerlessCompute {
+			typeInfo = "serverless"
+		}
+
+		name := fmt.Sprintf("%s %s %s", icon, warehouse.Name, color.HiBlackString(typeInfo))
+		if warehouse.Id == defaultId {
+			name += color.HiBlackString(" [DEFAULT]")
+		}
+		items = append(items, cmdio.Tuple{Name: name, Id: warehouse.Id})
+	}
+
+	if description != "" {
+		cmdio.LogString(ctx, description)
+	}
+	promptui.SearchPrompt = "Search: "
+	warehouseId, err := cmdio.SelectOrdered(ctx, items, "warehouse\n")
+	if err != nil {
+		return "", err
+	}
+
+	for _, wh := range warehouses {
+		if wh.Id == warehouseId {
+			cmdio.LogString(ctx, fmt.Sprintf("warehouse_id: %s (%s)", warehouseId, wh.Name))
+			break
+		}
+	}
+
+	return warehouseId, nil
+}
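As a usage sketch (not part of this diff): a caller with a configured workspace client could drive the picker and build the `http_path` value the SQL templates store. The helper name `pickWarehouseHTTPPath` is hypothetical, and the sketch assumes ctx already carries the CLI's cmdio instance so the prompt can render.

```go
package example

import (
	"context"

	"github.com/databricks/cli/libs/databrickscfg/cfgpickers"
	"github.com/databricks/databricks-sdk-go"
)

// pickWarehouseHTTPPath is a hypothetical helper. If ctx does not support
// prompting, SelectWarehouse silently falls back to the first (best)
// warehouse instead of rendering the picker.
func pickWarehouseHTTPPath(ctx context.Context, w *databricks.WorkspaceClient) (string, error) {
	id, err := cfgpickers.SelectWarehouse(ctx, w, "Select a SQL warehouse to use during development.")
	if err != nil {
		return "", err
	}
	// Same shape the template engine writes into http_path via resolveFormat below.
	return "/sql/1.0/warehouses/" + id, nil
}
```

For fully non-interactive flows, `GetDefaultWarehouse` above gives a similar best-available choice without any prompt.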
diff --git a/libs/jsonschema/schema.go b/libs/jsonschema/schema.go
index 47a6407470..99afa2f677 100644
--- a/libs/jsonschema/schema.go
+++ b/libs/jsonschema/schema.go
@@ -65,6 +65,9 @@ type Schema struct {
 	// here: https://github.com/google/re2/wiki/Syntax
 	Pattern string `json:"pattern,omitempty"`
 
+	// Format specifies custom input handling. Supported: "warehouse_path".
+	Format string `json:"format,omitempty"`
+
 	// Extension embeds our custom JSON schema extensions.
 	Extension
 
diff --git a/libs/template/config.go b/libs/template/config.go
index d8f9829649..8e1e2ffe38 100644
--- a/libs/template/config.go
+++ b/libs/template/config.go
@@ -6,7 +6,9 @@ import (
 	"fmt"
 	"io/fs"
 
+	"github.com/databricks/cli/libs/cmdctx"
 	"github.com/databricks/cli/libs/cmdio"
+	"github.com/databricks/cli/libs/databrickscfg/cfgpickers"
 	"github.com/databricks/cli/libs/jsonschema"
 	"github.com/databricks/cli/libs/log"
 	"golang.org/x/exp/maps"
@@ -112,6 +114,17 @@ func (c *config) assignDefaultValues(r *renderer) error {
 		if _, ok := c.values[name]; ok {
 			continue
 		}
+
+		// Resolve custom formats (e.g., warehouse_path auto-selects the default warehouse)
+		if property.Format != "" {
+			val, err := c.resolveFormat(property.Format, "")
+			if err != nil {
+				return err
+			}
+			c.values[name] = val
+			continue
+		}
+
 		// No default value defined for the property
 		if property.Default == nil {
 			continue
@@ -133,6 +146,22 @@
 	return nil
 }
 
+// resolveFormat returns a value for custom format types.
+// Returns empty string for unknown formats (to fall back to default handling).
+func (c *config) resolveFormat(format, description string) (string, error) {
+	switch format {
+	case "warehouse_path":
+		w := cmdctx.WorkspaceClient(c.ctx)
+		warehouseId, err := cfgpickers.SelectWarehouse(c.ctx, w, description)
+		if err != nil {
+			return "", err
+		}
+		return "/sql/1.0/warehouses/" + warehouseId, nil
+	default:
+		return "", nil
+	}
+}
+
 func (c *config) skipPrompt(p jsonschema.Property, r *renderer) (bool, error) {
 	// Config already has a value assigned. We don't have to prompt for a user input.
 	if _, ok := c.values[p.Name]; ok {
@@ -170,7 +199,14 @@ func (c *config) skipPrompt(p jsonschema.Property, r *renderer) (bool, error) {
 
 func (c *config) promptOnce(property *jsonschema.Schema, name, defaultVal, description string) error {
 	var userInput string
-	if property.Enum != nil {
+
+	if property.Format != "" {
+		var err error
+		userInput, err = c.resolveFormat(property.Format, description)
+		if err != nil {
+			return err
+		}
+	} else if property.Enum != nil {
 		// List options for the user to select from
 		options, err := property.EnumStringSlice()
 		if err != nil {
diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json
index 962913be06..34d2874f86 100644
--- a/libs/template/templates/dbt-sql/databricks_template_schema.json
+++ b/libs/template/templates/dbt-sql/databricks_template_schema.json
@@ -11,10 +11,9 @@
     },
     "http_path": {
       "type": "string",
-      "pattern": "^/sql/.\\../warehouses/[a-z0-9]+$",
-      "pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/<warehouse id>",
-      "description": "\nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]",
-      "order": 2
+      "description": "\nSelect a SQL warehouse to use during development (you can change this in databricks.yml later).",
+      "order": 2,
+      "format": "warehouse_path"
     },
     "default_catalog": {
       "type": "string",
diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl
index cd4c29a76d..1b40e4f3df 100644
--- a/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl
+++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/README.md.tmpl
@@ -88,12 +88,12 @@ $ dbt test
 
 ## Production setup
 
-Your production dbt profiles are defined in dbt_profiles/profiles.yml.
-These profiles define the default catalog, schema, and any other
+Your production dbt profiles are defined in `dbt_profiles/profiles.yml`.
+These profiles define the default warehouse, catalog, schema, and any other
 target-specific settings. Read more about dbt profiles on Databricks at
 https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile.
 
-The target workspaces for staging and prod are defined in databricks.yml.
+The target workspaces for staging and prod are defined in `databricks.yml`.
 You can manually deploy based on these configurations (see below).
 Or you can use CI/CD to automate deployment. See
 https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation
@@ -120,7 +120,7 @@ For example, the default template would deploy a job called
 You can find that job by opening your workspace and clicking on **Workflows**.
 
 You can also deploy to your production target directly from the command-line.
-The warehouse, catalog, and schema for that target are configured in databricks.yml.
+The warehouse, catalog, and schema for that target are configured in `dbt_profiles/profiles.yml`.
 When deploying to this target, note that the default job at resources/{{.project_name}}.job.yml
 has a schedule set that runs every day. The schedule is paused when deploying in
 development mode (see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
diff --git a/libs/template/templates/default-sql/databricks_template_schema.json b/libs/template/templates/default-sql/databricks_template_schema.json
index 113cbef642..cb73006001 100644
--- a/libs/template/templates/default-sql/databricks_template_schema.json
+++ b/libs/template/templates/default-sql/databricks_template_schema.json
@@ -11,10 +11,9 @@
     },
     "http_path": {
       "type": "string",
-      "pattern": "^/sql/.\\../warehouses/[a-z0-9]+$",
-      "pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/<warehouse id>",
-      "description": "\nPlease provide the HTTP Path of the SQL warehouse you would like to use during development.\nYou can find this path by clicking on \"Connection details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]",
-      "order": 2
+      "description": "\nSelect a SQL warehouse to use during development (you can change this in databricks.yml later).",
+      "order": 2,
+      "format": "warehouse_path"
     },
     "default_catalog": {
       "type": "string",
diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl
index e5c44320d9..866e2a41ab 100644
--- a/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl
+++ b/libs/template/templates/default-sql/template/{{.project_name}}/README.md.tmpl
@@ -39,3 +39,8 @@ The '{{.project_name}}' project was generated by using the default-sql template.
 7. For documentation on the Databricks Asset Bundles format used for this
    project, and for CI/CD configuration, see
    https://docs.databricks.com/dev-tools/bundles/index.html.
+
+## Changing the warehouse, catalog, or schema
+
+The default SQL warehouse, catalog, and schema are configured in `databricks.yml`.
+To change these settings, edit the `variables` section for each target (dev/prod).