diff --git a/cli/cmd/docs/generate_project.go b/cli/cmd/docs/generate_project.go
index fcc5668e687..22501221e69 100644
--- a/cli/cmd/docs/generate_project.go
+++ b/cli/cmd/docs/generate_project.go
@@ -29,25 +29,15 @@ func GenerateProjectDocsCmd(rootCmd *cobra.Command, ch *cmdutil.Helper) *cobra.C
 			projectPath := "runtime/parser/schema/project.schema.yaml"
 			projectFilesSchema, err := parseSchemaYAML(projectPath)
 			if err != nil {
-				return fmt.Errorf("resource schema error: %w", err)
+				return fmt.Errorf("project schema error: %w", err)
 			}
-			rillyamlPath := "runtime/parser/schema/rillyaml.schema.yaml"
-			rillYamlSchema, err := parseSchemaYAML(rillyamlPath)
-			if err != nil {
-				return fmt.Errorf("rillyaml schema error: %w", err)
-			}
-
-			// Add rillyaml to projectFilesSchema's oneOf
-			oneOfNode := getNodeForKey(projectFilesSchema, "oneOf")
-			if oneOfNode == nil {
-				oneOfNode = &yaml.Node{Kind: yaml.SequenceNode, Tag: "!!seq"}
-				projectFilesSchema.Content = append(projectFilesSchema.Content,
-					&yaml.Node{Kind: yaml.ScalarNode, Value: "oneOf"},
-					oneOfNode,
-				)
-			}
-			oneOfNode.Content = append(oneOfNode.Content, rillYamlSchema)
+			// Load rillyaml schema
+			// rillyamlPath := "runtime/parser/schema/rillyaml.schema.yaml"
+			// rillyamlSchema, err := parseSchemaYAML(rillyamlPath)
+			// if err != nil {
+			// 	return fmt.Errorf("rillyaml schema error: %w", err)
+			// }
 
 			var projectFilesbuf strings.Builder
 			sidebarPosition := 30
@@ -65,13 +55,30 @@ func GenerateProjectDocsCmd(rootCmd *cobra.Command, ch *cmdutil.Helper) *cobra.C
 			projectFilesbuf.WriteString(fmt.Sprintf("%s\n\n", desc))
 			projectFilesbuf.WriteString("## Project files types\n\n")
 
+			// Get the oneOf node which contains all resource types
+			oneOfNode := getNodeForKey(projectFilesSchema, "oneOf")
+			if oneOfNode == nil {
+				return fmt.Errorf("no oneOf found in project schema")
+			}
+
 			for _, resource := range oneOfNode.Content {
 				sidebarPosition++
 				var resourceFilebuf strings.Builder
 				requiredMap := getRequiredMapFromNode(resource)
-				resourceFilebuf.WriteString(generateDoc(sidebarPosition, 0, resource, "", requiredMap))
 				resTitle := getScalarValue(resource, "title")
-				fileName := sanitizeFileName(resTitle) + ".md"
+				resID := getScalarValue(resource, "id")
+
+				resourceFilebuf.WriteString(generateDoc(sidebarPosition, 0, resource, "", requiredMap, projectFilesSchema, resID))
+
+				// Use id if available, otherwise fall back to title
+				var fileName string
+				if resID != "" {
+					// Use the id directly for the filename
+					fileName = resID + ".md"
+				} else {
+					fileName = sanitizeFileName(resTitle) + ".md"
+				}
+
 				filePath := filepath.Join(outputDir, fileName)
 				if err := os.WriteFile(filePath, []byte(resourceFilebuf.String()), 0o644); err != nil {
 					return fmt.Errorf("failed writing resource doc: %w", err)
@@ -79,6 +86,21 @@ func GenerateProjectDocsCmd(rootCmd *cobra.Command, ch *cmdutil.Helper) *cobra.C
 				projectFilesbuf.WriteString(fmt.Sprintf("\n- [%s](%s)", resTitle, fileName))
 			}
 
+			// Generate rillyaml documentation
+			// sidebarPosition++
+			// var rillyamlFilebuf strings.Builder
+			// rillyamlTitle := getScalarValue(rillyamlSchema, "title")
+			// // rillyamlDesc := getPrintableDescription(rillyamlSchema, "", "")
+			// requiredMap := getRequiredMapFromNode(rillyamlSchema)
+			// rillyamlFilebuf.WriteString(generateDoc(sidebarPosition, 0, rillyamlSchema, "", requiredMap))
+
+			// rillyamlFileName := "rillyaml.md"
+			// rillyamlFilePath := filepath.Join(outputDir, rillyamlFileName)
+			// if err := os.WriteFile(rillyamlFilePath, []byte(rillyamlFilebuf.String()), 0o644); err != nil {
+			// 	return fmt.Errorf("failed writing rillyaml doc: %w", err)
+			// }
+			// projectFilesbuf.WriteString(fmt.Sprintf("\n- [%s](%s)", rillyamlTitle, rillyamlFileName))
+
 			if err := os.WriteFile(filepath.Join(outputDir, "index.md"), []byte(projectFilesbuf.String()), 0o644); err != nil {
 				return fmt.Errorf("failed writing index.md: %w", err)
 			}
@@ -87,6 +109,7 @@ func GenerateProjectDocsCmd(rootCmd *cobra.Command, ch *cmdutil.Helper) *cobra.C
 			return nil
 		},
 	}
+
 	return cmd
 }
 
@@ -147,26 +170,52 @@ func resolveRefsYAML(node, root *yaml.Node) error {
 		keyNode := node.Content[i]
 		valNode := node.Content[i+1]
 
-		if keyNode.Value == "$ref" && valNode.Kind == yaml.ScalarNode && strings.HasPrefix(valNode.Value, "#/") {
-			// Resolve local reference
-			ptrPath := strings.TrimPrefix(valNode.Value, "#/")
-			resolved, err := resolveYAMLPointer(root, ptrPath)
-			if err != nil {
-				return fmt.Errorf("resolve $ref %q: %w", valNode.Value, err)
-			}
+		if keyNode.Value == "$ref" && valNode.Kind == yaml.ScalarNode {
+			if strings.HasPrefix(valNode.Value, "#/") {
+				// Resolve local reference
+				ptrPath := strings.TrimPrefix(valNode.Value, "#/")
+				resolved, err := resolveYAMLPointer(root, ptrPath)
+				if err != nil {
+					return fmt.Errorf("resolve $ref %q: %w", valNode.Value, err)
+				}
 
-			// Replace the entire mapping with the resolved content
-			// First, remove $ref entry
-			node.Content = append(node.Content[:i], node.Content[i+2:]...)
-			// Then merge resolved content into current node
-			if resolved.Kind == yaml.MappingNode {
-				// Insert resolved mapping node's content at current position
-				node.Content = append(resolved.Content, node.Content...)
-			} else {
-				return fmt.Errorf("$ref does not point to a mapping node")
+				// Replace the entire mapping with the resolved content
+				// First, remove $ref entry
+				node.Content = append(node.Content[:i], node.Content[i+2:]...)
+				// Then merge resolved content into current node
+				if resolved.Kind == yaml.MappingNode {
+					// Insert resolved mapping node's content at current position
+					node.Content = append(resolved.Content, node.Content...)
+				} else {
+					return fmt.Errorf("$ref does not point to a mapping node")
+				}
+				// We modified Content length; restart loop
+				return resolveRefsYAML(node, root)
+			} else if strings.HasSuffix(valNode.Value, ".yaml#") {
+				// Resolve external file reference
+				fileName := strings.TrimSuffix(valNode.Value, "#")
+				// Remove quotes if present
+				fileName = strings.Trim(fileName, "'\"")
+
+				// Load the external schema file
+				externalSchema, err := parseSchemaYAML("runtime/parser/schema/" + fileName)
+				if err != nil {
+					return fmt.Errorf("failed to load external schema %q: %w", fileName, err)
+				}
+
+				// Replace the entire mapping with the external schema content
+				// First, remove $ref entry
+				node.Content = append(node.Content[:i], node.Content[i+2:]...)
+				// Then merge external schema content into current node
+				if externalSchema.Kind == yaml.MappingNode {
+					// Insert external schema's content at current position
+					node.Content = append(externalSchema.Content, node.Content...)
+				} else {
+					return fmt.Errorf("external schema %q does not contain a mapping node", fileName)
+				}
+				// We modified Content length; restart loop
+				return resolveRefsYAML(node, root)
 			}
-			// We modified Content length; restart loop
-			return resolveRefsYAML(node, root)
 		}
 		if err := resolveRefsYAML(valNode, root); err != nil {
 			return err
@@ -311,7 +360,7 @@ func getPrintableDescription(node *yaml.Node, indentation, defaultValue string)
 	return desc
 }
 
-func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, requiredFields map[string]bool) string {
+func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, requiredFields map[string]bool, rootSchema *yaml.Node, id string) string {
 	if node == nil || node.Kind != yaml.MappingNode {
 		return ""
 	}
@@ -320,6 +369,12 @@ func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, req
 	currentLevel := level
 	title := getScalarValue(node, "title")
 	description := getPrintableDescription(node, indent, "")
+
+	// Get the id at level 0, otherwise use the passed id
+	if level == 0 {
+		id = getScalarValue(node, "id")
+	}
+
 	if level == 0 {
 		doc.WriteString("---\n")
 		doc.WriteString("note: GENERATED. DO NOT EDIT.\n")
@@ -329,6 +384,7 @@ func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, req
 		if description != "" {
 			doc.WriteString(fmt.Sprintf("\n\n%s", description))
 		}
+
 		level++ // level zero prints the base page-level info, which happens only once per page, so increase the level
 	} else if level == 1 {
 		if title != "" {
@@ -358,46 +414,118 @@ func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, req
 			propType := getScalarValue(propertiesValueNode, "type")
 			if propType == "object" || propType == "array" || hasCombinators(propertiesValueNode) {
 				newlevel := level + 1
-				doc.WriteString(generateDoc(sidebarPosition, newlevel, propertiesValueNode, indent+"  ", getRequiredMapFromNode(propertiesValueNode)))
+				doc.WriteString(generateDoc(sidebarPosition, newlevel, propertiesValueNode, indent+"  ", getRequiredMapFromNode(propertiesValueNode), rootSchema, id))
 			}
-			if examples := getNodeForKey(propertiesValueNode, "examples"); examples != nil && examples.Kind == yaml.SequenceNode {
-				for _, example := range examples.Content {
-					b, err := yaml.Marshal(example)
-					if err != nil {
-						panic(err)
+			if examples := getNodeForKey(propertiesValueNode, "examples"); examples != nil {
+				if examples.Kind == yaml.SequenceNode {
+					// Handle array of YAML examples
+					for _, example := range examples.Content {
+						b, err := yaml.Marshal(example)
+						if err != nil {
+							panic(err)
+						}
+						doc.WriteString(fmt.Sprintf("\n\n```yaml\n%s```", string(b)))
 					}
-					doc.WriteString(fmt.Sprintf("\n\n```yaml\n%s```", string(b)))
+				} else if examples.Kind == yaml.ScalarNode {
+					// Handle string examples (like markdown code blocks)
+					doc.WriteString(fmt.Sprintf("\n\n%s", examples.Value))
 				}
 			}
 		}
 	} else if items := getNodeForKey(node, "items"); items != nil && items.Kind == yaml.MappingNode {
 		items := getNodeForKey(node, "items")
-		doc.WriteString(generateDoc(sidebarPosition, level, items, indent, getRequiredMapFromNode(items)))
+		doc.WriteString(generateDoc(sidebarPosition, level, items, indent, getRequiredMapFromNode(items), rootSchema, id))
 	}
 
 	// OneOf
 	if oneOf := getNodeForKey(node, "oneOf"); oneOf != nil && oneOf.Kind == yaml.SequenceNode {
-		if len(oneOf.Content) == 1 {
-			doc.WriteString(generateDoc(sidebarPosition, level, oneOf.Content[0], indent, getRequiredMapFromNode(oneOf.Content[0])))
-		} else {
-			if level == 1 {
doc.WriteString("\n\n## One of Properties Options") - for _, item := range oneOf.Content { - title := getScalarValue(item, "title") - if title != "" { - anchor := strings.ToLower(strings.ReplaceAll(title, " ", "-")) - doc.WriteString(fmt.Sprintf("\n- [%s](#%s)", title, anchor)) + // Special handling for connectors - generate copiable text for each connector type + if id == "connectors" { + doc.WriteString("\n\n## Available Connector Types\n\n") + for _, item := range oneOf.Content { + // Since $ref values are already resolved, look for connector definitions directly + title := getScalarValue(item, "title") + exampleOutput := generateConnectorExample(title, item) + + if title != "" { + doc.WriteString(fmt.Sprintf("\n\n### %s\n\n", title)) + // Generate copiable example for this connector + doc.WriteString(fmt.Sprintf("\n\n\n%s", exampleOutput)) + // Add description first + description := getPrintableDescription(item, indent, "") + if description != "" { + doc.WriteString(fmt.Sprintf("%s\n\n", description)) } } - for _, item := range oneOf.Content { - doc.WriteString(generateDoc(sidebarPosition, level, item, indent, getRequiredMapFromNode(item))) + + // Generate the connector definition documentation (properties, etc.) + if properties := getNodeForKey(item, "properties"); properties != nil && properties.Kind == yaml.MappingNode { + for j := 0; j < len(properties.Content); j += 2 { + propName := properties.Content[j].Value + propValue := properties.Content[j+1] + required := "" + if requiredFields := getRequiredMapFromNode(item); requiredFields[propName] { + required = "_(required)_" + } + + doc.WriteString(fmt.Sprintf("\n\n#### `%s`\n\n", propName)) + doc.WriteString(fmt.Sprintf("%s - %s %s", + getPrintableType(propValue), + getPrintableDescription(propValue, indent, "(no description)"), + required)) + } + } + + + } + // only for APIs, no other oneOf uses this, so we can be specific + } else if id == "apis" { + // Skip oneOf processing for connectors and data_properties at level 1 since we handle it above + if len(oneOf.Content) == 1 { + doc.WriteString(generateDoc(sidebarPosition, level, oneOf.Content[0], indent, getRequiredMapFromNode(oneOf.Content[0]), rootSchema, id)) + } else { + // Remove the summary list to avoid duplication with detailed sections + if level == 1 { + doc.WriteString("\n\n## One of Properties Options") + } + + for _, item := range oneOf.Content { + if hasType(item) || hasProperties(item) || hasCombinators(item) { + doc.WriteString(generateDoc(sidebarPosition, level, item, indent+" ", getRequiredMapFromNode(item), rootSchema, id)) + + } + + // Handle examples for oneOf items + if examples := getNodeForKey(item, "examples"); examples != nil { + if examples.Kind == yaml.SequenceNode { + // Handle array of YAML examples + for _, example := range examples.Content { + b, err := yaml.Marshal(example) + if err != nil { + panic(err) + } + doc.WriteString(fmt.Sprintf("\n\n```yaml\n%s```", string(b))) + } + } else if examples.Kind == yaml.ScalarNode { + // Handle string examples (like markdown code blocks) + doc.WriteString(fmt.Sprintf("\n\n%s", examples.Value)) + } + } + } } } else { - for i, item := range oneOf.Content { - if hasType(item) || hasProperties(item) || hasCombinators(item) { - doc.WriteString(fmt.Sprintf("\n\n%s- **option %d** - %s - %s", indent, i+1, getPrintableType(item), getPrintableDescription(item, indent, "(no description)"))) - doc.WriteString(generateDoc(sidebarPosition, level, item, indent+" ", getRequiredMapFromNode(item))) + if !(id == "connectors" || 
id == "apis") { + + if len(oneOf.Content) == 1 { + doc.WriteString(generateDoc(sidebarPosition, level, oneOf.Content[0], indent, getRequiredMapFromNode(oneOf.Content[0]), rootSchema, id)) + } else { + for i, item := range oneOf.Content { + if hasType(item) || hasProperties(item) || hasCombinators(item) { + doc.WriteString(fmt.Sprintf("\n\n%s- **option %d** - %s - %s", indent, i+1, getPrintableType(item), getPrintableDescription(item, indent, "(no description)"))) + doc.WriteString(generateDoc(sidebarPosition, level, item, indent+" ", getRequiredMapFromNode(item), rootSchema, id)) + + } } } } @@ -409,7 +537,7 @@ func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, req for i, item := range anyOf.Content { if hasType(item) || hasProperties(item) || hasCombinators(item) { doc.WriteString(fmt.Sprintf("\n\n%s- **option %d** - %s - %s", indent, i+1, getPrintableType(item), getPrintableDescription(item, indent, "(no description)"))) - doc.WriteString(generateDoc(sidebarPosition, level, item, indent+" ", getRequiredMapFromNode(item))) + doc.WriteString(generateDoc(sidebarPosition, level, item, indent+" ", getRequiredMapFromNode(item), rootSchema, id)) } } } @@ -417,6 +545,12 @@ func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, req // AllOf if allOf := getNodeForKey(node, "allOf"); allOf != nil && allOf.Kind == yaml.SequenceNode { for _, item := range allOf.Content { + // Special handling for connector oneOf + if id == "connectors" && getNodeForKey(item, "oneOf") != nil { + doc.WriteString(generateDoc(sidebarPosition, level, item, indent, getRequiredMapFromNode(item), rootSchema, id)) + continue + } + if hasIf(item) { ifNode := getNodeForKey(item, "if") title := getScalarValue(ifNode, "title") @@ -426,22 +560,67 @@ func generateDoc(sidebarPosition, level int, node *yaml.Node, indent string, req doc.WriteString(fmt.Sprintf("\n\n%s**%s**", indent, title)) } thenNode := getNodeForKey(item, "then") - doc.WriteString(generateDoc(sidebarPosition, level, thenNode, indent, getRequiredMapFromNode(item))) + doc.WriteString(generateDoc(sidebarPosition, level, thenNode, indent, getRequiredMapFromNode(item), rootSchema, id)) } else { - doc.WriteString(generateDoc(sidebarPosition, level, item, indent, getRequiredMapFromNode(item))) + doc.WriteString(generateDoc(sidebarPosition, level, item, indent, getRequiredMapFromNode(item), rootSchema, id)) + } + } + } + + // Definitions (for connectors) + if definitions := getNodeForKey(node, "definitions"); definitions != nil && definitions.Kind == yaml.MappingNode && id == "connectors" && level == 1 { + for i := 0; i < len(definitions.Content); i += 2 { + connectorDef := definitions.Content[i+1] + + title := getScalarValue(connectorDef, "title") + if title != "" { + doc.WriteString(fmt.Sprintf("\n\n## %s\n\n", title)) + // Add description first + description := getPrintableDescription(connectorDef, indent, "") + if description != "" { + doc.WriteString(fmt.Sprintf("%s\n\n", description)) + } } + exampleOutput := generateConnectorExample(title, connectorDef) + doc.WriteString(fmt.Sprintf("\n\n#### Example\n\n%s", exampleOutput)) + + // Generate the connector definition documentation (properties, etc.) 
+			// We need to process properties manually to avoid duplicate headers
+			if properties := getNodeForKey(connectorDef, "properties"); properties != nil && properties.Kind == yaml.MappingNode {
+				for j := 0; j < len(properties.Content); j += 2 {
+					propName := properties.Content[j].Value
+					propValue := properties.Content[j+1]
+					required := ""
+					if requiredFields := getRequiredMapFromNode(connectorDef); requiredFields[propName] {
+						required = "_(required)_"
+					}
+					// changed to #### so it's not in the sidebar, and more compact
+					doc.WriteString(fmt.Sprintf("\n\n#### `%s`\n\n", propName))
+					doc.WriteString(fmt.Sprintf("%s - %s %s",
+						getPrintableType(propValue),
+						getPrintableDescription(propValue, indent, "(no description)"),
+						required))
+				}
+			}
+		}
 	}
 
 	// Examples
-	if examples := getNodeForKey(node, "examples"); examples != nil && examples.Kind == yaml.SequenceNode && currentLevel == 0 {
+	if examples := getNodeForKey(node, "examples"); examples != nil && currentLevel == 0 {
 		doc.WriteString("\n\n## Examples")
-		for _, example := range examples.Content {
-			b, err := yaml.Marshal(example)
-			if err != nil {
-				panic(err)
+		if examples.Kind == yaml.SequenceNode {
+			// Handle array of YAML examples
+			for _, example := range examples.Content {
+				b, err := yaml.Marshal(example)
+				if err != nil {
+					panic(err)
+				}
+				doc.WriteString(fmt.Sprintf("\n\n```yaml\n%s```", string(b)))
 			}
-			doc.WriteString(fmt.Sprintf("\n\n```yaml\n%s```", string(b)))
+		} else if examples.Kind == yaml.ScalarNode {
+			// Handle string examples (like markdown code blocks)
+			doc.WriteString(fmt.Sprintf("\n\n%s", examples.Value))
 		}
 	}
 
@@ -463,3 +642,83 @@ func hasProperties(node *yaml.Node) bool {
 func hasCombinators(node *yaml.Node) bool {
 	return getNodeForKey(node, "anyOf") != nil || getNodeForKey(node, "oneOf") != nil || getNodeForKey(node, "allOf") != nil
 }
+
+func generateConnectorExample(connectorType string, connectorDef *yaml.Node) string {
+	if connectorDef == nil {
+		return ""
+	}
+
+	var example strings.Builder
+	example.WriteString("```yaml\n")
+	example.WriteString("type: connector # Must be `connector` (required)\n")
+
+	// Get the driver from the schema and add it first
+	driverAdded := false
+	if driver := getNodeForKey(connectorDef, "driver"); driver != nil {
+		if constVal := getScalarValue(driver, "const"); constVal != "" {
+			example.WriteString(fmt.Sprintf("driver: %s # Must be `%s` _(required)_\n\n", constVal, constVal))
+			driverAdded = true
+		}
+	}
+
+	// Fallback: if driver wasn't found, use the connector type name
+	if !driverAdded {
+		// Special case for MotherDuck which uses duckdb driver
+		if connectorType == "MotherDuck" {
+			example.WriteString("driver: duckdb # Must be `duckdb` _(required)_\n\n")
+		} else {
+			example.WriteString(fmt.Sprintf("driver: %s # Must be `%s` _(required)_\n\n", strings.ToLower(connectorType), strings.ToLower(connectorType)))
+		}
+	}
+
+	// Get all properties from the schema
+	if properties := getNodeForKey(connectorDef, "properties"); properties != nil && properties.Kind == yaml.MappingNode {
+		for i := 0; i < len(properties.Content); i += 2 {
+			propName := properties.Content[i].Value
+			propValue := properties.Content[i+1]
+
+			// Skip the driver property since we already added it
+			if propName == "driver" {
+				continue
+			}
+
+			// Get property description
+			description := getPrintableDescription(propValue, "", "")
+			if description == "" {
+				description = "Property description"
+			}
+
+			// Get sample value from the schema
"sample") + if sampleValue == "" { + // Fallback to const value if no sample + sampleValue = getScalarValue(propValue, "const") + } + if sampleValue == "" { + // Final fallback + sampleValue = "example_value" + } + + // Check if it's required + required := "" + if requiredFields := getRequiredMapFromNode(connectorDef); requiredFields[propName] { + required = " _(required)_" + } + + // Format the line with proper alignment + example.WriteString(fmt.Sprintf("%s: %s", propName, sampleValue)) + + // Add padding for alignment + padding := 35 - len(propName) - len(sampleValue) + if padding > 0 { + example.WriteString(strings.Repeat(" ", padding)) + } + + example.WriteString(fmt.Sprintf("# %s%s\n", description, required)) + } + } + + example.WriteString("```\n\n") + return example.String() +} + diff --git a/docs/docs/hidden/yaml/model.md b/docs/docs/hidden/yaml/advanced-models.md similarity index 62% rename from docs/docs/hidden/yaml/model.md rename to docs/docs/hidden/yaml/advanced-models.md index d0cf32a661c..04df304f789 100644 --- a/docs/docs/hidden/yaml/model.md +++ b/docs/docs/hidden/yaml/advanced-models.md @@ -1,9 +1,25 @@ --- note: GENERATED. DO NOT EDIT. -title: Model YAML -sidebar_position: 38 +title: Models YAML +sidebar_position: 34 --- +:::tip + +Both regular models and source models can use the Model YAML specification described on this page. While [SQL models](./models) are perfect for simple transformations, Model YAML files provide advanced capabilities for complex data processing scenarios. + +**When to use Model YAML:** +- **Partitions** - Optimize performance with data partitioning strategies +- **Incremental models** - Process only new or changed data efficiently +- **Pre/post execution hooks** - Run custom logic before or after model execution +- **Staging** - Create intermediate tables for complex transformations +- **Output configuration** - Define specific output formats and destinations + +Model YAML files give you fine-grained control over how your data is processed and transformed, making them ideal for production workloads and complex analytics pipelines. + +::: + + ## Properties ### `type` @@ -24,15 +40,39 @@ _[object]_ - Specifies the refresh schedule that Rill should follow to re-ingest - **`run_in_dev`** - _[boolean]_ - If true, allows the schedule to run in development mode. +```yaml +refresh: + cron: "* * * * *" + #every: "24h" +``` + + ### `connector` -_[string]_ - Refers to the connector type or [named connector](./connector.md#name) for the source. - +_[string]_ - Refers to the resource type and is needed if setting an explicit OLAP engine. IE `clickhouse` ### `sql` _[string]_ - Raw SQL query to run against source _(required)_ +### `pre_exec` + +_[string]_ - Refers to SQL queries to run before the main query, available for DuckDB-based models. (optional). +Ensure pre_exec queries are idempotent. Use IF NOT EXISTS statements when applicable. +```yaml +pre_exec: ATTACH IF NOT EXISTS 'dbname=postgres host=localhost port=5432 user=postgres password=postgres' AS postgres_db (TYPE POSTGRES) +``` + + +### `post_exec` + +_[string]_ - Refers to a SQL query that is run after the main query, available for DuckDB-based models. (optional). +Ensure post_exec queries are idempotent. Use IF EXISTS statements when applicable. 
+```yaml
+post_exec: DETACH DATABASE IF EXISTS postgres_db
+```
+
+
 ### `timeout`
 
 _[string]_ - The maximum time to wait for model ingestion
 
@@ -81,6 +121,12 @@ _[oneOf]_ - Refers to the explicitly defined state of your model, cannot be used
 
 - **`where_error`** - _[boolean]_ - Indicates whether the condition should trigger when the resource is in an error state.
 
+```yaml
+state:
+  sql: SELECT MAX(date) as max_date
+```
+
+
 ### `partitions`
 
 _[oneOf]_ - Refers to the how your data is partitioned, cannot be used with state. (optional)
 
@@ -117,6 +163,17 @@ _[oneOf]_ - Refers to the how your data is partitioned, cannot be used with stat
 
 - **`where_error`** - _[boolean]_ - Indicates whether the condition should trigger when the resource is in an error state.
 
+```yaml
+partitions:
+  glob: gcs://my_bucket/y=*/m=*/d=*/*.parquet
+```
+```yaml
+partitions:
+  connector: duckdb
+  sql: SELECT range AS num FROM range(0,10)
+```
+
+
 ### `materialize`
 
 _[boolean]_ - models will be materialized in olap
 
@@ -135,6 +192,15 @@ _[object]_ - in the case of staging models, where an input source does not suppo
 
 - **`connector`** - _[string]_ - Refers to the connector type for the staging table _(required)_
 
+ - **`path`** - _[string]_ - Refers to the path to the staging table
+
+```yaml
+stage:
+  connector: s3
+  path: s3://my_bucket/my_staging_table
+```
+
+
 ### `output`
 
 _[object]_ - to define the properties of output
 
@@ -201,198 +267,14 @@ _[object]_ - Overrides any properties in development environment.
 
 _[object]_ - Overrides any properties in production environment.
 
-## Additional properties when `connector` is `athena` or [named connector](./connector.md#name) for athena
-
-### `output_location`
-
-_[string]_ - Output location for query results in S3.
-
-### `workgroup`
-
-_[string]_ - AWS Athena workgroup to use for queries.
-
-### `region`
-
-_[string]_ - AWS region to connect to Athena and the output location.
-
-## Additional properties when `connector` is `azure` or [named connector](./connector.md#name) of azure
-
-### `path`
-
-_[string]_ - Path to the source
-
-### `account`
-
-_[string]_ - Account identifier
-
-### `uri`
-
-_[string]_ - Source URI
-
-### `extract`
-
-_[object]_ - Arbitrary key-value pairs for extraction settings
-
-### `glob`
-
-_[object]_ - Settings related to glob file matching.
-
- - **`max_total_size`** - _[integer]_ - Maximum total size (in bytes) matched by glob
-
- - **`max_objects_matched`** - _[integer]_ - Maximum number of objects matched by glob
-
- - **`max_objects_listed`** - _[integer]_ - Maximum number of objects listed in glob
-
- - **`page_size`** - _[integer]_ - Page size for glob listing
-
-### `batch_size`
-
-_[string]_ - Size of a batch (e.g., '100MB')
-
-## Additional properties when `connector` is `bigquery` or [named connector](./connector.md#name) of bigquery
-
-### `project_id`
-
-_[string]_ - ID of the BigQuery project.
-
-## Additional properties when `connector` is `duckdb` or [named connector](./connector.md#name) of duckdb
-
-### `path`
+## Connector-specific properties
 
-_[string]_ - Path to the data source.
+Depending on the connector, additional properties may be required; for more information, see the [connectors](./connectors.md) documentation
 
-### `format`
-
-_[string]_ - Format of the data source (e.g., csv, json, parquet).
-
-### `pre_exec`
-
-_[string]_ - refers to SQL queries to run before the main query, available for DuckDB-based models. _(optional)_. Ensure `pre_exec` queries are idempotent. Use `IF NOT EXISTS` statements when applicable.
-
-### `post_exec`
-
-_[string]_ - refers to a SQL query that is run after the main query, available for DuckDB-based models. _(optional)_. Ensure `post_exec` queries are idempotent. Use `IF EXISTS` statements when applicable.
 
-```yaml
-pre_exec: ATTACH IF NOT EXISTS 'dbname=postgres host=localhost port=5432 user=postgres password=postgres' AS postgres_db (TYPE POSTGRES);
-sql: SELECT * FROM postgres_query('postgres_db', 'SELECT * FROM USERS')
-post_exec: DETACH DATABASE IF EXISTS postgres_db
-```
+## Examples
 
-## Additional properties when `connector` is `gcs` or [named connector](./connector.md#name) of gcs
-
-### `path`
-
-_[string]_ - Path to the source
-
-### `uri`
-
-_[string]_ - Source URI
-
-### `extract`
-
-_[object]_ - key-value pairs for extraction settings
-
-### `glob`
-
-_[object]_ - Settings related to glob file matching.
-
- - **`max_total_size`** - _[integer]_ - Maximum total size (in bytes) matched by glob
-
- - **`max_objects_matched`** - _[integer]_ - Maximum number of objects matched by glob
-
- - **`max_objects_listed`** - _[integer]_ - Maximum number of objects listed in glob
-
- - **`page_size`** - _[integer]_ - Page size for glob listing
-
-### `batch_size`
-
-_[string]_ - Size of a batch (e.g., '100MB')
-
-## Additional properties when `connector` is `local_file` or [named connector](./connector.md#name) of local_file
-
-### `path`
-
-_[string]_ - Path to the data source.
-
-### `format`
-
-_[string]_ - Format of the data source (e.g., csv, json, parquet).
-
-## Additional properties when `connector` is `redshift` or [named connector](./connector.md#name) of redshift
-
-### `output_location`
-
-_[string]_ - S3 location where query results are stored.
-
-### `workgroup`
-
-_[string]_ - Redshift Serverless workgroup to use.
-
-### `database`
-
-_[string]_ - Name of the Redshift database.
-
-### `cluster_identifier`
-
-_[string]_ - Identifier of the Redshift cluster.
-
-### `role_arn`
-
-_[string]_ - ARN of the IAM role to assume for Redshift access.
-
-### `region`
-
-_[string]_ - AWS region of the Redshift deployment.
-
-## Additional properties when `connector` is `s3` or [named connector](./connector.md#name) of s3
-
-### `region`
-
-_[string]_ - AWS region
-
-### `endpoint`
-
-_[string]_ - AWS Endpoint
-
-### `path`
-
-_[string]_ - Path to the source
-
-### `uri`
-
-_[string]_ - Source URI
-
-### `extract`
-
-_[object]_ - key-value pairs for extraction settings
-
-### `glob`
-
-_[object]_ - Settings related to glob file matching.
-
- - **`max_total_size`** - _[integer]_ - Maximum total size (in bytes) matched by glob
-
- - **`max_objects_matched`** - _[integer]_ - Maximum number of objects matched by glob
-
- - **`max_objects_listed`** - _[integer]_ - Maximum number of objects listed in glob
-
- - **`page_size`** - _[integer]_ - Page size for glob listing
-
-### `batch_size`
-
-_[string]_ - Size of a batch (e.g., '100MB')
-
-## Additional properties when `connector` is `salesforce` or [named connector](./connector.md#name) of salesforce
-
-### `soql`
-
-_[string]_ - SOQL query to execute against the Salesforce instance.
-
-### `sobject`
-
-_[string]_ - Salesforce object (e.g., Account, Contact) targeted by the query.
-
-### `queryAll`
-
-_[boolean]_ - Whether to include deleted and archived records in the query (uses queryAll API).
+### Incremental model
+```yaml
+test
+```
\ No newline at end of file
diff --git a/docs/docs/hidden/yaml/alert.md b/docs/docs/hidden/yaml/alerts.md
similarity index 91%
rename from docs/docs/hidden/yaml/alert.md
rename to docs/docs/hidden/yaml/alerts.md
index 8f1c6aaa99d..2a93f42ffd5 100644
--- a/docs/docs/hidden/yaml/alert.md
+++ b/docs/docs/hidden/yaml/alerts.md
@@ -1,7 +1,7 @@
 ---
 note: GENERATED. DO NOT EDIT.
 title: Alert YAML
-sidebar_position: 31
+sidebar_position: 38
 ---
 
 Along with alertings at the dashboard level and can be created via the UI, there might be more extensive alerting that you might want to develop and can be done so the an alert.yaml. When creating an alert via a YAML file, you'll see this denoted in the UI as `Created through code`.
@@ -12,13 +12,15 @@
 
 _[string]_ - Refers to the resource type and must be `alert` _(required)_
 
-### `display_name`
-
-_[string]_ - Refers to the display name for the alert
-
 ### `refresh`
 
-_[object]_ - Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying data _(required)_
+_[object]_ - Refresh schedule for the alert
+  ```yaml
+  refresh:
+    cron: "* * * * *"
+    #every: "24h"
+  ```
+  _(required)_
 
 - **`cron`** - _[string]_ - A cron expression that defines the execution schedule
@@ -30,6 +32,14 @@
 
 - **`run_in_dev`** - _[boolean]_ - If true, allows the schedule to run in development mode.
 
+### `display_name`
+
+_[string]_ - Display name for the alert
+
+### `description`
+
+_[string]_ - Description for the alert
+
 ### `intervals`
 
 _[object]_ - define the interval of the alert to check
 
@@ -42,7 +52,7 @@ _[object]_ - define the interval of the alert to check
 
 ### `watermark`
 
-_[string]_ - Specifies how the watermark is determined for incremental processing. Use 'trigger_time' to set it at runtime or 'inherit' to use the upstream model's watermark.
+_[string]_ - Specifies how the watermark is determined for incremental processing. Use 'trigger_time' to set it at runtime or 'inherit' to use the upstream model's watermark. 
 
 ### `timeout`
 
 _[string]_ - define the timeout of the alert in seconds (optional).
 
 ### `data`
 
-_[oneOf]_ - Specifies one of the options to retrieve or compute the data used by alert _(required)_
+_[oneOf]_ - Data source for the alert _(required)_
 
 - **option 1** - _[object]_ - Executes a raw SQL query against the project's data models.
 
@@ -122,7 +132,7 @@ _[string]_ - Defines the re-notification interval for the alert (e.g., '10m','24
 
 ### `notify`
 
-_[object]_ - Defines how and where to send notifications. At least one method (email or Slack) is required. _(required)_
+_[object]_ - Notification configuration _(required)_
 
 - **`email`** - _[object]_ - Send notifications via email.
 
diff --git a/docs/docs/hidden/yaml/api.md b/docs/docs/hidden/yaml/apis.md
similarity index 77%
rename from docs/docs/hidden/yaml/api.md
rename to docs/docs/hidden/yaml/apis.md
index 177e67c4243..5f3006827cb 100644
--- a/docs/docs/hidden/yaml/api.md
+++ b/docs/docs/hidden/yaml/apis.md
@@ -1,10 +1,10 @@
 ---
 note: GENERATED. DO NOT EDIT.
 title: API YAML
-sidebar_position: 32
+sidebar_position: 39
 ---
 
-In your Rill project directory, create a new file name `.yaml` in the `apis` directory containing a custom API definition. See comprehensive documentation on how to define and use [custom APIs](/integrate/custom-apis/index.md)
+Custom APIs allow you to create endpoints that can be called to retrieve or manipulate data.
 
 ## Properties
 
@@ -26,14 +26,10 @@ _[object]_ - OpenAPI specification for the API endpoint
 
 ### `security`
 
-_[object]_ - Defines security rules and access control policies for resources
+_[object]_ - Defines [security rules and access control policies](/manage/security) for resources
 
 - **`access`** - _[oneOf]_ - Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean.
 
- - **option 1** - _[string]_ - SQL expression that evaluates to a boolean to determine access
-
- - **option 2** - _[boolean]_ - Direct boolean value to allow or deny access
-
 - **`row_filter`** - _[string]_ - SQL expression to filter the underlying model by. Can leverage templated user attributes to customize the filter for the requesting user. Needs to be a valid SQL expression that can be injected into a WHERE clause
 
 - **`include`** - _[array of object]_ - List of dimension or measure names to include in the dashboard. If include is defined all other dimensions and measures are excluded
@@ -75,11 +71,6 @@
 
 _[boolean]_ - Flag to control security inheritance
 
 ## One of Properties Options
-- [SQL Query](#sql-query)
-- [Metrics View Query](#metrics-view-query)
-- [Custom API Call](#custom-api-call)
-- [File Glob Query](#file-glob-query)
-- [Resource Status Check](#resource-status-check)
 
 ## SQL Query
 
@@ -93,6 +84,12 @@ _[string]_ - Raw SQL query to run against existing models in the project. _(requ
 
 _[string]_ - specifies the connector to use when running SQL or glob queries.
 
+```yaml
+type: api
+sql: "SELECT * FROM table_name WHERE date >= '2024-01-01'"
+```
+
+
 ## Metrics View Query
 
 Executes a SQL query that targets a defined metrics view.
 
@@ -101,6 +98,13 @@
 
 _[string]_ - SQL query that targets a metrics view in the project _(required)_
 
+```yaml
+type: api
+
+metrics_sql: "SELECT * FROM user_metrics WHERE date >= '2024-01-01'"
+```
+
+
 ## Custom API Call
 
 Calls a custom API defined in the project to compute data.
 
@@ -113,6 +117,15 @@
 
 _[string]_ - Name of a custom API defined in the project. _(required)_
 
 _[object]_ - Arguments to pass to the custom API.
 
+```yaml
+type: api
+api: "user_analytics_api"
+args:
+  start_date: "2024-01-01"
+  limit: 10
+```
+
+
 ## File Glob Query
 
 Uses a file-matching pattern (glob) to query data from a connector.
 
@@ -121,14 +134,21 @@
 
 _[anyOf]_ - Defines the file path or pattern to query from the specified connector. _(required)_
 
- - **option 1** - _[string]_ - A simple file path/glob pattern as a string.
+ - **option 1** - _[string]_ - A simple file path/glob pattern as a string. 
 
- - **option 2** - _[object]_ - An object-based configuration for specifying a file path/glob pattern with advanced options.
+ - **option 2** - _[object]_ - An object-based configuration for specifying a file path/glob pattern with advanced options. 
 
 ### `connector`
 
 _[string]_ - Specifies the connector to use with the glob input.
 
+```yaml
+type: api
+
+glob: "data/*.csv"
+```
+
+
 ## Resource Status Check
 
 Uses the status of a resource as data.
@@ -137,18 +157,10 @@
 
 _[object]_ - Based on resource status _(required)_
 
- - **`where_error`** - _[boolean]_ - Indicates whether the condition should trigger when the resource is in an error state.
-
-## Examples
+ - **`where_error`** - _[boolean]_ - Indicates whether the condition should trigger when the resource is in an error state. 
 
 ```yaml
-# Example: This api returns the top 10 authors by net line changes since the specified date provided in the arguments.
 type: api
-name: metrics_view_api
-metrics_sql: |-
-  SELECT author_name, net_line_changes
-  FROM advanced_metrics_view
-  where author_date > '{{ .args.date }}'
-  order by net_line_changes DESC
-  limit 10
-```
\ No newline at end of file
+resource_status:
+  where_error: true
+```
diff --git a/docs/docs/hidden/yaml/canvas.md b/docs/docs/hidden/yaml/canvas-dashboards.md
similarity index 54%
rename from docs/docs/hidden/yaml/canvas.md
rename to docs/docs/hidden/yaml/canvas-dashboards.md
index c1e160cb8a4..327136a5485 100644
--- a/docs/docs/hidden/yaml/canvas.md
+++ b/docs/docs/hidden/yaml/canvas-dashboards.md
@@ -1,10 +1,10 @@
 ---
 note: GENERATED. DO NOT EDIT.
-title: Canvas YAML
-sidebar_position: 33
+title: Canvas Dashboard YAML
+sidebar_position: 36
 ---
 
-In your Rill project directory, create a explore dashboard, `.yaml`, file in the `dashboards` directory. Rill will ingest the dashboard definition next time you run `rill start`.
+Canvas dashboards provide a flexible way to create custom dashboards with drag-and-drop components.
 
 ## Properties
 
@@ -14,12 +14,42 @@
 
 _[string]_ - Refers to the resource type and must be `canvas` _(required)_
 
 ### `display_name`
 
-_[string]_ - Refers to the display name for the canvas
+_[string]_ - Refers to the display name for the canvas _(required)_
+
+### `description`
+
+_[string]_ - Description for the canvas dashboard
 
 ### `banner`
 
 _[string]_ - Refers to the custom banner displayed at the header of an Canvas dashboard
 
+### `rows`
+
+_[array of object]_ - Refers to all of the rows displayed on the Canvas
+
+ - **`height`** - _[string]_ - Height of the row in px
+
+ - **`items`** - _[array of object]_ - List of components to display in the row
+
+  - **`component`** - _[string]_ - Name of the component to display. Each component type has its own set of properties.
+    Available component types:
+
+    - **markdown** - Text component, uses markdown formatting
+    - **kpi_grid** - KPI component, similar to TDD in Rill Explore, display quick KPI charts
+    - **stacked_bar_normalized** - Bar chart normalized to 100% values
+    - **line_chart** - Normal Line chart
+    - **bar_chart** - Normal Bar chart
+    - **stacked_bar** - Stacked Bar chart
+    - **area_chart** - Line chart with area
+    - **image** - Provide a URL to embed into canvas dashboard
+    - **table** - Similar to Pivot table, add dimensions and measures to visualize your data
+    - **heatmap** - Heat Map chart to visualize distribution of data
+    - **donut_chart** - Donut or Pie chart to display sums of total
+
+  - **`width`** - _[string, integer]_ - Width of the component (can be a number or string with unit)
+
 ### `max_width`
 
 _[integer]_ - Max width in pixels of the canvas
 
@@ -32,19 +62,11 @@
 
 _[integer]_ - Horizontal gap in pixels of the canvas
 
 _[integer]_ - Vertical gap in pixels of the canvas
 
-### `theme`
-
-_[oneOf]_ - Name of the theme to use or define a theme inline. Either theme name or inline theme can be set.
-
- - **option 1** - _[string]_ - Name of an existing theme to apply to the dashboard
-
- - **option 2** - _[object]_ - Inline theme configuration.
-
-  - **`colors`** - _[object]_ - Used to override the dashboard colors. Either primary or secondary color must be provided.
-
-   - **`primary`** - _[string]_ - Overrides the primary blue color in the dashboard. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors).
-
-   - **`secondary`** - _[string]_ - Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats.
+### `filters`
+
+_[object]_ - Indicates if filters should be enabled for the canvas.
+
+ - **`enable`** - _[boolean]_ - Toggles filtering functionality for the canvas dashboard.
 
 ### `allow_custom_time_range`
 
 _[boolean]_ - Defaults to true, when set to false it will hide the ability to set a custom time range for the user.
 
 ### `time_ranges`
 
-_[array of oneOf]_ - Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets'
+_[array of oneOf]_ - Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets' 
+  ```yaml
+  time_ranges:
+    - PT15M # Simplified syntax to specify only the range
+    - PT1H
+    - PT6H
+    - P7D
+    - range: P5D # Advanced syntax to specify comparison_offsets as well
+    - P4W
+    - rill-TD # Today
+    - rill-WTD # Week-To-date
+  ```
 
 - **option 1** - _[string]_ - a valid [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) duration or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) extensions for the selection
 
@@ -74,47 +108,40 @@ _[array of oneOf]_ - Overrides the list of default time range selections availab
 
 _[array of string]_ - Refers to the time zones that should be pinned to the top of the time zone selector. It should be a list of [IANA time zone identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones)
 
-### `filters`
-
-_[object]_ - Indicates if filters should be enabled for the canvas.
-
- - **`enable`** - _[boolean]_ - Toggles filtering functionality for the canvas dashboard.
-
 ### `defaults`
 
-_[object]_ - Preset UI state to show by default
-
- - **`time_range`** - _[string]_ - Default time range to display when the dashboard loads
+_[object]_ - defines the defaults YAML struct
+  ```yaml
+  defaults: #define all the defaults within here
+    time_range: P1M
+    comparison_mode: dimension #time, none
+    comparison_dimension: filename
+  ```
+
- - **`comparison_mode`** - _[string]_ - Default comparison mode for metrics (none, time, or dimension)
+  - **`time_range`** - _[string]_ - Refers to the default time range shown when a user initially loads the dashboard. The value must be either a valid [ISO 8601 duration](https://en.wikipedia.org/wiki/ISO_8601#Durations) (for example, PT12H for 12 hours, P1M for 1 month, or P26W for 26 weeks) or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions)
 
- - **`comparison_dimension`** - _[string]_ - Default dimension to use for comparison when comparison_mode is 'dimension'
+  - **`comparison_mode`** - _[string]_ - Controls how to compare current data with historical or categorical baselines. Options: `none` (no comparison), `time` (compares with past based on default_time_range), `dimension` (compares based on comparison_dimension values)
 
-### `variables`
+  - **`comparison_dimension`** - _[string]_ - for dimension mode, specify the comparison dimension by name
 
-_[array of object]_ - Variables that can be used in the canvas
-
- - **`name`** - _[string]_ - Unique identifier for the variable _(required)_
-
- - **`type`** - _[string]_ - Data type of the variable (e.g., string, number, boolean) _(required)_
-
- - **`value`** - _[string, number, boolean, object, array]_ - Default value for the variable. Can be any valid JSON value type
-
-### `rows`
+### `theme`
 
-_[array of object]_ - Refers to all of the rows displayed on the Canvas _(required)_
+_[oneOf]_ - Name of the theme to use. Only one of theme and embedded_theme can be set.
 
- - **`height`** - _[string]_ - Height of the row in px
+ - **option 1** - _[string]_ - Name of an existing theme to apply to the dashboard
 
- - **`items`** - _[array of object]_ - List of components to display in the row
+ - **option 2** - _[object]_ - Inline theme configuration.
 
- - **`component`** - _[string]_ - Name of the component to display
+  - **`colors`** - _[object]_ - Used to override the dashboard colors. Either primary or secondary color must be provided.
 
- - **`width`** - _[string, integer]_ - Width of the component (can be a number or string with unit)
+   - **`primary`** - _[string]_ - Overrides the primary blue color in the dashboard. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors).
+
+   - **`secondary`** - _[string]_ - Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats.
 
 ### `security`
 
-_[object]_ - Defines security rules and access control policies for resources
+_[object]_ - Defines [security rules and access control policies](/manage/security) for dashboards (without row filtering)
 
 - **`access`** - _[oneOf]_ - Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean.
 
 - **option 1** - _[string]_ - SQL expression that evaluates to a boolean to determine access
 
 - **option 2** - _[boolean]_ - Direct boolean value to allow or deny access
 
@@ -122,42 +149,6 @@
 
- - **`row_filter`** - _[string]_ - SQL expression to filter the underlying model by. Can leverage templated user attributes to customize the filter for the requesting user. Needs to be a valid SQL expression that can be injected into a WHERE clause
-
- - **`include`** - _[array of object]_ - List of dimension or measure names to include in the dashboard. If include is defined all other dimensions and measures are excluded
-
-  - **`if`** - _[string]_ - Expression to decide if the column should be included or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean _(required)_
-
-  - **`names`** - _[anyOf]_ - List of fields to include. Should match the name of one of the dashboard's dimensions or measures _(required)_
-
-   - **option 1** - _[array of string]_ - List of specific field names to include
-
-   - **option 2** - _[string]_ - Wildcard '*' to include all fields
-
- - **`exclude`** - _[array of object]_ - List of dimension or measure names to exclude from the dashboard. If exclude is defined all other dimensions and measures are included
-
-  - **`if`** - _[string]_ - Expression to decide if the column should be excluded or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean _(required)_
-
-  - **`names`** - _[anyOf]_ - List of fields to exclude. Should match the name of one of the dashboard's dimensions or measures _(required)_
-
-   - **option 1** - _[array of string]_ - List of specific field names to exclude
-
-   - **option 2** - _[string]_ - Wildcard '*' to exclude all fields
-
- - **`rules`** - _[array of object]_ - List of detailed security rules that can be used to define complex access control policies
-
-  - **`type`** - _[string]_ - Type of security rule - access (overall access), field_access (field-level access), or row_filter (row-level filtering) _(required)_
-
-  - **`action`** - _[string]_ - Whether to allow or deny access for this rule
-
-  - **`if`** - _[string]_ - Conditional expression that determines when this rule applies. Must be a valid SQL expression that evaluates to a boolean
-
-  - **`names`** - _[array of string]_ - List of field names this rule applies to (for field_access type rules)
-
-  - **`all`** - _[boolean]_ - When true, applies the rule to all fields (for field_access type rules)
-
-  - **`sql`** - _[string]_ - SQL expression for row filtering (for row_filter type rules)
-
 ## Common Properties
 
 ### `name`
diff --git a/docs/docs/hidden/yaml/component.md b/docs/docs/hidden/yaml/component.md
deleted file mode 100644
index b969461ddfb..00000000000
--- a/docs/docs/hidden/yaml/component.md
+++ /dev/null
@@ -1,59 +0,0 @@
----
-note: GENERATED. DO NOT EDIT.
-title: Component YAML
-sidebar_position: 34
----
-
-Defines a reusable dashboard component that can be embedded in canvas dashboards
-
-## Properties
-
-### `type`
-
-_[string]_ - Refers to the resource type and must be `component` _(required)_
-
-### `display_name`
-
-_[string]_ - Refers to the display name for the component
-
-### `description`
-
-_[string]_ - Detailed description of the component's purpose and functionality
-
-### `input`
-
-_[array of object]_ - List of input variables that can be passed to the component
-
- - **`name`** - _[string]_ - Unique identifier for the variable _(required)_
-
- - **`type`** - _[string]_ - Data type of the variable (e.g., string, number, boolean) _(required)_
-
- - **`value`** - _[string, number, boolean, object, array]_ - Default value for the variable. Can be any valid JSON value type
-
-### `output`
-
-_[object]_ - Output variable that the component produces
-
- - **`name`** - _[string]_ - Unique identifier for the variable _(required)_
-
- - **`type`** - _[string]_ - Data type of the variable (e.g., string, number, boolean) _(required)_
-
- - **`value`** - _[string, number, boolean, object, array]_ - Default value for the variable. Can be any valid JSON value type
-
-## Common Properties
-
-### `name`
-
-_[string]_ - Name is usually inferred from the filename, but can be specified manually.
-
-### `refs`
-
-_[array of string]_ - List of resource references
-
-### `dev`
-
-_[object]_ - Overrides any properties in development environment.
-
-### `prod`
-
-_[object]_ - Overrides any properties in production environment.
\ No newline at end of file
diff --git a/docs/docs/hidden/yaml/connector.md b/docs/docs/hidden/yaml/connector.md
deleted file mode 100644
index 8bfa5ba1fdf..00000000000
--- a/docs/docs/hidden/yaml/connector.md
+++ /dev/null
@@ -1,671 +0,0 @@
----
-note: GENERATED. DO NOT EDIT.
-title: Connector YAML
-sidebar_position: 35
----
-
-When you add olap_connector to your rill.yaml file, you will need to set up a `.yaml` file in the 'connectors' directory. This file requires the following parameters,type and driver (see below for more parameter options). Rill will automatically test the connectivity to the OLAP engine upon saving the file. This can be viewed in the connectors tab in the UI.
-
-:::tip Did you know?
-
-Starting from Rill 0.46, you can directly create OLAP engines from the UI! Select + Add -> Data -> Connect an OLAP engine
-
-:::
-
-
-## Properties
-
-### `type`
-
-_[string]_ - Refers to the resource type and must be `connector` _(required)_
-
-## Common Properties
-
-### `name`
-
-_[string]_ - Name is usually inferred from the filename, but can be specified manually.
-
-### `refs`
-
-_[array of string]_ - List of resource references
-
-### `dev`
-
-_[object]_ - Overrides any properties in development environment.
-
-### `prod`
-
-_[object]_ - Overrides any properties in production environment.
-
-## One of Properties Options
-- [athena](#athena)
-- [azure](#azure)
-- [bigquery](#bigquery)
-- [clickhouse](#clickhouse)
-- [druid](#druid)
-- [duckdb](#duckdb)
-- [gcs](#gcs)
-- [https](#https)
-- [local_file](#local_file)
-- [motherduck](#motherduck)
-- [mysql](#mysql)
-- [pinot](#pinot)
-- [postgres](#postgres)
-- [redshift](#redshift)
-- [s3](#s3)
-- [salesforce](#salesforce)
-- [slack](#slack)
-- [snowflake](#snowflake)
-- [sqlite](#sqlite)
-
-## athena
-
-Configuration properties specific to the athena
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `athena` _(required)_
-
-### `aws_access_key_id`
-
-_[string]_ - AWS Access Key ID used for authentication. Required when using static credentials directly or as base credentials for assuming a role.
-
-### `aws_secret_access_key`
-
-_[string]_ - AWS Secret Access Key paired with the Access Key ID. Required when using static credentials directly or as base credentials for assuming a role.
-
-### `aws_access_token`
-
-_[string]_ - AWS session token used with temporary credentials. Required only if the Access Key and Secret Key are part of a temporary session credentials.
-
-### `role_arn`
-
-_[string]_ - ARN of the IAM role to assume. When specified, the SDK uses the base credentials to call STS AssumeRole and obtain temporary credentials scoped to this role.
-
-### `role_session_name`
-
-_[string]_ - Session name to associate with the STS AssumeRole session. Used only if 'role_arn' is specified. Useful for identifying and auditing the session.
-
-### `external_id`
-
-_[string]_ - External ID required by some roles when assuming them, typically for cross-account access. Used only if 'role_arn' is specified and the role's trust policy requires it.
-
-### `workgroup`
-
-_[string]_ - Athena workgroup to use for query execution. Defaults to 'primary' if not specified.
-
-### `output_location`
-
-_[string]_ - S3 URI where Athena query results should be stored (e.g., s3://your-bucket/athena/results/). Optional if the selected workgroup has a default result configuration.
-
-### `aws_region`
-
-_[string]_ - AWS region where Athena and the result S3 bucket are located (e.g., us-east-1). Defaults to 'us-east-1' if not specified.
-
-### `allow_host_access`
-
-_[boolean]_ - Allow the Athena client to access host environment configurations such as environment variables or local AWS credential files. Defaults to true, enabling use of credentials and settings from the host environment unless explicitly disabled.
-
-## azure
-
-Configuration properties specific to the azure
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `azure` _(required)_
-
-### `azure_storage_account`
-
-_[string]_ - Azure storage account name
-
-### `azure_storage_key`
-
-_[string]_ - Azure storage access key
-
-### `azure_storage_sas_token`
-
-_[string]_ - Optional azure SAS token for authentication
-
-### `azure_storage_connection_string`
-
-_[string]_ - Optional azure connection string for storage account
-
-### `azure_storage_bucket`
-
-_[string]_ - Name of the Azure Blob Storage container (equivalent to an S3 bucket) _(required)_
-
-### `allow_host_access`
-
-_[boolean]_ - Allow access to host environment configuration
-
-## bigquery
-
-Configuration properties specific to the bigquery
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `bigquery` _(required)_
-
-### `google_application_credentials`
-
-_[string]_ - Raw contents of the Google Cloud service account key (in JSON format) used for authentication.
-
-### `project_id`
-
-_[string]_ - ID of the Google Cloud project to use for BigQuery operations. This can be omitted only if the project ID is included in the service account key.
-
-### `allow_host_access`
-
-_[boolean]_ - Enable the BigQuery client to use credentials from the host environment when no service account JSON is provided. This includes Application Default Credentials from environment variables, local credential files, or the Google Compute Engine metadata server. Defaults to true, allowing seamless authentication in GCP environments.
-
-## clickhouse
-
-Configuration properties specific to the clickhouse
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `clickhouse` _(required)_
-
-### `managed`
-
-_[boolean]_ - `true` means Rill will provision the connector using the default provisioner. `false` disables automatic provisioning.
-
-### `mode`
-
-_[string]_ - `read` - Controls the operation mode for the ClickHouse connection. Defaults to 'read' for safe operation with external databases. Set to 'readwrite' to enable model creation and table mutations. Note: When 'managed: true', this is automatically set to 'readwrite'.
-
-### `dsn`
-
-_[string]_ - DSN(Data Source Name) for the ClickHouse connection
-
-### `username`
-
-_[string]_ - Username for authentication
-
-### `password`
-
-_[string]_ - Password for authentication
-
-### `host`
-
-_[string]_ - Host where the ClickHouse instance is running
-
-### `port`
-
-_[integer]_ - Port where the ClickHouse instance is accessible
-
-### `database`
-
-_[string]_ - Name of the ClickHouse database within the cluster
-
-### `ssl`
-
-_[boolean]_ - Indicates whether a secured SSL connection is required
-
-### `cluster`
-
-_[string]_ - Cluster name, required for running distributed queries
-
-### `log_queries`
-
-_[boolean]_ - Controls whether to log raw SQL queries
-
-### `settings_override`
-
-_[string]_ - override the default settings used in queries. example `readonly = 1, session_timezone = 'UTC'`
-
-### `embed_port`
-
-_[integer]_ - Port to run ClickHouse locally (0 for random port)
-
-### `can_scale_to_zero`
-
-_[boolean]_ - Indicates if the database can scale to zero
-
-### `max_open_conns`
-
-_[integer]_ - Maximum number of open connections to the database
-
-### `max_idle_conns`
-
-_[integer]_ - Maximum number of idle connections in the pool
-
-### `dial_timeout`
-
-_[string]_ - Timeout for dialing the ClickHouse server
-
-### `conn_max_lifetime`
-
-_[string]_ - Maximum time a connection may be reused
-
-### `read_timeout`
-
-_[string]_ - Maximum time for a connection to read data
-
-## druid
-
-Configuration properties specific to the druid
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `druid` _(required)_
-
-### `dsn`
-
-_[string]_ - Data Source Name (DSN) for connecting to Druid _(required)_
-
-### `username`
-
-_[string]_ - Username for authenticating with Druid
-
-### `password`
-
-_[string]_ - Password for authenticating with Druid
-
-### `host`
-
-_[string]_ - Hostname of the Druid coordinator or broker
-
-### `port`
-
-_[integer]_ - Port number of the Druid service
-
-### `ssl`
-
-_[boolean]_ - Enable SSL for secure connection
-
-### `log_queries`
-
-_[boolean]_ - Log raw SQL queries sent to Druid
-
-### `max_open_conns`
-
-_[integer]_ - Maximum number of open database connections (0 = default, -1 = unlimited)
-
-### `skip_version_check`
-
-_[boolean]_ - Skip checking Druid version compatibility
-
-## duckdb
-
-Configuration properties specific to the duckdb
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `duckdb` _(required)_
-
-### `pool_size`
-
-_[integer]_ - Number of concurrent connections and queries allowed
-
-### `allow_host_access`
-
-_[boolean]_ - Whether access to the local environment and file system is allowed
-
-### `cpu`
-
-_[integer]_ - Number of CPU cores available to the database
-
-### `memory_limit_gb`
-
-_[integer]_ - Amount of memory in GB available to the database
-
-### `read_write_ratio`
-
-_[number]_ - Ratio of resources allocated to the read database; used to divide CPU and memory
-
-### `init_sql`
-
-_[string]_ - is executed during database initialization.
-
-### `conn_init_sql`
-
-_[string]_ - is executed when a new connection is initialized.
-
-### `secrets`
-
-_[string]_ - Comma-separated list of other connector names to create temporary secrets for in DuckDB before executing a model.
-
-### `log_queries`
-
-_[boolean]_ - Whether to log raw SQL queries executed through OLAP
-
-## gcs
-
-Configuration properties specific to the gcs
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `gcs` _(required)_
-
-### `google_application_credentials`
-
-_[string]_ - Google Cloud credentials JSON string
-
-### `bucket`
-
-_[string]_ - Name of gcs bucket _(required)_
-
-### `allow_host_access`
-
-_[boolean]_ - Allow access to host environment configuration
-
-### `key_id`
-
-_[string]_ - Optional S3-compatible Key ID when used in compatibility mode
-
-### `secret`
-
-_[string]_ - Optional S3-compatible Secret when used in compatibility mode
-
-## https
-
-Configuration properties specific to the https
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `https` _(required)_
-
-### `path`
-
-_[string]_ - The full HTTPS URI to fetch data from _(required)_
-
-### `headers`
-
-_[object]_ - HTTP headers to include in the request
-
-## local_file
-
-Configuration properties specific to the local_file
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `local_file` _(required)_
-
-### `dsn`
-
-_[string]_ - Data Source Name (DSN) indicating the file path or location of the local file _(required)_
-
-### `allow_host_access`
-
-_[boolean]_ - Flag to indicate if access to host-level file paths is permitted
-
-## motherduck
-
-Configuration properties specific to the motherduck
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `motherduck` _(required)_
-
-### `dsn`
-
-_[string]_ - Data Source Name (DSN) specifying the MotherDuck connection endpoint _(required)_
-
-### `token`
-
-_[string]_ - Authentication token for accessing MotherDuck (secret) _(required)_
-
-## mysql
-
-Configuration properties specific to the mysql
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `mysql` _(required)_
-
-### `dsn`
-
-_[string]_ - DSN(Data Source Name) for the mysql connection
-
-### `host`
-
-_[string]_ - Hostname of the MySQL server
-
-### `port`
-
-_[integer]_ - Port number for the MySQL server
-
-### `database`
-
-_[string]_ - Name of the MySQL database
-
-### `user`
-
-_[string]_ - Username for authentication
-
-### `password`
-
-_[string]_ - Password for authentication
-
-### `ssl_mode`
-
-_[string]_ - SSL mode can be DISABLED, PREFERRED or REQUIRED
-
-## pinot
-
-Configuration properties specific to the pinot
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `pinot` _(required)_
-
-### `dsn`
-
-_[string]_ - DSN(Data Source Name) for the Pinot connection _(required)_
-
-### `username`
-
-_[string]_ - Username for authenticating with Pinot
-
-### `password`
-
-_[string]_ - Password for authenticating with Pinot
-
-### `broker_host`
-
-_[string]_ - Hostname of the Pinot broker _(required)_
-
-### `broker_port`
-
-_[integer]_ - Port number for the Pinot broker
-
-### `controller_host`
-
-_[string]_ - Hostname of the Pinot controller _(required)_
-
-### `controller_port`
-
-_[integer]_ - Port number for the Pinot controller
-
-### `ssl`
-
-_[boolean]_ - Enable SSL connection to Pinot
-
-### `log_queries`
-
-_[boolean]_ - Log raw SQL queries executed through Pinot
-
-### `max_open_conns`
-
-_[integer]_ - Maximum number of open connections to the Pinot database
-
-## postgres
-
-Configuration properties specific to the postgres
-
-### `driver`
-
-_[string]_ - Refers to the driver type and must be driver `postgres` _(required)_
-
-### `dsn`
-
-_[string]_ - 
DSN(Data Source Name) for the postgres connection - -### `host` - -_[string]_ - Hostname of the Postgres server - -### `port` - -_[string]_ - Port number for the Postgres server - -### `dbname` - -_[string]_ - Name of the Postgres database - -### `user` - -_[string]_ - Username for authentication - -### `password` - -_[string]_ - Password for authentication - -### `sslmode` - -_[string]_ - SSL mode can be disable, allow, prefer or require - -## redshift - -Configuration properties specific to the redshift - -### `driver` - -_[string]_ - Refers to the driver type and must be driver `redshift` _(required)_ - -### `aws_access_key_id` - -_[string]_ - AWS Access Key ID used for authenticating with Redshift. _(required)_ - -### `aws_secret_access_key` - -_[string]_ - AWS Secret Access Key used for authenticating with Redshift. _(required)_ - -### `aws_access_token` - -_[string]_ - AWS Session Token for temporary credentials (optional). - -### `region` - -_[string]_ - AWS region where the Redshift cluster or workgroup is hosted (e.g., 'us-east-1'). - -### `database` - -_[string]_ - Name of the Redshift database to query. _(required)_ - -### `workgroup` - -_[string]_ - Workgroup name for Redshift Serverless, in case of provisioned Redshift clusters use 'cluster_identifier'. - -### `cluster_identifier` - -_[string]_ - Cluster identifier for provisioned Redshift clusters, in case of Redshift Serverless use 'workgroup' . - -## s3 - -Configuration properties specific to the s3 - -### `driver` - -_[string]_ - Refers to the driver type and must be driver `s3` _(required)_ - -### `aws_access_key_id` - -_[string]_ - AWS Access Key ID used for authentication - -### `aws_secret_access_key` - -_[string]_ - AWS Secret Access Key used for authentication - -### `aws_access_token` - -_[string]_ - Optional AWS session token for temporary credentials - -### `bucket` - -_[string]_ - Name of s3 bucket _(required)_ - -### `endpoint` - -_[string]_ - Optional custom endpoint URL for S3-compatible storage - -### `region` - -_[string]_ - AWS region of the S3 bucket - -### `allow_host_access` - -_[boolean]_ - Allow access to host environment configuration - -### `retain_files` - -_[boolean]_ - Whether to retain intermediate files after processing - -## salesforce - -Configuration properties specific to the salesforce - -### `driver` - -_[string]_ - Refers to the driver type and must be driver `salesforce` _(required)_ - -### `username` - -_[string]_ - Salesforce account username _(required)_ - -### `password` - -_[string]_ - Salesforce account password (secret) - -### `key` - -_[string]_ - Authentication key for Salesforce (secret) - -### `endpoint` - -_[string]_ - Salesforce API endpoint URL _(required)_ - -### `client_id` - -_[string]_ - Client ID used for Salesforce OAuth authentication - -## slack - -Configuration properties specific to the slack - -### `driver` - -_[string]_ - Refers to the driver type and must be driver `slack` _(required)_ - -### `bot_token` - -_[string]_ - Bot token used for authenticating Slack API requests _(required)_ - -## snowflake - -Configuration properties specific to the snowflake - -### `driver` - -_[string]_ - Refers to the driver type and must be driver `snowflake` _(required)_ - -### `dsn` - -_[string]_ - DSN (Data Source Name) for the Snowflake connection _(required)_ - -### `parallel_fetch_limit` - -_[integer]_ - Maximum number of concurrent fetches during query execution - -## sqlite - -Configuration properties specific to the sqlite - -### `driver` - -_[string]_ - Refers to the 
driver type and must be driver `sqlite` _(required)_ - -### `dsn` - -_[string]_ - DSN(Data Source Name) for the sqlite connection _(required)_ \ No newline at end of file diff --git a/docs/docs/hidden/yaml/connectors.md b/docs/docs/hidden/yaml/connectors.md new file mode 100644 index 00000000000..fe19cf56686 --- /dev/null +++ b/docs/docs/hidden/yaml/connectors.md @@ -0,0 +1,948 @@ +--- +note: GENERATED. DO NOT EDIT. +title: Connector YAML +sidebar_position: 31 +--- + +Connector YAML files define how Rill connects to external data sources and OLAP engines. Each connector specifies a driver type and its required connection parameters. + +## Available Connector Types + +### _OLAP Engines_ +- [**DuckDB**](#duckdb) - Embedded DuckDB engine (default) +- [**ClickHouse**](#clickhouse) - ClickHouse analytical database +- [**MotherDuck**](#motherduck) - MotherDuck cloud database +- [**Druid**](#druid) - Apache Druid +- [**Pinot**](#pinot) - Apache Pinot + +### _Data Warehouses_ +- [**Snowflake**](#snowflake) - Snowflake data warehouse +- [**BigQuery**](#bigquery) - Google BigQuery +- [**Redshift**](#redshift) - Amazon Redshift +- [**Athena**](#athena) - Amazon Athena + +### _Databases_ +- [**PostgreSQL**](#postgres) - PostgreSQL databases +- [**MySQL**](#mysql) - MySQL databases +- [**SQLite**](#sqlite) - SQLite databases + +### _Cloud Storage_ +- [**GCS**](#gcs) - Google Cloud Storage +- [**S3**](#s3) - Amazon S3 storage +- [**Azure**](#azure) - Azure Blob Storage + +### _Other_ +- [**HTTPS**](#https) - Public files via HTTP/HTTPS +- [**Salesforce**](#salesforce) - Salesforce data +- [**Slack**](#slack) - Slack data + +:::warning Security Recommendation +For all credential parameters (passwords, tokens, keys), use environment variables with the syntax `{{.env.connector..}}`. This keeps sensitive data out of your YAML files and version control. See our [credentials documentation](/connect/credentials/) for complete setup instructions. +::: + + +## Properties + +### `type` + +_[string]_ - Refers to the resource type and must be `connector` _(required)_ + +## Common Properties + +### `name` + +_[string]_ - Name is usually inferred from the filename, but can be specified manually. + +### `refs` + +_[array of string]_ - List of resource references + +### `dev` + +_[object]_ - Overrides any properties in development environment. + +### `prod` + +_[object]_ - Overrides any properties in production environment. + +## Available Connector Types + + + +### Athena + + + + +```yaml +type: connector # Must be `connector` (required) +driver: athena # Must be `athena` _(required)_ + +aws_access_key_id: AKIAIOSFODNN7EXAMPLE# AWS Access Key ID used for authentication. Required when using static credentials directly or as base credentials for assuming a role. +aws_secret_access_key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY# AWS Secret Access Key paired with the Access Key ID. Required when using static credentials directly or as base credentials for assuming a role. +aws_access_token: AKIAIOSFODNN7EXAMPLE# AWS session token used with temporary credentials. Required only if the Access Key and Secret Key are part of a temporary session credentials. +role_arn: arn:aws:iam::123456789012:role/MyRole# ARN of the IAM role to assume. When specified, the SDK uses the base credentials to call STS AssumeRole and obtain temporary credentials scoped to this role. +role_session_name: MySession # Session name to associate with the STS AssumeRole session. Used only if 'role_arn' is specified. 
Useful for identifying and auditing the session. +external_id: MyExternalID # External ID required by some roles when assuming them, typically for cross-account access. Used only if 'role_arn' is specified and the role's trust policy requires it. +workgroup: primary # Athena workgroup to use for query execution. Defaults to 'primary' if not specified. +output_location: s3://my-bucket/athena-output/# S3 URI where Athena query results should be stored (e.g., s3://your-bucket/athena/results/). Optional if the selected workgroup has a default result configuration. +aws_region: us-east-1 # AWS region where Athena and the result S3 bucket are located (e.g., us-east-1). Defaults to 'us-east-1' if not specified. +allow_host_access: true # Allow the Athena client to access host environment configurations such as environment variables or local AWS credential files. Defaults to true, enabling use of credentials and settings from the host environment unless explicitly disabled. +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `athena` _(required)_ + +#### `aws_access_key_id` + +_[string]_ - AWS Access Key ID used for authentication. Required when using static credentials directly or as base credentials for assuming a role. + +#### `aws_secret_access_key` + +_[string]_ - AWS Secret Access Key paired with the Access Key ID. Required when using static credentials directly or as base credentials for assuming a role. + +#### `aws_access_token` + +_[string]_ - AWS session token used with temporary credentials. Required only if the Access Key and Secret Key are part of a temporary session credentials. + +#### `role_arn` + +_[string]_ - ARN of the IAM role to assume. When specified, the SDK uses the base credentials to call STS AssumeRole and obtain temporary credentials scoped to this role. + +#### `role_session_name` + +_[string]_ - Session name to associate with the STS AssumeRole session. Used only if 'role_arn' is specified. Useful for identifying and auditing the session. + +#### `external_id` + +_[string]_ - External ID required by some roles when assuming them, typically for cross-account access. Used only if 'role_arn' is specified and the role's trust policy requires it. + +#### `workgroup` + +_[string]_ - Athena workgroup to use for query execution. Defaults to 'primary' if not specified. + +#### `output_location` + +_[string]_ - S3 URI where Athena query results should be stored (e.g., s3://your-bucket/athena/results/). Optional if the selected workgroup has a default result configuration. + +#### `aws_region` + +_[string]_ - AWS region where Athena and the result S3 bucket are located (e.g., us-east-1). Defaults to 'us-east-1' if not specified. + +#### `allow_host_access` + +_[boolean]_ - Allow the Athena client to access host environment configurations such as environment variables or local AWS credential files. Defaults to true, enabling use of credentials and settings from the host environment unless explicitly disabled. 
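+Putting the assume-role properties together, here is a minimal sketch of an Athena connector that authenticates with base credentials supplied through environment variables (using the `{{ .env.* }}` templating shown in the MotherDuck example below) and then assumes a cross-account role. The file name, role ARN, session name, and external ID are illustrative placeholders, not generated schema output:
+
+```yaml
+# connectors/athena.yaml - illustrative sketch, not generated schema output
+type: connector
+driver: athena
+
+# Base credentials resolved from environment variables, kept out of version control
+aws_access_key_id: '{{ .env.aws_access_key_id }}'
+aws_secret_access_key: '{{ .env.aws_secret_access_key }}'
+
+# Assume a role in the target account; external_id only if the trust policy requires it
+role_arn: arn:aws:iam::123456789012:role/MyRole
+role_session_name: MySession
+external_id: MyExternalID
+
+aws_region: us-east-1
+output_location: s3://my-bucket/athena-results/
+```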
+
+### Azure
+
+```yaml
+type: connector # Must be `connector` (required)
+driver: azure # Must be `azure` _(required)_
+
+azure_storage_account: mystorageaccount # Azure storage account name
+azure_storage_key: myaccesskey # Azure storage access key
+azure_storage_bucket: my-container # Name of the Azure Blob Storage container (equivalent to an S3 bucket) _(required)_
+azure_storage_sas_token: my-sas-token # Optional azure SAS token for authentication
+azure_storage_connection_string: DefaultEndpointsProtocol=https;AccountName=mystorageaccount;AccountKey=myaccesskey;EndpointSuffix=core.windows.net # Optional azure connection string for storage account
+allow_host_access: true # Allow access to host environment configuration
+```
+
+#### `driver`
+
+_[string]_ - Refers to the driver type and must be driver `azure` _(required)_
+
+#### `azure_storage_account`
+
+_[string]_ - Azure storage account name
+
+#### `azure_storage_key`
+
+_[string]_ - Azure storage access key
+
+#### `azure_storage_bucket`
+
+_[string]_ - Name of the Azure Blob Storage container (equivalent to an S3 bucket) _(required)_
+
+#### `azure_storage_sas_token`
+
+_[string]_ - Optional azure SAS token for authentication
+
+#### `azure_storage_connection_string`
+
+_[string]_ - Optional azure connection string for storage account
+
+#### `allow_host_access`
+
+_[boolean]_ - Allow access to host environment configuration
+
+### BigQuery
+
+```yaml
+type: connector # Must be `connector` (required)
+driver: bigquery # Must be `bigquery` _(required)_
+
+google_application_credentials: {"type": "service_account", "project_id": "my-gcp-project"} # Raw contents of the Google Cloud service account key (in JSON format) used for authentication.
+project_id: my-gcp-project # Google Cloud project ID
+dataset_id: my_dataset # BigQuery dataset ID
+location: US # BigQuery dataset location
+allow_host_access: true # Enable the BigQuery client to use credentials from the host environment when no service account JSON is provided. This includes Application Default Credentials from environment variables, local credential files, or the Google Compute Engine metadata server. Defaults to true, allowing seamless authentication in GCP environments.
+```
+
+#### `driver`
+
+_[string]_ - Refers to the driver type and must be driver `bigquery` _(required)_
+
+#### `google_application_credentials`
+
+_[string]_ - Raw contents of the Google Cloud service account key (in JSON format) used for authentication.
+
+#### `project_id`
+
+_[string]_ - Google Cloud project ID
+
+#### `dataset_id`
+
+_[string]_ - BigQuery dataset ID
+
+#### `location`
+
+_[string]_ - BigQuery dataset location
+
+#### `allow_host_access`
+
+_[boolean]_ - Enable the BigQuery client to use credentials from the host environment when no service account JSON is provided. This includes Application Default Credentials from environment variables, local credential files, or the Google Compute Engine metadata server. Defaults to true, allowing seamless authentication in GCP environments.
+
+### ClickHouse
+
+```yaml
+type: connector # Must be `connector` (required)
+driver: clickhouse # Must be `clickhouse` _(required)_
+
+managed: true # `true` means Rill will provision the connector using the default provisioner. `false` disables automatic provisioning.
+mode: readwrite # `read` - Controls the operation mode for the ClickHouse connection. Defaults to 'read' for safe operation with external databases. Set to 'readwrite' to enable model creation and table mutations.
Note: When 'managed: true', this is automatically set to 'readwrite'. +dsn: clickhouse://localhost:9000/default# DSN(Data Source Name) for the ClickHouse connection +username: default # Username for authentication +password: mypassword # Password for authentication +host: localhost # Host where the ClickHouse instance is running +port: 9000 # Port where the ClickHouse instance is accessible +database: default # Name of the ClickHouse database within the cluster +ssl: true # Indicates whether a secured SSL connection is required +cluster: my-cluster # Cluster name, required for running distributed queries +log_queries: true # Controls whether to log raw SQL queries +settings_override: readonly = 1, session_timezone = 'UTC'# override the default settings used in queries. example `readonly = 1, session_timezone = 'UTC'` +embed_port: 0 # Port to run ClickHouse locally (0 for random port) +can_scale_to_zero: true # Indicates if the database can scale to zero +max_open_conns: 10 # Maximum number of open connections to the database +max_idle_conns: 10 # Maximum number of idle connections in the pool +dial_timeout: 10s # Timeout for dialing the ClickHouse server +conn_max_lifetime: 10s # Maximum time a connection may be reused +read_timeout: 10s # Maximum time for a connection to read data +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `clickhouse` _(required)_ + +#### `managed` + +_[boolean]_ - `true` means Rill will provision the connector using the default provisioner. `false` disables automatic provisioning. + +#### `mode` + +_[string]_ - `read` - Controls the operation mode for the ClickHouse connection. Defaults to 'read' for safe operation with external databases. Set to 'readwrite' to enable model creation and table mutations. Note: When 'managed: true', this is automatically set to 'readwrite'. + +#### `dsn` + +_[string]_ - DSN(Data Source Name) for the ClickHouse connection + +#### `username` + +_[string]_ - Username for authentication + +#### `password` + +_[string]_ - Password for authentication + +#### `host` + +_[string]_ - Host where the ClickHouse instance is running + +#### `port` + +_[integer]_ - Port where the ClickHouse instance is accessible + +#### `database` + +_[string]_ - Name of the ClickHouse database within the cluster + +#### `ssl` + +_[boolean]_ - Indicates whether a secured SSL connection is required + +#### `cluster` + +_[string]_ - Cluster name, required for running distributed queries + +#### `log_queries` + +_[boolean]_ - Controls whether to log raw SQL queries + +#### `settings_override` + +_[string]_ - override the default settings used in queries. 
example `readonly = 1, session_timezone = 'UTC'` + +#### `embed_port` + +_[integer]_ - Port to run ClickHouse locally (0 for random port) + +#### `can_scale_to_zero` + +_[boolean]_ - Indicates if the database can scale to zero + +#### `max_open_conns` + +_[integer]_ - Maximum number of open connections to the database + +#### `max_idle_conns` + +_[integer]_ - Maximum number of idle connections in the pool + +#### `dial_timeout` + +_[string]_ - Timeout for dialing the ClickHouse server + +#### `conn_max_lifetime` + +_[string]_ - Maximum time a connection may be reused + +#### `read_timeout` + +_[string]_ - Maximum time for a connection to read data + +### Druid + + + + +```yaml +type: connector # Must be `connector` (required) +driver: druid # Must be `druid` _(required)_ + +dsn: http://localhost:8082 # Data Source Name (DSN) for connecting to Druid _(required)_ +username: admin # Username for authenticating with Druid +password: admin123 # Password for authenticating with Druid +host: localhost # Hostname of the Druid coordinator or broker +port: 8082 # Port number of the Druid service +ssl: true # Enable SSL for secure connection +log_queries: true # Log raw SQL queries sent to Druid +max_open_conns: 10 # Maximum number of open database connections (0 = default, -1 = unlimited) +skip_version_check: true # Skip checking Druid version compatibility +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `druid` _(required)_ + +#### `dsn` + +_[string]_ - Data Source Name (DSN) for connecting to Druid _(required)_ + +#### `username` + +_[string]_ - Username for authenticating with Druid + +#### `password` + +_[string]_ - Password for authenticating with Druid + +#### `host` + +_[string]_ - Hostname of the Druid coordinator or broker + +#### `port` + +_[integer]_ - Port number of the Druid service + +#### `ssl` + +_[boolean]_ - Enable SSL for secure connection + +#### `log_queries` + +_[boolean]_ - Log raw SQL queries sent to Druid + +#### `max_open_conns` + +_[integer]_ - Maximum number of open database connections (0 = default, -1 = unlimited) + +#### `skip_version_check` + +_[boolean]_ - Skip checking Druid version compatibility + +### DuckDB + + + + +```yaml +type: connector # Must be `connector` (required) +driver: duckdb # Must be `duckdb` _(required)_ + +pool_size: 10 # Number of concurrent connections and queries allowed +allow_host_access: true # Whether access to the local environment and file system is allowed +cpu: 10 # Number of CPU cores available to the database +memory_limit_gb: 10 # Amount of memory in GB available to the database +read_write_ratio: 0.5 # Ratio of resources allocated to the read database; used to divide CPU and memory +init_sql: CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)# is executed during database initialization. +secrets: gcs,s3 # Comma-separated list of other connector names to create temporary secrets for in DuckDB before executing a model. 
+log_queries: true # Whether to log raw SQL queries executed through OLAP
+```
+
+#### `driver`
+
+_[string]_ - Refers to the driver type and must be driver `duckdb` _(required)_
+
+#### `pool_size`
+
+_[integer]_ - Number of concurrent connections and queries allowed
+
+#### `allow_host_access`
+
+_[boolean]_ - Whether access to the local environment and file system is allowed
+
+#### `cpu`
+
+_[integer]_ - Number of CPU cores available to the database
+
+#### `memory_limit_gb`
+
+_[integer]_ - Amount of memory in GB available to the database
+
+#### `read_write_ratio`
+
+_[number]_ - Ratio of resources allocated to the read database; used to divide CPU and memory
+
+#### `init_sql`
+
+_[string]_ - SQL executed during database initialization.
+
+#### `secrets`
+
+_[string]_ - Comma-separated list of other connector names to create temporary secrets for in DuckDB before executing a model.
+
+#### `log_queries`
+
+_[boolean]_ - Whether to log raw SQL queries executed through OLAP
+
+### GCS
+
+```yaml
+type: connector # Must be `connector` (required)
+driver: gcs # Must be `gcs` _(required)_
+
+google_application_credentials: {"type": "service_account", "project_id": "my-project"} # Google Cloud credentials JSON string
+bucket: my-gcs-bucket # Name of gcs bucket _(required)_
+allow_host_access: true # Allow access to host environment configuration
+key_id: AKIAIOSFODNN7EXAMPLE # Optional S3-compatible Key ID when used in compatibility mode
+secret: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY # Optional S3-compatible Secret when used in compatibility mode
+```
+
+#### `driver`
+
+_[string]_ - Refers to the driver type and must be driver `gcs` _(required)_
+
+#### `google_application_credentials`
+
+_[string]_ - Google Cloud credentials JSON string
+
+#### `bucket`
+
+_[string]_ - Name of gcs bucket _(required)_
+
+#### `allow_host_access`
+
+_[boolean]_ - Allow access to host environment configuration
+
+#### `key_id`
+
+_[string]_ - Optional S3-compatible Key ID when used in compatibility mode
+
+#### `secret`
+
+_[string]_ - Optional S3-compatible Secret when used in compatibility mode
+
+### HTTPS
+
+```yaml
+type: connector # Must be `connector` (required)
+driver: https # Must be `https` _(required)_
+
+path: https://api.example.com/data.csv # The full HTTPS URI to fetch data from _(required)_
+headers: {"Authorization": "Bearer my-token"} # HTTP headers to include in the request
+```
+
+#### `driver`
+
+_[string]_ - Refers to the driver type and must be driver `https` _(required)_
+
+#### `path`
+
+_[string]_ - The full HTTPS URI to fetch data from _(required)_
+
+#### `headers`
+
+_[object]_ - HTTP headers to include in the request
+
+### MotherDuck
+
+```yaml
+type: connector # Must be `connector` (required)
+driver: duckdb # Must be `duckdb` _(required)_
+
+path: md:my_database # Path to your MD database _(required)_
+# SQL executed during database initialization. _(required)_
+init_sql: |
+  INSTALL 'motherduck';
+  LOAD 'motherduck';
+  SET motherduck_token = '{{ .env.motherduck_token }}'
+```
+
+#### `driver`
+
+_[string]_ - Refers to the driver type and must be driver `duckdb` _(required)_
+
+#### `path`
+
+_[string]_ - Path to your MD database _(required)_
+
+#### `init_sql`
+
+_[string]_ - SQL executed during database initialization.
_(required)_ + +### MySQL + + + + +```yaml +type: connector # Must be `connector` (required) +driver: mysql # Must be `mysql` _(required)_ + +dsn: mysql://user:password@localhost:3306/mydatabase# DSN(Data Source Name) for the mysql connection +host: localhost # Hostname of the MySQL server +port: 3306 # Port number for the MySQL server +database: mydatabase # Name of the MySQL database +user: myuser # Username for authentication +password: mypassword # Password for authentication +ssl_mode: PREFERRED # SSL mode can be DISABLED, PREFERRED or REQUIRED +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `mysql` _(required)_ + +#### `dsn` + +_[string]_ - DSN(Data Source Name) for the mysql connection + +#### `host` + +_[string]_ - Hostname of the MySQL server + +#### `port` + +_[integer]_ - Port number for the MySQL server + +#### `database` + +_[string]_ - Name of the MySQL database + +#### `user` + +_[string]_ - Username for authentication + +#### `password` + +_[string]_ - Password for authentication + +#### `ssl_mode` + +_[string]_ - SSL mode can be DISABLED, PREFERRED or REQUIRED + +### Pinot + + + + +```yaml +type: connector # Must be `connector` (required) +driver: pinot # Must be `pinot` _(required)_ + +dsn: pinot://localhost:8099 # DSN(Data Source Name) for the Pinot connection _(required)_ +username: admin # Username for authenticating with Pinot +password: admin123 # Password for authenticating with Pinot +broker_host: localhost # Hostname of the Pinot broker _(required)_ +broker_port: 8099 # Port number for the Pinot broker +controller_host: localhost # Hostname of the Pinot controller _(required)_ +controller_port: 9000 # Port number for the Pinot controller +ssl: true # Enable SSL connection to Pinot +log_queries: true # Log raw SQL queries executed through Pinot +max_open_conns: 10 # Maximum number of open connections to the Pinot database +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `pinot` _(required)_ + +#### `dsn` + +_[string]_ - DSN(Data Source Name) for the Pinot connection _(required)_ + +#### `username` + +_[string]_ - Username for authenticating with Pinot + +#### `password` + +_[string]_ - Password for authenticating with Pinot + +#### `broker_host` + +_[string]_ - Hostname of the Pinot broker _(required)_ + +#### `broker_port` + +_[integer]_ - Port number for the Pinot broker + +#### `controller_host` + +_[string]_ - Hostname of the Pinot controller _(required)_ + +#### `controller_port` + +_[integer]_ - Port number for the Pinot controller + +#### `ssl` + +_[boolean]_ - Enable SSL connection to Pinot + +#### `log_queries` + +_[boolean]_ - Log raw SQL queries executed through Pinot + +#### `max_open_conns` + +_[integer]_ - Maximum number of open connections to the Pinot database + +### Postgres + + + + +```yaml +type: connector # Must be `connector` (required) +driver: postgres # Must be `postgres` _(required)_ + +dsn: postgresql://user:password@localhost:5432/mydatabase# DSN(Data Source Name) for the postgres connection +host: localhost # Hostname of the Postgres server +port: 5432 # Port number for the Postgres server +dbname: mydatabase # Name of the Postgres database +user: postgres # Username for authentication +password: mypassword # Password for authentication +sslmode: prefer # SSL mode can be disable, allow, prefer or require +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `postgres` _(required)_ + +#### `dsn` + +_[string]_ - DSN(Data Source 
Name) for the postgres connection + +#### `host` + +_[string]_ - Hostname of the Postgres server + +#### `port` + +_[string]_ - Port number for the Postgres server + +#### `dbname` + +_[string]_ - Name of the Postgres database + +#### `user` + +_[string]_ - Username for authentication + +#### `password` + +_[string]_ - Password for authentication + +#### `sslmode` + +_[string]_ - SSL mode can be disable, allow, prefer or require + +### Redshift + + + + +```yaml +type: connector # Must be `connector` (required) +driver: redshift # Must be `redshift` _(required)_ + +aws_access_key_id: AKIAIOSFODNN7EXAMPLE# AWS Access Key ID used for authenticating with Redshift. _(required)_ +aws_secret_access_key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY# AWS Secret Access Key used for authenticating with Redshift. _(required)_ +aws_access_token: AKIAIOSFODNN7EXAMPLE# AWS Session Token for temporary credentials (optional). +region: us-east-1 # AWS region where the Redshift cluster or workgroup is hosted (e.g., 'us-east-1'). +database: myredshiftdb # Name of the Redshift database to query. _(required)_ +workgroup: my-workgroup # Workgroup name for Redshift Serverless, in case of provisioned Redshift clusters use 'cluster_identifier'. +cluster_identifier: my-cluster # Cluster identifier for provisioned Redshift clusters, in case of Redshift Serverless use 'workgroup' . +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `redshift` _(required)_ + +#### `aws_access_key_id` + +_[string]_ - AWS Access Key ID used for authenticating with Redshift. _(required)_ + +#### `aws_secret_access_key` + +_[string]_ - AWS Secret Access Key used for authenticating with Redshift. _(required)_ + +#### `aws_access_token` + +_[string]_ - AWS Session Token for temporary credentials (optional). + +#### `region` + +_[string]_ - AWS region where the Redshift cluster or workgroup is hosted (e.g., 'us-east-1'). + +#### `database` + +_[string]_ - Name of the Redshift database to query. _(required)_ + +#### `workgroup` + +_[string]_ - Workgroup name for Redshift Serverless, in case of provisioned Redshift clusters use 'cluster_identifier'. + +#### `cluster_identifier` + +_[string]_ - Cluster identifier for provisioned Redshift clusters, in case of Redshift Serverless use 'workgroup' . 
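+To make the serverless-versus-provisioned distinction concrete, here is a minimal sketch: the two deployment modes differ only in whether the target is identified by `workgroup` or by `cluster_identifier`. All values below are illustrative, and the credentials use the `{{ .env.* }}` templating shown in the MotherDuck example above:
+
+```yaml
+# connectors/redshift.yaml - illustrative sketch, not generated schema output
+type: connector
+driver: redshift
+
+aws_access_key_id: '{{ .env.aws_access_key_id }}'
+aws_secret_access_key: '{{ .env.aws_secret_access_key }}'
+region: us-east-1
+database: myredshiftdb
+
+# Redshift Serverless: identify the target by workgroup...
+workgroup: my-workgroup
+# ...or, for a provisioned cluster, use cluster_identifier instead:
+# cluster_identifier: my-cluster
+```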
+ +### S3 + + + + +```yaml +type: connector # Must be `connector` (required) +driver: s3 # Must be `s3` _(required)_ + +aws_access_key_id: AKIAIOSFODNN7EXAMPLE# AWS Access Key ID used for authentication +aws_secret_access_key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY# AWS Secret Access Key used for authentication +aws_access_token: AKIAIOSFODNN7EXAMPLE# Optional AWS session token for temporary credentials +bucket: my-s3-bucket # Name of s3 bucket _(required)_ +endpoint: https://s3.amazonaws.com # Optional custom endpoint URL for S3-compatible storage +region: us-east-1 # AWS region of the S3 bucket +allow_host_access: true # Allow access to host environment configuration +retain_files: true # Whether to retain intermediate files after processing +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `s3` _(required)_ + +#### `aws_access_key_id` + +_[string]_ - AWS Access Key ID used for authentication + +#### `aws_secret_access_key` + +_[string]_ - AWS Secret Access Key used for authentication + +#### `aws_access_token` + +_[string]_ - Optional AWS session token for temporary credentials + +#### `bucket` + +_[string]_ - Name of s3 bucket _(required)_ + +#### `endpoint` + +_[string]_ - Optional custom endpoint URL for S3-compatible storage + +#### `region` + +_[string]_ - AWS region of the S3 bucket + +#### `allow_host_access` + +_[boolean]_ - Allow access to host environment configuration + +#### `retain_files` + +_[boolean]_ - Whether to retain intermediate files after processing + +### Salesforce + + + + +```yaml +type: connector # Must be `connector` (required) +driver: salesforce # Must be `salesforce` _(required)_ + +username: user@example.com # Salesforce account username _(required)_ +password: mypassword # Salesforce account password (secret) +key: mysecretkey # Authentication key for Salesforce (secret) +endpoint: https://login.salesforce.com# Salesforce API endpoint URL _(required)_ +client_id: myclientid # Client ID used for Salesforce OAuth authentication +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `salesforce` _(required)_ + +#### `username` + +_[string]_ - Salesforce account username _(required)_ + +#### `password` + +_[string]_ - Salesforce account password (secret) + +#### `key` + +_[string]_ - Authentication key for Salesforce (secret) + +#### `endpoint` + +_[string]_ - Salesforce API endpoint URL _(required)_ + +#### `client_id` + +_[string]_ - Client ID used for Salesforce OAuth authentication + +### Slack + + + + +```yaml +type: connector # Must be `connector` (required) +driver: slack # Must be `slack` _(required)_ + +bot_token: xoxb-your-bot-token # Bot token used for authenticating Slack API requests _(required)_ +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `slack` _(required)_ + +#### `bot_token` + +_[string]_ - Bot token used for authenticating Slack API requests _(required)_ + +### Snowflake + + + + +```yaml +type: connector # Must be `connector` (required) +driver: snowflake # Must be `snowflake` _(required)_ + +dsn: user:password@account/database/schema?warehouse=warehouse# DSN (Data Source Name) for the Snowflake connection _(required)_ +parallel_fetch_limit: 10 # Maximum number of concurrent fetches during query execution +``` + + + +#### `driver` + +_[string]_ - Refers to the driver type and must be driver `snowflake` _(required)_ + +#### `dsn` + +_[string]_ - DSN (Data Source Name) for the Snowflake connection _(required)_ + +#### 
`parallel_fetch_limit`
+
+_[integer]_ - Maximum number of concurrent fetches during query execution
+
+### SQLite
+
+```yaml
+type: connector # Must be `connector` (required)
+driver: sqlite # Must be `sqlite` _(required)_
+
+dsn: file:./mydatabase.db # DSN(Data Source Name) for the sqlite connection _(required)_
+```
+
+#### `driver`
+
+_[string]_ - Refers to the driver type and must be driver `sqlite` _(required)_
+
+#### `dsn`
+
+_[string]_ - DSN(Data Source Name) for the sqlite connection _(required)_
\ No newline at end of file
diff --git a/docs/docs/hidden/yaml/explore.md b/docs/docs/hidden/yaml/explore-dashboards.md
similarity index 70%
rename from docs/docs/hidden/yaml/explore.md
rename to docs/docs/hidden/yaml/explore-dashboards.md
index 6b57f08fdc6..3dab0169ad4 100644
--- a/docs/docs/hidden/yaml/explore.md
+++ b/docs/docs/hidden/yaml/explore-dashboards.md
@@ -1,10 +1,10 @@
 ---
 note: GENERATED. DO NOT EDIT.
-title: Explore YAML
-sidebar_position: 36
+title: Explore Dashboard YAML
+sidebar_position: 37
 ---
 
-In your Rill project directory, create a explore dashboard, `.yaml`, file in the `dashboards` directory. Rill will ingest the dashboard definition next time you run `rill start`.
+Explore dashboards provide an interactive way to explore data with predefined metrics and dimensions.
 
 ## Properties
 
@@ -14,7 +14,11 @@ _[string]_ - Refers to the resource type and must be `explore` _(required)_
 
 ### `display_name`
 
-_[string]_ - Refers to the display name for the explore dashboard
+_[string]_ - Refers to the display name for the explore dashboard _(required)_
+
+### `metrics_view`
+
+_[string]_ - Refers to the metrics view resource _(required)_
 
 ### `description`
 
@@ -24,10 +28,6 @@ _[string]_ - Refers to the description of the explore dashboard
 
 _[string]_ - Refers to the custom banner displayed at the header of an explore dashboard
 
-### `metrics_view`
-
-_[string]_ - Refers to the metrics view resource
-
 ### `dimensions`
 
 _[oneOf]_ - List of dimension names. Use '*' to select all dimensions (default)
@@ -44,9 +44,25 @@ _[oneOf]_ - List of dimension names. Use '*' to select all dimensions (default)
 
   - **`exclude`** - _[object]_ - Select all fields except those listed here
 
+```yaml
+# Example: Select a dimension
+dimensions:
+  - country
+
+# Example: Select all dimensions except one
+dimensions:
+  exclude:
+    - country
+
+# Example: Select all dimensions that match a regex
+dimensions:
+  regex: "^public_.*$"
+```
+
 ### `measures`
 
-_[oneOf]_ - List of measure names. Use ''*'' to select all measures (default)
+_[oneOf]_ - List of measure names. Use '*' to select all measures (default)
 
 - **option 1** - _[string]_ - Wildcard(*) selector that includes all available fields in the selection
 
 - **option 2** - _[array of string]_ - Explicit list of fields to include in the selection
 
 - **option 3** - _[object]_ - Advanced matching using regex, DuckDB expression, or exclusion
 
   - **`regex`** - _[string]_ - Select fields using a regular expression
 
   - **`expr`** - _[string]_ - DuckDB SQL expression to select fields based on custom logic
 
   - **`exclude`** - _[object]_ - Select all fields except those listed here
 
+```yaml
+# Example: Select a measure
+measures:
+  - sum_of_total
+
+# Example: Select all measures except one
+measures:
+  exclude:
+    - sum_of_total
+
+# Example: Select all measures that match a regex
+measures:
+  regex: "^public_.*$"
+```
+
 ### `theme`
 
-_[oneOf]_ - Name of the theme to use or define a theme inline. Either theme name or inline theme can be set.
+_[oneOf]_ - Name of the theme to use. Only one of theme and embedded_theme can be set.
 
 - **option 1** - _[string]_ - Name of an existing theme to apply to the dashboard
 
@@ -76,7 +108,19 @@ _[oneOf]_ - Name of the theme to use or define a theme inline. Either theme name
 
 ### `time_ranges`
 
-_[array of oneOf]_ - Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets'
+_[array of oneOf]_ - Overrides the list of default time range selections available in the dropdown. It can be a string or an object with a 'range' and optional 'comparison_offsets'
+  ```yaml
+  time_ranges:
+    - PT15M # Simplified syntax to specify only the range
+    - PT1H
+    - PT6H
+    - P7D
+    - range: P5D # Advanced syntax to specify comparison_offsets as well
+    - P4W
+    - rill-TD # Today
+    - rill-WTD # Week-To-date
+  ```
+
 - **option 1** - _[string]_ - a valid [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) duration or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) extensions for the selection
 
 - **option 2** - _[object]_ - Object containing time range and comparison configuration
 
   - **`range`** - _[string]_ - a valid [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) duration or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) extensions for the selection _(required)_
 
   - **`comparison_offsets`** - _[array of oneOf]_ - list of time comparison options for this time range selection (optional). Must be one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions)
 
     - **option 1** - _[string]_ - Offset string only (range is inferred)
 
     - **option 2** - _[object]_ - Object containing offset and range configuration for time comparison
 
       - **`offset`** - _[string]_ - Time offset for comparison (e.g., 'P1D' for one day ago)
 
       - **`range`** - _[string]_ - Custom time range for comparison period
 
@@ -108,7 +152,20 @@ _[boolean]_ - Defaults to true, when set to false it will hide the ability to se
 
 ### `defaults`
 
-_[object]_ - defines the defaults YAML struct
+_[object]_ - defines the defaults YAML struct
+  ```yaml
+  defaults: # define all the defaults within here
+    dimensions:
+      - dim_1
+      - dim_2
+    measures:
+      - measure_1
+      - measure_2
+    time_range: P1M
+    comparison_mode: dimension # time, none
+    comparison_dimension: filename
+  ```
+
 - **`dimensions`** - _[oneOf]_ - Provides the default dimensions to load on viewing the dashboard
 
@@ -152,7 +209,7 @@ _[object]_ - Configuration options for embedded dashboard views
 
 ### `security`
 
-_[object]_ - Defines security rules and access control policies for resources
+_[object]_ - Defines [security rules and access control policies](/manage/security) for dashboards (without row filtering)
 
 - **`access`** - _[oneOf]_ - Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean.
 
@@ -160,42 +217,6 @@ _[object]_ - Defines security rules and access control policies for resources
 
   - **option 1** - _[string]_ - SQL expression that evaluates to a boolean to determine access
 
   - **option 2** - _[boolean]_ - Direct boolean value to allow or deny access
 
-  - **`row_filter`** - _[string]_ - SQL expression to filter the underlying model by. Can leverage templated user attributes to customize the filter for the requesting user. Needs to be a valid SQL expression that can be injected into a WHERE clause
-
-  - **`include`** - _[array of object]_ - List of dimension or measure names to include in the dashboard. If include is defined all other dimensions and measures are excluded
-
-    - **`if`** - _[string]_ - Expression to decide if the column should be included or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean _(required)_
-
-    - **`names`** - _[anyOf]_ - List of fields to include. Should match the name of one of the dashboard's dimensions or measures _(required)_
-
-      - **option 1** - _[array of string]_ - List of specific field names to include
-
-      - **option 2** - _[string]_ - Wildcard '*' to include all fields
-
-  - **`exclude`** - _[array of object]_ - List of dimension or measure names to exclude from the dashboard. If exclude is defined all other dimensions and measures are included
-
-    - **`if`** - _[string]_ - Expression to decide if the column should be excluded or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean _(required)_
-
-    - **`names`** - _[anyOf]_ - List of fields to exclude. Should match the name of one of the dashboard's dimensions or measures _(required)_
-
-      - **option 1** - _[array of string]_ - List of specific field names to exclude
-
-      - **option 2** - _[string]_ - Wildcard '*' to exclude all fields
-
-  - **`rules`** - _[array of object]_ - List of detailed security rules that can be used to define complex access control policies
-
-    - **`type`** - _[string]_ - Type of security rule - access (overall access), field_access (field-level access), or row_filter (row-level filtering) _(required)_
-
-    - **`action`** - _[string]_ - Whether to allow or deny access for this rule
-
-    - **`if`** - _[string]_ - Conditional expression that determines when this rule applies. Must be a valid SQL expression that evaluates to a boolean
-
-    - **`names`** - _[array of string]_ - List of field names this rule applies to (for field_access type rules)
-
-    - **`all`** - _[boolean]_ - When true, applies the rule to all fields (for field_access type rules)
-
-    - **`sql`** - _[string]_ - SQL expression for row filtering (for row_filter type rules)
-
 ## Common Properties
 
 ### `name`
diff --git a/docs/docs/hidden/yaml/index.md b/docs/docs/hidden/yaml/index.md
index 60f1d490f70..701f727ea3f 100644
--- a/docs/docs/hidden/yaml/index.md
+++ b/docs/docs/hidden/yaml/index.md
@@ -10,7 +10,7 @@ When you create models and dashboards, these objects are represented as object f
 
 :::info Working with resources outside their native folders
 
-It is possible to define resources (such as [models](model.md), [metrics-views](metrics-view.md), [dashboards](explore.md), [custom APIs](api.md), or [themes](theme.md)) within any nested folder within your Rill project directory. However, for any YAML configuration file, it is then imperative that the `type` property is then appropriately defined within the underlying resource configuration or Rill will not able to resolve the resource type correctly!
+It is possible to define resources (such as [models](advanced-models.md), [metrics-views](metrics-views.md), [dashboards](explore-dashboards.md), [custom APIs](apis.md), or [themes](themes.md)) within any nested folder within your Rill project directory. However, for any YAML configuration file, it is imperative that the `type` property is appropriately defined within the underlying resource configuration or Rill will not be able to resolve the resource type correctly!
::: @@ -28,13 +28,14 @@ For more information about using Git or cloning projects locally, please see our ## Project files types -- [Alert YAML](alert.md) -- [API YAML](api.md) -- [Canvas YAML](canvas.md) -- [Component YAML](component.md) -- [Connector YAML](connector.md) -- [Explore YAML](explore.md) -- [Metrics View YAML](metrics-view.md) -- [Model YAML](model.md) -- [Theme YAML](theme.md) -- [Project YAML](project.md) \ No newline at end of file +- [Connector YAML](connectors.md) +- [Source YAML](sources.md) +- [Model SQL](models.md) +- [Models YAML](advanced-models.md) +- [Metrics View YAML](metrics-views.md) +- [Canvas Dashboard YAML](canvas-dashboards.md) +- [Explore Dashboard YAML](explore-dashboards.md) +- [Alert YAML](alerts.md) +- [API YAML](apis.md) +- [Theme YAML](themes.md) +- [Project YAML](rillyaml.md) \ No newline at end of file diff --git a/docs/docs/hidden/yaml/metrics-view.md b/docs/docs/hidden/yaml/metrics-views.md similarity index 61% rename from docs/docs/hidden/yaml/metrics-view.md rename to docs/docs/hidden/yaml/metrics-views.md index 4ea702b3ee1..d9674622940 100644 --- a/docs/docs/hidden/yaml/metrics-view.md +++ b/docs/docs/hidden/yaml/metrics-views.md @@ -1,20 +1,24 @@ --- note: GENERATED. DO NOT EDIT. title: Metrics View YAML -sidebar_position: 37 +sidebar_position: 35 --- In your Rill project directory, create a metrics view, `.yaml`, file in the `metrics` directory. Rill will ingest the metric view definition next time you run `rill start`. ## Properties +### `version` + +_[string]_ - The version of the metrics view schema + ### `type` _[string]_ - Refers to the resource type and must be `metrics_view` _(required)_ -### `parent` +### `connector` -_[string]_ - Refers to the parent metrics from which this metrics view is derived. If specified, this will inherit properties from the parent metrics view +_[string]_ - Refers to the connector type for the metrics view, see [OLAP engines](/connect/olap) for more information ### `display_name` @@ -26,11 +30,11 @@ _[string]_ - Refers to the description for the metrics view ### `ai_instructions` -_[string]_ - Extra instructions for AI agents. Used to guide natural language question answering and routing. +_[string]_ - Extra instructions for [AI agents](/explore/mcp). Used to guide natural language question answering and routing. ### `model` -_[string]_ - Refers to the model powering the dashboard (either model or table is required) +_[string]_ - Refers to the model powering the dashboard (either model or table is required) _(required)_ ### `database` @@ -86,17 +90,19 @@ _[array of object]_ - Relates to exploring segments or dimensions of your data a _[array of object]_ - Used to define the numeric aggregates of columns from your data model - - **`name`** - _[string]_ - a stable identifier for the measure + - **`name`** - _[string]_ - a stable identifier for the measure _(required)_ - - **`display_name`** - _[string]_ - the display name of your measure. + - **`display_name`** - _[string]_ - the display name of your measure. _(required)_ - - **`description`** - _[string]_ - a freeform text description of the dimension + - **`label`** - _[string]_ - a label for your measure, deprecated use display_name + + - **`description`** - _[string]_ - a freeform text description of the measure - **`type`** - _[string]_ - Measure calculation type: "simple" for basic aggregations, "derived" for calculations using other measures, or "time_comparison" for period-over-period analysis. Defaults to "simple" unless dependencies exist. 
- - **`expression`** - _[string]_ - a combination of operators and functions for aggregations + - **`expression`** - _[string]_ - a combination of operators and functions for aggregations _(required)_ - - **`window`** - _[anyOf]_ - A measure window can be defined as a keyword string (e.g. 'time' or 'all') or an object with detailed window configuration. + - **`window`** - _[anyOf]_ - A measure window can be defined as a keyword string (e.g. 'time' or 'all') or an object with detailed window configuration. For more information, see the [window functions](/build/metrics-view/advanced-expressions/windows) documentation. - **option 1** - _[string]_ - Shorthand: `time` or `true` means time-partitioned, `all` means non-partitioned. @@ -104,11 +110,11 @@ _[array of object]_ - Used to define the numeric aggregates of columns from your - **`partition`** - _[boolean]_ - Controls whether the window is partitioned. When true, calculations are performed within each partition separately. - - **`order`** - _[anyOf]_ - Specifies the fields to order the window by, determining the sequence of rows within each partition. + - **`order`** - _[string]_ - Specifies the fields to order the window by, determining the sequence of rows within each partition. - **option 1** - _[string]_ - Simple field name as a string. - - **option 2** - _[array of anyOf]_ - List of field selectors, each can be a string or an object with detailed configuration. + - **option 2** - _[array of oneOf]_ - List of field selectors, each can be a string or an object with detailed configuration. - **option 1** - _[string]_ - Shorthand field selector, interpreted as the name. @@ -120,11 +126,11 @@ _[array of object]_ - Used to define the numeric aggregates of columns from your - **`frame`** - _[string]_ - Defines the window frame boundaries for calculations, specifying which rows are included in the window relative to the current row. - - **`per`** - _[anyOf]_ - for per dimensions + - **`per`** - _[oneOf]_ - for per dimensions - **option 1** - _[string]_ - Simple field name as a string. - - **option 2** - _[array of anyOf]_ - List of field selectors, each can be a string or an object with detailed configuration. + - **option 2** - _[array of oneOf]_ - List of field selectors, each can be a string or an object with detailed configuration. - **option 1** - _[string]_ - Shorthand field selector, interpreted as the name. @@ -134,11 +140,11 @@ _[array of object]_ - Used to define the numeric aggregates of columns from your - **`time_grain`** - _[string]_ - Time grain for time-based dimensions. - - **`requires`** - _[anyOf]_ - using an available measure or dimension in your metrics view to set a required parameter, cannot be used with simple measures + - **`requires`** - _[oneOf]_ - using an available measure or dimension in your metrics view to set a required parameter, cannot be used with simple measures. See [referencing measures](/build/metrics-view/advanced-expressions/referencing) for more information. - **option 1** - _[string]_ - Simple field name as a string. - - **option 2** - _[array of anyOf]_ - List of field selectors, each can be a string or an object with detailed configuration. + - **option 2** - _[array of oneOf]_ - List of field selectors, each can be a string or an object with detailed configuration. - **option 1** - _[string]_ - Shorthand field selector, interpreted as the name. 
@@ -148,6 +154,8 @@ _[array of object]_ - Used to define the numeric aggregates of columns from your - **`time_grain`** - _[string]_ - Time grain for time-based dimensions. + - **`valid_percent_of_total`** - _[boolean]_ - a boolean indicating whether percent-of-total values should be rendered for this measure + - **`format_preset`** - _[string]_ - Controls the formatting of this measure using a predefined preset. Measures cannot have both `format_preset` and `format_d3`. If neither is supplied, the measure will be formatted using the `humanize` preset by default. Available options: @@ -161,43 +169,20 @@ _[array of object]_ - Used to define the numeric aggregates of columns from your - **`format_d3`** - _[string]_ - Controls the formatting of this measure using a [d3-format](https://d3js.org/d3-format) string. If an invalid format string is supplied, the measure will fall back to `format_preset: humanize`. A measure cannot have both `format_preset` and `format_d3`. If neither is provided, the humanize preset is used by default. Example: `format_d3: ".2f"` formats using fixed-point notation with two decimal places. Example: `format_d3: ",.2r"` formats using grouped thousands with two significant digits. (optional) - - **`format_d3_locale`** - _[object]_ - locale configuration passed through to D3, enabling changing the currency symbol among other things. For details, see the docs for D3's [formatLocale](https://d3js.org/d3-format#formatLocale) - - - **`valid_percent_of_total`** - _[boolean]_ - a boolean indicating whether percent-of-total values should be rendered for this measure - - - **`treat_nulls_as`** - _[string]_ - used to configure what value to fill in for missing time buckets. This also works generally as COALESCING over non empty time buckets. - -### `parent_dimensions` - -_[oneOf]_ - Optional field selectors for dimensions to inherit from the parent metrics view. - - - **option 1** - _[string]_ - Wildcard(*) selector that includes all available fields in the selection - - - **option 2** - _[array of string]_ - Explicit list of fields to include in the selection - - - **option 3** - _[object]_ - Advanced matching using regex, DuckDB expression, or exclusion - - - **`regex`** - _[string]_ - Select fields using a regular expression - - - **`expr`** - _[string]_ - DuckDB SQL expression to select fields based on custom logic - - - **`exclude`** - _[object]_ - Select all fields except those listed here - -### `parent_measures` - -_[oneOf]_ - Optional field selectors for measures to inherit from the parent metrics view. - - - **option 1** - _[string]_ - Wildcard(*) selector that includes all available fields in the selection - - - **option 2** - _[array of string]_ - Explicit list of fields to include in the selection - - - **option 3** - _[object]_ - Advanced matching using regex, DuckDB expression, or exclusion + - **`format_d3_locale`** - _[object]_ - locale configuration passed through to D3, enabling changing the currency symbol among other things. For details, see the docs for D3's formatLocale. 
+    ```yaml
+    format_d3: "$,"
+    format_d3_locale:
+      grouping: [3, 2]
+      currency: ["₹", ""]
+    ```
+
-    - **`regex`** - _[string]_ - Select fields using a regular expression
+    - **`grouping`** - _[array]_ - the digit group sizes (e.g. `[3, 2]` groups digits as in the Indian numbering system)

-    - **`expr`** - _[string]_ - DuckDB SQL expression to select fields based on custom logic
+    - **`currency`** - _[array]_ - the currency prefix and suffix (e.g. `["₹", ""]`)

-    - **`exclude`** - _[object]_ - Select all fields except those listed here
+  - **`treat_nulls_as`** - _[string]_ - used to configure the value to fill in for missing time buckets. This also works more generally as a COALESCE over non-empty time buckets.

 ### `annotations`

@@ -205,7 +190,7 @@ _[array of object]_ - Used to define annotations that can be displayed on charts

   - **`name`** - _[string]_ - A stable identifier for the annotation. Defaults to model or table names when not specified

-  - **`model`** - _[string]_ - Refers to the model powering the annotation (either table or model is required). The model must have 'time' and 'description' columns. Optional columns include 'time_end' for range annotations and 'duration' to specify when the annotation should appear based on dashboard grain level.
+  - **`model`** - _[string]_ - Refers to the model powering the annotation (either table or model is required). The model must have 'time' and 'description' columns. Optional columns include 'time_end' for range annotations and 'grain' to specify when the annotation should appear based on dashboard grain level.

   - **`database`** - _[string]_ - Refers to the database to use in the OLAP engine (to be used in conjunction with table). Otherwise, will use the default database or schema if not specified

@@ -231,7 +216,7 @@ _[array of object]_ - Used to define annotations that can be displayed on charts

 ### `security`

-_[object]_ - Defines security rules and access control policies for resources
+_[object]_ - Defines [security rules and access control policies](/manage/security) for resources

   - **`access`** - _[oneOf]_ - Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean.

@@ -275,96 +260,6 @@ _[object]_ - Defines security rules and access control policies for resources

   - **`sql`** - _[string]_ - SQL expression for row filtering (for row_filter type rules)

-### `explore`
-
-_[object]_ - Defines an optional inline explore view for the metrics view. If not specified a default explore will be emitted unless `skip` is set to true.
-
-  - **`skip`** - _[boolean]_ - If true, disables the explore view for this metrics view.
-
-  - **`name`** - _[string]_ - Name of the explore view.
-
-  - **`display_name`** - _[string]_ - Display name for the explore view.
-
-  - **`description`** - _[string]_ - Description for the explore view.
-
-  - **`banner`** - _[string]_ - Custom banner displayed at the header of the explore view.
-
-  - **`theme`** - _[oneOf]_ - Name of the theme to use or define a theme inline. Either theme name or inline theme can be set.
-
-    - **option 1** - _[string]_ - Name of an existing theme to apply to the explore view.
-
-    - **option 2** - _[object]_ - Inline theme configuration.
-
-      - **`colors`** - _[object]_ - Used to override the dashboard colors. Either primary or secondary color must be provided.
-
-        - **`primary`** - _[string]_ - Overrides the primary blue color in the dashboard.
Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors). - - - **`secondary`** - _[string]_ - Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. - - - **`time_ranges`** - _[array of oneOf]_ - Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets'. - - - **option 1** - _[string]_ - a valid [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) duration or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) extensions for the selection - - - **option 2** - _[object]_ - Object containing time range and comparison configuration - - - **`range`** - _[string]_ - a valid [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) duration or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) extensions for the selection _(required)_ - - - **`comparison_offsets`** - _[array of oneOf]_ - list of time comparison options for this time range selection (optional). Must be one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) - - - **option 1** - _[string]_ - Offset string only (range is inferred) - - - **option 2** - _[object]_ - Object containing offset and range configuration for time comparison - - - **`offset`** - _[string]_ - Time offset for comparison (e.g., 'P1D' for one day ago) - - - **`range`** - _[string]_ - Custom time range for comparison period - - - **`time_zones`** - _[array of string]_ - List of time zones to pin to the top of the time zone selector. Should be a list of IANA time zone identifiers. - - - **`lock_time_zone`** - _[boolean]_ - When true, the explore view will be locked to the first time zone provided in the time_zones list. If no time_zones are provided, it will be locked to UTC. - - - **`allow_custom_time_range`** - _[boolean]_ - Defaults to true. When set to false, hides the ability to set a custom time range for the user. - - - **`defaults`** - _[object]_ - Preset UI state to show by default. - - - **`dimensions`** - _[oneOf]_ - Default dimensions to load on viewing the explore view. - - - **option 1** - _[string]_ - Wildcard(*) selector that includes all available fields in the selection - - - **option 2** - _[array of string]_ - Explicit list of fields to include in the selection - - - **option 3** - _[object]_ - Advanced matching using regex, DuckDB expression, or exclusion - - - **`regex`** - _[string]_ - Select fields using a regular expression - - - **`expr`** - _[string]_ - DuckDB SQL expression to select fields based on custom logic - - - **`exclude`** - _[object]_ - Select all fields except those listed here - - - **`measures`** - _[oneOf]_ - Default measures to load on viewing the explore view. 
-
-  - **option 1** - _[string]_ - Wildcard(*) selector that includes all available fields in the selection
-
-  - **option 2** - _[array of string]_ - Explicit list of fields to include in the selection
-
-  - **option 3** - _[object]_ - Advanced matching using regex, DuckDB expression, or exclusion
-
-    - **`regex`** - _[string]_ - Select fields using a regular expression
-
-    - **`expr`** - _[string]_ - DuckDB SQL expression to select fields based on custom logic
-
-    - **`exclude`** - _[object]_ - Select all fields except those listed here
-
-  - **`time_range`** - _[string]_ - Default time range to display when the explore view loads.
-
-  - **`comparison_mode`** - _[string]_ - Default comparison mode for metrics (none, time, or dimension).
-
-  - **`comparison_dimension`** - _[string]_ - Default dimension to use for comparison when comparison_mode is 'dimension'.
-
-  - **`embeds`** - _[object]_ - Configuration options for embedded explore views.
-
-    - **`hide_pivot`** - _[boolean]_ - When true, hides the pivot table view in embedded mode.
-
 ## Common Properties

 ### `name`
diff --git a/docs/docs/hidden/yaml/models.md b/docs/docs/hidden/yaml/models.md
new file mode 100644
index 00000000000..e87fd3aa0ba
--- /dev/null
+++ b/docs/docs/hidden/yaml/models.md
@@ -0,0 +1,52 @@
+---
+note: GENERATED. DO NOT EDIT.
+title: Model SQL
+sidebar_position: 33
+---
+
+When using Rill Developer, data transformations are powered by DuckDB and its dialect of SQL. Under the hood, by default, data models are created as views in DuckDB. Please check our modeling page and the DuckDB documentation for more details on how to construct and write your model SQL.
+
+In your Rill project directory, you can also create a `.sql` file containing an appropriate DuckDB `SELECT` statement, most commonly within the default `models` directory, to represent a model (or set of SQL transformations). Rill will automatically detect and parse the model next time you run `rill start`.
+
+### Annotating your models with properties
+In most cases, objects are represented in Rill as YAML files. Models are unique in that any `model.sql` file is treated as a model resource in Rill: a SQL transformation that takes a set of inputs and outputs a view or table (depending on the materialization type). For most other resources, available properties can be set directly via the corresponding YAML file. In the case of a model SQL file, configurable properties are instead set by annotating the top of the file using the following syntax:
+```sql
+-- @property: value
+```
+The available configurable properties are covered in the sections below.
+
+
+## Properties
+
+### `type`
+
+_[string]_ - By default, any new model that is created in a Rill project will populate a corresponding .sql file representing the model. Similarly, a .sql file that is directly created in the project directory will also be automatically assumed by Rill to be a model by default. Therefore, it is not necessary to annotate the model resource with the type property.
+
+For consistency or documentation purposes, if you'd like to annotate your model resource with the type property as well, you can do so by adding the following to the top of your model_name.sql:
+```sql
+-- @type: model
+```
+
+
+### `materialize`
+
+_[boolean]_ - As mentioned, models will be materialized in DuckDB as views by default. However, you can choose to materialize them as tables instead of views. To do this, you can add the following annotation to the top of your model SQL file:
+```sql
+-- @materialize: true
+```
+
+Alternatively, you can set a project-wide default that your models inherit via your rill.yaml file:
+```yaml
+models:
+  materialize: true
+```
+
+:::info To materialize or not to materialize?
+
+There are both pros and cons to materializing your models.
+- Pros can include improved performance for downstream models and dashboards, especially when the SQL is complex and/or the data size is large. We generally recommend _materializing_ final models that power dashboards.
+- Cons can include a degraded keystroke-by-keystroke modeling experience, as well as issues in specific edge cases, such as when using cross joins.
+
+If unsure, we would generally recommend leaving the defaults and/or reaching out for further guidance!
+:::
+
\ No newline at end of file
diff --git a/docs/docs/hidden/yaml/project.md b/docs/docs/hidden/yaml/rillyaml.md
similarity index 85%
rename from docs/docs/hidden/yaml/project.md
rename to docs/docs/hidden/yaml/rillyaml.md
index 75f353e7966..d70073805c2 100644
--- a/docs/docs/hidden/yaml/project.md
+++ b/docs/docs/hidden/yaml/rillyaml.md
@@ -1,7 +1,7 @@
 ---
 note: GENERATED. DO NOT EDIT.
 title: Project YAML
-sidebar_position: 40
+sidebar_position: 41
 ---

 The `rill.yaml` file contains metadata about your project.
@@ -26,19 +26,19 @@ _[object]_ - Optional feature flags. Can be specified as a map of feature names

 ### `ai_instructions`

-_[string]_ - Extra instructions for LLM/AI features. Used to guide natural language question answering and routing.
+_[string]_ - Extra instructions for [AI agents](/explore/mcp). Used to guide natural language question answering and routing.

 ## Configuring the default OLAP Engine

 Rill allows you to specify the default OLAP engine to use in your project via `rill.yaml`.

 :::info Curious about OLAP Engines?
-Please see our reference documentation on [OLAP Engines](/connect).
+Please see our reference documentation on [OLAP Engines](/connect/olap).
 :::

 ### `olap_connector`

-_[string]_ - Specifies the default OLAP engine for the project. Defaults to duckdb if not set.
+_[string]_ - Specifies the [default OLAP engine](/connect/olap) for the project. Defaults to duckdb if not set.

 ```yaml
 olap_connector: clickhouse
 ```

 ## Project-wide defaults

-In `rill.yaml`, project-wide defaults can be specified for a resource type within a project. Unless otherwise specified, _individual resources will inherit any defaults_ that have been specified in `rill.yaml`. For available properties that can be configured, please refer to the YAML specification for each individual resource type - [model](model.md), [metrics_view](metrics-view.md), and [explore](explore.md)
+In `rill.yaml`, project-wide defaults can be specified for a resource type within a project. Unless otherwise specified, _individual resources will inherit any defaults_ that have been specified in `rill.yaml`. For available properties that can be configured, please refer to the YAML specification for each individual resource type - [model](advanced-models.md), [metrics_view](metrics-views.md), and [explore](explore-dashboards.md)

 :::note Use plurals when specifying project-wide defaults
 In your `rill.yaml`, the top level property for the resource type needs to be **plural**, such as `models`, `metrics_views` and `explores`.
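To make the plural convention concrete, here is an illustrative sketch (the property values are arbitrary examples, not recommendations):

```yaml
# rill.yaml: note the plural top-level keys
models:
  materialize: true
metrics_views:
  first_day_of_week: 7 # assuming 7 maps to Sunday here
```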
@@ -54,7 +54,7 @@ In your `rill.yaml`, the top level property for the resource type needs to be **

 :::info Hierarchy of inheritance and property overrides
 As a general rule of thumb, properties that have been specified at a more _granular_ level will supercede or override higher level properties that have been inherited. Therefore, in order of inheritance, Rill will prioritize properties in the following order:
-1. Individual [models](model.md)/[metrics_views](metrics-view.md)/[explore](explore.md) object level properties (e.g. `model.yaml` or `explore.yaml`)
+1. Individual [models](advanced-models.md)/[metrics_views](metrics-views.md)/[explore](explore-dashboards.md) object level properties (e.g. `model.yaml` or `explore.yaml`)
 2. [Environment](/docs/build/models/environments.md) level properties (e.g. a specific property that has been set for `dev`)
 3. [Project-wide defaults](#project-wide-defaults) for a specific property and resource type
 :::
@@ -74,7 +74,7 @@ _[object]_ - Defines project-wide default settings for explores. Unless overridd

 ```yaml
 # For example, the following YAML configuration will set a project-wide default for:
-# Models - Configure a [source refresh](/build/models/source-refresh).
+# Models - Configure a [source refresh](/build/connect/source-refresh.md).
 # Metrics View - Set the [first day of the week](metrics-view.md) for timeseries aggregations to be Sunday along with setting the smallest_time_grain.
 # Explore Dashboards - Set the [default](explore-dashboards.md) values when a user opens a dashboard, and available time zones and/or time ranges.
 models:
@@ -112,7 +112,7 @@ explores:

 ## Setting variables

-Primarily useful for [templating](/connect/templating), variables can be set in the `rill.yaml` file directly. This allows variables to be set for your projects deployed to Rill Cloud while still being able to use different variable values locally if you prefer.
+Primarily useful for [templating](/connect/templating.md), variables can be set in the `rill.yaml` file directly. This allows variables to be set for your projects deployed to Rill Cloud while still being able to use different variable values locally if you prefer.
 :::info Overriding variables locally
 Variables also follow an order of precedence and can be overridden locally. By default, any variables defined will be inherited from `rill.yaml`. However, if you manually pass in a variable when starting Rill Developer locally via the CLI, this value will be used instead for the current instance of your running project:
 ```bash
@@ -189,10 +189,22 @@ mock_users:
       groups:
         - partners
   - email: anon@unknown.com
+  - email: embed@rilldata.com
+    name: embed
+    custom_variable_1: Value_1
+    custom_variable_2: Value_2
 ```

 ## Common Properties

+### `name`
+
+_[string]_ - Name is usually inferred from the filename, but can be specified manually.
+
+### `refs`
+
+_[array of string]_ - List of resource references
+
 ### `dev`

 _[object]_ - Overrides any properties in development environment.
diff --git a/docs/docs/hidden/yaml/sources.md b/docs/docs/hidden/yaml/sources.md
new file mode 100644
index 00000000000..04098885635
--- /dev/null
+++ b/docs/docs/hidden/yaml/sources.md
@@ -0,0 +1,114 @@
+---
+note: GENERATED. DO NOT EDIT.
+title: Source YAML
+sidebar_position: 32
+---
+
+:::warning Deprecated Feature
+**Sources have been deprecated** and are now considered "source models."
While sources remain backward compatible, we recommend migrating to the new source model format for access to the latest features and improvements. + +**Next steps:** +- Continue using sources if needed (backward compatible) +- Migrate to source models via the `type:model` parameter for existing projects +- See our [model YAML reference](advanced-models) for current documentation and best practices +::: + + +## Properties + +### `type` + +_[string]_ - Refers to the resource type and must be `connector` _(required)_ + +### `connector` + +_[string]_ - Refers to the connector type for the source, see [connectors](/reference/project-files/connectors) for more information _(required)_ + +### `uri` + +_[string]_ - Refers to the URI of the remote connector you are using for the source. Rill also supports glob patterns as part of the URI for S3 and GCS (required for type: http, s3, gcs). + +- `s3://your-org/bucket/file.parquet` — the s3 URI of your file +- `gs://your-org/bucket/file.parquet` — the gsutil URI of your file +- `https://data.example.org/path/to/file.parquet` — the web address of your file + + +### `path` + +_[string]_ - Refers to the local path of the connector you are using for the source + +### `sql` + +_[string]_ - Sets the SQL query to extract data from a SQL source + +### `region` + +_[string]_ - Sets the cloud region of the S3 bucket or Athena + +### `endpoint` + +_[string]_ - Overrides the S3 endpoint to connect to + +### `output_location` + +_[string]_ - Sets the query output location and result files in Athena + +### `workgroup` + +_[string]_ - Sets a workgroup for Athena connector + +### `project_id` + +_[string]_ - Sets a project id to be used to run BigQuery jobs + +### `timeout` + +_[string]_ - The maximum time to wait for source ingestion + +### `refresh` + +_[object]_ - Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying source data (optional). +```yaml +refresh: + cron: "* * * * *" + every: "24h" +``` + + + - **`cron`** - _[string]_ - A cron schedule expression, which should be encapsulated in single quotes, e.g. `* * * * *` + + - **`every`** - _[string]_ - A Go duration string, such as `24h` + +### `db` + +_[string]_ - Sets the database for motherduck connections and/or the path to the DuckDB/SQLite db file + +### `database_url` + +_[string]_ - Postgres connection string that should be used + +### `duckdb` + +_[object]_ - Specifies the raw parameters to inject into the DuckDB read_csv, read_json or read_parquet statement + +### `dsn` + +_[string]_ - Used to set the Snowflake connection string + +## Common Properties + +### `name` + +_[string]_ - Name is usually inferred from the filename, but can be specified manually. + +### `refs` + +_[array of string]_ - List of resource references + +### `dev` + +_[object]_ - Overrides any properties in development environment. + +### `prod` + +_[object]_ - Overrides any properties in production environment. \ No newline at end of file diff --git a/docs/docs/hidden/yaml/theme.md b/docs/docs/hidden/yaml/themes.md similarity index 55% rename from docs/docs/hidden/yaml/theme.md rename to docs/docs/hidden/yaml/themes.md index 35d35542419..5f8c88d4ef5 100644 --- a/docs/docs/hidden/yaml/theme.md +++ b/docs/docs/hidden/yaml/themes.md @@ -1,7 +1,7 @@ --- note: GENERATED. DO NOT EDIT. title: Theme YAML -sidebar_position: 39 +sidebar_position: 40 --- In your Rill project directory, create a `.yaml` file in any directory containing `type: theme`. 
Rill will automatically ingest the theme next time you run `rill start` or deploy to Rill Cloud. @@ -17,11 +17,11 @@ _[string]_ - Refers to the resource type and must be `theme` _(required)_ ### `colors` -_[object]_ - Used to override the dashboard colors. Either primary or secondary color must be provided. _(required)_ +_[object]_ - Color palette for the theme - - **`primary`** - _[string]_ - Overrides the primary blue color in the dashboard. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors). + - **`primary`** - _[string]_ - Primary color - - **`secondary`** - _[string]_ - Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. + - **`secondary`** - _[string]_ - Secondary color ## Common Properties @@ -46,7 +46,8 @@ _[object]_ - Overrides any properties in production environment. ```yaml # Example: You can copy this directly into your .yaml file type: theme + colors: - primary: plum - secondary: violet -``` \ No newline at end of file + primary: plum + secondary: violet +``` diff --git a/runtime/parser/old/advanced-models.schema.yaml b/runtime/parser/old/advanced-models.schema.yaml new file mode 100644 index 00000000000..6104e2733f0 --- /dev/null +++ b/runtime/parser/old/advanced-models.schema.yaml @@ -0,0 +1,534 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: advanced-models.schema.yaml +title: Models YAML +type: object +description: | + :::tip + + Both regular models and source models can use the Model YAML specification described on this page. While [SQL models](./models) are perfect for simple transformations, Model YAML files provide advanced capabilities for complex data processing scenarios. + + **When to use Model YAML:** + - **Partitions** - Optimize performance with data partitioning strategies + - **Incremental models** - Process only new or changed data efficiently + - **Pre/post execution hooks** - Run custom logic before or after model execution + - **Staging** - Create intermediate tables for complex transformations + - **Output configuration** - Define specific output formats and destinations + + Model YAML files give you fine-grained control over how your data is processed and transformed, making them ideal for production workloads and complex analytics pipelines. + + ::: + + + +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: model + description: Refers to the resource type and must be `model` + refresh: + $ref: '#/definitions/schedule_properties' + description: Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying model data + connector: + type: string + const: connector + description: Refers to the resource type and is needed if setting an explicit OLAP engine. 
IE `clickhouse` + sql: + type: string + description: Raw SQL query to run against source + timeout: + type: string + description: The maximum time to wait for model ingestion + incremental: + type: boolean + description: whether incremental modeling is required (optional) + change_mode: + type: string + enum: + - reset + - manual + - patch + description: Configure how changes to the model specifications are applied (optional). 'reset' will drop and recreate the model automatically, 'manual' will require a manual full or incremental refresh to apply changes, and 'patch' will switch to the new logic without re-processing historical data (only applies for incremental models). + state: + $ref: '#/definitions/data_properties' + description: Refers to the explicitly defined state of your model, cannot be used with partitions (optional) + partitions: + $ref: '#/definitions/data_properties' + description: Refers to the how your data is partitioned, cannot be used with state. (optional) + materialize: + type: boolean + description: models will be materialized in olap + partitions_watermark: + type: string + description: Refers to a customizable timestamp that can be set to check if an object has been updated (optional). + partitions_concurrency: + type: integer + description: Refers to the number of concurrent partitions that can be read at the same time (optional). + stage: + type: object + properties: + connector: + type: string + description: Refers to the connector type for the staging table + required: + - connector + description: in the case of staging models, where an input source does not support direct write to the output and a staging table is required + additionalProperties: true + output: + type: object + description: to define the properties of output + properties: + table: + type: string + description: Name of the output table. If not specified, the model name is used. + materialize: + type: boolean + description: Whether to materialize the model as a table or view + connector: + type: string + description: Refers to the connector type for the output table. Can be `clickhouse` or `duckdb` and their named connector + incremental_strategy: + type: string + enum: + - append + - merge + - partition_overwrite + description: Strategy to use for incremental updates. Can be 'append', 'merge' or 'partition_overwrite' + unique_key: + type: array + items: + type: string + description: List of columns that uniquely identify a row for merge strategy + partition_by: + type: string + description: Column or expression to partition the table by + allOf: + - if: + title: Additional properties for `output` when `connector` is `clickhouse` + properties: + connector: + const: clickhouse + required: + - connector + then: + properties: + type: + type: string + description: Type to materialize the model into. Can be 'TABLE', 'VIEW' or 'DICTIONARY' + enum: + - TABLE + - VIEW + - DICTIONARY + columns: + type: string + description: Column names and types. Can also include indexes. If unspecified, detected from the query. + engine_full: + type: string + description: Full engine definition in SQL format. Can include partition keys, order, TTL, etc. + engine: + type: string + description: Table engine to use. Default is MergeTree + order_by: + type: string + description: ORDER BY clause. + partition_by: + type: string + description: Partition BY clause. + primary_key: + type: string + description: PRIMARY KEY clause. + sample_by: + type: string + description: SAMPLE BY clause. 
+ ttl: + type: string + description: TTL settings for the table or columns. + table_settings: + type: string + description: Table-specific settings. + query_settings: + type: string + description: Settings used in insert/create table as select queries. + distributed_settings: + type: string + description: Settings for distributed table. + distributed_sharding_key: + type: string + description: Sharding key for distributed table. + dictionary_source_user: + type: string + description: User for accessing the source dictionary table (used if type is DICTIONARY). + dictionary_source_password: + type: string + description: Password for the dictionary source user. + required: + - type + - sql + - $ref: '#/definitions/common_properties' + - type: object + allOf: + - if: + title: Additional properties when `connector` is [`athena`](./connectors#athena) + properties: + connector: + const: athena + required: + - connector + then: + $ref: '#/definitions/model/definitions/athena' + - if: + title: Additional properties when `connector` is [`azure`](./connectors#azure) + properties: + connector: + const: azure + required: + - connector + then: + $ref: '#/definitions/model/definitions/azure' + - if: + title: Additional properties when `connector` is [`bigquery`](./connectors#bigquery) + properties: + connector: + const: bigquery + required: + - connector + then: + $ref: '#/definitions/model/definitions/bigquery' + - if: + title: Additional properties when `connector` is [`duckdb`](./connectors#duckdb) + properties: + connector: + const: duckdb + required: + - connector + then: + $ref: '#/definitions/model/definitions/duckdb' + - if: + title: Additional properties when `connector` is [`gcs`](./connectors#gcs) + properties: + connector: + const: gcs + required: + - connector + then: + $ref: '#/definitions/model/definitions/gcs' + - if: + title: Additional properties when `connector` is [`redshift`](./connectors#redshift) + properties: + connector: + const: redshift + required: + - connector + then: + $ref: '#/definitions/model/definitions/redshift' + - if: + title: Additional properties when `connector` is [`s3`](./connectors#s3) + properties: + connector: + const: s3 + required: + - connector + then: + $ref: '#/definitions/model/definitions/s3' + - if: + title: Additional properties when `connector` is [`salesforce`](./connectors#salesforce) + properties: + connector: + const: salesforce + required: + - connector + then: + $ref: '#/definitions/model/definitions/salesforce' +definitions: + schedule_properties: + type: object + properties: + cron: + type: string + description: A cron expression that defines the execution schedule + time_zone: + type: string + description: Time zone to interpret the schedule in (e.g., 'UTC', 'America/Los_Angeles'). + disable: + type: boolean + description: 'If true, disables the resource without deleting it.' + ref_update: + type: boolean + description: 'If true, allows the resource to run when a dependency updates.' + run_in_dev: + type: boolean + description: 'If true, allows the schedule to run in development mode.' + data_properties: + oneOf: + - title: SQL Query + type: object + description: Executes a raw SQL query against the project's data models. + properties: + sql: + type: string + description: Raw SQL query to run against existing models in the project. + connector: + type: string + description: specifies the connector to use when running SQL or glob queries. 
+ required: + - sql + - title: Metrics View Query + type: object + description: Executes a SQL query that targets a defined metrics view. + properties: + metrics_sql: + type: string + description: SQL query that targets a metrics view in the project + required: + - metrics_sql + - title: Custom API Call + type: object + description: Calls a custom API defined in the project to compute data. + properties: + api: + type: string + description: Name of a custom API defined in the project. + args: + type: object + description: Arguments to pass to the custom API. + additionalProperties: true + required: + - api + - title: File Glob Query + type: object + description: Uses a file-matching pattern (glob) to query data from a connector. + properties: + glob: + description: Defines the file path or pattern to query from the specified connector. + anyOf: + - type: string + description: A simple file path/glob pattern as a string. + - type: object + description: An object-based configuration for specifying a file path/glob pattern with advanced options. + additionalProperties: true + connector: + type: string + description: Specifies the connector to use with the glob input. + required: + - glob + - title: Resource Status Check + type: object + description: Uses the status of a resource as data. + properties: + resource_status: + type: object + description: Based on resource status + properties: + where_error: + type: boolean + description: Indicates whether the condition should trigger when the resource is in an error state. + additionalProperties: true + required: + - resource_status + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `` or ``. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment. + model: + definitions: + athena: + type: object + properties: + output_location: + type: string + description: Output location for query results in S3. + workgroup: + type: string + description: AWS Athena workgroup to use for queries. + region: + type: string + description: AWS region to connect to Athena and the output location. + azure: + type: object + properties: + path: + type: string + description: Path to the source + account: + type: string + description: Account identifier + uri: + type: string + description: Source URI + extract: + type: object + description: Arbitrary key-value pairs for extraction settings + additionalProperties: true + glob: + type: object + description: Settings related to glob file matching. + properties: + max_total_size: + type: integer + description: Maximum total size (in bytes) matched by glob + max_objects_matched: + type: integer + description: Maximum number of objects matched by glob + max_objects_listed: + type: integer + description: Maximum number of objects listed in glob + page_size: + type: integer + description: Page size for glob listing + batch_size: + type: string + description: 'Size of a batch (e.g., ''100MB'')' + bigquery: + type: object + properties: + project_id: + type: string + description: ID of the BigQuery project. + duckdb: + type: object + properties: + path: + type: string + description: Path to the data source. 
+ format: + type: string + description: 'Format of the data source (e.g., csv, json, parquet).' + pre_exec: + type: string + description: 'refers to SQL queries to run before the main query, available for DuckDB-based models. _(optional)_. Ensure `pre_exec` queries are idempotent. Use `IF NOT EXISTS` statements when applicable.' + post_exec: + type: string + description: 'refers to a SQL query that is run after the main query, available for DuckDB-based models. _(optional)_. Ensure `post_exec` queries are idempotent. Use `IF EXISTS` statements when applicable.' + examples: + - pre_exec: ATTACH IF NOT EXISTS 'dbname=postgres host=localhost port=5432 user=postgres password=postgres' AS postgres_db (TYPE POSTGRES); + sql: SELECT * FROM postgres_query('postgres_db', 'SELECT * FROM USERS') + post_exec: DETACH DATABASE IF EXISTS postgres_db + gcs: + type: object + properties: + path: + type: string + description: Path to the source + uri: + type: string + description: Source URI + extract: + type: object + description: key-value pairs for extraction settings + additionalProperties: true + glob: + type: object + description: Settings related to glob file matching. + properties: + max_total_size: + type: integer + description: Maximum total size (in bytes) matched by glob + max_objects_matched: + type: integer + description: Maximum number of objects matched by glob + max_objects_listed: + type: integer + description: Maximum number of objects listed in glob + page_size: + type: integer + description: Page size for glob listing + batch_size: + type: string + description: 'Size of a batch (e.g., ''100MB'')' + local_file: + type: object + properties: + path: + type: string + description: Path to the data source. + format: + type: string + description: 'Format of the data source (e.g., csv, json, parquet).' + redshift: + type: object + properties: + output_location: + type: string + description: S3 location where query results are stored. + workgroup: + type: string + description: Redshift Serverless workgroup to use. + database: + type: string + description: Name of the Redshift database. + cluster_identifier: + type: string + description: Identifier of the Redshift cluster. + role_arn: + type: string + description: ARN of the IAM role to assume for Redshift access. + region: + type: string + description: AWS region of the Redshift deployment. + s3: + type: object + properties: + region: + type: string + description: AWS region + endpoint: + type: string + description: AWS Endpoint + path: + type: string + description: Path to the source + uri: + type: string + description: Source URI + extract: + type: object + description: key-value pairs for extraction settings + additionalProperties: true + glob: + type: object + description: Settings related to glob file matching. + properties: + max_total_size: + type: integer + description: Maximum total size (in bytes) matched by glob + max_objects_matched: + type: integer + description: Maximum number of objects matched by glob + max_objects_listed: + type: integer + description: Maximum number of objects listed in glob + page_size: + type: integer + description: Page size for glob listing + batch_size: + type: string + description: 'Size of a batch (e.g., ''100MB'')' + salesforce: + type: object + properties: + soql: + type: string + description: SOQL query to execute against the Salesforce instance. + sobject: + type: string + description: Salesforce object (e.g., Account, Contact) targeted by the query. 
+        queryAll:
+          type: boolean
+          description: Whether to include deleted and archived records in the query (uses queryAll API).
\ No newline at end of file
diff --git a/runtime/parser/old/alerts.schema.yaml b/runtime/parser/old/alerts.schema.yaml
new file mode 100644
index 00000000000..cd2158fd735
--- /dev/null
+++ b/runtime/parser/old/alerts.schema.yaml
@@ -0,0 +1,282 @@
+$schema: 'http://json-schema.org/draft-07/schema#'
+$id: alerts.schema.yaml
+title: Alert YAML
+type: object
+description: Along with alerts at the dashboard level that can be created via the UI, you may want to develop more extensive alerting, which can be done in an alert.yaml file. When creating an alert via a YAML file, you'll see this denoted in the UI as `Created through code`.
+examples:
+  - # Example: To send an alert when data lags by more than 1 day to Slack channel #rill-cloud-alerts
+    type: alert
+    display_name: Data lags by more than 1 day
+    # Check the alert every hour.
+    refresh:
+      cron: 0 * * * *
+    # Query that returns non-empty results if the metrics lag by more than 1 day.
+    data:
+      sql: |-
+        SELECT *
+        FROM
+        (
+          SELECT MAX(event_time) AS max_time
+          FROM rill_metrics_model
+        )
+        WHERE max_time < NOW() - INTERVAL '1 day'
+    # Send notifications in Slack.
+    notify:
+      slack:
+        channels:
+          - '#rill-cloud-alerts'
+allOf:
+  - title: Properties
+    type: object
+    properties:
+      type:
+        type: string
+        const: alert
+        description: Refers to the resource type and must be `alert`
+      display_name:
+        type: string
+        description: Refers to the display name for the alert
+      refresh:
+        $ref: '#/definitions/schedule_properties'
+        description: Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying data
+      intervals:
+        type: object
+        description: Defines the interval of the alert to check
+        properties:
+          duration:
+            type: string
+            description: a valid ISO 8601 duration that defines the interval duration
+          limit:
+            type: integer
+            description: maximum number of intervals to check for on invocation
+            minimum: 0
+          check_unclosed:
+            type: boolean
+            description: 'whether unclosed intervals should be checked'
+      watermark:
+        type: string
+        enum:
+          - trigger_time
+          - inherit
+        description: Specifies how the watermark is determined for incremental processing. Use 'trigger_time' to set it at runtime or 'inherit' to use the upstream model's watermark.
+      timeout:
+        type: string
+        description: Defines the timeout of the alert in seconds (optional).
+      data:
+        description: Specifies one of the options to retrieve or compute the data used by the alert
+        $ref: '#/definitions/data_properties'
+      for:
+        description: "Specifies how user identity or attributes should be evaluated for security policy enforcement."
+        oneOf:
+          - type: object
+            description: Specifies a unique user identifier for applying security policies.
+            properties:
+              user_id:
+                type: string
+                description: "The unique user ID used to evaluate security policies."
+            required:
+              - user_id
+            additionalProperties: false
+          - type: object
+            description: Specifies a user's email address for applying security policies.
+            properties:
+              user_email:
+                type: string
+                description: "The user's email address used to evaluate security policies."
+                format: email
+            required:
+              - user_email
+            additionalProperties: false
+          - type: object
+            description: Specifies a set of arbitrary user attributes for applying security policies.
+            properties:
+              attributes:
+                type: object
+                description: A dictionary of user attributes used to evaluate security policies.
+ additionalProperties: true + required: + - attributes + additionalProperties: false + on_recover: + type: boolean + description: Send an alert when a previously failing alert recovers. Defaults to false. + on_fail: + type: boolean + description: Send an alert when a failure occurs. Defaults to true. + on_error: + type: boolean + description: Send an alert when an error occurs during evaluation. Defaults to false. + renotify: + type: boolean + description: Enable repeated notifications for unresolved alerts. Defaults to false. + renotify_after: + type: string + description: Defines the re-notification interval for the alert (e.g., '10m','24h'), equivalent to snooze duration in UI, defaults to 'Off' + notify: + $ref: '#/definitions/notify_properties' + description: Defines how and where to send notifications. At least one method (email or Slack) is required. + annotations: + type: object + description: Key value pair used for annotations + additionalProperties: + type: string + required: + - type + - refresh + - data + - notify + - $ref: '#/definitions/common_properties' +definitions: + schedule_properties: + type: object + properties: + cron: + type: string + description: A cron expression that defines the execution schedule + time_zone: + type: string + description: Time zone to interpret the schedule in (e.g., 'UTC', 'America/Los_Angeles'). + disable: + type: boolean + description: 'If true, disables the resource without deleting it.' + ref_update: + type: boolean + description: 'If true, allows the resource to run when a dependency updates.' + run_in_dev: + type: boolean + description: 'If true, allows the schedule to run in development mode.' + data_properties: + oneOf: + - title: SQL Query + type: object + description: Executes a raw SQL query against the project's data models. + properties: + sql: + type: string + description: Raw SQL query to run against existing models in the project. + connector: + type: string + description: specifies the connector to use when running SQL or glob queries. + required: + - sql + - title: Metrics View Query + type: object + description: Executes a SQL query that targets a defined metrics view. + properties: + metrics_sql: + type: string + description: SQL query that targets a metrics view in the project + required: + - metrics_sql + - title: Custom API Call + type: object + description: Calls a custom API defined in the project to compute data. + properties: + api: + type: string + description: Name of a custom API defined in the project. + args: + type: object + description: Arguments to pass to the custom API. + additionalProperties: true + required: + - api + - title: File Glob Query + type: object + description: Uses a file-matching pattern (glob) to query data from a connector. + properties: + glob: + description: Defines the file path or pattern to query from the specified connector. + anyOf: + - type: string + description: A simple file path/glob pattern as a string. + - type: object + description: An object-based configuration for specifying a file path/glob pattern with advanced options. + additionalProperties: true + connector: + type: string + description: Specifies the connector to use with the glob input. + required: + - glob + - title: Resource Status Check + type: object + description: Uses the status of a resource as data. 
+ properties: + resource_status: + type: object + description: Based on resource status + properties: + where_error: + type: boolean + description: Indicates whether the condition should trigger when the resource is in an error state. + additionalProperties: true + required: + - resource_status + notify_properties: + type: object + properties: + email: + type: object + description: Send notifications via email. + properties: + recipients: + type: array + description: An array of email addresses to notify. + items: + type: string + minItems: 1 + required: + - recipients + slack: + type: object + description: Send notifications via Slack. + properties: + users: + type: array + description: An array of Slack user IDs to notify. + items: + type: string + minItems: 1 + channels: + type: array + description: An array of Slack channel IDs to notify. + items: + type: string + minItems: 1 + webhooks: + type: array + description: An array of Slack webhook URLs to send notifications to. + items: + type: string + minItems: 1 + anyOf: + - required: + - channels + - required: + - users + - required: + - webhooks + anyOf: + - required: + - slack + - required: + - email + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `` or ``. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment. \ No newline at end of file diff --git a/runtime/parser/old/apis.schema.yaml b/runtime/parser/old/apis.schema.yaml new file mode 100644 index 00000000000..5574f07b102 --- /dev/null +++ b/runtime/parser/old/apis.schema.yaml @@ -0,0 +1,221 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: apis.schema.yaml +title: API YAML +type: object +description: In your Rill project directory, create a new file name `.yaml` in the `apis` directory containing a custom API definition. See comprehensive documentation on how to define and use [custom APIs](/integrate/custom-apis/index.md) +examples: + - # Example: This api returns the top 10 authors by net line changes since the specified date provided in the arguments. 
+ type: api + name: metrics_view_api + metrics_sql: |- + SELECT author_name, net_line_changes + FROM advanced_metrics_view + where author_date > '{{ .args.date }}' + order by net_line_changes DESC + limit 10 +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: api + description: Refers to the resource type and must be `api` + openapi: + type: object + description: OpenAPI specification for the API endpoint + properties: + summary: + type: string + description: A brief description of what the API endpoint does + parameters: + type: array + description: List of parameters that the API endpoint accepts + items: + type: object + additionalProperties: true + request_schema: + type: object + description: JSON schema for the request body (use nested YAML instead of a JSON string) + additionalProperties: true + response_schema: + type: object + description: JSON schema for the response body (use nested YAML instead of a JSON string) + additionalProperties: true + security: + $ref: '#/definitions/security_policy_properties' + skip_nested_security: + type: boolean + description: Flag to control security inheritance + allOf: + - $ref: '#/definitions/data_properties' + required: + - type + +definitions: + data_properties: + oneOf: + - title: SQL Query + type: object + description: Executes a raw SQL query against the project's data models. + properties: + sql: + type: string + description: Raw SQL query to run against existing models in the project. + connector: + type: string + description: specifies the connector to use when running SQL or glob queries. + required: + - sql + - title: Metrics View Query + type: object + description: Executes a SQL query that targets a defined metrics view. + properties: + metrics_sql: + type: string + description: SQL query that targets a metrics view in the project + required: + - metrics_sql + - title: Custom API Call + type: object + description: Calls a custom API defined in the project to compute data. + properties: + api: + type: string + description: Name of a custom API defined in the project. + args: + type: object + description: Arguments to pass to the custom API. + additionalProperties: true + required: + - api + - title: File Glob Query + type: object + description: Uses a file-matching pattern (glob) to query data from a connector. + properties: + glob: + description: Defines the file path or pattern to query from the specified connector. + anyOf: + - type: string + description: A simple file path/glob pattern as a string. + - type: object + description: An object-based configuration for specifying a file path/glob pattern with advanced options. + additionalProperties: true + connector: + type: string + description: Specifies the connector to use with the glob input. + required: + - glob + - title: Resource Status Check + type: object + description: Uses the status of a resource as data. + properties: + resource_status: + type: object + description: Based on resource status + properties: + where_error: + type: boolean + description: Indicates whether the condition should trigger when the resource is in an error state. 
+ additionalProperties: true + required: + - resource_status + + + security_policy_properties: + type: object + description: Defines security rules and access control policies for resources + properties: + access: + oneOf: + - type: string + description: SQL expression that evaluates to a boolean to determine access + - type: boolean + description: Direct boolean value to allow or deny access + description: Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean. + row_filter: + type: string + description: SQL expression to filter the underlying model by. Can leverage templated user attributes to customize the filter for the requesting user. Needs to be a valid SQL expression that can be injected into a WHERE clause + include: + type: array + description: List of dimension or measure names to include in the dashboard. If include is defined all other dimensions and measures are excluded + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be included or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to include + items: + type: string + - type: string + description: Wildcard '*' to include all fields + enum: + - '*' + description: List of fields to include. Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + exclude: + type: array + description: List of dimension or measure names to exclude from the dashboard. If exclude is defined all other dimensions and measures are included + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be excluded or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to exclude + items: + type: string + - type: string + description: Wildcard '*' to exclude all fields + enum: + - '*' + description: List of fields to exclude. Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + rules: + type: array + description: List of detailed security rules that can be used to define complex access control policies + items: + type: object + description: Individual security rule definition + properties: + type: + type: string + enum: + - access + - field_access + - row_filter + description: Type of security rule - access (overall access), field_access (field-level access), or row_filter (row-level filtering) + action: + type: string + enum: + - allow + - deny + description: Whether to allow or deny access for this rule + if: + type: string + description: Conditional expression that determines when this rule applies. 
Must be a valid SQL expression that evaluates to a boolean + names: + type: array + items: + type: string + description: List of field names this rule applies to (for field_access type rules) + all: + type: boolean + description: When true, applies the rule to all fields (for field_access type rules) + sql: + type: string + description: SQL expression for row filtering (for row_filter type rules) + required: + - type + \ No newline at end of file diff --git a/runtime/parser/old/canvas-dashboards.schema.yaml b/runtime/parser/old/canvas-dashboards.schema.yaml new file mode 100644 index 00000000000..a5fd240cc2e --- /dev/null +++ b/runtime/parser/old/canvas-dashboards.schema.yaml @@ -0,0 +1,316 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: canvas-dashboards.schema.yaml +title: Canvas Dashboard YAML +type: object +description: In your Rill project directory, create a canvas dashboard, `.yaml`, file in the `dashboards` directory. Rill will ingest the dashboard definition next time you run `rill start`. +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: canvas + description: Refers to the resource type and must be `canvas` + display_name: + type: string + description: Refers to the display name for the canvas + banner: + type: string + description: Refers to the custom banner displayed at the header of an Canvas dashboard + rows: + type: array + description: Refers to all of the rows displayed on the Canvas + items: + type: object + properties: + height: + type: string + description: Height of the row in px + items: + type: array + description: List of components to display in the row + items: + type: object + properties: + component: + type: string + description: | + Name of the component to display. Each component type has its own set of properties. + Available component types: + + - **markdown** - Text component, uses markdown formatting + - **kpi_grid** - KPI component, similar to TDD in Rill Explore, display quick KPI charts + - **stacked_bar_normalized** - Bar chart normalized to 100% values + - **line_chart** - Normal Line chart + - **bar_chart** - Normal Bar chart + - **stacked_bar** - Stacked Bar chart + - **area_chart** - Line chart with area + - **image** - Provide a URL to embed into canvas dashboard + - **table** - Similar to Pivot table, add dimensions and measures to visualize your data + - **heatmap** - Heat Map chart to visualize distribution of data + - **donut_chart** - Donut or Pie chart to display sums of total + + width: + type: + - string + - integer + description: Width of the component (can be a number or string with unit) + additionalProperties: true + additionalProperties: false + max_width: + type: integer + description: Max width in pixels of the canvas + minimum: 0 + gap_x: + type: integer + description: Horizontal gap in pixels of the canvas + minimum: 0 + gap_y: + type: integer + description: Vertical gap in pixels of the canvas + minimum: + theme: + oneOf: + - title: Existing theme + type: string + description: Name of an existing theme to apply to the dashboard + - title: Inline theme + type: object + description: Inline theme configuration. + $ref: '#/definitions/theme' + + description: Theme configuration. Can be either a string reference to an existing theme or an inline theme configuration object. + + + allow_custom_time_range: + type: boolean + description: Defaults to true, when set to false it will hide the ability to set a custom time range for the user. 
+ time_ranges: + type: array + description: Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets' + items: + $ref: '#/definitions/explore_time_range_properties' + time_zones: + type: array + description: Refers to the time zones that should be pinned to the top of the time zone selector. It should be a list of [IANA time zone identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + items: + type: string + filters: + type: object + description: Indicates if filters should be enabled for the canvas. + properties: + enable: + type: boolean + description: Toggles filtering functionality for the canvas dashboard. + additionalProperties: false + defaults: + type: object + description: Preset UI state to show by default + properties: + time_range: + type: string + description: Default time range to display when the dashboard loads + comparison_mode: + type: string + description: Default comparison mode for metrics (none, time, or dimension) + comparison_dimension: + type: string + description: Default dimension to use for comparison when comparison_mode is 'dimension' + additionalProperties: false + variables: + type: array + description: Variables that can be used in the canvas + items: + $ref: '#/definitions/component_variable_properties' + security: + $ref: '#/definitions/security_policy_properties' + description: Security rules to apply for access to the canvas + required: + - type + - rows + - $ref: '#/definitions/common_properties' +definitions: + explore_time_range_properties: + oneOf: + - type: string + description: A valid ISO 8601 duration or one of the Rill ISO 8601 extensions for the selection + - type: object + description: Object containing time range and comparison configuration + properties: + range: + type: string + description: A valid ISO 8601 duration or one of the Rill ISO 8601 extensions for the selection + comparison_offsets: + type: array + description: List of time comparison options for this time range selection (optional). Must be one of the Rill ISO 8601 extensions + items: + oneOf: + - type: string + description: Offset string only (range is inferred) + - type: object + description: Object containing offset and range configuration for time comparison + properties: + offset: + type: string + description: Time offset for comparison (e.g., 'P1D' for one day ago) + range: + type: string + description: Custom time range for comparison period + additionalProperties: false + required: + - range + additionalProperties: false + component_variable_properties: + type: object + properties: + name: + type: string + description: Unique identifier for the variable + type: + type: string + description: Data type of the variable (e.g., string, number, boolean) + value: + description: Default value for the variable. Can be any valid JSON value type + type: + - string + - number + - boolean + - object + - array + required: + - name + - type + additionalProperties: false + security_policy_properties: + type: object + description: Defines security rules and access control policies for resources + properties: + access: + oneOf: + - type: string + description: SQL expression that evaluates to a boolean to determine access + - type: boolean + description: Direct boolean value to allow or deny access + description: Expression indicating if the user should be granted access to the dashboard. 
If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean. + row_filter: + type: string + description: SQL expression to filter the underlying model by. Can leverage templated user attributes to customize the filter for the requesting user. Needs to be a valid SQL expression that can be injected into a WHERE clause + include: + type: array + description: List of dimension or measure names to include in the dashboard. If include is defined all other dimensions and measures are excluded + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be included or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to include + items: + type: string + - type: string + description: Wildcard '*' to include all fields + enum: + - '*' + description: List of fields to include. Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + exclude: + type: array + description: List of dimension or measure names to exclude from the dashboard. If exclude is defined all other dimensions and measures are included + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be excluded or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to exclude + items: + type: string + - type: string + description: Wildcard '*' to exclude all fields + enum: + - '*' + description: List of fields to exclude. Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + rules: + type: array + description: List of detailed security rules that can be used to define complex access control policies + items: + type: object + description: Individual security rule definition + properties: + type: + type: string + enum: + - access + - field_access + - row_filter + description: Type of security rule - access (overall access), field_access (field-level access), or row_filter (row-level filtering) + action: + type: string + enum: + - allow + - deny + description: Whether to allow or deny access for this rule + if: + type: string + description: Conditional expression that determines when this rule applies. Must be a valid SQL expression that evaluates to a boolean + names: + type: array + items: + type: string + description: List of field names this rule applies to (for field_access type rules) + all: + type: boolean + description: When true, applies the rule to all fields (for field_access type rules) + sql: + type: string + description: SQL expression for row filtering (for row_filter type rules) + required: + - type + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `<kind>/<name>` or `<name>`. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment.
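
For orientation, a minimal canvas dashboard file that satisfies the schema above might look like the following sketch — all names and values are illustrative, not taken from the schema itself:

```yaml
# Hypothetical canvas dashboard file, e.g. dashboards/overview.yaml
type: canvas
display_name: Marketing Overview
max_width: 1200
gap_x: 8
gap_y: 8
rows:
  # A full-width text row followed by a row of two charts
  - height: 120px
    items:
      - component: markdown
        width: 12
  - height: 320px
    items:
      - component: line_chart
        width: 6
      - component: donut_chart
        width: 6
```

Note that only `type` and `rows` are required; everything else falls back to schema defaults.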
+ theme: + type: object + properties: + colors: + type: object + description: Used to override the dashboard colors. Either primary or secondary color must be provided. + properties: + primary: + type: string + description: Overrides the primary blue color in the dashboard. Can be any hex value (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() format. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors). + secondary: + type: string + description: Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can be any hex value (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() format. + anyOf: + - required: + - primary + - required: + - secondary \ No newline at end of file diff --git a/runtime/parser/old/component.schema.yaml b/runtime/parser/old/component.schema.yaml new file mode 100644 index 00000000000..e5c71366692 --- /dev/null +++ b/runtime/parser/old/component.schema.yaml @@ -0,0 +1,71 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: component.schema.yaml +title: Component YAML +type: object +description: Defines a reusable dashboard component that can be embedded in canvas dashboards +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: component + description: Refers to the resource type and must be `component` + display_name: + type: string + description: Refers to the display name for the component + description: + type: string + description: Detailed description of the component's purpose and functionality + input: + type: array + description: List of input variables that can be passed to the component + items: + $ref: '#/definitions/component_variable_properties' + output: + description: Output variable that the component produces + $ref: '#/definitions/component_variable_properties' + required: + - type + - $ref: '#/definitions/common_properties' +definitions: + component_variable_properties: + type: object + properties: + name: + type: string + description: Unique identifier for the variable + type: + type: string + description: Data type of the variable (e.g., string, number, boolean) + value: + description: Default value for the variable. Can be any valid JSON value type + type: + - string + - number + - boolean + - object + - array + required: + - name + - type + additionalProperties: false + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `<kind>/<name>` or `<name>`. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment.
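
Likewise, a sketch of a component file under the schema above — the name, variables, and default values are invented for illustration:

```yaml
# Hypothetical component file, e.g. components/revenue_kpi.yaml
type: component
display_name: Revenue KPI
description: Reusable KPI tile for embedding in canvas dashboards
input:
  # Each input needs at least a name and a type; value is an optional default
  - name: time_range
    type: string
    value: P7D
output:
  name: selected_measure
  type: string
```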
\ No newline at end of file diff --git a/runtime/parser/old/connectors.schema.yaml b/runtime/parser/old/connectors.schema.yaml new file mode 100644 index 00000000000..fe4a70e4543 --- /dev/null +++ b/runtime/parser/old/connectors.schema.yaml @@ -0,0 +1,672 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: connectors.schema.yaml +title: Connector YAML +type: object +description: | + Connector YAML files define how Rill connects to external data sources and OLAP engines. Each connector specifies a driver type and its required connection parameters. + + ## Available Connector Types + + ### _OLAP Engines_ + - [**DuckDB**](#duckdb) - Embedded DuckDB engine (default) + - [**ClickHouse**](#clickhouse) - ClickHouse analytical database + - [**MotherDuck**](#motherduck) - MotherDuck cloud database + - [**Druid**](#druid) - Apache Druid + - [**Pinot**](#pinot) - Apache Pinot + + ### _Data Warehouses_ + - [**Snowflake**](#snowflake) - Snowflake data warehouse + - [**BigQuery**](#bigquery) - Google BigQuery + - [**Redshift**](#redshift) - Amazon Redshift + - [**Athena**](#athena) - Amazon Athena + + ### _Databases_ + - [**PostgreSQL**](#postgres) - PostgreSQL databases + - [**MySQL**](#mysql) - MySQL databases + - [**SQLite**](#sqlite) - SQLite databases + + ### _Cloud Storage_ + - [**GCS**](#gcs) - Google Cloud Storage + - [**S3**](#s3) - Amazon S3 storage + - [**Azure**](#azure) - Azure Blob Storage + + ### _Other_ + - [**HTTPS**](#https) - Public files via HTTP/HTTPS + - [**Salesforce**](#salesforce) - Salesforce data + - [**Slack**](#slack) - Slack data + + :::warning Security Recommendation + For all credential parameters (passwords, tokens, keys), use environment variables with the syntax `{{ .env.connector.<connector_name>.<property> }}`. This keeps sensitive data out of your YAML files and version control. See our [credentials documentation](/build/credentials/) for complete setup instructions. + ::: + +allOf: + - $ref: '#/definitions/common_properties' + - title: Properties + type: object + properties: + type: + type: string + const: connector + description: Refers to the resource type and must be `connector` + # driver: + # type: string + # description: The type of connector, see [available connectors](#available-connector-types) (required) + required: + - type + - driver + + - oneOf: + - $ref: '#/definitions/athena' + - $ref: '#/definitions/azure' + - $ref: '#/definitions/bigquery' + - $ref: '#/definitions/clickhouse' + - $ref: '#/definitions/druid' + - $ref: '#/definitions/duckdb' + - $ref: '#/definitions/gcs' + - $ref: '#/definitions/https' + # - $ref: '#/definitions/local_file' + - $ref: '#/definitions/motherduck' + - $ref: '#/definitions/mysql' + - $ref: '#/definitions/pinot' + - $ref: '#/definitions/postgres' + - $ref: '#/definitions/redshift' + - $ref: '#/definitions/s3' + - $ref: '#/definitions/salesforce' + - $ref: '#/definitions/slack' + - $ref: '#/definitions/snowflake' + - $ref: '#/definitions/sqlite' +definitions: + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `<kind>/<name>` or `<name>`. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment.
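
Before the per-driver definitions below, a minimal connector file may help anchor the shape. This sketch assumes a ClickHouse connection and uses the environment-variable templating recommended above; the connector name and env keys are illustrative:

```yaml
# Hypothetical connector file, e.g. connectors/clickhouse.yaml
type: connector
driver: clickhouse
host: "{{ .env.connector.clickhouse.host }}"
port: 9440
username: default
# Keep secrets out of the YAML file and out of version control
password: "{{ .env.connector.clickhouse.password }}"
ssl: true
```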
+ athena: + type: object + title: Athena + description: Configuration properties specific to the Athena connector + properties: + driver: + type: string + description: Refers to the driver type and must be `athena` + const: athena + aws_access_key_id: + type: string + description: AWS Access Key ID used for authentication. Required when using static credentials directly or as base credentials for assuming a role. + aws_secret_access_key: + type: string + description: AWS Secret Access Key paired with the Access Key ID. Required when using static credentials directly or as base credentials for assuming a role. + aws_access_token: + type: string + description: AWS session token used with temporary credentials. Required only if the Access Key and Secret Key are part of temporary session credentials. + role_arn: + type: string + description: ARN of the IAM role to assume. When specified, the SDK uses the base credentials to call STS AssumeRole and obtain temporary credentials scoped to this role. + role_session_name: + type: string + description: Session name to associate with the STS AssumeRole session. Used only if 'role_arn' is specified. Useful for identifying and auditing the session. + external_id: + type: string + description: External ID required by some roles when assuming them, typically for cross-account access. Used only if 'role_arn' is specified and the role's trust policy requires it. + workgroup: + type: string + description: Athena workgroup to use for query execution. Defaults to 'primary' if not specified. + output_location: + type: string + description: S3 URI where Athena query results should be stored (e.g., s3://your-bucket/athena/results/). Optional if the selected workgroup has a default result configuration. + aws_region: + type: string + description: AWS region where Athena and the result S3 bucket are located (e.g., us-east-1). Defaults to 'us-east-1' if not specified. + allow_host_access: + type: boolean + description: Allow the Athena client to access host environment configurations such as environment variables or local AWS credential files. Defaults to true, enabling use of credentials and settings from the host environment unless explicitly disabled. + required: + - driver + azure: + type: object + title: Azure + description: Configuration properties specific to the Azure connector + properties: + driver: + type: string + description: Refers to the driver type and must be `azure` + const: azure + azure_storage_account: + type: string + description: Azure storage account name + azure_storage_key: + type: string + description: Azure storage access key + azure_storage_sas_token: + type: string + description: Optional Azure SAS token for authentication + azure_storage_connection_string: + type: string + description: Optional Azure connection string for the storage account + azure_storage_bucket: + type: string + description: Name of the Azure Blob Storage container (equivalent to an S3 bucket) + allow_host_access: + type: boolean + description: Allow access to host environment configuration + required: + - driver + - azure_storage_bucket + bigquery: + type: object + title: BigQuery + description: Configuration properties specific to the BigQuery connector + properties: + driver: + type: string + description: Refers to the driver type and must be `bigquery` + const: bigquery + google_application_credentials: + type: string + description: Raw contents of the Google Cloud service account key (in JSON format) used for authentication.
+ project_id: + type: string + description: ID of the Google Cloud project to use for BigQuery operations. This can be omitted only if the project ID is included in the service account key. + allow_host_access: + type: boolean + description: Enable the BigQuery client to use credentials from the host environment when no service account JSON is provided. This includes Application Default Credentials from environment variables, local credential files, or the Google Compute Engine metadata server. Defaults to true, allowing seamless authentication in GCP environments. + required: + - driver + clickhouse: + type: object + title: ClickHouse + description: Configuration properties specific to the ClickHouse connector + properties: + driver: + type: string + description: Refers to the driver type and must be `clickhouse` + const: clickhouse + managed: + type: boolean + description: '`true` means Rill will provision the connector using the default provisioner. `false` disables automatic provisioning.' + mode: + type: string + description: "Controls the operation mode for the ClickHouse connection. Defaults to 'read' for safe operation with external databases. Set to 'readwrite' to enable model creation and table mutations. Note: When 'managed: true', this is automatically set to 'readwrite'." + dsn: + type: string + description: DSN (Data Source Name) for the ClickHouse connection + username: + type: string + description: Username for authentication + password: + type: string + description: Password for authentication + host: + type: string + description: Host where the ClickHouse instance is running + port: + type: integer + description: Port where the ClickHouse instance is accessible + database: + type: string + description: Name of the ClickHouse database within the cluster + ssl: + type: boolean + description: Indicates whether a secured SSL connection is required + cluster: + type: string + description: 'Cluster name, required for running distributed queries' + log_queries: + type: boolean + description: Controls whether to log raw SQL queries + settings_override: + type: string + description: Overrides the default settings used in queries, for example `readonly = 1, session_timezone = 'UTC'` + embed_port: + type: integer + description: Port to run ClickHouse locally (0 for random port) + can_scale_to_zero: + type: boolean + description: Indicates if the database can scale to zero + max_open_conns: + type: integer + description: Maximum number of open connections to the database + max_idle_conns: + type: integer + description: Maximum number of idle connections in the pool + dial_timeout: + type: string + description: Timeout for dialing the ClickHouse server + conn_max_lifetime: + type: string + description: Maximum time a connection may be reused + read_timeout: + type: string + description: Maximum time for a connection to read data + required: + - driver + druid: + type: object + title: Druid + description: Configuration properties specific to the Druid connector + properties: + driver: + type: string + description: Refers to the driver type and must be `druid` + const: druid + dsn: + type: string + description: Data Source Name (DSN) for connecting to Druid + username: + type: string + description: Username for authenticating with Druid + password: + type: string + description: Password for authenticating with Druid + host: + type: string + description: Hostname of the Druid coordinator or broker + port: + type: integer + description: Port number of the Druid service + ssl: + type: boolean + description: Enable SSL for secure connection + log_queries: + type: boolean + description: Log raw SQL queries sent to Druid + max_open_conns: + type: integer + description: Maximum number of open database connections (0 = default, -1 = unlimited) + skip_version_check: + type: boolean + description: Skip checking Druid version compatibility + required: + - driver + - dsn + duckdb: + type: object + title: DuckDB + description: Configuration properties specific to the DuckDB connector + properties: + driver: + type: string + description: Refers to the driver type and must be `duckdb` + const: duckdb + pool_size: + type: integer + description: Number of concurrent connections and queries allowed + allow_host_access: + type: boolean + description: Whether access to the local environment and file system is allowed + cpu: + type: integer + description: Number of CPU cores available to the database + memory_limit_gb: + type: integer + description: Amount of memory in GB available to the database + read_write_ratio: + type: number + description: Ratio of resources allocated to the read database; used to divide CPU and memory + init_sql: + type: string + description: SQL executed during database initialization. + conn_init_sql: + type: string + description: SQL executed when a new connection is initialized. + secrets: + type: string + description: Comma-separated list of other connector names to create temporary secrets for in DuckDB before executing a model.
+ log_queries: + type: boolean + description: Whether to log raw SQL queries executed through OLAP + required: + - driver + gcs: + type: object + title: GCS + description: Configuration properties specific to the GCS connector + properties: + driver: + type: string + description: Refers to the driver type and must be `gcs` + const: gcs + google_application_credentials: + type: string + description: Google Cloud credentials JSON string + bucket: + type: string + description: Name of the GCS bucket + allow_host_access: + type: boolean + description: Allow access to host environment configuration + key_id: + type: string + description: Optional S3-compatible Key ID when used in compatibility mode + secret: + type: string + description: Optional S3-compatible Secret when used in compatibility mode + required: + - driver + - bucket + https: + type: object + title: HTTPS + description: Configuration properties specific to the HTTPS connector + properties: + driver: + type: string + description: Refers to the driver type and must be `https` + const: https + path: + type: string + description: The full HTTPS URI to fetch data from + headers: + type: object + description: HTTP headers to include in the request + additionalProperties: + type: string + required: + - driver + - path + # local_file: + # type: object + # title: local_file + # description: Configuration properties specific to the local_file + # properties: + # driver: + # type: string + # description: Refers to the driver type and must be driver `local_file` + # const: local_file + # sql: + # type: string + # description: SQL query to execute on the local file, file should exist in the local root data directory + # allow_host_access: + # type: boolean + # description: Flag to indicate if access to host-level file paths is permitted + # required: + # - driver + # - sql + motherduck: + type: object + title: MotherDuck + description: Configuration properties specific to the MotherDuck connector + properties: + driver: + type: string + description: Refers to the driver type and must be `duckdb` + const: duckdb + path: + type: string + description: Path to your MotherDuck database + init_sql: + type: string + description: SQL executed during database initialization.
+ required: + - driver + - path + - init_sql + mysql: + type: object + title: MySQL + description: Configuration properties specific to the MySQL connector + properties: + driver: + type: string + description: Refers to the driver type and must be `mysql` + const: mysql + dsn: + type: string + description: DSN (Data Source Name) for the MySQL connection + host: + type: string + description: Hostname of the MySQL server + port: + type: integer + description: Port number for the MySQL server + database: + type: string + description: Name of the MySQL database + user: + type: string + description: Username for authentication + password: + type: string + description: Password for authentication + ssl_mode: + type: string + description: SSL mode can be DISABLED, PREFERRED or REQUIRED + required: + - driver + pinot: + type: object + title: Pinot + description: Configuration properties specific to the Pinot connector + properties: + driver: + type: string + description: Refers to the driver type and must be `pinot` + const: pinot + dsn: + type: string + description: DSN (Data Source Name) for the Pinot connection + username: + type: string + description: Username for authenticating with Pinot + password: + type: string + description: Password for authenticating with Pinot + broker_host: + type: string + description: Hostname of the Pinot broker + broker_port: + type: integer + description: Port number for the Pinot broker + controller_host: + type: string + description: Hostname of the Pinot controller + controller_port: + type: integer + description: Port number for the Pinot controller + ssl: + type: boolean + description: Enable SSL connection to Pinot + log_queries: + type: boolean + description: Log raw SQL queries executed through Pinot + max_open_conns: + type: integer + description: Maximum number of open connections to the Pinot database + required: + - driver + - dsn + - broker_host + - controller_host + postgres: + type: object + title: Postgres + description: Configuration properties specific to the Postgres connector + properties: + driver: + type: string + description: Refers to the driver type and must be `postgres` + const: postgres + dsn: + type: string + description: DSN (Data Source Name) for the Postgres connection + host: + type: string + description: Hostname of the Postgres server + port: + type: string + description: Port number for the Postgres server + dbname: + type: string + description: Name of the Postgres database + user: + type: string + description: Username for authentication + password: + type: string + description: Password for authentication + sslmode: + type: string + description: SSL mode can be disable, allow, prefer or require + required: + - driver + redshift: + type: object + title: Redshift + description: Configuration properties specific to the Redshift connector + properties: + driver: + type: string + description: Refers to the driver type and must be `redshift` + const: redshift + aws_access_key_id: + type: string + description: AWS Access Key ID used for authenticating with Redshift. + aws_secret_access_key: + type: string + description: AWS Secret Access Key used for authenticating with Redshift. + aws_access_token: + type: string + description: AWS Session Token for temporary credentials (optional). + region: + type: string + description: AWS region where the Redshift cluster or workgroup is hosted (e.g., 'us-east-1'). + database: + type: string + description: Name of the Redshift database to query.
+ workgroup: + type: string + description: Workgroup name for Redshift Serverless; for provisioned Redshift clusters use 'cluster_identifier'. + cluster_identifier: + type: string + description: Cluster identifier for provisioned Redshift clusters; for Redshift Serverless use 'workgroup'. + required: + - driver + - aws_access_key_id + - aws_secret_access_key + - database + s3: + type: object + title: S3 + description: Configuration properties specific to the S3 connector + properties: + driver: + type: string + description: Refers to the driver type and must be `s3` + const: s3 + aws_access_key_id: + type: string + description: AWS Access Key ID used for authentication + aws_secret_access_key: + type: string + description: AWS Secret Access Key used for authentication + aws_access_token: + type: string + description: Optional AWS session token for temporary credentials + bucket: + type: string + description: Name of the S3 bucket + endpoint: + type: string + description: Optional custom endpoint URL for S3-compatible storage + region: + type: string + description: AWS region of the S3 bucket + allow_host_access: + type: boolean + description: Allow access to host environment configuration + retain_files: + type: boolean + description: Whether to retain intermediate files after processing + required: + - driver + - bucket + salesforce: + type: object + title: Salesforce + description: Configuration properties specific to the Salesforce connector + properties: + driver: + type: string + description: Refers to the driver type and must be `salesforce` + const: salesforce + username: + type: string + description: Salesforce account username + password: + type: string + description: Salesforce account password (secret) + key: + type: string + description: Authentication key for Salesforce (secret) + endpoint: + type: string + description: Salesforce API endpoint URL + client_id: + type: string + description: Client ID used for Salesforce OAuth authentication + required: + - driver + - username + - endpoint + slack: + type: object + title: Slack + description: Configuration properties specific to the Slack connector + properties: + driver: + type: string + description: Refers to the driver type and must be `slack` + const: slack + bot_token: + type: string + description: Bot token used for authenticating Slack API requests + required: + - driver + - bot_token + snowflake: + type: object + title: Snowflake + description: Configuration properties specific to the Snowflake connector + properties: + driver: + type: string + description: Refers to the driver type and must be `snowflake` + const: snowflake + dsn: + type: string + description: DSN (Data Source Name) for the Snowflake connection + parallel_fetch_limit: + type: integer + description: Maximum number of concurrent fetches during query execution + required: + - driver + - dsn + sqlite: + type: object + title: SQLite + description: Configuration properties specific to the SQLite connector + properties: + driver: + type: string + description: Refers to the driver type and must be `sqlite` + const: sqlite + dsn: + type: string + description: DSN (Data Source Name) for the SQLite connection + required: + - driver + - dsn diff --git a/runtime/parser/old/explore-dashboards.schema.yaml b/runtime/parser/old/explore-dashboards.schema.yaml new file mode 100644 index 00000000000..ad6bc203752 --- /dev/null +++ b/runtime/parser/old/explore-dashboards.schema.yaml @@ -0,0 +1,290 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id:
explore-dashboards.schema.yaml +title: Explore Dashboard YAML +type: object +description: In your Rill project directory, create an explore dashboard `.yaml` file in the `dashboards` directory. Rill will ingest the dashboard definition next time you run `rill start`. +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: explore + description: Refers to the resource type and must be `explore` + display_name: + type: string + description: Refers to the display name for the explore dashboard + metrics_view: + type: string + description: Refers to the metrics view resource + description: + type: string + description: Refers to the description of the explore dashboard + banner: + type: string + description: Refers to the custom banner displayed at the header of an explore dashboard + dimensions: + description: List of dimension names. Use '*' to select all dimensions (default) + $ref: '#/definitions/explore/definitions/field_selector_properties' + measures: + description: List of measure names. Use '*' to select all measures (default) + $ref: '#/definitions/explore/definitions/field_selector_properties' + theme: + oneOf: + - title: Existing theme + type: string + description: Name of an existing theme to apply to the dashboard + - title: Inline theme + type: object + description: Inline theme configuration. + description: Name of the theme to use. Only one of theme and embedded_theme can be set. + time_ranges: + type: array + description: Overrides the list of default time range selections available in the dropdown. It can be a string or an object with a 'range' and optional 'comparison_offsets' + items: + $ref: '#/definitions/explore_time_range_properties' + time_zones: + type: array + description: Refers to the time zones that should be pinned to the top of the time zone selector. It should be a list of [IANA time zone identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + items: + type: string + lock_time_zone: + type: boolean + description: When true, the dashboard will be locked to the first time zone provided in the time_zones list. When no time_zones are provided, the dashboard will be locked to UTC + allow_custom_time_range: + type: boolean + description: Defaults to true, when set to false it will hide the ability to set a custom time range for the user. + defaults: + type: object + description: Defines the default view state of the dashboard + properties: + dimensions: + description: Provides the default dimensions to load on viewing the dashboard + $ref: '#/definitions/explore/definitions/field_selector_properties' + measures: + description: Provides the default measures to load on viewing the dashboard + $ref: '#/definitions/explore/definitions/field_selector_properties' + time_range: + description: Refers to the default time range shown when a user initially loads the dashboard. The value must be either a valid [ISO 8601 duration](https://en.wikipedia.org/wiki/ISO_8601#Durations) (for example, PT12H for 12 hours, P1M for 1 month, or P26W for 26 weeks) or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) + type: string + comparison_mode: + description: 'Controls how to compare current data with historical or categorical baselines.
Options: `none` (no comparison), `time` (compares with past based on default_time_range), `dimension` (compares based on comparison_dimension values)' + type: string + comparison_dimension: + description: 'for dimension mode, specify the comparison dimension by name' + type: string + additionalProperties: false + embeds: + type: object + description: Configuration options for embedded dashboard views + properties: + hide_pivot: + type: boolean + description: When true, hides the pivot table view in embedded mode + additionalProperties: false + security: + description: Security rules to apply for access to the explore dashboard + $ref: '#/definitions/security_policy_properties' + required: + - type + - $ref: '#/definitions/common_properties' +definitions: + explore_time_range_properties: + oneOf: + - type: string + description: a valid [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) duration or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) extensions for the selection + - type: object + description: Object containing time range and comparison configuration + properties: + range: + type: string + description: a valid [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Durations) duration or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) extensions for the selection + comparison_offsets: + type: array + description: list of time comparison options for this time range selection (optional). Must be one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) + items: + oneOf: + - type: string + description: Offset string only (range is inferred) + - type: object + description: Object containing offset and range configuration for time comparison + properties: + offset: + type: string + description: Time offset for comparison (e.g., 'P1D' for one day ago) + range: + type: string + description: Custom time range for comparison period + additionalProperties: false + required: + - range + additionalProperties: false + security_policy_properties: + type: object + description: Defines security rules and access control policies for resources + properties: + access: + oneOf: + - type: string + description: SQL expression that evaluates to a boolean to determine access + - type: boolean + description: Direct boolean value to allow or deny access + description: Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean. + row_filter: + type: string + description: SQL expression to filter the underlying model by. Can leverage templated user attributes to customize the filter for the requesting user. Needs to be a valid SQL expression that can be injected into a WHERE clause + include: + type: array + description: List of dimension or measure names to include in the dashboard. If include is defined all other dimensions and measures are excluded + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be included or not. It can leverage templated user attributes. 
Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to include + items: + type: string + - type: string + description: Wildcard '*' to include all fields + enum: + - '*' + description: List of fields to include. Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + exclude: + type: array + description: List of dimension or measure names to exclude from the dashboard. If exclude is defined all other dimensions and measures are included + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be excluded or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to exclude + items: + type: string + - type: string + description: Wildcard '*' to exclude all fields + enum: + - '*' + description: List of fields to exclude. Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + rules: + type: array + description: List of detailed security rules that can be used to define complex access control policies + items: + type: object + description: Individual security rule definition + properties: + type: + type: string + enum: + - access + - field_access + - row_filter + description: Type of security rule - access (overall access), field_access (field-level access), or row_filter (row-level filtering) + action: + type: string + enum: + - allow + - deny + description: Whether to allow or deny access for this rule + if: + type: string + description: Conditional expression that determines when this rule applies. Must be a valid SQL expression that evaluates to a boolean + names: + type: array + items: + type: string + description: List of field names this rule applies to (for field_access type rules) + all: + type: boolean + description: When true, applies the rule to all fields (for field_access type rules) + sql: + type: string + description: SQL expression for row filtering (for row_filter type rules) + required: + - type + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `<kind>/<name>` or `<name>`. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment.
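
A compact explore dashboard file satisfying the schema above, with an invented metrics view name for illustration:

```yaml
# Hypothetical explore file, e.g. dashboards/sales_explore.yaml
type: explore
display_name: Sales Explore
metrics_view: sales_metrics
dimensions: '*'   # wildcard selector: all dimensions
measures: '*'     # wildcard selector: all measures
time_ranges:
  - P1D
  - range: P4W
    comparison_offsets:
      - P4W
defaults:
  time_range: P4W
  comparison_mode: time
security:
  access: true
```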
+ explore: + definitions: + field_selector_properties: + oneOf: + - title: Wildcard(*) selector + type: string + const: '*' + description: Wildcard(*) selector that includes all available fields in the selection + - title: Explicit list of fields + type: array + items: + type: string + description: Explicit list of fields to include in the selection + - title: Regex matching + type: object + description: 'Advanced matching using regex, DuckDB expression, or exclusion' + properties: + regex: + type: string + description: Select dimensions using a regular expression + expr: + type: string + description: DuckDB SQL expression to select fields based on custom logic + exclude: + type: object + description: Select all dimensions except those listed here + additionalProperties: false + oneOf: + - required: + - regex + - required: + - expr + - required: + - exclude + theme: + definitions: + theme_properties: + type: object + properties: + colors: + type: object + description: Used to override the dashboard colors. Either primary or secondary color must be provided. + properties: + primary: + type: string + description: Overrides the primary blue color in the dashboard. Can be any hex value (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() format. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors). + secondary: + type: string + description: Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can be any hex value (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() format. + anyOf: + - required: + - primary + - required: + - secondary \ No newline at end of file diff --git a/runtime/parser/old/metrics_views.schema.yaml b/runtime/parser/old/metrics_views.schema.yaml new file mode 100644 index 00000000000..3141247dcd1 --- /dev/null +++ b/runtime/parser/old/metrics_views.schema.yaml @@ -0,0 +1,395 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: metrics-views.schema.yaml +title: Metrics View YAML +type: object +description: In your Rill project directory, create a metrics view `.yaml` file in the `metrics` directory. Rill will ingest the metrics view definition next time you run `rill start`. +allOf: + - title: Properties + type: object + properties: + version: + type: string + description: The version of the metrics view schema + type: + type: string + const: metrics_view + description: Refers to the resource type and must be `metrics_view` + display_name: + type: string + description: Refers to the display name for the metrics view + description: + type: string + description: Refers to the description for the metrics view + ai_instructions: + type: string + description: Extra instructions for AI agents. Used to guide natural language question answering and routing. + model: + type: string + description: Refers to the model powering the dashboard (either model or table is required) + database: + type: string + description: Refers to the database to use in the OLAP engine (to be used in conjunction with table). Otherwise, will use the default database or schema if not specified + database_schema: + type: string + description: Refers to the schema to use in the OLAP engine (to be used in conjunction with table).
Otherwise, will use the default database or schema if not specified + table: + type: string + description: Refers to the table powering the dashboard, should be used instead of model for dashboards created from external OLAP tables (either table or model is required) + timeseries: + type: string + description: Refers to the timestamp column from your model that will underlie x-axis data in the line charts. If not specified, the line charts will not appear + watermark: + type: string + description: A SQL expression that tells us the max timestamp that the metrics are considered valid for. Usually does not need to be overwritten + smallest_time_grain: + type: string + description: 'Refers to the smallest time granularity the user is allowed to view. The valid values are: millisecond, second, minute, hour, day, week, month, quarter, year' + first_day_of_week: + type: integer + description: Refers to the first day of the week for time grain aggregation (for example, Sunday instead of Monday). The valid values are 1 through 7 where Monday=1 and Sunday=7 + first_month_of_year: + type: integer + description: Refers to the first month of the year for time grain aggregation. The valid values are 1 through 12 where January=1 and December=12 + dimensions: + type: array + description: Relates to exploring segments or dimensions of your data and filtering the dashboard + items: + type: object + properties: + name: + type: string + description: a stable identifier for the dimension + display_name: + type: string + description: a display name for your dimension + description: + type: string + description: a freeform text description of the dimension + column: + type: string + description: a categorical column + expression: + type: string + description: a non-aggregate expression such as string_split(domain, '.'). One of column or expression is required, but both cannot be set at the same time + unnest: + type: boolean + description: if true, allows multi-valued dimension to be unnested (such as lists) and filters will automatically switch to "contains" instead of exact match + uri: + type: + - string + - boolean + description: enable if your dimension is a clickable URL to enable single click navigation (boolean or valid SQL expression) + anyOf: + - required: + - column + - required: + - expression + measures: + type: array + description: Used to define the numeric aggregates of columns from your data model + items: + type: object + properties: + name: + type: string + description: a stable identifier for the measure + display_name: + type: string + description: the display name of your measure. + description: + type: string + description: a freeform text description of the measure + type: + type: string + description: 'Measure calculation type: "simple" for basic aggregations, "derived" for calculations using other measures, or "time_comparison" for period-over-period analysis. Defaults to "simple" unless dependencies exist.' + expression: + type: string + description: a combination of operators and functions for aggregations + window: + description: A measure window can be defined as a keyword string (e.g. 'time' or 'all') or an object with detailed window configuration. For more information, see the [window functions](/build/metrics-view/advanced-expressions/windows) documentation. + anyOf: + - type: string + enum: + - time + - 'true' + - all + description: 'Shorthand: `time` or `true` means time-partitioned, `all` means non-partitioned.'
+ - type: object + description: 'Detailed window configuration for measure calculations, allowing control over partitioning, ordering, and frame definition.' + properties: + partition: + type: boolean + description: 'Controls whether the window is partitioned. When true, calculations are performed within each partition separately.' + order: + $ref: '#/definitions/metrics_view/definitions/field_selectors_properties' + description: 'Specifies the fields to order the window by, determining the sequence of rows within each partition.' + frame: + type: string + description: 'Defines the window frame boundaries for calculations, specifying which rows are included in the window relative to the current row.' + additionalProperties: false + per: + $ref: '#/definitions/metrics_view/definitions/field_selectors_properties' + description: Specifies the dimensions to compute the measure per + requires: + $ref: '#/definitions/metrics_view/definitions/field_selectors_properties' + description: Uses an available measure or dimension in your metrics view to set a required parameter; cannot be used with simple measures + format_preset: + type: string + description: | + Controls the formatting of this measure using a predefined preset. Measures cannot have both `format_preset` and `format_d3`. If neither is supplied, the measure will be formatted using the `humanize` preset by default. + + Available options: + - `humanize`: Round numbers into thousands (K), millions(M), billions (B), etc. + - `none`: Raw output. + - `currency_usd`: Round to 2 decimal points with a dollar sign ($). + - `currency_eur`: Round to 2 decimal points with a euro sign (€). + - `percentage`: Convert a rate into a percentage with a % sign. + - `interval_ms`: Convert milliseconds into human-readable durations like hours (h), days (d), years (y), etc. (optional) + format_d3: + type: string + description: 'Controls the formatting of this measure using a [d3-format](https://d3js.org/d3-format) string. If an invalid format string is supplied, the measure will fall back to `format_preset: humanize`. A measure cannot have both `format_preset` and `format_d3`. If neither is provided, the humanize preset is used by default. Example: `format_d3: ".2f"` formats using fixed-point notation with two decimal places. Example: `format_d3: ",.2r"` formats using grouped thousands with two significant digits. (optional)' + format_d3_locale: + type: object + description: locale configuration passed through to D3, enabling changing the currency symbol among other things. For details, see the docs for D3's [formatLocale](https://d3js.org/d3-format#formatLocale) + additionalProperties: true + valid_percent_of_total: + type: boolean + description: a boolean indicating whether percent-of-total values should be rendered for this measure + treat_nulls_as: + type: string + description: Used to configure what value to fill in for missing time buckets. This also works generally as COALESCING over non-empty time buckets. + minItems: 1 + + annotations: + type: array + description: Used to define annotations that can be displayed on charts + items: + type: object + properties: + name: + type: string + description: A stable identifier for the annotation. Defaults to model or table names when not specified + model: + type: string + description: Refers to the model powering the annotation (either table or model is required). The model must have 'time' and 'description' columns.
Optional columns include 'time_end' for range annotations and 'grain' to specify when the annotation should appear based on dashboard grain level. + database: + type: string + description: Refers to the database to use in the OLAP engine (to be used in conjunction with table). Otherwise, will use the default database or schema if not specified + database_schema: + type: string + description: Refers to the schema to use in the OLAP engine (to be used in conjunction with table). Otherwise, will use the default database or schema if not specified + table: + type: string + description: Refers to the table powering the annotation, should be used instead of model for annotations from external OLAP tables (either table or model is required) + connector: + type: string + description: Refers to the connector to use for the annotation + measures: + description: Specifies which measures to apply the annotation to. Applies to all measures if not specified + anyOf: + - type: string + description: Simple field name as a string. + - type: array + description: List of field selectors, each can be a string or an object with detailed configuration. + items: + anyOf: + - type: string + description: Shorthand field selector, interpreted as the name. + - type: object + description: Detailed field selector configuration with name and optional time grain. + properties: + name: + type: string + description: Name of the field to select. + time_grain: + type: string + description: Time grain for time-based dimensions. + enum: + - '' + - ms + - millisecond + - s + - second + - min + - minute + - h + - hour + - d + - day + - w + - week + - month + - q + - quarter + - 'y' + - year + required: + - name + additionalProperties: false + security: + $ref: '#/definitions/security_policy_properties' + description: Defines a security policy for the dashboard + required: + - type + - version + - $ref: '#/definitions/common_properties' +definitions: + security_policy_properties: + type: object + description: Defines security rules and access control policies for resources + properties: + access: + oneOf: + - type: string + description: SQL expression that evaluates to a boolean to determine access + - type: boolean + description: Direct boolean value to allow or deny access + description: Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean. + row_filter: + type: string + description: SQL expression to filter the underlying model by. Can leverage templated user attributes to customize the filter for the requesting user. Needs to be a valid SQL expression that can be injected into a WHERE clause + include: + type: array + description: List of dimension or measure names to include in the dashboard. If include is defined all other dimensions and measures are excluded + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be included or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to include + items: + type: string + - type: string + description: Wildcard '*' to include all fields + enum: + - '*' + description: List of fields to include. 
Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + exclude: + type: array + description: List of dimension or measure names to exclude from the dashboard. If exclude is defined all other dimensions and measures are included + items: + type: object + properties: + if: + type: string + description: Expression to decide if the column should be excluded or not. It can leverage templated user attributes. Needs to be a valid SQL expression that evaluates to a boolean + names: + anyOf: + - type: array + description: List of specific field names to exclude + items: + type: string + - type: string + description: Wildcard '*' to exclude all fields + enum: + - '*' + description: List of fields to exclude. Should match the name of one of the dashboard's dimensions or measures + required: + - if + - names + rules: + type: array + description: List of detailed security rules that can be used to define complex access control policies + items: + type: object + description: Individual security rule definition + properties: + type: + type: string + enum: + - access + - field_access + - row_filter + description: Type of security rule - access (overall access), field_access (field-level access), or row_filter (row-level filtering) + action: + type: string + enum: + - allow + - deny + description: Whether to allow or deny access for this rule + if: + type: string + description: Conditional expression that determines when this rule applies. Must be a valid SQL expression that evaluates to a boolean + names: + type: array + items: + type: string + description: List of field names this rule applies to (for field_access type rules) + all: + type: boolean + description: When true, applies the rule to all fields (for field_access type rules) + sql: + type: string + description: SQL expression for row filtering (for row_filter type rules) + required: + - type + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `<kind>/<name>` or `<name>`. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment. + metrics_view: + definitions: + field_selectors_properties: + anyOf: + - type: string + description: 'Simple field name as a string.' + - type: array + description: 'List of field selectors, each can be a string or an object with detailed configuration.' + items: + anyOf: + - type: string + description: 'Shorthand field selector, interpreted as the name.' + - type: object + description: 'Detailed field selector configuration with name and optional time grain.' + properties: + name: + type: string + description: 'Name of the field to select.' + time_grain: + type: string + description: 'Time grain for time-based dimensions.'
+ enum: + - '' + - ms + - millisecond + - s + - second + - min + - minute + - h + - hour + - d + - day + - w + - week + - month + - q + - quarter + - 'y' + - year + required: + - name + additionalProperties: false + minItems: 1 \ No newline at end of file diff --git a/runtime/parser/old/models.schema.yaml b/runtime/parser/old/models.schema.yaml new file mode 100644 index 00000000000..6ac4c160371 --- /dev/null +++ b/runtime/parser/old/models.schema.yaml @@ -0,0 +1,52 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: models.schema.yaml +title: Model SQL +type: object +description: | + When using Rill Developer, data transformations are powered by DuckDB and its dialect of SQL. Under the hood, by default, data models are created as views in DuckDB. Please check our modeling page and DuckDB documentation for more details about how to construct and write your model SQL. + + In your Rill project directory, you can also create a `.sql` file containing an appropriate DuckDB `SELECT` statement, most commonly within the default `models` directory, to represent a model (or set of SQL transformations). Rill will automatically detect and parse the model next time you run `rill start`. + + ### Annotating your models with properties + In most cases, objects are represented in Rill as YAML files. Models are unique in that any model.sql file can be considered a model resource in Rill, representing a SQL transformation that takes a set of inputs and outputs a view or table (depending on the materialization type). For most other resources, available properties can be set directly via the corresponding YAML file. In the case of a model SQL file though, configurable properties should be set by annotating the top of the file using the following syntax: + ```sql + -- @property: value + ``` + We will cover different available configurable properties in the below sections. + +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: model + description: | + By default, any new model that is created in a Rill project will populate a corresponding .sql file representing the model. Similarly, a .sql file that is directly created in the project directory will also be automatically assumed by Rill to be a model by default. Therefore, it is not necessary to annotate the model resource with the type property. + + For consistency or documentation purposes, if you'd like to annotate your model resource as well with the type property, you can do so by adding the following to the top of your model_name.sql: + ```sql + -- @type: model + ``` + materialize: + type: boolean + description: | + As mentioned, models will be materialized in DuckDB as views by default. However, you can choose to materialize them as tables instead of views. To do this, you can add the following annotation to the top of your model SQL file: + ```sql + -- @materialize: true + ``` + + Alternatively, it is possible to set it as a project-wide default as well that your models inherit via your rill.yaml file: + ```yaml + models: + materialize: true + ``` + + :::info To materialize or not to materialize? + + There are both pros and cons to materializing your models. + - Pros can include improved performance for downstream models and dashboards, especially when the SQL is complex and/or the data size is large. We generally recommend _materializing_ final models that power dashboards.
+ - Cons can include a degraded keystroke-by-keystroke modeling experience, or issues in specific edge cases, such as when using cross joins. + + If unsure, we would generally recommend leaving the defaults and/or reaching out for further guidance! + ::: diff --git a/runtime/parser/schema/rillyaml.schema.yaml b/runtime/parser/old/rillyaml.schema.yaml similarity index 95% rename from runtime/parser/schema/rillyaml.schema.yaml rename to runtime/parser/old/rillyaml.schema.yaml index 8c5a1fd59ae..d8f8878372b 100644 --- a/runtime/parser/schema/rillyaml.schema.yaml +++ b/runtime/parser/old/rillyaml.schema.yaml @@ -38,7 +38,7 @@ allOf: - title: Project-wide defaults type: object description: | - In `rill.yaml`, project-wide defaults can be specified for a resource type within a project. Unless otherwise specified, _individual resources will inherit any defaults_ that have been specified in `rill.yaml`. For available properties that can be configured, please refer to the YAML specification for each individual resource type - [model](model.md), [metrics_view](metrics-view.md), and [explore](explore.md) + In `rill.yaml`, project-wide defaults can be specified for a resource type within a project. Unless otherwise specified, _individual resources will inherit any defaults_ that have been specified in `rill.yaml`. For available properties that can be configured, please refer to the YAML specification for each individual resource type - [model](advanced-models.md), [metrics_view](metrics-views.md), and [explore](explore-dashboards.md) :::note Use plurals when specifying project-wide defaults In your `rill.yaml`, the top level property for the resource type needs to be **plural**, such as `models`, `metrics_views` and `explores`. @@ -46,7 +46,7 @@ allOf: :::info Hierarchy of inheritance and property overrides As a general rule of thumb, properties that have been specified at a more _granular_ level will supercede or override higher level properties that have been inherited. Therefore, in order of inheritance, Rill will prioritize properties in the following order: - 1. Individual [models](model.md)/[metrics_views](metrics-view.md)/[explore](explore.md) object level properties (e.g. `model.yaml` or `explore.yaml`) + 1. Individual [models](advanced-models.md)/[metrics_views](metrics-views.md)/[explore](explore-dashboards.md) object level properties (e.g. `model.yaml` or `explore.yaml`) 2. [Environment](/docs/build/models/environments.md) level properties (e.g. a specific property that have been set for `dev`) 3. [Project-wide defaults](#project-wide-defaults) for a specific property and resource type ::: @@ -165,6 +165,11 @@ allOf: groups: - partners - email: anon@unknown.com + - email: embed@rilldata.com + name: embed + custom_variable_1: Value_1 + custom_variable_2: Value_2 + items: type: object properties: diff --git a/runtime/parser/old/sources.schema.yaml b/runtime/parser/old/sources.schema.yaml new file mode 100644 index 00000000000..9995f8601f0 --- /dev/null +++ b/runtime/parser/old/sources.schema.yaml @@ -0,0 +1,98 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: sources.schema.yaml +title: Source YAML +type: object +description: | + :::warning Deprecated Feature + **Sources have been deprecated** and are now considered "source models." While sources remain backward compatible, we recommend migrating to the new source model format for access to the latest features and improvements.
+ + **Next steps:** + - Continue using sources if needed (backward compatible) + - Migrate to source models via the `type:model` parameter for existing projects + - See our [model YAML reference](advanced-models) for current documentation and best practices + ::: +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: model + description: Refers to the resource type and must be `model` + connector: + type: string + description: Refers to the connector type for the source, see [connectors](/reference/project-files/connectors) for more information + enum: + - https + - s3 + - gcs + - local_file + - motherduck + - athena + - redshift + - postgres + - sqlite + - snowflake + - bigquery + - duckdb + uri: + type: string + description: | + Refers to the URI of the remote connector you are using for the source. Rill also supports glob patterns as part of the URI for S3 and GCS (required for type: http, s3, gcs). + + - `s3://your-org/bucket/file.parquet` — the s3 URI of your file + - `gs://your-org/bucket/file.parquet` — the gsutil URI of your file + - `https://data.example.org/path/to/file.parquet` — the web address of your file + path: + type: string + description: Refers to the local path of the connector you are using for the source + sql: + type: string + description: Sets the SQL query to extract data from a SQL source + region: + type: string + description: Sets the cloud region of the S3 bucket or Athena + endpoint: + type: string + description: Overrides the S3 endpoint to connect to + output_location: + type: string + description: Sets the query output location and result files in Athena + workgroup: + type: string + description: Sets a workgroup for Athena connector + project_id: + type: string + description: Sets a project id to be used to run BigQuery jobs + timeout: + type: string + description: The maximum time to wait for source ingestion + refresh: + type: object + description: | + Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying source data (optional). + - **cron** - a cron schedule expression, which should be encapsulated in single quotes, e.g. `* * * * *` (optional) + - **every** - a Go duration string, such as `24h` (optional) + properties: + cron: + type: string + description: A cron schedule expression, which should be encapsulated in single quotes, e.g. `* * * * *` + every: + type: string + description: A Go duration string, such as `24h` + db: + type: string + description: Sets the database for motherduck connections and/or the path to the DuckDB/SQLite db file + database_url: + type: string + description: Postgres connection string that should be used + duckdb: + type: object + description: Specifies the raw parameters to inject into the DuckDB read_csv, read_json or read_parquet statement + additionalProperties: true + dsn: + type: string + description: Used to set the Snowflake connection string + required: + - type + - connector \ No newline at end of file diff --git a/runtime/parser/old/themes.schema.yaml b/runtime/parser/old/themes.schema.yaml new file mode 100644 index 00000000000..f1b8788b925 --- /dev/null +++ b/runtime/parser/old/themes.schema.yaml @@ -0,0 +1,68 @@ +$schema: 'http://json-schema.org/draft-07/schema#' +$id: themes.schema.yaml +title: Theme YAML +type: object +description: | + In your Rill project directory, create a `.yaml` file in any directory containing `type: theme`. Rill will automatically ingest the theme next time you run `rill start` or deploy to Rill Cloud. 
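(Reviewer note: the theme/dashboard pairing this file describes might look like the sketch below; the theme name `my_theme` is hypothetical, and the dashboard-side reference is covered in the paragraph that follows.)

```yaml
# my_theme.yaml, a theme resource (hypothetical name)
type: theme
colors:
  primary: crimson
  secondary: lime

# then, in the dashboard's YAML:
# default_theme: my_theme
```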
+ + To apply that theme to a dashboard, add `default_theme: ` to the yaml file for that dashboard. Alternatively, you can add this to the end of the URL in your browser: `?theme=` +examples: + - # Example: You can copy this directly into your .yaml file + type: theme + colors: + primary: plum + secondary: violet +allOf: + - title: Properties + type: object + properties: + type: + type: string + const: theme + description: Refers to the resource type and must be `theme` + required: + - type + - $ref: '#/definitions/theme/definitions/theme_properties' + required: + - colors + - $ref: '#/definitions/common_properties' +definitions: + theme: + definitions: + theme_properties: + type: object + properties: + colors: + type: object + description: Used to override the dashboard colors. Either primary or secondary color must be provided. + properties: + primary: + type: string + description: Overrides the primary blue color in the dashboard. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. Note that the hue of the input colors is used for variants but the saturation and lightness are copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors). + secondary: + type: string + description: Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. + anyOf: + - required: + - primary + - required: + - secondary + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `` or ``. + dev: + type: object + description: Overrides any properties in development environment. + prod: + type: object + description: Overrides any properties in production environment. \ No newline at end of file diff --git a/runtime/parser/schema/project.schema.yaml b/runtime/parser/schema/project.schema.yaml index f3aade1b99d..8cfc4c8bf51 100644 --- a/runtime/parser/schema/project.schema.yaml +++ b/runtime/parser/schema/project.schema.yaml @@ -7,7 +7,7 @@ description: | :::info Working with resources outside their native folders - It is possible to define resources (such as [models](model.md), [metrics-views](metrics-view.md), [dashboards](explore.md), [custom APIs](api.md), or [themes](theme.md)) within any nested folder within your Rill project directory. However, for any YAML configuration file, it is then imperative that the `type` property is then appropriately defined within the underlying resource configuration or Rill will not able to resolve the resource type correctly! + It is possible to define resources (such as [models](advanced-models.md), [metrics-views](metrics-views.md), [dashboards](explore-dashboards.md), [custom APIs](apis.md), or [themes](themes.md)) within any nested folder within your Rill project directory. However, for any YAML configuration file, it is imperative that the `type` property is appropriately defined within the underlying resource configuration, or Rill will not be able to resolve the resource type correctly!
::: @@ -21,1053 +21,496 @@ description: | ::: oneOf: - - $ref: '#/definitions/alert' - - $ref: '#/definitions/api' - - $ref: '#/definitions/canvas' - - $ref: '#/definitions/component' - - $ref: '#/definitions/connector' - - $ref: '#/definitions/explore' - - $ref: '#/definitions/metrics_view' - - $ref: '#/definitions/model' - - $ref: '#/definitions/theme' -definitions: - alert: - type: object - title: Alert YAML - description: Along with alertings at the dashboard level and can be created via the UI, there might be more extensive alerting that you might want to develop and can be done so the an alert.yaml. When creating an alert via a YAML file, you'll see this denoted in the UI as `Created through code`. - examples: - - # Example: To send alert when data lags by more than 1 day to slack channel #rill-cloud-alerts - type: alert - display_name: Data lags by more than 1 day - # Check the alert every hour. - refresh: - cron: 0 * * * * - # Query that returns non-empty results if the metrics lag by more than 1 day. - data: - sql: |- - SELECT * - FROM - ( - SELECT MAX(event_time) AS max_time - FROM rill_metrics_model - ) - WHERE max_time < NOW() - INTERVAL '1 day' - # Send notifications in Slack. - notify: - slack: - channels: - - '#rill-cloud-alerts' - allOf: - - title: Properties - type: object - properties: - type: - type: string - const: alert - description: Refers to the resource type and must be `alert` - display_name: - type: string - description: Refers to the display name for the alert - refresh: - $ref: '#/definitions/schedule_properties' - description: Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying data - intervals: - type: object - description: define the interval of the alert to check - properties: - duration: - type: string - description: a valid ISO8601 duration to define the interval duration - limit: - type: integer - description: maximum number of intervals to check for on invocation - minimum: 0 - check_unclosed: - type: boolean - description: 'boolean, whether unclosed intervals should be checked' - watermark: - type: string - enum: - - trigger_time - - inherit - description: Specifies how the watermark is determined for incremental processing. Use 'trigger_time' to set it at runtime or 'inherit' to use the upstream model's watermark. - timeout: - type: string - description: define the timeout of the alert in seconds (optional). - data: - description: Specifies one of the options to retrieve or compute the data used by alert - $ref: '#/definitions/data_properties' - for: - description: "Specifies how user identity or attributes should be evaluated for security policy enforcement." - oneOf: - - type: object - description: Specifies a unique user identifier for applying security policies. - properties: - user_id: - type: string - description: "The unique user ID used to evaluate security policies." - required: - - user_id - additionalProperties: false - - type: object - description: Specifies a user's email address for applying security policies. - properties: - user_email: - type: string - description: "The user's email address used to evaluate security policies." - format: email - required: - - user_email - additionalProperties: false - - type: object - description: Specifies a set of arbitrary user attributes for applying security policies. - properties: - attributes: - type: object - description: A dictionary of user attributes used to evaluate security policies. 
- additionalProperties: true - required: - - attributes - additionalProperties: false - on_recover: - type: boolean - description: Send an alert when a previously failing alert recovers. Defaults to false. - on_fail: - type: boolean - description: Send an alert when a failure occurs. Defaults to true. - on_error: - type: boolean - description: Send an alert when an error occurs during evaluation. Defaults to false. - renotify: - type: boolean - description: Enable repeated notifications for unresolved alerts. Defaults to false. - renotify_after: - type: string - description: Defines the re-notification interval for the alert (e.g., '10m','24h'), equivalent to snooze duration in UI, defaults to 'Off' - notify: - $ref: '#/definitions/notify_properties' - description: Defines how and where to send notifications. At least one method (email or Slack) is required. - annotations: - type: object - description: Key value pair used for annotations - additionalProperties: - type: string - required: - - type - - refresh - - data - - notify - - $ref: '#/definitions/common_properties' - api: - type: object - title: API YAML - description: In your Rill project directory, create a new file name `.yaml` in the `apis` directory containing a custom API definition. See comprehensive documentation on how to define and use [custom APIs](/integrate/custom-apis/index.md) - examples: - - # Example: This api returns the top 10 authors by net line changes since the specified date provided in the arguments. - type: api - name: metrics_view_api - metrics_sql: |- - SELECT author_name, net_line_changes - FROM advanced_metrics_view - where author_date > '{{ .args.date }}' - order by net_line_changes DESC - limit 10 - allOf: - - title: Properties - type: object - properties: - type: - type: string - const: api - description: Refers to the resource type and must be `api` - openapi: - type: object - description: OpenAPI specification for the API endpoint - properties: - summary: - type: string - description: A brief description of what the API endpoint does - parameters: - type: array - description: List of parameters that the API endpoint accepts - items: - type: object - additionalProperties: true - request_schema: - type: object - description: JSON schema for the request body (use nested YAML instead of a JSON string) - additionalProperties: true - response_schema: - type: object - description: JSON schema for the response body (use nested YAML instead of a JSON string) - additionalProperties: true - security: - $ref: '#/definitions/security_policy_properties' - skip_nested_security: - type: boolean - description: Flag to control security inheritance - allOf: - - $ref: '#/definitions/data_properties' - required: - - type - canvas: - type: object - title: Canvas YAML - description: In your Rill project directory, create a explore dashboard, `.yaml`, file in the `dashboards` directory. Rill will ingest the dashboard definition next time you run `rill start`. 
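(Reviewer note: for orientation while reviewing this relocated definition, a minimal canvas file under the schema below might look like this sketch; the component name and sizes are illustrative.)

```yaml
type: canvas
display_name: Revenue overview
rows:
  - height: 320px
    items:
      - component: revenue_kpi # hypothetical component defined elsewhere in the project
        width: 6
```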
- allOf: - - title: Properties - type: object - properties: - type: - type: string - const: canvas - description: Refers to the resource type and must be `canvas` - display_name: - type: string - description: Refers to the display name for the canvas - banner: - type: string - description: Refers to the custom banner displayed at the header of an Canvas dashboard - max_width: - type: integer - description: Max width in pixels of the canvas - minimum: 0 - gap_x: - type: integer - description: Horizontal gap in pixels of the canvas - minimum: 0 - gap_y: - type: integer - description: Vertical gap in pixels of the canvas - minimum: 0 - theme: - oneOf: - - type: string - description: Name of an existing theme to apply to the dashboard - - $ref: '#/definitions/theme/definitions/theme_properties' - description: Inline theme configuration. - description: Name of the theme to use or define a theme inline. Either theme name or inline theme can be set. - allow_custom_time_range: - type: boolean - description: Defaults to true, when set to false it will hide the ability to set a custom time range for the user. - time_ranges: - type: array - description: Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets' - items: - $ref: '#/definitions/explore_time_range_properties' - time_zones: - type: array - description: Refers to the time zones that should be pinned to the top of the time zone selector. It should be a list of [IANA time zone identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) - items: - type: string - filters: - type: object - description: Indicates if filters should be enabled for the canvas. - properties: - enable: - type: boolean - description: Toggles filtering functionality for the canvas dashboard. 
- additionalProperties: false - defaults: - type: object - description: Preset UI state to show by default - properties: - time_range: - type: string - description: Default time range to display when the dashboard loads - comparison_mode: - type: string - description: Default comparison mode for metrics (none, time, or dimension) - comparison_dimension: - type: string - description: Default dimension to use for comparison when comparison_mode is 'dimension' - additionalProperties: false - variables: - type: array - description: Variables that can be used in the canvas - items: - $ref: '#/definitions/component_variable_properties' - rows: - type: array - description: Refers to all of the rows displayed on the Canvas - items: - type: object - properties: - height: - type: string - description: Height of the row in px - items: - type: array - description: List of components to display in the row - items: - type: object - properties: - component: - type: string - description: Name of the component to display - width: - type: - - string - - integer - description: Width of the component (can be a number or string with unit) - additionalProperties: true - additionalProperties: false - security: - $ref: '#/definitions/security_policy_properties' - description: Security rules to apply for access to the canvas - required: - - type - - rows - - $ref: '#/definitions/common_properties' - component: - type: object - title: Component YAML - description: Defines a reusable dashboard component that can be embedded in canvas dashboards - allOf: - - title: Properties - type: object - properties: - type: - type: string - const: component - description: Refers to the resource type and must be `component` - display_name: - type: string - description: Refers to the display name for the component - description: - type: string - description: Detailed description of the component's purpose and functionality - input: - type: array - description: List of input variables that can be passed to the component - items: - $ref: '#/definitions/component_variable_properties' - output: - description: Output variable that the component produces - $ref: '#/definitions/component_variable_properties' - required: - - type - - $ref: '#/definitions/common_properties' - connector: - type: object - title: Connector YAML - description: | - When you add olap_connector to your rill.yaml file, you will need to set up a `.yaml` file in the 'connectors' directory. This file requires the following parameters,type and driver (see below for more parameter options). Rill will automatically test the connectivity to the OLAP engine upon saving the file. This can be viewed in the connectors tab in the UI. - - :::tip Did you know? - - Starting from Rill 0.46, you can directly create OLAP engines from the UI! 
Select + Add -> Data -> Connect an OLAP engine - - ::: - allOf: - - title: Properties - type: object - properties: - type: - type: string - const: connector - description: Refers to the resource type and must be `connector` - required: - - type - - $ref: '#/definitions/common_properties' - - oneOf: - - $ref: '#/definitions/connector/definitions/athena' - - $ref: '#/definitions/connector/definitions/azure' - - $ref: '#/definitions/connector/definitions/bigquery' - - $ref: '#/definitions/connector/definitions/clickhouse' - - $ref: '#/definitions/connector/definitions/druid' - - $ref: '#/definitions/connector/definitions/duckdb' - - $ref: '#/definitions/connector/definitions/gcs' - - $ref: '#/definitions/connector/definitions/https' - - $ref: '#/definitions/connector/definitions/local_file' - - $ref: '#/definitions/connector/definitions/motherduck' - - $ref: '#/definitions/connector/definitions/mysql' - - $ref: '#/definitions/connector/definitions/pinot' - - $ref: '#/definitions/connector/definitions/postgres' - - $ref: '#/definitions/connector/definitions/redshift' - - $ref: '#/definitions/connector/definitions/s3' - - $ref: '#/definitions/connector/definitions/salesforce' - - $ref: '#/definitions/connector/definitions/slack' - - $ref: '#/definitions/connector/definitions/snowflake' - - $ref: '#/definitions/connector/definitions/sqlite' - definitions: - athena: - type: object - title: athena - description: Configuration properties specific to the athena - properties: - driver: - type: string - description: Refers to the driver type and must be driver `athena` - const: athena - aws_access_key_id: - type: string - description: AWS Access Key ID used for authentication. Required when using static credentials directly or as base credentials for assuming a role. - aws_secret_access_key: - type: string - description: AWS Secret Access Key paired with the Access Key ID. Required when using static credentials directly or as base credentials for assuming a role. - aws_access_token: - type: string - description: AWS session token used with temporary credentials. Required only if the Access Key and Secret Key are part of a temporary session credentials. - role_arn: - type: string - description: ARN of the IAM role to assume. When specified, the SDK uses the base credentials to call STS AssumeRole and obtain temporary credentials scoped to this role. - role_session_name: - type: string - description: Session name to associate with the STS AssumeRole session. Used only if 'role_arn' is specified. Useful for identifying and auditing the session. - external_id: - type: string - description: External ID required by some roles when assuming them, typically for cross-account access. Used only if 'role_arn' is specified and the role's trust policy requires it. - workgroup: - type: string - description: Athena workgroup to use for query execution. Defaults to 'primary' if not specified. - output_location: - type: string - description: S3 URI where Athena query results should be stored (e.g., s3://your-bucket/athena/results/). Optional if the selected workgroup has a default result configuration. - aws_region: - type: string - description: AWS region where Athena and the result S3 bucket are located (e.g., us-east-1). Defaults to 'us-east-1' if not specified. - allow_host_access: - type: boolean - description: Allow the Athena client to access host environment configurations such as environment variables or local AWS credential files. 
Defaults to true, enabling use of credentials and settings from the host environment unless explicitly disabled. - required: - - driver - azure: - type: object - title: azure - description: Configuration properties specific to the azure - properties: - driver: - type: string - description: Refers to the driver type and must be driver `azure` - const: azure - azure_storage_account: - type: string - description: Azure storage account name - azure_storage_key: - type: string - description: Azure storage access key - azure_storage_sas_token: - type: string - description: Optional azure SAS token for authentication - azure_storage_connection_string: - type: string - description: Optional azure connection string for storage account - azure_storage_bucket: - type: string - description: Name of the Azure Blob Storage container (equivalent to an S3 bucket) - allow_host_access: - type: boolean - description: Allow access to host environment configuration - required: - - driver - - azure_storage_bucket - bigquery: - type: object - title: bigquery - description: Configuration properties specific to the bigquery - properties: - driver: - type: string - description: Refers to the driver type and must be driver `bigquery` - const: bigquery - google_application_credentials: - type: string - description: Raw contents of the Google Cloud service account key (in JSON format) used for authentication. - project_id: - type: string - description: ID of the Google Cloud project to use for BigQuery operations. This can be omitted only if the project ID is included in the service account key. - allow_host_access: - type: boolean - description: Enable the BigQuery client to use credentials from the host environment when no service account JSON is provided. This includes Application Default Credentials from environment variables, local credential files, or the Google Compute Engine metadata server. Defaults to true, allowing seamless authentication in GCP environments. - required: - - driver - clickhouse: - type: object - title: clickhouse - description: Configuration properties specific to the clickhouse - properties: - driver: - type: string - description: Refers to the driver type and must be driver `clickhouse` - const: clickhouse - managed: - type: boolean - description: '`true` means Rill will provision the connector using the default provisioner. `false` disables automatic provisioning.' - mode: - type: string - description: "`read` - Controls the operation mode for the ClickHouse connection. Defaults to 'read' for safe operation with external databases. Set to 'readwrite' to enable model creation and table mutations. Note: When 'managed: true', this is automatically set to 'readwrite'." 
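(Reviewer note: a minimal ClickHouse connector file using `mode` above plus the `host`/`port`/`ssl` fields defined just below might look like this sketch; all values are placeholders, and real credentials should come from environment variables rather than being hard-coded.)

```yaml
type: connector
driver: clickhouse
mode: readwrite # enables model creation; defaults to 'read'
host: localhost # placeholder
port: 9000      # placeholder; ClickHouse native protocol port
ssl: false
```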
- dsn: - type: string - description: DSN(Data Source Name) for the ClickHouse connection - username: - type: string - description: Username for authentication - password: - type: string - description: Password for authentication - host: - type: string - description: Host where the ClickHouse instance is running - port: - type: integer - description: Port where the ClickHouse instance is accessible - database: - type: string - description: Name of the ClickHouse database within the cluster - ssl: - type: boolean - description: Indicates whether a secured SSL connection is required - cluster: - type: string - description: 'Cluster name, required for running distributed queries' - log_queries: - type: boolean - description: Controls whether to log raw SQL queries - settings_override: - type: string - description: override the default settings used in queries. example `readonly = 1, session_timezone = 'UTC'` - embed_port: - type: integer - description: Port to run ClickHouse locally (0 for random port) - can_scale_to_zero: - type: boolean - description: Indicates if the database can scale to zero - max_open_conns: - type: integer - description: Maximum number of open connections to the database - max_idle_conns: - type: integer - description: Maximum number of idle connections in the pool - dial_timeout: - type: string - description: Timeout for dialing the ClickHouse server - conn_max_lifetime: - type: string - description: Maximum time a connection may be reused - read_timeout: - type: string - description: Maximum time for a connection to read data - required: - - driver - druid: - type: object - title: druid - description: Configuration properties specific to the druid - properties: - driver: - type: string - description: Refers to the driver type and must be driver `druid` - const: druid - dsn: - type: string - description: Data Source Name (DSN) for connecting to Druid - username: - type: string - description: Username for authenticating with Druid - password: - type: string - description: Password for authenticating with Druid - host: - type: string - description: Hostname of the Druid coordinator or broker - port: - type: integer - description: Port number of the Druid service - ssl: - type: boolean - description: Enable SSL for secure connection - log_queries: - type: boolean - description: Log raw SQL queries sent to Druid - max_open_conns: - type: integer - description: Maximum number of open database connections (0 = default, -1 = unlimited) - skip_version_check: - type: boolean - description: Skip checking Druid version compatibility - required: - - driver - - dsn - duckdb: - type: object - title: duckdb - description: Configuration properties specific to the duckdb - properties: - driver: - type: string - description: Refers to the driver type and must be driver `duckdb` - const: duckdb - pool_size: - type: integer - description: Number of concurrent connections and queries allowed - allow_host_access: - type: boolean - description: Whether access to the local environment and file system is allowed - cpu: - type: integer - description: Number of CPU cores available to the database - memory_limit_gb: - type: integer - description: Amount of memory in GB available to the database - read_write_ratio: - type: number - description: Ratio of resources allocated to the read database; used to divide CPU and memory - init_sql: - type: string - description: is executed during database initialization. 
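(Reviewer note: `init_sql` is typically used for one-time setup such as installing and loading DuckDB extensions; a minimal sketch, with the extension choice purely illustrative.)

```yaml
type: connector
driver: duckdb
init_sql: INSTALL 'httpfs'; LOAD 'httpfs'; # runs once during database initialization
```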
- conn_init_sql: - type: string - description: is executed when a new connection is initialized. - secrets: - type: string - description: Comma-separated list of other connector names to create temporary secrets for in DuckDB before executing a model. - log_queries: - type: boolean - description: Whether to log raw SQL queries executed through OLAP - required: - - driver - gcs: - type: object - title: gcs - description: Configuration properties specific to the gcs - properties: - driver: - type: string - description: Refers to the driver type and must be driver `gcs` - const: gcs - google_application_credentials: - type: string - description: Google Cloud credentials JSON string - bucket: - type: string - description: Name of gcs bucket - allow_host_access: - type: boolean - description: Allow access to host environment configuration - key_id: - type: string - description: Optional S3-compatible Key ID when used in compatibility mode - secret: - type: string - description: Optional S3-compatible Secret when used in compatibility mode - required: - - driver - - bucket - https: - type: object - title: https - description: Configuration properties specific to the https - properties: - driver: - type: string - description: Refers to the driver type and must be driver `https` - const: https - path: - type: string - description: The full HTTPS URI to fetch data from - headers: - type: object - description: HTTP headers to include in the request - additionalProperties: - type: string - required: - - driver - - path - local_file: - type: object - title: local_file - description: Configuration properties specific to the local_file - properties: - driver: - type: string - description: Refers to the driver type and must be driver `local_file` - const: local_file - dsn: - type: string - description: Data Source Name (DSN) indicating the file path or location of the local file - allow_host_access: - type: boolean - description: Flag to indicate if access to host-level file paths is permitted - required: - - driver - - dsn - motherduck: - type: object - title: motherduck - description: Configuration properties specific to the motherduck - properties: - driver: - type: string - description: Refers to the driver type and must be driver `motherduck` - const: motherduck - dsn: - type: string - description: Data Source Name (DSN) specifying the MotherDuck connection endpoint - token: - type: string - description: Authentication token for accessing MotherDuck (secret) - required: - - driver - - dsn - - token - mysql: - type: object - title: mysql - description: Configuration properties specific to the mysql - properties: - driver: - type: string - description: Refers to the driver type and must be driver `mysql` - const: mysql - dsn: - type: string - description: DSN(Data Source Name) for the mysql connection - host: - type: string - description: Hostname of the MySQL server - port: - type: integer - description: Port number for the MySQL server - database: - type: string - description: Name of the MySQL database - user: - type: string - description: Username for authentication - password: - type: string - description: Password for authentication - ssl_mode: - type: string - description: SSL mode can be DISABLED, PREFERRED or REQUIRED - required: - - driver - pinot: + - $ref: '#/definitions/connectors' + - $ref: '#/definitions/sources' + - $ref: '#/definitions/models' + - $ref: '#/definitions/advanced-models' + - $ref: '#/definitions/metrics-views' + - $ref: '#/definitions/canvas-dashboards' + - $ref: 
'#/definitions/explore-dashboards' + - $ref: '#/definitions/alerts' + - $ref: '#/definitions/apis' + - $ref: '#/definitions/themes' + - $ref: '#/definitions/rillyaml' + +definitions: + # Connector YAML + connectors: + title: Connector YAML + id: connectors + type: object + description: | + Connector YAML files define how Rill connects to external data sources and OLAP engines. Each connector specifies a driver type and its required connection parameters. + + ## Available Connector Types + + ### _OLAP Engines_ + - [**DuckDB**](#duckdb) - Embedded DuckDB engine (default) + - [**ClickHouse**](#clickhouse) - ClickHouse analytical database + - [**MotherDuck**](#motherduck) - MotherDuck cloud database + - [**Druid**](#druid) - Apache Druid + - [**Pinot**](#pinot) - Apache Pinot + + ### _Data Warehouses_ + - [**Snowflake**](#snowflake) - Snowflake data warehouse + - [**BigQuery**](#bigquery) - Google BigQuery + - [**Redshift**](#redshift) - Amazon Redshift + - [**Athena**](#athena) - Amazon Athena + + ### _Databases_ + - [**PostgreSQL**](#postgres) - PostgreSQL databases + - [**MySQL**](#mysql) - MySQL databases + - [**SQLite**](#sqlite) - SQLite databases + + ### _Cloud Storage_ + - [**GCS**](#gcs) - Google Cloud Storage + - [**S3**](#s3) - Amazon S3 storage + - [**Azure**](#azure) - Azure Blob Storage + + ### _Other_ + - [**HTTPS**](#https) - Public files via HTTP/HTTPS + - [**Salesforce**](#salesforce) - Salesforce data + - [**Slack**](#slack) - Slack data + + :::warning Security Recommendation + For all credential parameters (passwords, tokens, keys), use environment variables with the syntax `{{.env.connector..}}`. This keeps sensitive data out of your YAML files and version control. See our [credentials documentation](/connect/credentials/) for complete setup instructions. 
+ ::: + allOf: + - title: Properties type: object - title: pinot - description: Configuration properties specific to the pinot properties: - driver: - type: string - description: Refers to the driver type and must be driver `pinot` - const: pinot - dsn: - type: string - description: DSN(Data Source Name) for the Pinot connection - username: - type: string - description: Username for authenticating with Pinot - password: - type: string - description: Password for authenticating with Pinot - broker_host: - type: string - description: Hostname of the Pinot broker - broker_port: - type: integer - description: Port number for the Pinot broker - controller_host: + type: type: string - description: Hostname of the Pinot controller - controller_port: - type: integer - description: Port number for the Pinot controller - ssl: - type: boolean - description: Enable SSL connection to Pinot - log_queries: - type: boolean - description: Log raw SQL queries executed through Pinot - max_open_conns: - type: integer - description: Maximum number of open connections to the Pinot database + const: connector + description: Refers to the resource type and must be `connector` required: - - driver - - dsn - - broker_host - - controller_host - postgres: + - type + + + - $ref: '#/definitions/common_properties' + + - oneOf: + - $ref: '#/definitions/athena' + - $ref: '#/definitions/azure' + - $ref: '#/definitions/bigquery' + - $ref: '#/definitions/clickhouse' + - $ref: '#/definitions/druid' + - $ref: '#/definitions/duckdb' + - $ref: '#/definitions/gcs' + - $ref: '#/definitions/https' + - $ref: '#/definitions/motherduck' + - $ref: '#/definitions/mysql' + - $ref: '#/definitions/pinot' + - $ref: '#/definitions/postgres' + - $ref: '#/definitions/redshift' + - $ref: '#/definitions/s3' + - $ref: '#/definitions/salesforce' + - $ref: '#/definitions/slack' + - $ref: '#/definitions/snowflake' + - $ref: '#/definitions/sqlite' + + # Source YAML + sources: + title: Source YAML + type: object + id: sources + description: | + :::warning Deprecated Feature + **Sources have been deprecated** and are now considered "source models." While sources remain backward compatible, we recommend migrating to the new source model format for access to the latest features and improvements. 
+ + **Next steps:** + - Continue using sources if needed (backward compatible) + - Migrate to source models via the `type:model` parameter for existing projects + - See our [model YAML reference](advanced-models) for current documentation and best practices + ::: + allOf: + - title: Properties type: object - title: postgres - description: Configuration properties specific to the postgres properties: - driver: - type: string - description: Refers to the driver type and must be driver `postgres` - const: postgres - dsn: - type: string - description: DSN(Data Source Name) for the postgres connection - host: - type: string - description: Hostname of the Postgres server - port: - type: string - description: Port number for the Postgres server - dbname: - type: string - description: Name of the Postgres database - user: - type: string - description: Username for authentication - password: - type: string - description: Password for authentication - sslmode: + type: type: string - description: SSL mode can be disable, allow, prefer or require - required: - - driver - redshift: - type: object - title: redshift - description: Configuration properties specific to the redshift - properties: - driver: + const: connector + description: Refers to the resource type and must be `connector` + connector: type: string - description: Refers to the driver type and must be driver `redshift` - const: redshift - aws_access_key_id: + description: Refers to the connector type for the source, see [connectors](/reference/project-files/connectors) for more information + enum: + - https + - s3 + - gcs + - local_file + - motherduck + - athena + - redshift + - postgres + - sqlite + - snowflake + - bigquery + - duckdb + uri: type: string - description: AWS Access Key ID used for authenticating with Redshift. - aws_secret_access_key: + description: | + Refers to the URI of the remote connector you are using for the source. Rill also supports glob patterns as part of the URI for S3 and GCS (required for type: http, s3, gcs). + + - `s3://your-org/bucket/file.parquet` — the s3 URI of your file + - `gs://your-org/bucket/file.parquet` — the gsutil URI of your file + - `https://data.example.org/path/to/file.parquet` — the web address of your file + path: type: string - description: AWS Secret Access Key used for authenticating with Redshift. - aws_access_token: + description: Refers to the local path of the connector you are using for the source + sql: type: string - description: AWS Session Token for temporary credentials (optional). + description: Sets the SQL query to extract data from a SQL source region: type: string - description: AWS region where the Redshift cluster or workgroup is hosted (e.g., 'us-east-1'). - database: - type: string - description: Name of the Redshift database to query. - workgroup: - type: string - description: Workgroup name for Redshift Serverless, in case of provisioned Redshift clusters use 'cluster_identifier'. - cluster_identifier: - type: string - description: Cluster identifier for provisioned Redshift clusters, in case of Redshift Serverless use 'workgroup' . 
- required: - - driver - - aws_access_key_id - - aws_secret_access_key - - database - s3: - type: object - title: s3 - description: Configuration properties specific to the s3 - properties: - driver: - type: string - description: Refers to the driver type and must be driver `s3` - const: s3 - aws_access_key_id: - type: string - description: AWS Access Key ID used for authentication - aws_secret_access_key: - type: string - description: AWS Secret Access Key used for authentication - aws_access_token: - type: string - description: Optional AWS session token for temporary credentials - bucket: - type: string - description: Name of s3 bucket + description: Sets the cloud region of the S3 bucket or Athena endpoint: type: string - description: Optional custom endpoint URL for S3-compatible storage - region: - type: string - description: AWS region of the S3 bucket - allow_host_access: - type: boolean - description: Allow access to host environment configuration - retain_files: - type: boolean - description: Whether to retain intermediate files after processing - required: - - driver - - bucket - salesforce: - type: object - title: salesforce - description: Configuration properties specific to the salesforce - properties: - driver: - type: string - description: Refers to the driver type and must be driver `salesforce` - const: salesforce - username: - type: string - description: Salesforce account username - password: - type: string - description: Salesforce account password (secret) - key: + description: Overrides the S3 endpoint to connect to + output_location: type: string - description: Authentication key for Salesforce (secret) - endpoint: + description: Sets the query output location and result files in Athena + workgroup: type: string - description: Salesforce API endpoint URL - client_id: + description: Sets a workgroup for Athena connector + project_id: type: string - description: Client ID used for Salesforce OAuth authentication - required: - - driver - - username - - endpoint - slack: - type: object - title: slack - description: Configuration properties specific to the slack - properties: - driver: + description: Sets a project id to be used to run BigQuery jobs + timeout: type: string - description: Refers to the driver type and must be driver `slack` - const: slack - bot_token: + description: The maximum time to wait for source ingestion + refresh: + type: object + description: | + Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying source data (optional). + ```yaml + refresh: + cron: "* * * * *" + every: "24h" + ``` + properties: + cron: + type: string + description: A cron schedule expression, which should be encapsulated in single quotes, e.g. 
`* * * * *` + every: + type: string + description: A Go duration string, such as `24h` + db: type: string - description: Bot token used for authenticating Slack API requests - required: - - driver - - bot_token - snowflake: - type: object - title: snowflake - description: Configuration properties specific to the snowflake - properties: - driver: + description: Sets the database for motherduck connections and/or the path to the DuckDB/SQLite db file + database_url: type: string - description: Refers to the driver type and must be driver `snowflake` - const: snowflake + description: Postgres connection string that should be used + duckdb: + type: object + description: Specifies the raw parameters to inject into the DuckDB read_csv, read_json or read_parquet statement + additionalProperties: true dsn: type: string - description: DSN (Data Source Name) for the Snowflake connection - parallel_fetch_limit: - type: integer - description: Maximum number of concurrent fetches during query execution + description: Used to set the Snowflake connection string required: - - driver - - dsn - sqlite: + - type + - connector + - $ref: '#/definitions/common_properties' + + # Model SQL + models: + title: Model SQL + type: object + id: models + description: | + When using Rill Developer, data transformations are powered by DuckDB and their dialect of SQL. Under the hood, by default, data models are created as views in DuckDB. Please check our modeling page and DuckDB documentation for more details about how to construct and write your model SQL syntax. + + In your Rill project directory, you can also create a `.sql` file containing an appropriate DuckDB `SELECT` statement, most commonly within the default `models` directory, to represent a model (or set of SQL transformations). Rill will automatically detect and parse the model next time you run `rill start`. + + ### Annotating your models with properties + In most cases, objects are represented in Rill as YAML files. Models are unique in that any model.sql file can be considered a model resource in Rill, representing a SQL transformation that you would like to inform using a set of inputs and outputting a view or table (depending on the materialization type). For most other resources, available properties can be set directly via the corresponding YAML file. In the case of a model SQL file though, configurable properties should be set by annotating the top of the file using the following syntax: + ```sql + -- @property: value + ``` + We will cover different available configurable properties in the below sections. + allOf: + - title: Properties type: object - title: sqlite - description: Configuration properties specific to the sqlite properties: - driver: - type: string - description: Refers to the driver type and must be driver `sqlite` - const: sqlite - dsn: + type: type: string - description: DSN(Data Source Name) for the sqlite connection - required: - - driver - - dsn - explore: + const: model + description: | + By default, any new model that is created in a Rill project will populate a corresponding .sql file representing the model. Similarly, a .sql file that is directly created in the project directory will also be automatically assumed by Rill to be a model by default. Therefore, it is not necessary to annotate the model resource with the type property. 
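(Reviewer note: putting the annotation syntax together, a model file that opts into annotations might begin like the sketch below; the model body and table name are illustrative.)

```sql
-- @type: model
-- @materialize: true
SELECT * FROM upstream_model -- hypothetical upstream model or table
```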
+ + For consistency or documentation purposes, if you'd like to annotate your model resource with the type property as well, you can do so by adding the following to the top of your model_name.sql: + ```sql + -- @type: model + ``` + materialize: + type: boolean + description: | + As mentioned, models will be materialized in DuckDB as views by default. However, you can choose to materialize them as tables instead of views. To do this, you can add the following annotation to the top of your model SQL file: + ```sql + -- @materialize: true + ``` + + Alternatively, it is also possible to set a project-wide default that your models inherit via your rill.yaml file: + ```yaml + models: + materialize: true + ``` + + :::info To materialize or not to materialize? + + There are both pros and cons to materializing your models. + - Pros can include improved performance for downstream models and dashboards, especially when the SQL is complex and/or the data size is large. We generally recommend _materializing_ final models that power dashboards. + - Cons can include a degraded keystroke-by-keystroke modeling experience, or issues in specific edge cases, such as when using cross joins. + + If unsure, we would generally recommend leaving the defaults and/or reaching out for further guidance! + ::: + + # Advanced Models + advanced-models: + + title: Models YAML + id: advanced-models + type: object + description: | + :::tip + + Both regular models and source models can use the Model YAML specification described on this page. While [SQL models](./models) are perfect for simple transformations, Model YAML files provide advanced capabilities for complex data processing scenarios. + + **When to use Model YAML:** + - **Partitions** - Optimize performance with data partitioning strategies + - **Incremental models** - Process only new or changed data efficiently + - **Pre/post execution hooks** - Run custom logic before or after model execution + - **Staging** - Create intermediate tables for complex transformations + - **Output configuration** - Define specific output formats and destinations + + Model YAML files give you fine-grained control over how your data is processed and transformed, making them ideal for production workloads and complex analytics pipelines. + + ::: + allOf: + - title: Properties + type: object + properties: + type: + type: string + const: model + description: Refers to the resource type and must be `model` + refresh: + $ref: '#/definitions/schedule_properties' + description: Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying model data + examples: | + ```yaml + refresh: + cron: "* * * * *" + #every: "24h" + ``` + + connector: + type: string + description: Refers to the OLAP connector the model runs on and is needed if setting an explicit OLAP engine,
e.g. `clickhouse` + sql: + type: string + description: Raw SQL query to run against the source + pre_exec: + type: string + description: | + Refers to SQL queries to run before the main query, available for DuckDB-based models (optional). + Ensure pre_exec queries are idempotent. Use IF NOT EXISTS statements when applicable. + ```yaml + pre_exec: ATTACH IF NOT EXISTS 'dbname=postgres host=localhost port=5432 user=postgres password=postgres' AS postgres_db (TYPE POSTGRES) + ``` + post_exec: + type: string + description: | + Refers to a SQL query that is run after the main query, available for DuckDB-based models (optional). + Ensure post_exec queries are idempotent. Use IF EXISTS statements when applicable. + ```yaml + post_exec: DETACH DATABASE IF EXISTS postgres_db + ``` + timeout: + type: string + description: The maximum time to wait for model ingestion + incremental: + type: boolean + description: Whether incremental modeling is enabled (optional) + change_mode: + type: string + enum: + - reset + - manual + - patch + description: Configure how changes to the model specifications are applied (optional). 'reset' will drop and recreate the model automatically, 'manual' will require a manual full or incremental refresh to apply changes, and 'patch' will switch to the new logic without re-processing historical data (only applies for incremental models). + state: + $ref: '#/definitions/data_properties' + description: Refers to the explicitly defined state of your model, cannot be used with partitions (optional) + examples: | + ```yaml + state: + sql: SELECT MAX(date) as max_date + ``` + partitions: + $ref: '#/definitions/data_properties' + description: Refers to how your data is partitioned, cannot be used with state.
(optional) + examples: | + ```yaml + partitions: + glob: gcs://my_bucket/y=*/m=*/d=*/*.parquet + ``` + ```yaml + partitions: + connector: duckdb + sql: SELECT range AS num FROM range(0,10) + ``` + materialize: + type: boolean + description: Whether the model will be materialized in the OLAP engine + partitions_watermark: + type: string + description: Refers to a customizable timestamp that can be set to check if an object has been updated (optional). + partitions_concurrency: + type: integer + description: Refers to the number of concurrent partitions that can be read at the same time (optional). + stage: + type: object + properties: + connector: + type: string + description: Refers to the connector type for the staging table + path: + type: string + description: Refers to the path to the staging table + + required: + - connector + description: Used for staging models, where the input connector does not support writing directly to the output and a staging table is required + examples: | + ```yaml + stage: + connector: s3 + path: s3://my_bucket/my_staging_table + ``` + additionalProperties: true + output: + type: object + description: Defines the properties of the model output + properties: + table: + type: string + description: Name of the output table. If not specified, the model name is used. + materialize: + type: boolean + description: Whether to materialize the model as a table or view + connector: + type: string + description: Refers to the connector type for the output table. Can be `clickhouse` or `duckdb`, or one of their named connectors + incremental_strategy: + type: string + enum: + - append + - merge + - partition_overwrite + description: Strategy to use for incremental updates.
Can be 'append', 'merge' or 'partition_overwrite' + unique_key: + type: array + items: + type: string + description: List of columns that uniquely identify a row for the merge strategy + partition_by: + type: string + description: Column or expression to partition the table by + allOf: + - if: + title: Additional properties for `output` when `connector` is `clickhouse` + properties: + connector: + const: clickhouse + required: + - connector + then: + properties: + type: + type: string + description: Type to materialize the model into. Can be 'TABLE', 'VIEW' or 'DICTIONARY' + enum: + - TABLE + - VIEW + - DICTIONARY + columns: + type: string + description: Column names and types. Can also include indexes. If unspecified, detected from the query. + engine_full: + type: string + description: Full engine definition in SQL format. Can include partition keys, order, TTL, etc. + engine: + type: string + description: Table engine to use. Default is MergeTree + order_by: + type: string + description: ORDER BY clause. + partition_by: + type: string + description: PARTITION BY clause. + primary_key: + type: string + description: PRIMARY KEY clause. + sample_by: + type: string + description: SAMPLE BY clause. + ttl: + type: string + description: TTL settings for the table or columns. + table_settings: + type: string + description: Table-specific settings. + query_settings: + type: string + description: Settings used in insert/create table as select queries. + distributed_settings: + type: string + description: Settings for distributed table. + distributed_sharding_key: + type: string + description: Sharding key for distributed table. + dictionary_source_user: + type: string + description: User for accessing the source dictionary table (used if type is DICTIONARY). + dictionary_source_password: + type: string + description: Password for the dictionary source user. required: - - type + - type + - sql - $ref: '#/definitions/common_properties' - metrics_view: - type: object + - title: Depending on the connector, additional properties may be required + description: | + Depending on the connector, additional properties may be required. For more information, see the [connectors](./connectors.md) documentation. + examples: | + ### Incremental model + ```yaml + # Illustrative sketch; adjust names to your project + type: model + incremental: true + sql: SELECT * FROM source_table + output: + incremental_strategy: append + ``` + + # Metrics Views + metrics-views: title: Metrics View YAML + id: metrics-views + type: object description: In your Rill project directory, create a metrics view, `.yaml`, file in the `metrics` directory. Rill will ingest the metric view definition next time you run `rill start`. allOf: - title: Properties type: object properties: + version: + type: string + description: The version of the metrics view schema type: type: string const: metrics_view description: Refers to the resource type and must be `metrics_view` - parent: + connector: type: string - description: Refers to the parent metrics from which this metrics view is derived. If specified, this will inherit properties from the parent metrics view + description: Refers to the connector type for the metrics view. See [OLAP engines](/connect/olap) for more information. display_name: type: string description: Refers to the display name for the metrics view @@ -1076,7 +519,7 @@ definitions: description: Refers to the description for the metrics view ai_instructions: type: string - description: Extra instructions for AI agents. Used to guide natural language question answering and routing. + description: Extra instructions for [AI agents](/explore/mcp).
Used to guide natural language question answering and routing. model: type: string description: Refers to the model powering the dashboard (either model or table is required) @@ -1150,9 +593,12 @@ definitions: display_name: type: string description: the display name of your measure. + label: + type: string + description: a label for your measure, deprecated use display_name description: type: string - description: a freeform text description of the dimension + description: a freeform text description of the measure type: type: string description: 'Measure calculation type: "simple" for basic aggregations, "derived" for calculations using other measures, or "time_comparison" for period-over-period analysis. Defaults to "simple" unless dependencies exist.' @@ -1160,7 +606,7 @@ definitions: type: string description: a combination of operators and functions for aggregations window: - description: A measure window can be defined as a keyword string (e.g. 'time' or 'all') or an object with detailed window configuration. + description: A measure window can be defined as a keyword string (e.g. 'time' or 'all') or an object with detailed window configuration. For more information, see the [window functions](/build/metrics-view/advanced-expressions/windows) documentation. anyOf: - type: string enum: @@ -1175,23 +621,27 @@ definitions: type: boolean description: 'Controls whether the window is partitioned. When true, calculations are performed within each partition separately.' order: - $ref: '#/definitions/metrics_view/definitions/field_selectors_properties' + type: string + $ref: '#/definitions/field_selectors_properties' description: 'Specifies the fields to order the window by, determining the sequence of rows within each partition.' frame: type: string description: 'Defines the window frame boundaries for calculations, specifying which rows are included in the window relative to the current row.' additionalProperties: false per: - $ref: '#/definitions/metrics_view/definitions/field_selectors_properties' + $ref: '#/definitions/field_selectors_properties' description: for per dimensions requires: - $ref: '#/definitions/metrics_view/definitions/field_selectors_properties' - description: using an available measure or dimension in your metrics view to set a required parameter, cannot be used with simple measures + $ref: '#/definitions/field_selectors_properties' + description: using an available measure or dimension in your metrics view to set a required parameter, cannot be used with simple measures. See [referencing measures](/build/metrics-view/advanced-expressions/referencing) for more information. + valid_percent_of_total: + type: boolean + description: a boolean indicating whether percent-of-total values should be rendered for this measure format_preset: type: string description: | Controls the formatting of this measure using a predefined preset. Measures cannot have both `format_preset` and `format_d3`. If neither is supplied, the measure will be formatted using the `humanize` preset by default. - + Available options: - `humanize`: Round numbers into thousands (K), millions(M), billions (B), etc. - `none`: Raw output. @@ -1204,21 +654,31 @@ definitions: description: 'Controls the formatting of this measure using a [d3-format](https://d3js.org/d3-format) string. If an invalid format string is supplied, the measure will fall back to `format_preset: humanize`. A measure cannot have both `format_preset` and `format_d3`. If neither is provided, the humanize preset is used by default. 
Example: `format_d3: ".2f"` formats using fixed-point notation with two decimal places. Example: `format_d3: ",.2r"` formats using grouped thousands with two significant digits. (optional)' format_d3_locale: type: object - description: locale configuration passed through to D3, enabling changing the currency symbol among other things. For details, see the docs for D3's [formatLocale](https://d3js.org/d3-format#formatLocale) - additionalProperties: true - valid_percent_of_total: - type: boolean - description: a boolean indicating whether percent-of-total values should be rendered for this measure + description: | + Locale configuration passed through to D3, enabling changing the currency symbol among other things. For details, see the docs for D3's formatLocale. + ```yaml + format_d3: "$," + format_d3_locale: + grouping: [3, 2] + currency: ["₹", ""] + ``` + properties: + grouping: + type: array + description: the digit group sizes used when grouping numbers (e.g., `[3]` groups by thousands) + currency: + type: array + description: the currency prefix and suffix (e.g., `["$", ""]`) + treat_nulls_as: type: string description: Used to configure what value to fill in for missing time buckets. This also works generally as coalescing over non-empty time buckets. - minItems: 1 - parent_dimensions: - description: Optional field selectors for dimensions to inherit from the parent metrics view. - $ref: '#/definitions/field_selector_properties' - parent_measures: - description: Optional field selectors for measures to inherit from the parent metrics view. - $ref: '#/definitions/field_selector_properties' + + required: + - name + - display_name + - expression + annotations: type: array description: Used to define annotations that can be displayed on charts @@ -1230,7 +690,7 @@ definitions: description: A stable identifier for the annotation. Defaults to model or table names when not specified model: type: string - description: Refers to the model powering the annotation (either table or model is required). The model must have 'time' and 'description' columns. Optional columns include 'time_end' for range annotations and 'duration' to specify when the annotation should appear based on dashboard grain level. + description: Refers to the model powering the annotation (either table or model is required). The model must have 'time' and 'description' columns. Optional columns include 'time_end' for range annotations and 'grain' to specify when the annotation should appear based on dashboard grain level. database: type: string description: Refers to the database to use in the OLAP engine (to be used in conjunction with table). Otherwise, will use the default database or schema if not specified @@ -1244,635 +704,921 @@ definitions: type: string description: Refers to the connector to use for the annotation measures: - $ref: '#/definitions/metrics_view/definitions/field_selectors_properties' description: Specifies which measures to apply the annotation to. Applies to all measures if not specified + anyOf: + - type: string + description: Simple field name as a string. + - type: array + description: List of field selectors, each can be a string or an object with detailed configuration. + items: + anyOf: + - type: string + description: Shorthand field selector, interpreted as the name. + - type: object + description: Detailed field selector configuration with name and optional time grain. + properties: + name: + type: string + description: Name of the field to select. + time_grain: + type: string + description: Time grain for time-based dimensions.
+ enum: + - '' + - ms + - millisecond + - s + - second + - min + - minute + - h + - hour + - d + - day + - w + - week + - month + - q + - quarter + - 'y' + - year + required: + - name + additionalProperties: false security: - $ref: '#/definitions/security_policy_properties' - description: Defines a security policy for the dashboard - explore: - $ref: '#/definitions/metrics_view/definitions/explore_properties' - description: Defines an optional inline explore view for the metrics view. If not specified a default explore will be emitted unless `skip` is set to true. + $ref: '#/definitions/security_policy_properties' + description: Defines a security policy for the dashboard required: - type - - $ref: '#/definitions/common_properties' - definitions: - field_selectors_properties: - anyOf: - - type: string - description: 'Simple field name as a string.' - - type: array - description: 'List of field selectors, each can be a string or an object with detailed configuration.' - items: - anyOf: - - type: string - description: 'Shorthand field selector, interpreted as the name.' - - type: object - description: 'Detailed field selector configuration with name and optional time grain.' - properties: - name: - type: string - description: 'Name of the field to select.' - time_grain: - type: string - description: 'Time grain for time-based dimensions.' - enum: - - '' - - ms - - millisecond - - s - - second - - min - - minute - - h - - hour - - d - - day - - w - - week - - month - - q - - quarter - - 'y' - - year - required: - - name - additionalProperties: false - minItems: 1 - explore_properties: + - model + + - $ref: '#/definitions/common_properties' + + # Canvas Dashboards + canvas-dashboards: + title: Canvas Dashboard YAML + id: canvas-dashboards + type: object + description: Canvas dashboards provide a flexible way to create custom dashboards with drag-and-drop components. + allOf: + - title: Properties type: object properties: - skip: + type: + type: string + const: canvas + description: Refers to the resource type and must be `canvas` + display_name: + type: string + description: Refers to the display name for the canvas + description: + type: string + description: Description for the canvas dashboard + banner: + type: string + description: Refers to the custom banner displayed at the header of a Canvas dashboard + rows: + type: array + description: Refers to all of the rows displayed on the Canvas + items: + type: object + properties: + height: + type: string + description: Height of the row in px + items: + type: array + description: List of components to display in the row + items: + type: object + properties: + component: + type: string + description: | + Name of the component to display. Each component type has its own set of properties.
+ Available component types: + + - **markdown** - Text component, uses markdown formatting + - **kpi_grid** - KPI component, similar to TDD in Rill Explore, display quick KPI charts + - **stacked_bar_normalized** - Bar chart normalized to 100% values + - **line_chart** - Normal Line chart + - **bar_chart** - Normal Bar chart + - **stacked_bar** - Stacked Bar chart + - **area_chart** - Line chart with area + - **image** - Provide a URL to embed into canvas dashboard + - **table** - Similar to Pivot table, add dimensions and measures to visualize your data + - **heatmap** - Heat Map chart to visualize distribution of data + - **donut_chart** - Donut or Pie chart to display sums of total + width: + type: + - string + - integer + description: Width of the component (can be a number or string with unit) + additionalProperties: true + additionalProperties: false + max_width: + type: integer + description: Max width in pixels of the canvas + minimum: 0 + gap_x: + type: integer + description: Horizontal gap in pixels of the canvas + minimum: 0 + gap_y: + type: integer + description: Vertical gap in pixels of the canvas + minimum: 0 + filters: + type: object + description: Indicates if filters should be enabled for the canvas. + additionalProperties: true + properties: + enable: + type: boolean + description: Toggles filtering functionality for the canvas dashboard. + allow_custom_time_range: type: boolean - description: If true, disables the explore view for this metrics view. - name: + description: Defaults to true, when set to false it will hide the ability to set a custom time range for the user. + time_ranges: + type: array + description: | + Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets' + ```yaml + time_ranges: + - PT15M // Simplified syntax to specify only the range + - PT1H + - PT6H + - P7D + - range: P5D // Advanced syntax to specify comparison_offsets as well + - P4W + - rill-TD // Today + - rill-WTD // Week-To-date + ``` + items: + $ref: '#/definitions/explore_time_range_properties' + time_zones: + type: array + description: Refers to the time zones that should be pinned to the top of the time zone selector. It should be a list of [IANA time zone identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) + items: + type: string + defaults: + type: object + description: | + defines the defaults YAML struct + ```yaml + defaults: #define all the defaults within here + time_range: P1M + comparison_mode: dimension #time, none + comparison_dimension: filename + ``` + properties: + time_range: + description: Refers to the default time range shown when a user initially loads the dashboard. The value must be either a valid [ISO 8601 duration](https://en.wikipedia.org/wiki/ISO_8601#Durations) (for example, PT12H for 12 hours, P1M for 1 month, or P26W for 26 weeks) or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) + type: string + comparison_mode: + description: 'Controls how to compare current data with historical or categorical baselines. 
Options: `none` (no comparison), `time` (compares with past based on default_time_range), `dimension` (compares based on comparison_dimension values)' + type: string + enum: + - none + - time + - dimension + comparison_dimension: + description: 'for dimension mode, specify the comparison dimension by name' + type: string + additionalProperties: false + theme: + oneOf: + - type: string + description: Name of an existing theme to apply to the dashboard + - $ref: '#/definitions/theme_properties' + description: Inline theme configuration. + description: Name of the theme to use. Only one of theme and embedded_theme can be set. + security: + description: Security rules to apply for access to the canvas dashboard + $ref: '#/definitions/dashboard_security_policy_properties' + required: + - type + - display_name + - $ref: '#/definitions/common_properties' + + # Explore dashboards + explore-dashboards: + title: Explore Dashboard YAML + id: explore-dashboards + type: object + description: Explore dashboards provide an interactive way to explore data with predefined metrics and dimensions. + allOf: + - title: Properties + type: object + properties: + type: type: string - description: Name of the explore view. + const: explore + description: Refers to the resource type and must be `explore` display_name: type: string - description: Display name for the explore view. + description: Refers to the display name for the explore dashboard + metrics_view: + type: string + description: Refers to the metrics view resource description: type: string - description: Description for the explore view. + description: Refers to the description of the explore dashboard banner: type: string - description: Custom banner displayed at the header of the explore view. + description: Refers to the custom banner displayed at the header of an explore dashboard + dimensions: + description: List of dimension names. Use '*' to select all dimensions (default) + $ref: '#/definitions/field_selector_properties' + examples: | + ```yaml + # Example: Select a dimension + dimensions: + - country + + # Example: Select all dimensions except one + dimensions: + exclude: + - country + + # Example: Select all dimensions that match a regex + dimensions: + regex: "^public_.*$" + ``` + measures: + description: List of measure names. Use '*' to select all measures (default) + $ref: '#/definitions/field_selector_properties' + examples: | + ```yaml + # Example: Select a dimension + measures: + - sum_of_total + + # Example: Select all dimensions except one + measures: + exclude: + - sum_of_total + + # Example: Select all dimensions that match a regex + measures: + regex: "^public_.*$" + ``` theme: oneOf: - type: string - description: Name of an existing theme to apply to the explore view. - - $ref: '#/definitions/theme/definitions/theme_properties' + description: Name of an existing theme to apply to the dashboard + - $ref: '#/definitions/theme_properties' description: Inline theme configuration. - description: Name of the theme to use or define a theme inline. Either theme name or inline theme can be set. + description: Name of the theme to use. Only one of theme and embedded_theme can be set. time_ranges: type: array - description: Overrides the list of default time range selections available in the dropdown. It can be string or an object with a 'range' and optional 'comparison_offsets'. + description: | + Overrides the list of default time range selections available in the dropdown. 
It can be string or an object with a 'range' and optional 'comparison_offsets' + ```yaml + time_ranges: + - PT15M // Simplified syntax to specify only the range + - PT1H + - PT6H + - P7D + - range: P5D // Advanced syntax to specify comparison_offsets as well + - P4W + - rill-TD // Today + - rill-WTD // Week-To-date + ``` items: $ref: '#/definitions/explore_time_range_properties' time_zones: type: array - description: List of time zones to pin to the top of the time zone selector. Should be a list of IANA time zone identifiers. + description: Refers to the time zones that should be pinned to the top of the time zone selector. It should be a list of [IANA time zone identifiers](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) items: type: string lock_time_zone: type: boolean - description: When true, the explore view will be locked to the first time zone provided in the time_zones list. If no time_zones are provided, it will be locked to UTC. + description: When true, the dashboard will be locked to the first time provided in the time_zones list. When no time_zones are provided, the dashboard will be locked to UTC allow_custom_time_range: type: boolean - description: Defaults to true. When set to false, hides the ability to set a custom time range for the user. + description: Defaults to true, when set to false it will hide the ability to set a custom time range for the user. defaults: type: object - description: Preset UI state to show by default. + description: | + defines the defaults YAML struct + ```yaml + defaults: #define all the defaults within here + dimensions: + - dim_1 + - dim_2 + measures: + - measure_1 + - measure_2 + time_range: P1M + comparison_mode: dimension #time, none + comparison_dimension: filename + ``` properties: dimensions: + description: Provides the default dimensions to load on viewing the dashboard $ref: '#/definitions/field_selector_properties' - description: Default dimensions to load on viewing the explore view. measures: + description: Provides the default measures to load on viewing the dashboard $ref: '#/definitions/field_selector_properties' - description: Default measures to load on viewing the explore view. time_range: + description: Refers to the default time range shown when a user initially loads the dashboard. The value must be either a valid [ISO 8601 duration](https://en.wikipedia.org/wiki/ISO_8601#Durations) (for example, PT12H for 12 hours, P1M for 1 month, or P26W for 26 weeks) or one of the [Rill ISO 8601 extensions](https://docs.rilldata.com/reference/rill-iso-extensions#extensions) type: string - description: Default time range to display when the explore view loads. comparison_mode: + description: 'Controls how to compare current data with historical or categorical baselines. Options: `none` (no comparison), `time` (compares with past based on default_time_range), `dimension` (compares based on comparison_dimension values)' type: string - description: Default comparison mode for metrics (none, time, or dimension). + enum: + - none + - time + - dimension comparison_dimension: + description: 'for dimension mode, specify the comparison dimension by name' type: string - description: Default dimension to use for comparison when comparison_mode is 'dimension'. additionalProperties: false embeds: type: object - description: Configuration options for embedded explore views. 
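+            # Illustrative explore file using the `defaults` block above (sketch, not part of the schema;
+            # `my_metrics` is a hypothetical metrics view name):
+            #   type: explore
+            #   display_name: Sales Overview
+            #   metrics_view: my_metrics
+            #   defaults:
+            #     time_range: P1M
+            #     comparison_mode: time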
+ description: Configuration options for embedded dashboard views properties: hide_pivot: type: boolean - description: When true, hides the pivot table view in embedded mode. + description: When true, hides the pivot table view in embedded mode additionalProperties: false - additionalProperties: false - model: + + security: + description: Security rules to apply for access to the explore dashboard + $ref: '#/definitions/dashboard_security_policy_properties' + required: + - type + - display_name + - metrics_view + - $ref: '#/definitions/common_properties' + + # Alerts + alerts: + title: Alert YAML + id: alerts type: object - title: Model YAML + description: Alongside alerts created at the dashboard level via the UI, more extensive alerting can be developed in an alert YAML file. When creating an alert via a YAML file, you'll see it denoted in the UI as `Created through code`. + examples: + - # Example: Send an alert to the Slack channel #rill-cloud-alerts when the data lags by more than 1 day + type: alert + display_name: Data lags by more than 1 day + # Check the alert every hour. + refresh: + cron: 0 * * * * + # Query that returns non-empty results if the metrics lag by more than 1 day. + data: + sql: |- + SELECT * + FROM + ( + SELECT MAX(event_time) AS max_time + FROM rill_metrics_model + ) + WHERE max_time < NOW() - INTERVAL '1 day' + # Send notifications in Slack. + notify: + slack: + channels: + - '#rill-cloud-alerts' allOf: - title: Properties type: object properties: type: type: string - const: model - description: Refers to the resource type and must be `model` + const: alert + description: Refers to the resource type and must be `alert` refresh: $ref: '#/definitions/schedule_properties' - description: Specifies the refresh schedule that Rill should follow to re-ingest and update the underlying model data - connector: - type: string description: | - Refers to the connector type or [named connector](./connector.md#name) for the source. - sql: + Refresh schedule for the alert + ```yaml + refresh: + cron: "* * * * *" + #every: "24h" + ``` + display_name: type: string - description: Raw SQL query to run against source - timeout: + description: Display name for the alert + description: type: string - description: The maximum time to wait for model ingestion - incremental: - type: boolean - description: whether incremental modeling is required (optional) - change_mode: + description: Description for the alert + intervals: + type: object + description: Defines the interval over which the alert is checked + properties: + duration: + type: string + description: A valid ISO 8601 duration defining the interval length + limit: + type: integer + description: Maximum number of intervals to check per invocation + minimum: 0 + check_unclosed: + type: boolean + description: 'Whether unclosed intervals should be checked' + watermark: type: string enum: - - reset - - manual - - patch - description: Configure how changes to the model specifications are applied (optional). 'reset' will drop and recreate the model automatically, 'manual' will require a manual full or incremental refresh to apply changes, and 'patch' will switch to the new logic without re-processing historical data (only applies for incremental models).
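+        # Illustrative incremental model using `change_mode` (sketch, not part of the schema;
+        # the table name is hypothetical):
+        #   type: model
+        #   incremental: true
+        #   change_mode: manual   # or 'reset' / 'patch'
+        #   sql: SELECT * FROM source_table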
- state: - $ref: '#/definitions/data_properties' - description: Refers to the explicitly defined state of your model, cannot be used with partitions (optional) - partitions: + - trigger_time + - inherit + description: Specifies how the watermark is determined for incremental processing. Use 'trigger_time' to set it at runtime or 'inherit' to use the upstream model's watermark. + timeout: + type: string + description: define the timeout of the alert in seconds (optional). + data: $ref: '#/definitions/data_properties' - description: Refers to the how your data is partitioned, cannot be used with state. (optional) - materialize: + description: Data source for the alert + for: + description: "Specifies how user identity or attributes should be evaluated for security policy enforcement." + oneOf: + - type: object + description: Specifies a unique user identifier for applying security policies. + properties: + user_id: + type: string + description: "The unique user ID used to evaluate security policies." + required: + - user_id + additionalProperties: false + - type: object + description: Specifies a user's email address for applying security policies. + properties: + user_email: + type: string + description: "The user's email address used to evaluate security policies." + format: email + required: + - user_email + additionalProperties: false + - type: object + description: Specifies a set of arbitrary user attributes for applying security policies. + properties: + attributes: + type: object + description: A dictionary of user attributes used to evaluate security policies. + additionalProperties: true + required: + - attributes + additionalProperties: false + on_recover: type: boolean - description: models will be materialized in olap - partitions_watermark: + description: Send an alert when a previously failing alert recovers. Defaults to false. + on_fail: + type: boolean + description: Send an alert when a failure occurs. Defaults to true. + on_error: + type: boolean + description: Send an alert when an error occurs during evaluation. Defaults to false. + renotify: + type: boolean + description: Enable repeated notifications for unresolved alerts. Defaults to false. + renotify_after: type: string - description: Refers to a customizable timestamp that can be set to check if an object has been updated (optional). - partitions_concurrency: - type: integer - description: Refers to the number of concurrent partitions that can be read at the same time (optional). - stage: - type: object - properties: - connector: - type: string - description: Refers to the connector type for the staging table - required: - - connector - description: in the case of staging models, where an input source does not support direct write to the output and a staging table is required - additionalProperties: true - output: + description: Defines the re-notification interval for the alert (e.g., '10m','24h'), equivalent to snooze duration in UI, defaults to 'Off' + notify: + $ref: '#/definitions/notify_properties' + description: Notification configuration + annotations: type: object - description: to define the properties of output - properties: - table: - type: string - description: Name of the output table. If not specified, the model name is used. - materialize: - type: boolean - description: Whether to materialize the model as a table or view - connector: - type: string - description: Refers to the connector type for the output table. 
Can be `clickhouse` or `duckdb` and their named connector - incremental_strategy: - type: string - enum: - - append - - merge - - partition_overwrite - description: Strategy to use for incremental updates. Can be 'append', 'merge' or 'partition_overwrite' - unique_key: - type: array - items: - type: string - description: List of columns that uniquely identify a row for merge strategy - partition_by: - type: string - description: Column or expression to partition the table by - allOf: - - if: - title: Additional properties for `output` when `connector` is `clickhouse` - properties: - connector: - const: clickhouse - required: - - connector - then: - properties: - type: - type: string - description: Type to materialize the model into. Can be 'TABLE', 'VIEW' or 'DICTIONARY' - enum: - - TABLE - - VIEW - - DICTIONARY - columns: - type: string - description: Column names and types. Can also include indexes. If unspecified, detected from the query. - engine_full: - type: string - description: Full engine definition in SQL format. Can include partition keys, order, TTL, etc. - engine: - type: string - description: Table engine to use. Default is MergeTree - order_by: - type: string - description: ORDER BY clause. - partition_by: - type: string - description: Partition BY clause. - primary_key: - type: string - description: PRIMARY KEY clause. - sample_by: - type: string - description: SAMPLE BY clause. - ttl: - type: string - description: TTL settings for the table or columns. - table_settings: - type: string - description: Table-specific settings. - query_settings: - type: string - description: Settings used in insert/create table as select queries. - distributed_settings: - type: string - description: Settings for distributed table. - distributed_sharding_key: - type: string - description: Sharding key for distributed table. - dictionary_source_user: - type: string - description: User for accessing the source dictionary table (used if type is DICTIONARY). - dictionary_source_password: - type: string - description: Password for the dictionary source user. + description: Key value pair used for annotations + additionalProperties: + type: string + + required: + - type + - refresh + - data + - notify + - $ref: '#/definitions/common_properties' + + # APIs + apis: + title: API YAML + id: apis + type: object + description: Custom APIs allow you to create endpoints that can be called to retrieve or manipulate data. 
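+    # Illustrative SQL-backed API file (sketch, not part of the schema; the model name is hypothetical):
+    #   type: api
+    #   sql: SELECT country, COUNT(*) AS user_count FROM users_model GROUP BY country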
+ allOf: + - title: Properties + type: object + properties: + type: + type: string + const: api + description: Refers to the resource type and must be `api` + openapi: + type: object + description: OpenAPI specification for the API endpoint + properties: + summary: + type: string + description: A brief description of what the API endpoint does + sample: "Get user analytics data" + parameters: + type: array + description: List of parameters that the API endpoint accepts + items: + type: object + additionalProperties: true + request_schema: + type: object + description: JSON schema for the request body (use nested YAML instead of a JSON string) + additionalProperties: true + response_schema: + type: object + description: JSON schema for the response body (use nested YAML instead of a JSON string) + additionalProperties: true + security: + $ref: '#/definitions/security_policy_properties' + description: Security configuration for the API + skip_nested_security: + type: boolean + description: Flag to control security inheritance + sample: false required: - type - - sql - - $ref: '#/definitions/common_properties' - - type: object - allOf: - - if: - title: Additional properties when `connector` is `athena` or [named connector](./connector.md#name) for athena - properties: - connector: - const: athena - required: - - connector - then: - $ref: '#/definitions/model/definitions/athena' - - if: - title: Additional properties when `connector` is `azure` or [named connector](./connector.md#name) of azure - properties: - connector: - const: azure - required: - - connector - then: - $ref: '#/definitions/model/definitions/azure' - - if: - title: Additional properties when `connector` is `bigquery` or [named connector](./connector.md#name) of bigquery - properties: - connector: - const: bigquery - required: - - connector - then: - $ref: '#/definitions/model/definitions/bigquery' - - if: - title: Additional properties when `connector` is `duckdb` or [named connector](./connector.md#name) of duckdb - properties: - connector: - const: duckdb - required: - - connector - then: - $ref: '#/definitions/model/definitions/duckdb' - - if: - title: Additional properties when `connector` is `gcs` or [named connector](./connector.md#name) of gcs - properties: - connector: - const: gcs - required: - - connector - then: - $ref: '#/definitions/model/definitions/gcs' - - if: - title: Additional properties when `connector` is `local_file` or [named connector](./connector.md#name) of local_file - properties: - connector: - const: local_file - required: - - connector - then: - $ref: '#/definitions/model/definitions/local_file' - - if: - title: Additional properties when `connector` is `redshift` or [named connector](./connector.md#name) of redshift - properties: - connector: - const: redshift - required: - - connector - then: - $ref: '#/definitions/model/definitions/redshift' - - if: - title: Additional properties when `connector` is `s3` or [named connector](./connector.md#name) of s3 - properties: - connector: - const: s3 - required: - - connector - then: - $ref: '#/definitions/model/definitions/s3' - - if: - title: Additional properties when `connector` is `salesforce` or [named connector](./connector.md#name) of salesforce - properties: - connector: - const: salesforce - required: - - connector - then: - $ref: '#/definitions/model/definitions/salesforce' - definitions: - athena: + - $ref: '#/definitions/api_data_properties' + # - $ref: '#/definitions/common_properties' + + # Themes + themes: + title: Theme YAML + id: themes 
+ type: object + description: | + In your Rill project directory, create a `.yaml` file in any directory containing `type: theme`. Rill will automatically ingest the theme next time you run `rill start` or deploy to Rill Cloud. + + To apply that theme to a dashboard, add `default_theme: ` to the yaml file for that dashboard. Alternatively, you can add this to the end of the URL in your browser: `?theme=` + examples: | + ```yaml + # Example: You can copy this directly into your .yaml file + type: theme + + colors: + primary: plum + secondary: violet + ``` + allOf: + - title: Properties type: object properties: - output_location: - type: string - description: Output location for query results in S3. - workgroup: - type: string - description: AWS Athena workgroup to use for queries. - region: + type: type: string - description: AWS region to connect to Athena and the output location. - azure: + const: theme + description: Refers to the resource type and must be `theme` + colors: + type: object + description: Color palette for the theme + properties: + primary: + type: string + description: Primary color + secondary: + type: string + description: Secondary color + additionalProperties: true + + required: + - type + - display_name + - $ref: '#/definitions/common_properties' + + # Rill YAML + rillyaml: + title: Project YAML + id: rillyaml + type: object + description: The `rill.yaml` file contains metadata about your project. + allOf: + - title: Properties type: object properties: - path: + compiler: type: string - description: Path to the source - account: + description: Specifies the parser version to use for compiling resources + display_name: type: string - description: Account identifier - uri: + description: The display name of the project, shown in the upper-left corner of the UI + description: type: string - description: Source URI - extract: - type: object - description: Arbitrary key-value pairs for extraction settings - additionalProperties: true - glob: + description: A brief description of the project + features: type: object - description: Settings related to glob file matching. - properties: - max_total_size: - type: integer - description: Maximum total size (in bytes) matched by glob - max_objects_matched: - type: integer - description: Maximum number of objects matched by glob - max_objects_listed: - type: integer - description: Maximum number of objects listed in glob - page_size: - type: integer - description: Page size for glob listing - batch_size: + description: Optional feature flags. Can be specified as a map of feature names to booleans. + ai_instructions: type: string - description: 'Size of a batch (e.g., ''100MB'')' - bigquery: + description: Extra instructions for [AI agents](/explore/mcp). Used to guide natural language question answering and routing. + - title: Configuring the default OLAP Engine + description: | + Rill allows you to specify the default OLAP engine to use in your project via `rill.yaml`. + :::info Curious about OLAP Engines? + Please see our reference documentation on [OLAP Engines](/connect/olap). + ::: type: object properties: - project_id: + olap_connector: type: string - description: ID of the BigQuery project. - duckdb: + description: Specifies the [default OLAP engine](/connect/olap) for the project. Defaults to duckdb if not set. + examples: + - olap_connector: clickhouse + - title: Project-wide defaults type: object + description: | + In `rill.yaml`, project-wide defaults can be specified for a resource type within a project. 
Unless otherwise specified, _individual resources will inherit any defaults_ that have been specified in `rill.yaml`. For available properties that can be configured, please refer to the YAML specification for each individual resource type - [model](advanced-models.md), [metrics_view](metrics-views.md), and [explore](explore-dashboards.md) + + :::note Use plurals when specifying project-wide defaults + In your `rill.yaml`, the top level property for the resource type needs to be **plural**, such as `models`, `metrics_views` and `explores`. + ::: + + :::info Hierarchy of inheritance and property overrides + As a general rule of thumb, properties that have been specified at a more _granular_ level will supersede or override higher level properties that have been inherited. Therefore, in order of inheritance, Rill will prioritize properties in the following order: + 1. Individual [models](advanced-models.md)/[metrics_views](metrics-views.md)/[explore](explore-dashboards.md) object level properties (e.g. `advanced-models.yaml` or `explore-dashboards.yaml`) + 2. [Environment](/docs/build/models/environments.md) level properties (e.g. a specific property that has been set for `dev`) + 3. [Project-wide defaults](#project-wide-defaults) for a specific property and resource type + ::: properties: + models: + type: object + description: Defines project-wide default settings for models. Unless overridden, individual models will inherit these defaults. + metrics_views: + type: object + description: Defines project-wide default settings for metrics_views. Unless overridden, individual metrics_views will inherit these defaults. + explores: + type: object + description: Defines project-wide default settings for explores. Unless overridden, individual explores will inherit these defaults. examples: - - # For example, the following YAML configuration below will set a project-wide default for: + # Models - Configure a [source refresh](/build/connect/source-refresh.md). + # Metrics View - Set the [first day of the week](metrics-view.md) for timeseries aggregations to be Sunday along with setting the smallest_time_grain. + # Explore Dashboards - Set the [default](explore-dashboards.md) values when a user opens a dashboard, and available time zones and/or time ranges.
+ models: + refresh: + cron: '0 * * * *' + + metrics_views: + first_day_of_week: 1 + smallest_time_grain: month + + explores: + defaults: + time_range: P24M + time_zones: + - America/Denver + - UTC + - America/Los_Angeles + - America/Chicago + - America/New_York + - Europe/London + - Europe/Paris + - Asia/Jerusalem + - Europe/Moscow + - Asia/Kolkata + - Asia/Shanghai + - Asia/Tokyo + - Australia/Sydney + time_ranges: + - PT24H + - P7D + - P14D + - P30D + - P3M + - P6M + - P12M + - title: Setting variables + description: | + Primarily useful for [templating](/connect/templating.md), variables can be set in the `rill.yaml` file directly. This allows variables to be set for your projects deployed to Rill Cloud while still being able to use different variable values locally if you prefer. + :::info Overriding variables locally + Variables also follow an order of precedence and can be overridden locally. By default, any variables defined will be inherited from `rill.yaml`. However, if you manually pass in a variable when starting Rill Developer locally via the CLI, this value will be used instead for the current instance of your running project: + ```bash + rill start --env numeric_var=100 --env string_var="different_value" + ``` + ::: + :::tip Setting variables through `.env` + Variables can also be set through your project's `/.env` file (or using the `rill env set` CLI command), such as: + ```bash + variable=xyz + ``` + Similar to how [connector credentials can be pushed / pulled](/connect/credentials#pulling-credentials-and-variables-from-a-deployed-project-on-rill-cloud) from local to cloud or vice versa, project variables set locally in Rill Developer can be pushed to Rill Cloud and/or pulled back to your local instance from your deployed project by using the `rill env push` and `rill env pull` commands respectively. + ::: type: object properties: - path: - type: string - description: Path to the source - uri: - type: string - description: Source URI - extract: - type: object - description: key-value pairs for extraction settings - additionalProperties: true - glob: + env: type: object - description: Settings related to glob file matching. - properties: - max_total_size: - type: integer - description: Maximum total size (in bytes) matched by glob - max_objects_matched: - type: integer - description: Maximum number of objects matched by glob - max_objects_listed: - type: integer - description: Maximum number of objects listed in glob - page_size: - type: integer - description: Page size for glob listing - batch_size: - type: string - description: 'Size of a batch (e.g., ''100MB'')' - local_file: + description: "To define a variable in `rill.yaml`, pass in the appropriate key-value pair for the variable under the `env` key" + examples: + - env: + numeric_var: 10 + string_var: "string_value" + - title: Managing Paths in Rill + description: | + The public_paths and ignore_paths properties in the rill.yaml file provide control over which files and directories are processed or exposed by Rill. The public_paths property defines a list of file or directory paths to expose over HTTP. By default, it includes ['./public']. The ignore_paths property specifies a list of files or directories that Rill excludes during ingestion and parsing. This prevents unnecessary or incompatible content from affecting the project. + :::tip + Don't forget the leading `/` when specifying the path for `ignore_paths` and this path is also assuming the relative path from your project root. 
+ ::: type: object properties: + public_paths: + type: array + description: List of file or directory paths to expose over HTTP. Defaults to ['./public'] + items: + type: string + ignore_paths: + type: array + description: A list of file or directory paths to exclude from parsing. Useful for ignoring extraneous or non-Rill files in the project + examples: + - ignore_paths: + - /path/to/ignore + - /file_to_ignore.yaml + items: + type: string + - title: Testing access policies + description: | + During development, it is always a good idea to check if your [access policies](/manage/security.md) are behaving the way you designed them to before pushing these changes into production. You can set mock users, which enables a drop-down in the dashboard preview to view the dashboard as a specific user. + :::info The View as selector is not visible in my dashboard, why? + This feature is _only_ enabled when you have set a security policy on the dashboard. By default, the dashboard and its contents are viewable by every user. + ::: type: object properties: + mock_users: + type: array + description: A list of mock users used to test dashboard security policies within the project + examples: + - mock_users: + - email: john@yourcompany.com + name: John Doe + admin: true + - email: jane@partnercompany.com + groups: + - partners + - email: anon@unknown.com + - email: embed@rilldata.com + name: embed + custom_variable_1: Value_1 + custom_variable_2: Value_2 + + items: + type: object + properties: + email: + type: string + description: The email address of the mock user. This field is required. + name: + type: string + description: The name of the mock user. + admin: + type: boolean + description: Indicates whether the mock user has administrative privileges + groups: + type: array + description: An array of group names that the mock user is a member of + items: + type: string + required: + - email + - $ref: '#/definitions/common_properties' + + # Common Properties + common_properties: + type: object + title: "Common Properties" + properties: + name: + type: string + description: Name is usually inferred from the filename, but can be specified manually. + refs: + type: array + description: 'List of resource references' + items: + type: string + description: A string reference like `` or ``. + dev: + type: object + description: Overrides any properties in the development environment. + prod: + type: object + description: Overrides any properties in the production environment. + + # Reusable Properties + theme_properties: + type: object + properties: + colors: + type: object + description: Used to override the dashboard colors. Either primary or secondary color must be provided.
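+        # Illustrative override (sketch, not part of the schema): either color alone is valid, e.g.
+        #   colors:
+        #     primary: 4f46e5   # hex without the '#'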
properties: - region: + primary: type: string - description: AWS region - endpoint: - type: string - description: AWS Endpoint - path: + description: Overrides the primary blue color in the dashboard. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors). + secondary: type: string - description: Path to the source - uri: + description: Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. + anyOf: + - required: + - primary + - required: + - secondary + api_data_properties: + oneOf: + - title: SQL Query + type: object + description: Executes a raw SQL query against the project's data models. + + properties: + sql: type: string - description: Source URI - extract: - type: object - description: key-value pairs for extraction settings - additionalProperties: true - glob: - type: object - description: Settings related to glob file matching. - properties: - max_total_size: - type: integer - description: Maximum total size (in bytes) matched by glob - max_objects_matched: - type: integer - description: Maximum number of objects matched by glob - max_objects_listed: - type: integer - description: Maximum number of objects listed in glob - page_size: - type: integer - description: Page size for glob listing - batch_size: + description: Raw SQL query to run against existing models in the project. + connector: type: string - description: 'Size of a batch (e.g., ''100MB'')' - salesforce: + description: specifies the connector to use when running SQL or glob queries. + required: + - sql + examples: | + ```yaml + type: api + sql: "SELECT * FROM table_name WHERE date >= '2024-01-01'" + ``` + - title: Metrics View Query type: object + description: Executes a SQL query that targets a defined metrics view. properties: - soql: - type: string - description: SOQL query to execute against the Salesforce instance. - sobject: + metrics_sql: type: string - description: Salesforce object (e.g., Account, Contact) targeted by the query. - queryAll: - type: boolean - description: Whether to include deleted and archived records in the query (uses queryAll API). - - theme: - type: object - title: Theme YAML - description: | - In your Rill project directory, create a `.yaml` file in any directory containing `type: theme`. Rill will automatically ingest the theme next time you run `rill start` or deploy to Rill Cloud. + description: SQL query that targets a metrics view in the project + required: + - metrics_sql + examples: | + ```yaml + type: api - To apply that theme to a dashboard, add `default_theme: ` to the yaml file for that dashboard. Alternatively, you can add this to the end of the URL in your browser: `?theme=` - examples: - - # Example: You can copy this directly into your .yaml file - type: theme - colors: - primary: plum - secondary: violet - allOf: - - title: Properties + metrics_sql: "SELECT * FROM user_metrics WHERE date >= '2024-01-01'" + ``` + - title: Custom API Call type: object + description: Calls a custom API defined in the project to compute data. properties: - type: + api: + type: string + description: Name of a custom API defined in the project. 
+ args: + type: object + description: Arguments to pass to the custom API. + additionalProperties: true + required: + - api + examples: | + ```yaml + type: api + api: "user_analytics_api" + args: + start_date: "2024-01-01" + limit: 10 + ``` + - title: File Glob Query + type: object + description: Uses a file-matching pattern (glob) to query data from a connector. + properties: + glob: + description: Defines the file path or pattern to query from the specified connector. + anyOf: + - type: string + description: A simple file path/glob pattern as a string. + sample: "data/*.csv" + - type: object + description: An object-based configuration for specifying a file path/glob pattern with advanced options. + additionalProperties: true + connector: type: string - const: theme - description: Refers to the resource type and must be `theme` - required: - - type - - $ref: '#/definitions/theme/definitions/theme_properties' + description: Specifies the connector to use with the glob input. required: - - colors - - $ref: '#/definitions/common_properties' - definitions: - theme_properties: + - glob + examples: | + ```yaml + type: api + + glob: "data/*.csv" + ``` + - title: Resource Status Check type: object + description: Uses the status of a resource as data. properties: - colors: + resource_status: type: object - description: Used to override the dashboard colors. Either primary or secondary color must be provided. + description: Based on resource status properties: - primary: - type: string - description: Overrides the primary blue color in the dashboard. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. Note that the hue of the input colors is used for variants but the saturation and lightness is copied over from the [blue color palette](https://tailwindcss.com/docs/customizing-colors). - secondary: - type: string - description: Overrides the secondary color in the dashboard. Applies to the loading spinner only as of now. Can have any hex (without the '#' character), [named colors](https://www.w3.org/TR/css-color-4/#named-colors) or hsl() formats. - anyOf: - - required: - - primary - - required: - - secondary - common_properties: - type: object - title: "Common Properties" - properties: - name: - type: string - description: Name is usually inferred from the filename, but can be specified manually. - refs: - type: array - description: 'List of resource references' - items: - type: string - description: A string reference like `` or ``. - dev: - type: object - description: Overrides any properties in development environment. - prod: - type: object - description: Overrides any properties in production environment. - component_variable_properties: - type: object - properties: - name: - type: string - description: Unique identifier for the variable - type: - type: string - description: Data type of the variable (e.g., string, number, boolean) - value: - description: Default value for the variable. Can be any valid JSON value type - type: - - string - - number - - boolean - - object - - array - required: - - name - - type - additionalProperties: false + where_error: + type: boolean + description: Indicates whether the condition should trigger when the resource is in an error state. 
+ sample: true + additionalProperties: true + required: + - resource_status + examples: | + ```yaml + type: api + resource_status: + where_error: true + ``` data_properties: oneOf: - title: SQL Query @@ -1914,10 +1660,11 @@ definitions: description: Uses a file-matching pattern (glob) to query data from a connector. properties: glob: - description: Defines the file path or pattern to query from the specified connector. + description: Defines the file path or pattern to query from the specified connector. anyOf: - type: string description: A simple file path/glob pattern as a string. + sample: "data/*.csv" - type: object description: An object-based configuration for specifying a file path/glob pattern with advanced options. additionalProperties: true @@ -1937,9 +1684,15 @@ definitions: where_error: type: boolean description: Indicates whether the condition should trigger when the resource is in an error state. + sample: true additionalProperties: true required: - resource_status + examples: | + ```yaml + resource_status: + where_error: true + ``` explore_time_range_properties: oneOf: - type: string @@ -2039,7 +1792,7 @@ definitions: description: 'If true, allows the schedule to run in development mode.' security_policy_properties: type: object - description: Defines security rules and access control policies for resources + description: Defines [security rules and access control policies](/manage/security) for resources properties: access: oneOf: @@ -2133,16 +1886,72 @@ definitions: description: SQL expression for row filtering (for row_filter type rules) required: - type - field_selector_properties: + dashboard_security_policy_properties: + type: object + description: Defines [security rules and access control policies](/manage/security) for dashboards (without row filtering) + properties: + access: + oneOf: + - type: string + description: SQL expression that evaluates to a boolean to determine access + - type: boolean + description: Direct boolean value to allow or deny access + description: Expression indicating if the user should be granted access to the dashboard. If not defined, it will resolve to false and the dashboard won't be accessible to anyone. Needs to be a valid SQL expression that evaluates to a boolean. + field_selectors_properties: oneOf: - type: string + description: 'Simple field name as a string.' + - type: array + description: 'List of field selectors, each can be a string or an object with detailed configuration.' + items: + oneOf: + - type: string + description: 'Shorthand field selector, interpreted as the name.' + - type: object + description: 'Detailed field selector configuration with name and optional time grain.' + properties: + name: + type: string + description: 'Name of the field to select.' + time_grain: + type: string + description: 'Time grain for time-based dimensions.' 
+ enum: + - '' + - ms + - millisecond + - s + - second + - min + - minute + - h + - hour + - d + - day + - w + - week + - month + - q + - quarter + - 'y' + - year + required: + - name + additionalProperties: false + minItems: 1 + field_selector_properties: + oneOf: + - title: Wildcard(*) selector + type: string const: '*' description: Wildcard(*) selector that includes all available fields in the selection - - type: array + - title: Explicit list of fields + type: array items: type: string description: Explicit list of fields to include in the selection - - type: object + - title: Regex matching + type: object description: 'Advanced matching using regex, DuckDB expression, or exclusion' properties: regex: @@ -2162,3 +1971,687 @@ definitions: - expr - required: - exclude + + # Connector definitions + athena: + type: object + title: Athena + properties: + driver: + type: string + description: Refers to the driver type and must be driver `athena` + const: athena + aws_access_key_id: + type: string + description: AWS Access Key ID used for authentication. Required when using static credentials directly or as base credentials for assuming a role. + sample: "AKIAIOSFODNN7EXAMPLE" + aws_secret_access_key: + type: string + description: AWS Secret Access Key paired with the Access Key ID. Required when using static credentials directly or as base credentials for assuming a role. + sample: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + aws_access_token: + type: string + description: AWS session token used with temporary credentials. Required only if the Access Key and Secret Key are part of a temporary session credentials. + sample: "AKIAIOSFODNN7EXAMPLE" + role_arn: + type: string + description: ARN of the IAM role to assume. When specified, the SDK uses the base credentials to call STS AssumeRole and obtain temporary credentials scoped to this role. + sample: "arn:aws:iam::123456789012:role/MyRole" + role_session_name: + type: string + description: Session name to associate with the STS AssumeRole session. Used only if 'role_arn' is specified. Useful for identifying and auditing the session. + sample: "MySession" + external_id: + type: string + description: External ID required by some roles when assuming them, typically for cross-account access. Used only if 'role_arn' is specified and the role's trust policy requires it. + sample: "MyExternalID" + workgroup: + type: string + description: Athena workgroup to use for query execution. Defaults to 'primary' if not specified. + sample: "primary" + output_location: + type: string + description: S3 URI where Athena query results should be stored (e.g., s3://your-bucket/athena/results/). Optional if the selected workgroup has a default result configuration. + sample: "s3://my-bucket/athena-output/" + aws_region: + type: string + description: AWS region where Athena and the result S3 bucket are located (e.g., us-east-1). Defaults to 'us-east-1' if not specified. + sample: "us-east-1" + allow_host_access: + type: boolean + description: Allow the Athena client to access host environment configurations such as environment variables or local AWS credential files. Defaults to true, enabling use of credentials and settings from the host environment unless explicitly disabled. 
+        sample: true
+    required:
+      - driver
+  azure:
+    type: object
+    title: Azure
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `azure`
+        const: azure
+      azure_storage_account:
+        type: string
+        description: Azure storage account name
+        sample: "mystorageaccount"
+      azure_storage_key:
+        type: string
+        description: Azure storage access key
+        sample: "myaccesskey"
+      azure_storage_bucket:
+        type: string
+        description: Name of the Azure Blob Storage container (equivalent to an S3 bucket)
+        sample: "my-container"
+      azure_storage_sas_token:
+        type: string
+        description: Optional Azure SAS token for authentication
+        sample: "my-sas-token"
+      azure_storage_connection_string:
+        type: string
+        description: Optional Azure connection string for the storage account
+        sample: "DefaultEndpointsProtocol=https;AccountName=mystorageaccount;AccountKey=myaccesskey;EndpointSuffix=core.windows.net"
+      allow_host_access:
+        type: boolean
+        description: Allow access to host environment configuration
+        sample: true
+    required:
+      - driver
+      - azure_storage_bucket
+  bigquery:
+    type: object
+    title: BigQuery
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `bigquery`
+        const: bigquery
+      google_application_credentials:
+        type: string
+        description: Raw contents of the Google Cloud service account key (in JSON format) used for authentication.
+        sample: "{\"type\": \"service_account\", \"project_id\": \"my-gcp-project\"}"
+      project_id:
+        type: string
+        description: Google Cloud project ID
+        sample: "my-gcp-project"
+      dataset_id:
+        type: string
+        description: BigQuery dataset ID
+        sample: "my_dataset"
+      location:
+        type: string
+        description: BigQuery dataset location
+        sample: "US"
+      allow_host_access:
+        type: boolean
+        description: Enable the BigQuery client to use credentials from the host environment when no service account JSON is provided. This includes Application Default Credentials from environment variables, local credential files, or the Google Compute Engine metadata server. Defaults to true, allowing seamless authentication in GCP environments.
+        sample: true
+    required:
+      - driver
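+    # A minimal illustrative config; the project, dataset, and location values
+    # below are placeholders, not defaults.
+    examples: |
+      ```yaml
+      driver: bigquery
+      project_id: my-gcp-project
+      dataset_id: my_dataset
+      location: "US"
+      ```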
+  clickhouse:
+    type: object
+    title: ClickHouse
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `clickhouse`
+        const: clickhouse
+      managed:
+        type: boolean
+        description: '`true` means Rill will provision the connector using the default provisioner. `false` disables automatic provisioning.'
+        sample: true
+      mode:
+        type: string
+        description: "Controls the operation mode for the ClickHouse connection. Defaults to 'read' for safe operation with external databases. Set to 'readwrite' to enable model creation and table mutations. Note: when 'managed: true', this is automatically set to 'readwrite'."
+        sample: "readwrite"
+      dsn:
+        type: string
+        description: DSN (Data Source Name) for the ClickHouse connection
+        sample: "clickhouse://localhost:9000/default"
+      username:
+        type: string
+        description: Username for authentication
+        sample: "default"
+      password:
+        type: string
+        description: Password for authentication
+        sample: "mypassword"
+      host:
+        type: string
+        description: Host where the ClickHouse instance is running
+        sample: "localhost"
+      port:
+        type: integer
+        description: Port where the ClickHouse instance is accessible
+        sample: 9000
+      database:
+        type: string
+        description: Name of the ClickHouse database within the cluster
+        sample: "default"
+      ssl:
+        type: boolean
+        description: Indicates whether a secured SSL connection is required
+        sample: true
+      cluster:
+        type: string
+        description: 'Cluster name, required for running distributed queries'
+        sample: "my-cluster"
+      log_queries:
+        type: boolean
+        description: Controls whether to log raw SQL queries
+        sample: true
+      settings_override:
+        type: string
+        description: Overrides the default settings used in queries, for example `readonly = 1, session_timezone = 'UTC'`
+        sample: "readonly = 1, session_timezone = 'UTC'"
+      embed_port:
+        type: integer
+        description: Port to run ClickHouse locally (0 for random port)
+        sample: 0
+      can_scale_to_zero:
+        type: boolean
+        description: Indicates if the database can scale to zero
+        sample: true
+      max_open_conns:
+        type: integer
+        description: Maximum number of open connections to the database
+        sample: 10
+      max_idle_conns:
+        type: integer
+        description: Maximum number of idle connections in the pool
+        sample: 10
+      dial_timeout:
+        type: string
+        description: Timeout for dialing the ClickHouse server
+        sample: "10s"
+      conn_max_lifetime:
+        type: string
+        description: Maximum time a connection may be reused
+        sample: "10s"
+      read_timeout:
+        type: string
+        description: Maximum time for a connection to read data
+        sample: "10s"
+    required:
+      - driver
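+    # A minimal illustrative config, assuming a self-managed ClickHouse instance
+    # on the default native port; `clickhouse_password` is a hypothetical env var.
+    examples: |
+      ```yaml
+      driver: clickhouse
+      host: "localhost"
+      port: 9000
+      username: "default"
+      password: "{{ .env.clickhouse_password }}"
+      database: "default"
+      ```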
+  druid:
+    type: object
+    title: Druid
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `druid`
+        const: druid
+      dsn:
+        type: string
+        description: DSN (Data Source Name) for connecting to Druid
+        sample: "http://localhost:8082"
+      username:
+        type: string
+        description: Username for authenticating with Druid
+        sample: "admin"
+      password:
+        type: string
+        description: Password for authenticating with Druid
+        sample: "admin123"
+      host:
+        type: string
+        description: Hostname of the Druid coordinator or broker
+        sample: "localhost"
+      port:
+        type: integer
+        description: Port number of the Druid service
+        sample: 8082
+      ssl:
+        type: boolean
+        description: Enable SSL for secure connection
+        sample: true
+      log_queries:
+        type: boolean
+        description: Log raw SQL queries sent to Druid
+        sample: true
+      max_open_conns:
+        type: integer
+        description: Maximum number of open database connections (0 = default, -1 = unlimited)
+        sample: 10
+      skip_version_check:
+        type: boolean
+        description: Skip checking Druid version compatibility
+        sample: true
+    required:
+      - driver
+      - dsn
+  duckdb:
+    type: object
+    title: DuckDB
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `duckdb`
+        const: duckdb
+      pool_size:
+        type: integer
+        description: Number of concurrent connections and queries allowed
+        sample: 10
+      allow_host_access:
+        type: boolean
+        description: Whether access to the local environment and file system is allowed
+        sample: true
+      cpu:
+        type: integer
+        description: Number of CPU cores available to the database
+        sample: 10
+      memory_limit_gb:
+        type: integer
+        description: Amount of memory in GB available to the database
+        sample: 10
+      read_write_ratio:
+        type: number
+        description: Ratio of resources allocated to the read database; used to divide CPU and memory
+        sample: 0.5
+      init_sql:
+        type: string
+        description: SQL executed during database initialization.
+        sample: "CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT, email TEXT)"
+      secrets:
+        type: string
+        description: Comma-separated list of other connector names to create temporary secrets for in DuckDB before executing a model.
+        sample: "gcs,s3"
+      log_queries:
+        type: boolean
+        description: Whether to log raw SQL queries executed through OLAP
+        sample: true
+    required:
+      - driver
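+    # A minimal illustrative config; the pool size and resource limits are
+    # example values, not recommended settings.
+    examples: |
+      ```yaml
+      driver: duckdb
+      pool_size: 4
+      cpu: 2
+      memory_limit_gb: 4
+      ```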
+  gcs:
+    type: object
+    title: GCS
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `gcs`
+        const: gcs
+      google_application_credentials:
+        type: string
+        description: Google Cloud credentials JSON string
+        sample: "{\"type\": \"service_account\", \"project_id\": \"my-project\"}"
+      bucket:
+        type: string
+        description: Name of the GCS bucket
+        sample: "my-gcs-bucket"
+      allow_host_access:
+        type: boolean
+        description: Allow access to host environment configuration
+        sample: true
+      key_id:
+        type: string
+        description: Optional S3-compatible Key ID when used in compatibility mode
+        sample: "AKIAIOSFODNN7EXAMPLE"
+      secret:
+        type: string
+        description: Optional S3-compatible Secret when used in compatibility mode
+        sample: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+    required:
+      - driver
+      - bucket
+  https:
+    type: object
+    title: HTTPS
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `https`
+        const: https
+      path:
+        type: string
+        description: The full HTTPS URI to fetch data from
+        sample: "https://api.example.com/data.csv"
+      headers:
+        type: object
+        description: HTTP headers to include in the request
+        sample: "{\"Authorization\": \"Bearer my-token\"}"
+        additionalProperties:
+          type: string
+    required:
+      - driver
+      - path
+  motherduck:
+    type: object
+    title: MotherDuck
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `duckdb`
+        const: duckdb
+      path:
+        type: string
+        description: Path to your MotherDuck database
+        sample: "md:my_database"
+      init_sql:
+        type: string
+        description: SQL executed during database initialization.
+        sample: "INSTALL 'motherduck';\nLOAD 'motherduck';\nSET motherduck_token= '{{ .env.motherduck_token }}'"
+    required:
+      - driver
+      - path
+      - init_sql
+  mysql:
+    type: object
+    title: MySQL
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `mysql`
+        const: mysql
+      dsn:
+        type: string
+        description: DSN (Data Source Name) for the MySQL connection
+        sample: "mysql://user:password@localhost:3306/mydatabase"
+      host:
+        type: string
+        description: Hostname of the MySQL server
+        sample: "localhost"
+      port:
+        type: integer
+        description: Port number for the MySQL server
+        sample: 3306
+      database:
+        type: string
+        description: Name of the MySQL database
+        sample: "mydatabase"
+      user:
+        type: string
+        description: Username for authentication
+        sample: "myuser"
+      password:
+        type: string
+        description: Password for authentication
+        sample: "mypassword"
+      ssl_mode:
+        type: string
+        description: SSL mode can be DISABLED, PREFERRED or REQUIRED
+        sample: "PREFERRED"
+    required:
+      - driver
+  pinot:
+    type: object
+    title: Pinot
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `pinot`
+        const: pinot
+      dsn:
+        type: string
+        description: DSN (Data Source Name) for the Pinot connection
+        sample: "pinot://localhost:8099"
+      username:
+        type: string
+        description: Username for authenticating with Pinot
+        sample: "admin"
+      password:
+        type: string
+        description: Password for authenticating with Pinot
+        sample: "admin123"
+      broker_host:
+        type: string
+        description: Hostname of the Pinot broker
+        sample: "localhost"
+      broker_port:
+        type: integer
+        description: Port number for the Pinot broker
+        sample: 8099
+      controller_host:
+        type: string
+        description: Hostname of the Pinot controller
+        sample: "localhost"
+      controller_port:
+        type: integer
+        description: Port number for the Pinot controller
+        sample: 9000
+      ssl:
+        type: boolean
+        description: Enable SSL connection to Pinot
+        sample: true
+      log_queries:
+        type: boolean
+        description: Log raw SQL queries executed through Pinot
+        sample: true
+      max_open_conns:
+        type: integer
+        description: Maximum number of open connections to the Pinot database
+        sample: 10
+    required:
+      - driver
+      - dsn
+      - broker_host
+      - controller_host
+  postgres:
+    type: object
+    title: Postgres
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `postgres`
+        const: postgres
+      dsn:
+        type: string
+        description: DSN (Data Source Name) for the Postgres connection
+        sample: "postgresql://user:password@localhost:5432/mydatabase"
+      host:
+        type: string
+        description: Hostname of the Postgres server
+        sample: "localhost"
+      port:
+        type: string
+        description: Port number for the Postgres server
+        sample: "5432"
+      dbname:
+        type: string
+        description: Name of the Postgres database
+        sample: "mydatabase"
+      user:
+        type: string
+        description: Username for authentication
+        sample: "postgres"
+      password:
+        type: string
+        description: Password for authentication
+        sample: "mypassword"
+      sslmode:
+        type: string
+        description: SSL mode can be disable, allow, prefer or require
+        sample: "prefer"
+    required:
+      - driver
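+    # A minimal illustrative config; host, database, and user are placeholders,
+    # and `pg_password` is a hypothetical env var.
+    examples: |
+      ```yaml
+      driver: postgres
+      host: "localhost"
+      port: "5432"
+      dbname: "mydatabase"
+      user: "postgres"
+      password: "{{ .env.pg_password }}"
+      ```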
+  redshift:
+    type: object
+    title: Redshift
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `redshift`
+        const: redshift
+      aws_access_key_id:
+        type: string
+        description: AWS Access Key ID used for authenticating with Redshift.
+        sample: "AKIAIOSFODNN7EXAMPLE"
+      aws_secret_access_key:
+        type: string
+        description: AWS Secret Access Key used for authenticating with Redshift.
+        sample: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+      aws_access_token:
+        type: string
+        description: AWS Session Token for temporary credentials (optional).
+        sample: "AKIAIOSFODNN7EXAMPLE"
+      region:
+        type: string
+        description: AWS region where the Redshift cluster or workgroup is hosted (e.g., 'us-east-1').
+        sample: "us-east-1"
+      database:
+        type: string
+        description: Name of the Redshift database to query.
+        sample: "myredshiftdb"
+      workgroup:
+        type: string
+        description: Workgroup name for Redshift Serverless. For provisioned Redshift clusters, use 'cluster_identifier' instead.
+        sample: "my-workgroup"
+      cluster_identifier:
+        type: string
+        description: Cluster identifier for provisioned Redshift clusters. For Redshift Serverless, use 'workgroup' instead.
+        sample: "my-cluster"
+    required:
+      - driver
+      - aws_access_key_id
+      - aws_secret_access_key
+      - database
+  s3:
+    type: object
+    title: S3
+    properties:
+      driver:
+        type: string
+        description: Refers to the driver type and must be driver `s3`
+        const: s3
+      aws_access_key_id:
+        type: string
+        description: AWS Access Key ID used for authentication
+        sample: "AKIAIOSFODNN7EXAMPLE"
+      aws_secret_access_key:
+        type: string
+        description: AWS Secret Access Key used for authentication
+        sample: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+      aws_access_token:
+        type: string
+        description: Optional AWS session token for temporary credentials
+        sample: "AKIAIOSFODNN7EXAMPLE"
+      bucket:
+        type: string
+        description: Name of the S3 bucket
+        sample: "my-s3-bucket"
+      endpoint:
+        type: string
+        description: Optional custom endpoint URL for S3-compatible storage
+        sample: "https://s3.amazonaws.com"
+      region:
+        type: string
+        description: AWS region of the S3 bucket
+        sample: "us-east-1"
+      allow_host_access:
+        type: boolean
+        description: Allow access to host environment configuration
+        sample: true
+      retain_files:
+        type: boolean
+        description: Whether to retain intermediate files after processing
+        sample: true
+    required:
+      - driver
+      - bucket
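+    # A minimal illustrative config; the bucket and region are placeholders,
+    # and the AWS credentials reference hypothetical env vars.
+    examples: |
+      ```yaml
+      driver: s3
+      bucket: "my-s3-bucket"
+      region: "us-east-1"
+      aws_access_key_id: "{{ .env.aws_access_key_id }}"
+      aws_secret_access_key: "{{ .env.aws_secret_access_key }}"
+      ```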
"user:password@account/database/schema?warehouse=warehouse" + parallel_fetch_limit: + type: integer + description: Maximum number of concurrent fetches during query execution + sample: 10 + required: + - driver + - dsn + sqlite: + type: object + title: SQLite + properties: + driver: + type: string + description: Refers to the driver type and must be driver `sqlite` + const: sqlite + dsn: + type: string + description: DSN(Data Source Name) for the sqlite connection + sample: "file:./mydatabase.db" + required: + - driver + - dsn + - driver + - dsn + + + + # component_variable_properties: + # type: object + # properties: + # name: + # type: string + # description: Unique identifier for the variable + # type: + # type: string + # description: Data type of the variable (e.g., string, number, boolean) + # value: + # description: Default value for the variable. Can be any valid JSON value type + # type: + # - string + # - number + # - boolean + # - object + # - array + # required: + # - name + # - type + # additionalProperties: false