Skip to content

fix: improve logic for existing README validation #325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Aug 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 45 additions & 11 deletions cmd/readmevalidation/coderresources.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (

var (
supportedResourceTypes = []string{"modules", "templates"}
operatingSystems = []string{"windows", "macos", "linux"}

// TODO: This is a holdover from the validation logic used by the Coder Modules repo. It gives us some assurance, but
// realistically, we probably want to parse any Terraform code snippets, and make some deeper guarantees about how it's
Expand All @@ -25,11 +26,21 @@ var (
)

type coderResourceFrontmatter struct {
Description string `yaml:"description"`
IconURL string `yaml:"icon"`
DisplayName *string `yaml:"display_name"`
Verified *bool `yaml:"verified"`
Tags []string `yaml:"tags"`
Description string `yaml:"description"`
IconURL string `yaml:"icon"`
DisplayName *string `yaml:"display_name"`
Verified *bool `yaml:"verified"`
Tags []string `yaml:"tags"`
OperatingSystems []string `yaml:"supported_os"`
}

// A slice version of the struct tags from coderResourceFrontmatter. Might be worth using reflection to generate this
// list at runtime in the future, but this should be okay for now
var supportedCoderResourceStructKeys = []string{
"description", "icon", "display_name", "verified", "tags", "supported_os",
// TODO: This is an old, officially deprecated key from the archived coder/modules repo. We can remove this once we
// make sure that the Registry Server is no longer checking this field.
"maintainer_github",
}

// coderResourceReadme represents a README describing a Terraform resource used
Expand All @@ -42,6 +53,17 @@ type coderResourceReadme struct {
frontmatter coderResourceFrontmatter
}

func validateSupportedOperatingSystems(systems []string) []error {
var errs []error
for _, s := range systems {
if slices.Contains(operatingSystems, s) {
continue
}
errs = append(errs, xerrors.Errorf("detected unknown operating system %q", s))
}
return errs
}

func validateCoderResourceDisplayName(displayName *string) error {
if displayName != nil && *displayName == "" {
return xerrors.New("if defined, display_name must not be empty string")
Expand Down Expand Up @@ -211,19 +233,31 @@ func validateCoderResourceReadme(rm coderResourceReadme) []error {
for _, err := range validateCoderResourceIconURL(rm.frontmatter.IconURL) {
errs = append(errs, addFilePathToError(rm.filePath, err))
}
for _, err := range validateSupportedOperatingSystems(rm.frontmatter.OperatingSystems) {
errs = append(errs, addFilePathToError(rm.filePath, err))
}

return errs
}

func parseCoderResourceReadme(resourceType string, rm readme) (coderResourceReadme, error) {
func parseCoderResourceReadme(resourceType string, rm readme) (coderResourceReadme, []error) {
fm, body, err := separateFrontmatter(rm.rawText)
if err != nil {
return coderResourceReadme{}, xerrors.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)}
}

keyErrs := validateFrontmatterYamlKeys(fm, supportedCoderResourceStructKeys)
if len(keyErrs) != 0 {
remapped := []error{}
for _, e := range keyErrs {
remapped = append(remapped, addFilePathToError(rm.filePath, e))
}
return coderResourceReadme{}, remapped
}

yml := coderResourceFrontmatter{}
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
return coderResourceReadme{}, xerrors.Errorf("%q: failed to parse: %v", rm.filePath, err)
return coderResourceReadme{}, []error{xerrors.Errorf("%q: failed to parse: %v", rm.filePath, err)}
}

return coderResourceReadme{
Expand All @@ -238,9 +272,9 @@ func parseCoderResourceReadmeFiles(resourceType string, rms []readme) (map[strin
resources := map[string]coderResourceReadme{}
var yamlParsingErrs []error
for _, rm := range rms {
p, err := parseCoderResourceReadme(resourceType, rm)
if err != nil {
yamlParsingErrs = append(yamlParsingErrs, err)
p, errs := parseCoderResourceReadme(resourceType, rm)
if len(errs) != 0 {
yamlParsingErrs = append(yamlParsingErrs, errs...)
continue
}

Expand Down
45 changes: 39 additions & 6 deletions cmd/readmevalidation/contributors.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@ type contributorProfileFrontmatter struct {
Bio string `yaml:"bio"`
ContributorStatus string `yaml:"status"`
AvatarURL *string `yaml:"avatar"`
GithubUsername *string `yaml:"github"`
LinkedinURL *string `yaml:"linkedin"`
WebsiteURL *string `yaml:"website"`
SupportEmail *string `yaml:"support_email"`
}

// A slice version of the struct tags from contributorProfileFrontmatter. Might be worth using reflection to generate
// this list at runtime in the future, but this should be okay for now
var supportedContributorProfileStructKeys = []string{"display_name", "bio", "status", "avatar", "linkedin", "github", "website", "support_email"}

type contributorProfileReadme struct {
frontmatter contributorProfileFrontmatter
namespace string
Expand All @@ -50,6 +55,22 @@ func validateContributorLinkedinURL(linkedinURL *string) error {
return nil
}

func validateGithubUsername(username *string) error {
if username == nil {
return nil
}

name := *username
trimmed := strings.TrimSpace(name)
if trimmed == "" {
return xerrors.New("username must have non-whitespace characters")
}
if name != trimmed {
return xerrors.Errorf("username %q has extra whitespace", trimmed)
}
return nil
}

// validateContributorSupportEmail does best effort validation of a contributors email address. We can't 100% validate
// that this is correct without actually sending an email, especially because some contributors are individual developers
// and we don't want to do that on every single run of the CI pipeline. The best we can do is verify the general structure.
Expand Down Expand Up @@ -153,6 +174,9 @@ func validateContributorReadme(rm contributorProfileReadme) []error {
if err := validateContributorLinkedinURL(rm.frontmatter.LinkedinURL); err != nil {
allErrs = append(allErrs, addFilePathToError(rm.filePath, err))
}
if err := validateGithubUsername(rm.frontmatter.GithubUsername); err != nil {
allErrs = append(allErrs, addFilePathToError(rm.filePath, err))
}
if err := validateContributorWebsite(rm.frontmatter.WebsiteURL); err != nil {
allErrs = append(allErrs, addFilePathToError(rm.filePath, err))
}
Expand All @@ -170,15 +194,24 @@ func validateContributorReadme(rm contributorProfileReadme) []error {
return allErrs
}

func parseContributorProfile(rm readme) (contributorProfileReadme, error) {
func parseContributorProfile(rm readme) (contributorProfileReadme, []error) {
fm, _, err := separateFrontmatter(rm.rawText)
if err != nil {
return contributorProfileReadme{}, xerrors.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)
return contributorProfileReadme{}, []error{xerrors.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)}
}

keyErrs := validateFrontmatterYamlKeys(fm, supportedContributorProfileStructKeys)
if len(keyErrs) != 0 {
remapped := []error{}
for _, e := range keyErrs {
remapped = append(remapped, addFilePathToError(rm.filePath, e))
}
return contributorProfileReadme{}, remapped
}

yml := contributorProfileFrontmatter{}
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
return contributorProfileReadme{}, xerrors.Errorf("%q: failed to parse: %v", rm.filePath, err)
return contributorProfileReadme{}, []error{xerrors.Errorf("%q: failed to parse: %v", rm.filePath, err)}
}

return contributorProfileReadme{
Expand All @@ -192,9 +225,9 @@ func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfil
profilesByNamespace := map[string]contributorProfileReadme{}
yamlParsingErrors := []error{}
for _, rm := range readmeEntries {
p, err := parseContributorProfile(rm)
if err != nil {
yamlParsingErrors = append(yamlParsingErrors, err)
p, errs := parseContributorProfile(rm)
if len(errs) != 0 {
yamlParsingErrors = append(yamlParsingErrors, errs...)
continue
}

Expand Down
27 changes: 26 additions & 1 deletion cmd/readmevalidation/readmefiles.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bufio"
"fmt"
"regexp"
"slices"
"strings"

"golang.org/x/xerrors"
Expand Down Expand Up @@ -39,7 +40,9 @@ const (

var (
supportedAvatarFileFormats = []string{".png", ".jpeg", ".jpg", ".gif", ".svg"}
// Matches markdown headers, must be at the beginning of a line, such as "# " or "### ".
// Matches markdown headers placed at the beginning of a line (e.g., "# " or "### "). To make the logic for
// validateReadmeBody easier, this pattern deliberately matches on invalid headers (header levels must be in the
// range 1–6 to be valid). The function has checks to see if the level is correct.
readmeHeaderRe = regexp.MustCompile(`^(#+)(\s*)`)
)

Expand Down Expand Up @@ -168,3 +171,25 @@ func validateReadmeBody(body string) []error {

return errs
}

func validateFrontmatterYamlKeys(frontmatter string, allowedKeys []string) []error {
if len(allowedKeys) == 0 {
return []error{xerrors.New("Set of allowed keys is empty")}
}

var key string
var cutOk bool
var line string

var errs []error
lineScanner := bufio.NewScanner(strings.NewReader(frontmatter))
for lineScanner.Scan() {
line = lineScanner.Text()
key, _, cutOk = strings.Cut(line, ":")
if !cutOk || slices.Contains(allowedKeys, key) {
continue
}
errs = append(errs, xerrors.Errorf("detected unknown key %q", key))
}
return errs
}
53 changes: 32 additions & 21 deletions cmd/readmevalidation/repostructure.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@ import (
"golang.org/x/xerrors"
)

var supportedUserNameSpaceDirectories = append(supportedResourceTypes, ".icons", ".images")
var supportedUserNameSpaceDirectories = append(supportedResourceTypes, ".images")

// validateCoderResourceSubdirectory validates that the structure of a module or template within a namespace follows all
// expected file conventions
func validateCoderResourceSubdirectory(dirPath string) []error {
subDir, err := os.Stat(dirPath)
resourceDir, err := os.Stat(dirPath)
if err != nil {
// It's valid for a specific resource directory not to exist. It's just that if it does exist, it must follow specific rules.
// It's valid for a specific resource directory not to exist. It's just that if it does exist, it must follow
// specific rules.
if !errors.Is(err, os.ErrNotExist) {
return []error{addFilePathToError(dirPath, err)}
}
}

if !subDir.IsDir() {
if !resourceDir.IsDir() {
return []error{xerrors.Errorf("%q: path is not a directory", dirPath)}
}

Expand All @@ -30,10 +33,11 @@ func validateCoderResourceSubdirectory(dirPath string) []error {
return []error{addFilePathToError(dirPath, err)}
}

errs := []error{}
var errs []error
for _, f := range files {
// The .coder subdirectories are sometimes generated as part of Bun tests. These subdirectories will never be
// committed to the repo, but in the off chance that they don't get cleaned up properly, we want to skip over them.
// The .coder subdirectories are sometimes generated as part of our Bun tests. These subdirectories will never
// be committed to the repo, but in the off chance that they don't get cleaned up properly, we want to skip over
// them.
if !f.IsDir() || f.Name() == ".coder" {
continue
}
Expand All @@ -59,56 +63,63 @@ func validateCoderResourceSubdirectory(dirPath string) []error {
return errs
}

// validateRegistryDirectory validates that the contents of `/registry` follow all expected file conventions. This
// includes the top-level structure of the individual namespace directories.
func validateRegistryDirectory() []error {
userDirs, err := os.ReadDir(rootRegistryPath)
namespaceDirs, err := os.ReadDir(rootRegistryPath)
if err != nil {
return []error{err}
}

allErrs := []error{}
for _, d := range userDirs {
dirPath := path.Join(rootRegistryPath, d.Name())
if !d.IsDir() {
allErrs = append(allErrs, xerrors.Errorf("detected non-directory file %q at base of main Registry directory", dirPath))
var allErrs []error
for _, nDir := range namespaceDirs {
namespacePath := path.Join(rootRegistryPath, nDir.Name())
if !nDir.IsDir() {
allErrs = append(allErrs, xerrors.Errorf("detected non-directory file %q at base of main Registry directory", namespacePath))
continue
}

contributorReadmePath := path.Join(dirPath, "README.md")
contributorReadmePath := path.Join(namespacePath, "README.md")
if _, err := os.Stat(contributorReadmePath); err != nil {
allErrs = append(allErrs, err)
}

files, err := os.ReadDir(dirPath)
files, err := os.ReadDir(namespacePath)
if err != nil {
allErrs = append(allErrs, err)
continue
}

for _, f := range files {
// TODO: Decide if there's anything more formal that we want to ensure about non-directories scoped to user namespaces.
// TODO: Decide if there's anything more formal that we want to ensure about non-directories at the top
// level of each user namespace.
if !f.IsDir() {
continue
}

segment := f.Name()
filePath := path.Join(dirPath, segment)
filePath := path.Join(namespacePath, segment)

if !slices.Contains(supportedUserNameSpaceDirectories, segment) {
allErrs = append(allErrs, xerrors.Errorf("%q: only these sub-directories are allowed at top of user namespace: [%s]", filePath, strings.Join(supportedUserNameSpaceDirectories, ", ")))
continue
}
if !slices.Contains(supportedResourceTypes, segment) {
continue
}

if slices.Contains(supportedResourceTypes, segment) {
if errs := validateCoderResourceSubdirectory(filePath); len(errs) != 0 {
allErrs = append(allErrs, errs...)
}
if errs := validateCoderResourceSubdirectory(filePath); len(errs) != 0 {
allErrs = append(allErrs, errs...)
}
}
}

return allErrs
}

// validateRepoStructure validates that the structure of the repo is "correct enough" to do all necessary validation
// checks. It is NOT an exhaustive validation of the entire repo structure – it only checks the parts of the repo that
// are relevant for the main validation steps
func validateRepoStructure() error {
var errs []error
if vrdErrs := validateRegistryDirectory(); len(vrdErrs) != 0 {
Expand Down