Skip to content

Commit 45dc925

Browse files
authored
fix: update repo structure validation logic to disallow false positives (#10)
* refactor: update file structure to reflect new changes * refactor: start splitting up files * refactor: more domain splitting * refactor: remove directory validation from contributors file * fix: update repo structure checks * fix: improve check for user namespace subdirectories * docs: add missing words to comment * docs: update typo * refactor: make code easier to read * fix: update README files * fix: remove employer field entirely * fix: make Github field optional * refactor: rename files
1 parent 9e18a4e commit 45dc925

File tree

15 files changed

+670
-506
lines changed

15 files changed

+670
-506
lines changed

.github/workflows/ci.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,6 @@ jobs:
6565
with:
6666
go-version: "1.23.2"
6767
- name: Validate contributors
68-
run: go build ./scripts/contributors && ./contributors
68+
run: go build ./cmd/readmevalidation && ./readmevalidation
6969
- name: Remove build file artifact
70-
run: rm ./contributors
70+
run: rm ./readmevalidation

.gitignore

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,8 @@ dist
135135
.yarn/install-state.gz
136136
.pnp.*
137137

138-
# Script output
139-
/contributors
138+
# Things needed for CI
139+
/readmevalidation
140140

141141
# Terraform files generated during testing
142142
.terraform*

cmd/readmevalidation/contributors.go

Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
package main
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"log"
7+
"net/url"
8+
"os"
9+
"path"
10+
"slices"
11+
"strings"
12+
13+
"gopkg.in/yaml.v3"
14+
)
15+
16+
var validContributorStatuses = []string{"official", "partner", "community"}
17+
18+
type contributorProfileFrontmatter struct {
19+
DisplayName string `yaml:"display_name"`
20+
Bio string `yaml:"bio"`
21+
// Script assumes that if value is nil, the Registry site build step will
22+
// backfill the value with the user's GitHub avatar URL
23+
AvatarURL *string `yaml:"avatar"`
24+
LinkedinURL *string `yaml:"linkedin"`
25+
WebsiteURL *string `yaml:"website"`
26+
SupportEmail *string `yaml:"support_email"`
27+
ContributorStatus *string `yaml:"status"`
28+
}
29+
30+
type contributorProfile struct {
31+
frontmatter contributorProfileFrontmatter
32+
namespace string
33+
filePath string
34+
}
35+
36+
func validateContributorDisplayName(displayName string) error {
37+
if displayName == "" {
38+
return fmt.Errorf("missing display_name")
39+
}
40+
41+
return nil
42+
}
43+
44+
func validateContributorLinkedinURL(linkedinURL *string) error {
45+
if linkedinURL == nil {
46+
return nil
47+
}
48+
49+
if _, err := url.ParseRequestURI(*linkedinURL); err != nil {
50+
return fmt.Errorf("linkedIn URL %q is not valid: %v", *linkedinURL, err)
51+
}
52+
53+
return nil
54+
}
55+
56+
func validateContributorSupportEmail(email *string) []error {
57+
if email == nil {
58+
return nil
59+
}
60+
61+
errs := []error{}
62+
63+
// Can't 100% validate that this is correct without actually sending
64+
// an email, and especially with some contributors being individual
65+
// developers, we don't want to do that on every single run of the CI
66+
// pipeline. Best we can do is verify the general structure
67+
username, server, ok := strings.Cut(*email, "@")
68+
if !ok {
69+
errs = append(errs, fmt.Errorf("email address %q is missing @ symbol", *email))
70+
return errs
71+
}
72+
73+
if username == "" {
74+
errs = append(errs, fmt.Errorf("email address %q is missing username", *email))
75+
}
76+
77+
domain, tld, ok := strings.Cut(server, ".")
78+
if !ok {
79+
errs = append(errs, fmt.Errorf("email address %q is missing period for server segment", *email))
80+
return errs
81+
}
82+
83+
if domain == "" {
84+
errs = append(errs, fmt.Errorf("email address %q is missing domain", *email))
85+
}
86+
if tld == "" {
87+
errs = append(errs, fmt.Errorf("email address %q is missing top-level domain", *email))
88+
}
89+
if strings.Contains(*email, "?") {
90+
errs = append(errs, errors.New("email is not allowed to contain query parameters"))
91+
}
92+
93+
return errs
94+
}
95+
96+
func validateContributorWebsite(websiteURL *string) error {
97+
if websiteURL == nil {
98+
return nil
99+
}
100+
101+
if _, err := url.ParseRequestURI(*websiteURL); err != nil {
102+
return fmt.Errorf("linkedIn URL %q is not valid: %v", *websiteURL, err)
103+
}
104+
105+
return nil
106+
}
107+
108+
func validateContributorStatus(status *string) error {
109+
if status == nil {
110+
return nil
111+
}
112+
113+
if !slices.Contains(validContributorStatuses, *status) {
114+
return fmt.Errorf("contributor status %q is not valid", *status)
115+
}
116+
117+
return nil
118+
}
119+
120+
// Can't validate the image actually leads to a valid resource in a pure
121+
// function, but can at least catch obvious problems
122+
func validateContributorAvatarURL(avatarURL *string) []error {
123+
if avatarURL == nil {
124+
return nil
125+
}
126+
127+
errs := []error{}
128+
if *avatarURL == "" {
129+
errs = append(errs, errors.New("avatar URL must be omitted or non-empty string"))
130+
return errs
131+
}
132+
133+
// Have to use .Parse instead of .ParseRequestURI because this is the
134+
// one field that's allowed to be a relative URL
135+
if _, err := url.Parse(*avatarURL); err != nil {
136+
errs = append(errs, fmt.Errorf("URL %q is not a valid relative or absolute URL", *avatarURL))
137+
}
138+
if strings.Contains(*avatarURL, "?") {
139+
errs = append(errs, errors.New("avatar URL is not allowed to contain search parameters"))
140+
}
141+
142+
matched := false
143+
for _, ff := range supportedAvatarFileFormats {
144+
matched = strings.HasSuffix(*avatarURL, ff)
145+
if matched {
146+
break
147+
}
148+
}
149+
if !matched {
150+
segments := strings.Split(*avatarURL, ".")
151+
fileExtension := segments[len(segments)-1]
152+
errs = append(errs, fmt.Errorf("avatar URL '.%s' does not end in a supported file format: [%s]", fileExtension, strings.Join(supportedAvatarFileFormats, ", ")))
153+
}
154+
155+
return errs
156+
}
157+
158+
func validateContributorYaml(yml contributorProfile) []error {
159+
allErrs := []error{}
160+
161+
if err := validateContributorDisplayName(yml.frontmatter.DisplayName); err != nil {
162+
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
163+
}
164+
if err := validateContributorLinkedinURL(yml.frontmatter.LinkedinURL); err != nil {
165+
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
166+
}
167+
if err := validateContributorWebsite(yml.frontmatter.WebsiteURL); err != nil {
168+
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
169+
}
170+
if err := validateContributorStatus(yml.frontmatter.ContributorStatus); err != nil {
171+
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
172+
}
173+
174+
for _, err := range validateContributorSupportEmail(yml.frontmatter.SupportEmail) {
175+
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
176+
}
177+
for _, err := range validateContributorAvatarURL(yml.frontmatter.AvatarURL) {
178+
allErrs = append(allErrs, addFilePathToError(yml.filePath, err))
179+
}
180+
181+
return allErrs
182+
}
183+
184+
func parseContributorProfile(rm readme) (contributorProfile, error) {
185+
fm, _, err := separateFrontmatter(rm.rawText)
186+
if err != nil {
187+
return contributorProfile{}, fmt.Errorf("%q: failed to parse frontmatter: %v", rm.filePath, err)
188+
}
189+
190+
yml := contributorProfileFrontmatter{}
191+
if err := yaml.Unmarshal([]byte(fm), &yml); err != nil {
192+
return contributorProfile{}, fmt.Errorf("%q: failed to parse: %v", rm.filePath, err)
193+
}
194+
195+
return contributorProfile{
196+
filePath: rm.filePath,
197+
frontmatter: yml,
198+
namespace: strings.TrimSuffix(strings.TrimPrefix(rm.filePath, "registry/"), "/README.md"),
199+
}, nil
200+
}
201+
202+
func parseContributorFiles(readmeEntries []readme) (map[string]contributorProfile, error) {
203+
profilesByNamespace := map[string]contributorProfile{}
204+
yamlParsingErrors := []error{}
205+
for _, rm := range readmeEntries {
206+
p, err := parseContributorProfile(rm)
207+
if err != nil {
208+
yamlParsingErrors = append(yamlParsingErrors, err)
209+
continue
210+
}
211+
212+
if prev, alreadyExists := profilesByNamespace[p.namespace]; alreadyExists {
213+
yamlParsingErrors = append(yamlParsingErrors, fmt.Errorf("%q: namespace %q conflicts with namespace from %q", p.filePath, p.namespace, prev.filePath))
214+
continue
215+
}
216+
profilesByNamespace[p.namespace] = p
217+
}
218+
if len(yamlParsingErrors) != 0 {
219+
return nil, validationPhaseError{
220+
phase: validationPhaseReadmeParsing,
221+
errors: yamlParsingErrors,
222+
}
223+
}
224+
225+
yamlValidationErrors := []error{}
226+
for _, p := range profilesByNamespace {
227+
errors := validateContributorYaml(p)
228+
if len(errors) > 0 {
229+
yamlValidationErrors = append(yamlValidationErrors, errors...)
230+
continue
231+
}
232+
}
233+
if len(yamlValidationErrors) != 0 {
234+
return nil, validationPhaseError{
235+
phase: validationPhaseReadmeParsing,
236+
errors: yamlValidationErrors,
237+
}
238+
}
239+
240+
return profilesByNamespace, nil
241+
}
242+
243+
func aggregateContributorReadmeFiles() ([]readme, error) {
244+
dirEntries, err := os.ReadDir(rootRegistryPath)
245+
if err != nil {
246+
return nil, err
247+
}
248+
249+
allReadmeFiles := []readme{}
250+
errs := []error{}
251+
for _, e := range dirEntries {
252+
dirPath := path.Join(rootRegistryPath, e.Name())
253+
if !e.IsDir() {
254+
continue
255+
}
256+
257+
readmePath := path.Join(dirPath, "README.md")
258+
rmBytes, err := os.ReadFile(readmePath)
259+
if err != nil {
260+
errs = append(errs, err)
261+
continue
262+
}
263+
allReadmeFiles = append(allReadmeFiles, readme{
264+
filePath: readmePath,
265+
rawText: string(rmBytes),
266+
})
267+
}
268+
269+
if len(errs) != 0 {
270+
return nil, validationPhaseError{
271+
phase: validationPhaseFileLoad,
272+
errors: errs,
273+
}
274+
}
275+
276+
return allReadmeFiles, nil
277+
}
278+
279+
func validateContributorRelativeUrls(contributors map[string]contributorProfile) error {
280+
// This function only validates relative avatar URLs for now, but it can be
281+
// beefed up to validate more in the future
282+
errs := []error{}
283+
284+
for _, con := range contributors {
285+
// If the avatar URL is missing, we'll just assume that the Registry
286+
// site build step will take care of filling in the data properly
287+
if con.frontmatter.AvatarURL == nil {
288+
continue
289+
}
290+
291+
isRelativeURL := strings.HasPrefix(*con.frontmatter.AvatarURL, ".") ||
292+
strings.HasPrefix(*con.frontmatter.AvatarURL, "/")
293+
if !isRelativeURL {
294+
continue
295+
}
296+
297+
if strings.HasPrefix(*con.frontmatter.AvatarURL, "..") {
298+
errs = append(errs, fmt.Errorf("%q: relative avatar URLs cannot be placed outside a user's namespaced directory", con.filePath))
299+
continue
300+
}
301+
302+
absolutePath := strings.TrimSuffix(con.filePath, "README.md") +
303+
*con.frontmatter.AvatarURL
304+
_, err := os.ReadFile(absolutePath)
305+
if err != nil {
306+
errs = append(errs, fmt.Errorf("%q: relative avatar path %q does not point to image in file system", con.filePath, *con.frontmatter.AvatarURL))
307+
}
308+
}
309+
310+
if len(errs) == 0 {
311+
return nil
312+
}
313+
return validationPhaseError{
314+
phase: validationPhaseAssetCrossReference,
315+
errors: errs,
316+
}
317+
}
318+
319+
func validateAllContributorFiles() error {
320+
allReadmeFiles, err := aggregateContributorReadmeFiles()
321+
if err != nil {
322+
return err
323+
}
324+
325+
log.Printf("Processing %d README files\n", len(allReadmeFiles))
326+
contributors, err := parseContributorFiles(allReadmeFiles)
327+
if err != nil {
328+
return err
329+
}
330+
log.Printf("Processed %d README files as valid contributor profiles", len(contributors))
331+
332+
err = validateContributorRelativeUrls(contributors)
333+
if err != nil {
334+
return err
335+
}
336+
log.Println("All relative URLs for READMEs are valid")
337+
338+
log.Printf("Processed all READMEs in the %q directory\n", rootRegistryPath)
339+
return nil
340+
}

cmd/readmevalidation/errors.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
package main
2+
3+
import "fmt"
4+
5+
// validationPhaseError represents an error that occurred during a specific
6+
// phase of README validation. It should be used to collect ALL validation
7+
// errors that happened during a specific phase, rather than the first one
8+
// encountered.
9+
type validationPhaseError struct {
10+
phase validationPhase
11+
errors []error
12+
}
13+
14+
var _ error = validationPhaseError{}
15+
16+
func (vpe validationPhaseError) Error() string {
17+
msg := fmt.Sprintf("Error during %q phase of README validation:", vpe.phase.String())
18+
for _, e := range vpe.errors {
19+
msg += fmt.Sprintf("\n- %v", e)
20+
}
21+
msg += "\n"
22+
23+
return msg
24+
}
25+
26+
func addFilePathToError(filePath string, err error) error {
27+
return fmt.Errorf("%q: %v", filePath, err)
28+
}

0 commit comments

Comments
 (0)